From 4014b12ab4cdae7654e67ab14f5b36fb581e9cf3 Mon Sep 17 00:00:00 2001 From: "l.gabrysiak" Date: Tue, 25 Feb 2025 15:22:15 +0100 Subject: [PATCH] Reduce dataset.map batch_size to 8; drop batch_size kwarg from CustomTrainer --- hft.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/hft.py b/hft.py index eccec6c..8909b4b 100644 --- a/hft.py +++ b/hft.py @@ -159,7 +159,7 @@ tokenizer.pad_token = tokenizer.eos_token catalog_path = "file_catalog.json" data = prepare_dataset("files", catalog_path, source_mapper) dataset = Dataset.from_list(data) -tokenized_dataset = dataset.map(tokenize_function, batched=True, batch_size=32) +tokenized_dataset = dataset.map(tokenize_function, batched=True, batch_size=8) # Inicjalizacja modelu config = AutoModelForCausalLM.from_pretrained(model_name).config @@ -187,7 +187,6 @@ trainer = CustomTrainer( args=training_args, train_dataset=tokenized_dataset, data_collator=custom_collate_fn, # Użyj niestandardowego collate_fn - batch_size=8 # zmniejszenie rozmiaru batcha ) trainer.train()