diff --git a/hft.py b/hft.py
index eccec6c..8909b4b 100644
--- a/hft.py
+++ b/hft.py
@@ -159,7 +159,7 @@ tokenizer.pad_token = tokenizer.eos_token
 catalog_path = "file_catalog.json"
 data = prepare_dataset("files", catalog_path, source_mapper)
 dataset = Dataset.from_list(data)
-tokenized_dataset = dataset.map(tokenize_function, batched=True, batch_size=32)
+tokenized_dataset = dataset.map(tokenize_function, batched=True, batch_size=8)
 
 # Initialize the model
 config = AutoModelForCausalLM.from_pretrained(model_name).config
@@ -187,7 +187,6 @@ trainer = CustomTrainer(
     args=training_args,
     train_dataset=tokenized_dataset,
     data_collator=custom_collate_fn,  # use the custom collate_fn
-    batch_size=8  # reduce the batch size
 )
 
 trainer.train()
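
Note: the batch_size keyword is dropped from the CustomTrainer(...) call because the Hugging Face Trainer constructor does not accept a batch_size argument; the training batch size is configured through TrainingArguments instead. A minimal sketch of how the existing training_args could carry that setting (output_dir and the other values here are illustrative assumptions, not taken from hft.py):

from transformers import TrainingArguments

# Batch size belongs in TrainingArguments, not in the Trainer constructor.
training_args = TrainingArguments(
    output_dir="./results",          # illustrative value
    per_device_train_batch_size=8,   # replaces the removed batch_size=8 kwarg
    gradient_accumulation_steps=4,   # optional: recovers a larger effective batch
    num_train_epochs=3,              # illustrative value
)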