This commit is contained in:
l.gabrysiak 2025-02-25 15:22:15 +01:00
parent eb1f2229f0
commit 4014b12ab4
1 changed file with 1 addition and 2 deletions

3
hft.py
View File

@@ -159,7 +159,7 @@ tokenizer.pad_token = tokenizer.eos_token
catalog_path = "file_catalog.json" catalog_path = "file_catalog.json"
data = prepare_dataset("files", catalog_path, source_mapper) data = prepare_dataset("files", catalog_path, source_mapper)
dataset = Dataset.from_list(data) dataset = Dataset.from_list(data)
tokenized_dataset = dataset.map(tokenize_function, batched=True, batch_size=32) tokenized_dataset = dataset.map(tokenize_function, batched=True, batch_size=8)
# Inicjalizacja modelu # Inicjalizacja modelu
config = AutoModelForCausalLM.from_pretrained(model_name).config config = AutoModelForCausalLM.from_pretrained(model_name).config
@@ -187,7 +187,6 @@ trainer = CustomTrainer(
args=training_args, args=training_args,
train_dataset=tokenized_dataset, train_dataset=tokenized_dataset,
data_collator=custom_collate_fn, # Użyj niestandardowego collate_fn data_collator=custom_collate_fn, # Użyj niestandardowego collate_fn
batch_size=8 # zmniejszenie rozmiaru batcha
) )
trainer.train() trainer.train()