mod
This commit is contained in:
parent
eb1f2229f0
commit
4014b12ab4
3
hft.py
3
hft.py
|
|
@@ -159,7 +159,7 @@ tokenizer.pad_token = tokenizer.eos_token
|
||||||
catalog_path = "file_catalog.json"
|
catalog_path = "file_catalog.json"
|
||||||
data = prepare_dataset("files", catalog_path, source_mapper)
|
data = prepare_dataset("files", catalog_path, source_mapper)
|
||||||
dataset = Dataset.from_list(data)
|
dataset = Dataset.from_list(data)
|
||||||
tokenized_dataset = dataset.map(tokenize_function, batched=True, batch_size=32)
|
tokenized_dataset = dataset.map(tokenize_function, batched=True, batch_size=8)
|
||||||
|
|
||||||
# Inicjalizacja modelu
|
# Inicjalizacja modelu
|
||||||
config = AutoModelForCausalLM.from_pretrained(model_name).config
|
config = AutoModelForCausalLM.from_pretrained(model_name).config
|
||||||
|
|
@@ -187,7 +187,6 @@ trainer = CustomTrainer(
|
||||||
args=training_args,
|
args=training_args,
|
||||||
train_dataset=tokenized_dataset,
|
train_dataset=tokenized_dataset,
|
||||||
data_collator=custom_collate_fn, # Użyj niestandardowego collate_fn
|
data_collator=custom_collate_fn, # Użyj niestandardowego collate_fn
|
||||||
batch_size=8 # zmniejszenie rozmiaru batcha
|
|
||||||
)
|
)
|
||||||
trainer.train()
|
trainer.train()
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue