poprawa prepare_dataset

This commit is contained in:
l.gabrysiak 2025-02-25 12:19:36 +01:00
parent fee23b6e5d
commit 2edb4eda95
1 changed file with 1 addition and 2 deletions

3
hft.py
View File

@@ -109,8 +109,7 @@ tokenizer = AutoTokenizer.from_pretrained(model_name)
model = CustomModel.from_pretrained(model_name)
# Przygotowanie datasetu
catalog_path = "file_catalog.json"
data = prepare_dataset("files")
data = prepare_dataset("files", "file_catalog.json")
dataset = load_dataset("dict", data=data)
tokenized_dataset = dataset.map(tokenize_function, batched=True, remove_columns=dataset["train"].column_names)