poprawka c.d.

This commit is contained in:
l.gabrysiak 2025-02-25 12:20:29 +01:00
parent 2edb4eda95
commit d116bcaec9
1 changed file with 2 additions and 1 deletion

3
hft.py
View File

@@ -109,7 +109,8 @@ tokenizer = AutoTokenizer.from_pretrained(model_name)
model = CustomModel.from_pretrained(model_name)
# Prepare the dataset
# NOTE(review): this fragment is a rendered diff hunk whose +/- markers were
# lost; the next line appears to be the removed (pre-change) form of the two
# lines that follow it. As written, its result is overwritten by the second
# prepare_dataset call — confirm against the actual hft.py.
data = prepare_dataset("files", "file_catalog.json")
catalog_path = "file_catalog.json"
data = prepare_dataset("files", catalog_path)
# NOTE(review): presumably this is Hugging Face `datasets.load_dataset`; "dict"
# does not look like one of its documented builders — verify this call
# (`Dataset.from_dict(data)` may be what is intended).
dataset = load_dataset("dict", data=data)
# Apply tokenize_function in batched mode and drop every column that the
# "train" split currently has, keeping only what tokenize_function returns.
tokenized_dataset = dataset.map(tokenize_function, batched=True, remove_columns=dataset["train"].column_names)