poprawa prepare_dataset
This commit is contained in:
parent
fee23b6e5d
commit
2edb4eda95
3
hft.py
3
hft.py
|
|
@@ -109,8 +109,7 @@ tokenizer = AutoTokenizer.from_pretrained(model_name)
|
|||
model = CustomModel.from_pretrained(model_name)
|
||||
|
||||
# Przygotowanie datasetu
|
||||
catalog_path = "file_catalog.json"
|
||||
data = prepare_dataset("files")
|
||||
data = prepare_dataset("files", "file_catalog.json")
|
||||
dataset = load_dataset("dict", data=data)
|
||||
tokenized_dataset = dataset.map(tokenize_function, batched=True, remove_columns=dataset["train"].column_names)
|
||||
|
||||
|
|
|
|||
Loading…
Reference in New Issue