diff --git a/hft.py b/hft.py index 7f4292f..7c184d5 100644 --- a/hft.py +++ b/hft.py @@ -207,7 +207,9 @@ def main(): print("\nBrak danych do treningu!") return - dataset = Dataset.from_list(data) + #dataset = Dataset.from_list(data) + dataset = Dataset.from_dict({k: [d[k] for d in data] for k in data[0]}) + def tokenize_function(examples): tokenized = tokenizer(