Tokenize WMT16 ro-en sentence pairs correctly in tokenize_function.

With dataset.map(..., batched=True) the mapped function receives each column
as a list, so examples['translation'] is presumably a list of
{'ro': ..., 'en': ...} dicts (verify against the dataset schema) and cannot be
indexed with a string key directly. Collect the Romanian and English sentences
per example before passing them to the tokenizer as text / text pair.

NOTE(review): line breaks, blank context lines and the 4-space indentation
were reconstructed from the hunk header (-11,7 +11,7); confirm against
allegro.py before applying. The index line still carries the original blob
hashes.

diff --git a/allegro.py b/allegro.py
index 92f20d3..dfc74bb 100644
--- a/allegro.py
+++ b/allegro.py
@@ -11,7 +11,7 @@ dataset = load_dataset("wmt16", "ro-en")
 
 # Przetwórz dane do formatu odpowiedniego dla modelu
 def tokenize_function(examples):
-    return tokenizer(examples['translation'], truncation=True, padding='max_length', max_length=128)
+    return tokenizer([pair['ro'] for pair in examples['translation']], [pair['en'] for pair in examples['translation']], truncation=True, padding='max_length', max_length=128)
 
 tokenized_datasets = dataset.map(tokenize_function, batched=True)
 