From 33eff363bc7b4b5e9b21cbbf9cfdf839675b36ea Mon Sep 17 00:00:00 2001
From: "l.gabrysiak"
Date: Fri, 28 Feb 2025 21:44:24 +0100
Subject: [PATCH] mod allegro

---
Notes (free text between the "---" marker and the diff; not part of the change):

  * What the hunk does: tokenize_function() no longer indexes
    examples['translation'] with the string keys 'ro'/'en'. It now builds two
    lists, the 'en' texts and the 'ro' texts, by iterating over
    examples['translation'], and passes them to tokenizer() in that order
    (previously 'ro' first, then 'en'). The function is mapped with
    batched=True (see the trailing context line).
  * This file had lost all of its line breaks. The line structure below was
    reconstructed from the diffstat and the hunk header (-11,7 +11,10).
    Indentation and the position of the blank context lines are assumptions;
    verify against allegro.py at blob dfc74bb before applying.
  * The comment on the first added line was translated from Polish to
    English, so the post-image blob id (a058b10) on the index line no longer
    describes the resulting file exactly. The Polish comment in the context
    lines is untouched because it has to match the target file.

 allegro.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/allegro.py b/allegro.py
index dfc74bb..a058b10 100644
--- a/allegro.py
+++ b/allegro.py
@@ -11,7 +11,10 @@ dataset = load_dataset("wmt16", "ro-en")
 
 # Przetwórz dane do formatu odpowiedniego dla modelu
 def tokenize_function(examples):
-    return tokenizer(examples['translation']['ro'], examples['translation']['en'], truncation=True, padding='max_length', max_length=128)
+    # 'translation' is expected to be a list of dicts, e.g. [{'en': 'text1', 'ro': 'text1_translated'}, ...]
+    return tokenizer([example['en'] for example in examples['translation']],
+                     [example['ro'] for example in examples['translation']],
+                     truncation=True, padding='max_length', max_length=128)
 
 tokenized_datasets = dataset.map(tokenize_function, batched=True)
 