mod allegro
This commit is contained in:
parent
447de65d83
commit
33eff363bc
|
|
@ -11,7 +11,10 @@ dataset = load_dataset("wmt16", "ro-en")
|
|||
|
||||
# Convert the raw data into the format the model expects.
def tokenize_function(examples):
    """Tokenize a batch of translation examples.

    ``examples['translation']`` is expected to be a list of dicts, e.g.
    ``[{'en': 'text1', 'ro': 'text1_translated'}, ...]``. The ``'en'``
    texts are passed to the tokenizer as the first argument and the
    ``'ro'`` texts as the second, with ``truncation=True``,
    ``padding='max_length'`` and ``max_length=128``.

    Returns whatever the module-level ``tokenizer`` returns for that call.
    """
    # Indexing examples['translation'] directly with 'ro'/'en' fails when it
    # is a list of dicts, so each language is collected per example instead.
    translations = examples['translation']
    english_texts = [example['en'] for example in translations]
    romanian_texts = [example['ro'] for example in translations]
    # NOTE(review): the second positional argument is presumably treated by
    # the tokenizer as a paired sequence, not as target labels -- confirm
    # this is what the model needs.
    return tokenizer(
        english_texts,
        romanian_texts,
        truncation=True,
        padding='max_length',
        max_length=128,
    )
|
||||
|
||||
# Run tokenize_function over the dataset through map() with batched=True
# and keep the result as tokenized_datasets.
tokenized_datasets = dataset.map(tokenize_function, batched=True)
|
||||
|
||||
|
|
|
|||
Loading…
Reference in New Issue