diff --git a/allegro.py b/allegro.py
index 7a1582f..a7ff189 100644
--- a/allegro.py
+++ b/allegro.py
@@ -6,19 +6,24 @@ model_name = "allegro/multislav-5lang"
 model = MarianForCausalLM.from_pretrained(model_name)
 tokenizer = MarianTokenizer.from_pretrained(model_name)
 
+model.save_pretrained("./models/ably")
+tokenizer.save_pretrained("./models/ably")
+
+print("✅ Model has been saved!")
+
 # Load the data (example for Romanian-to-English translation)
-dataset = load_dataset("wmt16", "ro-en")
+#dataset = load_dataset("wmt16", "ro-en")
 
-def tokenize_function(examples):
-    # Tokenization
-    tokenized = tokenizer([example['en'] for example in examples['translation']],
-                          [example['ro'] for example in examples['translation']],
-                          truncation=True, padding='max_length', max_length=128)
-    # Set the labels
-    tokenized['labels'] = tokenized['input_ids'].copy()
-    return tokenized
+#def tokenize_function(examples):
+#    # Tokenization
+#    tokenized = tokenizer([example['en'] for example in examples['translation']],
+#                          [example['ro'] for example in examples['translation']],
+#                          truncation=True, padding='max_length', max_length=128)
+#    # Set the labels
+#    tokenized['labels'] = tokenized['input_ids'].copy()
+#    return tokenized
 
-tokenized_datasets = dataset.map(tokenize_function, batched=True)
+#tokenized_datasets = dataset.map(tokenize_function, batched=True)
 
 # Configure the trainer
 training_args = TrainingArguments(
@@ -31,12 +36,12 @@ training_args = TrainingArguments(
     weight_decay=0.01,
 )
 
-trainer = Trainer(
-    model=model,
-    args=training_args,
-    train_dataset=tokenized_datasets["train"],
-    eval_dataset=tokenized_datasets["test"],
-)
+#trainer = Trainer(
+#    model=model,
+#    args=training_args,
+#    train_dataset=tokenized_datasets["train"],
+#    eval_dataset=tokenized_datasets["test"],
+#)
 
 # Model training
-trainer.train()
\ No newline at end of file
+#trainer.train()
\ No newline at end of file
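
The diff above drops the WMT16 fine-tuning path and instead saves the pretrained checkpoint to ./models/ably. Below is a minimal sketch of how that saved checkpoint could be reloaded for translation inference. Two details are assumptions, not taken from the diff: MarianMTModel is used in place of MarianForCausalLM, since the seq2seq class is what Marian translation checkpoints normally need for generate(), and the ">>pol<<" target-language prefix follows the usual many-to-many Marian convention; verify the exact tokens against the allegro/multislav-5lang model card.

# Minimal inference sketch (see assumptions noted above)
from transformers import MarianMTModel, MarianTokenizer

# Reload the artifacts saved by the diff; the seq2seq class is an assumption
model = MarianMTModel.from_pretrained("./models/ably")
tokenizer = MarianTokenizer.from_pretrained("./models/ably")

# ">>pol<<" is a hypothetical target-language token; check the model card
text = ">>pol<< This is a test sentence."
inputs = tokenizer(text, return_tensors="pt")

# Greedy decoding with a conservative length cap
outputs = model.generate(**inputs, max_new_tokens=128)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))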