diff --git a/allegro.py b/allegro.py
index a7ff189..53b9036 100644
--- a/allegro.py
+++ b/allegro.py
@@ -1,7 +1,5 @@
-from transformers import MarianForCausalLM, MarianTokenizer, Trainer, TrainingArguments
-from datasets import load_dataset
+from transformers import MarianForCausalLM, MarianTokenizer, TrainingArguments
 
-# Załaduj model i tokenizer
 model_name = "allegro/multislav-5lang"
 model = MarianForCausalLM.from_pretrained(model_name)
 tokenizer = MarianTokenizer.from_pretrained(model_name)
@@ -9,39 +7,4 @@ tokenizer = MarianTokenizer.from_pretrained(model_name)
 model.save_pretrained("./models/ably")
 tokenizer.save_pretrained("./models/ably")
 
-print("✅ Model został wytrenowany i zapisany!")
-
-# Załaduj dane (przykład dla tłumaczenia z języka rumuńskiego na angielski)
-#dataset = load_dataset("wmt16", "ro-en")
-
-#def tokenize_function(examples):
-#    # Tokenizacja
-#    tokenized = tokenizer([example['en'] for example in examples['translation']],
-#                          [example['ro'] for example in examples['translation']],
-#                          truncation=True, padding='max_length', max_length=128)
-#    # Ustawienie labels
-#    tokenized['labels'] = tokenized['input_ids'].copy()
-#    return tokenized
-
-#tokenized_datasets = dataset.map(tokenize_function, batched=True)
-
-# Skonfiguruj trenera
-training_args = TrainingArguments(
-    output_dir="./results",
-    evaluation_strategy="epoch",
-    learning_rate=5e-5,
-    per_device_train_batch_size=4,
-    per_device_eval_batch_size=4,
-    num_train_epochs=3,
-    weight_decay=0.01,
-)
-
-#trainer = Trainer(
-#    model=model,
-#    args=training_args,
-#    train_dataset=tokenized_datasets["train"],
-#    eval_dataset=tokenized_datasets["test"],
-#)
-
-# Trening modelu
-#trainer.train()
\ No newline at end of file
+print("✅ Model został wytrenowany i zapisany!")
\ No newline at end of file
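
For reference, a minimal sketch of how the checkpoint saved to ./models/ably by this script could be loaded back and used for inference. This is an assumption-laden example, not part of the diff: it loads the weights via MarianMTModel (the standard Marian seq2seq class) rather than the MarianForCausalLM class used above, and the input sentence is purely illustrative.

from transformers import MarianMTModel, MarianTokenizer

# Load the locally saved checkpoint (path taken from the diff above).
# Assumption: the checkpoint is usable as a standard Marian seq2seq model.
model = MarianMTModel.from_pretrained("./models/ably")
tokenizer = MarianTokenizer.from_pretrained("./models/ably")

# Illustrative input; the multislav checkpoints may expect a target-language
# token prefix, so check the model card for the exact convention.
text = "To jest przykładowe zdanie."

inputs = tokenizer(text, return_tensors="pt", truncation=True)
outputs = model.generate(**inputs, max_new_tokens=64)
print(tokenizer.batch_decode(outputs, skip_special_tokens=True))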