mod allegro

l.gabrysiak 2025-02-28 21:40:42 +01:00
parent 03faf77ee4
commit cd535b4fe3
2 changed files with 6 additions and 4 deletions

View File

@@ -9,7 +9,7 @@ import numpy as np
 from sentence_transformers import SentenceTransformer
 from datasets import Dataset
 from peft import LoraConfig, get_peft_model
-from transformers import AutoModelForCausalLM, AutoTokenizer, TrainingArguments, Trainer, DataCollatorForLanguageModeling
+from transformers import AutoModelForCausalLM, AutoTokenizer, TrainingArguments, Trainer, DataCollatorForLanguageModeling, MarianForCausalLM, MarianTokenizer
 
 embed_model = SentenceTransformer("all-MiniLM-L6-v2")
@@ -64,8 +64,8 @@ eval_dataset = split_dataset["test"]
 device = "cuda" if torch.cuda.is_available() else "cpu"
 
 model_name = "allegro/multislav-5lang"
-model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=torch.float16).to(device)
-tokenizer = AutoTokenizer.from_pretrained(model_name)
+model = MarianForCausalLM.from_pretrained(model_name, torch_dtype=torch.float16).to(device)
+tokenizer = MarianTokenizer.from_pretrained(model_name)
 
 lora_config = LoraConfig(
     r=8, lora_alpha=32, lora_dropout=0.1, bias="none", task_type="CAUSAL_LM"
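
For reference, a minimal sketch of how the touched lines fit together after this commit. It assumes the surrounding script keeps the LoraConfig shown above and wraps the model with get_peft_model (as the existing peft import suggests); the target_modules remark is an assumption, not part of the diff.

import torch
from peft import LoraConfig, get_peft_model
from transformers import MarianForCausalLM, MarianTokenizer

device = "cuda" if torch.cuda.is_available() else "cpu"
model_name = "allegro/multislav-5lang"

# The Marian-specific classes replace AutoModelForCausalLM/AutoTokenizer so the
# Marian decoder is loaded explicitly as a causal-LM head.
model = MarianForCausalLM.from_pretrained(model_name, torch_dtype=torch.float16).to(device)
tokenizer = MarianTokenizer.from_pretrained(model_name)

# Same LoRA config as in the diff; if PEFT cannot infer target modules for the
# Marian architecture, they may need to be named explicitly (e.g. "q_proj", "v_proj").
lora_config = LoraConfig(
    r=8, lora_alpha=32, lora_dropout=0.1, bias="none", task_type="CAUSAL_LM"
)
model = get_peft_model(model, lora_config)
model.print_trainable_parameters()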

View File

@@ -12,3 +12,5 @@ peft
 weaviate-client
 sentence_transformers
 faiss-gpu
+sentencepiece
+sacremoses
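
The two new requirements back the Marian tokenizer: sentencepiece provides the subword model it loads, and sacremoses handles Moses-style pre- and post-processing (transformers raises or warns if they are missing). A quick sanity check, assuming the allegro/multislav-5lang checkpoint from the diff above ships a Marian tokenizer:

from transformers import MarianTokenizer

tokenizer = MarianTokenizer.from_pretrained("allegro/multislav-5lang")
print(tokenizer.tokenize("To jest test."))  # SentencePiece subword pieces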