diff --git a/hft.py b/hft.py
index 11f15b8..05014cd 100644
--- a/hft.py
+++ b/hft.py
@@ -15,7 +15,6 @@ from huggingface_hub import login
 login(token="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")
 os.environ["TOKENIZERS_PARALLELISM"] = "false"
 
-# New class for managing sources
 class SourceMapper:
     def __init__(self):
         self.source_to_idx = defaultdict(lambda: len(self.source_to_idx))
@@ -119,7 +118,7 @@ class CustomModel(nn.Module):
         super().__init__()
         self.base_model = AutoModelForCausalLM.from_pretrained(model_name, config=config)
         self.source_embedding = nn.Embedding(
-            num_embeddings=1000,  # Maximum number of unique sources
+            num_embeddings=1000,
             embedding_dim=config.hidden_size,
             padding_idx=-1
         )
@@ -133,8 +132,9 @@ class CustomModel(nn.Module):
         )
 
         if source_idx is not None:
-            # Add the source embedding to the logits
-            source_embeds = self.source_embedding(source_idx).unsqueeze(1)
+            print("outputs.logits shape:", outputs.logits.shape)
+            source_embeds = self.source_embedding(source_idx).unsqueeze(1).expand(-1, outputs.logits.size(1), -1)
+            print("source_embeds shape:", source_embeds.shape)
             outputs.logits += source_embeds
 
         return outputs
@@ -149,7 +149,7 @@ class CustomTrainer(Trainer):
 
 # Initialize components
 source_mapper = SourceMapper()
-model_name = "crumb/nano-mistral" #"google/gemma-2-2b"
+model_name = "crumb/nano-mistral"
 tokenizer = AutoTokenizer.from_pretrained(model_name)
 tokenizer.pad_token = tokenizer.eos_token
 
@@ -162,6 +162,7 @@ tokenized_dataset = dataset.map(tokenize_function, batched=True, batch_size=8)
 # Initialize the model
 config = AutoModelForCausalLM.from_pretrained(model_name).config
 model = CustomModel(model_name, config)
+model.to("cpu")
 
 # Training configuration
 training_args = TrainingArguments(
@@ -171,11 +172,10 @@ training_args = TrainingArguments(
     gradient_accumulation_steps=4,
     learning_rate=2e-5,
     fp16=True,
-    logging_steps=1,  # More frequent logging
-    logging_dir="./logs",  # Log directory
+    logging_steps=1,
+    logging_dir="./logs",
     save_strategy="steps",
     save_steps=1000,
-    #report_to="none"
 )
 
 # Training
@@ -183,7 +183,7 @@ trainer = CustomTrainer(
     model=model,
     args=training_args,
     train_dataset=tokenized_dataset,
-    data_collator=custom_collate_fn,  # Use the custom collate_fn
+    data_collator=custom_collate_fn,
 )
 
 trainer.train()
@@ -211,4 +211,4 @@ def generate_answer(question, model, tokenizer, source_mapper, max_length=200):
 # Example usage
 question = "Ile dni urlopu przysługuje pracownikowi?"
 answer = generate_answer(question, model, tokenizer, source_mapper)
-print(answer)
\ No newline at end of file
+print(answer)