diff --git a/hft.py b/hft.py
index f3f9e2c..17c259f 100644
--- a/hft.py
+++ b/hft.py
@@ -1,7 +1,8 @@
 import os
 import torch
 import torch.nn as nn
 from transformers import AutoTokenizer, AutoModelForCausalLM, TrainingArguments, Trainer
+from transformers import GPTNeoForCausalLM  # new concrete base class for CustomModel
 from datasets import Dataset
 from PIL import Image
 import re
@@ -118,11 +119,11 @@ def custom_collate_fn(batch):
     return {"input_ids": input_ids, "attention_mask": attention_mask, "labels": labels, "source_idx": source_idx}


-class CustomModel(AutoModelForCausalLM):
+class CustomModel(GPTNeoForCausalLM):  # changed base class
     def __init__(self, config):
         super().__init__(config)
         self.source_embedding = nn.Embedding(
-            num_embeddings=1000,  # maximum number of unique sources
+            num_embeddings=1000,
            embedding_dim=config.hidden_size,
            padding_idx=-1
         )
@@ -136,7 +137,6 @@ class CustomModel(AutoModelForCausalLM):
         )

         if source_idx is not None:
-            # add the source embedding to the hidden states
             source_embeds = self.source_embedding(source_idx).unsqueeze(1)
             outputs.logits += source_embeds

@@ -163,7 +163,9 @@ tokenized_dataset = dataset.map(tokenize_function, batched=True, batch_size=32)

 # Model initialization
 config = AutoModelForCausalLM.from_pretrained(model_name).config
-model = CustomModel.from_pretrained(model_name, config=config)
+#model = CustomModel.from_pretrained(model_name, config=config)
+model = CustomModel.from_pretrained(model_name)
+model.resize_token_embeddings(len(tokenizer))
 model.gradient_checkpointing_enable()

 # Training configuration
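
Note on the base-class switch: AutoModelForCausalLM is a factory class whose constructor refuses direct instantiation, so subclassing it and calling super().__init__(config) fails, whereas a concrete architecture class such as GPTNeoForCausalLM subclasses cleanly. The sketch below is a minimal, self-contained illustration of the patched loading path only; the checkpoint name is an assumption (hft.py defines its own model_name), and the custom forward pass with source_idx is omitted.

# Minimal sketch; the checkpoint name is an assumption, not taken from hft.py.
import torch.nn as nn
from transformers import AutoTokenizer, GPTNeoForCausalLM


class CustomModel(GPTNeoForCausalLM):
    def __init__(self, config):
        super().__init__(config)  # works: GPTNeoForCausalLM is a concrete model class
        # one learned vector per data source, sized to the backbone's hidden dimension
        self.source_embedding = nn.Embedding(
            num_embeddings=1000,
            embedding_dim=config.hidden_size,
        )


model_name = "EleutherAI/gpt-neo-125m"           # assumption: any GPT-Neo checkpoint
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = CustomModel.from_pretrained(model_name)  # source_embedding weights are newly initialized
model.resize_token_embeddings(len(tokenizer))    # keep embeddings in sync with any added tokens
print(model.source_embedding)

Passing config explicitly to from_pretrained, as in the commented-out line, is not needed here: from_pretrained loads the checkpoint's config itself and routes it through __init__.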