This commit is contained in:
l.gabrysiak 2025-02-26 00:30:01 +01:00
parent 0df49895cf
commit 746ce6bb8a
1 changed file with 5 additions and 5 deletions

10
gpt.py
View File

@@ -16,10 +16,7 @@ def prepare_simple_dataset():
 def main():
     # Inicjalizacja tokenizera
-    tokenizer = AutoTokenizer.from_pretrained(
-        MODEL_NAME,
-        mean_resizing=False
-    )
+    tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
     tokenizer.add_special_tokens({"additional_special_tokens": SPECIAL_TOKENS})
     tokenizer.pad_token = tokenizer.eos_token
@@ -42,7 +39,10 @@ def main():
     tokenized_dataset = dataset.map(tokenize_function, batched=True)
     # Model i data collator
-    model = AutoModelForCausalLM.from_pretrained(MODEL_NAME)
+    model = AutoModelForCausalLM.from_pretrained(
+        MODEL_NAME,
+        mean_resizing=False
+    )
     model.resize_token_embeddings(len(tokenizer))
     data_collator = DataCollatorForLanguageModeling(