From 746ce6bb8ab91b12fb3966099d6f9ce3a8b1cccf Mon Sep 17 00:00:00 2001 From: "l.gabrysiak" Date: Wed, 26 Feb 2025 00:30:01 +0100 Subject: [PATCH] mod gpt --- gpt.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/gpt.py b/gpt.py index 19f7358..99c2dda 100644 --- a/gpt.py +++ b/gpt.py @@ -16,10 +16,7 @@ def prepare_simple_dataset(): def main(): # Inicjalizacja tokenizera - tokenizer = AutoTokenizer.from_pretrained( - MODEL_NAME, - mean_resizing=False - ) + tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME) tokenizer.add_special_tokens({"additional_special_tokens": SPECIAL_TOKENS}) tokenizer.pad_token = tokenizer.eos_token @@ -42,7 +39,10 @@ def main(): tokenized_dataset = dataset.map(tokenize_function, batched=True) # Model i data collator - model = AutoModelForCausalLM.from_pretrained(MODEL_NAME) + model = AutoModelForCausalLM.from_pretrained( + MODEL_NAME, + mean_resizing=False + ) model.resize_token_embeddings(len(tokenizer)) data_collator = DataCollatorForLanguageModeling(