This commit is contained in:
l.gabrysiak 2025-02-26 00:32:41 +01:00
parent 746ce6bb8a
commit ffe1bf5eab
1 changed file with 2 additions and 5 deletions

7
gpt.py
View File

@@ -39,11 +39,8 @@ def main():
tokenized_dataset = dataset.map(tokenize_function, batched=True)
# Model and data collator
-model = AutoModelForCausalLM.from_pretrained(
-MODEL_NAME,
-mean_resizing=False
-)
-model.resize_token_embeddings(len(tokenizer))
+model = AutoModelForCausalLM.from_pretrained(MODEL_NAME)
+model.resize_token_embeddings(len(tokenizer), mean_resizing=False)
data_collator = DataCollatorForLanguageModeling(
tokenizer=tokenizer,