mod gpt
This commit is contained in:
parent
0df49895cf
commit
746ce6bb8a
10
gpt.py
10
gpt.py
|
|
@ -16,10 +16,7 @@ def prepare_simple_dataset():
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
# Inicjalizacja tokenizera
|
# Inicjalizacja tokenizera
|
||||||
tokenizer = AutoTokenizer.from_pretrained(
|
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
|
||||||
MODEL_NAME,
|
|
||||||
mean_resizing=False
|
|
||||||
)
|
|
||||||
tokenizer.add_special_tokens({"additional_special_tokens": SPECIAL_TOKENS})
|
tokenizer.add_special_tokens({"additional_special_tokens": SPECIAL_TOKENS})
|
||||||
tokenizer.pad_token = tokenizer.eos_token
|
tokenizer.pad_token = tokenizer.eos_token
|
||||||
|
|
||||||
|
|
@ -42,7 +39,10 @@ def main():
|
||||||
tokenized_dataset = dataset.map(tokenize_function, batched=True)
|
tokenized_dataset = dataset.map(tokenize_function, batched=True)
|
||||||
|
|
||||||
# Model i data collator
|
# Model i data collator
|
||||||
model = AutoModelForCausalLM.from_pretrained(MODEL_NAME)
|
model = AutoModelForCausalLM.from_pretrained(
|
||||||
|
MODEL_NAME,
|
||||||
|
mean_resizing=False
|
||||||
|
)
|
||||||
model.resize_token_embeddings(len(tokenizer))
|
model.resize_token_embeddings(len(tokenizer))
|
||||||
|
|
||||||
data_collator = DataCollatorForLanguageModeling(
|
data_collator = DataCollatorForLanguageModeling(
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue