This commit is contained in:
l.gabrysiak 2025-02-25 22:08:18 +01:00
parent 1b4449f619
commit d073a1733b
1 changed file with 2 additions and 3 deletions

5
hft.py
View File

@@ -221,7 +221,7 @@ def main():
"input_ids": tokenized["input_ids"].squeeze(),
"attention_mask": tokenized["attention_mask"].squeeze(),
"labels": tokenized["input_ids"].squeeze().clone(),
"source_idx": torch.tensor(examples["source_idx"], dtype=torch.long)
"source_idx": examples["source_idx"] # Dodano bez konwersji do tensora
}
tokenized_dataset = dataset.map(tokenize_function, batched=True, batch_size=16)
@@ -249,8 +249,7 @@ def main():
model=model,
args=training_args,
train_dataset=tokenized_dataset,
data_collator=CustomDataCollator(tokenizer=tokenizer, mlm=False),
tokenizer=tokenizer
data_collator=CustomDataCollator(tokenizer=tokenizer, mlm=False)
)
print("\nRozpoczęcie treningu...")