diff --git a/hft.py b/hft.py
index 5679524..7f4292f 100644
--- a/hft.py
+++ b/hft.py
@@ -221,7 +221,7 @@ def main():
             "input_ids": tokenized["input_ids"].squeeze(),
             "attention_mask": tokenized["attention_mask"].squeeze(),
             "labels": tokenized["input_ids"].squeeze().clone(),
-            "source_idx": torch.tensor(examples["source_idx"], dtype=torch.long)
+            "source_idx": examples["source_idx"]  # added without converting to a tensor
         }
 
     tokenized_dataset = dataset.map(tokenize_function, batched=True, batch_size=16)
@@ -249,8 +249,7 @@ def main():
         model=model,
         args=training_args,
         train_dataset=tokenized_dataset,
-        data_collator=CustomDataCollator(tokenizer=tokenizer, mlm=False),
-        tokenizer=tokenizer
+        data_collator=CustomDataCollator(tokenizer=tokenizer, mlm=False)
     )
 
     print("\nStarting training...")
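
The first hunk stops converting "source_idx" to a tensor inside tokenize_function, leaving it as the plain Python list that a batched dataset.map pass produces, so any tensor conversion has to happen at collation time. The second hunk drops the tokenizer= argument from the Trainer call; the custom collator already receives the tokenizer directly, so collation does not depend on it. CustomDataCollator itself is not part of this diff; the snippet below is only a sketch of one plausible shape for it, assuming it extends DataCollatorForLanguageModeling and batches "source_idx" into a tensor itself.

# Sketch only: CustomDataCollator is referenced above but not defined in this diff.
# Assumed here to extend DataCollatorForLanguageModeling and to convert the plain-list
# "source_idx" values (left unconverted by tokenize_function) into a batch tensor.
import torch
from transformers import DataCollatorForLanguageModeling

class CustomDataCollator(DataCollatorForLanguageModeling):
    def __call__(self, features):
        # Remove the extra field before the parent collator pads/stacks the rest.
        source_idx = [f.pop("source_idx") for f in features]
        batch = super().__call__(features)
        # Re-attach it as a single LongTensor so it reaches the model as part of the batch.
        batch["source_idx"] = torch.tensor(source_idx, dtype=torch.long)
        return batch

With a collator like this, keeping "source_idx" as a list in the mapped dataset is enough; the per-batch tensor is built exactly once, at the point where the batch is assembled.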