mod
This commit is contained in:
parent
1b4449f619
commit
d073a1733b
5
hft.py
5
hft.py
|
|
@ -221,7 +221,7 @@ def main():
|
||||||
"input_ids": tokenized["input_ids"].squeeze(),
|
"input_ids": tokenized["input_ids"].squeeze(),
|
||||||
"attention_mask": tokenized["attention_mask"].squeeze(),
|
"attention_mask": tokenized["attention_mask"].squeeze(),
|
||||||
"labels": tokenized["input_ids"].squeeze().clone(),
|
"labels": tokenized["input_ids"].squeeze().clone(),
|
||||||
"source_idx": torch.tensor(examples["source_idx"], dtype=torch.long)
|
"source_idx": examples["source_idx"] # Dodano bez konwersji do tensora
|
||||||
}
|
}
|
||||||
|
|
||||||
tokenized_dataset = dataset.map(tokenize_function, batched=True, batch_size=16)
|
tokenized_dataset = dataset.map(tokenize_function, batched=True, batch_size=16)
|
||||||
|
|
@ -249,8 +249,7 @@ def main():
|
||||||
model=model,
|
model=model,
|
||||||
args=training_args,
|
args=training_args,
|
||||||
train_dataset=tokenized_dataset,
|
train_dataset=tokenized_dataset,
|
||||||
data_collator=CustomDataCollator(tokenizer=tokenizer, mlm=False),
|
data_collator=CustomDataCollator(tokenizer=tokenizer, mlm=False)
|
||||||
tokenizer=tokenizer
|
|
||||||
)
|
)
|
||||||
|
|
||||||
print("\nRozpoczęcie treningu...")
|
print("\nRozpoczęcie treningu...")
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue