# Patch for hft.py (git unified diff, two hunks).
#
# Hunk 1 (@@ -12,6 +12,9 @@): after the existing `from huggingface_hub import login`
#   line, adds `import torch`, a module-level call to `torch.cuda.empty_cache()`,
#   and one blank line. The `login(...)` and `os.environ[...]` lines are unchanged
#   context.
# Hunk 2 (@@ -173,7 +176,8 @@, inside `training_args = TrainingArguments(`): adds a
#   trailing comma to `report_to="none"` and a new keyword argument
#   `gradient_checkpointing=True`.
#
# NOTE(review): the context line calls `login(token=...)` with a literal token
#   string. This patch does not introduce it, but the value is visible here, so it
#   should be treated as leaked: revoke it and load the token from the environment
#   or a secret store instead of source code.
# NOTE(review): `torch.cuda.empty_cache()` runs at import time, before any code
#   visible in this patch could have allocated GPU memory; presumably it has no
#   effect at this point -- confirm, and consider calling it where memory is
#   actually released. It is also placed between the imports and the login call.
# NOTE(review): `gradient_checkpointing=True` is documented by Transformers as a
#   memory-saving option with a slower backward pass -- confirm that trade-off is
#   intended for this training run.
# NOTE(review): the hunk headers declare multi-line hunks (6 -> 9 and 7 -> 8
#   lines), but the diff below sits on a single physical line; the original line
#   breaks and indentation must be restored before this can be applied as a patch.
diff --git a/hft.py b/hft.py index 3e30477..195993f 100644 --- a/hft.py +++ b/hft.py @@ -12,6 +12,9 @@ import json from collections import defaultdict from huggingface_hub import login +import torch +torch.cuda.empty_cache() + login(token="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") os.environ["TOKENIZERS_PARALLELISM"] = "false" @@ -173,7 +176,8 @@ training_args = TrainingArguments( logging_steps=100, save_strategy="steps", save_steps=1000, - report_to="none" + report_to="none", + gradient_checkpointing=True ) # Trening