diff --git a/hft.py b/hft.py
index 3e30477..195993f 100644
--- a/hft.py
+++ b/hft.py
@@ -12,6 +12,9 @@ import json
 from collections import defaultdict
 from huggingface_hub import login
 
-login(token="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")
+import torch
+torch.cuda.empty_cache()  # NOTE(review): runs before any CUDA work visible here, so it likely frees nothing - confirm it is needed
+
+login(token=os.environ["HF_TOKEN"])  # token comes from the environment (KeyError if unset); revoke the previously committed token
 os.environ["TOKENIZERS_PARALLELISM"] = "false"
 
@@ -173,7 +176,8 @@ training_args = TrainingArguments(
     logging_steps=100,
     save_strategy="steps",
     save_steps=1000,
-    report_to="none"
+    report_to="none",
+    gradient_checkpointing=True,  # per Transformers docs: saves memory at the cost of a slower backward pass
 )
 
 # Trening