diff --git a/hft.py b/hft.py
index 9827899..3e00a5c 100644
--- a/hft.py
+++ b/hft.py
@@ -8,6 +8,8 @@ import re
 import pytesseract
 import docx2txt
 import PyPDF2
+import os
+from huggingface_hub import login
 
 def load_file_catalog(catalog_path):
     with open(catalog_path, 'r', encoding='utf-8') as file:
@@ -105,6 +107,8 @@ tokenizer = AutoTokenizer.from_pretrained(model_name)
 model = CustomModel.from_pretrained(model_name)
 
 # Przygotowanie datasetu
+# Authenticate with the Hugging Face Hub; the token comes from the HF_TOKEN environment variable so no secret is committed.
+login(token=os.environ["HF_TOKEN"])
 catalog_path = "file_catalog.json"
 data = prepare_dataset("files")
 dataset = load_dataset("dict", data=data)