mod gemma

2025-02-26 13:53:34 +01:00 · 2025-02-26 13:53:34 +01:00 · b0525e303d
parent 1241d01180
commit b0525e303d
1 changed files with 17 additions and 6 deletions
--- a/gemma.py
+++ b/gemma.py
@@ -13,12 +13,23 @@ from transformers import AutoModelForCausalLM, AutoTokenizer, TrainingArguments,
 embed_model = SentenceTransformer("all-MiniLM-L6-v2")
 # 2️⃣ Dodanie dokumentów i embeddingów
-documents = [
+def read_documents_from_file(file_path):
-    "Jak założyć firmę w Polsce?", 
+    with open(file_path, 'r', encoding='utf-8') as file:
-    "Jak rozliczyć podatek VAT?", 
+        content = file.read()
-    "Procedura składania reklamacji w e-sklepie.",
+        articles = content.split('\n\n')
-    "Jakie dokumenty są potrzebne do rejestracji działalności?"
+        documents = []
-]
+        for article in articles:
+            if article.strip().startswith('Art.'):
+                documents.append(article.strip())
+    return documents
+#documents = [
+#    "Jak założyć firmę w Polsce?", 
+#    "Jak rozliczyć podatek VAT?", 
+#    "Procedura składania reklamacji w e-sklepie.",
+#    "Jakie dokumenty są potrzebne do rejestracji działalności?"
+#]
+file_path = './docs/kodekspracy.txt'  # Zmień na właściwą ścieżkę
+documents = read_documents_from_file(file_path)
 embeddings = embed_model.encode(documents)
 # 3️⃣ Inicjalizacja FAISS i dodanie wektorów