mod gemma

2025-02-26 13:53:34 +01:00 · 2025-02-26 13:53:34 +01:00 · b0525e303d
parent 1241d01180
commit b0525e303d
1 changed file with 17 additions and 6 deletions
--- a/gemma.py
+++ b/gemma.py
@ -13,12 +13,23 @@ from transformers import AutoModelForCausalLM, AutoTokenizer, TrainingArguments,
 embed_model = SentenceTransformer("all-MiniLM-L6-v2")

 # 2️⃣ Dodanie dokumentów i embeddingów
-documents = [
-    "Jak założyć firmę w Polsce?", 
-    "Jak rozliczyć podatek VAT?", 
-    "Procedura składania reklamacji w e-sklepie.",
-    "Jakie dokumenty są potrzebne do rejestracji działalności?"
-]
+def read_documents_from_file(file_path):
+    """Return the stripped blocks of the UTF-8 file at file_path that start with 'Art.'.
+    Blocks are separated by blank lines; a line holding only whitespace counts as
+    blank, so stray spaces cannot glue two articles into one document.
+    """
+    import re  # function-scope import: the file's import block is outside this hunk
+    with open(file_path, 'r', encoding='utf-8') as file:
+        blocks = [block.strip() for block in re.split(r'\n\s*\n', file.read())]
+    return [block for block in blocks if block.startswith('Art.')]
+#documents = [
+#    "Jak założyć firmę w Polsce?", 
+#    "Jak rozliczyć podatek VAT?", 
+#    "Procedura składania reklamacji w e-sklepie.",
+#    "Jakie dokumenty są potrzebne do rejestracji działalności?"
+#]
+file_path = './docs/kodekspracy.txt'  # Change to the correct path (relative paths resolve against the working directory)
+documents = read_documents_from_file(file_path)
 embeddings = embed_model.encode(documents)

 # 3️⃣ Inicjalizacja FAISS i dodanie wektorów