import re

import weaviate
from weaviate.connect import ConnectionParams

# Configure the Weaviate client.
client = weaviate.WeaviateClient(
    connection_params=ConnectionParams.from_params(
        http_host="weaviate",
        http_port=8080,
        http_secure=False,
        grpc_host="weaviate",
        grpc_port=50051,
        grpc_secure=False,
    )
)
client.connect()

# Fetch the collection that the search helpers query.
collection = client.collections.get("Document")


def extract_full_article(content, article_number):
    """Return the text of article *article_number* found in *content*.

    The article starts at ``Art. <number>.`` and runs up to (but not
    including) the next ``Art. <digits>.`` heading, or to the end of the
    text when no further heading follows.

    Args:
        content: Text to search.
        article_number: Article number, as a string or an int.

    Returns:
        The matched article with surrounding whitespace stripped, or
        ``None`` when the heading does not occur in *content*.
    """
    # Escape the number so it is always matched literally, never
    # interpreted as regex syntax; str() also accepts plain ints.
    number = re.escape(str(article_number))
    pattern = rf"Art\.\s*{number}\..*?(?=Art\.\s*\d+\.|\Z)"
    # DOTALL lets the lazy ``.*?`` run across line breaks inside the article.
    match = re.search(pattern, content, re.DOTALL)
    if match:
        return match.group(0).strip()
    return None


def extract_relevant_fragment(content, query, context_size=100):
    """Pick the part of *content* that best illustrates *query*.

    Three strategies are tried in order:

    1. If *query* starts with ``Art. <digits>``, return that whole article
       when it can be found in *content*.
    2. Otherwise look for *query* as a case-insensitive substring and
       return it with *context_size* characters of context on each side,
       wrapped in ``...``.
    3. Otherwise return the first 200 characters followed by ``...``.

    Args:
        content: Document text; ``None`` is treated as an empty string.
        query: The search phrase entered by the user.
        context_size: Characters of context kept on each side of a
            substring hit.

    Returns:
        The selected fragment as a string.
    """
    # A ``None`` value would make the regex and string calls below raise
    # TypeError; fall back to an empty text instead.
    content = content or ""

    article_match = re.match(r"Art\.\s*(\d+)", query)
    if article_match:
        full_article = extract_full_article(content, article_match.group(1))
        if full_article:
            return full_article

    # Case-insensitive lookup; the offset found in the lower-cased copy is
    # then used to slice the original text.
    index = content.lower().find(query.lower())
    if index != -1:
        start = max(0, index - context_size)
        end = min(len(content), index + len(query) + context_size)
        return f"...{content[start:end]}..."

    return content[:200] + "..."
def _print_results(response, query):
    """Print UUID, relevant fragment and file name of every object in *response*."""
    for obj in response.objects:
        print(f"UUID: {obj.uuid}")
        relevant_fragment = extract_relevant_fragment(obj.properties['content'], query)
        print(f"Relewantny fragment:\n{relevant_fragment}")
        print(f"Nazwa pliku: {obj.properties['fileName']}")
        print("---")


def vector_search(query, limit=5):
    """Run a ``near_text`` query on the collection and print the hits.

    Args:
        query: Text passed to ``collection.query.near_text``.
        limit: Maximum number of objects requested.
    """
    print(f"\nWyszukiwanie wektorowe dla zapytania: '{query}'")
    response = collection.query.near_text(
        query=query,
        limit=limit
    )
    _print_results(response, query)


def hybrid_search(query, limit=5, alpha=0.5):
    """Run a ``hybrid`` query on the collection and print the hits.

    Args:
        query: Text passed to ``collection.query.hybrid``.
        limit: Maximum number of objects requested.
        alpha: Forwarded unchanged as the ``alpha`` argument of the
            hybrid query.
    """
    print(f"\nWyszukiwanie hybrydowe dla zapytania: '{query}'")
    response = collection.query.hybrid(
        query=query,
        alpha=alpha,
        limit=limit
    )
    _print_results(response, query)


# Disabled diagnostics, kept for manual debugging of the collection:
#exists = client.collections.exists("Document")
#print(f"Czy kolekcja 'Document' istnieje: {exists}")
#schema = collection.config.get()
#print(f"Nazwa kolekcji: {schema.name}")
#print("Właściwości:")
#for prop in schema.properties:
#    print(f"- {prop.name}: {prop.data_type}")
#collection = client.collections.get("Document")
#count = collection.aggregate.over_all(total_count=True).total_count
#print(f"Liczba obiektów w kolekcji: {count}")
#results = collection.query.fetch_objects(limit=5)
#for obj in results.objects:
#    print(f"UUID: {obj.uuid}")
#    print(f"Nazwa pliku: {obj.properties['fileName']}")
#    print(f"Zawartość: {obj.properties['content'][:100]}...")  # first 100 characters
#    print("---")

# Example usage.
queries = ["Art. 154", "urlop wypoczynkowy", "Państwowa Inspekcja Pracy", "Art. 154 Kodeks pracy"]
try:
    for query in queries:
        vector_search(query)
        hybrid_search(query)
finally:
    # Close the connection even when one of the searches raises.
    client.close()