ably.do/ollama_service.py

315 lines
11 KiB
Python
Raw Normal View History

2025-02-27 14:31:55 -05:00
from fastapi import FastAPI, HTTPException, Request
from fastapi.responses import StreamingResponse
2025-02-27 09:58:30 -05:00
from pydantic import BaseModel
import ollama
import weaviate
from weaviate.connect import ConnectionParams
from weaviate.collections.classes.filters import Filter
import re
2025-02-27 14:31:55 -05:00
import json
2025-02-27 09:58:30 -05:00
import uvicorn
2025-02-27 14:31:55 -05:00
import httpx
from typing import List, Optional
import asyncio
2025-02-28 05:44:28 -05:00
import os
2025-02-28 12:12:08 -05:00
from elasticsearch import Elasticsearch
from datetime import datetime
2025-02-27 09:58:30 -05:00
# --- Application and backend-service configuration ---
app = FastAPI()

OLLAMA_BASE_URL = "http://ollama:11434"
ES_BASE_URL = "http://elastic:9200"
WEAVIATE_URL = "http://weaviate:8080"
# NOTE(review): name looks like a typo for PROMPT_DIR_PATH — kept for
# compatibility with existing references.
PROMPT_DIR_PATCH = "./prompts"

# Client initialization
ollama_client = ollama.Client(host=OLLAMA_BASE_URL)
es = Elasticsearch(ES_BASE_URL)

weaviate_client = weaviate.WeaviateClient(
    connection_params=ConnectionParams.from_params(
        http_host="weaviate",
        http_port=8080,
        http_secure=False,
        grpc_host="weaviate",
        grpc_port=50051,
        grpc_secure=False,
    )
)
weaviate_client.connect()
collection = weaviate_client.collections.get("Document")

# Module-level placeholder; chat_endpoint loads prompts per request.
files_content = None
2025-02-27 14:31:55 -05:00
class Message(BaseModel):
    """A single chat message exchanged with the model (Ollama-compatible)."""
    role: str      # e.g. "system", "user", "assistant"
    content: str   # message text
class ChatRequest(BaseModel):
    """Request payload accepted by the /api/chat endpoint (Ollama-compatible)."""
    model: str                      # Ollama model identifier
    messages: List[Message]         # conversation history; last entry is the user query
    stream: Optional[bool] = False  # True -> server-sent streaming response
    options: Optional[dict] = None  # passed through verbatim to Ollama
class ChatResponse(BaseModel):
    """Non-streaming response returned by /api/chat, mirroring Ollama's schema."""
    model: str
    created_at: str
    message: Message
    done: bool
    # Timing/count fields are reported by Ollama in nanoseconds/tokens;
    # they default to 0 upstream when absent.
    total_duration: int
    load_duration: int
    prompt_eval_count: int
    prompt_eval_duration: int
    eval_count: int
    eval_duration: int
2025-02-28 05:44:28 -05:00
def read_text_files_from_directory(directory_path):
    """Read every ``*.txt`` file in *directory_path*.

    Returns a dict mapping each file name (without the ``.txt`` extension)
    to the file's full text content. Unreadable or undecodable files are
    skipped, with a message printed to stdout.
    """
    files_dict = {}
    for filename in os.listdir(directory_path):
        # Only plain-text prompt files are loaded.
        if not filename.endswith('.txt'):
            continue
        file_path = os.path.join(directory_path, filename)
        try:
            with open(file_path, 'r', encoding='utf-8') as file:
                content = file.read()
            # Key is the file name without its extension.
            files_dict[os.path.splitext(filename)[0]] = content
        except (OSError, UnicodeDecodeError) as e:
            # Bug fix: the original message printed the literal "(unknown)"
            # instead of the offending file name.
            print(f"Błąd przy odczycie pliku {filename}: {e}")
    return files_dict
2025-02-27 09:58:30 -05:00
2025-02-28 05:44:28 -05:00
def analyze_query(content):
    """Ask the keyword-extraction model to analyze *content*.

    The model is expected to reply with a comma-separated list of search
    keywords; the reply is split on commas, stripped, and returned as a
    list with empty entries removed.
    """
    analysis = ollama_client.chat(
        model="gemma2:2b",
        messages=[{"role": "user", "content": content}],
    )
    raw_reply = analysis['message']['content']
    keywords = [term.strip() for term in raw_reply.split(',') if term.strip()]
    print("Słowa kluczowe:", keywords)
    return keywords
2025-02-27 14:31:55 -05:00
def extract_full_article(content, article_number):
    """Return the full text of article *article_number* from *content*.

    Matches from "Art. <n>." up to (but not including) the next
    "Art. <m>." heading or the end of the text. Returns ``None`` when
    the article is not present.
    """
    article_re = re.compile(
        rf"Art\.\s*{article_number}\..*?(?=Art\.\s*\d+\.|\Z)",
        re.DOTALL,
    )
    found = article_re.search(content)
    return found.group(0).strip() if found else None
def extract_relevant_fragment(content, query, context_size=100):
    """Return the part of *content* most relevant to *query*.

    Resolution order:
    1. If *query* starts with an article reference ("Art. <n>"), return
       that whole article when it can be found.
    2. Otherwise return the first case-insensitive occurrence of *query*
       surrounded by *context_size* characters of context.
    3. As a last resort, return the first 200 characters of *content*.
    """
    ref = re.match(r"Art\.\s*(\d+)", query)
    if ref:
        article = extract_full_article(content, ref.group(1))
        if article:
            return article

    pos = content.lower().find(query.lower())
    if pos == -1:
        return content[:200] + "..."

    begin = max(0, pos - context_size)
    finish = min(len(content), pos + len(query) + context_size)
    return f"...{content[begin:finish]}..."
2025-02-27 09:58:30 -05:00
2025-02-28 05:44:28 -05:00
def hybrid_search(keywords, limit=100, alpha=0.5):
    """Run a Weaviate hybrid (BM25 + vector) search for *keywords*.

    *keywords* may be a single string or a list of terms; terms are
    joined into one query string. *alpha* balances keyword vs. vector
    scoring. Returns at most *limit* dicts, each with the matched
    document's uuid, a relevant fragment, the source file name, and the
    query string used.
    """
    if isinstance(keywords, str):
        keywords = [keywords]
    query = " ".join(keywords)

    # Over-fetch so there is headroom if a keyword post-filter is
    # re-enabled on the hits later.
    response = collection.query.hybrid(
        query=query,
        alpha=alpha,
        limit=limit * 2,
    )

    results = []
    for obj in response.objects:
        fragment = extract_relevant_fragment(obj.properties['content'], query)
        results.append({
            "uuid": obj.uuid,
            "relevant_fragment": fragment,
            "file_name": obj.properties['fileName'],
            "keyword": query,
        })
        if len(results) >= limit:
            break
    return results[:limit]
@app.get("/api/tags")
async def tags_proxy():
    """Proxy GET /api/tags straight through to the Ollama backend."""
    async with httpx.AsyncClient() as http:
        backend_reply = await http.get(f"{OLLAMA_BASE_URL}/api/tags")
        return backend_reply.json()
@app.get("/api/version")
async def version_proxy():
    """Proxy GET /api/version straight through to the Ollama backend.

    Bug fix: this handler was also named ``tags_proxy``, silently
    redefining the /api/tags handler's name at module level (ruff F811).
    FastAPI registered both routes before the redefinition, so the HTTP
    interface is unchanged by the rename.
    """
    async with httpx.AsyncClient() as client:
        response = await client.get(f"{OLLAMA_BASE_URL}/api/version")
        return response.json()
@app.post("/api/generate")
async def generate_proxy(request: Request):
    """Forward a POST /api/generate body verbatim to Ollama and relay its reply."""
    payload = await request.json()
    async with httpx.AsyncClient() as client:
        upstream = await client.post(f"{OLLAMA_BASE_URL}/api/generate", json=payload)
        return upstream.json()
@app.get("/api/models")
async def list_models():
    """Return the names of all models known to the local Ollama instance.

    Any backend failure is converted to an HTTP 500 with the error text.
    """
    try:
        listing = ollama_client.list()
        names = [entry['name'] for entry in listing['models']]
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))
    return {"models": names}
2025-02-27 09:58:30 -05:00
2025-02-27 14:31:55 -05:00
async def stream_chat(model, messages, options):
    """Yield newline-terminated JSON lines from Ollama's streaming chat API.

    On any failure a single JSON line with an "error" key is yielded
    instead of raising, so the HTTP stream terminates cleanly.
    """
    payload = {
        "model": model,
        "messages": messages,
        "stream": True,
        "options": options,
    }
    try:
        # Stream the upstream response line-by-line without buffering it all.
        async with httpx.AsyncClient() as client:
            async with client.stream(
                "POST", f"{OLLAMA_BASE_URL}/api/chat", json=payload
            ) as response:
                async for line in response.aiter_lines():
                    yield line + "\n"
    except Exception as e:
        yield json.dumps({"error": str(e)}) + "\n"
2025-02-27 09:58:30 -05:00
2025-02-28 12:12:08 -05:00
def save_to_elasticsearch(index, request_data, response_data, search_data=None):
    """Persist one chat exchange as a single document in *index*.

    Normalizes message objects to plain dicts, attaches the optional
    vector-search context, and indexes the result. Failures are printed
    to stdout and never propagated, so logging cannot break the chat flow.
    """
    try:
        def as_plain_dict(message):
            # Accept both raw dicts and pydantic Message objects so the
            # document is JSON-serializable either way.
            if isinstance(message, dict):
                return {"role": message.get("role"), "content": message.get("content")}
            return {
                "role": getattr(message, "role", None),
                "content": getattr(message, "content", None),
            }

        if isinstance(request_data.get("messages"), list):
            request_data["messages"] = [as_plain_dict(m) for m in request_data["messages"]]
        if "message" in response_data:
            response_data["message"] = as_plain_dict(response_data["message"])
        if "timestamp" in response_data:
            response_data["timestamp"] = response_data["timestamp"].isoformat()

        document = {
            "request": request_data,
            "response": response_data,
            "timestamp": datetime.utcnow().isoformat(),
        }
        if search_data:
            document["vector_search"] = {
                "keywords": search_data.get("keywords", []),
                "results": search_data.get("results", []),
            }

        # default=str stringifies anything json can't encode (e.g. UUIDs).
        es.index(index=index, body=json.dumps(document, default=str))
    except Exception as e:
        print(f"Error saving to Elasticsearch: {e}")
2025-02-27 14:31:55 -05:00
@app.post("/api/chat")
async def chat_endpoint(request: ChatRequest):
    """RAG chat endpoint (Ollama-compatible).

    Pipeline:
    1. Load prompt templates from ``PROMPT_DIR_PATCH``.
    2. Extract search keywords from the user's last message.
    3. Hybrid-search Weaviate and append the hits to the data prompt.
    4. Forward the augmented conversation to Ollama (streaming or not).
    5. For non-streaming calls, log the exchange to Elasticsearch and
       return an Ollama-shaped ChatResponse.

    Any failure is converted to an HTTP 500 carrying the error text.
    """
    try:
        files_content = read_text_files_from_directory(PROMPT_DIR_PATCH)
        # Bug fix: the loader returns a dict (possibly empty), never None,
        # so the original `is None` check could not fire.
        if not files_content:
            raise KeyError("Nie wczytano promptów!!!")

        def _require_prompt(name):
            # Fetch a mandatory prompt template by file name (without .txt).
            # Bug fix: the original error messages interpolated the missing
            # prompt's value (None) instead of its name.
            prompt = files_content.get(name)
            if prompt is None:
                raise KeyError(f"Nie znaleziono promptu o nazwie '{name}'.")
            return prompt

        prompt_seach = _require_prompt("prompt_seach")
        prompt_system = _require_prompt("prompt_system")
        prompt_answer = _require_prompt("prompt_answer")
        prompt_data = _require_prompt("prompt_data")

        query = request.messages[-1].content if request.messages else ""
        keywords = analyze_query(prompt_seach.format(query=query))
        weaviate_results = hybrid_search(keywords)

        # Append every retrieved fragment (with its source file) to the
        # data prompt used as a second system message.
        prompt_data += "\n".join(
            f"Źródło: {doc['file_name']}\n{doc['relevant_fragment']}\n\n"
            for doc in weaviate_results
        )

        messages_with_context = [
            {"role": "system", "content": prompt_system},
            {"role": "system", "content": prompt_data},
            {"role": "user", "content": prompt_answer.format(query=query)},
        ]

        if request.stream:
            return StreamingResponse(
                stream_chat(request.model, messages_with_context, request.options),
                media_type="application/json",
            )

        ollama_response = ollama_client.chat(
            model=request.model,
            messages=messages_with_context,
            stream=False,
            options=request.options,
        )

        request_data = {
            "query": query,
            "messages": request.messages,
            "options": request.options,
        }
        response_data = {
            "model": request.model,
            "created_at": ollama_response.get('created_at', ''),
            "message": ollama_response['message'],
            "done": ollama_response.get('done', True),
            "total_duration": ollama_response.get('total_duration', 0),
            "load_duration": ollama_response.get('load_duration', 0),
            "prompt_eval_count": ollama_response.get('prompt_eval_count', 0),
            "prompt_eval_duration": ollama_response.get('prompt_eval_duration', 0),
            "eval_count": ollama_response.get('eval_count', 0),
            "eval_duration": ollama_response.get('eval_duration', 0),
        }
        save_to_elasticsearch(
            "ably.do",
            request_data,
            response_data,
            {"keywords": keywords, "results": weaviate_results},
        )

        return ChatResponse(
            model=request.model,
            created_at=ollama_response.get('created_at', ''),
            message=Message(
                role=ollama_response['message']['role'],
                content=ollama_response['message']['content'],
            ),
            done=ollama_response.get('done', True),
            total_duration=ollama_response.get('total_duration', 0),
            load_duration=ollama_response.get('load_duration', 0),
            prompt_eval_count=ollama_response.get('prompt_eval_count', 0),
            prompt_eval_duration=ollama_response.get('prompt_eval_duration', 0),
            eval_count=ollama_response.get('eval_count', 0),
            eval_duration=ollama_response.get('eval_duration', 0),
        )
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))
# Run the service directly; binds all interfaces for container use.
if __name__ == "__main__":
    uvicorn.run(app, host="0.0.0.0", port=8000)