Update app.py
Browse files
app.py
CHANGED
|
@@ -6,6 +6,10 @@ from duckduckgo_search import DDGS
|
|
| 6 |
from typing import List, Dict
|
| 7 |
from pydantic import BaseModel
|
| 8 |
from huggingface_hub import InferenceClient
|
|
|
|
|
|
|
|
|
|
|
|
|
| 9 |
|
| 10 |
# Set up basic configuration for logging
|
| 11 |
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
|
|
@@ -36,23 +40,70 @@ class ConversationManager:
|
|
| 36 |
|
| 37 |
conversation_manager = ConversationManager()
|
| 38 |
|
| 39 |
-
|
|
|
|
|
|
|
| 40 |
with DDGS() as ddgs:
|
| 41 |
-
results = ddgs.text(query, max_results=
|
| 42 |
-
|
| 43 |
|
| 44 |
-
def get_web_search_results(query: str,
|
| 45 |
try:
|
| 46 |
-
|
| 47 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 48 |
else:
|
| 49 |
-
|
| 50 |
-
|
| 51 |
-
|
| 52 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 53 |
except Exception as e:
|
| 54 |
-
|
| 55 |
-
|
| 56 |
|
| 57 |
def rephrase_query(original_query: str, conversation_manager: ConversationManager) -> str:
|
| 58 |
context = conversation_manager.get_context()
|
|
|
|
| 6 |
import functools
import logging
import os
from typing import Any, Dict, List

import numpy as np
from huggingface_hub import InferenceClient
from pydantic import BaseModel
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity
|
| 13 |
|
| 14 |
# Set up basic configuration for logging
|
| 15 |
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
|
|
|
|
| 40 |
|
| 41 |
conversation_manager = ConversationManager()
|
| 42 |
|
| 43 |
+
# Hugging Face API token, read once at import time; None when the
# HUGGINGFACE_TOKEN environment variable is not set (requests then run
# unauthenticated).
huggingface_token = os.getenv("HUGGINGFACE_TOKEN")
|
| 44 |
+
|
| 45 |
+
def duckduckgo_search(query: str, max_results: int = 10) -> List[Dict[str, str]]:
    """Fetch up to ``max_results`` DuckDuckGo text-search hits for ``query``.

    Each hit is a result dict as produced by ``DDGS.text`` (presumably with
    'title', 'href' and 'body' keys — see the duckduckgo_search docs).
    """
    with DDGS() as search_session:
        hits = search_session.text(query, max_results=max_results)
        return list(hits)
|
| 49 |
|
| 50 |
+
def _get_embedder():
    """Return a process-wide cached SentenceTransformer instance.

    Loading 'all-MiniLM-L6-v2' is expensive (weight download/load), so it is
    done once and memoized on the function object instead of on every call.
    """
    if not hasattr(_get_embedder, "_model"):
        _get_embedder._model = SentenceTransformer('all-MiniLM-L6-v2')
    return _get_embedder._model


def get_web_search_results(query: str, model: str, num_calls: int = 3, temperature: float = 0.2, max_results: int = 10) -> Dict[str, Any]:
    """Search the web for ``query`` and generate a research document from the hits.

    Pipeline: DuckDuckGo search -> sentence-embedding cosine-similarity
    ranking of the result bodies -> top-5 results used as context in an LLM
    prompt -> streamed generation.

    Args:
        query: User search query.
        model: Model identifier; "@cf/meta/llama-3.1-8b-instruct" routes to
            the Cloudflare helper, anything else to the Hugging Face
            Inference API.
        num_calls: Number of LLM calls whose streamed outputs are
            concatenated into one response.
        temperature: Sampling temperature forwarded to the model.
        max_results: Maximum number of DuckDuckGo results to fetch.

    Returns:
        On success: dict with "query", "search_results", "relevant_docs"
        and "response" keys. On any failure: {"error": <message>} — this
        function never raises.
    """
    try:
        search_results = duckduckgo_search(query, max_results)
        if not search_results:
            return {"error": f"No results found for query: {query}"}

        # Rank results by cosine similarity between the query embedding and
        # each result-body embedding; keep the 5 most similar, best first.
        embedder = _get_embedder()
        web_search_vectors = embedder.encode([result['body'] for result in search_results])
        query_vector = embedder.encode([query])
        similarities = cosine_similarity(query_vector, web_search_vectors)[0]
        top_indices = np.argsort(similarities)[-5:][::-1]  # highest similarity first
        relevant_docs = [search_results[i] for i in top_indices]

        # Build the retrieval context and the generation prompt.
        context = "\n".join([f"Title: {doc['title']}\nContent: {doc['body']}" for doc in relevant_docs])
        prompt = f"""Using the following context from web search results:

{context}

Write a detailed and complete research document that fulfills the following user request: '{query}'
After writing the document, please provide a list of sources used in your response."""

        # Generate the response with the selected backend.
        if model == "@cf/meta/llama-3.1-8b-instruct":
            # Cloudflare Workers AI path (helper implemented elsewhere in this file).
            response = get_response_from_cloudflare(prompt="", context=context, query=query, num_calls=num_calls, temperature=temperature, search_type="web")
        else:
            # Hugging Face Inference API path: stream num_calls completions
            # and concatenate their deltas into one string.
            client = InferenceClient(model, token=huggingface_token)
            response = ""
            for _ in range(num_calls):
                for message in client.chat_completion(
                    messages=[{"role": "user", "content": prompt}],
                    max_tokens=10000,
                    temperature=temperature,
                    stream=True,
                ):
                    if message.choices and message.choices[0].delta and message.choices[0].delta.content:
                        response += message.choices[0].delta.content

        return {
            "query": query,
            "search_results": search_results,
            "relevant_docs": relevant_docs,
            "response": response
        }
    except Exception as e:
        # Preserve the error-dict contract, but leave a stack trace in the
        # log instead of failing completely silently.
        logging.exception("Web search pipeline failed for query %r", query)
        return {"error": f"An error occurred during web search and processing: {str(e)}"}
|
| 106 |
+
|
| 107 |
|
| 108 |
def rephrase_query(original_query: str, conversation_manager: ConversationManager) -> str:
|
| 109 |
context = conversation_manager.get_context()
|