Sentinel-AI-Web-Search-Test-v2-Testing-Score

Build error

App Files Files Community

Shreyas094 committed on Sep 16, 2024

Commit

34018a5

verified ·

1 Parent(s): 0f26a54

Update app.py

Browse files

Files changed (1) hide show

app.py +55 -68

app.py CHANGED Viewed

@@ -85,7 +85,7 @@ def get_embeddings():
 def duckduckgo_search(query):
     with DDGS() as ddgs:
-        results = ddgs.text(query, max_results=5)
     return results
 class CitingSources(BaseModel):
@@ -127,10 +127,8 @@ def respond(message, history, model, temperature, num_calls, use_embeddings, sys
     logging.info(f"System Prompt: {system_prompt}")
     try:
-        for main_content, sources in get_response_with_search(message, model, num_calls=num_calls, temperature=temperature, use_embeddings=use_embeddings, system_prompt=system_prompt):
-            response = f"{main_content}\n\n{sources}"
-            first_line = response.split('\n')[0] if response else ''
-            yield response
     except Exception as e:
         logging.error(f"Error with {model}: {str(e)}")
         yield f"An error occurred with the {model} model: {str(e)}. Please try again or select a different model."
@@ -146,31 +144,15 @@ def create_web_search_vectors(search_results):
     return FAISS.from_documents(documents, embed)
-def get_response_with_search(query, model, num_calls=3, temperature=0.2, use_embeddings=True, system_prompt=DEFAULT_SYSTEM_PROMPT):
-    search_results = duckduckgo_search(query)
-    if use_embeddings:
-        web_search_database = create_web_search_vectors(search_results)
-        if not web_search_database:
-            yield "No web search results available. Please try again.", ""
-            return
-        retriever = web_search_database.as_retriever(search_kwargs={"k": 5})
-        relevant_docs = retriever.get_relevant_documents(query)
-        context = "\n".join([doc.page_content for doc in relevant_docs])
-    else:
-        context = "\n".join([f"{result['title']}\n{result['body']}\nSource: {result['href']}" for result in search_results])
-    prompt = f"""Using the following context from web search results:
-{context}
-Write a detailed and complete research document that fulfills the following user request: '{query}'
-After writing the document, please provide a list of sources with their URLs used in your response."""
-    # Use Hugging Face API
-    client = InferenceClient(model, token=huggingface_token)
     # Calculate input tokens (this is an approximation, you might need a more accurate method)
     input_tokens = len(prompt.split()) // 4
@@ -178,46 +160,51 @@ After writing the document, please provide a list of sources with their URLs use
     model_token_limit = MODEL_TOKEN_LIMITS.get(model, 8192)  # Default to 8192 if model not found
     # Calculate max_new_tokens
-    max_new_tokens = min(model_token_limit - input_tokens, 6500)  # Cap at 4096 to be safe
-    main_content = ""
-    for i in range(num_calls):
-        try:
-            response = client.chat_completion(
-                messages=[
-                    {"role": "system", "content": system_prompt},
-                    {"role": "user", "content": prompt}
-                ],
-                max_tokens=max_new_tokens,
-                temperature=temperature,
-                stream=False,
-                top_p=0.8,
-            )
-            # Log the raw response for debugging
-            logging.info(f"Raw API response: {response}")
-            # Check if the response is a string (which might be an error message)
-            if isinstance(response, str):
-                logging.error(f"API returned an unexpected string response: {response}")
-                yield f"An error occurred: {response}", ""
-                return
-            # If it's not a string, assume it's the expected object structure
-            if hasattr(response, 'choices') and response.choices:
-                for choice in response.choices:
-                    if hasattr(choice, 'message') and hasattr(choice.message, 'content'):
-                        chunk = choice.message.content
-                        main_content += chunk
-                        yield main_content, ""  # Yield partial main content without sources
-            else:
-                logging.error(f"Unexpected response structure: {response}")
-                yield "An unexpected error occurred. Please try again.", ""
-        except Exception as e:
-            logging.error(f"Error in API call: {str(e)}")
-            yield f"An error occurred: {str(e)}", ""
-            return
 def vote(data: gr.LikeData):
     if data.liked:

 def duckduckgo_search(query):
     with DDGS() as ddgs:
+        results = list(ddgs.text(query, max_results=5))
     return results
 class CitingSources(BaseModel):
     logging.info(f"System Prompt: {system_prompt}")
     try:
+        for main_content, _ in get_response_with_search(message, model, num_calls=num_calls, temperature=temperature, use_embeddings=use_embeddings, system_prompt=system_prompt):
+            yield main_content
     except Exception as e:
         logging.error(f"Error with {model}: {str(e)}")
         yield f"An error occurred with the {model} model: {str(e)}. Please try again or select a different model."
     return FAISS.from_documents(documents, embed)
def summarize_article(article, model, system_prompt, user_query, client, temperature=0.2):
    """Summarize a single search result with respect to the user's query.

    Args:
        article: Mapping describing one search hit. Its 'title', 'body' and
            'href' entries are interpolated into the prompt; a missing entry
            is treated as an empty string instead of raising KeyError.
        model: Model identifier, used to look up the context size in
            MODEL_TOKEN_LIMITS (8192 is assumed when the model is not listed).
        system_prompt: Text sent as the system message.
        user_query: The user's original question.
        client: Object exposing ``chat_completion(...)`` -- presumably a
            huggingface_hub InferenceClient; confirm against the caller.
        temperature: Sampling temperature forwarded to the API.

    Returns:
        The stripped summary text on success, an
        "Error summarizing article: ..." string when the API call or the
        response handling raises, or "Unable to generate summary." when the
        response carries no usable message content.
    """
    title = article.get('title', '')
    body = article.get('body', '')
    url = article.get('href', '')
    prompt = f"""Using the following article:
Title: {title}
Content: {body}
URL: {url}
Write a concise summary that addresses the following user query: '{user_query}'
"""

    # Rough estimate only: roughly four characters per token. Dividing the
    # *word* count by four (as before) under-counts badly and lets
    # input + output exceed the model's context window.
    input_tokens = len(prompt) // 4

    model_token_limit = MODEL_TOKEN_LIMITS.get(model, 8192)  # Default to 8192 if model not found

    # Cap at 6500, but never request zero or a negative number of tokens
    # when the prompt alone is close to (or beyond) the model limit.
    max_new_tokens = max(1, min(model_token_limit - input_tokens, 6500))

    try:
        response = client.chat_completion(
            messages=[
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": prompt},
            ],
            max_tokens=max_new_tokens,
            temperature=temperature,
            stream=False,
            top_p=0.8,
        )

        # Return the first choice that actually carries text; a choice whose
        # content is None is skipped rather than crashing on .strip().
        for choice in getattr(response, 'choices', None) or []:
            content = getattr(getattr(choice, 'message', None), 'content', None)
            if isinstance(content, str):
                return content.strip()
    except Exception as e:
        # Best-effort: one failed article must not abort the whole run, so
        # the error is logged and reported in place of the summary.
        logging.error(f"Error summarizing article: {str(e)}")
        return f"Error summarizing article: {str(e)}"

    return "Unable to generate summary."
def get_response_with_search(query, model, num_calls=3, temperature=0.2, use_embeddings=True, system_prompt=DEFAULT_SYSTEM_PROMPT):
    """Search the web for *query* and stream per-article summaries.

    Each search result is summarized with its own model call. After every
    summary the accumulated report is yielded again, so a consumer can show
    progress incrementally.

    Args:
        query: The user's search request.
        model: Model identifier passed to InferenceClient and to
            summarize_article.
        num_calls: Accepted for backward compatibility; not used here.
        temperature: Sampling temperature forwarded to summarize_article.
        use_embeddings: Accepted for backward compatibility; not used here.
        system_prompt: System message forwarded to summarize_article.

    Yields:
        ``(text, "")`` tuples. ``text`` is the formatted report of all
        summaries produced so far, or a short notice when the search
        returned nothing. The second element is always an empty string.
    """
    search_results = duckduckgo_search(query)

    # Without this guard the generator would finish without yielding
    # anything and the caller would present an empty response.
    if not search_results:
        yield "No web search results available. Please try again.", ""
        return

    client = InferenceClient(model, token=huggingface_token)
    summaries = []

    for result in search_results:
        summary = summarize_article(result, model, system_prompt, query, client, temperature)
        summaries.append({
            "title": result['title'],
            "url": result['href'],
            "summary": summary,
        })
        # Re-emit the whole report each time so the output grows as
        # articles are processed.
        yield format_output(summaries), ""
def format_output(summaries):
    """Render summarized search results as a plain-text report.

    Args:
        summaries: Iterable of mappings, each providing 'title', 'url' and
            'summary' entries.

    Returns:
        A string consisting of a fixed header followed by one
        "News Title / URL / Summary" paragraph per item, each paragraph
        terminated by a blank line. With no items only the header is
        returned.
    """
    header = "Here are the summarized search results:\n\n"
    # Build all paragraphs first and join once instead of growing the
    # string with += on every iteration.
    paragraphs = [
        f"News Title: {item['title']}\n"
        f"URL: {item['url']}\n"
        f"Summary: {item['summary']}\n\n"
        for item in summaries
    ]
    return header + "".join(paragraphs)
 def vote(data: gr.LikeData):
     if data.liked: