Update app.py
app.py
CHANGED
@@ -66,17 +66,30 @@ def load_document(file: NamedTemporaryFile, parser: str = "llamaparse") -> List[
 def get_embeddings():
     return HuggingFaceEmbeddings(model_name="sentence-transformers/stsb-roberta-large")
 
+# Add this at the beginning of your script, after imports
+DOCUMENTS_FILE = "uploaded_documents.json"
+
+def load_documents():
+    if os.path.exists(DOCUMENTS_FILE):
+        with open(DOCUMENTS_FILE, "r") as f:
+            return json.load(f)
+    return []
+
+def save_documents(documents):
+    with open(DOCUMENTS_FILE, "w") as f:
+        json.dump(documents, f)
+
+# Replace the global uploaded_documents with this
+uploaded_documents = load_documents()
+
+# Modify the update_vectors function
 def update_vectors(files, parser):
     global uploaded_documents
     logging.info(f"Entering update_vectors with {len(files)} files and parser: {parser}")
 
     if not files:
         logging.warning("No files provided for update_vectors")
-        return "Please upload at least one PDF file.",
-            choices=[doc["name"] for doc in uploaded_documents],
-            value=[doc["name"] for doc in uploaded_documents if doc["selected"]],
-            label="Select documents to query"
-        )
+        return "Please upload at least one PDF file.", display_documents()
 
     embed = get_embeddings()
     total_chunks = 0
@@ -89,7 +102,6 @@ def update_vectors(files, parser):
         logging.info(f"Loaded {len(data)} chunks from {file.name}")
         all_data.extend(data)
         total_chunks += len(data)
-        # Append new documents instead of replacing
         if not any(doc["name"] == file.name for doc in uploaded_documents):
             uploaded_documents.append({"name": file.name, "selected": True})
             logging.info(f"Added new document to uploaded_documents: {file.name}")
@@ -110,12 +122,11 @@ def update_vectors(files, parser):
 
     database.save_local("faiss_database")
     logging.info("FAISS database saved")
-
-
-
-
-
-    )
+
+    # Save the updated list of documents
+    save_documents(uploaded_documents)
+
+    return f"Vector store updated successfully. Processed {total_chunks} chunks from {len(files)} files using {parser}.", display_documents()
 
 def generate_chunked_response(prompt, model, max_tokens=10000, num_calls=3, temperature=0.2, should_stop=False):
     print(f"Starting generate_chunked_response with {num_calls} calls")
@@ -528,6 +539,12 @@ def display_documents():
         label="Select documents to query"
     )
 
+# Add this new function
+def refresh_documents():
+    global uploaded_documents
+    uploaded_documents = load_documents()
+    return display_documents()
+
 def initial_conversation():
     return [
         (None, "Welcome! I'm your AI assistant for web search and PDF analysis. Here's how you can use me:\n\n"
@@ -539,7 +556,7 @@ def initial_conversation():
     ]
 
 # Define the checkbox outside the demo block
-document_selector =
+document_selector = display_documents()
 
 use_web_search = gr.Checkbox(label="Use Web Search", value=True)
 
@@ -603,6 +620,7 @@ with demo:
         file_input = gr.Files(label="Upload your PDF documents", file_types=[".pdf"])
         parser_dropdown = gr.Dropdown(choices=["pypdf", "llamaparse"], label="Select PDF Parser", value="llamaparse")
         update_button = gr.Button("Upload Document")
+        refresh_button = gr.Button("Refresh Document List")
 
         update_output = gr.Textbox(label="Update Status")
 
@@ -610,6 +628,11 @@ with demo:
         update_button.click(update_vectors,
                             inputs=[file_input, parser_dropdown],
                             outputs=[update_output, document_selector])
+
+        # Add the refresh button functionality
+        refresh_button.click(refresh_documents,
+                             inputs=[],
+                             outputs=[document_selector])
 
         gr.Markdown(
         """
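Both return paths of update_vectors, and the new refresh_documents, now go through display_documents(), whose body is not part of this diff (only its closing lines appear as context around line 539). A minimal sketch of what such a helper presumably does, rebuilding the selector from the global uploaded_documents list; the gr.CheckboxGroup construction below is an assumption, not code from this commit:

import gradio as gr

# Hypothetical sketch: the real display_documents() is defined elsewhere in app.py.
# Assumes uploaded_documents is the globally shared, JSON-backed list of dicts.
def display_documents():
    # Rebuild the document selector from the current in-memory list so that
    # update_vectors() and refresh_documents() can return a fresh component.
    return gr.CheckboxGroup(
        choices=[doc["name"] for doc in uploaded_documents],
        value=[doc["name"] for doc in uploaded_documents if doc["selected"]],
        label="Select documents to query"
    )

Returning a freshly constructed component from an event handler updates the bound output in recent Gradio releases; on older 3.x versions the gr.CheckboxGroup.update(...) form would be needed instead.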
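The new load_documents() / save_documents() pair persists the upload list to a plain JSON file next to the app, so it survives restarts. A standalone sketch of the same pattern, with an extra guard for an empty or corrupt file added here (the guard is not part of the commit):

import json
import os

# Same file name as in the commit; the try/except guard is an addition for the
# case where the JSON file exists but cannot be parsed (e.g. an interrupted write).
DOCUMENTS_FILE = "uploaded_documents.json"

def load_documents():
    if os.path.exists(DOCUMENTS_FILE):
        try:
            with open(DOCUMENTS_FILE, "r") as f:
                return json.load(f)
        except (json.JSONDecodeError, OSError):
            return []
    return []

def save_documents(documents):
    # Each entry is a dict of the form {"name": <file name>, "selected": <bool>}.
    with open(DOCUMENTS_FILE, "w") as f:
        json.dump(documents, f)

# Usage, mirroring what update_vectors does after indexing a new file:
docs = load_documents()
docs.append({"name": "example.pdf", "selected": True})
save_documents(docs)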