Update app.py
app.py
CHANGED
@@ -66,17 +66,30 @@ def load_document(file: NamedTemporaryFile, parser: str = "llamaparse") -> List[
 def get_embeddings():
     return HuggingFaceEmbeddings(model_name="sentence-transformers/stsb-roberta-large")
 
+# Add this at the beginning of your script, after imports
+DOCUMENTS_FILE = "uploaded_documents.json"
+
+def load_documents():
+    if os.path.exists(DOCUMENTS_FILE):
+        with open(DOCUMENTS_FILE, "r") as f:
+            return json.load(f)
+    return []
+
+def save_documents(documents):
+    with open(DOCUMENTS_FILE, "w") as f:
+        json.dump(documents, f)
+
+# Replace the global uploaded_documents with this
+uploaded_documents = load_documents()
+
+# Modify the update_vectors function
 def update_vectors(files, parser):
     global uploaded_documents
     logging.info(f"Entering update_vectors with {len(files)} files and parser: {parser}")
 
     if not files:
         logging.warning("No files provided for update_vectors")
-        return "Please upload at least one PDF file.",
-            choices=[doc["name"] for doc in uploaded_documents],
-            value=[doc["name"] for doc in uploaded_documents if doc["selected"]],
-            label="Select documents to query"
-        )
+        return "Please upload at least one PDF file.", display_documents()
 
     embed = get_embeddings()
     total_chunks = 0
@@ -89,7 +102,6 @@ def update_vectors(files, parser):
         logging.info(f"Loaded {len(data)} chunks from {file.name}")
         all_data.extend(data)
         total_chunks += len(data)
-        # Append new documents instead of replacing
         if not any(doc["name"] == file.name for doc in uploaded_documents):
             uploaded_documents.append({"name": file.name, "selected": True})
             logging.info(f"Added new document to uploaded_documents: {file.name}")
@@ -110,12 +122,11 @@ def update_vectors(files, parser):
 
     database.save_local("faiss_database")
     logging.info("FAISS database saved")
-
-
-
-
-
-    )
+
+    # Save the updated list of documents
+    save_documents(uploaded_documents)
+
+    return f"Vector store updated successfully. Processed {total_chunks} chunks from {len(files)} files using {parser}.", display_documents()
 
 def generate_chunked_response(prompt, model, max_tokens=10000, num_calls=3, temperature=0.2, should_stop=False):
     print(f"Starting generate_chunked_response with {num_calls} calls")
@@ -528,6 +539,12 @@ def display_documents():
         label="Select documents to query"
     )
 
+# Add this new function
+def refresh_documents():
+    global uploaded_documents
+    uploaded_documents = load_documents()
+    return display_documents()
+
 def initial_conversation():
     return [
         (None, "Welcome! I'm your AI assistant for web search and PDF analysis. Here's how you can use me:\n\n"
@@ -539,7 +556,7 @@ def initial_conversation():
     ]
 
 # Define the checkbox outside the demo block
-document_selector =
+document_selector = display_documents()
 
 use_web_search = gr.Checkbox(label="Use Web Search", value=True)
 
@@ -603,6 +620,7 @@ with demo:
         file_input = gr.Files(label="Upload your PDF documents", file_types=[".pdf"])
         parser_dropdown = gr.Dropdown(choices=["pypdf", "llamaparse"], label="Select PDF Parser", value="llamaparse")
         update_button = gr.Button("Upload Document")
+        refresh_button = gr.Button("Refresh Document List")
 
         update_output = gr.Textbox(label="Update Status")
 
@@ -610,6 +628,11 @@ with demo:
         update_button.click(update_vectors,
                             inputs=[file_input, parser_dropdown],
                             outputs=[update_output, document_selector])
+
+        # Add the refresh button functionality
+        refresh_button.click(refresh_documents,
+                             inputs=[],
+                             outputs=[document_selector])
 
         gr.Markdown(
         """
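Both return paths of update_vectors, and the new refresh_documents, now go through display_documents(), whose body is not part of this diff (only its closing lines appear as context around line 539). A minimal sketch of what such a helper presumably does, rebuilding the selector from the global uploaded_documents list; the gr.CheckboxGroup construction below is an assumption, not code from this commit:

import gradio as gr

# Hypothetical sketch: the real display_documents() is defined elsewhere in app.py.
# Assumes uploaded_documents is the globally shared, JSON-backed list of dicts.
def display_documents():
    # Rebuild the document selector from the current in-memory list so that
    # update_vectors() and refresh_documents() can return a fresh component.
    return gr.CheckboxGroup(
        choices=[doc["name"] for doc in uploaded_documents],
        value=[doc["name"] for doc in uploaded_documents if doc["selected"]],
        label="Select documents to query"
    )

Returning a freshly constructed component from an event handler updates the bound output in recent Gradio releases; on older 3.x versions the gr.CheckboxGroup.update(...) form would be needed instead.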
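The new load_documents() / save_documents() pair persists the upload list to a plain JSON file next to the app, so it survives restarts. A standalone sketch of the same pattern, with an extra guard for an empty or corrupt file added here (the guard is not part of the commit):

import json
import os

# Same file name as in the commit; the try/except guard is an addition for the
# case where the JSON file exists but cannot be parsed (e.g. an interrupted write).
DOCUMENTS_FILE = "uploaded_documents.json"

def load_documents():
    if os.path.exists(DOCUMENTS_FILE):
        try:
            with open(DOCUMENTS_FILE, "r") as f:
                return json.load(f)
        except (json.JSONDecodeError, OSError):
            return []
    return []

def save_documents(documents):
    # Each entry is a dict of the form {"name": <file name>, "selected": <bool>}.
    with open(DOCUMENTS_FILE, "w") as f:
        json.dump(documents, f)

# Usage, mirroring what update_vectors does after indexing a new file:
docs = load_documents()
docs.append({"name": "example.pdf", "selected": True})
save_documents(docs)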