Jurk06 committed
Commit 3f4f0ff · verified · 1 Parent(s): 50ad5fa

Create app.py

Files changed (1)
  1. app.py +113 -0
app.py ADDED
@@ -0,0 +1,113 @@
+ import gradio as gr
+ import os
+ import pdfplumber
+ import tempfile
+ from huggingface_hub import InferenceClient
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
+ from langchain.vectorstores import FAISS
+ from langchain.embeddings import HuggingFaceEmbeddings
+
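+ # NOTE: the langchain.* import paths above only resolve on older LangChain
+ # releases; on current versions the same classes live in the separately
+ # installed langchain-community package (assumption: that package plus
+ # faiss-cpu and sentence-transformers are available):
+ #   from langchain_community.vectorstores import FAISS
+ #   from langchain_community.embeddings import HuggingFaceEmbeddings
+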
+ # Initialize Hugging Face InferenceClient; the token is read from the
+ # environment rather than hardcoded in the source
+ hf_token = os.environ.get("HF_TOKEN")  # set HF_TOKEN to your "hf_..." token
+ client = InferenceClient(
+     provider="novita",
+     api_key=hf_token
+ )
+
+ # Global vectorstore
+ vectorstore = None
+
+ # Load and process the uploaded PDF
+ def load_pdf(file):
+     global vectorstore
+
+     try:
+         # Save the upload to a temp path (gr.File with type="binary"
+         # passes the file contents as raw bytes)
+         temp_pdf_path = tempfile.NamedTemporaryFile(delete=False, suffix=".pdf").name
+         with open(temp_pdf_path, "wb") as f:
+             f.write(file)
+
+         # Extract text using pdfplumber
+         raw_text = ""
+         with pdfplumber.open(temp_pdf_path) as pdf:
+             for page in pdf.pages:
+                 text = page.extract_text()
+                 if text:
+                     raw_text += text + "\n"
+
+         if not raw_text.strip():
+             return "❌ No extractable text found in the PDF."
+
+         # Chunk the text (sizes are in characters, not tokens)
+         splitter = RecursiveCharacterTextSplitter(chunk_size=800, chunk_overlap=100)
+         texts = splitter.split_text(raw_text)
+
+         # Create FAISS vectorstore
+         embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
+         vectorstore = FAISS.from_texts(texts, embeddings)
+
+         return "✅ PDF successfully processed. You can now ask questions!"
+
+     except Exception as e:
+         return f"❌ Error: {str(e)}"
+
+
+ def ask_question(query):
+     global vectorstore
+
+     if vectorstore is None:
+         return "❌ Please upload a PDF first."
+
+     try:
+         # Retrieve the three most similar chunks and join them as context
+         docs = vectorstore.similarity_search(query, k=3)
+         context = "\n\n".join([doc.page_content for doc in docs])
+
+         # Prepare chat message format
+         messages = [
+             {
+                 "role": "system",
+                 "content": "You are a helpful assistant that answers questions based on a document."
+             },
+             {
+                 "role": "user",
+                 "content": f"Answer this question using the context below:\n\nContext:\n{context}\n\nQuestion:\n{query}"
+             }
+         ]
+
+         # Query the model via the OpenAI-compatible chat completion endpoint
+         completion = client.chat.completions.create(
+             model="meta-llama/Llama-4-Scout-17B-16E-Instruct",
+             messages=messages,
+             max_tokens=500
+         )
+
+         return completion.choices[0].message.content.strip()
+
+     except Exception as e:
+         return f"❌ Failed to generate answer: {str(e)}"
+
+
+ # Gradio UI
+ with gr.Blocks() as demo:
+     gr.Markdown("## 📄 RAG PDF Chatbot using Hugging Face Inference API")
+
+     with gr.Row():
+         file_input = gr.File(label="Upload PDF", type="binary")
+         upload_btn = gr.Button("Process")
+
+     status_box = gr.Textbox(label="Status", interactive=False)
+
+     with gr.Row():
+         question = gr.Textbox(label="Ask a Question")
+         ask_btn = gr.Button("Ask")
+
+     answer = gr.Textbox(label="Answer", lines=6)
+
+     upload_btn.click(load_pdf, inputs=file_input, outputs=status_box)
+     ask_btn.click(ask_question, inputs=question, outputs=answer)
+
+ demo.launch()
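
A quick way to sanity-check the two handlers is to call them directly, since `demo.launch()` only runs at the end of the script. A minimal sketch, assuming `load_pdf` and `ask_question` are already defined in the session (e.g. the code above pasted into a notebook without the launch), `HF_TOKEN` is set, and `sample.pdf` is a hypothetical local file:

# Hypothetical smoke test; "sample.pdf" is a placeholder you supply yourself.
with open("sample.pdf", "rb") as f:
    pdf_bytes = f.read()        # gr.File(type="binary") hands load_pdf raw bytes

print(load_pdf(pdf_bytes))      # expect "✅ PDF successfully processed. ..."
print(ask_question("What is this document about?"))

Because `vectorstore` is a module-level global, each new upload replaces the previous index, so questions always run against the most recently processed PDF.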