"""StudyMate: a small Gradio app that answers questions about an uploaded PDF.

The PDF text is split into chunks, each chunk is embedded through the
Hugging Face Inference API, and the chunk most similar to the question is
handed to a generation model as context.
"""

import io
import math
import os

import gradio as gr
import pdfplumber
import requests

HF_TOKEN = os.getenv("HUGGINGFACE_API_TOKEN")
EMBED_MODEL = "ibm-granite/granite-embedding-english-r2"
GEN_MODEL = "ibm-granite/granite-3.3-2b-instruct"

# Soft upper bound on the size of one indexed chunk, in characters.
MAX_CHUNK_CHARS = 800
# Seconds to wait for the inference API; without a timeout a stalled
# connection would block the UI handler indefinitely.
REQUEST_TIMEOUT = 60

store = []  # simple in-memory vector store


def hf_request(model, payload):
    """POST *payload* to the hosted inference endpoint of *model*.

    Args:
        model: Hugging Face model id, e.g. ``EMBED_MODEL`` or ``GEN_MODEL``.
        payload: JSON-serialisable request body.

    Returns:
        The decoded JSON response.

    Raises:
        requests.RequestException: on connection errors, timeouts, or a
            non-2xx HTTP status.
    """
    # Only send credentials when a token is configured; otherwise the header
    # would carry the literal string "Bearer None".
    headers = {"Authorization": f"Bearer {HF_TOKEN}"} if HF_TOKEN else {}
    res = requests.post(
        f"https://api-inference.huggingface.co/models/{model}",
        headers=headers,
        json=payload,
        timeout=REQUEST_TIMEOUT,
    )
    res.raise_for_status()
    return res.json()


def cosine(a, b):
    """Return the cosine similarity of two numeric vectors.

    A small epsilon in the denominator keeps the result finite (0.0) when
    either vector has zero length.
    """
    dot = sum(x * y for x, y in zip(a, b))
    na = math.sqrt(sum(x * x for x in a))
    nb = math.sqrt(sum(y * y for y in b))
    return dot / (na * nb + 1e-9)


def _extract_text(path):
    """Return the text of every page of the PDF at *path*, newline-joined."""
    with open(path, "rb") as f:
        pdf_bytes = f.read()
    with pdfplumber.open(io.BytesIO(pdf_bytes)) as pdf:
        # Join with a newline so the last word of one page does not fuse
        # with the first word of the next.
        return "\n".join(page.extract_text() or "" for page in pdf.pages)


def _chunk_text(text, max_chars=MAX_CHUNK_CHARS):
    """Group the sentences of *text* into chunks of roughly *max_chars*.

    Sentences are split on ". " and never broken up, so a single sentence
    longer than *max_chars* becomes a chunk of its own. Empty fragments are
    skipped, which means blank input yields an empty list.
    """
    chunks, buf = [], ""
    for sent in text.split(". "):
        sent = sent.strip()
        if not sent:
            continue
        if buf and len(buf) + len(sent) > max_chars:
            chunks.append(buf)
            buf = sent
        else:
            # Restore the sentence separator that split() removed.
            buf = f"{buf}. {sent}" if buf else sent
    if buf:
        chunks.append(buf)
    return chunks


def upload_pdf(pdf_file):
    """Index the uploaded PDF into the in-memory ``store``.

    Args:
        pdf_file: Value delivered by the ``gr.File`` component: either a
            path string or an object exposing the path as ``.name``,
            depending on the Gradio version. ``None`` when nothing was
            uploaded.

    Returns:
        A status message for the UI. On failure ``store`` keeps its
        previous contents.
    """
    global store
    if pdf_file is None:
        return "Please upload a PDF first."

    path = getattr(pdf_file, "name", pdf_file)
    chunks = _chunk_text(_extract_text(path))
    if not chunks:
        return "No text extracted from PDF."

    try:
        embeds = hf_request(EMBED_MODEL, {"inputs": chunks})
    except requests.RequestException as err:
        return f"❌ Embedding request failed: {err}"
    # Guard against an error payload or a truncated result before pairing
    # vectors with chunks.
    if not isinstance(embeds, list) or len(embeds) != len(chunks):
        return "❌ Unexpected response from the embedding model."

    store = [{"text": c, "vec": v} for c, v in zip(chunks, embeds)]
    return f"✅ PDF processed. {len(store)} chunks indexed."


def ask_question(q):
    """Answer *q* using the indexed chunk most similar to it.

    Args:
        q: The user's question.

    Returns:
        The model's generated text, or a message explaining why no answer
        could be produced.
    """
    if not store:
        return "⚠️ Please upload a PDF first."
    if not q or not q.strip():
        return "⚠️ Please enter a question."

    try:
        q_embed = hf_request(EMBED_MODEL, {"inputs": [q]})[0]
        best = max(store, key=lambda it: cosine(q_embed, it["vec"]))
        prompt = f"Answer the question using this context:\n{best['text']}\n\nQ: {q}"
        out = hf_request(GEN_MODEL, {"inputs": prompt})
    except requests.RequestException as err:
        return f"❌ Request to the model failed: {err}"
    return out[0].get("generated_text", "No answer")


with gr.Blocks() as demo:
    gr.Markdown("# 📘 StudyMate — PDF Q&A with IBM Granite")
    with gr.Row():
        pdf_file = gr.File(label="Upload PDF", file_types=[".pdf"])
        upload_btn = gr.Button("Process PDF")
    status = gr.Textbox(label="Status", interactive=False)
    with gr.Row():
        question = gr.Textbox(label="Ask a Question")
        ask_btn = gr.Button("Get Answer")
    answer = gr.Textbox(label="Answer", interactive=False)

    upload_btn.click(upload_pdf, inputs=pdf_file, outputs=status)
    ask_btn.click(ask_question, inputs=question, outputs=answer)

# Launch only when run as a script so importing the module (tests, reload
# tooling) does not start a blocking server.
if __name__ == "__main__":
    demo.launch()