import logging
import gradio as gr
from queue import Queue
import time
from prometheus_client import start_http_server, Counter, Histogram
import threading
import psutil
import random
from transformers import pipeline, AutoConfig

# Load the model and its configuration
model_name = "Sevixdd/roberta-base-finetuned-ner"  # Make sure this model is available
ner_pipeline = pipeline("ner", model=model_name)
config = AutoConfig.from_pretrained(model_name)

# --- Prometheus Metrics ---
REQUEST_COUNT = Counter('gradio_request_count', 'Total requests')
REQUEST_LATENCY = Histogram('gradio_request_latency_seconds', 'Request latency (s)')

# --- Logging ---
logging.basicConfig(
    filename="chat_log.txt",
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
)

# --- Chat Queue ---
chat_queue = Queue(maxsize=1)  # Allow only one request at a time

# --- Chat Function ---
def chat_function(message, history):
    with REQUEST_LATENCY.time():
        REQUEST_COUNT.inc()
        if chat_queue.full():
            return "The model is busy. Please wait..."  # More user-friendly message
        chat_queue.put(message)
        try:
            logging.info(f"User: {message}")
            ner_result = ner_pipeline(message)
            response = f"Response from NER model: {ner_result}"
            logging.info(f"Bot: {response}")
            time.sleep(random.uniform(0.5, 2.5))  # Simulate processing (adjust as needed)
            return response
        except Exception as e:
            logging.error(f"Error: {e}")
            return "An error occurred. Please try again later."  # More helpful error message
        finally:
            chat_queue.get()  # Free the queue slot even if the pipeline raised

# --- Gradio Interface ---
with gr.Blocks(
    css="""
    body {
        background-image: url("stag.jpeg");
        background-size: cover;
        background-repeat: no-repeat;
    }
    """,
    title="PLOD Filtered with Monitoring",
) as demo:
    with gr.Tab("Chat"):
        gr.Markdown("## Chat with the Bot")
        chatbot = gr.ChatInterface(fn=chat_function)

    with gr.Tab("Model Details"):
        gr.Markdown("## Model Configuration")
        gr.JSON(value=config.to_dict(), interactive=False)

    # ... other tabs (Performance Metrics, Infrastructure, Logs) ...

# --- Update Functions ---
# ... (Implement update functions for metrics, usage, and logs here;
#      see the example helpers sketched at the end of this file)

# --- Background Threads ---
# Expose Prometheus metrics on port 8000
threading.Thread(target=start_http_server, args=(8000,), daemon=True).start()
# ... (Threads for metrics, usage, and logs update)

# Launch the app
demo.launch(share=True)
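
# --- Example: possible helpers for the elided update sections ---
# A minimal sketch only, assuming the Logs tab tails chat_log.txt and the
# Infrastructure tab reports psutil figures. The names read_recent_logs and
# get_system_usage are placeholders, not part of the app above; to take effect
# they would have to be defined before the gr.Blocks section (and before
# demo.launch) and wired into components there, e.g. a gr.Textbox or gr.JSON
# whose callable value is refreshed periodically.

def read_recent_logs(max_lines: int = 50) -> str:
    """Return the tail of chat_log.txt for display in a Logs tab."""
    try:
        with open("chat_log.txt", "r", encoding="utf-8") as log_file:
            return "".join(log_file.readlines()[-max_lines:])
    except FileNotFoundError:
        return "No log entries yet."

def get_system_usage() -> dict:
    """Snapshot CPU and memory usage for an Infrastructure tab."""
    return {
        "cpu_percent": psutil.cpu_percent(interval=0.1),
        "memory_percent": psutil.virtual_memory().percent,
    }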