Spaces:
Sleeping
Sleeping
Update utils.py
Browse files
utils.py
CHANGED
|
@@ -1,14 +1,12 @@
|
|
| 1 |
-
from langchain_community.llms import LlamaCpp
|
| 2 |
from dotenv import load_dotenv
|
| 3 |
import os
|
| 4 |
import pandas as pd
|
| 5 |
|
| 6 |
-
# Load
|
| 7 |
load_dotenv()
|
| 8 |
-
MODEL_PATH = os.getenv("MODEL_PATH", "TheBloke/gemma-2b-it-GGUF/gemma-2b-it.Q4_K_M.gguf")
|
| 9 |
|
| 10 |
def summarize_dataframe(df: pd.DataFrame, max_rows: int = 30) -> str:
|
| 11 |
-
"""Summarize a dataframe to avoid overloading the context window."""
|
| 12 |
summary = f"Columns: {', '.join(df.columns)}\n\n"
|
| 13 |
if len(df) > max_rows:
|
| 14 |
sample = df.sample(max_rows, random_state=42)
|
|
@@ -20,8 +18,6 @@ def summarize_dataframe(df: pd.DataFrame, max_rows: int = 30) -> str:
|
|
| 20 |
return summary
|
| 21 |
|
| 22 |
def query_agent(df: pd.DataFrame, query: str) -> str:
|
| 23 |
-
"""Query a CSV/DataFrame using your local Gemma model with context-aware limits."""
|
| 24 |
-
# Attempt to handle simple analytical questions directly with pandas
|
| 25 |
query_lower = query.lower()
|
| 26 |
try:
|
| 27 |
if "most common" in query_lower or "most frequent" in query_lower:
|
|
@@ -32,12 +28,10 @@ def query_agent(df: pd.DataFrame, query: str) -> str:
|
|
| 32 |
except Exception as e:
|
| 33 |
print("Direct analysis failed:", e)
|
| 34 |
|
| 35 |
-
# Otherwise summarize dataset for LLM
|
| 36 |
data_text = summarize_dataframe(df)
|
| 37 |
|
| 38 |
prompt = f"""
|
| 39 |
You are a data analysis assistant with expertise in statistics and data interpretation.
|
| 40 |
-
|
| 41 |
Analyze the dataset sample below and answer the user's question in a **clear, detailed, and well-explained way**.
|
| 42 |
Include both the **direct answer** and a short **explanation or reasoning** behind it.
|
| 43 |
|
|
@@ -50,12 +44,13 @@ def query_agent(df: pd.DataFrame, query: str) -> str:
|
|
| 50 |
Answer (with explanation):
|
| 51 |
"""
|
| 52 |
|
| 53 |
-
|
| 54 |
-
|
| 55 |
-
|
| 56 |
-
|
| 57 |
-
|
| 58 |
-
|
|
|
|
| 59 |
)
|
| 60 |
|
| 61 |
answer = llm(prompt)
|
|
|
|
| 1 |
+
from langchain_community.llms import HuggingFaceHub
|
| 2 |
from dotenv import load_dotenv
|
| 3 |
import os
|
| 4 |
import pandas as pd
|
| 5 |
|
| 6 |
+
# Load .env variables (still useful for local dev)
|
| 7 |
load_dotenv()
|
|
|
|
| 8 |
|
| 9 |
def summarize_dataframe(df: pd.DataFrame, max_rows: int = 30) -> str:
|
|
|
|
| 10 |
summary = f"Columns: {', '.join(df.columns)}\n\n"
|
| 11 |
if len(df) > max_rows:
|
| 12 |
sample = df.sample(max_rows, random_state=42)
|
|
|
|
| 18 |
return summary
|
| 19 |
|
| 20 |
def query_agent(df: pd.DataFrame, query: str) -> str:
|
|
|
|
|
|
|
| 21 |
query_lower = query.lower()
|
| 22 |
try:
|
| 23 |
if "most common" in query_lower or "most frequent" in query_lower:
|
|
|
|
| 28 |
except Exception as e:
|
| 29 |
print("Direct analysis failed:", e)
|
| 30 |
|
|
|
|
| 31 |
data_text = summarize_dataframe(df)
|
| 32 |
|
| 33 |
prompt = f"""
|
| 34 |
You are a data analysis assistant with expertise in statistics and data interpretation.
|
|
|
|
| 35 |
Analyze the dataset sample below and answer the user's question in a **clear, detailed, and well-explained way**.
|
| 36 |
Include both the **direct answer** and a short **explanation or reasoning** behind it.
|
| 37 |
|
|
|
|
| 44 |
Answer (with explanation):
|
| 45 |
"""
|
| 46 |
|
| 47 |
+
# Use a hosted model instead of local LlamaCpp
|
| 48 |
+
llm = HuggingFaceHub(
|
| 49 |
+
repo_id="google/gemma-2b-it", # or another model like mistralai/Mistral-7B-Instruct-v0.2
|
| 50 |
+
model_kwargs={
|
| 51 |
+
"temperature": 0.7,
|
| 52 |
+
"max_new_tokens": 1024,
|
| 53 |
+
}
|
| 54 |
)
|
| 55 |
|
| 56 |
answer = llm(prompt)
|