yonkoyonks committed on
Commit
a320d92
·
verified ·
1 Parent(s): d825032

Update utils.py

Browse files
Files changed (1) hide show
  1. utils.py +9 -14
utils.py CHANGED
@@ -1,14 +1,12 @@
1
- from langchain_community.llms import LlamaCpp
2
  from dotenv import load_dotenv
3
  import os
4
  import pandas as pd
5
 
6
- # Load environment variables
7
  load_dotenv()
8
- MODEL_PATH = os.getenv("MODEL_PATH", "TheBloke/gemma-2b-it-GGUF/gemma-2b-it.Q4_K_M.gguf")
9
 
10
  def summarize_dataframe(df: pd.DataFrame, max_rows: int = 30) -> str:
11
- """Summarize a dataframe to avoid overloading the context window."""
12
  summary = f"Columns: {', '.join(df.columns)}\n\n"
13
  if len(df) > max_rows:
14
  sample = df.sample(max_rows, random_state=42)
@@ -20,8 +18,6 @@ def summarize_dataframe(df: pd.DataFrame, max_rows: int = 30) -> str:
20
  return summary
21
 
22
  def query_agent(df: pd.DataFrame, query: str) -> str:
23
- """Query a CSV/DataFrame using your local Gemma model with context-aware limits."""
24
- # Attempt to handle simple analytical questions directly with pandas
25
  query_lower = query.lower()
26
  try:
27
  if "most common" in query_lower or "most frequent" in query_lower:
@@ -32,12 +28,10 @@ def query_agent(df: pd.DataFrame, query: str) -> str:
32
  except Exception as e:
33
  print("Direct analysis failed:", e)
34
 
35
- # Otherwise summarize dataset for LLM
36
  data_text = summarize_dataframe(df)
37
 
38
  prompt = f"""
39
  You are a data analysis assistant with expertise in statistics and data interpretation.
40
-
41
  Analyze the dataset sample below and answer the user's question in a **clear, detailed, and well-explained way**.
42
  Include both the **direct answer** and a short **explanation or reasoning** behind it.
43
 
@@ -50,12 +44,13 @@ def query_agent(df: pd.DataFrame, query: str) -> str:
50
  Answer (with explanation):
51
  """
52
 
53
- llm = LlamaCpp(
54
- model_path=MODEL_PATH,
55
- temperature=0.7,
56
- max_new_tokens=1024,
57
- n_ctx=16384,
58
- verbose=True,
 
59
  )
60
 
61
  answer = llm(prompt)
 
1
+ from langchain_community.llms import HuggingFaceHub
2
  from dotenv import load_dotenv
3
  import os
4
  import pandas as pd
5
 
6
+ # Load .env variables (still useful for local dev)
7
  load_dotenv()
 
8
 
9
  def summarize_dataframe(df: pd.DataFrame, max_rows: int = 30) -> str:
 
10
  summary = f"Columns: {', '.join(df.columns)}\n\n"
11
  if len(df) > max_rows:
12
  sample = df.sample(max_rows, random_state=42)
 
18
  return summary
19
 
20
  def query_agent(df: pd.DataFrame, query: str) -> str:
 
 
21
  query_lower = query.lower()
22
  try:
23
  if "most common" in query_lower or "most frequent" in query_lower:
 
28
  except Exception as e:
29
  print("Direct analysis failed:", e)
30
 
 
31
  data_text = summarize_dataframe(df)
32
 
33
  prompt = f"""
34
  You are a data analysis assistant with expertise in statistics and data interpretation.
 
35
  Analyze the dataset sample below and answer the user's question in a **clear, detailed, and well-explained way**.
36
  Include both the **direct answer** and a short **explanation or reasoning** behind it.
37
 
 
44
  Answer (with explanation):
45
  """
46
 
47
+ # Use a hosted model instead of local LlamaCpp
48
+ llm = HuggingFaceHub(
49
+ repo_id="google/gemma-2b-it", # or another model like mistralai/Mistral-7B-Instruct-v0.2
50
+ model_kwargs={
51
+ "temperature": 0.7,
52
+ "max_new_tokens": 1024,
53
+ }
54
  )
55
 
56
  answer = llm(prompt)