cassiebuhler committed on
Commit
c41a4d5
·
1 Parent(s): a861900

pmtiles filter by ids + changed nonconserved areas ids

Browse files
app/app.py CHANGED
@@ -95,7 +95,7 @@ def main():
95
  sql_query = output.sql_query
96
  explanation =output.explanation
97
  if not sql_query: # if the chatbot can't generate a SQL query.
98
- return pd.DataFrame({'sub_id' : []}),'', explanation
99
  result = ca.sql(sql_query).execute()
100
  if result.empty:
101
  explanation = "This query did not return any results. Please try again with a different query."
@@ -138,8 +138,8 @@ def main():
138
  st.code(sql_query, language="sql")
139
  st.stop()
140
 
141
- # output without mapping columns (sub_id, geom)
142
- elif "sub_id" not in llm_output.columns and "geom" not in llm_output.columns:
143
  st.write(llm_output)
144
  not_mapping = True
145
 
@@ -151,8 +151,8 @@ def main():
151
  st.code(sql_query,language = "sql")
152
 
153
  # extract ids, columns, bounds if present
154
- if "sub_id" in llm_output.columns and not llm_output.empty:
155
- ids = list(set(llm_output['sub_id'].tolist()))
156
  llm_cols = extract_columns(sql_query)
157
  bounds = llm_output.total_bounds.tolist()
158
  else:
@@ -376,12 +376,12 @@ def main():
376
  if ('geom' in llm_output.columns) and (not llm_output.empty):
377
  llm_output = llm_output.drop('geom',axis = 1)
378
  if not llm_output.empty:
379
- if 'name' in llm_output.columns and 'sub_id' in llm_output.columns:
380
  llm_grouped = (llm_output.groupby('name')
381
  .agg({col: ('sum' if col == 'acres' else 'first')
382
  for col in llm_output.columns
383
  if col != 'name'})).reset_index()
384
- llm_grouped.drop(['sub_id'], axis=1, inplace = True)
385
  st.dataframe(llm_grouped, use_container_width = True)
386
  else:
387
  st.dataframe(llm_output, use_container_width = True)
 
95
  sql_query = output.sql_query
96
  explanation =output.explanation
97
  if not sql_query: # if the chatbot can't generate a SQL query.
98
+ return pd.DataFrame({'id' : []}),'', explanation
99
  result = ca.sql(sql_query).execute()
100
  if result.empty:
101
  explanation = "This query did not return any results. Please try again with a different query."
 
138
  st.code(sql_query, language="sql")
139
  st.stop()
140
 
141
+ # output without mapping columns (id, geom)
142
+ elif "id" not in llm_output.columns and "geom" not in llm_output.columns:
143
  st.write(llm_output)
144
  not_mapping = True
145
 
 
151
  st.code(sql_query,language = "sql")
152
 
153
  # extract ids, columns, bounds if present
154
+ if "id" in llm_output.columns and not llm_output.empty:
155
+ ids = list(set(llm_output['id'].tolist()))
156
  llm_cols = extract_columns(sql_query)
157
  bounds = llm_output.total_bounds.tolist()
158
  else:
 
376
  if ('geom' in llm_output.columns) and (not llm_output.empty):
377
  llm_output = llm_output.drop('geom',axis = 1)
378
  if not llm_output.empty:
379
+ if 'name' in llm_output.columns and 'id' in llm_output.columns:
380
  llm_grouped = (llm_output.groupby('name')
381
  .agg({col: ('sum' if col == 'acres' else 'first')
382
  for col in llm_output.columns
383
  if col != 'name'})).reset_index()
384
+ llm_grouped.drop(['id'], axis=1, inplace = True)
385
  st.dataframe(llm_grouped, use_container_width = True)
386
  else:
387
  st.dataframe(llm_output, use_container_width = True)
app/system_prompt.txt CHANGED
@@ -179,7 +179,7 @@ example_assistant: {{"sql_query":
179
  "pct_top_plant_richness" > 0.50
180
  );
181
 
182
- example_user: "Show me GAP 3 lands that are in the top 5% of mean amphibian richness"
183
  example_assistant: {{"sql_query":
184
  WITH percentile AS (
185
  SELECT PERCENTILE_CONT(0.95) WITHIN GROUP (ORDER BY "mean_amphibian_richness") AS mean_amphibian_richness_95_percentile
@@ -187,8 +187,8 @@ example_assistant: {{"sql_query":
187
  )
188
  SELECT "sub_id", "geom", "name", "acres","mean_amphibian_richness"
189
  FROM mydata
190
- WHERE "gap_code" = 3
191
- AND "mean_amphibian_richness" > (SELECT mean_amphibian_richness_95_percentile FROM percentile);
192
 
193
  example_user: "Show nonconserved areas in climate zone 2"
194
  example_assistant: {{"sql_query":
 
179
  "pct_top_plant_richness" > 0.50
180
  );
181
 
182
+ example_user: "Show me easements that are in the top 5% of mean amphibian richness"
183
  example_assistant: {{"sql_query":
184
  WITH percentile AS (
185
  SELECT PERCENTILE_CONT(0.95) WITHIN GROUP (ORDER BY "mean_amphibian_richness") AS mean_amphibian_richness_95_percentile
 
187
  )
188
  SELECT "sub_id", "geom", "name", "acres","mean_amphibian_richness"
189
  FROM mydata
190
+ WHERE "land_tenure" = 'Easement'
191
+ AND "mean_amphibian_richness" > (SELECT mean_amphibian_richness_95_percentile FROM percentile);
192
 
193
  example_user: "Show nonconserved areas in climate zone 2"
194
  example_assistant: {{"sql_query":
app/utils.py CHANGED
@@ -184,7 +184,7 @@ def get_summary_table_sql(ca, column, colors, ids, feature_col = None):
184
  """
185
  Generates a summary table using specific IDs as filters.
186
  """
187
- combined_filter = _.sub_id.isin(ids)
188
  df_network = get_summary(ca, combined_filter, [column], column, feature_col, colors)
189
  df_feature = get_summary(ca, combined_filter, [column], column, feature_col, colors, feature = True)
190
  return df_network, df_feature
@@ -206,7 +206,7 @@ def get_pmtiles_style(paint, pmtiles_file, low_res, filter_cols=None, filter_val
206
  Generates a MapLibre GL style for PMTiles with either filters or a list of IDs.
207
  """
208
  if ids:
209
- filter_expr = ["in", ["get", "sub_id"], ["literal", ids]]
210
  else:
211
  # we don't want to overwhelm streamlit so if they didn't filter anything, don't provide filter arg
212
  filter_length = sum([len(x) for x in filter_vals])
 
184
  """
185
  Generates a summary table using specific IDs as filters.
186
  """
187
+ combined_filter = _.id.isin(ids)
188
  df_network = get_summary(ca, combined_filter, [column], column, feature_col, colors)
189
  df_feature = get_summary(ca, combined_filter, [column], column, feature_col, colors, feature = True)
190
  return df_network, df_feature
 
206
  Generates a MapLibre GL style for PMTiles with either filters or a list of IDs.
207
  """
208
  if ids:
209
+ filter_expr = ["in", ["get", "id"], ["literal", ids]]
210
  else:
211
  # we don't want to overwhelm streamlit so if they didn't filter anything, don't provide filter arg
212
  filter_length = sum([len(x) for x in filter_vals])
app/variables.py CHANGED
@@ -697,8 +697,6 @@ llm_options = {
697
  "mistral-small-3.2-24b-instruct": ChatOpenAI(model = "mistralai/mistral-small-3.2-24b-instruct:free", api_key=openrouter_api, base_url = "https://openrouter.ai/api/v1", temperature=0),
698
  "hunyuan-a13b-instruct": ChatOpenAI(model = "tencent/hunyuan-a13b-instruct:free", api_key=openrouter_api, base_url = "https://openrouter.ai/api/v1", temperature=0),
699
  "deepseek-r1t2-chimera": ChatOpenAI(model = "tngtech/deepseek-r1t2-chimera:free", api_key=openrouter_api, base_url = "https://openrouter.ai/api/v1", temperature=0),
700
- "deepseek-r1-0528-qwen3-8b": ChatOpenAI(model = "deepseek/deepseek-r1-0528-qwen3-8b:free", api_key=openrouter_api, base_url = "https://openrouter.ai/api/v1", temperature=0),
701
- "sarvam-m": ChatOpenAI(model = "sarvamai/sarvam-m:free", api_key=openrouter_api, base_url = "https://openrouter.ai/api/v1", temperature=0),
702
  "devstral-small-2505": ChatOpenAI(model = "mistralai/devstral-small-2505:free", api_key=openrouter_api, base_url = "https://openrouter.ai/api/v1", temperature=0),
703
  "deepseek-chat-v3-0324": ChatOpenAI(model = "deepseek/deepseek-chat-v3-0324:free", api_key=openrouter_api, base_url = "https://openrouter.ai/api/v1", temperature=0),
704
  "gpt-oss-20b": ChatOpenAI(model = "openai/gpt-oss-20b:free", api_key=openrouter_api, base_url = "https://openrouter.ai/api/v1", temperature=0),
 
697
  "mistral-small-3.2-24b-instruct": ChatOpenAI(model = "mistralai/mistral-small-3.2-24b-instruct:free", api_key=openrouter_api, base_url = "https://openrouter.ai/api/v1", temperature=0),
698
  "hunyuan-a13b-instruct": ChatOpenAI(model = "tencent/hunyuan-a13b-instruct:free", api_key=openrouter_api, base_url = "https://openrouter.ai/api/v1", temperature=0),
699
  "deepseek-r1t2-chimera": ChatOpenAI(model = "tngtech/deepseek-r1t2-chimera:free", api_key=openrouter_api, base_url = "https://openrouter.ai/api/v1", temperature=0),
 
 
700
  "devstral-small-2505": ChatOpenAI(model = "mistralai/devstral-small-2505:free", api_key=openrouter_api, base_url = "https://openrouter.ai/api/v1", temperature=0),
701
  "deepseek-chat-v3-0324": ChatOpenAI(model = "deepseek/deepseek-chat-v3-0324:free", api_key=openrouter_api, base_url = "https://openrouter.ai/api/v1", temperature=0),
702
  "gpt-oss-20b": ChatOpenAI(model = "openai/gpt-oss-20b:free", api_key=openrouter_api, base_url = "https://openrouter.ai/api/v1", temperature=0),
preprocess/generate_pmtiles.ipynb CHANGED
@@ -39,11 +39,12 @@
39
  "outputs": [],
40
  "source": [
41
  "%%time\n",
42
- "#Wall time: 9min 39s\n",
43
  "parquet_file = f's3://public-ca30x30/ca30x30_cbn_v3.parquet'\n",
44
  "geobuf_file = 'ca30x30_cbn_v3.fgb'\n",
45
  "ca_geo = con.read_parquet(parquet_file).mutate(acres = _.acres.round(4))\n",
46
- "ca_geo.select(~s.startswith([\"mean_\", \"pct_\"])).execute().set_crs('epsg:4326').to_file(geobuf_file)"
 
47
  ]
48
  },
49
  {
@@ -54,7 +55,7 @@
54
  "outputs": [],
55
  "source": [
56
  "%%time \n",
57
- "#Wall time: 20min 7s\n",
58
  "pmtiles_file = 'ca30x30_cbn_v3.pmtiles'\n",
59
  "pmtiles = f's3://public-ca30x30/{pmtiles_file}'\n",
60
  "source_layer_name = re.sub(r'\\W+', '', os.path.splitext(os.path.basename(pmtiles_file))[0])\n",
@@ -68,7 +69,7 @@
68
  " '--no-tiny-polygon-reduction',\n",
69
  " '--no-simplification-of-shared-nodes'\n",
70
  " ]\n",
71
- "new_pmtiles = to_pmtiles(geojson_file, pmtiles_file, options = options )\n",
72
  "s3_cp(new_pmtiles,pmtiles)"
73
  ]
74
  },
 
39
  "outputs": [],
40
  "source": [
41
  "%%time\n",
42
+ "#Wall time: 48.9 s\n",
43
  "parquet_file = f's3://public-ca30x30/ca30x30_cbn_v3.parquet'\n",
44
  "geobuf_file = 'ca30x30_cbn_v3.fgb'\n",
45
  "ca_geo = con.read_parquet(parquet_file).mutate(acres = _.acres.round(4))\n",
46
+ "ca_geo.select(~s.startswith([\"mean_\", \"pct_\"])).execute().set_crs('epsg:4326').to_file(geobuf_file)\n",
47
+ "s3_cp(geobuf_file,f's3://public-ca30x30/{geobuf_file}')"
48
  ]
49
  },
50
  {
 
55
  "outputs": [],
56
  "source": [
57
  "%%time \n",
58
+ "#Wall time: 19min 11s\n",
59
  "pmtiles_file = 'ca30x30_cbn_v3.pmtiles'\n",
60
  "pmtiles = f's3://public-ca30x30/{pmtiles_file}'\n",
61
  "source_layer_name = re.sub(r'\\W+', '', os.path.splitext(os.path.basename(pmtiles_file))[0])\n",
 
69
  " '--no-tiny-polygon-reduction',\n",
70
  " '--no-simplification-of-shared-nodes'\n",
71
  " ]\n",
72
+ "new_pmtiles = to_pmtiles(geobuf_file, pmtiles_file, options = options )\n",
73
  "s3_cp(new_pmtiles,pmtiles)"
74
  ]
75
  },