Spaces:

boettiger-lab
/

ca-30x30-cbn

Build error

App Files Files Community

cassiebuhler committed on Aug 8

Commit

c41a4d5

1 Parent(s): a861900

pmtiles filter by ids + changed nonconserved areas ids

Browse files

Files changed (5) hide show

app/app.py +7 -7
app/system_prompt.txt +3 -3
app/utils.py +2 -2
app/variables.py +0 -2
preprocess/generate_pmtiles.ipynb +5 -4

app/app.py CHANGED Viewed

@@ -95,7 +95,7 @@ def main():
         sql_query = output.sql_query
         explanation =output.explanation
         if not sql_query: # if the chatbot can't generate a SQL query.
-            return pd.DataFrame({'sub_id' : []}),'', explanation
         result = ca.sql(sql_query).execute()
         if result.empty:
             explanation = "This query did not return any results. Please try again with a different query."
@@ -138,8 +138,8 @@ def main():
                                 st.code(sql_query, language="sql")
                                 st.stop()
-                            # output without mapping columns (sub_id, geom)
-                            elif "sub_id" not in llm_output.columns and "geom" not in llm_output.columns:
                                 st.write(llm_output)
                                 not_mapping = True
@@ -151,8 +151,8 @@ def main():
                                     st.code(sql_query,language = "sql")
                         # extract ids, columns, bounds if present
-                        if "sub_id" in llm_output.columns and not llm_output.empty:
-                            ids = list(set(llm_output['sub_id'].tolist()))
                             llm_cols = extract_columns(sql_query)
                             bounds = llm_output.total_bounds.tolist()
                         else:
@@ -376,12 +376,12 @@ def main():
                     if ('geom' in llm_output.columns) and (not llm_output.empty):
                         llm_output = llm_output.drop('geom',axis = 1)
                     if not llm_output.empty:
-                        if 'name' in llm_output.columns and 'sub_id' in llm_output.columns:
                             llm_grouped = (llm_output.groupby('name')
                                             .agg({col: ('sum' if col == 'acres' else 'first')
                                               for col in llm_output.columns
                                               if col != 'name'})).reset_index()
-                            llm_grouped.drop(['sub_id'], axis=1, inplace = True)
                             st.dataframe(llm_grouped, use_container_width = True)
                         else:
                             st.dataframe(llm_output, use_container_width = True)

         sql_query = output.sql_query
         explanation =output.explanation
         if not sql_query: # if the chatbot can't generate a SQL query.
+            return pd.DataFrame({'id' : []}),'', explanation
         result = ca.sql(sql_query).execute()
         if result.empty:
             explanation = "This query did not return any results. Please try again with a different query."
                                 st.code(sql_query, language="sql")
                                 st.stop()
+                            # output without mapping columns (id, geom)
+                            elif "id" not in llm_output.columns and "geom" not in llm_output.columns:
                                 st.write(llm_output)
                                 not_mapping = True
                                     st.code(sql_query,language = "sql")
                         # extract ids, columns, bounds if present
+                        if "id" in llm_output.columns and not llm_output.empty:
+                            ids = list(set(llm_output['id'].tolist()))
                             llm_cols = extract_columns(sql_query)
                             bounds = llm_output.total_bounds.tolist()
                         else:
                     if ('geom' in llm_output.columns) and (not llm_output.empty):
                         llm_output = llm_output.drop('geom',axis = 1)
                     if not llm_output.empty:
+                        if 'name' in llm_output.columns and 'id' in llm_output.columns:
                             llm_grouped = (llm_output.groupby('name')
                                             .agg({col: ('sum' if col == 'acres' else 'first')
                                               for col in llm_output.columns
                                               if col != 'name'})).reset_index()
+                            llm_grouped.drop(['id'], axis=1, inplace = True)
                             st.dataframe(llm_grouped, use_container_width = True)
                         else:
                             st.dataframe(llm_output, use_container_width = True)

app/system_prompt.txt CHANGED Viewed

@@ -179,7 +179,7 @@ example_assistant: {{"sql_query":
         "pct_top_plant_richness" > 0.50
       );
-example_user: "Show me GAP 3 lands that are in the top 5% of mean amphibian richness"
 example_assistant: {{"sql_query":
     WITH percentile AS (
         SELECT PERCENTILE_CONT(0.95) WITHIN GROUP (ORDER BY "mean_amphibian_richness") AS mean_amphibian_richness_95_percentile
@@ -187,8 +187,8 @@ example_assistant: {{"sql_query":
     )
     SELECT "sub_id", "geom", "name", "acres","mean_amphibian_richness"
     FROM mydata
-    WHERE "gap_code" = 3
-        AND "mean_amphibian_richness" > (SELECT mean_amphibian_richness_95_percentile FROM percentile);
 example_user: "Show nonconserved areas in climate zone 2"
 example_assistant: {{"sql_query":

         "pct_top_plant_richness" > 0.50
       );
+example_user: "Show me easements that are in the top 5% of mean amphibian richness"
 example_assistant: {{"sql_query":
     WITH percentile AS (
         SELECT PERCENTILE_CONT(0.95) WITHIN GROUP (ORDER BY "mean_amphibian_richness") AS mean_amphibian_richness_95_percentile
     )
     SELECT "sub_id", "geom", "name", "acres","mean_amphibian_richness"
     FROM mydata
+    WHERE "land_tenure" = 'Easement'
+    AND "mean_amphibian_richness" > (SELECT mean_amphibian_richness_95_percentile FROM percentile);
 example_user: "Show nonconserved areas in climate zone 2"
 example_assistant: {{"sql_query":

app/utils.py CHANGED Viewed

@@ -184,7 +184,7 @@ def get_summary_table_sql(ca, column, colors, ids, feature_col = None):
     """
     Generates a summary table using specific IDs as filters.
     """
-    combined_filter = _.sub_id.isin(ids)
     df_network = get_summary(ca, combined_filter, [column], column, feature_col, colors)
     df_feature = get_summary(ca, combined_filter, [column], column, feature_col, colors, feature = True)
     return df_network, df_feature
@@ -206,7 +206,7 @@ def get_pmtiles_style(paint, pmtiles_file, low_res, filter_cols=None, filter_val
     Generates a MapLibre GL style for PMTiles with either filters or a list of IDs.
     """
     if ids:
-        filter_expr = ["in", ["get", "sub_id"], ["literal", ids]]
     else:
         # we don't want to overwhelm streamlit so if they didn't filter anything, don't provide filter arg
         filter_length = sum([len(x) for x in filter_vals])

     """
     Generates a summary table using specific IDs as filters.
     """
+    combined_filter = _.id.isin(ids)
     df_network = get_summary(ca, combined_filter, [column], column, feature_col, colors)
     df_feature = get_summary(ca, combined_filter, [column], column, feature_col, colors, feature = True)
     return df_network, df_feature
     Generates a MapLibre GL style for PMTiles with either filters or a list of IDs.
     """
     if ids:
+        filter_expr = ["in", ["get", "id"], ["literal", ids]]
     else:
         # we don't want to overwhelm streamlit so if they didn't filter anything, don't provide filter arg
         filter_length = sum([len(x) for x in filter_vals])

app/variables.py CHANGED Viewed

@@ -697,8 +697,6 @@ llm_options = {
     "mistral-small-3.2-24b-instruct": ChatOpenAI(model = "mistralai/mistral-small-3.2-24b-instruct:free", api_key=openrouter_api, base_url = "https://openrouter.ai/api/v1",  temperature=0),
     "hunyuan-a13b-instruct": ChatOpenAI(model = "tencent/hunyuan-a13b-instruct:free", api_key=openrouter_api, base_url = "https://openrouter.ai/api/v1",  temperature=0),
     "deepseek-r1t2-chimera": ChatOpenAI(model = "tngtech/deepseek-r1t2-chimera:free", api_key=openrouter_api, base_url = "https://openrouter.ai/api/v1",  temperature=0),
-    "deepseek-r1-0528-qwen3-8b": ChatOpenAI(model = "deepseek/deepseek-r1-0528-qwen3-8b:free", api_key=openrouter_api, base_url = "https://openrouter.ai/api/v1",  temperature=0),
-    "sarvam-m": ChatOpenAI(model = "sarvamai/sarvam-m:free", api_key=openrouter_api, base_url = "https://openrouter.ai/api/v1",  temperature=0),
     "devstral-small-2505": ChatOpenAI(model = "mistralai/devstral-small-2505:free", api_key=openrouter_api, base_url = "https://openrouter.ai/api/v1",  temperature=0),
         "deepseek-chat-v3-0324": ChatOpenAI(model = "deepseek/deepseek-chat-v3-0324:free", api_key=openrouter_api, base_url = "https://openrouter.ai/api/v1",  temperature=0),
     "gpt-oss-20b": ChatOpenAI(model = "openai/gpt-oss-20b:free", api_key=openrouter_api, base_url = "https://openrouter.ai/api/v1",  temperature=0),

     "mistral-small-3.2-24b-instruct": ChatOpenAI(model = "mistralai/mistral-small-3.2-24b-instruct:free", api_key=openrouter_api, base_url = "https://openrouter.ai/api/v1",  temperature=0),
     "hunyuan-a13b-instruct": ChatOpenAI(model = "tencent/hunyuan-a13b-instruct:free", api_key=openrouter_api, base_url = "https://openrouter.ai/api/v1",  temperature=0),
     "deepseek-r1t2-chimera": ChatOpenAI(model = "tngtech/deepseek-r1t2-chimera:free", api_key=openrouter_api, base_url = "https://openrouter.ai/api/v1",  temperature=0),
     "devstral-small-2505": ChatOpenAI(model = "mistralai/devstral-small-2505:free", api_key=openrouter_api, base_url = "https://openrouter.ai/api/v1",  temperature=0),
         "deepseek-chat-v3-0324": ChatOpenAI(model = "deepseek/deepseek-chat-v3-0324:free", api_key=openrouter_api, base_url = "https://openrouter.ai/api/v1",  temperature=0),
     "gpt-oss-20b": ChatOpenAI(model = "openai/gpt-oss-20b:free", api_key=openrouter_api, base_url = "https://openrouter.ai/api/v1",  temperature=0),

preprocess/generate_pmtiles.ipynb CHANGED Viewed

@@ -39,11 +39,12 @@
    "outputs": [],
    "source": [
     "%%time\n",
-    "#Wall time: 9min 39s\n",
     "parquet_file = f's3://public-ca30x30/ca30x30_cbn_v3.parquet'\n",
     "geobuf_file = 'ca30x30_cbn_v3.fgb'\n",
     "ca_geo = con.read_parquet(parquet_file).mutate(acres = _.acres.round(4))\n",
-    "ca_geo.select(~s.startswith([\"mean_\", \"pct_\"])).execute().set_crs('epsg:4326').to_file(geobuf_file)"
    ]
   },
   {
@@ -54,7 +55,7 @@
    "outputs": [],
    "source": [
     "%%time \n",
-    "#Wall time: 20min 7s\n",
     "pmtiles_file = 'ca30x30_cbn_v3.pmtiles'\n",
     "pmtiles = f's3://public-ca30x30/{pmtiles_file}'\n",
     "source_layer_name = re.sub(r'\\W+', '', os.path.splitext(os.path.basename(pmtiles_file))[0])\n",
@@ -68,7 +69,7 @@
     "            '--no-tiny-polygon-reduction',\n",
     "            '--no-simplification-of-shared-nodes'\n",
     "         ]\n",
-    "new_pmtiles = to_pmtiles(geojson_file, pmtiles_file, options = options )\n",
     "s3_cp(new_pmtiles,pmtiles)"
    ]
   },

    "outputs": [],
    "source": [
     "%%time\n",
+    "#Wall time: 48.9 s\n",
     "parquet_file = f's3://public-ca30x30/ca30x30_cbn_v3.parquet'\n",
     "geobuf_file = 'ca30x30_cbn_v3.fgb'\n",
     "ca_geo = con.read_parquet(parquet_file).mutate(acres = _.acres.round(4))\n",
+    "ca_geo.select(~s.startswith([\"mean_\", \"pct_\"])).execute().set_crs('epsg:4326').to_file(geobuf_file)\n",
+    "s3_cp(geobuf_file,f's3://public-ca30x30/{geobuf_file}')"
    ]
   },
   {
    "outputs": [],
    "source": [
     "%%time \n",
+    "#Wall time: 19min 11s\n",
     "pmtiles_file = 'ca30x30_cbn_v3.pmtiles'\n",
     "pmtiles = f's3://public-ca30x30/{pmtiles_file}'\n",
     "source_layer_name = re.sub(r'\\W+', '', os.path.splitext(os.path.basename(pmtiles_file))[0])\n",
     "            '--no-tiny-polygon-reduction',\n",
     "            '--no-simplification-of-shared-nodes'\n",
     "         ]\n",
+    "new_pmtiles = to_pmtiles(geobuf_file, pmtiles_file, options = options )\n",
     "s3_cp(new_pmtiles,pmtiles)"
    ]
   },