Spaces:
Build error
Build error
Commit
·
c41a4d5
1
Parent(s):
a861900
pmtiles filter by ids + changed nonconserved areas ids
Browse files
- app/app.py +7 -7
- app/system_prompt.txt +3 -3
- app/utils.py +2 -2
- app/variables.py +0 -2
- preprocess/generate_pmtiles.ipynb +5 -4
app/app.py
CHANGED
|
@@ -95,7 +95,7 @@ def main():
|
|
| 95 |
sql_query = output.sql_query
|
| 96 |
explanation =output.explanation
|
| 97 |
if not sql_query: # if the chatbot can't generate a SQL query.
|
| 98 |
-
return pd.DataFrame({'
|
| 99 |
result = ca.sql(sql_query).execute()
|
| 100 |
if result.empty:
|
| 101 |
explanation = "This query did not return any results. Please try again with a different query."
|
|
@@ -138,8 +138,8 @@ def main():
|
|
| 138 |
st.code(sql_query, language="sql")
|
| 139 |
st.stop()
|
| 140 |
|
| 141 |
-
# output without mapping columns (
|
| 142 |
-
elif "
|
| 143 |
st.write(llm_output)
|
| 144 |
not_mapping = True
|
| 145 |
|
|
@@ -151,8 +151,8 @@ def main():
|
|
| 151 |
st.code(sql_query,language = "sql")
|
| 152 |
|
| 153 |
# extract ids, columns, bounds if present
|
| 154 |
-
if "
|
| 155 |
-
ids = list(set(llm_output['
|
| 156 |
llm_cols = extract_columns(sql_query)
|
| 157 |
bounds = llm_output.total_bounds.tolist()
|
| 158 |
else:
|
|
@@ -376,12 +376,12 @@ def main():
|
|
| 376 |
if ('geom' in llm_output.columns) and (not llm_output.empty):
|
| 377 |
llm_output = llm_output.drop('geom',axis = 1)
|
| 378 |
if not llm_output.empty:
|
| 379 |
-
if 'name' in llm_output.columns and '
|
| 380 |
llm_grouped = (llm_output.groupby('name')
|
| 381 |
.agg({col: ('sum' if col == 'acres' else 'first')
|
| 382 |
for col in llm_output.columns
|
| 383 |
if col != 'name'})).reset_index()
|
| 384 |
-
llm_grouped.drop(['
|
| 385 |
st.dataframe(llm_grouped, use_container_width = True)
|
| 386 |
else:
|
| 387 |
st.dataframe(llm_output, use_container_width = True)
|
|
|
|
| 95 |
sql_query = output.sql_query
|
| 96 |
explanation =output.explanation
|
| 97 |
if not sql_query: # if the chatbot can't generate a SQL query.
|
| 98 |
+
return pd.DataFrame({'id' : []}),'', explanation
|
| 99 |
result = ca.sql(sql_query).execute()
|
| 100 |
if result.empty:
|
| 101 |
explanation = "This query did not return any results. Please try again with a different query."
|
|
|
|
| 138 |
st.code(sql_query, language="sql")
|
| 139 |
st.stop()
|
| 140 |
|
| 141 |
+
# output without mapping columns (id, geom)
|
| 142 |
+
elif "id" not in llm_output.columns and "geom" not in llm_output.columns:
|
| 143 |
st.write(llm_output)
|
| 144 |
not_mapping = True
|
| 145 |
|
|
|
|
| 151 |
st.code(sql_query,language = "sql")
|
| 152 |
|
| 153 |
# extract ids, columns, bounds if present
|
| 154 |
+
if "id" in llm_output.columns and not llm_output.empty:
|
| 155 |
+
ids = list(set(llm_output['id'].tolist()))
|
| 156 |
llm_cols = extract_columns(sql_query)
|
| 157 |
bounds = llm_output.total_bounds.tolist()
|
| 158 |
else:
|
|
|
|
| 376 |
if ('geom' in llm_output.columns) and (not llm_output.empty):
|
| 377 |
llm_output = llm_output.drop('geom',axis = 1)
|
| 378 |
if not llm_output.empty:
|
| 379 |
+
if 'name' in llm_output.columns and 'id' in llm_output.columns:
|
| 380 |
llm_grouped = (llm_output.groupby('name')
|
| 381 |
.agg({col: ('sum' if col == 'acres' else 'first')
|
| 382 |
for col in llm_output.columns
|
| 383 |
if col != 'name'})).reset_index()
|
| 384 |
+
llm_grouped.drop(['id'], axis=1, inplace = True)
|
| 385 |
st.dataframe(llm_grouped, use_container_width = True)
|
| 386 |
else:
|
| 387 |
st.dataframe(llm_output, use_container_width = True)
|
app/system_prompt.txt
CHANGED
|
@@ -179,7 +179,7 @@ example_assistant: {{"sql_query":
|
|
| 179 |
"pct_top_plant_richness" > 0.50
|
| 180 |
);
|
| 181 |
|
| 182 |
-
example_user: "Show me
|
| 183 |
example_assistant: {{"sql_query":
|
| 184 |
WITH percentile AS (
|
| 185 |
SELECT PERCENTILE_CONT(0.95) WITHIN GROUP (ORDER BY "mean_amphibian_richness") AS mean_amphibian_richness_95_percentile
|
|
@@ -187,8 +187,8 @@ example_assistant: {{"sql_query":
|
|
| 187 |
)
|
| 188 |
SELECT "sub_id", "geom", "name", "acres","mean_amphibian_richness"
|
| 189 |
FROM mydata
|
| 190 |
-
WHERE "
|
| 191 |
-
|
| 192 |
|
| 193 |
example_user: "Show nonconserved areas in climate zone 2"
|
| 194 |
example_assistant: {{"sql_query":
|
|
|
|
| 179 |
"pct_top_plant_richness" > 0.50
|
| 180 |
);
|
| 181 |
|
| 182 |
+
example_user: "Show me easements that are in the top 5% of mean amphibian richness"
|
| 183 |
example_assistant: {{"sql_query":
|
| 184 |
WITH percentile AS (
|
| 185 |
SELECT PERCENTILE_CONT(0.95) WITHIN GROUP (ORDER BY "mean_amphibian_richness") AS mean_amphibian_richness_95_percentile
|
|
|
|
| 187 |
)
|
| 188 |
SELECT "sub_id", "geom", "name", "acres","mean_amphibian_richness"
|
| 189 |
FROM mydata
|
| 190 |
+
WHERE "land_tenure" = 'Easement'
|
| 191 |
+
AND "mean_amphibian_richness" > (SELECT mean_amphibian_richness_95_percentile FROM percentile);
|
| 192 |
|
| 193 |
example_user: "Show nonconserved areas in climate zone 2"
|
| 194 |
example_assistant: {{"sql_query":
|
app/utils.py
CHANGED
|
@@ -184,7 +184,7 @@ def get_summary_table_sql(ca, column, colors, ids, feature_col = None):
|
|
| 184 |
"""
|
| 185 |
Generates a summary table using specific IDs as filters.
|
| 186 |
"""
|
| 187 |
-
combined_filter = _.
|
| 188 |
df_network = get_summary(ca, combined_filter, [column], column, feature_col, colors)
|
| 189 |
df_feature = get_summary(ca, combined_filter, [column], column, feature_col, colors, feature = True)
|
| 190 |
return df_network, df_feature
|
|
@@ -206,7 +206,7 @@ def get_pmtiles_style(paint, pmtiles_file, low_res, filter_cols=None, filter_val
|
|
| 206 |
Generates a MapLibre GL style for PMTiles with either filters or a list of IDs.
|
| 207 |
"""
|
| 208 |
if ids:
|
| 209 |
-
filter_expr = ["in", ["get", "
|
| 210 |
else:
|
| 211 |
# we don't want to overwhelm streamlit so if they didn't filter anything, don't provide filter arg
|
| 212 |
filter_length = sum([len(x) for x in filter_vals])
|
|
|
|
| 184 |
"""
|
| 185 |
Generates a summary table using specific IDs as filters.
|
| 186 |
"""
|
| 187 |
+
combined_filter = _.id.isin(ids)
|
| 188 |
df_network = get_summary(ca, combined_filter, [column], column, feature_col, colors)
|
| 189 |
df_feature = get_summary(ca, combined_filter, [column], column, feature_col, colors, feature = True)
|
| 190 |
return df_network, df_feature
|
|
|
|
| 206 |
Generates a MapLibre GL style for PMTiles with either filters or a list of IDs.
|
| 207 |
"""
|
| 208 |
if ids:
|
| 209 |
+
filter_expr = ["in", ["get", "id"], ["literal", ids]]
|
| 210 |
else:
|
| 211 |
# we don't want to overwhelm streamlit so if they didn't filter anything, don't provide filter arg
|
| 212 |
filter_length = sum([len(x) for x in filter_vals])
|
app/variables.py
CHANGED
|
@@ -697,8 +697,6 @@ llm_options = {
|
|
| 697 |
"mistral-small-3.2-24b-instruct": ChatOpenAI(model = "mistralai/mistral-small-3.2-24b-instruct:free", api_key=openrouter_api, base_url = "https://openrouter.ai/api/v1", temperature=0),
|
| 698 |
"hunyuan-a13b-instruct": ChatOpenAI(model = "tencent/hunyuan-a13b-instruct:free", api_key=openrouter_api, base_url = "https://openrouter.ai/api/v1", temperature=0),
|
| 699 |
"deepseek-r1t2-chimera": ChatOpenAI(model = "tngtech/deepseek-r1t2-chimera:free", api_key=openrouter_api, base_url = "https://openrouter.ai/api/v1", temperature=0),
|
| 700 |
-
"deepseek-r1-0528-qwen3-8b": ChatOpenAI(model = "deepseek/deepseek-r1-0528-qwen3-8b:free", api_key=openrouter_api, base_url = "https://openrouter.ai/api/v1", temperature=0),
|
| 701 |
-
"sarvam-m": ChatOpenAI(model = "sarvamai/sarvam-m:free", api_key=openrouter_api, base_url = "https://openrouter.ai/api/v1", temperature=0),
|
| 702 |
"devstral-small-2505": ChatOpenAI(model = "mistralai/devstral-small-2505:free", api_key=openrouter_api, base_url = "https://openrouter.ai/api/v1", temperature=0),
|
| 703 |
"deepseek-chat-v3-0324": ChatOpenAI(model = "deepseek/deepseek-chat-v3-0324:free", api_key=openrouter_api, base_url = "https://openrouter.ai/api/v1", temperature=0),
|
| 704 |
"gpt-oss-20b": ChatOpenAI(model = "openai/gpt-oss-20b:free", api_key=openrouter_api, base_url = "https://openrouter.ai/api/v1", temperature=0),
|
|
|
|
| 697 |
"mistral-small-3.2-24b-instruct": ChatOpenAI(model = "mistralai/mistral-small-3.2-24b-instruct:free", api_key=openrouter_api, base_url = "https://openrouter.ai/api/v1", temperature=0),
|
| 698 |
"hunyuan-a13b-instruct": ChatOpenAI(model = "tencent/hunyuan-a13b-instruct:free", api_key=openrouter_api, base_url = "https://openrouter.ai/api/v1", temperature=0),
|
| 699 |
"deepseek-r1t2-chimera": ChatOpenAI(model = "tngtech/deepseek-r1t2-chimera:free", api_key=openrouter_api, base_url = "https://openrouter.ai/api/v1", temperature=0),
|
|
|
|
|
|
|
| 700 |
"devstral-small-2505": ChatOpenAI(model = "mistralai/devstral-small-2505:free", api_key=openrouter_api, base_url = "https://openrouter.ai/api/v1", temperature=0),
|
| 701 |
"deepseek-chat-v3-0324": ChatOpenAI(model = "deepseek/deepseek-chat-v3-0324:free", api_key=openrouter_api, base_url = "https://openrouter.ai/api/v1", temperature=0),
|
| 702 |
"gpt-oss-20b": ChatOpenAI(model = "openai/gpt-oss-20b:free", api_key=openrouter_api, base_url = "https://openrouter.ai/api/v1", temperature=0),
|
preprocess/generate_pmtiles.ipynb
CHANGED
|
@@ -39,11 +39,12 @@
|
|
| 39 |
"outputs": [],
|
| 40 |
"source": [
|
| 41 |
"%%time\n",
|
| 42 |
-
"#Wall time:
|
| 43 |
"parquet_file = f's3://public-ca30x30/ca30x30_cbn_v3.parquet'\n",
|
| 44 |
"geobuf_file = 'ca30x30_cbn_v3.fgb'\n",
|
| 45 |
"ca_geo = con.read_parquet(parquet_file).mutate(acres = _.acres.round(4))\n",
|
| 46 |
-
"ca_geo.select(~s.startswith([\"mean_\", \"pct_\"])).execute().set_crs('epsg:4326').to_file(geobuf_file)"
|
|
|
|
| 47 |
]
|
| 48 |
},
|
| 49 |
{
|
|
@@ -54,7 +55,7 @@
|
|
| 54 |
"outputs": [],
|
| 55 |
"source": [
|
| 56 |
"%%time \n",
|
| 57 |
-
"#Wall time:
|
| 58 |
"pmtiles_file = 'ca30x30_cbn_v3.pmtiles'\n",
|
| 59 |
"pmtiles = f's3://public-ca30x30/{pmtiles_file}'\n",
|
| 60 |
"source_layer_name = re.sub(r'\\W+', '', os.path.splitext(os.path.basename(pmtiles_file))[0])\n",
|
|
@@ -68,7 +69,7 @@
|
|
| 68 |
" '--no-tiny-polygon-reduction',\n",
|
| 69 |
" '--no-simplification-of-shared-nodes'\n",
|
| 70 |
" ]\n",
|
| 71 |
-
"new_pmtiles = to_pmtiles(
|
| 72 |
"s3_cp(new_pmtiles,pmtiles)"
|
| 73 |
]
|
| 74 |
},
|
|
|
|
| 39 |
"outputs": [],
|
| 40 |
"source": [
|
| 41 |
"%%time\n",
|
| 42 |
+
"#Wall time: 48.9 s\n",
|
| 43 |
"parquet_file = f's3://public-ca30x30/ca30x30_cbn_v3.parquet'\n",
|
| 44 |
"geobuf_file = 'ca30x30_cbn_v3.fgb'\n",
|
| 45 |
"ca_geo = con.read_parquet(parquet_file).mutate(acres = _.acres.round(4))\n",
|
| 46 |
+
"ca_geo.select(~s.startswith([\"mean_\", \"pct_\"])).execute().set_crs('epsg:4326').to_file(geobuf_file)\n",
|
| 47 |
+
"s3_cp(geobuf_file,f's3://public-ca30x30/{geobuf_file}')"
|
| 48 |
]
|
| 49 |
},
|
| 50 |
{
|
|
|
|
| 55 |
"outputs": [],
|
| 56 |
"source": [
|
| 57 |
"%%time \n",
|
| 58 |
+
"#Wall time: 19min 11s\n",
|
| 59 |
"pmtiles_file = 'ca30x30_cbn_v3.pmtiles'\n",
|
| 60 |
"pmtiles = f's3://public-ca30x30/{pmtiles_file}'\n",
|
| 61 |
"source_layer_name = re.sub(r'\\W+', '', os.path.splitext(os.path.basename(pmtiles_file))[0])\n",
|
|
|
|
| 69 |
" '--no-tiny-polygon-reduction',\n",
|
| 70 |
" '--no-simplification-of-shared-nodes'\n",
|
| 71 |
" ]\n",
|
| 72 |
+
"new_pmtiles = to_pmtiles(geobuf_file, pmtiles_file, options = options )\n",
|
| 73 |
"s3_cp(new_pmtiles,pmtiles)"
|
| 74 |
]
|
| 75 |
},
|