Spaces:
Running
Running
Commit
·
2db4717
1
Parent(s):
e5599e8
added newly protected areas!
Browse files- app/utils.py +2 -3
- app/variables.py +59 -33
- preprocess/CBN-data.ipynb +92 -16
- preprocess/h3_utils.py +25 -9
app/utils.py
CHANGED
|
@@ -91,6 +91,7 @@ def get_summary(ca, combined_filter, column, main_group, colors = None):
|
|
| 91 |
"percent_CA": (_.acres.sum() / ca_area_acres),
|
| 92 |
"acres": _.acres.sum(),
|
| 93 |
}
|
|
|
|
| 94 |
# add percent + acres aggregates
|
| 95 |
dynamic_aggs = {}
|
| 96 |
for key in keys:
|
|
@@ -107,7 +108,7 @@ def get_summary(ca, combined_filter, column, main_group, colors = None):
|
|
| 107 |
.aggregate(**all_aggs)
|
| 108 |
.mutate(percent_CA=_.percent_CA.round(5), acres=_.acres.round(0))
|
| 109 |
)
|
| 110 |
-
|
| 111 |
# Compute total acres by group and percent of group
|
| 112 |
group_totals = (ca.filter(combined_filter)
|
| 113 |
.group_by(main_group)
|
|
@@ -177,7 +178,6 @@ def get_pmtiles_style(paint, alpha, filter_cols, filter_vals):
|
|
| 177 |
|
| 178 |
if "non-conserved" in chain.from_iterable(filter_vals):
|
| 179 |
combined_filters = ["any", combined_filters, ["match", ["get", "status"], ["non-conserved"], True, False]]
|
| 180 |
-
source_layer_name = re.sub(r'\W+', '', os.path.splitext(os.path.basename(ca_pmtiles))[0]) #stripping hyphens to get layer name
|
| 181 |
return {
|
| 182 |
"version": 8,
|
| 183 |
"sources": {"ca": {"type": "vector", "url": f"pmtiles://{ca_pmtiles}"}},
|
|
@@ -198,7 +198,6 @@ def get_pmtiles_style_llm(paint, ids):
|
|
| 198 |
"""
|
| 199 |
Generates a MapLibre GL style for PMTiles using specific IDs as filters.
|
| 200 |
"""
|
| 201 |
-
source_layer_name = re.sub(r'\W+', '', os.path.splitext(os.path.basename(ca_pmtiles))[0]) #stripping hyphens to get layer name
|
| 202 |
return {
|
| 203 |
"version": 8,
|
| 204 |
"sources": {"ca": {"type": "vector", "url": f"pmtiles://{ca_pmtiles}"}},
|
|
|
|
| 91 |
"percent_CA": (_.acres.sum() / ca_area_acres),
|
| 92 |
"acres": _.acres.sum(),
|
| 93 |
}
|
| 94 |
+
|
| 95 |
# add percent + acres aggregates
|
| 96 |
dynamic_aggs = {}
|
| 97 |
for key in keys:
|
|
|
|
| 108 |
.aggregate(**all_aggs)
|
| 109 |
.mutate(percent_CA=_.percent_CA.round(5), acres=_.acres.round(0))
|
| 110 |
)
|
| 111 |
+
|
| 112 |
# Compute total acres by group and percent of group
|
| 113 |
group_totals = (ca.filter(combined_filter)
|
| 114 |
.group_by(main_group)
|
|
|
|
| 178 |
|
| 179 |
if "non-conserved" in chain.from_iterable(filter_vals):
|
| 180 |
combined_filters = ["any", combined_filters, ["match", ["get", "status"], ["non-conserved"], True, False]]
|
|
|
|
| 181 |
return {
|
| 182 |
"version": 8,
|
| 183 |
"sources": {"ca": {"type": "vector", "url": f"pmtiles://{ca_pmtiles}"}},
|
|
|
|
| 198 |
"""
|
| 199 |
Generates a MapLibre GL style for PMTiles using specific IDs as filters.
|
| 200 |
"""
|
|
|
|
| 201 |
return {
|
| 202 |
"version": 8,
|
| 203 |
"sources": {"ca": {"type": "vector", "url": f"pmtiles://{ca_pmtiles}"}},
|
app/variables.py
CHANGED
|
@@ -1,13 +1,7 @@
|
|
| 1 |
# urls for main layer
|
| 2 |
-
|
| 3 |
-
|
| 4 |
-
|
| 5 |
-
ca_parquet = 'https://minio.carlboettiger.info/public-ca30x30/ca-30x30-cbn.parquet'
|
| 6 |
-
ca_pmtiles = 'https://minio.carlboettiger.info/public-ca30x30/ca-30x30-cbn.pmtiles'
|
| 7 |
-
|
| 8 |
-
# ca_parquet = 'https://minio.carlboettiger.info/public-ca30x30/hex/zoom8/ca-30x30-cbn-newlyprotected.parquet'
|
| 9 |
-
# ca_pmtiles = 'https://minio.carlboettiger.info/public-ca30x30/ca-30x30-cbn-newlyprotected.pmtiles'
|
| 10 |
-
|
| 11 |
# computed by taking the sum of all the acres in this file:
|
| 12 |
# https://minio.carlboettiger.info/public-ca30x30/CBN-data/Progress_data_new_protection/Land_Status_Zone_Ecoregion_Counties/all_regions_reGAP_county_eco.parquet
|
| 13 |
ca_area_acres = 101523750.68856516
|
|
@@ -29,7 +23,10 @@ def get_url(folder, file, base_folder = 'CBN'):
|
|
| 29 |
path = os.path.join(bucket,base_folder,folder,file)
|
| 30 |
url = minio+path
|
| 31 |
return url
|
| 32 |
-
|
|
|
|
|
|
|
|
|
|
| 33 |
#vector data
|
| 34 |
url_ACE_rarerank_statewide = get_url('ACE_biodiversity/ACE_rarerank_statewide','ACE_rarerank_statewide.pmtiles')
|
| 35 |
url_ACE_rarerank_ecoregion = get_url('ACE_biodiversity/ACE_rarerank_ecoregion','ACE_rarerank_ecoregion.pmtiles')
|
|
@@ -66,6 +63,7 @@ url_resilient_conn_network = get_url('Connectivity_resilience/Resilient_connecte
|
|
| 66 |
|
| 67 |
# column names for all data layers
|
| 68 |
keys = [
|
|
|
|
| 69 |
"ACE_amphibian_richness", "ACE_reptile_richness", "ACE_bird_richness",
|
| 70 |
"ACE_mammal_richness", "ACE_rare_amphibian_richness", "ACE_rare_reptile_richness",
|
| 71 |
"ACE_rare_bird_richness", "ACE_rare_mammal_richness", "ACE_endemic_amphibian_richness",
|
|
@@ -78,43 +76,48 @@ chatbot_toggles = {key: False for key in keys}
|
|
| 78 |
# data layers dict
|
| 79 |
layer_config = [
|
| 80 |
#[(section, 'a_amph', [(col_name, full name, key, chatbot toggle key)])]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 81 |
('🐸 Amphibian', 'a_amph', [
|
| 82 |
-
('amphibian_richness', 'Amphibian Richness', keys[
|
| 83 |
-
('rare_amphibian_richness', 'Rare Amphibian Richness', keys[
|
| 84 |
-
('endemic_amphibian_richness', 'Endemic Amphibian Richness', keys[
|
| 85 |
]),
|
| 86 |
('🐍 Reptile', 'a_rept', [
|
| 87 |
-
('reptile_richness', 'Reptile Richness', keys[
|
| 88 |
-
('rare_reptile_richness', 'Rare Reptile Richness', keys[
|
| 89 |
-
('endemic_reptile_richness', 'Endemic Reptile Richness', keys[
|
| 90 |
]),
|
| 91 |
('🦜 Bird', 'a_bird', [
|
| 92 |
-
('bird_richness', 'Bird Richness', keys[
|
| 93 |
-
('rare_bird_richness', 'Rare Bird Richness', keys[
|
| 94 |
-
('endemic_bird_richness', 'Endemic Bird Richness', keys[
|
| 95 |
]),
|
| 96 |
('🦌 Mammal', 'a_mammal', [
|
| 97 |
-
('mammal_richness', 'Mammal Richness', keys[
|
| 98 |
-
('rare_mammal_richness', 'Rare Mammal Richness', keys[
|
| 99 |
-
('endemic_mammal_richness', 'Endemic Mammal Richness', keys[
|
| 100 |
]),
|
| 101 |
('🌿 Plant', 'a_plant', [
|
| 102 |
-
('plant_richness', 'Plant Richness', keys[
|
| 103 |
-
('rarityweighted_endemic_plant_richness', 'Rarity-Weighted\nEndemic Plant Richness', keys[
|
| 104 |
]),
|
| 105 |
('💧 Freshwater Resources', 'freshwater', [
|
| 106 |
-
('wetlands', 'Wetlands', keys[
|
| 107 |
]),
|
| 108 |
('🚜 Agriculture', 'agriculture', [
|
| 109 |
-
('farmland', 'Farmland', keys[
|
| 110 |
-
('grazing', 'Lands Suitable for Grazing', keys[
|
| 111 |
]),
|
| 112 |
('👤 People', 'SVI', [
|
| 113 |
-
('DAC', 'Disadvantaged Communities', keys[
|
| 114 |
-
('low_income', 'Low-Income Communities', keys[
|
| 115 |
]),
|
| 116 |
('🔥 Climate Risks', 'calfire', [
|
| 117 |
-
('fire', 'Historical Fire Perimeters', keys[
|
| 118 |
])
|
| 119 |
]
|
| 120 |
|
|
@@ -135,8 +138,8 @@ county_color = "#DE3163" # magenta
|
|
| 135 |
city_color = "#ADD8E6" #light blue
|
| 136 |
hoa_color = "#A89BBC" # purple
|
| 137 |
nonprofit_color = "#D77031" #orange
|
| 138 |
-
|
| 139 |
-
|
| 140 |
white = "#FFFFFF"
|
| 141 |
|
| 142 |
|
|
@@ -406,6 +409,30 @@ networks = {
|
|
| 406 |
'default': white
|
| 407 |
}
|
| 408 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 409 |
style_options = {
|
| 410 |
"30x30 Status": status,
|
| 411 |
"GAP Code": gap,
|
|
@@ -419,7 +446,6 @@ style_options = {
|
|
| 419 |
"Access Type": access,
|
| 420 |
}
|
| 421 |
|
| 422 |
-
print(style_options)
|
| 423 |
select_column = {
|
| 424 |
"30x30 Status": "status",
|
| 425 |
"GAP Code": "gap_code",
|
|
|
|
| 1 |
# urls for main layer
|
| 2 |
+
ca_parquet = 'https://minio.carlboettiger.info/public-ca30x30/ca30x30cbn_newlyprotected.parquet'
|
| 3 |
+
ca_pmtiles = 'https://minio.carlboettiger.info/public-ca30x30/ca30x30cbn_newlyprotected.pmtiles'
|
| 4 |
+
#
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 5 |
# computed by taking the sum of all the acres in this file:
|
| 6 |
# https://minio.carlboettiger.info/public-ca30x30/CBN-data/Progress_data_new_protection/Land_Status_Zone_Ecoregion_Counties/all_regions_reGAP_county_eco.parquet
|
| 7 |
ca_area_acres = 101523750.68856516
|
|
|
|
| 23 |
path = os.path.join(bucket,base_folder,folder,file)
|
| 24 |
url = minio+path
|
| 25 |
return url
|
| 26 |
+
|
| 27 |
+
import re
|
| 28 |
+
source_layer_name = re.sub(r'\W+', '', os.path.splitext(os.path.basename(ca_pmtiles))[0]) # layer name = pmtiles file stem (basename without extension) with every non-word character removed (hyphens included)
|
| 29 |
+
|
| 30 |
#vector data
|
| 31 |
url_ACE_rarerank_statewide = get_url('ACE_biodiversity/ACE_rarerank_statewide','ACE_rarerank_statewide.pmtiles')
|
| 32 |
url_ACE_rarerank_ecoregion = get_url('ACE_biodiversity/ACE_rarerank_ecoregion','ACE_rarerank_ecoregion.pmtiles')
|
|
|
|
| 63 |
|
| 64 |
# column names for all data layers
|
| 65 |
keys = [
|
| 66 |
+
"update_newly_protected", "update_increased_management", "update_data_improvement",
|
| 67 |
"ACE_amphibian_richness", "ACE_reptile_richness", "ACE_bird_richness",
|
| 68 |
"ACE_mammal_richness", "ACE_rare_amphibian_richness", "ACE_rare_reptile_richness",
|
| 69 |
"ACE_rare_bird_richness", "ACE_rare_mammal_richness", "ACE_endemic_amphibian_richness",
|
|
|
|
| 76 |
# data layers dict
|
| 77 |
layer_config = [
|
| 78 |
#[(section, 'a_amph', [(col_name, full name, key, chatbot toggle key)])]
|
| 79 |
+
('📈 Data Updates', 'a_new', [
|
| 80 |
+
('update_newly_protected', 'Newly Protected', keys[0], chatbot_toggles[keys[0]]),
|
| 81 |
+
('update_increased_management', 'Increased Management', keys[1], chatbot_toggles[keys[1]]),
|
| 82 |
+
('update_data_improvement', 'Data Improvement', keys[2], chatbot_toggles[keys[2]]),
|
| 83 |
+
]),
|
| 84 |
('🐸 Amphibian', 'a_amph', [
|
| 85 |
+
('amphibian_richness', 'Amphibian Richness', keys[3], chatbot_toggles[keys[3]]),
|
| 86 |
+
('rare_amphibian_richness', 'Rare Amphibian Richness', keys[4], chatbot_toggles[keys[4]]),
|
| 87 |
+
('endemic_amphibian_richness', 'Endemic Amphibian Richness', keys[5], chatbot_toggles[keys[5]]),
|
| 88 |
]),
|
| 89 |
('🐍 Reptile', 'a_rept', [
|
| 90 |
+
('reptile_richness', 'Reptile Richness', keys[6], chatbot_toggles[keys[6]]),
|
| 91 |
+
('rare_reptile_richness', 'Rare Reptile Richness', keys[7], chatbot_toggles[keys[7]]),
|
| 92 |
+
('endemic_reptile_richness', 'Endemic Reptile Richness', keys[8], chatbot_toggles[keys[8]]),
|
| 93 |
]),
|
| 94 |
('🦜 Bird', 'a_bird', [
|
| 95 |
+
('bird_richness', 'Bird Richness', keys[9], chatbot_toggles[keys[9]]),
|
| 96 |
+
('rare_bird_richness', 'Rare Bird Richness', keys[10], chatbot_toggles[keys[10]]),
|
| 97 |
+
('endemic_bird_richness', 'Endemic Bird Richness', keys[11], chatbot_toggles[keys[11]]),
|
| 98 |
]),
|
| 99 |
('🦌 Mammal', 'a_mammal', [
|
| 100 |
+
('mammal_richness', 'Mammal Richness', keys[12], chatbot_toggles[keys[12]]),
|
| 101 |
+
('rare_mammal_richness', 'Rare Mammal Richness', keys[13], chatbot_toggles[keys[13]]),
|
| 102 |
+
('endemic_mammal_richness', 'Endemic Mammal Richness', keys[14], chatbot_toggles[keys[14]]),
|
| 103 |
]),
|
| 104 |
('🌿 Plant', 'a_plant', [
|
| 105 |
+
('plant_richness', 'Plant Richness', keys[15], chatbot_toggles[keys[15]]),
|
| 106 |
+
('rarityweighted_endemic_plant_richness', 'Rarity-Weighted\nEndemic Plant Richness', keys[16], chatbot_toggles[keys[16]]),
|
| 107 |
]),
|
| 108 |
('💧 Freshwater Resources', 'freshwater', [
|
| 109 |
+
('wetlands', 'Wetlands', keys[17], chatbot_toggles[keys[17]]),
|
| 110 |
]),
|
| 111 |
('🚜 Agriculture', 'agriculture', [
|
| 112 |
+
('farmland', 'Farmland', keys[18], chatbot_toggles[keys[18]]),
|
| 113 |
+
('grazing', 'Lands Suitable for Grazing', keys[19], chatbot_toggles[keys[19]]),
|
| 114 |
]),
|
| 115 |
('👤 People', 'SVI', [
|
| 116 |
+
('DAC', 'Disadvantaged Communities', keys[20], chatbot_toggles[keys[20]]),
|
| 117 |
+
('low_income', 'Low-Income Communities', keys[21], chatbot_toggles[keys[21]]),
|
| 118 |
]),
|
| 119 |
('🔥 Climate Risks', 'calfire', [
|
| 120 |
+
('fire', 'Historical Fire Perimeters', keys[22], chatbot_toggles[keys[22]]),
|
| 121 |
])
|
| 122 |
]
|
| 123 |
|
|
|
|
| 138 |
city_color = "#ADD8E6" #light blue
|
| 139 |
hoa_color = "#A89BBC" # purple
|
| 140 |
nonprofit_color = "#D77031" #orange
|
| 141 |
+
purple = "#00008B" # RGB (0, 0, 139), i.e. a dark blue; NOTE(review): the variable name says purple but the hex value is not purple - confirm intended color
|
| 142 |
+
cyan = "#1bc7c3" #cyan
|
| 143 |
white = "#FFFFFF"
|
| 144 |
|
| 145 |
|
|
|
|
| 409 |
'default': white
|
| 410 |
}
|
| 411 |
|
| 412 |
+
update_type_style = {
|
| 413 |
+
"version": 8,
|
| 414 |
+
"sources": {"ca": {"type": "vector", "url": f"pmtiles://{ca_pmtiles}"}},
|
| 415 |
+
"layers": [
|
| 416 |
+
{
|
| 417 |
+
"id": "ca30x30",
|
| 418 |
+
"source": "ca",
|
| 419 |
+
"source-layer": source_layer_name,
|
| 420 |
+
"type": "fill",
|
| 421 |
+
"paint": {
|
| 422 |
+
"fill-color": [
|
| 423 |
+
"interpolate", ["linear"], ["get", "update_newly_protected"],
|
| 424 |
+
0, white,
|
| 425 |
+
1, purple
|
| 426 |
+
]
|
| 427 |
+
}
|
| 428 |
+
}
|
| 429 |
+
]
|
| 430 |
+
}
|
| 431 |
+
|
| 432 |
+
|
| 433 |
+
|
| 434 |
+
|
| 435 |
+
|
| 436 |
style_options = {
|
| 437 |
"30x30 Status": status,
|
| 438 |
"GAP Code": gap,
|
|
|
|
| 446 |
"Access Type": access,
|
| 447 |
}
|
| 448 |
|
|
|
|
| 449 |
select_column = {
|
| 450 |
"30x30 Status": "status",
|
| 451 |
"GAP Code": "gap_code",
|
preprocess/CBN-data.ipynb
CHANGED
|
@@ -22,7 +22,6 @@
|
|
| 22 |
"\n",
|
| 23 |
"import os\n",
|
| 24 |
"os.chdir('../data/')\n",
|
| 25 |
-
"\n",
|
| 26 |
"duckdb_install_h3()"
|
| 27 |
]
|
| 28 |
},
|
|
@@ -753,7 +752,8 @@
|
|
| 753 |
" 'CA_Marine_','Release_Ye','ORIG_FID',\n",
|
| 754 |
" 'updatetype']\n",
|
| 755 |
"\n",
|
| 756 |
-
"convert_h3(con, s3, folder = folder, file = f\"{name}.parquet\",
|
|
|
|
| 757 |
]
|
| 758 |
},
|
| 759 |
{
|
|
@@ -959,16 +959,18 @@
|
|
| 959 |
"# convert_pmtiles(con, s3, folder = folder, file = f\"{name}.parquet\")\n",
|
| 960 |
"# process_vector(s3, folder = folder, file = f\"{name}.shp\", crs=\"EPSG:4326\")\n",
|
| 961 |
"\n",
|
| 962 |
-
"convert_h3(con, s3, folder = folder, file = f\"{name}.parquet\", cols= cols,
|
|
|
|
| 963 |
"\n"
|
| 964 |
]
|
| 965 |
},
|
| 966 |
{
|
| 967 |
"cell_type": "markdown",
|
| 968 |
-
"id": "
|
| 969 |
"metadata": {},
|
| 970 |
"source": [
|
| 971 |
-
"#### join with newly protected
|
|
|
|
| 972 |
]
|
| 973 |
},
|
| 974 |
{
|
|
@@ -980,31 +982,105 @@
|
|
| 980 |
"source": [
|
| 981 |
"con = ibis.duckdb.connect('joined',extensions = [\"spatial\", \"h3\"])\n",
|
| 982 |
"set_secrets(con)\n",
|
|
|
|
| 983 |
"\n",
|
| 984 |
-
"ca_nature_url = \"s3://public-ca30x30/hex/
|
| 985 |
-
"new_lands_url = \"s3://public-ca30x30/CBN/Progress_data_new_protection/Newly_counted_lands/hex/
|
| 986 |
"\n",
|
| 987 |
"ca_nature = (con.read_parquet(ca_nature_url)\n",
|
| 988 |
-
" .mutate(update_type =
|
|
|
|
| 989 |
" )\n",
|
| 990 |
"\n",
|
| 991 |
"new = (con.read_parquet(new_lands_url)\n",
|
| 992 |
" .mutate(update_type = 'updatetype')\n",
|
| 993 |
-
" .select(\"update_type\",\"
|
| 994 |
" )\n",
|
| 995 |
"\n",
|
| 996 |
-
"joined = (ca_nature.left_join(new,\"
|
| 997 |
-
" .drop('
|
| 998 |
" .rename(update_type = 'update_type_right')\n",
|
| 999 |
" )\n",
|
| 1000 |
"\n",
|
| 1001 |
-
"name = '
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1002 |
"\n",
|
| 1003 |
-
"
|
| 1004 |
-
"
|
|
|
|
| 1005 |
"\n",
|
| 1006 |
-
"#
|
| 1007 |
-
"
|
| 1008 |
]
|
| 1009 |
}
|
| 1010 |
],
|
|
|
|
| 22 |
"\n",
|
| 23 |
"import os\n",
|
| 24 |
"os.chdir('../data/')\n",
|
|
|
|
| 25 |
"duckdb_install_h3()"
|
| 26 |
]
|
| 27 |
},
|
|
|
|
| 752 |
" 'CA_Marine_','Release_Ye','ORIG_FID',\n",
|
| 753 |
" 'updatetype']\n",
|
| 754 |
"\n",
|
| 755 |
+
"convert_h3(con, s3, folder = folder, file = f\"{name}.parquet\", \n",
|
| 756 |
+
" cols = cols, zoom = 12)"
|
| 757 |
]
|
| 758 |
},
|
| 759 |
{
|
|
|
|
| 959 |
"# convert_pmtiles(con, s3, folder = folder, file = f\"{name}.parquet\")\n",
|
| 960 |
"# process_vector(s3, folder = folder, file = f\"{name}.shp\", crs=\"EPSG:4326\")\n",
|
| 961 |
"\n",
|
| 962 |
+
"convert_h3(con, s3, folder = folder, file = f\"{name}.parquet\", cols= cols, \n",
|
| 963 |
+
" group = 'ecoregion', zoom = 12)\n",
|
| 964 |
"\n"
|
| 965 |
]
|
| 966 |
},
|
| 967 |
{
|
| 968 |
"cell_type": "markdown",
|
| 969 |
+
"id": "e6b708e4-671a-4ffd-9102-6aebb21fcf84",
|
| 970 |
"metadata": {},
|
| 971 |
"source": [
|
| 972 |
+
"#### join with newly protected data\n",
|
| 973 |
+
"- Needed to get zoom 12 hexes to join newly protected data with CA Nature data\n"
|
| 974 |
]
|
| 975 |
},
|
| 976 |
{
|
|
|
|
| 982 |
"source": [
|
| 983 |
"con = ibis.duckdb.connect('joined',extensions = [\"spatial\", \"h3\"])\n",
|
| 984 |
"set_secrets(con)\n",
|
| 985 |
+
"zoom = 12\n",
|
| 986 |
"\n",
|
| 987 |
+
"ca_nature_url = f\"s3://public-ca30x30/hex/zoom{zoom}/ca-30x30-cbn.parquet\"\n",
|
| 988 |
+
"new_lands_url = f\"s3://public-ca30x30/CBN/Progress_data_new_protection/Newly_counted_lands/hex/zoom{zoom}/newly_counted_lands_2024.parquet\"\n",
|
| 989 |
"\n",
|
| 990 |
"ca_nature = (con.read_parquet(ca_nature_url)\n",
|
| 991 |
+
" .mutate(update_type = ibis.literal('d - no update')\n",
|
| 992 |
+
" )\n",
|
| 993 |
" )\n",
|
| 994 |
"\n",
|
| 995 |
"new = (con.read_parquet(new_lands_url)\n",
|
| 996 |
" .mutate(update_type = 'updatetype')\n",
|
| 997 |
+
" .select(\"update_type\",\"h12\")\n",
|
| 998 |
" )\n",
|
| 999 |
"\n",
|
| 1000 |
+
"joined = (ca_nature.left_join(new,\"h12\")\n",
|
| 1001 |
+
" .drop('h12_right','update_type')\n",
|
| 1002 |
" .rename(update_type = 'update_type_right')\n",
|
| 1003 |
" )\n",
|
| 1004 |
"\n",
|
| 1005 |
+
"name = 'ca30x30cbn_newlyprotected_'\n",
|
| 1006 |
+
"# joined.to_parquet(f\"s3://public-ca30x30/hex/zoom{zoom}/{name}.parquet\")\n"
|
| 1007 |
+
]
|
| 1008 |
+
},
|
| 1009 |
+
{
|
| 1010 |
+
"cell_type": "markdown",
|
| 1011 |
+
"id": "3a0accf9-500d-4430-ba45-8d0a4ad2e43e",
|
| 1012 |
+
"metadata": {},
|
| 1013 |
+
"source": [
|
| 1014 |
+
"Once joined, we can group by \"ids\" again and lose the hexes"
|
| 1015 |
+
]
|
| 1016 |
+
},
|
| 1017 |
+
{
|
| 1018 |
+
"cell_type": "code",
|
| 1019 |
+
"execution_count": null,
|
| 1020 |
+
"id": "6832e4bc-a359-4674-bad3-13052566176d",
|
| 1021 |
+
"metadata": {},
|
| 1022 |
+
"outputs": [],
|
| 1023 |
+
"source": [
|
| 1024 |
+
"original_geoms = con.read_parquet(f\"s3://public-ca30x30/ca-30x30-cbn.parquet\")\n",
|
| 1025 |
+
"\n",
|
| 1026 |
+
"new = (\n",
|
| 1027 |
+
" con.read_parquet(f\"s3://public-ca30x30/hex/zoom12/ca30x30cbn_newlyprotected_*\")\n",
|
| 1028 |
+
" .drop('acres')\n",
|
| 1029 |
+
" .mutate(update_type = _.update_type.substitute(\n",
|
| 1030 |
+
" {'a - newly protected':'update_newly_protected',\n",
|
| 1031 |
+
" 'b - increased management':'update_increased_management',\n",
|
| 1032 |
+
" 'c - data improvement':'update_data_improvement'\n",
|
| 1033 |
+
" }))\n",
|
| 1034 |
+
" .mutate(update_type = _.update_type.fill_null('update_none')\n",
|
| 1035 |
+
" )\n",
|
| 1036 |
+
")\n",
|
| 1037 |
+
"\n",
|
| 1038 |
+
"# aggregate data\n",
|
| 1039 |
+
"pivot = (\n",
|
| 1040 |
+
" new.pivot_wider(id_cols='id', names_from='update_type', values_from='id', values_agg='count')\n",
|
| 1041 |
+
" .mutate(total = _.update_newly_protected+ _.update_increased_management+ _.update_data_improvement + _.update_none)\n",
|
| 1042 |
+
" .mutate(update_newly_protected = (_.update_newly_protected/_.total).round(4),\n",
|
| 1043 |
+
" update_increased_management = (_.update_increased_management/_.total).round(4),\n",
|
| 1044 |
+
" update_data_improvement =( _.update_data_improvement/_.total).round(4),\n",
|
| 1045 |
+
" update_none = (_.update_none/_.total).round(4)\n",
|
| 1046 |
+
" )\n",
|
| 1047 |
+
" .drop(_.total)\n",
|
| 1048 |
+
" .left_join(original_geoms,'id')\n",
|
| 1049 |
+
" .drop('id_right')\n",
|
| 1050 |
+
")\n",
|
| 1051 |
+
"\n",
|
| 1052 |
+
"pivot.to_parquet(f\"s3://public-ca30x30/ca30x30cbn_newlyprotected.parquet\")"
|
| 1053 |
+
]
|
| 1054 |
+
},
|
| 1055 |
+
{
|
| 1056 |
+
"cell_type": "markdown",
|
| 1057 |
+
"id": "e348fc27-bb5e-4fdc-b694-9b499ba71d9d",
|
| 1058 |
+
"metadata": {},
|
| 1059 |
+
"source": [
|
| 1060 |
+
"Making PMTiles"
|
| 1061 |
+
]
|
| 1062 |
+
},
|
| 1063 |
+
{
|
| 1064 |
+
"cell_type": "code",
|
| 1065 |
+
"execution_count": null,
|
| 1066 |
+
"id": "9b82a499-8431-4e64-ab97-5062e0f98969",
|
| 1067 |
+
"metadata": {},
|
| 1068 |
+
"outputs": [],
|
| 1069 |
+
"source": [
|
| 1070 |
+
"# output file names for the parquet -> geojson -> pmtiles conversion\n",
|
| 1071 |
+
"ca_geojson = \"ca30x30cbn_newlyprotected.geojson\"\n",
|
| 1072 |
+
"ca_pmtiles = \"ca30x30cbn_newlyprotected.pmtiles\"\n",
|
| 1073 |
+
"\n",
|
| 1074 |
+
"url = f\"s3://public-ca30x30/ca30x30cbn_newlyprotected.parquet\"\n",
|
| 1075 |
+
"#to use PMTiles, need to convert to geojson\n",
|
| 1076 |
+
"ca_geo = con.read_parquet(url)\n",
|
| 1077 |
"\n",
|
| 1078 |
+
"#can't go directly from parquet -> pmtiles, need to go parquet -> geojson -> pmtiles \n",
|
| 1079 |
+
"ca_geo.execute().to_file(ca_geojson) \n",
|
| 1080 |
+
"pmtiles = to_pmtiles(ca_geojson, ca_pmtiles, options = ['--extend-zooms-if-still-dropping'])\n",
|
| 1081 |
"\n",
|
| 1082 |
+
"# upload pmtiles to minio\n",
|
| 1083 |
+
"s3_cp(ca_pmtiles, \"s3://public-ca30x30/\"+ca_pmtiles, \"minio\")"
|
| 1084 |
]
|
| 1085 |
}
|
| 1086 |
],
|
preprocess/h3_utils.py
CHANGED
|
@@ -44,6 +44,7 @@ def h3_from_geom(con, name, cols, save_path, zoom):
|
|
| 44 |
FROM {name}
|
| 45 |
)
|
| 46 |
''')
|
|
|
|
| 47 |
con.sql(f'''
|
| 48 |
SELECT {cols}, UNNEST(h{zoom}) AS h{zoom},
|
| 49 |
ST_GeomFromText(h3_cell_to_boundary_wkt(UNNEST(h{zoom}))) AS geom
|
|
@@ -52,14 +53,29 @@ def h3_from_geom(con, name, cols, save_path, zoom):
|
|
| 52 |
|
| 53 |
|
| 54 |
def compute_grouped(con, name, cols, zoom, group, path):
|
| 55 |
-
|
|
|
|
| 56 |
# separate data by group
|
| 57 |
-
for sub in
|
| 58 |
sub_name = f"{name}_{re.sub(r'\W+', '_', sub)}"
|
| 59 |
-
|
| 60 |
-
|
| 61 |
-
|
| 62 |
-
|
| 63 |
-
|
| 64 |
-
|
| 65 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 44 |
FROM {name}
|
| 45 |
)
|
| 46 |
''')
|
| 47 |
+
|
| 48 |
con.sql(f'''
|
| 49 |
SELECT {cols}, UNNEST(h{zoom}) AS h{zoom},
|
| 50 |
ST_GeomFromText(h3_cell_to_boundary_wkt(UNNEST(h{zoom}))) AS geom
|
|
|
|
| 53 |
|
| 54 |
|
| 55 |
def compute_grouped(con, name, cols, zoom, group, path, chunk_size=500):
    """Run the H3 conversion of table ``name`` one group and one chunk at a time.

    The distinct values of column ``group`` are read from ``name``. For each
    value, the matching rows are copied into temporary tables of at most
    ``chunk_size`` rows, and every non-empty chunk is handed to
    ``h3_from_geom`` together with its own destination path.

    Parameters
    ----------
    con : database connection exposing ``table``, ``raw_sql`` and ``sql``
        (presumably an ibis DuckDB backend - confirm against the caller).
    name : str
        Name of the source table registered on ``con``.
    cols : passed through unchanged to ``h3_from_geom``.
    zoom : H3 resolution; used in the output path and passed to
        ``h3_from_geom``.
    group : str
        Column whose distinct values define the groups.
    path : str
        Bucket/prefix; outputs go to
        ``s3://{path}/hex/zoom{zoom}/group_{group}/{sub_name}_chunk{i}.parquet``.
    chunk_size : int, optional
        Maximum number of source rows per chunk (default 500).

    Raises
    ------
    ValueError
        If ``chunk_size`` is smaller than 1 (every chunk would be empty and
        nothing would be processed).
    """
    if chunk_size < 1:
        raise ValueError(f"chunk_size must be a positive integer, got {chunk_size!r}")

    groups = con.table(name).select(group).distinct().execute()[group].tolist()
    # separate data by group
    for sub in groups:
        # Sanitize outside the f-string: a backslash inside an f-string
        # replacement field is a SyntaxError before Python 3.12.
        sub_label = re.sub(r"\W+", "_", sub)
        sub_name = f"{name}_{sub_label}"
        # Double embedded single quotes so a group value containing an
        # apostrophe still forms a valid SQL string literal.
        sub_literal = sub.replace("'", "''")
        offset = 0
        i = 0
        # chunk data within groups
        while True:
            print(f'Processing group {sub_name} chunk {i} offset {offset}')
            chunk_name = f"{sub_name}_chunk{i}"
            # NOTE(review): LIMIT/OFFSET without ORDER BY relies on the engine
            # returning rows in the same order on every query; if that does
            # not hold, chunks may overlap or skip rows. Add an ORDER BY on a
            # stable key once one is known for this table.
            con.raw_sql(f"""
                CREATE OR REPLACE TEMP TABLE {chunk_name} AS
                SELECT * FROM {name}
                WHERE {group} = '{sub_literal}'
                LIMIT {chunk_size}
                OFFSET {offset}
            """)
            # An empty chunk means the previous one was the last for this group.
            if con.sql(f"SELECT 1 FROM {chunk_name} LIMIT 1").execute().empty:
                break
            save_path = f"s3://{path}/hex/zoom{zoom}/group_{group}/{sub_name}_chunk{i}.parquet"
            h3_from_geom(con, chunk_name, cols, save_path, zoom)
            offset += chunk_size
            i += 1
|
| 80 |
+
|
| 81 |
+
|