cassiebuhler committed on
Commit
2db4717
·
1 Parent(s): e5599e8

added newly protected areas!

Browse files
app/utils.py CHANGED
@@ -91,6 +91,7 @@ def get_summary(ca, combined_filter, column, main_group, colors = None):
91
  "percent_CA": (_.acres.sum() / ca_area_acres),
92
  "acres": _.acres.sum(),
93
  }
 
94
  # add percent + acres aggregates
95
  dynamic_aggs = {}
96
  for key in keys:
@@ -107,7 +108,7 @@ def get_summary(ca, combined_filter, column, main_group, colors = None):
107
  .aggregate(**all_aggs)
108
  .mutate(percent_CA=_.percent_CA.round(5), acres=_.acres.round(0))
109
  )
110
-
111
  # Compute total acres by group and percent of group
112
  group_totals = (ca.filter(combined_filter)
113
  .group_by(main_group)
@@ -177,7 +178,6 @@ def get_pmtiles_style(paint, alpha, filter_cols, filter_vals):
177
 
178
  if "non-conserved" in chain.from_iterable(filter_vals):
179
  combined_filters = ["any", combined_filters, ["match", ["get", "status"], ["non-conserved"], True, False]]
180
- source_layer_name = re.sub(r'\W+', '', os.path.splitext(os.path.basename(ca_pmtiles))[0]) #stripping hyphens to get layer name
181
  return {
182
  "version": 8,
183
  "sources": {"ca": {"type": "vector", "url": f"pmtiles://{ca_pmtiles}"}},
@@ -198,7 +198,6 @@ def get_pmtiles_style_llm(paint, ids):
198
  """
199
  Generates a MapLibre GL style for PMTiles using specific IDs as filters.
200
  """
201
- source_layer_name = re.sub(r'\W+', '', os.path.splitext(os.path.basename(ca_pmtiles))[0]) #stripping hyphens to get layer name
202
  return {
203
  "version": 8,
204
  "sources": {"ca": {"type": "vector", "url": f"pmtiles://{ca_pmtiles}"}},
 
91
  "percent_CA": (_.acres.sum() / ca_area_acres),
92
  "acres": _.acres.sum(),
93
  }
94
+
95
  # add percent + acres aggregates
96
  dynamic_aggs = {}
97
  for key in keys:
 
108
  .aggregate(**all_aggs)
109
  .mutate(percent_CA=_.percent_CA.round(5), acres=_.acres.round(0))
110
  )
111
+
112
  # Compute total acres by group and percent of group
113
  group_totals = (ca.filter(combined_filter)
114
  .group_by(main_group)
 
178
 
179
  if "non-conserved" in chain.from_iterable(filter_vals):
180
  combined_filters = ["any", combined_filters, ["match", ["get", "status"], ["non-conserved"], True, False]]
 
181
  return {
182
  "version": 8,
183
  "sources": {"ca": {"type": "vector", "url": f"pmtiles://{ca_pmtiles}"}},
 
198
  """
199
  Generates a MapLibre GL style for PMTiles using specific IDs as filters.
200
  """
 
201
  return {
202
  "version": 8,
203
  "sources": {"ca": {"type": "vector", "url": f"pmtiles://{ca_pmtiles}"}},
app/variables.py CHANGED
@@ -1,13 +1,7 @@
1
  # urls for main layer
2
- # ca_parquet = "https://huggingface.co/datasets/boettiger-lab/ca-30x30/resolve/da85dd9ca1c774d4ddf821555e3c3c9e13c9b857/ca-30x30.parquet"
3
- # ca_pmtiles = "https://huggingface.co/datasets/boettiger-lab/ca-30x30/resolve/896db6c9a37488ee7c53ee56df67b3ccfd44d150/ca-30x30.pmtiles"
4
-
5
- ca_parquet = 'https://minio.carlboettiger.info/public-ca30x30/ca-30x30-cbn.parquet'
6
- ca_pmtiles = 'https://minio.carlboettiger.info/public-ca30x30/ca-30x30-cbn.pmtiles'
7
-
8
- # ca_parquet = 'https://minio.carlboettiger.info/public-ca30x30/hex/zoom8/ca-30x30-cbn-newlyprotected.parquet'
9
- # ca_pmtiles = 'https://minio.carlboettiger.info/public-ca30x30/ca-30x30-cbn-newlyprotected.pmtiles'
10
-
11
  # computed by taking the sum of all the acres in this file:
12
  # https://minio.carlboettiger.info/public-ca30x30/CBN-data/Progress_data_new_protection/Land_Status_Zone_Ecoregion_Counties/all_regions_reGAP_county_eco.parquet
13
  ca_area_acres = 101523750.68856516
@@ -29,7 +23,10 @@ def get_url(folder, file, base_folder = 'CBN'):
29
  path = os.path.join(bucket,base_folder,folder,file)
30
  url = minio+path
31
  return url
32
-
 
 
 
33
  #vector data
34
  url_ACE_rarerank_statewide = get_url('ACE_biodiversity/ACE_rarerank_statewide','ACE_rarerank_statewide.pmtiles')
35
  url_ACE_rarerank_ecoregion = get_url('ACE_biodiversity/ACE_rarerank_ecoregion','ACE_rarerank_ecoregion.pmtiles')
@@ -66,6 +63,7 @@ url_resilient_conn_network = get_url('Connectivity_resilience/Resilient_connecte
66
 
67
  # column names for all data layers
68
  keys = [
 
69
  "ACE_amphibian_richness", "ACE_reptile_richness", "ACE_bird_richness",
70
  "ACE_mammal_richness", "ACE_rare_amphibian_richness", "ACE_rare_reptile_richness",
71
  "ACE_rare_bird_richness", "ACE_rare_mammal_richness", "ACE_endemic_amphibian_richness",
@@ -78,43 +76,48 @@ chatbot_toggles = {key: False for key in keys}
78
  # data layers dict
79
  layer_config = [
80
  #[(section, 'a_amph', [(col_name, full name, key, chatbot toggle key)])]
 
 
 
 
 
81
  ('🐸 Amphibian', 'a_amph', [
82
- ('amphibian_richness', 'Amphibian Richness', keys[0], chatbot_toggles[keys[0]]),
83
- ('rare_amphibian_richness', 'Rare Amphibian Richness', keys[1], chatbot_toggles[keys[1]]),
84
- ('endemic_amphibian_richness', 'Endemic Amphibian Richness', keys[2], chatbot_toggles[keys[2]]),
85
  ]),
86
  ('🐍 Reptile', 'a_rept', [
87
- ('reptile_richness', 'Reptile Richness', keys[3], chatbot_toggles[keys[3]]),
88
- ('rare_reptile_richness', 'Rare Reptile Richness', keys[4], chatbot_toggles[keys[4]]),
89
- ('endemic_reptile_richness', 'Endemic Reptile Richness', keys[5], chatbot_toggles[keys[5]]),
90
  ]),
91
  ('🦜 Bird', 'a_bird', [
92
- ('bird_richness', 'Bird Richness', keys[6], chatbot_toggles[keys[6]]),
93
- ('rare_bird_richness', 'Rare Bird Richness', keys[7], chatbot_toggles[keys[7]]),
94
- ('endemic_bird_richness', 'Endemic Bird Richness', keys[8], chatbot_toggles[keys[8]]),
95
  ]),
96
  ('🦌 Mammal', 'a_mammal', [
97
- ('mammal_richness', 'Mammal Richness', keys[9], chatbot_toggles[keys[9]]),
98
- ('rare_mammal_richness', 'Rare Mammal Richness', keys[10], chatbot_toggles[keys[10]]),
99
- ('endemic_mammal_richness', 'Endemic Mammal Richness', keys[11], chatbot_toggles[keys[11]]),
100
  ]),
101
  ('🌿 Plant', 'a_plant', [
102
- ('plant_richness', 'Plant Richness', keys[12], chatbot_toggles[keys[12]]),
103
- ('rarityweighted_endemic_plant_richness', 'Rarity-Weighted\nEndemic Plant Richness', keys[13], chatbot_toggles[keys[13]]),
104
  ]),
105
  ('💧 Freshwater Resources', 'freshwater', [
106
- ('wetlands', 'Wetlands', keys[14], chatbot_toggles[keys[14]]),
107
  ]),
108
  ('🚜 Agriculture', 'agriculture', [
109
- ('farmland', 'Farmland', keys[15], chatbot_toggles[keys[15]]),
110
- ('grazing', 'Lands Suitable for Grazing', keys[16], chatbot_toggles[keys[16]]),
111
  ]),
112
  ('👤 People', 'SVI', [
113
- ('DAC', 'Disadvantaged Communities', keys[17], chatbot_toggles[keys[17]]),
114
- ('low_income', 'Low-Income Communities', keys[18], chatbot_toggles[keys[18]]),
115
  ]),
116
  ('🔥 Climate Risks', 'calfire', [
117
- ('fire', 'Historical Fire Perimeters', keys[19], chatbot_toggles[keys[19]]),
118
  ])
119
  ]
120
 
@@ -135,8 +138,8 @@ county_color = "#DE3163" # magenta
135
  city_color = "#ADD8E6" #light blue
136
  hoa_color = "#A89BBC" # purple
137
  nonprofit_color = "#D77031" #orange
138
- justice40_color = "#00008B" #purple
139
- svi_color = "#1bc7c3" #cyan
140
  white = "#FFFFFF"
141
 
142
 
@@ -406,6 +409,30 @@ networks = {
406
  'default': white
407
  }
408
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
409
  style_options = {
410
  "30x30 Status": status,
411
  "GAP Code": gap,
@@ -419,7 +446,6 @@ style_options = {
419
  "Access Type": access,
420
  }
421
 
422
- print(style_options)
423
  select_column = {
424
  "30x30 Status": "status",
425
  "GAP Code": "gap_code",
 
1
  # urls for main layer
2
+ ca_parquet = 'https://minio.carlboettiger.info/public-ca30x30/ca30x30cbn_newlyprotected.parquet'
3
+ ca_pmtiles = 'https://minio.carlboettiger.info/public-ca30x30/ca30x30cbn_newlyprotected.pmtiles'
4
+ #
 
 
 
 
 
 
5
  # computed by taking the sum of all the acres in this file:
6
  # https://minio.carlboettiger.info/public-ca30x30/CBN-data/Progress_data_new_protection/Land_Status_Zone_Ecoregion_Counties/all_regions_reGAP_county_eco.parquet
7
  ca_area_acres = 101523750.68856516
 
23
  path = os.path.join(bucket,base_folder,folder,file)
24
  url = minio+path
25
  return url
26
+
27
+ import re
28
+ source_layer_name = re.sub(r'\W+', '', os.path.splitext(os.path.basename(ca_pmtiles))[0]) #stripping hyphens to get layer name
29
+
30
  #vector data
31
  url_ACE_rarerank_statewide = get_url('ACE_biodiversity/ACE_rarerank_statewide','ACE_rarerank_statewide.pmtiles')
32
  url_ACE_rarerank_ecoregion = get_url('ACE_biodiversity/ACE_rarerank_ecoregion','ACE_rarerank_ecoregion.pmtiles')
 
63
 
64
  # column names for all data layers
65
  keys = [
66
+ "update_newly_protected", "update_increased_management", "update_data_improvement",
67
  "ACE_amphibian_richness", "ACE_reptile_richness", "ACE_bird_richness",
68
  "ACE_mammal_richness", "ACE_rare_amphibian_richness", "ACE_rare_reptile_richness",
69
  "ACE_rare_bird_richness", "ACE_rare_mammal_richness", "ACE_endemic_amphibian_richness",
 
76
  # data layers dict
77
  layer_config = [
78
  #[(section, 'a_amph', [(col_name, full name, key, chatbot toggle key)])]
79
+ ('📈 Data Updates', 'a_new', [
80
+ ('update_newly_protected', 'Newly Protected', keys[0], chatbot_toggles[keys[0]]),
81
+ ('update_increased_management', 'Increased Management', keys[1], chatbot_toggles[keys[1]]),
82
+ ('update_data_improvement', 'Data Improvement', keys[2], chatbot_toggles[keys[2]]),
83
+ ]),
84
  ('🐸 Amphibian', 'a_amph', [
85
+ ('amphibian_richness', 'Amphibian Richness', keys[3], chatbot_toggles[keys[3]]),
86
+ ('rare_amphibian_richness', 'Rare Amphibian Richness', keys[4], chatbot_toggles[keys[4]]),
87
+ ('endemic_amphibian_richness', 'Endemic Amphibian Richness', keys[5], chatbot_toggles[keys[5]]),
88
  ]),
89
  ('🐍 Reptile', 'a_rept', [
90
+ ('reptile_richness', 'Reptile Richness', keys[6], chatbot_toggles[keys[6]]),
91
+ ('rare_reptile_richness', 'Rare Reptile Richness', keys[7], chatbot_toggles[keys[7]]),
92
+ ('endemic_reptile_richness', 'Endemic Reptile Richness', keys[8], chatbot_toggles[keys[8]]),
93
  ]),
94
  ('🦜 Bird', 'a_bird', [
95
+ ('bird_richness', 'Bird Richness', keys[9], chatbot_toggles[keys[9]]),
96
+ ('rare_bird_richness', 'Rare Bird Richness', keys[10], chatbot_toggles[keys[10]]),
97
+ ('endemic_bird_richness', 'Endemic Bird Richness', keys[11], chatbot_toggles[keys[11]]),
98
  ]),
99
  ('🦌 Mammal', 'a_mammal', [
100
+ ('mammal_richness', 'Mammal Richness', keys[12], chatbot_toggles[keys[12]]),
101
+ ('rare_mammal_richness', 'Rare Mammal Richness', keys[13], chatbot_toggles[keys[13]]),
102
+ ('endemic_mammal_richness', 'Endemic Mammal Richness', keys[14], chatbot_toggles[keys[14]]),
103
  ]),
104
  ('🌿 Plant', 'a_plant', [
105
+ ('plant_richness', 'Plant Richness', keys[15], chatbot_toggles[keys[15]]),
106
+ ('rarityweighted_endemic_plant_richness', 'Rarity-Weighted\nEndemic Plant Richness', keys[16], chatbot_toggles[keys[16]]),
107
  ]),
108
  ('💧 Freshwater Resources', 'freshwater', [
109
+ ('wetlands', 'Wetlands', keys[17], chatbot_toggles[keys[17]]),
110
  ]),
111
  ('🚜 Agriculture', 'agriculture', [
112
+ ('farmland', 'Farmland', keys[18], chatbot_toggles[keys[18]]),
113
+ ('grazing', 'Lands Suitable for Grazing', keys[19], chatbot_toggles[keys[19]]),
114
  ]),
115
  ('👤 People', 'SVI', [
116
+ ('DAC', 'Disadvantaged Communities', keys[20], chatbot_toggles[keys[20]]),
117
+ ('low_income', 'Low-Income Communities', keys[21], chatbot_toggles[keys[21]]),
118
  ]),
119
  ('🔥 Climate Risks', 'calfire', [
120
+ ('fire', 'Historical Fire Perimeters', keys[22], chatbot_toggles[keys[22]]),
121
  ])
122
  ]
123
 
 
138
  city_color = "#ADD8E6" #light blue
139
  hoa_color = "#A89BBC" # purple
140
  nonprofit_color = "#D77031" #orange
141
+ purple = "#00008B" #purple
142
+ cyan = "#1bc7c3" #cyan
143
  white = "#FFFFFF"
144
 
145
 
 
409
  'default': white
410
  }
411
 
412
+ update_type_style = {
413
+ "version": 8,
414
+ "sources": {"ca": {"type": "vector", "url": f"pmtiles://{ca_pmtiles}"}},
415
+ "layers": [
416
+ {
417
+ "id": "ca30x30",
418
+ "source": "ca",
419
+ "source-layer": source_layer_name,
420
+ "type": "fill",
421
+ "paint": {
422
+ "fill-color": [
423
+ "interpolate", ["linear"], ["get", "update_newly_protected"],
424
+ 0, white,
425
+ 1, purple
426
+ ]
427
+ }
428
+ }
429
+ ]
430
+ }
431
+
432
+
433
+
434
+
435
+
436
  style_options = {
437
  "30x30 Status": status,
438
  "GAP Code": gap,
 
446
  "Access Type": access,
447
  }
448
 
 
449
  select_column = {
450
  "30x30 Status": "status",
451
  "GAP Code": "gap_code",
preprocess/CBN-data.ipynb CHANGED
@@ -22,7 +22,6 @@
22
  "\n",
23
  "import os\n",
24
  "os.chdir('../data/')\n",
25
- "\n",
26
  "duckdb_install_h3()"
27
  ]
28
  },
@@ -753,7 +752,8 @@
753
  " 'CA_Marine_','Release_Ye','ORIG_FID',\n",
754
  " 'updatetype']\n",
755
  "\n",
756
- "convert_h3(con, s3, folder = folder, file = f\"{name}.parquet\", cols = cols, zoom = 8)"
 
757
  ]
758
  },
759
  {
@@ -959,16 +959,18 @@
959
  "# convert_pmtiles(con, s3, folder = folder, file = f\"{name}.parquet\")\n",
960
  "# process_vector(s3, folder = folder, file = f\"{name}.shp\", crs=\"EPSG:4326\")\n",
961
  "\n",
962
- "convert_h3(con, s3, folder = folder, file = f\"{name}.parquet\", cols= cols, group = 'ecoregion', zoom = 8)\n",
 
963
  "\n"
964
  ]
965
  },
966
  {
967
  "cell_type": "markdown",
968
- "id": "980c7e88-8dc6-4bc6-bfa4-8ea301c6ee80",
969
  "metadata": {},
970
  "source": [
971
- "#### join with newly protected lands"
 
972
  ]
973
  },
974
  {
@@ -980,31 +982,105 @@
980
  "source": [
981
  "con = ibis.duckdb.connect('joined',extensions = [\"spatial\", \"h3\"])\n",
982
  "set_secrets(con)\n",
 
983
  "\n",
984
- "ca_nature_url = \"s3://public-ca30x30/hex/zoom8/ca-30x30-cbn.parquet\"\n",
985
- "new_lands_url = \"s3://public-ca30x30/CBN/Progress_data_new_protection/Newly_counted_lands/hex/zoom8/newly_counted_lands_2024.parquet\"\n",
986
  "\n",
987
  "ca_nature = (con.read_parquet(ca_nature_url)\n",
988
- " .mutate(update_type = None)\n",
 
989
  " )\n",
990
  "\n",
991
  "new = (con.read_parquet(new_lands_url)\n",
992
  " .mutate(update_type = 'updatetype')\n",
993
- " .select(\"update_type\",\"h8\")\n",
994
  " )\n",
995
  "\n",
996
- "joined = (ca_nature.left_join(new,\"h8\")\n",
997
- " .drop('h8_right','update_type')\n",
998
  " .rename(update_type = 'update_type_right')\n",
999
  " )\n",
1000
  "\n",
1001
- "name = 'ca-30x30-cbn-newlyprotected'\n",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1002
  "\n",
1003
- "joined.to_parquet(f\"{name}.parquet\")\n",
1004
- "joined.to_parquet(f\"s3://public-ca30x30/hex/zoom8/{name}.parquet\")\n",
 
1005
  "\n",
1006
- "#maybe get pmtiles?\n",
1007
- "convert_pmtiles(con, s3, folder = None, file = f\"{name}.parquet\", base_folder = None, current_crs = 'epsg:4326')"
1008
  ]
1009
  }
1010
  ],
 
22
  "\n",
23
  "import os\n",
24
  "os.chdir('../data/')\n",
 
25
  "duckdb_install_h3()"
26
  ]
27
  },
 
752
  " 'CA_Marine_','Release_Ye','ORIG_FID',\n",
753
  " 'updatetype']\n",
754
  "\n",
755
+ "convert_h3(con, s3, folder = folder, file = f\"{name}.parquet\", \n",
756
+ " cols = cols, zoom = 12)"
757
  ]
758
  },
759
  {
 
959
  "# convert_pmtiles(con, s3, folder = folder, file = f\"{name}.parquet\")\n",
960
  "# process_vector(s3, folder = folder, file = f\"{name}.shp\", crs=\"EPSG:4326\")\n",
961
  "\n",
962
+ "convert_h3(con, s3, folder = folder, file = f\"{name}.parquet\", cols= cols, \n",
963
+ " group = 'ecoregion', zoom = 12)\n",
964
  "\n"
965
  ]
966
  },
967
  {
968
  "cell_type": "markdown",
969
+ "id": "e6b708e4-671a-4ffd-9102-6aebb21fcf84",
970
  "metadata": {},
971
  "source": [
972
+ "#### join with newly protected data\n",
973
+ "- Needed to get zoom 12 hexes to join newly protected data with CA Nature data\n"
974
  ]
975
  },
976
  {
 
982
  "source": [
983
  "con = ibis.duckdb.connect('joined',extensions = [\"spatial\", \"h3\"])\n",
984
  "set_secrets(con)\n",
985
+ "zoom = 12\n",
986
  "\n",
987
+ "ca_nature_url = f\"s3://public-ca30x30/hex/zoom{zoom}/ca-30x30-cbn.parquet\"\n",
988
+ "new_lands_url = f\"s3://public-ca30x30/CBN/Progress_data_new_protection/Newly_counted_lands/hex/zoom{zoom}/newly_counted_lands_2024.parquet\"\n",
989
  "\n",
990
  "ca_nature = (con.read_parquet(ca_nature_url)\n",
991
+ " .mutate(update_type = ibis.literal('d - no update')\n",
992
+ " )\n",
993
  " )\n",
994
  "\n",
995
  "new = (con.read_parquet(new_lands_url)\n",
996
  " .mutate(update_type = 'updatetype')\n",
997
+ " .select(\"update_type\",\"h12\")\n",
998
  " )\n",
999
  "\n",
1000
+ "joined = (ca_nature.left_join(new,\"h12\")\n",
1001
+ " .drop('h12_right','update_type')\n",
1002
  " .rename(update_type = 'update_type_right')\n",
1003
  " )\n",
1004
  "\n",
1005
+ "name = 'ca30x30cbn_newlyprotected_'\n",
1006
+ "# joined.to_parquet(f\"s3://public-ca30x30/hex/zoom{zoom}/{name}.parquet\")\n"
1007
+ ]
1008
+ },
1009
+ {
1010
+ "cell_type": "markdown",
1011
+ "id": "3a0accf9-500d-4430-ba45-8d0a4ad2e43e",
1012
+ "metadata": {},
1013
+ "source": [
1014
+ "Once joined, we can group by \"ids\" again and lose the hexes"
1015
+ ]
1016
+ },
1017
+ {
1018
+ "cell_type": "code",
1019
+ "execution_count": null,
1020
+ "id": "6832e4bc-a359-4674-bad3-13052566176d",
1021
+ "metadata": {},
1022
+ "outputs": [],
1023
+ "source": [
1024
+ "original_geoms = con.read_parquet(f\"s3://public-ca30x30/ca-30x30-cbn.parquet\")\n",
1025
+ "\n",
1026
+ "new = (\n",
1027
+ " con.read_parquet(f\"s3://public-ca30x30/hex/zoom12/ca30x30cbn_newlyprotected_*\")\n",
1028
+ " .drop('acres')\n",
1029
+ " .mutate(update_type = _.update_type.substitute(\n",
1030
+ " {'a - newly protected':'update_newly_protected',\n",
1031
+ " 'b - increased management':'update_increased_management',\n",
1032
+ " 'c - data improvement':'update_data_improvement'\n",
1033
+ " }))\n",
1034
+ " .mutate(update_type = _.update_type.fill_null('update_none')\n",
1035
+ " )\n",
1036
+ ")\n",
1037
+ "\n",
1038
+ "# aggregate data\n",
1039
+ "pivot = (\n",
1040
+ " new.pivot_wider(id_cols='id', names_from='update_type', values_from='id', values_agg='count')\n",
1041
+ " .mutate(total = _.update_newly_protected+ _.update_increased_management+ _.update_data_improvement + _.update_none)\n",
1042
+ " .mutate(update_newly_protected = (_.update_newly_protected/_.total).round(4),\n",
1043
+ " update_increased_management = (_.update_increased_management/_.total).round(4),\n",
1044
+ " update_data_improvement =( _.update_data_improvement/_.total).round(4),\n",
1045
+ " update_none = (_.update_none/_.total).round(4)\n",
1046
+ " )\n",
1047
+ " .drop(_.total)\n",
1048
+ " .left_join(original_geoms,'id')\n",
1049
+ " .drop('id_right')\n",
1050
+ ")\n",
1051
+ "\n",
1052
+ "pivot.to_parquet(f\"s3://public-ca30x30/ca30x30cbn_newlyprotected.parquet\")"
1053
+ ]
1054
+ },
1055
+ {
1056
+ "cell_type": "markdown",
1057
+ "id": "e348fc27-bb5e-4fdc-b694-9b499ba71d9d",
1058
+ "metadata": {},
1059
+ "source": [
1060
+ "Making PMTiles"
1061
+ ]
1062
+ },
1063
+ {
1064
+ "cell_type": "code",
1065
+ "execution_count": null,
1066
+ "id": "9b82a499-8431-4e64-ab97-5062e0f98969",
1067
+ "metadata": {},
1068
+ "outputs": [],
1069
+ "source": [
1070
+ "# upload parquet to minio \n",
1071
+ "ca_geojson = \"ca30x30cbn_newlyprotected.geojson\"\n",
1072
+ "ca_pmtiles = \"ca30x30cbn_newlyprotected.pmtiles\"\n",
1073
+ "\n",
1074
+ "url = f\"s3://public-ca30x30/ca30x30cbn_newlyprotected.parquet\"\n",
1075
+ "#to use PMTiles, need to convert to geojson\n",
1076
+ "ca_geo = con.read_parquet(url)\n",
1077
  "\n",
1078
+ "#can't go directly from parquet -> pmtiles, need to go parquet -> geojson -> pmtiles \n",
1079
+ "ca_geo.execute().to_file(ca_geojson) \n",
1080
+ "pmtiles = to_pmtiles(ca_geojson, ca_pmtiles, options = ['--extend-zooms-if-still-dropping'])\n",
1081
  "\n",
1082
+ "# upload pmtiles to minio\n",
1083
+ "s3_cp(ca_pmtiles, \"s3://public-ca30x30/\"+ca_pmtiles, \"minio\")"
1084
  ]
1085
  }
1086
  ],
preprocess/h3_utils.py CHANGED
@@ -44,6 +44,7 @@ def h3_from_geom(con, name, cols, save_path, zoom):
44
  FROM {name}
45
  )
46
  ''')
 
47
  con.sql(f'''
48
  SELECT {cols}, UNNEST(h{zoom}) AS h{zoom},
49
  ST_GeomFromText(h3_cell_to_boundary_wkt(UNNEST(h{zoom}))) AS geom
@@ -52,14 +53,29 @@ def h3_from_geom(con, name, cols, save_path, zoom):
52
 
53
 
54
  def compute_grouped(con, name, cols, zoom, group, path):
55
- unique_groups = con.table(name).select(group).distinct().execute()[group].tolist()
 
56
  # separate data by group
57
- for sub in unique_groups:
58
  sub_name = f"{name}_{re.sub(r'\W+', '_', sub)}"
59
- con.raw_sql(f"""
60
- CREATE OR REPLACE TEMP TABLE {sub_name} AS
61
- SELECT * FROM {name} WHERE {group} = '{sub}'
62
- """)
63
- save_path = f"s3://{path}/hex/zoom{zoom}/group_{group}/{sub.replace(' ', '')}.parquet"
64
- h3_from_geom(con, sub_name, cols, save_path, zoom)
65
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
44
  FROM {name}
45
  )
46
  ''')
47
+
48
  con.sql(f'''
49
  SELECT {cols}, UNNEST(h{zoom}) AS h{zoom},
50
  ST_GeomFromText(h3_cell_to_boundary_wkt(UNNEST(h{zoom}))) AS geom
 
53
 
54
 
55
  def compute_grouped(con, name, cols, zoom, group, path):
56
+ groups = con.table(name).select(group).distinct().execute()[group].tolist()
57
+ chunk_size = 500
58
  # separate data by group
59
+ for sub in groups:
60
  sub_name = f"{name}_{re.sub(r'\W+', '_', sub)}"
61
+ offset = 0
62
+ i = 0
63
+ # chunk data within groups
64
+ while True:
65
+ print(f'Processing group {sub_name} chunk {i} offset {offset}')
66
+ chunk_name = f"{sub_name}_chunk{i}"
67
+ con.raw_sql(f"""
68
+ CREATE OR REPLACE TEMP TABLE {chunk_name} AS
69
+ SELECT * FROM {name}
70
+ WHERE {group} = '{sub}'
71
+ LIMIT {chunk_size}
72
+ OFFSET {offset}
73
+ """)
74
+ if con.sql(f"SELECT 1 FROM {chunk_name} LIMIT 1").execute().empty:
75
+ break
76
+ save_path = f"s3://{path}/hex/zoom{zoom}/group_{group}/{sub_name}_chunk{i}.parquet"
77
+ h3_from_geom(con, chunk_name, cols, save_path, zoom)
78
+ offset += chunk_size
79
+ i += 1
80
+
81
+