cassiebuhler committed
Commit 8419f19 · 1 Parent(s): 5c0d990

removing h3 code, will revisit later

later: fix h3 to raster, process h3 by looping.

Files changed (3):
  1. preprocess/CBN-data.ipynb +105 -122
  2. preprocess/h3_utils.py +30 -32
  3. preprocess/utils.py +72 -63
preprocess/CBN-data.ipynb CHANGED

@@ -18,11 +18,11 @@
  "from cng.utils import *\n",
  "from utils import *\n",
  "from h3_utils import *\n",
+ "s3 = s3_client()\n",
  "\n",
  "import os\n",
  "os.chdir('../data/')\n",
  "\n",
- "s3 = s3_client()\n",
  "duckdb_install_h3()"
  ]
 },
@@ -48,11 +48,11 @@
  "folder = 'Counties'\n",
  "name = 'CA_counties'\n",
  "\n",
- "# unzip(s3, folder = folder, file = '30x30_Counties.zip')\n",
- "cols = process_vector(s3, folder = folder, file = f\"{name}.shp\")\n",
- "# convert_pmtiles(con, s3, folder = folder, file = f\"{name}.parquet\")\n",
+ "unzip(s3, folder = folder, file = '30x30_Counties.zip')\n",
+ "process_vector(s3, folder = folder, file = f\"{name}.shp\")\n",
+ "convert_pmtiles(con, s3, folder = folder, file = f\"{name}.parquet\")\n",
  "\n",
- "convert_h3(con, s3, folder = folder, file = f\"{name}.parquet\", cols = cols, zoom = 8)"
+ "# convert_h3(con, s3, folder = folder, file = f\"{name}.parquet\", cols = cols, zoom = 8)"
  ]
 },
 {
@@ -76,10 +76,11 @@
  "\n",
  "folder = 'Climate_zones'\n",
  "name = 'climate_zones_10'\n",
- "# download(s3, folder = folder, file = 'clusters_10.tif')\n",
- "cols = process_raster(s3, folder = folder, file = 'clusters_10.tif', file_name = f\"{name}.tif\")\n",
- "convert_h3(con, s3, folder = folder, file = f\"{name}_processed.parquet\", cols = cols,\n",
- " zoom = 8)\n"
+ "download(s3, folder = folder, file = 'clusters_10.tif')\n",
+ "process_raster(s3, folder = folder, file = 'clusters_10.tif', file_name = f\"{name}.tif\")\n",
+ "\n",
+ "# convert_h3(con, s3, folder = folder, file = f\"{name}_processed.parquet\", cols = cols,\n",
+ " # zoom = 8)"
  ]
 },
 {
@@ -104,10 +105,10 @@
  "folder = 'Ecoregion'\n",
  "name = 'ACE_ecoregions'\n",
  "\n",
- "# unzip(s3, folder = folder, file = '30x30_Ecoregions.zip')\n",
- "cols = process_vector(s3, folder = folder, file = f\"{name}.shp\")\n",
+ "unzip(s3, folder = folder, file = '30x30_Ecoregions.zip')\n",
+ "process_vector(s3, folder = folder, file = f\"{name}.shp\")\n",
  "\n",
- "convert_h3(con, s3, folder = folder, file = f\"{name}.parquet\", cols = cols, zoom = 8)"
+ "# convert_h3(con, s3, folder = folder, file = f\"{name}.parquet\", cols = cols, zoom = 8)"
  ]
 },
 {
@@ -139,21 +140,21 @@
  "\n",
  "folder = 'Habitat'\n",
  "name = 'fveg22_1'\n",
- "# unzip(s3, folder = folder, file = 'fveg221gdb.zip')\n",
+ "unzip(s3, folder = folder, file = 'fveg221gdb.zip')\n",
  "\n",
- "# command = [\n",
- "# \"gdalwarp\",\n",
- "# \"-of\", \"GTiff\",\n",
- "# 'fveg22_1.gdb',\n",
- "# 'fveg22_1.tif' \n",
- "# ]\n",
+ "command = [\n",
+ " \"gdalwarp\",\n",
+ " \"-of\", \"GTiff\",\n",
+ " 'fveg22_1.gdb',\n",
+ " 'fveg22_1.tif' \n",
+ " ]\n",
  "\n",
- "# subprocess.run(command, check=True)\n",
- "cols = process_raster(s3, folder = folder, file = f\"{name}.tif\")\n",
- "# upload(folder = folder, file = f'{name}_processed.tif.aux.xml')\n",
+ "subprocess.run(command, check=True)\n",
+ "process_raster(s3, folder = folder, file = f\"{name}.tif\")\n",
+ "upload(folder = folder, file = f'{name}_processed.tif.aux.xml')\n",
  "\n",
- "convert_h3(con, s3, folder = folder, file = f\"{name}_processed.parquet\", cols = cols,\n",
- " zoom = 8)"
+ "# convert_h3(con, s3, folder = folder, file = f\"{name}_processed.parquet\", cols = cols,\n",
+ "# zoom = 8)"
  ]
 },
 {
@@ -186,14 +187,15 @@
  "folder = 'ACE_biodiversity'\n",
  "name = 'ACE_terrestrial_biodiversity_summary_ds2739'\n",
  "\n",
- "# download(s3, folder = folder, file = 'Terrestrial_Biodiversity_Summary_-_ACE_[ds2739].geojson',\n",
- " # file_name = f\"{name}.geojson\")\n",
+ "download(s3, folder = folder, file = 'Terrestrial_Biodiversity_Summary_-_ACE_[ds2739].geojson',\n",
+ " file_name = f\"{name}.geojson\")\n",
  "\n",
- "cols = process_vector(s3, folder = folder, file = f\"{name}.geojson\")\n",
- "cols = [item for item in cols if item not in [\"Hex_ID\",\"Shape__Area\",\"Shape__Length\"]]\n",
- "# convert_pmtiles(con, s3, folder = folder, file = f\"{name}.geojson\")\n",
- "convert_h3(con, s3, folder = folder, file = f\"{name}.parquet\", cols = cols, zoom = 8)\n",
- "# gdf = gpd.read_parquet(f\"{name}.parquet\")\n"
+ "process_vector(s3, folder = folder, file = f\"{name}.geojson\")\n",
+ "convert_pmtiles(con, s3, folder = folder, file = f\"{name}.geojson\")\n",
+ "gdf = gpd.read_parquet(f\"{name}.parquet\")\n",
+ "\n",
+ "# cols = [item for item in cols if item not in [\"Hex_ID\",\"Shape__Area\",\"Shape__Length\"]]\n",
+ "# convert_h3(con, s3, folder = folder, file = f\"{name}.parquet\", cols = cols, zoom = 8)"
  ]
 },
 {
@@ -222,7 +224,7 @@
  " 'County', 'Shape__Area', 'Shape__Length', 'geometry']\n",
  " cols.append(col) #select only the cols we want + the new col. \n",
  " rank_df = gdf[gdf[col]==5][cols]# filter ranks = 5\n",
- " cols = process_vector(s3, folder = 'ACE_biodiversity/'+name, file = name+'.parquet',gdf = rank_df)\n",
+ " process_vector(s3, folder = 'ACE_biodiversity/'+name, file = name+'.parquet',gdf = rank_df)\n",
  " convert_pmtiles(con, s3, folder ='ACE_biodiversity/'+name, file = name+'.parquet')\n"
  ]
 },
@@ -261,7 +263,7 @@
  " percentile = 0.95\n",
  " threshold = gdf[col].quantile(percentile)\n",
  " ace = gdf[gdf[col]>=threshold][cols]\n",
- " cols = process_vector(s3, folder = 'ACE_biodiversity/'+name, file = name+'.parquet',gdf = ace)\n",
+ " process_vector(s3, folder = 'ACE_biodiversity/'+name, file = name+'.parquet',gdf = ace)\n",
  " convert_pmtiles(con, s3, folder ='ACE_biodiversity/'+name, file = name+'.parquet')\n",
  "\n",
  "\n",
@@ -299,9 +301,10 @@
  "folder = 'Biodiversity_unique/Plant_richness'\n",
  "name = 'species_D'\n",
  "\n",
- "# download(s3, folder = folder, file = f\"{name}.tif\")\n",
- "cols = filter_raster(s3, folder = folder, file = f\"{name}.tif\", percentile = 80)\n",
- "convert_h3(con, s3, folder = folder, file = f\"{name}_processed.parquet\", cols = cols, zoom = 8)"
+ "download(s3, folder = folder, file = f\"{name}.tif\")\n",
+ "filter_raster(s3, folder = folder, file = f\"{name}.tif\", percentile = 80)\n",
+ "\n",
+ "# convert_h3(con, s3, folder = folder, file = f\"{name}_processed.parquet\", cols = cols, zoom = 8)"
  ]
 },
 {
@@ -327,8 +330,9 @@
  "name = 'endemicspecies_E'\n",
  "\n",
  "download(s3, folder = folder, file = f\"{name}.tif\")\n",
- "cols = filter_raster(s3, folder = folder, file = f\"{name}.tif\", percentile = 80)\n",
- "convert_h3(con, s3, folder = folder, file = f\"{name}_processed.parquet\", cols = cols, zoom = 8)"
+ "filter_raster(s3, folder = folder, file = f\"{name}.tif\", percentile = 80)\n",
+ "\n",
+ "# convert_h3(con, s3, folder = folder, file = f\"{name}_processed.parquet\", cols = cols, zoom = 8)"
  ]
 },
 {
@@ -370,9 +374,10 @@
  "folder = 'Connectivity_resilience/Resilient_connected_network_allcategories'\n",
  "name = 'rcn_wIntactBioCat_caOnly_2020-10-27'\n",
  "\n",
- "cols = process_raster(s3, folder = folder, file = f\"{name}.tif\")\n",
- "convert_h3(con, s3, folder = folder, file = f\"{name}_processed.parquet\", cols = cols, \n",
- " zoom = 8)"
+ "process_raster(s3, folder = folder, file = f\"{name}.tif\")\n",
+ "\n",
+ "# convert_h3(con, s3, folder = folder, file = f\"{name}_processed.parquet\", cols = cols, \n",
+ " # zoom = 8)"
  ]
 },
 {
@@ -459,10 +464,11 @@
  "wetlands = ['Freshwater Emergent Wetland', 'Freshwater Forested/Shrub Wetland', 'Estuarine and Marine Wetland']\n",
  "gdf = gdf[gdf['WETLAND_TYPE'].isin(wetlands)]\n",
  "\n",
- "cols = process_vector(s3, folder = folder, file = f\"{name}.parquet\", gdf = gdf)\n",
- "cols = [item for item in cols if item not in ['ACRES','Shape_Length','Shape_Area','__index_level_0__']]\n",
+ "process_vector(s3, folder = folder, file = f\"{name}.parquet\", gdf = gdf)\n",
  "convert_pmtiles(con, s3, folder =folder, file = f\"{name}.parquet\")\n",
- "geom_to_h3(con, folder = folder, file = f\"{name}.parquet\", cols = cols, zoom = 8)\n"
+ "\n",
+ "# cols = [item for item in cols if item not in ['ACRES','Shape_Length','Shape_Area','__index_level_0__']]\n",
+ "# geom_to_h3(con, folder = folder, file = f\"{name}.parquet\", cols = cols, zoom = 8)"
  ]
 },
 {
@@ -566,28 +572,29 @@
  "\n",
  "folder = 'NBS_agriculture/Farmland_all'\n",
  "name = 'Important_Farmland_2018'\n",
- "# unzip(s3, folder = folder, file = f\"{name}.zip\")\n",
- "cols = process_vector(s3, folder = folder, file = f\"{name}.gdb\",crs = \"epsg:4326\")\n",
- "cols = [item for item in cols if item not in ['Shape_Length','Shape_Area']]\n",
+ "unzip(s3, folder = folder, file = f\"{name}.zip\")\n",
+ "process_vector(s3, folder = folder, file = f\"{name}.gdb\",crs = \"epsg:4326\")\n",
+ "\n",
+ "convert_pmtiles(con, s3, folder = folder, file =f\"{name}.parquet\")\n",
  "\n",
- "# convert_pmtiles(con, s3, folder = folder, file =f\"{name}.parquet\")\n",
- "convert_h3(con, s3, folder = folder, file = f\"{name}.parquet\", cols = cols, zoom = 8)\n",
+ "# cols = [item for item in cols if item not in ['Shape_Length','Shape_Area']]\n",
+ "# convert_h3(con, s3, folder = folder, file = f\"{name}.parquet\", cols = cols, zoom = 8)\n",
  "\n",
  "# only pick a subset \n",
  "folder = 'NBS_agriculture/Farmland_all/Farmland'\n",
  "name = 'Farmland_2018'\n",
- "# gdf = gpd.read_file('Important_Farmland_2018.gdb')\n",
- "# farmland_type = ['P','S','L','U'] # prime, statewide importance, local importance, unique\n",
- "# gdf_farmland = gdf[gdf['polygon_ty'].isin(farmland_type)]\n",
- "# cols = process_vector(s3, folder = folder, file = f\"{name}.parquet\", gdf = gdf_farmland)\n",
- "# convert_pmtiles(con, s3, folder = folder, file =f\"{name}.parquet\")\n",
+ "gdf = gpd.read_file('Important_Farmland_2018.gdb')\n",
+ "farmland_type = ['P','S','L','U'] # prime, statewide importance, local importance, unique\n",
+ "gdf_farmland = gdf[gdf['polygon_ty'].isin(farmland_type)]\n",
+ "process_vector(s3, folder = folder, file = f\"{name}.parquet\", gdf = gdf_farmland)\n",
+ "convert_pmtiles(con, s3, folder = folder, file =f\"{name}.parquet\")\n",
  "\n",
  "# grazing lands \n",
  "folder = 'NBS_agriculture/Farmland_all/Lands_suitable_grazing'\n",
  "name = 'Grazing_land_2018'\n",
- "# gdf_grazing = gdf[gdf['polygon_ty'] == 'G']\n",
- "# cols = process_vector(s3, folder = folder, file = f\"{name}.parquet\", gdf = gdf_grazing)\n",
- "# convert_pmtiles(con, s3, folder = folder, file =f\"{name}.parquet\")\n"
+ "gdf_grazing = gdf[gdf['polygon_ty'] == 'G']\n",
+ "process_vector(s3, folder = folder, file = f\"{name}.parquet\", gdf = gdf_grazing)\n",
+ "convert_pmtiles(con, s3, folder = folder, file =f\"{name}.parquet\")\n"
  ]
 },
 {
@@ -640,15 +647,14 @@
  "\n",
  "unzip(s3, folder = folder, file = 'fire23-1gdb.zip')\n",
  "gdf = gpd.read_file('fire23_1.gdb')\n",
- "# gdf = gdf[~gdf['YEAR_'].isna()]\n",
- "# gdf['YEAR_'] = gdf['YEAR_'].astype('int64')\n",
- "# gdf = gdf[gdf['YEAR_']>=2014]\n",
- "cols = process_vector(s3, folder = folder, file = f\"{name}.parquet\", gdf = gdf)\n",
- "cols = [item for item in cols if item not in ['Shape_Length','Shape_Area']]\n",
+ "gdf = gdf[~gdf['YEAR_'].isna()]\n",
+ "gdf['YEAR_'] = gdf['YEAR_'].astype('int64')\n",
+ "gdf = gdf[gdf['YEAR_']>=2014]\n",
+ "process_vector(s3, folder = folder, file = f\"{name}.parquet\", gdf = gdf)\n",
+ "convert_pmtiles(con, s3, folder = folder, file = f\"{name}.parquet\")\n",
  "\n",
- "# convert_pmtiles(con, s3, folder = folder, file = f\"{name}.parquet\")\n",
- "\n",
- "convert_h3(con, s3, folder = folder, file = f\"{name}.parquet\", cols = cols, zoom = 8)"
+ "# cols = [item for item in cols if item not in ['Shape_Length','Shape_Area']]\n",
+ "# convert_h3(con, s3, folder = folder, file = f\"{name}.parquet\", cols = cols, zoom = 8)"
  ]
 },
 {
@@ -732,13 +738,12 @@
  "folder = 'Progress_data_new_protection/Newly_counted_lands'\n",
  "name = 'newly_counted_lands_2024'\n",
  "\n",
+ "unzip(s3, folder = folder, file = f\"{name}.shp.zip\")\n",
+ "process_vector(s3, folder = folder, file = f\"{name}.shp\",crs = \"epsg:4326\")\n",
+ "convert_pmtiles(con, s3, folder = folder, file = f\"{name}.parquet\")\n",
  "\n",
- "# unzip(s3, folder = folder, file = f\"{name}.shp.zip\")\n",
- "cols = process_vector(s3, folder = folder, file = f\"{name}.shp\",crs = \"epsg:4326\")\n",
- "cols = [item for item in cols if item not in ['Shape_Leng', 'Shape_Area']]\n",
- "\n",
- "# convert_pmtiles(con, s3, folder = folder, file = f\"{name}.parquet\")\n",
- "convert_h3(con, s3, folder = folder, file = f\"{name}.parquet\", cols = cols, zoom = 8)\n"
+ "# cols = [item for item in cols if item not in ['Shape_Leng', 'Shape_Area']]\n",
+ "# convert_h3(con, s3, folder = folder, file = f\"{name}.parquet\", cols = cols, zoom = 8)"
  ]
 },
 {
@@ -764,8 +769,8 @@
  "name = 'DAC_2022'\n",
  "\n",
  "unzip(s3, folder = folder, file = 'sb535dacgdbf2022gdb.zip')\n",
- "cols = process_vector(s3, folder = folder, file = 'SB535DACgdb_F_2022.gdb', file_name = f\"{name}.parquet\")\n",
- "convert_pmtiles(con, s3, folder = folder, file = f\"{name}.parquet\")\n"
+ "process_vector(s3, folder = folder, file = 'SB535DACgdb_F_2022.gdb', file_name = f\"{name}.parquet\")\n",
+ "convert_pmtiles(con, s3, folder = folder, file = f\"{name}.parquet\")"
  ]
 },
 {
@@ -795,14 +800,22 @@
  " .mutate(id=ibis.row_number().over()) #making a unique id \n",
  " ).execute().set_crs('EPSG:3857')\n",
  "\n",
- "cols = process_vector(s3, folder = folder, file = 'Priority Populations 4.0 Combined Layer.gdb',\n",
+ "process_vector(s3, folder = folder, file = 'Priority Populations 4.0 Combined Layer.gdb',\n",
  " file_name = f\"{name}.parquet\", gdf = gdf)\n",
- "cols = [item for item in cols if item not in ['Shape_Length','Shape_Area']]\n",
+ "convert_pmtiles(con, s3, folder = folder, file = f\"{name}.parquet\")\n",
  "\n",
- "# convert_pmtiles(con, s3, folder = folder, file = f\"{name}.parquet\")\n",
- "convert_h3(con, s3, folder = folder, file = f\"{name}.parquet\", cols = cols, zoom = 8)\n"
+ "# cols = [item for item in cols if item not in ['Shape_Length','Shape_Area']]\n",
+ "# convert_h3(con, s3, folder = folder, file = f\"{name}.parquet\", cols = cols, zoom = 8)"
  ]
 },
+{
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "df1a939c-cb89-4a2f-8309-2819fe52ac45",
+ "metadata": {},
+ "outputs": [],
+ "source": []
+},
 {
 "cell_type": "markdown",
 "id": "a919ff5f-dff3-4db7-81c2-694f07f37d1d",
@@ -828,7 +841,7 @@
  "\n",
  "gdf = gpd.read_file('Priority Populations 4.0 Combined Layer.gdb')\n",
  "gdf = gdf[gdf['Designatio'] =='Low-income community']\n",
- "cols = process_vector(s3, folder = folder, file = f\"{name}.parquet\", gdf = gdf)\n",
+ "process_vector(s3, folder = folder, file = f\"{name}.parquet\", gdf = gdf)\n",
  "convert_pmtiles(con, s3, folder = folder, file = f\"{name}.parquet\")"
  ]
 },
@@ -853,42 +866,12 @@
  "folder = 'Progress_data_new_protection/Land_Status_Zone_Ecoregion_Counties'\n",
  "name = 'all_regions_reGAP_county_eco'\n",
  "\n",
- "# unzip(s3, folder = folder, file = 'Land_Status_Zone_Ecoregion_Counties.shp.zip')\n",
- "cols = process_vector(s3, folder = folder, file = 'Land_Status_Zone_Ecoregion_Counties.shp',\n",
+ "unzip(s3, folder = folder, file = 'Land_Status_Zone_Ecoregion_Counties.shp.zip')\n",
+ "process_vector(s3, folder = folder, file = 'Land_Status_Zone_Ecoregion_Counties.shp',\n",
  " file_name = f\"{name}.parquet\")\n",
- "convert_h3(con, s3, folder = folder, file = f\"{name}.parquet\", cols = cols, zoom = 5)\n",
- "\n",
- "# convert_pmtiles(con, s3, folder = folder, file = f\"{name}.parquet\")"
- ]
- },
- {
- "cell_type": "markdown",
- "id": "df6e2e1e-b74f-4b14-8140-7e425a3dec20",
- "metadata": {},
- "source": [
- "# CA Nature data"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "16f9f330-c10c-4cec-9eba-0878aab9a5f7",
- "metadata": {},
- "outputs": [],
- "source": [
- "%%time \n",
- "con = ibis.duckdb.connect('ca_30x30_base',extensions = [\"spatial\", \"h3\"])\n",
- "set_secrets(con)\n",
- "\n",
- "# file = 'ca-30x30-base.parquet'\n",
- "folder = \"CA_Nature/2024/Preprocessing\"\n",
- "name = 'ca-30x30-base'\n",
- "# download(s3, folder = folder, file = f\"{name}.parquet\")\n",
+ "convert_pmtiles(con, s3, folder = folder, file = f\"{name}.parquet\")\n",
  "\n",
- "# cols = process_vector(s3, folder = folder, file = f\"{name}.parquet\", crs=\"EPSG:4326\")\n",
- "# convert_h3(con, s3, folder = folder, file = f\"{name}.parquet\", cols = cols, \n",
- " # zoom = 8)\n",
- "convert_pmtiles(con, s3, folder = folder, file = f\"{name}.parquet\")\n"
+ "# convert_h3(con, s3, folder = folder, file = f\"{name}.parquet\", cols = cols, zoom = 5)"
  ]
 },
 {
@@ -912,18 +895,18 @@
  "folder = 'CPAD'\n",
  "name = 'cced_2024b_release'\n",
  "\n",
- "# unzip(s3, folder = folder, file = f\"{name}.shp.zip\")\n",
- "# cols = process_vector(s3, folder = folder, file = f\"{name}.shp\", crs=\"EPSG:3310\")\n",
- "# convert_pmtiles(con, s3, folder = folder, file = f\"{name}.parquet\")\n",
- "cols = process_vector(s3, folder = folder, file = f\"{name}.shp\", crs=\"EPSG:4326\")\n",
- "convert_h3(con, s3, folder = folder, file = f\"{name}.parquet\", cols= cols, zoom = 8)\n",
+ "unzip(s3, folder = folder, file = f\"{name}.shp.zip\")\n",
+ "process_vector(s3, folder = folder, file = f\"{name}.shp\", crs=\"EPSG:3310\")\n",
+ "convert_pmtiles(con, s3, folder = folder, file = f\"{name}.parquet\")\n",
+ "process_vector(s3, folder = folder, file = f\"{name}.shp\", crs=\"EPSG:4326\")\n",
+ "# convert_h3(con, s3, folder = folder, file = f\"{name}.parquet\", cols= cols, zoom = 8)\n",
  "\n",
  "name = 'cpad_2024b_release'\n",
- "# unzip(s3, folder = folder, file = f\"{name}.shp.zip\")\n",
- "# cols = process_vector(s3, folder = folder, file = f\"{name}.shp\", crs=\"EPSG:3310\")\n",
- "# convert_pmtiles(con, s3, folder = folder, file = f\"{name}.parquet\")\n",
- "cols = process_vector(s3, folder = folder, file = f\"{name}.shp\", crs=\"EPSG:4326\")\n",
- "convert_h3(con, s3, folder = folder, file = f\"{name}.parquet\", cols= cols, zoom = 8)"
+ "unzip(s3, folder = folder, file = f\"{name}.shp.zip\")\n",
+ "process_vector(s3, folder = folder, file = f\"{name}.shp\", crs=\"EPSG:3310\")\n",
+ "convert_pmtiles(con, s3, folder = folder, file = f\"{name}.parquet\")\n",
+ "process_vector(s3, folder = folder, file = f\"{name}.shp\", crs=\"EPSG:4326\")\n",
+ "# convert_h3(con, s3, folder = folder, file = f\"{name}.parquet\", cols= cols, zoom = 8)"
  ]
 }
 ],
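One consequence of this change: process_vector, process_raster, and filter_raster no longer return column lists, so the commented-out convert_h3(..., cols = cols, ...) calls above reference a cols that nothing defines. A minimal sketch of rebuilding cols from the written parquet when the h3 step is revisited (hypothetical helper, not part of this commit):

import geopandas as gpd

def columns_for_h3(parquet_file, drop=("geom",)):
    # List the non-geometry columns of a processed parquet,
    # i.e. the `cols` argument convert_h3 expects.
    gdf = gpd.read_parquet(parquet_file)
    return [c for c in gdf.columns if c not in drop]

# cols = columns_for_h3(f"{name}.parquet")
# convert_h3(con, s3, folder = folder, file = f"{name}.parquet", cols = cols, zoom = 8)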
preprocess/h3_utils.py CHANGED

@@ -1,14 +1,13 @@
 from utils import *
 import re
 
-def convert_h3(con, s3, folder, file, cols, zoom):
+def convert_h3(con, s3, folder, file, cols, zoom, base_folder = "CBN/"):
     """
     Driver function to convert geometries to h3.
     If no zoom levels exist -> compute from geometry at target zoom.
-    If lower zoom exists -> compute children from max available until target zoom.
     """
     cols = ", ".join(cols) if isinstance(cols, list) else cols
-    bucket, path = info(folder, file)
+    bucket, path = info(folder, file, base_folder)
     path, file = os.path.split(path)
     name, ext = os.path.splitext(file)
     name = name.replace('-', '')
@@ -23,7 +22,7 @@ def convert_h3(con, s3, folder, file, cols, zoom):
             zooms.append(int(match.group(1)))
 
     if not zooms: # if no h3 files exist
-        print(f'No h3 files exists, computing {zoom} from geometry.')
+        print(f'No h3 files exists, computing zoom level {zoom} from geometry.')
         con.read_parquet(f"s3://{bucket}/{path}/{file}", table_name=name)
         h3_from_geom(con, name, cols, zoom)
         con.sql(f'''
@@ -38,21 +37,21 @@ def convert_h3(con, s3, folder, file, cols, zoom):
         print(f'Zoom {zoom} already exists!')
         return
 
-    elif current_zoom < zoom: #compute child of most refined zoom level
-        print(f'Reading zoom {current_zoom}')
-        con.read_parquet(
-            f"s3://{bucket}/{path}/hex/zoom{current_zoom}/{name}.parquet",
-            table_name=f"h3_h{current_zoom}"
-        )
-        print(f'Computing {zoom} from {current_zoom}')
+    # elif current_zoom < zoom: #compute child of most refined zoom level
+    #     print(f'Reading zoom {current_zoom}')
+    #     con.read_parquet(
+    #         f"s3://{bucket}/{path}/hex/zoom{current_zoom}/{name}.parquet",
+    #         table_name=f"h3_h{current_zoom}"
+    #     )
+    #     print(f'Computing {zoom} from {current_zoom}')
 
-        for z in range(current_zoom + 1, zoom + 1):
-            print(f'Current zoom {z}')
-            h3_from_parent(con, z)
-            con.sql(f'''
-                SELECT *, UNNEST(h3_cell_to_children(h{z-1}, {z})) AS h{z}
-                FROM h3_h{z-1}
-            ''').to_parquet(f"s3://{bucket}/{path}/hex/zoom{z}/{name}.parquet")
+    #     for z in range(current_zoom + 1, zoom + 1):
+    #         print(f'Current zoom {z}')
+    #         h3_from_parent(con, z)
+    #         con.sql(f'''
+    #             SELECT *, UNNEST(h3_cell_to_children(h{z-1}, {z})) AS h{z}
+    #             FROM h3_h{z-1}
+    #         ''').to_parquet(f"s3://{bucket}/{path}/hex/zoom{z}/{name}.parquet")
 
 
 def h3_from_geom(con, name, cols, zoom):
@@ -60,20 +59,19 @@ def h3_from_geom(con, name, cols, zoom):
     Computes hexes directly from geometry.
     """
     con.raw_sql(f'''
-        CREATE OR REPLACE TEMP TABLE t2 AS
-        WITH t1 AS (
-            SELECT {cols}, ST_Dump(geom) AS geom
-            FROM {name}
-        )
-        SELECT {cols},
-        h3_polygon_wkt_to_cells_string(ST_Force2D(UNNEST(geom).geom), {zoom}) AS h{zoom}
-        FROM t1
+        CREATE OR REPLACE TEMP TABLE t2 AS
+        SELECT {cols},
+        h3_polygon_wkt_to_cells_string(ST_Force2D(dump.geom), {zoom}) AS h{zoom}
+        FROM (
+            SELECT {cols}, UNNEST(ST_Dump(geom)) AS dump
+            FROM {name}
+        )
    ''')
 
 
-def h3_from_parent(con, zoom):
-    con.raw_sql(f'''
-        CREATE OR REPLACE TEMP TABLE h3_h{zoom} AS
-        SELECT *, UNNEST(h3_cell_to_children(h{zoom-1}, {zoom})) AS h{zoom}
-        FROM h3_h{zoom-1}
-    ''')
+# def h3_from_parent(con, zoom):
+#     con.raw_sql(f'''
+#         CREATE OR REPLACE TEMP TABLE h3_h{zoom} AS
+#         SELECT *, UNNEST(h3_cell_to_children(h{zoom-1}, {zoom})) AS h{zoom}
+#         FROM h3_h{zoom-1}
+#     ''')
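The commit note defers "process h3 by looping". A sketch of that idea, assuming the same DuckDB h3 extension and hex/zoom{z} parquet layout used above: derive each zoom level from the previous one and drop the parent table, so only one level is materialized at a time. This mirrors the commented-out block and is not part of this commit.

def h3_by_looping(con, bucket, path, name, start_zoom, end_zoom):
    # Seed with cells already computed at start_zoom (e.g. by h3_from_geom).
    con.read_parquet(
        f"s3://{bucket}/{path}/hex/zoom{start_zoom}/{name}.parquet",
        table_name=f"h3_h{start_zoom}",
    )
    for z in range(start_zoom + 1, end_zoom + 1):
        # Expand every parent cell into its children at zoom z.
        con.raw_sql(f'''
            CREATE OR REPLACE TEMP TABLE h3_h{z} AS
            SELECT *, UNNEST(h3_cell_to_children(h{z-1}, {z})) AS h{z}
            FROM h3_h{z-1}
        ''')
        con.sql(f"SELECT * FROM h3_h{z}").to_parquet(
            f"s3://{bucket}/{path}/hex/zoom{z}/{name}.parquet"
        )
        con.raw_sql(f"DROP TABLE h3_h{z-1}")  # keep only the current level around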
preprocess/utils.py CHANGED

@@ -10,48 +10,50 @@ import ibis
 from ibis import _
 
 import rasterio
-from rasterio.features import shapes
-from shapely.geometry import shape
+from rasterio.transform import xy
+from shapely.geometry import Point
 import numpy as np
+from pyproj import Transformer
 
-
-def info(folder, file, bucket = "public-ca30x30", base_folder = 'CBN/'):
+def info(folder, file, base_folder, bucket = "public-ca30x30"):
     """
     Extract minio path to upload/download data
     """
-    path = os.path.join(base_folder, folder, file)
+    if (folder is None) & (base_folder is None):
+        path = file
+    else:
+        path = os.path.join(base_folder, folder, file)
     # path = os.path.join(folder, file)
     return bucket, path
 
-def download(s3, folder, file, file_name = None):
+def download(s3, folder, file, file_name = None, base_folder = "CBN/"):
     """
     Downloading file from minio
     """
    if not file_name:
        file_name = file
-    bucket, path = info(folder, file)
-    s3.fget_object(bucket, path ,file_name)
+    bucket, path = info(folder, file, base_folder)
+    s3.fget_object(bucket, path , file_name)
    return
 
-def upload(s3, folder, file):
+def upload(s3, folder, file, base_folder = "CBN/"):
     """
     Uploading file from minio
     """
-    bucket, path = info(folder, file)
+    bucket, path = info(folder, file, base_folder)
     s3.fput_object(bucket, path ,file)
     return
 
-def unzip(s3, folder, file):
+def unzip(s3, folder, file, base_folder = "CBN/"):
     """
     Unzipping zip files
     """
-    download(s3, folder, file)
+    download(s3, folder, file, base_folder)
     with zipfile.ZipFile(file, 'r') as zip_ref:
         zip_ref.extractall()
     return
 
-# def process_vector(s3, folder, file, file_name = None, gdf = None, crs="EPSG:3310"):
-def process_vector(s3, folder, file, file_name = None, gdf = None, crs="EPSG:4326"):
+def process_vector(s3, folder, file, file_name = None, gdf = None, crs="EPSG:4326", base_folder = "CBN/"):
     """
     Driver function to process vectors
     """
@@ -67,11 +69,11 @@ def process_vector(s3, folder, file, file_name = None, gdf = None, crs="EPSG:4326"):
     name, ext = os.path.splitext(file)
     parquet_file = f"{name}{'.parquet'}"
     gdf.to_parquet(parquet_file)
-    upload(s3, folder, parquet_file)
-
-    return gdf.drop('geom',axis = 1).columns.to_list()
+    upload(s3, folder, parquet_file, base_folder)
+    # return gdf.drop('geom',axis = 1).columns.to_list()
+    return
 
-def process_raster(s3, folder, file, file_name = None):
+def process_raster(s3, folder, file, file_name = None, base_folder = "CBN/"):
     """
     Driver function to process rasters
     """
@@ -82,29 +84,30 @@ def process_raster(s3, folder, file, file_name = None):
     output_cog_file = f"{name}_processed_COG{ext}"
     output_vector_file = f"{name}_processed.parquet"
     # Reproject raster
-    if not exists_on_s3(s3, folder, output_file):
+    if not exists_on_s3(s3, folder, output_file, base_folder):
         output_file = reproject_raster(file)
-        upload(s3, folder, output_file)
+        upload(s3, folder, output_file, base_folder)
     else:
         print(f"{output_file} already exists on S3, skipping reprojection/upload.")
 
     # Make COG
-    if not exists_on_s3(s3, folder, output_cog_file):
+    if not exists_on_s3(s3, folder, output_cog_file, base_folder):
         output_cog_file = make_cog(output_file)
-        upload(s3, folder, output_cog_file)
+        upload(s3, folder, output_cog_file, base_folder)
     else:
         print(f"{output_cog_file} already exists on S3, skipping COG conversion/upload.")
 
-    # Vectorize raster
-    if not exists_on_s3(s3, folder, output_vector_file):
-        output_vector_file, cols = make_vector(output_file)
-        upload(s3, folder, output_vector_file)
-    else:
-        print(f"{output_vector_file} already exists on S3, skipping vectorization/upload.")
-        # We still need column names
-        gdf = gpd.read_parquet(output_vector_file)
-        cols = gdf.drop('geom', axis=1).columns.to_list()
-    return cols
+    # # Vectorize raster
+    # if not exists_on_s3(s3, folder, output_vector_file, base_folder):
+    #     output_vector_file, cols = make_vector(output_file)
+    #     upload(s3, folder, output_vector_file, base_folder)
+    # else:
+    #     print(f"{output_vector_file} already exists on S3, skipping vectorization/upload.")
+    #     # We still need column names
+    #     gdf = gpd.read_parquet(output_vector_file)
+    #     cols = gdf.drop('geom', axis=1).columns.to_list()
+    # return cols
+    return
 
 def reproject_raster(input_file, crs="EPSG:3310"):
     """
@@ -147,31 +150,37 @@ def make_cog(input_file, crs="EPSG:4326"):
         print(f"Error occurred during processing: {e}")
     return output_file
 
-def make_vector(input_file, crs="EPSG:4326"):
-    """
-    Converting rasters to vector formats in order to convert to h3
-    """
-    name, ext = os.path.splitext(input_file)
-    output_file = f"{name}.parquet"
-    # Open raster
-    with rasterio.open(input_file) as src:
-        image = src.read(1) # read first band
-        mask = image != src.nodata # mask out nodata
+# def make_vector(input_file, crs="EPSG:4326"):
+#     """
+#     Converting rasters to vector formats in order to convert to h3
+#     """
+#     name, ext = os.path.splitext(input_file)
+#     output_file = f"{name}.parquet"
 
-        results = (
-            {"geom": shape(geom), "value": value}
-            for geom, value in shapes(image, mask=mask, transform=src.transform)
-        )
-
-        gdf = gpd.GeoDataFrame.from_records(results)
-    gdf.set_geometry('geom', inplace=True)
-    gdf['id'] = np.arange(len(gdf))
-    gdf.set_crs(src.crs, inplace=True)
-    if gdf.crs != crs:
-        gdf.to_crs(crs, inplace=True)
-
-    gdf.to_parquet(output_file)
-    return output_file, gdf.drop('geom',axis = 1).columns.to_list()
+#     with rasterio.open(input_file) as src:
+#         band = src.read(1) # read first band
+#         mask = band != src.nodata # mask out nodata
+#         rows, cols = np.where(mask)
+#         x, y = rasterio.transform.xy(src.transform, rows, cols, offset = "center")
+
+#     # reproject
+#     if src.crs and src.crs.to_string() != crs:
+#         transformer = Transformer.from_crs(src.crs, crs, always_xy=True)
+#         x, y = transformer.transform(x, y)
+#         crs_out = crs
+#     else:
+#         crs_out = src.crs
+
+#     gdf = gpd.GeoDataFrame(
+#         {"value": band[rows, cols]},
+#         geometry=[Point(xy) for xy in zip(x, y)],
+#         crs=crs_out
+#     )
+
+#     gdf.rename_geometry('geom', inplace=True)
+#     gdf['id'] = np.arange(len(gdf))
+#     gdf.to_parquet(output_file)
+#     return output_file, gdf.drop('geom',axis = 1).columns.to_list()
 
 def filter_raster(s3, folder, file, percentile):
     """
@@ -192,11 +201,11 @@ def filter_raster(s3, folder, file, percentile):
     profile.update(dtype=rasterio.float64)
     with rasterio.open(new_file, "w", **profile) as dst:
         dst.write(filtered, 1)
-    cols = process_raster(s3, folder, file)
-    return cols
-
+    process_raster(s3, folder, file)
+    # return cols
+    return
 
-def convert_pmtiles(con, s3, folder, file):
+def convert_pmtiles(con, s3, folder, file, base_folder = "CBN/"):
     """
     Convert to PMTiles with tippecanoe
     """
@@ -205,14 +214,14 @@ def convert_pmtiles(con, s3, folder, file):
     (con.read_parquet(file).execute().set_crs('epsg:3310')
      .to_crs('epsg:4326').to_file(name+'.geojson'))
     to_pmtiles(name+'.geojson', name+'.pmtiles', options = ['--extend-zooms-if-still-dropping'])
-    upload(s3, folder, name+'.pmtiles')
+    upload(s3, folder, name+'.pmtiles', base_folder)
     return
 
-def exists_on_s3(s3, folder, file):
+def exists_on_s3(s3, folder, file, base_folder = "CBN/"):
     """
     Check if a file exists on S3
     """
-    bucket, path = info(folder, file)
+    bucket, path = info(folder, file, base_folder)
     try:
         s3.stat_object(bucket, path)
         return True
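With the reworked info(), base_folder is threaded explicitly through every helper instead of defaulting inside info() itself. A quick usage sketch of the new path composition (illustrative values, following the signature above):

bucket, path = info("Counties", "CA_counties.parquet", base_folder="CBN/")
# -> ("public-ca30x30", "CBN/Counties/CA_counties.parquet")

# With both folder and base_folder set to None, the file name is used as-is,
# so objects at the bucket root can be addressed too:
bucket, path = info(None, "ca-30x30-base.parquet", None)
# -> ("public-ca30x30", "ca-30x30-base.parquet")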