Commit 8419f19 · Parent(s): 5c0d990
removing h3 code, will revisit later
later: fix h3 to raster, process h3 by looping.
- preprocess/CBN-data.ipynb +105 -122
- preprocess/h3_utils.py +30 -32
- preprocess/utils.py +72 -63
preprocess/CBN-data.ipynb CHANGED
Old side (lines removed by this commit; the rendered view does not show most of the removed cell content, and unchanged context lines are repeated in the new side below):

@@ -18,11 +18,11 @@
-  "s3 = s3_client()\n",
@@ -48,11 +48,11 @@
-  "convert_h3(con, s3, folder = folder, file = f\"{name}.parquet\", cols = cols, zoom = 8)"
@@ -76,10 +76,11 @@
@@ -104,10 +105,10 @@
-  "convert_h3(con, s3, folder = folder, file = f\"{name}.parquet\", cols = cols, zoom = 8)"
@@ -139,21 +140,21 @@
-  "convert_h3(con, s3, folder = folder, file = f\"{name}_processed.parquet\", cols = cols,\n",
@@ -186,14 +187,15 @@
@@ -222,7 +224,7 @@
@@ -261,7 +263,7 @@
@@ -299,9 +301,10 @@
@@ -327,8 +330,9 @@
@@ -370,9 +374,10 @@
@@ -459,10 +464,11 @@
-  "cols = [item for item in cols if item not in ['ACRES','Shape_Length','Shape_Area','__index_level_0__']]\n",
@@ -566,28 +572,29 @@
-  "convert_h3(con, s3, folder = folder, file = f\"{name}.parquet\", cols = cols, zoom = 8)\n",
@@ -640,15 +647,14 @@
-  "convert_h3(con, s3, folder = folder, file = f\"{name}.parquet\", cols = cols, zoom = 8)"
@@ -732,13 +738,12 @@
-  "cols = [item for item in cols if item not in ['Shape_Leng', 'Shape_Area']]\n",
-  "\n",
-  "# convert_pmtiles(con, s3, folder = folder, file = f\"{name}.parquet\")\n",
-  "convert_h3(con, s3, folder = folder, file = f\"{name}.parquet\", cols = cols, zoom = 8)\n"
@@ -764,8 +769,8 @@
-  "convert_pmtiles(con, s3, folder = folder, file = f\"{name}.parquet\")
@@ -795,14 +800,22 @@
-  "convert_h3(con, s3, folder = folder, file = f\"{name}.parquet\", cols = cols, zoom = 8)
@@ -828,7 +841,7 @@
@@ -853,42 +866,12 @@
-  "# convert_pmtiles(con, s3, folder = folder, file = f\"{name}.parquet\")"
-  ]
-  },
-  {
-  "cell_type": "markdown",
-  "id": "df6e2e1e-b74f-4b14-8140-7e425a3dec20",
-  "metadata": {},
-  "source": [
-  "# CA Nature data"
-  ]
-  },
-  {
-  "cell_type": "code",
-  "execution_count": null,
-  "id": "16f9f330-c10c-4cec-9eba-0878aab9a5f7",
-  "metadata": {},
-  "outputs": [],
-  "source": [
-  "%%time \n",
-  "con = ibis.duckdb.connect('ca_30x30_base',extensions = [\"spatial\", \"h3\"])\n",
-  "set_secrets(con)\n",
-  "\n",
-  "# file = 'ca-30x30-base.parquet'\n",
-  "folder = \"CA_Nature/2024/Preprocessing\"\n",
-  "name = 'ca-30x30-base'\n",
-  "# download(s3, folder = folder, file = f\"{name}.parquet\")\n",
-  "# convert_h3(con, s3, folder = folder, file = f\"{name}.parquet\", cols = cols, \n",
-  " # zoom = 8)\n",
-  "convert_pmtiles(con, s3, folder = folder, file = f\"{name}.parquet\")\n"
@@ -912,18 +895,18 @@
-  "convert_h3(con, s3, folder = folder, file = f\"{name}.parquet\", cols= cols, zoom = 8)\n",
-  "convert_h3(con, s3, folder = folder, file = f\"{name}.parquet\", cols= cols, zoom = 8)"
New side (context and added lines):

@@ -18,11 +18,11 @@
   "from cng.utils import *\n",
   "from utils import *\n",
   "from h3_utils import *\n",
+  "s3 = s3_client()\n",
   "\n",
   "import os\n",
   "os.chdir('../data/')\n",
   "\n",
   "duckdb_install_h3()"
   ]
  },
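Note: the cells below also assume an open DuckDB-backed Ibis connection named `con` with the spatial and h3 extensions loaded. A minimal sketch of that setup, following the pattern in the CA Nature cell this commit removes (listed in the old side above); the database file name comes from that removed cell, and the assumption that `from cng.utils import *` provides `set_secrets` is not re-verified here:

import ibis
from cng.utils import *   # assumed to provide set_secrets and s3_client

con = ibis.duckdb.connect('ca_30x30_base', extensions=["spatial", "h3"])
set_secrets(con)  # register S3 credentials with the DuckDB connection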
@@ -48,11 +48,11 @@
   "folder = 'Counties'\n",
   "name = 'CA_counties'\n",
   "\n",
+  "unzip(s3, folder = folder, file = '30x30_Counties.zip')\n",
+  "process_vector(s3, folder = folder, file = f\"{name}.shp\")\n",
+  "convert_pmtiles(con, s3, folder = folder, file = f\"{name}.parquet\")\n",
   "\n",
+  "# convert_h3(con, s3, folder = folder, file = f\"{name}.parquet\", cols = cols, zoom = 8)"
   ]
  },
  {
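The commented-out `convert_h3` calls in this and the following cells expect a `cols` list naming the attribute columns to carry alongside each hexagon (see `convert_h3` in `preprocess/h3_utils.py` below). A sketch of what re-enabling the call would require; the column names here are hypothetical and not taken from the Counties layer:

cols = ["COUNTY_NAME", "ACRES"]   # hypothetical attribute columns to keep
# convert_h3(con, s3, folder=folder, file=f"{name}.parquet", cols=cols, zoom=8)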
@@ -76,10 +76,11 @@
   "\n",
   "folder = 'Climate_zones'\n",
   "name = 'climate_zones_10'\n",
+  "download(s3, folder = folder, file = 'clusters_10.tif')\n",
+  "process_raster(s3, folder = folder, file = 'clusters_10.tif', file_name = f\"{name}.tif\")\n",
+  "\n",
+  "# convert_h3(con, s3, folder = folder, file = f\"{name}_processed.parquet\", cols = cols,\n",
+  " # zoom = 8)"
   ]
  },
  {

@@ -104,10 +105,10 @@
   "folder = 'Ecoregion'\n",
   "name = 'ACE_ecoregions'\n",
   "\n",
+  "unzip(s3, folder = folder, file = '30x30_Ecoregions.zip')\n",
+  "process_vector(s3, folder = folder, file = f\"{name}.shp\")\n",
   "\n",
+  "# convert_h3(con, s3, folder = folder, file = f\"{name}.parquet\", cols = cols, zoom = 8)"
   ]
  },
  {

@@ -139,21 +140,21 @@
   "\n",
   "folder = 'Habitat'\n",
   "name = 'fveg22_1'\n",
+  "unzip(s3, folder = folder, file = 'fveg221gdb.zip')\n",
   "\n",
+  "command = [\n",
+  " \"gdalwarp\",\n",
+  " \"-of\", \"GTiff\",\n",
+  " 'fveg22_1.gdb',\n",
+  " 'fveg22_1.tif' \n",
+  " ]\n",
   "\n",
+  "subprocess.run(command, check=True)\n",
+  "process_raster(s3, folder = folder, file = f\"{name}.tif\")\n",
+  "upload(folder = folder, file = f'{name}_processed.tif.aux.xml')\n",
   "\n",
+  "# convert_h3(con, s3, folder = folder, file = f\"{name}_processed.parquet\", cols = cols,\n",
+  "# zoom = 8)"
   ]
  },
  {

@@ -186,14 +187,15 @@
   "folder = 'ACE_biodiversity'\n",
   "name = 'ACE_terrestrial_biodiversity_summary_ds2739'\n",
   "\n",
+  "download(s3, folder = folder, file = 'Terrestrial_Biodiversity_Summary_-_ACE_[ds2739].geojson',\n",
+  " file_name = f\"{name}.geojson\")\n",
   "\n",
+  "process_vector(s3, folder = folder, file = f\"{name}.geojson\")\n",
+  "convert_pmtiles(con, s3, folder = folder, file = f\"{name}.geojson\")\n",
+  "gdf = gpd.read_parquet(f\"{name}.parquet\")\n",
+  "\n",
+  "# cols = [item for item in cols if item not in [\"Hex_ID\",\"Shape__Area\",\"Shape__Length\"]]\n",
+  "# convert_h3(con, s3, folder = folder, file = f\"{name}.parquet\", cols = cols, zoom = 8)"
   ]
  },
  {

@@ -222,7 +224,7 @@
   " 'County', 'Shape__Area', 'Shape__Length', 'geometry']\n",
   " cols.append(col) #select only the cols we want + the new col. \n",
   " rank_df = gdf[gdf[col]==5][cols]# filter ranks = 5\n",
+  " process_vector(s3, folder = 'ACE_biodiversity/'+name, file = name+'.parquet',gdf = rank_df)\n",
   " convert_pmtiles(con, s3, folder ='ACE_biodiversity/'+name, file = name+'.parquet')\n"
   ]
  },

@@ -261,7 +263,7 @@
   " percentile = 0.95\n",
   " threshold = gdf[col].quantile(percentile)\n",
   " ace = gdf[gdf[col]>=threshold][cols]\n",
+  " process_vector(s3, folder = 'ACE_biodiversity/'+name, file = name+'.parquet',gdf = ace)\n",
   " convert_pmtiles(con, s3, folder ='ACE_biodiversity/'+name, file = name+'.parquet')\n",
   "\n",
   "\n",

@@ -299,9 +301,10 @@
   "folder = 'Biodiversity_unique/Plant_richness'\n",
   "name = 'species_D'\n",
   "\n",
+  "download(s3, folder = folder, file = f\"{name}.tif\")\n",
+  "filter_raster(s3, folder = folder, file = f\"{name}.tif\", percentile = 80)\n",
+  "\n",
+  "# convert_h3(con, s3, folder = folder, file = f\"{name}_processed.parquet\", cols = cols, zoom = 8)"
   ]
  },
  {

@@ -327,8 +330,9 @@
   "name = 'endemicspecies_E'\n",
   "\n",
   "download(s3, folder = folder, file = f\"{name}.tif\")\n",
+  "filter_raster(s3, folder = folder, file = f\"{name}.tif\", percentile = 80)\n",
+  "\n",
+  "# convert_h3(con, s3, folder = folder, file = f\"{name}_processed.parquet\", cols = cols, zoom = 8)"
   ]
  },
  {

@@ -370,9 +374,10 @@
   "folder = 'Connectivity_resilience/Resilient_connected_network_allcategories'\n",
   "name = 'rcn_wIntactBioCat_caOnly_2020-10-27'\n",
   "\n",
+  "process_raster(s3, folder = folder, file = f\"{name}.tif\")\n",
+  "\n",
+  "# convert_h3(con, s3, folder = folder, file = f\"{name}_processed.parquet\", cols = cols, \n",
+  " # zoom = 8)"
   ]
  },
  {

@@ -459,10 +464,11 @@
   "wetlands = ['Freshwater Emergent Wetland', 'Freshwater Forested/Shrub Wetland', 'Estuarine and Marine Wetland']\n",
   "gdf = gdf[gdf['WETLAND_TYPE'].isin(wetlands)]\n",
   "\n",
+  "process_vector(s3, folder = folder, file = f\"{name}.parquet\", gdf = gdf)\n",
   "convert_pmtiles(con, s3, folder =folder, file = f\"{name}.parquet\")\n",
+  "\n",
+  "# cols = [item for item in cols if item not in ['ACRES','Shape_Length','Shape_Area','__index_level_0__']]\n",
+  "# geom_to_h3(con, folder = folder, file = f\"{name}.parquet\", cols = cols, zoom = 8)"
   ]
  },
  {

@@ -566,28 +572,29 @@
   "\n",
   "folder = 'NBS_agriculture/Farmland_all'\n",
   "name = 'Important_Farmland_2018'\n",
+  "unzip(s3, folder = folder, file = f\"{name}.zip\")\n",
+  "process_vector(s3, folder = folder, file = f\"{name}.gdb\",crs = \"epsg:4326\")\n",
+  "\n",
+  "convert_pmtiles(con, s3, folder = folder, file =f\"{name}.parquet\")\n",
   "\n",
+  "# cols = [item for item in cols if item not in ['Shape_Length','Shape_Area']]\n",
+  "# convert_h3(con, s3, folder = folder, file = f\"{name}.parquet\", cols = cols, zoom = 8)\n",
   "\n",
   "# only pick a subset \n",
   "folder = 'NBS_agriculture/Farmland_all/Farmland'\n",
   "name = 'Farmland_2018'\n",
+  "gdf = gpd.read_file('Important_Farmland_2018.gdb')\n",
+  "farmland_type = ['P','S','L','U'] # prime, statewide importance, local importance, unique\n",
+  "gdf_farmland = gdf[gdf['polygon_ty'].isin(farmland_type)]\n",
+  "process_vector(s3, folder = folder, file = f\"{name}.parquet\", gdf = gdf_farmland)\n",
+  "convert_pmtiles(con, s3, folder = folder, file =f\"{name}.parquet\")\n",
   "\n",
   "# grazing lands \n",
   "folder = 'NBS_agriculture/Farmland_all/Lands_suitable_grazing'\n",
   "name = 'Grazing_land_2018'\n",
+  "gdf_grazing = gdf[gdf['polygon_ty'] == 'G']\n",
+  "process_vector(s3, folder = folder, file = f\"{name}.parquet\", gdf = gdf_grazing)\n",
+  "convert_pmtiles(con, s3, folder = folder, file =f\"{name}.parquet\")\n"
   ]
  },
  {

@@ -640,15 +647,14 @@
   "\n",
   "unzip(s3, folder = folder, file = 'fire23-1gdb.zip')\n",
   "gdf = gpd.read_file('fire23_1.gdb')\n",
+  "gdf = gdf[~gdf['YEAR_'].isna()]\n",
+  "gdf['YEAR_'] = gdf['YEAR_'].astype('int64')\n",
+  "gdf = gdf[gdf['YEAR_']>=2014]\n",
+  "process_vector(s3, folder = folder, file = f\"{name}.parquet\", gdf = gdf)\n",
+  "convert_pmtiles(con, s3, folder = folder, file = f\"{name}.parquet\")\n",
   "\n",
+  "# cols = [item for item in cols if item not in ['Shape_Length','Shape_Area']]\n",
+  "# convert_h3(con, s3, folder = folder, file = f\"{name}.parquet\", cols = cols, zoom = 8)"
   ]
  },
  {

@@ -732,13 +738,12 @@
   "folder = 'Progress_data_new_protection/Newly_counted_lands'\n",
   "name = 'newly_counted_lands_2024'\n",
   "\n",
+  "unzip(s3, folder = folder, file = f\"{name}.shp.zip\")\n",
+  "process_vector(s3, folder = folder, file = f\"{name}.shp\",crs = \"epsg:4326\")\n",
+  "convert_pmtiles(con, s3, folder = folder, file = f\"{name}.parquet\")\n",
   "\n",
+  "# cols = [item for item in cols if item not in ['Shape_Leng', 'Shape_Area']]\n",
+  "# convert_h3(con, s3, folder = folder, file = f\"{name}.parquet\", cols = cols, zoom = 8)"
   ]
  },
  {

@@ -764,8 +769,8 @@
   "name = 'DAC_2022'\n",
   "\n",
   "unzip(s3, folder = folder, file = 'sb535dacgdbf2022gdb.zip')\n",
+  "process_vector(s3, folder = folder, file = 'SB535DACgdb_F_2022.gdb', file_name = f\"{name}.parquet\")\n",
+  "convert_pmtiles(con, s3, folder = folder, file = f\"{name}.parquet\")"
   ]
  },
  {

@@ -795,14 +800,22 @@
   " .mutate(id=ibis.row_number().over()) #making a unique id \n",
   " ).execute().set_crs('EPSG:3857')\n",
   "\n",
+  "process_vector(s3, folder = folder, file = 'Priority Populations 4.0 Combined Layer.gdb',\n",
   " file_name = f\"{name}.parquet\", gdf = gdf)\n",
+  "convert_pmtiles(con, s3, folder = folder, file = f\"{name}.parquet\")\n",
   "\n",
+  "# cols = [item for item in cols if item not in ['Shape_Length','Shape_Area']]\n",
+  "# convert_h3(con, s3, folder = folder, file = f\"{name}.parquet\", cols = cols, zoom = 8)"
   ]
  },
+ {
+  "cell_type": "code",
+  "execution_count": null,
+  "id": "df1a939c-cb89-4a2f-8309-2819fe52ac45",
+  "metadata": {},
+  "outputs": [],
+  "source": []
+ },
  {
   "cell_type": "markdown",
   "id": "a919ff5f-dff3-4db7-81c2-694f07f37d1d",

@@ -828,7 +841,7 @@
   "\n",
   "gdf = gpd.read_file('Priority Populations 4.0 Combined Layer.gdb')\n",
   "gdf = gdf[gdf['Designatio'] =='Low-income community']\n",
+  "process_vector(s3, folder = folder, file = f\"{name}.parquet\", gdf = gdf)\n",
   "convert_pmtiles(con, s3, folder = folder, file = f\"{name}.parquet\")"
   ]
  },

@@ -853,42 +866,12 @@
   "folder = 'Progress_data_new_protection/Land_Status_Zone_Ecoregion_Counties'\n",
   "name = 'all_regions_reGAP_county_eco'\n",
   "\n",
+  "unzip(s3, folder = folder, file = 'Land_Status_Zone_Ecoregion_Counties.shp.zip')\n",
+  "process_vector(s3, folder = folder, file = 'Land_Status_Zone_Ecoregion_Counties.shp',\n",
   " file_name = f\"{name}.parquet\")\n",
+  "convert_pmtiles(con, s3, folder = folder, file = f\"{name}.parquet\")\n",
   "\n",
+  "# convert_h3(con, s3, folder = folder, file = f\"{name}.parquet\", cols = cols, zoom = 5)"
   ]
  },
  {

@@ -912,18 +895,18 @@
   "folder = 'CPAD'\n",
   "name = 'cced_2024b_release'\n",
   "\n",
+  "unzip(s3, folder = folder, file = f\"{name}.shp.zip\")\n",
+  "process_vector(s3, folder = folder, file = f\"{name}.shp\", crs=\"EPSG:3310\")\n",
+  "convert_pmtiles(con, s3, folder = folder, file = f\"{name}.parquet\")\n",
+  "process_vector(s3, folder = folder, file = f\"{name}.shp\", crs=\"EPSG:4326\")\n",
+  "# convert_h3(con, s3, folder = folder, file = f\"{name}.parquet\", cols= cols, zoom = 8)\n",
   "\n",
   "name = 'cpad_2024b_release'\n",
+  "unzip(s3, folder = folder, file = f\"{name}.shp.zip\")\n",
+  "process_vector(s3, folder = folder, file = f\"{name}.shp\", crs=\"EPSG:3310\")\n",
+  "convert_pmtiles(con, s3, folder = folder, file = f\"{name}.parquet\")\n",
+  "process_vector(s3, folder = folder, file = f\"{name}.shp\", crs=\"EPSG:4326\")\n",
+  "# convert_h3(con, s3, folder = folder, file = f\"{name}.parquet\", cols= cols, zoom = 8)"
   ]
  }
 ],
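The vector cells above all repeat the same unzip, process_vector, convert_pmtiles sequence with different folder/name pairs. A compact sketch of that pattern, assuming the `s3` and `con` objects from the setup cell; the dataset list is illustrative (taken from the Counties and Ecoregion cells) and not every layer in the notebook uses exactly this sequence:

datasets = [
    ("Counties", "CA_counties", "30x30_Counties.zip"),
    ("Ecoregion", "ACE_ecoregions", "30x30_Ecoregions.zip"),
]
for folder, name, archive in datasets:
    unzip(s3, folder=folder, file=archive)                              # fetch and extract the source archive
    process_vector(s3, folder=folder, file=f"{name}.shp")               # reproject, write parquet, upload
    convert_pmtiles(con, s3, folder=folder, file=f"{name}.parquet")     # build and upload PMTiles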
preprocess/h3_utils.py CHANGED
Old side (lines removed by this commit; most of the removed content is not rendered in this view):

@@ -1,14 +1,13 @@
- def convert_h3(con, s3, folder, file, cols, zoom):
-     If lower zoom exists -> compute children from max available until target zoom.
-     bucket, path = info(folder, file)
@@ -23,7 +22,7 @@ def convert_h3(con, s3, folder, file, cols, zoom):
-         print(f'No h3 files exists, computing {zoom} from geometry.')
@@ -38,21 +37,21 @@ def convert_h3(con, s3, folder, file, cols, zoom):
-     elif current_zoom < zoom: #compute child of most refined zoom level
@@ -60,20 +59,19 @@ def h3_from_geom(con, name, cols, zoom):
-         )
-         FROM t1
- def h3_from_parent(con, zoom):

New side (context and added lines):

@@ -1,14 +1,13 @@
  from utils import *
  import re

+ def convert_h3(con, s3, folder, file, cols, zoom, base_folder = "CBN/"):
      """
      Driver function to convert geometries to h3.
      If no zoom levels exist -> compute from geometry at target zoom.
      """
      cols = ", ".join(cols) if isinstance(cols, list) else cols
+     bucket, path = info(folder, file, base_folder)
      path, file = os.path.split(path)
      name, ext = os.path.splitext(file)
      name = name.replace('-', '')

@@ -23,7 +22,7 @@ def convert_h3(con, s3, folder, file, cols, zoom):
          zooms.append(int(match.group(1)))

      if not zooms: # if no h3 files exist
+         print(f'No h3 files exists, computing zoom level {zoom} from geometry.')
          con.read_parquet(f"s3://{bucket}/{path}/{file}", table_name=name)
          h3_from_geom(con, name, cols, zoom)
          con.sql(f'''

@@ -38,21 +37,21 @@ def convert_h3(con, s3, folder, file, cols, zoom):
          print(f'Zoom {zoom} already exists!')
          return

+     # elif current_zoom < zoom: #compute child of most refined zoom level
+     #     print(f'Reading zoom {current_zoom}')
+     #     con.read_parquet(
+     #         f"s3://{bucket}/{path}/hex/zoom{current_zoom}/{name}.parquet",
+     #         table_name=f"h3_h{current_zoom}"
+     #     )
+     #     print(f'Computing {zoom} from {current_zoom}')

+     #     for z in range(current_zoom + 1, zoom + 1):
+     #         print(f'Current zoom {z}')
+     #         h3_from_parent(con, z)
+     #         con.sql(f'''
+     #             SELECT *, UNNEST(h3_cell_to_children(h{z-1}, {z})) AS h{z}
+     #             FROM h3_h{z-1}
+     #         ''').to_parquet(f"s3://{bucket}/{path}/hex/zoom{z}/{name}.parquet")

@@ -60,20 +59,19 @@ def h3_from_geom(con, name, cols, zoom):
      Computes hexes directly from geometry.
      """
      con.raw_sql(f'''
+         CREATE OR REPLACE TEMP TABLE t2 AS
+         SELECT {cols},
+             h3_polygon_wkt_to_cells_string(ST_Force2D(dump.geom), {zoom}) AS h{zoom}
+         FROM (
+             SELECT {cols}, UNNEST(ST_Dump(geom)) AS dump
+             FROM {name}
+         )
      ''')

+ # def h3_from_parent(con, zoom):
+ #     con.raw_sql(f'''
+ #         CREATE OR REPLACE TEMP TABLE h3_h{zoom} AS
+ #         SELECT *, UNNEST(h3_cell_to_children(h{zoom-1}, {zoom})) AS h{zoom}
+ #         FROM h3_h{zoom-1}
+ #     ''')
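The commented-out branch above is the "process h3 by looping" idea from the commit message: derive each finer zoom level from the children of the previous one instead of recomputing from geometry. A standalone sketch of that loop, assuming a DuckDB-backed Ibis connection with the h3 extension loaded and an existing hex table at the coarsest zoom; the function name is illustrative, but the table and column naming follows the convention used above:

def h3_children_loop(con, bucket, path, name, current_zoom, target_zoom):
    # expand each hexagon into its children, one zoom level at a time
    for z in range(current_zoom + 1, target_zoom + 1):
        con.raw_sql(f'''
            CREATE OR REPLACE TEMP TABLE h3_h{z} AS
            SELECT *, UNNEST(h3_cell_to_children(h{z-1}, {z})) AS h{z}
            FROM h3_h{z-1}
        ''')
        # persist each level alongside the existing hex/zoom{z} layout
        con.sql(f'SELECT * FROM h3_h{z}').to_parquet(
            f"s3://{bucket}/{path}/hex/zoom{z}/{name}.parquet")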
preprocess/utils.py CHANGED
Old side (lines removed by this commit; most of the removed content is not rendered in this view):

@@ -10,48 +10,50 @@ import ibis
- from rasterio.
- from shapely.geometry import
- def info(folder, file, bucket = "public-ca30x30", base_folder = 'CBN/'):
- def download(s3, folder, file, file_name = None):
-     bucket, path = info(folder, file)
-     s3.fget_object(bucket, path ,file_name)
- def upload(s3, folder, file):
-     bucket, path = info(folder, file)
- def unzip(s3, folder, file):
-     download(s3, folder, file)
- def process_vector(s3, folder, file, file_name = None, gdf = None, crs="EPSG:4326"):
@@ -67,11 +69,11 @@ def process_vector(s3, folder, file, file_name = None, gdf = None, crs="EPSG:4326"):
-     upload(s3, folder, parquet_file)
-     return
- def process_raster(s3, folder, file, file_name = None):
@@ -82,29 +84,30 @@ def process_raster(s3, folder, file, file_name = None):
-     if not exists_on_s3(s3, folder, output_file):
-         upload(s3, folder, output_file)
-     if not exists_on_s3(s3, folder, output_cog_file):
-         upload(s3, folder, output_cog_file)
-     # Vectorize raster
-     if not exists_on_s3(s3, folder, output_vector_file):
-     else:
-     return cols
@@ -147,31 +150,37 @@ def make_cog(input_file, crs="EPSG:4326"):
- def make_vector(input_file, crs="EPSG:4326"):
-     # Open raster
-     with rasterio.open(input_file) as src:
-         image = src.read(1) # read first band
-         mask = image != src.nodata # mask out nodata
@@ -192,11 +201,11 @@ def filter_raster(s3, folder, file, percentile):
-     return cols
- def convert_pmtiles(con, s3, folder, file):
@@ -205,14 +214,14 @@ def convert_pmtiles(con, s3, folder, file):
-     upload(s3, folder, name+'.pmtiles')
- def exists_on_s3(s3, folder, file):
-     bucket, path = info(folder, file)

New side (context and added lines):

@@ -10,48 +10,50 @@ import ibis
  from ibis import _

  import rasterio
+ from rasterio.transform import xy
+ from shapely.geometry import Point
  import numpy as np
+ from pyproj import Transformer

+ def info(folder, file, base_folder, bucket = "public-ca30x30"):
      """
      Extract minio path to upload/download data
      """
+     if (folder is None) & (base_folder is None):
+         path = file
+     else:
+         path = os.path.join(base_folder, folder, file)
      # path = os.path.join(folder, file)
      return bucket, path

+ def download(s3, folder, file, file_name = None, base_folder = "CBN/"):
      """
      Downloading file from minio
      """
      if not file_name:
          file_name = file
+     bucket, path = info(folder, file, base_folder)
+     s3.fget_object(bucket, path , file_name)
      return

+ def upload(s3, folder, file, base_folder = "CBN/"):
      """
      Uploading file from minio
      """
+     bucket, path = info(folder, file, base_folder)
      s3.fput_object(bucket, path ,file)
      return

+ def unzip(s3, folder, file, base_folder = "CBN/"):
      """
      Unzipping zip files
      """
+     download(s3, folder, file, base_folder)
      with zipfile.ZipFile(file, 'r') as zip_ref:
          zip_ref.extractall()
      return

+ def process_vector(s3, folder, file, file_name = None, gdf = None, crs="EPSG:4326", base_folder = "CBN/"):
      """
      Driver function to process vectors
      """

@@ -67,11 +69,11 @@ def process_vector(s3, folder, file, file_name = None, gdf = None, crs="EPSG:4326"):
      name, ext = os.path.splitext(file)
      parquet_file = f"{name}{'.parquet'}"
      gdf.to_parquet(parquet_file)
+     upload(s3, folder, parquet_file, base_folder)
+     # return gdf.drop('geom',axis = 1).columns.to_list()
+     return

+ def process_raster(s3, folder, file, file_name = None, base_folder = "CBN/"):
      """
      Driver function to process rasters
      """

@@ -82,29 +84,30 @@ def process_raster(s3, folder, file, file_name = None):
      output_cog_file = f"{name}_processed_COG{ext}"
      output_vector_file = f"{name}_processed.parquet"
      # Reproject raster
+     if not exists_on_s3(s3, folder, output_file, base_folder):
          output_file = reproject_raster(file)
+         upload(s3, folder, output_file, base_folder)
      else:
          print(f"{output_file} already exists on S3, skipping reprojection/upload.")

      # Make COG
+     if not exists_on_s3(s3, folder, output_cog_file, base_folder):
          output_cog_file = make_cog(output_file)
+         upload(s3, folder, output_cog_file, base_folder)
      else:
          print(f"{output_cog_file} already exists on S3, skipping COG conversion/upload.")

+     # # Vectorize raster
+     # if not exists_on_s3(s3, folder, output_vector_file, base_folder):
+     #     output_vector_file, cols = make_vector(output_file)
+     #     upload(s3, folder, output_vector_file, base_folder)
+     # else:
+     #     print(f"{output_vector_file} already exists on S3, skipping vectorization/upload.")
+     #     # We still need column names
+     #     gdf = gpd.read_parquet(output_vector_file)
+     #     cols = gdf.drop('geom', axis=1).columns.to_list()
+     # return cols
+     return

  def reproject_raster(input_file, crs="EPSG:3310"):
      """

@@ -147,31 +150,37 @@ def make_cog(input_file, crs="EPSG:4326"):
          print(f"Error occurred during processing: {e}")
      return output_file

+ # def make_vector(input_file, crs="EPSG:4326"):
+ #     """
+ #     Converting rasters to vector formats in order to convert to h3
+ #     """
+ #     name, ext = os.path.splitext(input_file)
+ #     output_file = f"{name}.parquet"

+ #     with rasterio.open(input_file) as src:
+ #         band = src.read(1) # read first band
+ #         mask = band != src.nodata # mask out nodata
+ #         rows, cols = np.where(mask)
+ #         x, y = rasterio.transform.xy(src.transform, rows, cols, offset = "center")

+ #     # reproject
+ #     if src.crs and src.crs.to_string() != crs:
+ #         transformer = Transformer.from_crs(src.crs, crs, always_xy=True)
+ #         x, y = transformer.transform(x, y)
+ #         crs_out = crs
+ #     else:
+ #         crs_out = src.crs

+ #     gdf = gpd.GeoDataFrame(
+ #         {"value": band[rows, cols]},
+ #         geometry=[Point(xy) for xy in zip(x, y)],
+ #         crs=crs_out
+ #     )

+ #     gdf.rename_geometry('geom', inplace=True)
+ #     gdf['id'] = np.arange(len(gdf))
+ #     gdf.to_parquet(output_file)
+ #     return output_file, gdf.drop('geom',axis = 1).columns.to_list()

  def filter_raster(s3, folder, file, percentile):
      """

@@ -192,11 +201,11 @@ def filter_raster(s3, folder, file, percentile):
      profile.update(dtype=rasterio.float64)
      with rasterio.open(new_file, "w", **profile) as dst:
          dst.write(filtered, 1)
+     process_raster(s3, folder, file)
+     # return cols
+     return

+ def convert_pmtiles(con, s3, folder, file, base_folder = "CBN/"):
      """
      Convert to PMTiles with tippecanoe
      """

@@ -205,14 +214,14 @@ def convert_pmtiles(con, s3, folder, file):
      (con.read_parquet(file).execute().set_crs('epsg:3310')
          .to_crs('epsg:4326').to_file(name+'.geojson'))
      to_pmtiles(name+'.geojson', name+'.pmtiles', options = ['--extend-zooms-if-still-dropping'])
+     upload(s3, folder, name+'.pmtiles', base_folder)
      return

+ def exists_on_s3(s3, folder, file, base_folder = "CBN/"):
      """
      Check if a file exists on S3
      """
+     bucket, path = info(folder, file, base_folder)
      try:
          s3.stat_object(bucket, path)
          return True
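On the commit message's other deferred item, "fix h3 to raster": the commented-out `make_vector` above already turns valid raster cells into centre points, and the missing piece is indexing those points with H3. A self-contained sketch of that step, assuming the h3-py package with its v4 API; none of this is repo code and the function name is illustrative:

import h3
import numpy as np
import rasterio
from pyproj import Transformer
from rasterio.transform import xy

def raster_to_h3(input_file, zoom=8):
    # read the first band and keep only valid (non-nodata) cells
    with rasterio.open(input_file) as src:
        band = src.read(1)
        rows, cols = np.where(band != src.nodata)
        x, y = xy(src.transform, rows, cols, offset="center")
        # H3 indexes WGS84 latitude/longitude
        to_wgs84 = Transformer.from_crs(src.crs, "EPSG:4326", always_xy=True)
        lng, lat = to_wgs84.transform(x, y)
    cells = [h3.latlng_to_cell(la, lo, zoom) for la, lo in zip(lat, lng)]
    return cells, band[rows, cols]

The resulting cell ids and values could then be written to parquet and aggregated per hexagon, matching the hex/zoom layout that convert_h3 expects.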