cassiebuhler committed
Commit 49369e2 · 1 Parent(s): 46779ef

joined newly added areas to CA nature data with h3
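The core technique in this commit is an attribute join on shared zoom-8 H3 cell ids ("h8") rather than a polygon overlay. A minimal sketch of that pattern, with hypothetical local file paths (the real cell appears in the CBN-data.ipynb diff below):

```python
# Minimal sketch of the h8-key join this commit performs; file paths are
# hypothetical local copies of the hex parquets written by convert_h3.
import ibis

con = ibis.duckdb.connect()

ca_nature = con.read_parquet("ca-30x30-cbn.parquet")            # hypothetical path
new_lands = con.read_parquet("newly_counted_lands_2024.parquet")  # hypothetical path

# Keep only the join key and the attribute to carry over.
new = new_lands.select("h8", "updatetype")

# Hexes present in both tables pick up the new-lands attribute; others get NULL.
joined = ca_nature.left_join(new, "h8").drop("h8_right")
joined.to_parquet("ca-30x30-cbn-newlyprotected.parquet")
```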
app/footer.md CHANGED
@@ -27,3 +27,5 @@ Data: https://data.carlboettiger.info/public-ca30x30
 
 - Historic Fire Perimeters by CAL FIRE (2023). Data: https://www.fire.ca.gov/Home/What-We-Do/Fire-Resource-Assessment-Program/GIS-Mapping-and-Data-Analytics
 
+ ### LLMs
+ This app can use a selection of open-weights language models hosted on the National Research Platform, https://nrp.ai/documentation/userdocs/ai/llm-managed/.
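Managed model services like this are typically reached through an OpenAI-compatible API. A hypothetical sketch of how the app might call one (the base URL, model id, and token below are placeholders, not values from this repo; see the linked NRP docs for the real ones):

```python
# Hypothetical sketch: querying an NRP-hosted open-weights model via an
# OpenAI-compatible endpoint. base_url, api_key, and model are placeholders.
from openai import OpenAI

client = OpenAI(
    base_url="https://example-nrp-endpoint/v1",  # placeholder; see NRP docs
    api_key="YOUR_NRP_TOKEN",                    # placeholder
)

resp = client.chat.completions.create(
    model="some-open-weights-model",             # placeholder model id
    messages=[{"role": "user", "content": "Summarize CA 30x30 in one sentence."}],
)
print(resp.choices[0].message.content)
```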
preprocess/CBN-data.ipynb CHANGED
@@ -738,12 +738,22 @@
  "folder = 'Progress_data_new_protection/Newly_counted_lands'\n",
  "name = 'newly_counted_lands_2024'\n",
  "\n",
- "unzip(s3, folder = folder, file = f\"{name}.shp.zip\")\n",
- "process_vector(s3, folder = folder, file = f\"{name}.shp\",crs = \"epsg:4326\")\n",
- "convert_pmtiles(con, s3, folder = folder, file = f\"{name}.parquet\")\n",
- "\n",
+ "# unzip(s3, folder = folder, file = f\"{name}.shp.zip\")\n",
+ "# cols = process_vector(s3, folder = folder, file = f\"{name}.shp\",crs = \"epsg:4326\")\n",
+ "# convert_pmtiles(con, s3, folder = folder, file = f\"{name}.parquet\")\n",
  "# cols = [item for item in cols if item not in ['Shape_Leng', 'Shape_Area']]\n",
- "# convert_h3(con, s3, folder = folder, file = f\"{name}.parquet\", cols = cols, zoom = 8)"
+ "\n",
+ "cols = ['mgmt_stack', 'reGAP', 'Easement',\n",
+ "        'TYPE','CA_County_','CA_Region_',\n",
+ "        'TerrMar','CA_Ecoregi','DefaultSel',\n",
+ "        'CA_Ecore_1','CA_Region1','CA_County1',\n",
+ "        'ACCESS_TYP','MNG_AGNCY','MNG_AG_LEV',\n",
+ "        'UNIT_NAME','Acres','cpad_ACCES',\n",
+ "        'cpad_PARK_','cpad_MNG_A','cpad_MNG_1',\n",
+ "        'CA_Marine_','Release_Ye','ORIG_FID',\n",
+ "        'updatetype']\n",
+ "\n",
+ "convert_h3(con, s3, folder = folder, file = f\"{name}.parquet\", cols = cols, zoom = 8)"
  ]
  },
  {
@@ -808,14 +818,6 @@
  "# convert_h3(con, s3, folder = folder, file = f\"{name}.parquet\", cols = cols, zoom = 8)"
  ]
  },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "df1a939c-cb89-4a2f-8309-2819fe52ac45",
- "metadata": {},
- "outputs": [],
- "source": []
- },
  {
  "cell_type": "markdown",
  "id": "a919ff5f-dff3-4db7-81c2-694f07f37d1d",
@@ -908,6 +910,102 @@
  "process_vector(s3, folder = folder, file = f\"{name}.shp\", crs=\"EPSG:4326\")\n",
  "# convert_h3(con, s3, folder = folder, file = f\"{name}.parquet\", cols= cols, zoom = 8)"
  ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "c7f43e03-d6f9-4109-8876-b8f384b1c42e",
+ "metadata": {},
+ "source": [
+ "# CA Nature"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "ebaecd00-8df6-4dd0-a374-45f5937607f2",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "%%time\n",
+ "con = ibis.duckdb.connect('ca',extensions = [\"spatial\", \"h3\"])\n",
+ "set_secrets(con)\n",
+ "folder = None\n",
+ "name = 'ca-30x30-cbn'\n",
+ "\n",
+ "cols = ['id','established',\n",
+ "        'gap_code','status','name','access_type',\n",
+ "        'manager','manager_type','ecoregion',\n",
+ "        'easement','acres', 'type','county',\n",
+ "        'climate_zone','habitat_type',\n",
+ "        'resilient_connected_network',\n",
+ "        'ACE_amphibian_richness',\n",
+ "        'ACE_reptile_richness',\n",
+ "        'ACE_bird_richness',\n",
+ "        'ACE_mammal_richness',\n",
+ "        'ACE_rare_amphibian_richness',\n",
+ "        'ACE_rare_reptile_richness',\n",
+ "        'ACE_rare_bird_richness',\n",
+ "        'ACE_rare_mammal_richness',\n",
+ "        'ACE_endemic_amphibian_richness',\n",
+ "        'ACE_endemic_reptile_richness',\n",
+ "        'ACE_endemic_bird_richness',\n",
+ "        'ACE_endemic_mammal_richness',\n",
+ "        'wetlands','fire','farmland',\n",
+ "        'grazing','DAC','low_income',\n",
+ "        'plant_richness',\n",
+ "        'rarityweighted_endemic_plant_richness']\n",
+ "# download(s3, folder = folder, file = f\"{name}.parquet\")\n",
+ "# process_vector(s3, folder = folder, file = f\"{name}.shp\", crs=\"EPSG:3310\")\n",
+ "# convert_pmtiles(con, s3, folder = folder, file = f\"{name}.parquet\")\n",
+ "# process_vector(s3, folder = folder, file = f\"{name}.shp\", crs=\"EPSG:4326\")\n",
+ "\n",
+ "convert_h3(con, s3, folder = folder, file = f\"{name}.parquet\", cols= cols, group = 'ecoregion', zoom = 8)\n",
+ "\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "980c7e88-8dc6-4bc6-bfa4-8ea301c6ee80",
+ "metadata": {},
+ "source": [
+ "#### join with newly protected lands"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "92b2107c-4771-4217-a566-72c75389b677",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "con = ibis.duckdb.connect('joined',extensions = [\"spatial\", \"h3\"])\n",
+ "set_secrets(con)\n",
+ "\n",
+ "ca_nature_url = \"s3://public-ca30x30/hex/zoom8/ca-30x30-cbn.parquet\"\n",
+ "new_lands_url = \"s3://public-ca30x30/CBN/Progress_data_new_protection/Newly_counted_lands/hex/zoom8/newly_counted_lands_2024.parquet\"\n",
+ "\n",
+ "ca_nature = (con.read_parquet(ca_nature_url)\n",
+ "             .mutate(update_type = None)\n",
+ "             )\n",
+ "\n",
+ "new = (con.read_parquet(new_lands_url)\n",
+ "       .mutate(update_type = 'updatetype')\n",
+ "       .select(\"update_type\",\"h8\")\n",
+ "       )\n",
+ "\n",
+ "joined = (ca_nature.left_join(new,\"h8\")\n",
+ "          .drop('h8_right','update_type')\n",
+ "          .rename(update_type = 'update_type_right')\n",
+ "          )\n",
+ "\n",
+ "name = 'ca-30x30-cbn-newlyprotected'\n",
+ "\n",
+ "joined.to_parquet(f\"{name}.parquet\")\n",
+ "joined.to_parquet(f\"s3://public-ca30x30/hex/zoom8/{name}.parquet\")\n",
+ "\n",
+ "#maybe get pmtiles?\n",
+ "convert_pmtiles(con, s3, folder = None, file = f\"{name}.parquet\", base_folder = None, current_crs = 'epsg:4326')"
+ ]
  }
  ],
  "metadata": {
preprocess/h3_utils.py CHANGED
@@ -1,60 +1,37 @@
  from utils import *
  import re
 
- def convert_h3(con, s3, folder, file, cols, zoom, base_folder = "CBN/"):
+ def convert_h3(con, s3, folder, file, cols, zoom, group = None, base_folder = "CBN/"):
      """
      Driver function to convert geometries to h3.
-     If no zoom levels exist -> compute from geometry at target zoom.
      """
      cols = ", ".join(cols) if isinstance(cols, list) else cols
-     bucket, path = info(folder, file, base_folder)
+     if folder:
+         bucket, path = info(folder, file, base_folder)
+     else:
+         bucket, path = info(None, file, None)
      path, file = os.path.split(path)
      name, ext = os.path.splitext(file)
-     name = name.replace('-', '')
      print(f"Processing: {name}")
+     t_name = name.replace('-', '')
 
-     hex_paths = s3.list_objects(bucket, prefix=f"{path}/hex/", recursive=True)
-     zooms = []
-     # check what zooms exist
-     for obj in hex_paths:
-         match = re.search(r"/zoom(\d{1,2})/", obj.object_name)
-         if match:
-             zooms.append(int(match.group(1)))
-
-     if not zooms: # if no h3 files exist
-         print(f'No h3 files exists, computing zoom level {zoom} from geometry.')
-         con.read_parquet(f"s3://{bucket}/{path}/{file}", table_name=name)
-         h3_from_geom(con, name, cols, zoom)
-         con.sql(f'''
-             SELECT {cols}, UNNEST(h{zoom}) AS h{zoom}
-             FROM t2
-         ''').to_parquet(f"s3://{bucket}/{path}/hex/zoom{zoom}/{name}.parquet")
-
-     else:
-         current_zoom = max(zooms)
-
-         if zoom in zooms:
-             print(f'Zoom {zoom} already exists!')
-             return
-
-         # elif current_zoom < zoom: #compute child of most refined zoom level
-         #     print(f'Reading zoom {current_zoom}')
-         #     con.read_parquet(
-         #         f"s3://{bucket}/{path}/hex/zoom{current_zoom}/{name}.parquet",
-         #         table_name=f"h3_h{current_zoom}"
-         #     )
-         #     print(f'Computing {zoom} from {current_zoom}')
-
-         #     for z in range(current_zoom + 1, zoom + 1):
-         #         print(f'Current zoom {z}')
-         #         h3_from_parent(con, z)
-         #         con.sql(f'''
-         #             SELECT *, UNNEST(h3_cell_to_children(h{z-1}, {z})) AS h{z}
-         #             FROM h3_h{z-1}
-         #         ''').to_parquet(f"s3://{bucket}/{path}/hex/zoom{z}/{name}.parquet")
+     if group:
+         con.read_parquet(f"s3://{bucket}/{name}.parquet", table_name=t_name)
+         print(f'Computing zoom level {zoom}, grouping the data based on {group}')
+         compute_grouped(con, t_name, cols, zoom, group, path = f"{bucket}/{path}")
+         (con.read_parquet(f"s3://{bucket}/hex/zoom{zoom}/group_{group}/**")
+          .to_parquet(f"s3://{bucket}/hex/zoom{zoom}/{name}.parquet")
+         )
+
+     else:
+         con.read_parquet(f"s3://{bucket}/{path}/{file}", table_name=t_name)
+         print(f'Computing zoom level {zoom} without grouping.')
+         save_path = f"s3://{bucket}/{path}/hex/zoom{zoom}/{name}.parquet"
+         h3_from_geom(con, t_name, cols, save_path, zoom)
 
- def h3_from_geom(con, name, cols, zoom):
+ def h3_from_geom(con, name, cols, save_path, zoom):
      """
      Computes hexes directly from geometry.
      """
@@ -67,11 +44,22 @@ def h3_from_geom(con, name, cols, zoom):
          FROM {name}
      )
      ''')
+     con.sql(f'''
+         SELECT {cols}, UNNEST(h{zoom}) AS h{zoom},
+         ST_GeomFromText(h3_cell_to_boundary_wkt(UNNEST(h{zoom}))) AS geom
+         FROM t2
+     ''').to_parquet(save_path)
 
 
- # def h3_from_parent(con, zoom):
- #     con.raw_sql(f'''
- #         CREATE OR REPLACE TEMP TABLE h3_h{zoom} AS
- #         SELECT *, UNNEST(h3_cell_to_children(h{zoom-1}, {zoom})) AS h{zoom}
- #         FROM h3_h{zoom-1}
- #     ''')
+ def compute_grouped(con, name, cols, zoom, group, path):
+     unique_groups = con.table(name).select(group).distinct().execute()[group].tolist()
+     # separate data by group
+     for sub in unique_groups:
+         sub_name = f"{name}_{re.sub(r'\W+', '_', sub)}"
+         con.raw_sql(f"""
+             CREATE OR REPLACE TEMP TABLE {sub_name} AS
+             SELECT * FROM {name} WHERE {group} = '{sub}'
+         """)
+         save_path = f"s3://{path}/hex/zoom{zoom}/group_{group}/{sub.replace(' ', '')}.parquet"
+         h3_from_geom(con, sub_name, cols, save_path, zoom)
+
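The new `h3_from_geom` SELECT emits each cell id alongside its polygon boundary via `h3_cell_to_boundary_wkt`. For readers unfamiliar with those primitives, a minimal self-contained sketch, assuming DuckDB's community h3 extension (which provides `h3_latlng_to_cell` alongside the `h3_cell_to_boundary_wkt` call seen in the diff):

```python
# Sketch of the two h3 extension calls the new code leans on: mapping a point
# to a zoom-8 cell, and turning a cell back into a polygon geometry.
import duckdb

con = duckdb.connect()
con.execute("INSTALL h3 FROM community")
con.execute("LOAD h3")
con.execute("INSTALL spatial")
con.execute("LOAD spatial")

row = con.sql("""
    SELECT
        h3_latlng_to_cell(38.5, -121.5, 8) AS h8,
        ST_GeomFromText(h3_cell_to_boundary_wkt(
            h3_latlng_to_cell(38.5, -121.5, 8))) AS geom
""").fetchone()
print(row)  # (cell id, hexagon polygon around the point)
```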
preprocess/utils.py CHANGED
@@ -208,15 +208,20 @@ def filter_raster(s3, folder, file, percentile):
      # return cols
      return
 
- def convert_pmtiles(con, s3, folder, file, base_folder = "CBN/"):
+ def convert_pmtiles(con, s3, folder, file, base_folder = "CBN/", current_crs = 'epsg:3310'):
      """
      Convert to PMTiles with tippecanoe
      """
+     print('converting pmtiles')
      name, ext = os.path.splitext(file)
      if ext != '.geojson':
-         (con.read_parquet(file).execute().set_crs('epsg:3310')
-          .to_crs('epsg:4326').to_file(name+'.geojson'))
-     to_pmtiles(name+'.geojson', name+'.pmtiles', options = ['--extend-zooms-if-still-dropping'])
+         if current_crs != 'epsg:4326':
+             data = (con.read_parquet(file).execute().set_crs(current_crs)
+                     .to_crs('epsg:4326'))
+         else:
+             data = (con.read_parquet(file).execute().set_crs(current_crs))
+         data.to_file(name+'.geojson')
+     to_pmtiles(name+'.geojson', name+'.pmtiles', options = ['--extend-zooms-if-still-dropping','--drop-densest-as-needed'])
      upload(s3, folder, name+'.pmtiles', base_folder)
      return
 
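`--drop-densest-as-needed` is a standard tippecanoe option that thins the densest features first whenever a tile would exceed the size limit, a sensible pairing with uniformly dense hex layers. The notebook drives the new signature roughly like this (a sketch mirroring the call in the CBN-data.ipynb diff; `con` and `s3` are assumed to be the ibis connection and S3 client configured there):

```python
# Mirrors the notebook's call; with current_crs='epsg:4326' the reprojection
# branch is skipped and the GeoDataFrame is only tagged with its CRS.
name = 'ca-30x30-cbn-newlyprotected'
convert_pmtiles(con, s3, folder=None, file=f"{name}.parquet",
                base_folder=None, current_crs='epsg:4326')
```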