Commit 49369e2
Parent(s): 46779ef

joined newly added areas to CA nature data with h3

Files changed:
- app/footer.md (+2 -0)
- preprocess/CBN-data.ipynb (+111 -13)
- preprocess/h3_utils.py (+38 -50)
- preprocess/utils.py (+9 -4)
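The substance of the commit is an H3 join: the CA Nature layer and the newly counted lands are each converted to zoom-8 hexes with convert_h3, and the two hex tables are then left-joined on the shared h8 cell ID so every CA Nature hex picks up an update_type marker where newly counted land overlaps it. A minimal sketch of that pattern with ibis/DuckDB (the file names and the single carried-over column here are illustrative, not the exact ones used in the notebook below):

    # minimal sketch, assuming two hex-level parquet files that share an `h8` cell-ID column
    import ibis

    con = ibis.duckdb.connect(extensions=["spatial", "h3"])
    base = con.read_parquet("base_hexes.parquet")             # hypothetical input path
    new_lands = con.read_parquet("new_lands_hexes.parquet")   # hypothetical input path

    # carry only the attribute of interest, then join on the shared cell ID;
    # hexes with no overlapping new land end up with a NULL marker
    joined = base.left_join(new_lands.select("update_type", "h8"), "h8")
    joined.to_parquet("joined_hexes.parquet")

In the notebook below, the base table is the hexed CBN CA Nature layer and the join result is written back to S3 before being converted to PMTiles.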
app/footer.md
CHANGED
@@ -27,3 +27,5 @@ Data: https://data.carlboettiger.info/public-ca30x30
 
 - Historic Fire Perimeters by CAL FIRE (2023). Data: https://www.fire.ca.gov/Home/What-We-Do/Fire-Resource-Assessment-Program/GIS-Mapping-and-Data-Analytics
 
+### LLMs
+This app can use a selection of open-weights language models hosted on the National Research Platform, https://nrp.ai/documentation/userdocs/ai/llm-managed/.
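The two added footer lines only note that the app can call open-weights models hosted on the National Research Platform. As a hedged illustration, and assuming the managed service exposes an OpenAI-compatible endpoint (the endpoint URL, model name, and credential variable below are placeholders, not taken from this repository):

    # illustrative sketch only; endpoint, model id, and env var are assumptions
    import os
    from openai import OpenAI

    client = OpenAI(
        base_url="https://llm.example-nrp.io/v1",   # placeholder endpoint
        api_key=os.environ["NRP_API_KEY"],          # hypothetical credential
    )
    reply = client.chat.completions.create(
        model="an-open-weights-model",              # placeholder model id
        messages=[{"role": "user", "content": "Which counties gained newly protected land in 2024?"}],
    )
    print(reply.choices[0].message.content)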
preprocess/CBN-data.ipynb
CHANGED
@@ -738,12 +738,22 @@
 "folder = 'Progress_data_new_protection/Newly_counted_lands'\n",
 "name = 'newly_counted_lands_2024'\n",
 "\n",
-"unzip(s3, folder = folder, file = f\"{name}.shp.zip\")\n",
-"process_vector(s3, folder = folder, file = f\"{name}.shp\",crs = \"epsg:4326\")\n",
-"convert_pmtiles(con, s3, folder = folder, file = f\"{name}.parquet\")\n",
-"\n",
+"# unzip(s3, folder = folder, file = f\"{name}.shp.zip\")\n",
+"# cols = process_vector(s3, folder = folder, file = f\"{name}.shp\",crs = \"epsg:4326\")\n",
+"# convert_pmtiles(con, s3, folder = folder, file = f\"{name}.parquet\")\n",
 "# cols = [item for item in cols if item not in ['Shape_Leng', 'Shape_Area']]\n",
-"
+"\n",
+"cols = ['mgmt_stack', 'reGAP', 'Easement',\n",
+"        'TYPE','CA_County_','CA_Region_',\n",
+"        'TerrMar','CA_Ecoregi','DefaultSel',\n",
+"        'CA_Ecore_1','CA_Region1','CA_County1',\n",
+"        'ACCESS_TYP','MNG_AGNCY','MNG_AG_LEV',\n",
+"        'UNIT_NAME','Acres','cpad_ACCES',\n",
+"        'cpad_PARK_','cpad_MNG_A','cpad_MNG_1',\n",
+"        'CA_Marine_','Release_Ye','ORIG_FID',\n",
+"        'updatetype']\n",
+"\n",
+"convert_h3(con, s3, folder = folder, file = f\"{name}.parquet\", cols = cols, zoom = 8)"
 ]
 },
 {
@@ -808,14 +818,6 @@
 "# convert_h3(con, s3, folder = folder, file = f\"{name}.parquet\", cols = cols, zoom = 8)"
 ]
 },
-{
-"cell_type": "code",
-"execution_count": null,
-"id": "df1a939c-cb89-4a2f-8309-2819fe52ac45",
-"metadata": {},
-"outputs": [],
-"source": []
-},
 {
 "cell_type": "markdown",
 "id": "a919ff5f-dff3-4db7-81c2-694f07f37d1d",
@@ -908,6 +910,102 @@
 "process_vector(s3, folder = folder, file = f\"{name}.shp\", crs=\"EPSG:4326\")\n",
 "# convert_h3(con, s3, folder = folder, file = f\"{name}.parquet\", cols= cols, zoom = 8)"
 ]
+},
+{
+"cell_type": "markdown",
+"id": "c7f43e03-d6f9-4109-8876-b8f384b1c42e",
+"metadata": {},
+"source": [
+"# CA Nature"
+]
+},
+{
+"cell_type": "code",
+"execution_count": null,
+"id": "ebaecd00-8df6-4dd0-a374-45f5937607f2",
+"metadata": {},
+"outputs": [],
+"source": [
+"%%time\n",
+"con = ibis.duckdb.connect('ca',extensions = [\"spatial\", \"h3\"])\n",
+"set_secrets(con)\n",
+"folder = None\n",
+"name = 'ca-30x30-cbn'\n",
+"\n",
+"cols = ['id','established',\n",
+"        'gap_code','status','name','access_type',\n",
+"        'manager','manager_type','ecoregion',\n",
+"        'easement','acres', 'type','county',\n",
+"        'climate_zone','habitat_type',\n",
+"        'resilient_connected_network',\n",
+"        'ACE_amphibian_richness',\n",
+"        'ACE_reptile_richness',\n",
+"        'ACE_bird_richness',\n",
+"        'ACE_mammal_richness',\n",
+"        'ACE_rare_amphibian_richness',\n",
+"        'ACE_rare_reptile_richness',\n",
+"        'ACE_rare_bird_richness',\n",
+"        'ACE_rare_mammal_richness',\n",
+"        'ACE_endemic_amphibian_richness',\n",
+"        'ACE_endemic_reptile_richness',\n",
+"        'ACE_endemic_bird_richness',\n",
+"        'ACE_endemic_mammal_richness',\n",
+"        'wetlands','fire','farmland',\n",
+"        'grazing','DAC','low_income',\n",
+"        'plant_richness',\n",
+"        'rarityweighted_endemic_plant_richness']\n",
+"# download(s3, folder = folder, file = f\"{name}.parquet\")\n",
+"# process_vector(s3, folder = folder, file = f\"{name}.shp\", crs=\"EPSG:3310\")\n",
+"# convert_pmtiles(con, s3, folder = folder, file = f\"{name}.parquet\")\n",
+"# process_vector(s3, folder = folder, file = f\"{name}.shp\", crs=\"EPSG:4326\")\n",
+"\n",
+"convert_h3(con, s3, folder = folder, file = f\"{name}.parquet\", cols= cols, group = 'ecoregion', zoom = 8)\n",
+"\n"
+]
+},
+{
+"cell_type": "markdown",
+"id": "980c7e88-8dc6-4bc6-bfa4-8ea301c6ee80",
+"metadata": {},
+"source": [
+"#### join with newly protected lands"
+]
+},
+{
+"cell_type": "code",
+"execution_count": null,
+"id": "92b2107c-4771-4217-a566-72c75389b677",
+"metadata": {},
+"outputs": [],
+"source": [
+"con = ibis.duckdb.connect('joined',extensions = [\"spatial\", \"h3\"])\n",
+"set_secrets(con)\n",
+"\n",
+"ca_nature_url = \"s3://public-ca30x30/hex/zoom8/ca-30x30-cbn.parquet\"\n",
+"new_lands_url = \"s3://public-ca30x30/CBN/Progress_data_new_protection/Newly_counted_lands/hex/zoom8/newly_counted_lands_2024.parquet\"\n",
+"\n",
+"ca_nature = (con.read_parquet(ca_nature_url)\n",
+"             .mutate(update_type = None)\n",
+"            )\n",
+"\n",
+"new = (con.read_parquet(new_lands_url)\n",
+"       .mutate(update_type = 'updatetype')\n",
+"       .select(\"update_type\",\"h8\")\n",
+"      )\n",
+"\n",
+"joined = (ca_nature.left_join(new,\"h8\")\n",
+"          .drop('h8_right','update_type')\n",
+"          .rename(update_type = 'update_type_right')\n",
+"         )\n",
+"\n",
+"name = 'ca-30x30-cbn-newlyprotected'\n",
+"\n",
+"joined.to_parquet(f\"{name}.parquet\")\n",
+"joined.to_parquet(f\"s3://public-ca30x30/hex/zoom8/{name}.parquet\")\n",
+"\n",
+"#maybe get pmtiles?\n",
+"convert_pmtiles(con, s3, folder = None, file = f\"{name}.parquet\", base_folder = None, current_crs = 'epsg:4326')"
+]
 }
 ],
 "metadata": {
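The last cell writes ca-30x30-cbn-newlyprotected.parquet both locally and to s3://public-ca30x30/hex/zoom8/, one row per zoom-8 hex, with update_type NULL wherever no newly counted land shares the hex. A quick sanity check on that output might look like this (a sketch in plain DuckDB against the local copy written above):

    # sketch: count joined hexes by update_type to confirm the join populated it
    import duckdb

    con = duckdb.connect()
    con.sql("""
        SELECT update_type, COUNT(*) AS n_hexes
        FROM 'ca-30x30-cbn-newlyprotected.parquet'
        GROUP BY update_type
        ORDER BY n_hexes DESC
    """).show()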
preprocess/h3_utils.py
CHANGED
@@ -1,60 +1,37 @@
 from utils import *
 import re
 
-def convert_h3(con, s3, folder, file, cols, zoom, base_folder = "CBN/"):
+def convert_h3(con, s3, folder, file, cols, zoom, group = None, base_folder = "CBN/"):
     """
     Driver function to convert geometries to h3.
-    If no zoom levels exist -> compute from geometry at target zoom.
     """
     cols = ", ".join(cols) if isinstance(cols, list) else cols
-
+    if folder:
+        bucket, path = info(folder, file, base_folder)
+    else:
+        bucket, path = info(None, file, None)
     path, file = os.path.split(path)
     name, ext = os.path.splitext(file)
-    name = name.replace('-', '')
     print(f"Processing: {name}")
+    t_name = name.replace('-', '')
 
-    [old lines 16-27: removed code not legible in this view]
-        con
-            SELECT {cols}, UNNEST(h{zoom}) AS h{zoom}
-            FROM t2
-        ''').to_parquet(f"s3://{bucket}/{path}/hex/zoom{zoom}/{name}.parquet")
-
-    else:
-        current_zoom = max(zooms)
+    if group:
+        con.read_parquet(f"s3://{bucket}/{name}.parquet", table_name=t_name)
+        print(f'Computing zoom level {zoom}, grouping the data based on {group}')
+        compute_grouped(con, t_name, cols, zoom, group, path = f"{bucket}/{path}")
+        (con.read_parquet(f"s3://{bucket}/hex/zoom{zoom}/group_{group}/**")
+         .to_parquet(f"s3://{bucket}/hex/zoom{zoom}/{name}.parquet")
+        )
+
+    else:
+        con.read_parquet(f"s3://{bucket}/{path}/{file}", table_name=t_name)
+        print(f'Computing zoom level {zoom} without grouping.')
+        save_path = f"s3://{bucket}/{path}/hex/zoom{zoom}/{name}.parquet"
+        h3_from_geom(con, t_name, cols, save_path, zoom)
 
-
-        print(f'Zoom {zoom} already exists!')
-        return
-
-    # elif current_zoom < zoom: #compute child of most refined zoom level
-    #     print(f'Reading zoom {current_zoom}')
-    #     con.read_parquet(
-    #         f"s3://{bucket}/{path}/hex/zoom{current_zoom}/{name}.parquet",
-    #         table_name=f"h3_h{current_zoom}"
-    #     )
-    #     print(f'Computing {zoom} from {current_zoom}')
-
-    #     for z in range(current_zoom + 1, zoom + 1):
-    #         print(f'Current zoom {z}')
-    #         h3_from_parent(con, z)
-    #         con.sql(f'''
-    #             SELECT *, UNNEST(h3_cell_to_children(h{z-1}, {z})) AS h{z}
-    #             FROM h3_h{z-1}
-    #         ''').to_parquet(f"s3://{bucket}/{path}/hex/zoom{z}/{name}.parquet")
-
+
 
-def h3_from_geom(con, name, cols, zoom):
+def h3_from_geom(con, name, cols, save_path, zoom):
     """
     Computes hexes directly from geometry.
     """
@@ -67,11 +44,22 @@ def h3_from_geom(con, name, cols, zoom):
             FROM {name}
         )
     ''')
+    con.sql(f'''
+        SELECT {cols}, UNNEST(h{zoom}) AS h{zoom},
+        ST_GeomFromText(h3_cell_to_boundary_wkt(UNNEST(h{zoom}))) AS geom
+        FROM t2
+    ''').to_parquet(save_path)
 
 
-
-
-    #
-
-
-
+def compute_grouped(con, name, cols, zoom, group, path):
+    unique_groups = con.table(name).select(group).distinct().execute()[group].tolist()
+    # separate data by group
+    for sub in unique_groups:
+        sub_name = f"{name}_{re.sub(r'\W+', '_', sub)}"
+        con.raw_sql(f"""
+            CREATE OR REPLACE TEMP TABLE {sub_name} AS
+            SELECT * FROM {name} WHERE {group} = '{sub}'
+        """)
+        save_path = f"s3://{path}/hex/zoom{zoom}/group_{group}/{sub.replace(' ', '')}.parquet"
+        h3_from_geom(con, sub_name, cols, save_path, zoom)
+
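With the new group argument, convert_h3 now shards the source table into one temporary table per distinct value of the grouping column (compute_grouped), hexes each shard through h3_from_geom, and then reads the per-group parquet files back into a single zoom-level file; h3_from_geom also gains an explicit save_path and writes the hex boundary geometry alongside the cell ID. A usage sketch mirroring the notebook cell above (the s3 handle and set_secrets helper below are assumptions standing in for the repo's own setup, and the cols list is shortened for illustration):

    # usage sketch for the updated convert_h3 -- not verbatim from the repository
    import ibis
    import s3fs                      # assumption: an s3fs handle stands in for the repo's `s3` object
    from h3_utils import convert_h3
    from utils import set_secrets    # assumed import path; the notebook calls set_secrets(con)

    con = ibis.duckdb.connect('ca', extensions=["spatial", "h3"])
    set_secrets(con)
    s3 = s3fs.S3FileSystem()

    # grouped: shard the statewide layer by ecoregion before hexing, then merge the shards
    convert_h3(con, s3, folder=None, file="ca-30x30-cbn.parquet",
               cols=['id', 'ecoregion'], group='ecoregion', zoom=8)

Grouping presumably keeps each DuckDB hexing pass smaller by processing one ecoregion at a time rather than the whole state at once.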
preprocess/utils.py
CHANGED
@@ -208,15 +208,20 @@ def filter_raster(s3, folder, file, percentile):
     # return cols
     return
 
-def convert_pmtiles(con, s3, folder, file, base_folder = "CBN/"):
+def convert_pmtiles(con, s3, folder, file, base_folder = "CBN/", current_crs = 'epsg:3310'):
     """
     Convert to PMTiles with tippecanoe
     """
+    print('converting pmtiles')
     name, ext = os.path.splitext(file)
     if ext != '.geojson':
-        [three removed lines not legible in this view]
+        if current_crs != 'epsg:4326':
+            data = (con.read_parquet(file).execute().set_crs(current_crs)
+                    .to_crs('epsg:4326'))
+        else:
+            data = (con.read_parquet(file).execute().set_crs(current_crs))
+        data.to_file(name+'.geojson')
+        to_pmtiles(name+'.geojson', name+'.pmtiles', options = ['--extend-zooms-if-still-dropping','--drop-densest-as-needed'])
     upload(s3, folder, name+'.pmtiles', base_folder)
     return
 
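convert_pmtiles now takes the source CRS explicitly: when current_crs is not epsg:4326 the data is reprojected via GeoPandas set_crs/to_crs before the GeoJSON/tippecanoe step, so hex output that is already in lat/long (like the joined file above) can pass current_crs='epsg:4326' and skip the transform. A hedged usage sketch matching the notebook's final call (the s3 handle is again an assumption standing in for the repo's own object):

    # sketch: convert the joined hex parquet to PMTiles for the app -- not verbatim from the repository
    import ibis
    import s3fs
    from utils import convert_pmtiles, set_secrets   # assumed import path

    con = ibis.duckdb.connect('joined', extensions=["spatial", "h3"])
    set_secrets(con)
    s3 = s3fs.S3FileSystem()                          # assumption: stands in for the repo's `s3` object

    # geometries are already epsg:4326, so the function skips the to_crs step
    convert_pmtiles(con, s3, folder=None, file="ca-30x30-cbn-newlyprotected.parquet",
                    base_folder=None, current_crs='epsg:4326')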