cassiebuhler committed
Commit 49369e2 · 1 Parent(s): 46779ef

joined newly added areas to CA nature data with h3
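The core technique in this commit is an attribute join on shared zoom-8 H3 cell ids ("h8") rather than a polygon overlay. A minimal sketch of that pattern, with hypothetical local file paths (the real cell appears in the CBN-data.ipynb diff below):

```python
# Minimal sketch of the h8-key join this commit performs; file paths are
# hypothetical local copies of the hex parquets written by convert_h3.
import ibis

con = ibis.duckdb.connect()

ca_nature = con.read_parquet("ca-30x30-cbn.parquet")            # hypothetical path
new_lands = con.read_parquet("newly_counted_lands_2024.parquet")  # hypothetical path

# Keep only the join key and the attribute to carry over.
new = new_lands.select("h8", "updatetype")

# Hexes present in both tables pick up the new-lands attribute; others get NULL.
joined = ca_nature.left_join(new, "h8").drop("h8_right")
joined.to_parquet("ca-30x30-cbn-newlyprotected.parquet")
```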
app/footer.md CHANGED
@@ -27,3 +27,5 @@ Data: https://data.carlboettiger.info/public-ca30x30
 
 - Historic Fire Perimeters by CAL FIRE (2023). Data: https://www.fire.ca.gov/Home/What-We-Do/Fire-Resource-Assessment-Program/GIS-Mapping-and-Data-Analytics
 
+ ### LLMs
+ This app can use a selection of open-weights language models hosted on the National Research Platform, https://nrp.ai/documentation/userdocs/ai/llm-managed/.
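Managed model services like this are typically reached through an OpenAI-compatible API. A hypothetical sketch of how the app might call one (the base URL, model id, and token below are placeholders, not values from this repo; see the linked NRP docs for the real ones):

```python
# Hypothetical sketch: querying an NRP-hosted open-weights model via an
# OpenAI-compatible endpoint. base_url, api_key, and model are placeholders.
from openai import OpenAI

client = OpenAI(
    base_url="https://example-nrp-endpoint/v1",  # placeholder; see NRP docs
    api_key="YOUR_NRP_TOKEN",                    # placeholder
)

resp = client.chat.completions.create(
    model="some-open-weights-model",             # placeholder model id
    messages=[{"role": "user", "content": "Summarize CA 30x30 in one sentence."}],
)
print(resp.choices[0].message.content)
```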
preprocess/CBN-data.ipynb CHANGED
@@ -738,12 +738,22 @@
  "folder = 'Progress_data_new_protection/Newly_counted_lands'\n",
  "name = 'newly_counted_lands_2024'\n",
  "\n",
- "unzip(s3, folder = folder, file = f\"{name}.shp.zip\")\n",
- "process_vector(s3, folder = folder, file = f\"{name}.shp\",crs = \"epsg:4326\")\n",
- "convert_pmtiles(con, s3, folder = folder, file = f\"{name}.parquet\")\n",
- "\n",
+ "# unzip(s3, folder = folder, file = f\"{name}.shp.zip\")\n",
+ "# cols = process_vector(s3, folder = folder, file = f\"{name}.shp\",crs = \"epsg:4326\")\n",
+ "# convert_pmtiles(con, s3, folder = folder, file = f\"{name}.parquet\")\n",
  "# cols = [item for item in cols if item not in ['Shape_Leng', 'Shape_Area']]\n",
- "# convert_h3(con, s3, folder = folder, file = f\"{name}.parquet\", cols = cols, zoom = 8)"
+ "\n",
+ "cols = ['mgmt_stack', 'reGAP', 'Easement',\n",
+ "        'TYPE','CA_County_','CA_Region_',\n",
+ "        'TerrMar','CA_Ecoregi','DefaultSel',\n",
+ "        'CA_Ecore_1','CA_Region1','CA_County1',\n",
+ "        'ACCESS_TYP','MNG_AGNCY','MNG_AG_LEV',\n",
+ "        'UNIT_NAME','Acres','cpad_ACCES',\n",
+ "        'cpad_PARK_','cpad_MNG_A','cpad_MNG_1',\n",
+ "        'CA_Marine_','Release_Ye','ORIG_FID',\n",
+ "        'updatetype']\n",
+ "\n",
+ "convert_h3(con, s3, folder = folder, file = f\"{name}.parquet\", cols = cols, zoom = 8)"
  ]
  },
  {
@@ -808,14 +818,6 @@
  "# convert_h3(con, s3, folder = folder, file = f\"{name}.parquet\", cols = cols, zoom = 8)"
  ]
  },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "df1a939c-cb89-4a2f-8309-2819fe52ac45",
- "metadata": {},
- "outputs": [],
- "source": []
- },
  {
  "cell_type": "markdown",
  "id": "a919ff5f-dff3-4db7-81c2-694f07f37d1d",
@@ -908,6 +910,102 @@
  "process_vector(s3, folder = folder, file = f\"{name}.shp\", crs=\"EPSG:4326\")\n",
  "# convert_h3(con, s3, folder = folder, file = f\"{name}.parquet\", cols= cols, zoom = 8)"
  ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "c7f43e03-d6f9-4109-8876-b8f384b1c42e",
+ "metadata": {},
+ "source": [
+ "# CA Nature"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "ebaecd00-8df6-4dd0-a374-45f5937607f2",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "%%time\n",
+ "con = ibis.duckdb.connect('ca',extensions = [\"spatial\", \"h3\"])\n",
+ "set_secrets(con)\n",
+ "folder = None\n",
+ "name = 'ca-30x30-cbn'\n",
+ "\n",
+ "cols = ['id','established',\n",
+ "        'gap_code','status','name','access_type',\n",
+ "        'manager','manager_type','ecoregion',\n",
+ "        'easement','acres', 'type','county',\n",
+ "        'climate_zone','habitat_type',\n",
+ "        'resilient_connected_network',\n",
+ "        'ACE_amphibian_richness',\n",
+ "        'ACE_reptile_richness',\n",
+ "        'ACE_bird_richness',\n",
+ "        'ACE_mammal_richness',\n",
+ "        'ACE_rare_amphibian_richness',\n",
+ "        'ACE_rare_reptile_richness',\n",
+ "        'ACE_rare_bird_richness',\n",
+ "        'ACE_rare_mammal_richness',\n",
+ "        'ACE_endemic_amphibian_richness',\n",
+ "        'ACE_endemic_reptile_richness',\n",
+ "        'ACE_endemic_bird_richness',\n",
+ "        'ACE_endemic_mammal_richness',\n",
+ "        'wetlands','fire','farmland',\n",
+ "        'grazing','DAC','low_income',\n",
+ "        'plant_richness',\n",
+ "        'rarityweighted_endemic_plant_richness']\n",
+ "# download(s3, folder = folder, file = f\"{name}.parquet\")\n",
+ "# process_vector(s3, folder = folder, file = f\"{name}.shp\", crs=\"EPSG:3310\")\n",
+ "# convert_pmtiles(con, s3, folder = folder, file = f\"{name}.parquet\")\n",
+ "# process_vector(s3, folder = folder, file = f\"{name}.shp\", crs=\"EPSG:4326\")\n",
+ "\n",
+ "convert_h3(con, s3, folder = folder, file = f\"{name}.parquet\", cols= cols, group = 'ecoregion', zoom = 8)\n",
+ "\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "980c7e88-8dc6-4bc6-bfa4-8ea301c6ee80",
+ "metadata": {},
+ "source": [
+ "#### join with newly protected lands"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "92b2107c-4771-4217-a566-72c75389b677",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "con = ibis.duckdb.connect('joined',extensions = [\"spatial\", \"h3\"])\n",
+ "set_secrets(con)\n",
+ "\n",
+ "ca_nature_url = \"s3://public-ca30x30/hex/zoom8/ca-30x30-cbn.parquet\"\n",
+ "new_lands_url = \"s3://public-ca30x30/CBN/Progress_data_new_protection/Newly_counted_lands/hex/zoom8/newly_counted_lands_2024.parquet\"\n",
+ "\n",
+ "ca_nature = (con.read_parquet(ca_nature_url)\n",
+ "             .mutate(update_type = None)\n",
+ "             )\n",
+ "\n",
+ "new = (con.read_parquet(new_lands_url)\n",
+ "       .mutate(update_type = 'updatetype')\n",
+ "       .select(\"update_type\",\"h8\")\n",
+ "       )\n",
+ "\n",
+ "joined = (ca_nature.left_join(new,\"h8\")\n",
+ "          .drop('h8_right','update_type')\n",
+ "          .rename(update_type = 'update_type_right')\n",
+ "          )\n",
+ "\n",
+ "name = 'ca-30x30-cbn-newlyprotected'\n",
+ "\n",
+ "joined.to_parquet(f\"{name}.parquet\")\n",
+ "joined.to_parquet(f\"s3://public-ca30x30/hex/zoom8/{name}.parquet\")\n",
+ "\n",
+ "#maybe get pmtiles?\n",
+ "convert_pmtiles(con, s3, folder = None, file = f\"{name}.parquet\", base_folder = None, current_crs = 'epsg:4326')"
+ ]
  }
  ],
  "metadata": {
preprocess/h3_utils.py CHANGED
@@ -1,60 +1,37 @@
  from utils import *
  import re
 
- def convert_h3(con, s3, folder, file, cols, zoom, base_folder = "CBN/"):
+ def convert_h3(con, s3, folder, file, cols, zoom, group = None, base_folder = "CBN/"):
      """
      Driver function to convert geometries to h3.
-     If no zoom levels exist -> compute from geometry at target zoom.
      """
      cols = ", ".join(cols) if isinstance(cols, list) else cols
-     bucket, path = info(folder, file, base_folder)
+     if folder:
+         bucket, path = info(folder, file, base_folder)
+     else:
+         bucket, path = info(None, file, None)
      path, file = os.path.split(path)
      name, ext = os.path.splitext(file)
-     name = name.replace('-', '')
      print(f"Processing: {name}")
+     t_name = name.replace('-', '')
 
-     hex_paths = s3.list_objects(bucket, prefix=f"{path}/hex/", recursive=True)
-     zooms = []
-     # check what zooms exist
-     for obj in hex_paths:
-         match = re.search(r"/zoom(\d{1,2})/", obj.object_name)
-         if match:
-             zooms.append(int(match.group(1)))
-
-     if not zooms: # if no h3 files exist
-         print(f'No h3 files exists, computing zoom level {zoom} from geometry.')
-         con.read_parquet(f"s3://{bucket}/{path}/{file}", table_name=name)
-         h3_from_geom(con, name, cols, zoom)
-         con.sql(f'''
-             SELECT {cols}, UNNEST(h{zoom}) AS h{zoom}
-             FROM t2
-         ''').to_parquet(f"s3://{bucket}/{path}/hex/zoom{zoom}/{name}.parquet")
-
-     else:
-         current_zoom = max(zooms)
-
-         if zoom in zooms:
-             print(f'Zoom {zoom} already exists!')
-             return
-
-         # elif current_zoom < zoom: #compute child of most refined zoom level
-         #     print(f'Reading zoom {current_zoom}')
-         #     con.read_parquet(
-         #         f"s3://{bucket}/{path}/hex/zoom{current_zoom}/{name}.parquet",
-         #         table_name=f"h3_h{current_zoom}"
-         #     )
-         #     print(f'Computing {zoom} from {current_zoom}')
-
-         #     for z in range(current_zoom + 1, zoom + 1):
-         #         print(f'Current zoom {z}')
-         #         h3_from_parent(con, z)
-         #         con.sql(f'''
-         #             SELECT *, UNNEST(h3_cell_to_children(h{z-1}, {z})) AS h{z}
-         #             FROM h3_h{z-1}
-         #         ''').to_parquet(f"s3://{bucket}/{path}/hex/zoom{z}/{name}.parquet")
+     if group:
+         con.read_parquet(f"s3://{bucket}/{name}.parquet", table_name=t_name)
+         print(f'Computing zoom level {zoom}, grouping the data based on {group}')
+         compute_grouped(con, t_name, cols, zoom, group, path = f"{bucket}/{path}")
+         (con.read_parquet(f"s3://{bucket}/hex/zoom{zoom}/group_{group}/**")
+          .to_parquet(f"s3://{bucket}/hex/zoom{zoom}/{name}.parquet")
+         )
+
+     else:
+         con.read_parquet(f"s3://{bucket}/{path}/{file}", table_name=t_name)
+         print(f'Computing zoom level {zoom} without grouping.')
+         save_path = f"s3://{bucket}/{path}/hex/zoom{zoom}/{name}.parquet"
+         h3_from_geom(con, t_name, cols, save_path, zoom)
 
- def h3_from_geom(con, name, cols, zoom):
+ def h3_from_geom(con, name, cols, save_path, zoom):
      """
      Computes hexes directly from geometry.
      """
@@ -67,11 +44,22 @@ def h3_from_geom(con, name, cols, zoom):
          FROM {name}
      )
      ''')
+     con.sql(f'''
+         SELECT {cols}, UNNEST(h{zoom}) AS h{zoom},
+         ST_GeomFromText(h3_cell_to_boundary_wkt(UNNEST(h{zoom}))) AS geom
+         FROM t2
+     ''').to_parquet(save_path)
 
 
- # def h3_from_parent(con, zoom):
- #     con.raw_sql(f'''
- #         CREATE OR REPLACE TEMP TABLE h3_h{zoom} AS
- #         SELECT *, UNNEST(h3_cell_to_children(h{zoom-1}, {zoom})) AS h{zoom}
- #         FROM h3_h{zoom-1}
- #     ''')
+ def compute_grouped(con, name, cols, zoom, group, path):
+     unique_groups = con.table(name).select(group).distinct().execute()[group].tolist()
+     # separate data by group
+     for sub in unique_groups:
+         sub_name = f"{name}_{re.sub(r'\W+', '_', sub)}"
+         con.raw_sql(f"""
+             CREATE OR REPLACE TEMP TABLE {sub_name} AS
+             SELECT * FROM {name} WHERE {group} = '{sub}'
+         """)
+         save_path = f"s3://{path}/hex/zoom{zoom}/group_{group}/{sub.replace(' ', '')}.parquet"
+         h3_from_geom(con, sub_name, cols, save_path, zoom)
+
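The new `h3_from_geom` SELECT emits each cell id alongside its polygon boundary via `h3_cell_to_boundary_wkt`. For readers unfamiliar with those primitives, a minimal self-contained sketch, assuming DuckDB's community h3 extension (which provides `h3_latlng_to_cell` alongside the `h3_cell_to_boundary_wkt` call seen in the diff):

```python
# Sketch of the two h3 extension calls the new code leans on: mapping a point
# to a zoom-8 cell, and turning a cell back into a polygon geometry.
import duckdb

con = duckdb.connect()
con.execute("INSTALL h3 FROM community")
con.execute("LOAD h3")
con.execute("INSTALL spatial")
con.execute("LOAD spatial")

row = con.sql("""
    SELECT
        h3_latlng_to_cell(38.5, -121.5, 8) AS h8,
        ST_GeomFromText(h3_cell_to_boundary_wkt(
            h3_latlng_to_cell(38.5, -121.5, 8))) AS geom
""").fetchone()
print(row)  # (cell id, hexagon polygon around the point)
```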
preprocess/utils.py CHANGED
@@ -208,15 +208,20 @@ def filter_raster(s3, folder, file, percentile):
      # return cols
      return
 
- def convert_pmtiles(con, s3, folder, file, base_folder = "CBN/"):
+ def convert_pmtiles(con, s3, folder, file, base_folder = "CBN/", current_crs = 'epsg:3310'):
      """
      Convert to PMTiles with tippecanoe
      """
+     print('converting pmtiles')
      name, ext = os.path.splitext(file)
      if ext != '.geojson':
-         (con.read_parquet(file).execute().set_crs('epsg:3310')
-          .to_crs('epsg:4326').to_file(name+'.geojson'))
-     to_pmtiles(name+'.geojson', name+'.pmtiles', options = ['--extend-zooms-if-still-dropping'])
+         if current_crs != 'epsg:4326':
+             data = (con.read_parquet(file).execute().set_crs(current_crs)
+                     .to_crs('epsg:4326'))
+         else:
+             data = (con.read_parquet(file).execute().set_crs(current_crs))
+         data.to_file(name+'.geojson')
+     to_pmtiles(name+'.geojson', name+'.pmtiles', options = ['--extend-zooms-if-still-dropping','--drop-densest-as-needed'])
      upload(s3, folder, name+'.pmtiles', base_folder)
      return
 
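`--drop-densest-as-needed` is a standard tippecanoe option that thins the densest features first whenever a tile would exceed the size limit, a sensible pairing with uniformly dense hex layers. The notebook drives the new signature roughly like this (a sketch mirroring the call in the CBN-data.ipynb diff; `con` and `s3` are assumed to be the ibis connection and S3 client configured there):

```python
# Mirrors the notebook's call; with current_crs='epsg:4326' the reprojection
# branch is skipped and the GeoDataFrame is only tagged with its CRS.
name = 'ca-30x30-cbn-newlyprotected'
convert_pmtiles(con, s3, folder=None, file=f"{name}.parquet",
                base_folder=None, current_crs='epsg:4326')
```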