{ "cells": [ { "cell_type": "code", "execution_count": 21, "id": "e84709ab-1b47-49ee-8cbd-8aa69744b6c3", "metadata": {}, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "c4382edb57b643e6907b0314c79387bd", "version_major": 2, "version_minor": 0 }, "text/plain": [ "FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "b4ae3a5cd5f84c5b86fdcd767d330adf", "version_major": 2, "version_minor": 0 }, "text/plain": [ "FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
DatabaseTable: tmp2.main.t2\n",
" gid int64\n",
" cpad_ACCESS_TYP string\n",
" cpad_PARK_NAME string\n",
" cpad_MNG_AGENCY string\n",
" cpad_MNG_AG_LEV string\n",
" reGAP int16\n",
" Easement int16\n",
" TYPE string\n",
" CA_County_Name string\n",
" CA_Region_Name string\n",
" TerrMar string\n",
" CA_Ecoregion_Name string\n",
" ACCESS_TYP string\n",
" MNG_AGNCY string\n",
" MNG_AG_LEV string\n",
" UNIT_NAME string\n",
" DefaultSelection string\n",
" CA_Ecoregion_Acres float32\n",
" CA_Region_Acres float32\n",
" CA_County_Acres float32\n",
" Acres float32\n",
" CA_Marine_Acres float32\n",
" Release_Year int16\n",
" mgmt_stack string\n",
" geom geospatial:geometry\n",
" SHAPE_bbox xmin: float32\n",
" ymin: float32\n",
" xmax: float32\n",
" ymax: float32\n",
"\n"
],
"text/plain": [
"DatabaseTable: tmp2.main.t2\n",
" gid int64\n",
" cpad_ACCESS_TYP string\n",
" cpad_PARK_NAME string\n",
" cpad_MNG_AGENCY string\n",
" cpad_MNG_AG_LEV string\n",
" reGAP int16\n",
" Easement int16\n",
" TYPE string\n",
" CA_County_Name string\n",
" CA_Region_Name string\n",
" TerrMar string\n",
" CA_Ecoregion_Name string\n",
" ACCESS_TYP string\n",
" MNG_AGNCY string\n",
" MNG_AG_LEV string\n",
" UNIT_NAME string\n",
" DefaultSelection string\n",
" CA_Ecoregion_Acres float32\n",
" CA_Region_Acres float32\n",
" CA_County_Acres float32\n",
" Acres float32\n",
" CA_Marine_Acres float32\n",
" Release_Year int16\n",
" mgmt_stack string\n",
" geom geospatial:geometry\n",
" SHAPE_bbox xmin: float32\n",
" ymin: float32\n",
" xmax: float32\n",
" ymax: float32"
]
},
"execution_count": 21,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import ibis\n",
"from ibis import _\n",
"conn = ibis.duckdb.connect(\"tmp2\", extensions=[\"spatial\"])\n",
"\n",
"tbl = (\n",
" conn.read_parquet(\"https://data.source.coop/cboettig/ca30x30/ca_areas.parquet\")\n",
" .cast({\"SHAPE\": \"geometry\"})\n",
" .rename(geom = \"SHAPE\", gid = \"OBJECTID\")\n",
" # .filter(_.UNIT_NAME == \"Angeles National Forest\")\n",
" .filter(_.reGAP < 3) \n",
")\n",
"conn.create_table(\"t1\", tbl.filter(_.Release_Year == 2024), overwrite = True)\n",
"conn.create_table(\"t2\", tbl.filter(_.Release_Year == 2023), overwrite = True)\n",
"\n"
]
},
{
"cell_type": "code",
"execution_count": 23,
"id": "303792ac-9b1d-41b2-a17b-5cf855d70633",
"metadata": {},
"outputs": [],
"source": [
"ca2024 = conn.table(\"t1\").execute()\n",
"ca2023 = conn.table(\"t2\").execute()\n",
"\n",
"import leafmap.maplibregl as leafmap\n",
"m = leafmap.Map()\n",
"m.add_gdf(ca2024, name = \"2024\")\n",
"m.add_gdf(ca2023, name =\"2023\")\n",
"\n",
"m"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "9fdd2ed2-623f-479c-b0b7-7c723f3f6728",
"metadata": {},
"outputs": [],
"source": [
"\n",
"conn.disconnect()"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "121c3cad-680c-4f3a-9075-638711ea1634",
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "c69a096d24974e9ea8ad3d5b937b723a",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"CPU times: user 22min 10s, sys: 43 s, total: 22min 53s\n",
"Wall time: 11min 47s\n"
]
}
],
"source": [
"%%time\n",
"\n",
"## RUN this on a machine with a whole lot of RAM. consider filtering federal/non-federal first.\n",
"import duckdb\n",
"db = duckdb.connect(\"tmp2\")\n",
"db.install_extension(\"spatial\")\n",
"db.load_extension(\"spatial\")\n",
"\n",
"db.sql('''\n",
"CREATE OR REPLACE TABLE diff AS (\n",
"with temp as \n",
"(\n",
" select b.gid, st_union_agg(a.geom) as geom\n",
" from t1 b join t2 a on st_intersects(a.geom, b.geom)\n",
" group by b.gid\n",
") \n",
"select st_difference(b.geom,coalesce(t.geom, 'GEOMETRYCOLLECTION EMPTY'::geometry)) as geom\n",
"from t1 b left join temp t on b.gid = t.gid\n",
")\n",
"''')\n"
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "cf3d3e5b-2ff1-4ef9-a147-01f15e970e49",
"metadata": {},
"outputs": [],
"source": [
"## Cannot go straight to geoparquet due to M geometries!\n",
"#db.table(\"diff\").to_parquet(\"diff.parquet\")\n",
"\n",
"## This doesn't work either: \n",
"#db.sql('''CREATE OR REPLACE TABLE diff2024 AS SELECT *, st_force2d(geom) AS geom FROM diff''')\n",
"\n",
"## We could cast geom as blob...."
]
},
{
"cell_type": "code",
"execution_count": 10,
"id": "308dc665-1323-4e9b-bc2d-69201c325c4b",
"metadata": {},
"outputs": [],
"source": [
"# F*ck it. Let's do it all in RAM via geopandas, which drops M geoms due to a fortunate bug! \n",
"conn = ibis.duckdb.connect(\"tmp2\", extensions=[\"spatial\"])\n",
"gdf = conn.table(\"diff\").mutate(geom = _.geom.convert(\"epsg:3310\",\"epsg:4326\")).execute()\n",
"gdf.to_parquet(\"ca2024_diffs.parquet\")\n"
]
},
{
"cell_type": "code",
"execution_count": 19,
"id": "063a11d8-15d7-4b91-b67c-3ccae3edcc8d",
"metadata": {},
"outputs": [],
"source": [
"# stash in our team S3 storage \n",
"\n",
"import streamlit as st\n",
"from minio import Minio\n",
"import os\n",
"# Get signed URLs to access license-controlled layers\n",
"key = st.secrets[\"MINIO_KEY\"]\n",
"secret = st.secrets[\"MINIO_SECRET\"]\n",
"client = Minio(\"minio.carlboettiger.info\", key, secret, secure=True)\n",
"\n",
"size = os.path.getsize(\"ca2024_diffs.parquet\")\n",
"with open(\"ca2024_diffs.parquet\", \"rb\") as file_data:\n",
" client.put_object(\"public-biodiversity\", \"ca30x30/ca2024_diffs.parquet\", file_data, length = size)\n",
"\n",
"\n"
]
},
{
"cell_type": "code",
"execution_count": 26,
"id": "34425101-0592-42fd-9d62-22c9e7a6d6ac",
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "c5cc696c15374d519fb940e054a902f0",
"version_major": 2,
"version_minor": 1
},
"text/plain": [
"Map(height='600px', map_options={'bearing': 0, 'center': (0, 20), 'pitch': 0, 'style': 'https://basemaps.carto…"
]
},
"execution_count": 26,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# can read from S3 and plot the whole thing. Note gdf has no metadata.\n",
"\n",
"import leafmap.maplibregl as leafmap\n",
"import ibis\n",
"conn = ibis.duckdb.connect(extensions=[\"spatial\"])\n",
"gdf = conn.read_parquet(\"https://minio.carlboettiger.info/public-biodiversity/ca30x30/ca2024_diffs.parquet\").execute()\n",
"m = leafmap.Map()\n",
"m.add_gdf(gdf)\n",
"#m.to_html(\"ca2024.html\")\n",
"m"
]
},
{
"cell_type": "code",
"execution_count": 25,
"id": "17af39a0-9a69-4bfa-9084-d7b26adf74fc",
"metadata": {},
"outputs": [],
"source": [
"path = \"ca2024.html\"\n",
"size = os.path.getsize(path)\n",
"with open(path, \"rb\") as file_data:\n",
" client.put_object(\"public-biodiversity\", \"ca30x30/\"+path, file_data, length = size)\n",
"\n",
"\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "775abaca-f21a-4cd8-ad7d-0310cc6b33b6",
"metadata": {},
"outputs": [],
"source": [
"# \"TO 'new2024.geojson' WITH (FORMAT GDAL, DRIVER 'GeoJSON', LAYER_CREATION_OPTIONS 'WRITE_BBOX=YES')\""
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.10"
}
},
"nbformat": 4,
"nbformat_minor": 5
}