{ "cells": [ { "cell_type": "markdown", "id": "4b4adc2a-bf0c-4ace-87be-dbaf90be0125", "metadata": {}, "source": [ "# Pre-processing script" ] }, { "cell_type": "code", "execution_count": null, "id": "f7e6298c-d886-432a-a1b7-c3fee914c24f", "metadata": { "editable": true, "slideshow": { "slide_type": "" }, "tags": [] }, "outputs": [], "source": [ "import os\n", "data_path = '../data/CBN-layers/'\n", "os.chdir(data_path)\n", "\n", "from cng.utils import set_secrets, s3_client, s3_cp, to_pmtiles\n", "s3 = s3_client()\n", "\n", " \n", "import ibis\n", "from ibis import _\n", "import ibis.expr.datatypes as dt\n", "con = ibis.duckdb.connect(extensions=[\"spatial\"])\n", "\n", "import geopandas as gpd\n", "import duckdb" ] }, { "cell_type": "markdown", "id": "8741e72a-5976-440b-9279-77f959c6ae24", "metadata": {}, "source": [ "#### Helper functions" ] }, { "cell_type": "code", "execution_count": null, "id": "63dd33b8-6d3c-4852-9899-6ed5775d19c0", "metadata": {}, "outputs": [], "source": [ "def get_url(folder, file, base_folder = 'CBN'):\n", " minio = 'https://minio.carlboettiger.info/'\n", " bucket = 'public-ca30x30'\n", " if base_folder is None:\n", " path = os.path.join(bucket,folder,file)\n", " else:\n", " path = os.path.join(bucket,base_folder,folder,file)\n", " url = minio+path\n", " return url\n", "\n", "\n", "# usage: t.mutate(geom_valid = ST_MakeValid(t.geom))\n", "@ibis.udf.scalar.builtin\n", "def ST_MakeValid(geom) -> dt.geometry:\n", " ..." ] }, { "cell_type": "markdown", "id": "743cbbfd-4da4-47eb-b0f6-b5f713687839", "metadata": {}, "source": [ "#### Variables" ] }, { "cell_type": "code", "execution_count": null, "id": "13214bbe-3a74-4247-981f-5a6eb6c486f5", "metadata": {}, "outputs": [], "source": [ "# CA Nature data \n", "ca_raw_parquet = \"https://data.source.coop/cboettig/ca30x30/ca_areas.parquet\"\n", "# ca_raw_parquet = 'ca_areas.parquet'\n", "\n", "# Boundary of CA, used to computed 'non-conserved' areas\n", "ca_boundary_parquet = get_url('CA_Nature/2024/Preprocessing','ca_boundary.parquet',base_folder = None)\n", "\n", "# newly protected areas \n", "newly_protected = get_url('Progress_data_new_protection/Newly_counted_lands','newly_counted_lands_2024.parquet')\n", "\n", "# Ecoregions\n", "ecoregions = get_url('Ecoregion','ACE_ecoregions.parquet')\n", "\n", "# file to save non-conserved areas; costly operation so we save results \n", "ca_nonconserved_url = get_url('Progress_data_new_protection/Land_Status_Zone_Ecoregion_Counties','all_regions_reGAP_county_eco.parquet')\n", "\n", "# temp file only of CA Nature data + non-conserved areas \n", "ca_base_parquet = \"ca-30x30-base.parquet\"\n", "ca_temp_parquet = \"ca-30x30-temp.parquet\" \n", "\n", "# temp file used to compute metrics w/ data layers \n", "ca_temp_stats_parquet = \"ca-30x30-stats-temp.parquet\" \n", "\n", "#vector data \n", "ACE_rarerank_statewide = get_url('ACE_biodiversity/ACE_rarerank_statewide','ACE_rarerank_statewide.parquet')\n", "ACE_rarerank_ecoregion = get_url('ACE_biodiversity/ACE_rarerank_ecoregion','ACE_rarerank_ecoregion.parquet')\n", "ACE_biorank_statewide = get_url('ACE_biodiversity/ACE_biorank_statewide','ACE_biorank_statewide.parquet')\n", "ACE_biorank_ecoregion = get_url('ACE_biodiversity/ACE_biorank_ecoregion','ACE_biorank_ecoregion.parquet')\n", "\n", "ACE_amph_richness = get_url('ACE_biodiversity/ACE_amphibian_richness','ACE_amphibian_richness.parquet')\n", "ACE_reptile_richness = get_url('ACE_biodiversity/ACE_reptile_richness','ACE_reptile_richness.parquet')\n", "ACE_bird_richness = 
get_url('ACE_biodiversity/ACE_bird_richness','ACE_bird_richness.parquet')\n", "ACE_mammal_richness = get_url('ACE_biodiversity/ACE_mammal_richness','ACE_mammal_richness.parquet')\n", "ACE_rare_amphibian_richness = get_url('ACE_biodiversity/ACE_rare_amphibian_richness','ACE_rare_amphibian_richness.parquet')\n", "ACE_rare_reptile_richness = get_url('ACE_biodiversity/ACE_rare_reptile_richness','ACE_rare_reptile_richness.parquet')\n", "ACE_rare_bird_richness = get_url('ACE_biodiversity/ACE_rare_bird_richness','ACE_rare_bird_richness.parquet')\n", "ACE_rare_mammal_richness = get_url('ACE_biodiversity/ACE_rare_mammal_richness','ACE_rare_mammal_richness.parquet')\n", "ACE_endemic_amphibian_richness = get_url('ACE_biodiversity/ACE_endemic_amphibian_richness','ACE_endemic_amphibian_richness.parquet')\n", "ACE_endemic_reptile_richness = get_url('ACE_biodiversity/ACE_endemic_reptile_richness','ACE_endemic_reptile_richness.parquet')\n", "ACE_endemic_bird_richness = get_url('ACE_biodiversity/ACE_endemic_bird_richness','ACE_endemic_bird_richness.parquet')\n", "ACE_endemic_mammal_richness = get_url('ACE_biodiversity/ACE_endemic_mammal_richness','ACE_endemic_mammal_richness.parquet')\n", "\n", "wetlands = get_url('Freshwater_resources/Wetlands','CA_wetlands.parquet')\n", "fire = get_url('Climate_risks/Historical_fire_perimeters','calfire_2023.parquet')\n", "farmland = get_url('NBS_agriculture/Farmland','Farmland_2018.parquet')\n", "grazing = get_url('NBS_agriculture/Lands_suitable_grazing','Grazing_land_2018.parquet')\n", "DAC = get_url('Progress_data_new_protection/DAC','DAC_2022.parquet')\n", "low_income = get_url('Progress_data_new_protection/Low_income_communities','low_income_CalEnviroScreen4.parquet')\n", "\n", "# raster data\n", "climate_zones = get_url('Climate_zones', 'climate_zones_10_processed.tif')\n", "# habitat = get_url('Habitat', 'CWHR13_2022_processed.tif')\n", "habitat = get_url('Habitat', 'fveg22_1_processed.tif')\n", "plant_richness = get_url('Biodiversity_unique/Plant_richness', 'species_D_80percentile_processed.tif')\n", "endemic_plant_richness = get_url('Biodiversity_unique/Rarityweighted_endemic_plant_richness', 'endemicspecies_E_80percentile_processed.tif')\n", "resilient_conn_network = get_url('Connectivity_resilience/Resilient_connected_network_allcategories', \n", " 'rcn_wIntactBioCat_caOnly_2020-10-27_processed.tif')\n", "\n", "# final files: conserved + non-conserved areas + data layers \n", "ca_parquet = \"ca-30x30-cbn.parquet\"\n", "ca_geojson = \"ca-30x30-cbn.geojson\"\n", "ca_pmtiles = \"ca-30x30-cbn.pmtiles\" " ] }, { "cell_type": "markdown", "id": "907235f6-48a5-4c55-b779-3bb6839acf2b", "metadata": {}, "source": [ "# Step 1: Cleaning up \"non-conserved\" areas" ] }, { "cell_type": "markdown", "id": "ce52b1e0-027e-4915-9e7b-e51e946560ed", "metadata": {}, "source": [ "#### Non-conserved areas need to match CA Nature schema when merging" ] }, { "cell_type": "code", "execution_count": null, "id": "0f9666d1-7c2b-45af-9399-e4189bba34f5", "metadata": {}, "outputs": [], "source": [ "%%time \n", "# match CA Nature schema \n", "\n", "non_conserved = (con.read_parquet(ca_nonconserved_url)\n", " .filter(_.reGAP == 0)\n", " .rename(county = \"COUNTY_NAM\", ecoregion = \"CA_Ecoregi\",acres = \"Acres\", gap_code = \"reGAP\")\n", " .mutate(id=ibis.row_number().over())\n", " .select( _.id, _.county, _.ecoregion, _.acres,_.geom, _.gap_code)\n", " .mutate(established = ibis.null(), name = ibis.literal(\"Non-Conserved Areas\"),\n", " access_type = ibis.null(), manager = ibis.null(), 
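\n",
"                  # the columns created here are null placeholders (non-conserved land has no establishment\n",
"                  # year, manager, or access type); they exist only so this table matches the CA Nature\n",
"                  # schema (see the cast below) and can be unioned with it in Step 3\n",
"                  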
manager_type = ibis.null(),\n", "                           easement = ibis.null(), type = ibis.literal(\"Land\"),\n", "                           status = ibis.literal(\"non-conserved\"),\n", "                           acres = _.acres.round(4)\n", "                          )\n", "                 .cast({\"geom\": \"geometry\", \"established\": \"string\", \"gap_code\": \"int16\", \"status\": \"string\",\"name\": \"string\",\n", "                        \"access_type\": \"string\", \"manager\": \"string\", \"manager_type\": \"string\",\n", "                        \"ecoregion\": \"string\", \"easement\": \"string\", \"id\": \"string\", \"type\": \"string\",\n", "                        \"acres\":\"float64\"}) #match schema to CA Nature\n", "                 .mutate(geom = ST_MakeValid(_.geom))\n", "                 .drop_null(['geom'],how = \"any\")\n", "                 )\n", "\n", "non_conserved.execute().set_crs('epsg:3310').to_parquet('ca_cbn_nonconserved_areas.parquet')\n", "s3.fput_object(\"public-ca30x30\", 'Preprocessing/ca_cbn_nonconserved_areas.parquet', 'ca_cbn_nonconserved_areas.parquet') " ] }, { "cell_type": "markdown", "id": "104254ef-f6e9-4f03-8797-de55091774d5", "metadata": {}, "source": [ "# Step 2: Isolate the \"newly protected\" polygons" ] }, { "cell_type": "code", "execution_count": null, "id": "a3d4f189-1563-4868-9f1f-64d67569df27", "metadata": {}, "outputs": [], "source": [ "# negative buffer to account for overlapping boundaries. \n", "buffer = -30 #30m buffer \n", "\n", "tbl = (\n", "    con.read_parquet(ca_raw_parquet)\n", "    .cast({\"SHAPE\": \"geometry\"})\n", "    .rename(geom = \"SHAPE\")\n", "    .filter(_.reGAP < 3) # only gap 1 and 2 count towards 30x30\n", ")\n", "\n", "\n", "# polygons with release_year 2024 are a superset of release_year 2023. \n", "# use anti_join to isolate the objects that are in release_year 2024 but not release_year 2023 (aka newly established). \n", "tbl_2023 = tbl.filter(_.Release_Year == 2023).mutate(geom=_.geom.buffer(buffer)) \n", "tbl_2024 = tbl.filter(_.Release_Year == 2024)\n", "intersects = tbl_2024.anti_join(tbl_2023, _.geom.intersects(tbl_2023.geom))" ] }, { "cell_type": "code", "execution_count": null, "id": "e07a4f0a-e717-4a11-bbce-4e96d7da7293", "metadata": {}, "outputs": [], "source": [ "# buffer = 160 #0.1mile buffer \n", "\n", "# tbl_2024 = (\n", "#     con.read_parquet(ca_raw_parquet)\n", "#     .cast({\"SHAPE\": \"geometry\"})\n", "#     .rename(geom = \"SHAPE\")\n", "#     .filter(_.Release_Year == 2024)\n", "#     .mutate(geom=_.geom.buffer(buffer)) \n", "# )\n", "\n", "# tbl_new = (\n", "#     con.read_parquet(newly_protected)\n", "# )\n", "\n", "# intersects = tbl_new.anti_join(tbl_2024, _.geom.intersects(tbl_2024.geom))" ] }, { "cell_type": "markdown", "id": "1f335433-ff89-4966-bf98-c11a0b233686", "metadata": {}, "source": [ "# Step 3: Join all protected lands + non-conserved areas " ] }, { "cell_type": "code", "execution_count": null, "id": "a59c976b-3c36-40f9-a15b-cefcd155c647", "metadata": {}, "outputs": [], "source": [ "# %%time\n", "new2024 = intersects.select(\"OBJECTID\").mutate(established = ibis.literal(\"2024\")) # saving IDs to join on\n", "\n", "ca_merged = (con\n", "             .read_parquet(ca_raw_parquet)\n", "             .cast({\"SHAPE\": \"geometry\"})\n", "             .mutate(area = _.SHAPE.area())\n", "             .filter(_.Release_Year == 2024) # having both 2023 and 2024 is redundant since 2024 is the superset.\n", "             .left_join(new2024, \"OBJECTID\") # newly established 2024 polygons \n", "             .mutate(established=_.established.fill_null(\"pre-2024\")) \n", "             .rename(name = \"cpad_PARK_NAME\", access_type = \"cpad_ACCESS_TYP\", manager = \"cpad_MNG_AGENCY\",\n", "                     manager_type = \"cpad_MNG_AG_LEV\", id = \"OBJECTID\", type = \"TYPE\", \n", "                     ecoregion = \"CA_Ecoregion_Name\", acres = \"Acres\", 
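\n",
"                     # the cpad_* source columns presumably carry California Protected Areas Database (CPAD)\n",
"                     # attributes bundled in the CA Nature export; reGAP is the GAP status code used to\n",
"                     # derive the 'status' labels below\n",
"                     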
gap_code = \"reGAP\", geom = \"SHAPE\")\n", " .cast({\"gap_code\": \"int16\"})\n", " .cast({\"id\": \"int64\"})\n", " .mutate(manager = _.manager.substitute({\"\": \"Unknown\"})) \n", " .mutate(manager_type = _.manager_type.substitute({\"\": \"Unknown\"}))\n", " .mutate(access_type = _.access_type.substitute({\"\": \"Unknown Access\"}))\n", " .mutate(name = _.name.substitute({\"\": \"Unknown\"}))\n", " .mutate(manager_type = _.manager_type.substitute({\"Home Owners Association\": \"HOA\"}))\n", " .mutate(easement=_.Easement.cast(\"string\").substitute({\"0\": \"False\", \"1\": \"True\"}))\n", " .mutate(status=_.gap_code.cast(\"string\")\n", " .substitute({\"1\": \"30x30-conserved\", \"2\": \"30x30-conserved\", \"3\": \"other-conserved\", \n", " \"4\": \"unknown\"}))\n", " .select(_.established, _.gap_code, _.status, _.name, _.access_type, _.manager, _.manager_type,\n", " _.ecoregion, _.easement, _.acres, _.id, _.type, _.geom)\n", " )\n" ] }, { "cell_type": "markdown", "id": "70fcaef1-2cea-4330-9715-76fa94357921", "metadata": {}, "source": [ "#### Adding county data + non conserved areas\n", "\n", "Non conserved data already has counties, so we join it after we add counties to cpad" ] }, { "cell_type": "code", "execution_count": null, "id": "4d6177e2-8ece-4eb9-acc2-5fb5c5beb8bb", "metadata": {}, "outputs": [], "source": [ "%%time \n", "counties = con.read_parquet('../CA_counties.parquet')\n", "# ca = con.read_parquet(ca_temp_parquet)\n", "\n", "con.create_table(\"counties\", counties.select(\"COUNTY_NAM\",\"geom\"), overwrite = True)\n", "con.create_table(\"ca\", ca_merged, overwrite = True)\n", "\n", "# getting county name(s) for each protected area \n", "con.con.execute('''\n", "CREATE TABLE counties_data AS\n", "SELECT \n", " ca.*, \n", " counties.COUNTY_NAM AS county,\n", " ST_Intersection(ca.geom, counties.geom) AS geom\n", "FROM ca\n", "JOIN counties \n", " ON ST_Intersects(ca.geom, counties.geom)\n", "WHERE NOT ST_IsEmpty(ST_Intersection(ca.geom, counties.geom))\n", " AND ST_GeometryType(ST_Intersection(ca.geom, counties.geom)) IN ('POLYGON', 'MULTIPOLYGON');\n", "''')\n", "\n", "import string\n", "from ibis import window, literal, row_number\n", "\n", "win = window(group_by = \"id\", order_by=\"geom\")\n", "\n", "# add suffix to duplicate ids (caused by separating the areas by county)\n", "def map_idx_to_letter(idx):\n", " case = idx.case()\n", " for i, letter in enumerate(string.ascii_lowercase[:26]):\n", " case = case.when(i, letter)\n", " return case.else_(\"\").end()\n", " \n", "all_data = (\n", " con.table(\"counties_data\")\n", " .drop(\"geom\")\n", " .rename(geom=\"geom_1\")\n", " # # modify the ids for areas that span multiple counties \n", " .mutate(\n", " geom = ST_MakeValid(_.geom),\n", " acres=_.geom.area() / 4046.8564224,\n", " id_count=_.id.count().over(win), # \n", " idx= row_number().over(win) - 1,\n", " )\n", " # e.g. if id = 11 has 2 rows (bc it spans 2 counties), make each row 11a and 11b. 
\n", " .mutate(\n", " id=_.id.cast(\"string\") + (_.id_count > 1).ifelse(map_idx_to_letter(_.idx.cast(\"int\")), \"\")\n", " )\n", " .drop(\"id_count\", \"idx\")\n", " .drop_null(['geom'],how = \"any\")\n", " .union(non_conserved)\n", " .mutate(acres=_.acres.round(4))\n", ")\n", "\n", "gdf = all_data.execute()\n", "\n", "gdf.set_crs(\"epsg:3310\").to_parquet(ca_base_parquet)\n", "s3.fput_object(\"public-ca30x30\", 'CA_Nature/2024/Preprocessing/'+ca_base_parquet, ca_base_parquet) " ] }, { "cell_type": "markdown", "id": "44d64f2b-a65b-4ac1-9943-2d96f5c91e1d", "metadata": {}, "source": [ "# Step 4: Compute metrics w/ data layers" ] }, { "cell_type": "markdown", "id": "94e924fd-d927-4458-ba1f-670b4047d149", "metadata": {}, "source": [ "#### Raster data" ] }, { "cell_type": "code", "execution_count": null, "id": "cfa88d41-d634-4916-9248-4368aab12117", "metadata": {}, "outputs": [], "source": [ "# getting the habitat name from pixel\n", "import xml.etree.ElementTree as ET\n", "\n", "def get_habitat_type(fieldname):\n", " aux_xml_path = 'fveg22_1_processed.tif.aux.xml'\n", " s3.fget_object('public-ca30x30','CBN/Habitat/'+aux_xml_path, aux_xml_path)\n", " tree = ET.parse(aux_xml_path)\n", " root = tree.find(\".//GDALRasterAttributeTable\")\n", " field_names = [f.find(\"Name\").text for f in root.findall(\"FieldDefn\")]\n", " val_i, name_i = field_names.index(\"Value\"), field_names.index(fieldname)\n", "\n", " return {\n", " int(r.findall(\"F\")[val_i].text): r.findall(\"F\")[name_i].text\n", " for r in root.findall(\"Row\")\n", " }\n", "\n", "habitat_lookup = get_habitat_type(\"WHR13NAME\")" ] }, { "cell_type": "code", "execution_count": null, "id": "856e7760-d5ec-4bc1-ac33-a9ddaccec0c0", "metadata": {}, "outputs": [], "source": [ "# computing overlap \n", "def compute_overlap(stats, name):\n", " return [\n", " round(s[\"count\"] / (s[\"count\"] + s[\"nodata\"]), 4)\n", " if (s[\"count\"] + s[\"nodata\"]) > 0 else 0\n", " for s in stats\n", " ]" ] }, { "cell_type": "code", "execution_count": null, "id": "24fd5a25-2d42-482f-a560-9c57dbb2f093", "metadata": {}, "outputs": [], "source": [ "%%time\n", "from rasterstats import zonal_stats\n", "import numpy as np\n", "import warnings\n", "warnings.filterwarnings(\"ignore\", message=\"Warning: converting a masked element to nan\") #ignoring warning \n", "\n", "rasters = [climate_zones, habitat, plant_richness, endemic_plant_richness, resilient_conn_network]\n", "names = ['climate_zone','habitat_type','plant_richness','rarityweighted_endemic_plant_richness', 'resilient_connected_network']\n", "\n", "gdf_stats = gpd.read_parquet(ca_base_parquet) # read in data if it's not already created \n", "\n", "for file,name in zip(rasters,names):\n", " if name in ['climate_zone','habitat_type','resilient_connected_network']:\n", " metric = \"majority\"\n", " else: \n", " metric = [\"count\", \"nodata\"]\n", " raster_stats = zonal_stats(ca_base_parquet, file, stats = metric)\n", " if name in ['plant_richness','rarityweighted_endemic_plant_richness']:\n", " values = compute_overlap(raster_stats, name)\n", " else:\n", " values = [d[metric] for d in raster_stats]\n", " gdf_stats[name] = values\n", "\n", "# getting the habitat name from the pixel\n", "gdf_stats['habitat_type'] = gdf_stats['habitat_type'].map(habitat_lookup)\n", "gdf_stats.to_parquet(ca_temp_stats_parquet) \n" ] }, { "cell_type": "markdown", "id": "f45a0f52-6d18-45b4-8585-af3f1190b000", "metadata": {}, "source": [ "#### Vector data" ] }, { "cell_type": "code", "execution_count": null, "id": 
"fdeeb7ac-efa0-4a7b-9143-72d8ec911809", "metadata": {}, "outputs": [], "source": [ "def vector_vector_stats(base, data_layer):\n", " t1 = con.read_parquet(base).select(_.id, _.geom)\n", " t2 = con.read_parquet(data_layer).select(_.geom)\n", "\n", " expr = (t1\n", " .left_join(t2, t1.geom.intersects(t2.geom))\n", " .group_by(t1.id, t1.geom)\n", " .agg(overlap_fraction = (t1.geom.intersection(t2.geom).area() / t1.geom.area()) \n", " .sum().coalesce(0).round(3) ) # overlap \n", " )\n", " ibis.to_sql(expr)\n", " stats = expr.execute()\n", " return stats[['id','overlap_fraction']]" ] }, { "cell_type": "code", "execution_count": null, "id": "b110da15-d2ac-4457-9241-f02f44dc436a", "metadata": {}, "outputs": [], "source": [ "%%time\n", "\n", "## this takes ~24 hours\n", "\n", "names = ['ACE_rarerank_statewide', 'ACE_rarerank_ecoregion',\n", " 'ACE_biorank_statewide', 'ACE_biorank_ecoregion',\n", " 'ACE_amphibian_richness','ACE_reptile_richness',\n", " 'ACE_bird_richness','ACE_mammal_richness',\n", " 'ACE_rare_amphibian_richness','ACE_rare_reptile_richness',\n", " 'ACE_rare_bird_richness','ACE_rare_mammal_richness',\n", " 'ACE_endemic_amphibian_richness','ACE_endemic_reptile_richness',\n", " 'ACE_endemic_bird_richness','ACE_endemic_mammal_richness',\n", " 'wetlands','fire','farmland','grazing','DAC','low_income']\n", "\n", "vectors = [ACE_rarerank_statewide, ACE_rarerank_ecoregion,\n", " ACE_biorank_statewide, ACE_biorank_ecoregion,\n", " ACE_amph_richness, ACE_reptile_richness,\n", " ACE_bird_richness, ACE_mammal_richness,\n", " ACE_rare_amphibian_richness, ACE_rare_reptile_richness,\n", " ACE_rare_bird_richness, ACE_rare_mammal_richness,\n", " ACE_endemic_amphibian_richness,\n", " ACE_endemic_reptile_richness,\n", " ACE_endemic_bird_richness,\n", " ACE_endemic_mammal_richness,\n", " wetlands, fire,\n", " farmland, grazing,\n", " DAC, low_income]\n", "\n", "\n", "gdf_stats = gpd.read_parquet(ca_temp_stats_parquet) \n", "\n", " # set the index to the col we are joining on for gpd.join()\n", "gdf_stats = gdf_stats.set_index('id')\n", "\n", "for file,name in zip(vectors,names):\n", " vector_stats = vector_vector_stats(ca_base_parquet, file) \n", " vector_stats = vector_stats.rename(columns ={'overlap_fraction':name}) \n", "\n", " # joining new zonal stats column with CA Nature data. 
\n", " gdf_stats = gdf_stats.join(vector_stats.set_index('id'))\n", " # gdf_stats.to_parquet(name+'_v2.parquet') #save CA Nature + zonal stats \n", "\n", "gdf_stats = gdf_stats.reset_index()\n", "gdf_stats = gdf_stats.to_crs(\"epsg:4326\") # to make pmtiles, we need to switch to epsg:4326\n", "gdf_stats.to_parquet(ca_parquet)\n" ] }, { "cell_type": "markdown", "id": "ec619f4e-1338-492a-a334-a7796f4f55a1", "metadata": {}, "source": [ "# Step 5: Upload file + Generate PMTiles" ] }, { "cell_type": "code", "execution_count": null, "id": "30f47b26-cd18-4e8c-a19b-9d1f19b10873", "metadata": {}, "outputs": [], "source": [ "# upload parquet to minio \n", "s3_cp(ca_parquet, \"s3://public-ca30x30/\"+ca_parquet, \"minio\")\n", "\n", "#to use PMTiles, need to convert to geojson\n", "ca_geo = (con\n", " .read_parquet(ca_parquet)\n", " )\n", "\n", "#can't go directly from parquet -> pmtiles, need to go parquet -> geojson -> pmtiles \n", "ca_geo.execute().to_file(ca_geojson) \n", "pmtiles = to_pmtiles(ca_geojson, ca_pmtiles, options = ['--extend-zooms-if-still-dropping'])\n", "\n", "# upload pmtiles to minio\n", "s3_cp(ca_pmtiles, \"s3://public-ca30x30/\"+ca_pmtiles, \"minio\")" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.12.9" } }, "nbformat": 4, "nbformat_minor": 5 }