Spaces:

boettiger-lab
/

ca-30x30-cbn

Running

App Files Files Community

cassiebuhler committed on Apr 20

Commit

2db4717

1 Parent(s): e5599e8

added newly protected areas!

Browse files

Files changed (4) hide show

app/utils.py +2 -3
app/variables.py +59 -33
preprocess/CBN-data.ipynb +92 -16
preprocess/h3_utils.py +25 -9

app/utils.py CHANGED Viewed

@@ -91,6 +91,7 @@ def get_summary(ca, combined_filter, column, main_group, colors = None):
         "percent_CA": (_.acres.sum() / ca_area_acres),
         "acres": _.acres.sum(),
         }
     # add percent + acres aggregates
     dynamic_aggs = {}
     for key in keys:
@@ -107,7 +108,7 @@ def get_summary(ca, combined_filter, column, main_group, colors = None):
           .aggregate(**all_aggs)
           .mutate(percent_CA=_.percent_CA.round(5), acres=_.acres.round(0))
         )
     # Compute total acres by group and percent of group
     group_totals = (ca.filter(combined_filter)
                       .group_by(main_group)
@@ -177,7 +178,6 @@ def get_pmtiles_style(paint, alpha, filter_cols, filter_vals):
     if "non-conserved" in chain.from_iterable(filter_vals):
         combined_filters = ["any", combined_filters, ["match", ["get", "status"], ["non-conserved"], True, False]]
-    source_layer_name = re.sub(r'\W+', '', os.path.splitext(os.path.basename(ca_pmtiles))[0]) #stripping hyphens to get layer name
     return {
         "version": 8,
         "sources": {"ca": {"type": "vector", "url": f"pmtiles://{ca_pmtiles}"}},
@@ -198,7 +198,6 @@ def get_pmtiles_style_llm(paint, ids):
     """
     Generates a MapLibre GL style for PMTiles using specific IDs as filters.
     """
-    source_layer_name = re.sub(r'\W+', '', os.path.splitext(os.path.basename(ca_pmtiles))[0]) #stripping hyphens to get layer name
     return {
         "version": 8,
         "sources": {"ca": {"type": "vector", "url": f"pmtiles://{ca_pmtiles}"}},

         "percent_CA": (_.acres.sum() / ca_area_acres),
         "acres": _.acres.sum(),
         }
     # add percent + acres aggregates
     dynamic_aggs = {}
     for key in keys:
           .aggregate(**all_aggs)
           .mutate(percent_CA=_.percent_CA.round(5), acres=_.acres.round(0))
         )
     # Compute total acres by group and percent of group
     group_totals = (ca.filter(combined_filter)
                       .group_by(main_group)
     if "non-conserved" in chain.from_iterable(filter_vals):
         combined_filters = ["any", combined_filters, ["match", ["get", "status"], ["non-conserved"], True, False]]
     return {
         "version": 8,
         "sources": {"ca": {"type": "vector", "url": f"pmtiles://{ca_pmtiles}"}},
     """
     Generates a MapLibre GL style for PMTiles using specific IDs as filters.
     """
     return {
         "version": 8,
         "sources": {"ca": {"type": "vector", "url": f"pmtiles://{ca_pmtiles}"}},

app/variables.py CHANGED Viewed

@@ -1,13 +1,7 @@
 # urls for main layer
-# ca_parquet = "https://huggingface.co/datasets/boettiger-lab/ca-30x30/resolve/da85dd9ca1c774d4ddf821555e3c3c9e13c9b857/ca-30x30.parquet"
-# ca_pmtiles = "https://huggingface.co/datasets/boettiger-lab/ca-30x30/resolve/896db6c9a37488ee7c53ee56df67b3ccfd44d150/ca-30x30.pmtiles"
-ca_parquet = 'https://minio.carlboettiger.info/public-ca30x30/ca-30x30-cbn.parquet'
-ca_pmtiles = 'https://minio.carlboettiger.info/public-ca30x30/ca-30x30-cbn.pmtiles'
-# ca_parquet = 'https://minio.carlboettiger.info/public-ca30x30/hex/zoom8/ca-30x30-cbn-newlyprotected.parquet'
-# ca_pmtiles = 'https://minio.carlboettiger.info/public-ca30x30/ca-30x30-cbn-newlyprotected.pmtiles'
 # computed by taking the sum of all the acres in this file:
 # https://minio.carlboettiger.info/public-ca30x30/CBN-data/Progress_data_new_protection/Land_Status_Zone_Ecoregion_Counties/all_regions_reGAP_county_eco.parquet
 ca_area_acres = 101523750.68856516
@@ -29,7 +23,10 @@ def get_url(folder, file, base_folder = 'CBN'):
         path = os.path.join(bucket,base_folder,folder,file)
     url = minio+path
     return url
 #vector data
 url_ACE_rarerank_statewide = get_url('ACE_biodiversity/ACE_rarerank_statewide','ACE_rarerank_statewide.pmtiles')
 url_ACE_rarerank_ecoregion = get_url('ACE_biodiversity/ACE_rarerank_ecoregion','ACE_rarerank_ecoregion.pmtiles')
@@ -66,6 +63,7 @@ url_resilient_conn_network = get_url('Connectivity_resilience/Resilient_connecte
 # column names for all data layers
 keys = [
     "ACE_amphibian_richness", "ACE_reptile_richness", "ACE_bird_richness",
     "ACE_mammal_richness", "ACE_rare_amphibian_richness", "ACE_rare_reptile_richness",
     "ACE_rare_bird_richness", "ACE_rare_mammal_richness", "ACE_endemic_amphibian_richness",
@@ -78,43 +76,48 @@ chatbot_toggles = {key: False for key in keys}
 # data layers dict
 layer_config = [
     #[(section, 'a_amph', [(col_name, full name, key, chatbot toggle key)])]
     ('🐸 Amphibian', 'a_amph', [
-        ('amphibian_richness', 'Amphibian Richness', keys[0], chatbot_toggles[keys[0]]),
-        ('rare_amphibian_richness', 'Rare Amphibian Richness', keys[1], chatbot_toggles[keys[1]]),
-        ('endemic_amphibian_richness', 'Endemic Amphibian Richness', keys[2], chatbot_toggles[keys[2]]),
     ]),
     ('🐍 Reptile', 'a_rept', [
-        ('reptile_richness', 'Reptile Richness', keys[3], chatbot_toggles[keys[3]]),
-        ('rare_reptile_richness', 'Rare Reptile Richness', keys[4], chatbot_toggles[keys[4]]),
-        ('endemic_reptile_richness', 'Endemic Reptile Richness', keys[5], chatbot_toggles[keys[5]]),
     ]),
     ('🦜 Bird', 'a_bird', [
-        ('bird_richness', 'Bird Richness', keys[6], chatbot_toggles[keys[6]]),
-        ('rare_bird_richness', 'Rare Bird Richness', keys[7], chatbot_toggles[keys[7]]),
-        ('endemic_bird_richness', 'Endemic Bird Richness', keys[8], chatbot_toggles[keys[8]]),
     ]),
     ('🦌 Mammal', 'a_mammal', [
-        ('mammal_richness', 'Mammal Richness', keys[9], chatbot_toggles[keys[9]]),
-        ('rare_mammal_richness', 'Rare Mammal Richness', keys[10], chatbot_toggles[keys[10]]),
-        ('endemic_mammal_richness', 'Endemic Mammal Richness', keys[11], chatbot_toggles[keys[11]]),
     ]),
     ('🌿 Plant', 'a_plant', [
-        ('plant_richness', 'Plant Richness', keys[12], chatbot_toggles[keys[12]]),
-        ('rarityweighted_endemic_plant_richness', 'Rarity-Weighted\nEndemic Plant Richness', keys[13], chatbot_toggles[keys[13]]),
     ]),
     ('💧 Freshwater Resources', 'freshwater', [
-        ('wetlands', 'Wetlands', keys[14], chatbot_toggles[keys[14]]),
     ]),
     ('🚜 Agriculture', 'agriculture', [
-        ('farmland', 'Farmland', keys[15], chatbot_toggles[keys[15]]),
-        ('grazing', 'Lands Suitable for Grazing', keys[16], chatbot_toggles[keys[16]]),
     ]),
     ('👤 People', 'SVI', [
-        ('DAC', 'Disadvantaged Communities', keys[17], chatbot_toggles[keys[17]]),
-        ('low_income', 'Low-Income Communities', keys[18], chatbot_toggles[keys[18]]),
     ]),
     ('🔥 Climate Risks', 'calfire', [
-        ('fire', 'Historical Fire Perimeters', keys[19], chatbot_toggles[keys[19]]),
     ])
 ]
@@ -135,8 +138,8 @@ county_color = "#DE3163" # magenta
 city_color = "#ADD8E6" #light blue
 hoa_color = "#A89BBC" # purple
 nonprofit_color =  "#D77031" #orange
-justice40_color =  "#00008B" #purple
-svi_color = "#1bc7c3" #cyan
 white =  "#FFFFFF"
@@ -406,6 +409,30 @@ networks = {
     'default': white
 }
 style_options = {
     "30x30 Status": status,
     "GAP Code": gap,
@@ -419,7 +446,6 @@ style_options = {
     "Access Type": access,
 }
-print(style_options)
 select_column = {
     "30x30 Status":  "status",
     "GAP Code": "gap_code",

 # urls for main layer
+ca_parquet = 'https://minio.carlboettiger.info/public-ca30x30/ca30x30cbn_newlyprotected.parquet'
+ca_pmtiles = 'https://minio.carlboettiger.info/public-ca30x30/ca30x30cbn_newlyprotected.pmtiles'
+#
 # computed by taking the sum of all the acres in this file:
 # https://minio.carlboettiger.info/public-ca30x30/CBN-data/Progress_data_new_protection/Land_Status_Zone_Ecoregion_Counties/all_regions_reGAP_county_eco.parquet
 ca_area_acres = 101523750.68856516
         path = os.path.join(bucket,base_folder,folder,file)
     url = minio+path
     return url
+import re
+source_layer_name = re.sub(r'\W+', '', os.path.splitext(os.path.basename(ca_pmtiles))[0]) #stripping hyphens to get layer name
 #vector data
 url_ACE_rarerank_statewide = get_url('ACE_biodiversity/ACE_rarerank_statewide','ACE_rarerank_statewide.pmtiles')
 url_ACE_rarerank_ecoregion = get_url('ACE_biodiversity/ACE_rarerank_ecoregion','ACE_rarerank_ecoregion.pmtiles')
 # column names for all data layers
 keys = [
+    "update_newly_protected", "update_increased_management", "update_data_improvement",
     "ACE_amphibian_richness", "ACE_reptile_richness", "ACE_bird_richness",
     "ACE_mammal_richness", "ACE_rare_amphibian_richness", "ACE_rare_reptile_richness",
     "ACE_rare_bird_richness", "ACE_rare_mammal_richness", "ACE_endemic_amphibian_richness",
 # data layers dict
 layer_config = [
     #[(section, 'a_amph', [(col_name, full name, key, chatbot toggle key)])]
+    ('📈 Data Updates', 'a_new', [
+        ('update_newly_protected', 'Newly Protected', keys[0], chatbot_toggles[keys[0]]),
+        ('update_increased_management', 'Increased Management', keys[1], chatbot_toggles[keys[1]]),
+        ('update_data_improvement', 'Data Improvement', keys[2], chatbot_toggles[keys[2]]),
+    ]),
     ('🐸 Amphibian', 'a_amph', [
+        ('amphibian_richness', 'Amphibian Richness', keys[3], chatbot_toggles[keys[3]]),
+        ('rare_amphibian_richness', 'Rare Amphibian Richness', keys[4], chatbot_toggles[keys[4]]),
+        ('endemic_amphibian_richness', 'Endemic Amphibian Richness', keys[5], chatbot_toggles[keys[5]]),
     ]),
     ('🐍 Reptile', 'a_rept', [
+        ('reptile_richness', 'Reptile Richness', keys[6], chatbot_toggles[keys[6]]),
+        ('rare_reptile_richness', 'Rare Reptile Richness', keys[7], chatbot_toggles[keys[7]]),
+        ('endemic_reptile_richness', 'Endemic Reptile Richness', keys[8], chatbot_toggles[keys[8]]),
     ]),
     ('🦜 Bird', 'a_bird', [
+        ('bird_richness', 'Bird Richness', keys[9], chatbot_toggles[keys[9]]),
+        ('rare_bird_richness', 'Rare Bird Richness', keys[10], chatbot_toggles[keys[10]]),
+        ('endemic_bird_richness', 'Endemic Bird Richness', keys[11], chatbot_toggles[keys[11]]),
     ]),
     ('🦌 Mammal', 'a_mammal', [
+        ('mammal_richness', 'Mammal Richness', keys[12], chatbot_toggles[keys[12]]),
+        ('rare_mammal_richness', 'Rare Mammal Richness', keys[13], chatbot_toggles[keys[13]]),
+        ('endemic_mammal_richness', 'Endemic Mammal Richness', keys[14], chatbot_toggles[keys[14]]),
     ]),
     ('🌿 Plant', 'a_plant', [
+        ('plant_richness', 'Plant Richness', keys[15], chatbot_toggles[keys[15]]),
+        ('rarityweighted_endemic_plant_richness', 'Rarity-Weighted\nEndemic Plant Richness', keys[16], chatbot_toggles[keys[16]]),
     ]),
     ('💧 Freshwater Resources', 'freshwater', [
+        ('wetlands', 'Wetlands', keys[17], chatbot_toggles[keys[17]]),
     ]),
     ('🚜 Agriculture', 'agriculture', [
+        ('farmland', 'Farmland', keys[18], chatbot_toggles[keys[18]]),
+        ('grazing', 'Lands Suitable for Grazing', keys[19], chatbot_toggles[keys[19]]),
     ]),
     ('👤 People', 'SVI', [
+        ('DAC', 'Disadvantaged Communities', keys[20], chatbot_toggles[keys[20]]),
+        ('low_income', 'Low-Income Communities', keys[21], chatbot_toggles[keys[21]]),
     ]),
     ('🔥 Climate Risks', 'calfire', [
+        ('fire', 'Historical Fire Perimeters', keys[22], chatbot_toggles[keys[22]]),
     ])
 ]
 city_color = "#ADD8E6" #light blue
 hoa_color = "#A89BBC" # purple
 nonprofit_color =  "#D77031" #orange
+purple =  "#00008B" #purple
+cyan = "#1bc7c3" #cyan
 white =  "#FFFFFF"
     'default': white
 }
+update_type_style = {
+        "version": 8,
+        "sources": {"ca": {"type": "vector", "url": f"pmtiles://{ca_pmtiles}"}},
+        "layers": [
+            {
+                "id": "ca30x30",
+                "source": "ca",
+                "source-layer": source_layer_name,
+                "type": "fill",
+                "paint": {
+                    "fill-color": [
+                        "interpolate", ["linear"], ["get", "update_newly_protected"],
+                        0, white,
+                        1, purple
+                    ]
+                }
+            }
+        ]
+    }
 style_options = {
     "30x30 Status": status,
     "GAP Code": gap,
     "Access Type": access,
 }
 select_column = {
     "30x30 Status":  "status",
     "GAP Code": "gap_code",

preprocess/CBN-data.ipynb CHANGED Viewed

@@ -22,7 +22,6 @@
     "\n",
     "import os\n",
     "os.chdir('../data/')\n",
-    "\n",
     "duckdb_install_h3()"
    ]
   },
@@ -753,7 +752,8 @@
     "         'CA_Marine_','Release_Ye','ORIG_FID',\n",
     "         'updatetype']\n",
     "\n",
-    "convert_h3(con, s3, folder = folder, file = f\"{name}.parquet\", cols = cols, zoom = 8)"
    ]
   },
   {
@@ -959,16 +959,18 @@
     "# convert_pmtiles(con, s3, folder = folder, file = f\"{name}.parquet\")\n",
     "# process_vector(s3, folder = folder, file = f\"{name}.shp\", crs=\"EPSG:4326\")\n",
     "\n",
-    "convert_h3(con, s3, folder = folder, file = f\"{name}.parquet\", cols= cols, group = 'ecoregion', zoom = 8)\n",
     "\n"
    ]
   },
   {
    "cell_type": "markdown",
-   "id": "980c7e88-8dc6-4bc6-bfa4-8ea301c6ee80",
    "metadata": {},
    "source": [
-    "#### join with newly protected lands"
    ]
   },
   {
@@ -980,31 +982,105 @@
    "source": [
     "con = ibis.duckdb.connect('joined',extensions = [\"spatial\", \"h3\"])\n",
     "set_secrets(con)\n",
     "\n",
-    "ca_nature_url = \"s3://public-ca30x30/hex/zoom8/ca-30x30-cbn.parquet\"\n",
-    "new_lands_url = \"s3://public-ca30x30/CBN/Progress_data_new_protection/Newly_counted_lands/hex/zoom8/newly_counted_lands_2024.parquet\"\n",
     "\n",
     "ca_nature = (con.read_parquet(ca_nature_url)\n",
-    "             .mutate(update_type = None)\n",
     "            )\n",
     "\n",
     "new = (con.read_parquet(new_lands_url)\n",
     "       .mutate(update_type = 'updatetype')\n",
-    "       .select(\"update_type\",\"h8\")\n",
     "      )\n",
     "\n",
-    "joined = (ca_nature.left_join(new,\"h8\")\n",
-    "     .drop('h8_right','update_type')\n",
     "    .rename(update_type = 'update_type_right')\n",
     "    )\n",
     "\n",
-    "name = 'ca-30x30-cbn-newlyprotected'\n",
     "\n",
-    "joined.to_parquet(f\"{name}.parquet\")\n",
-    "joined.to_parquet(f\"s3://public-ca30x30/hex/zoom8/{name}.parquet\")\n",
     "\n",
-    "#maybe get pmtiles?\n",
-    "convert_pmtiles(con, s3, folder = None, file = f\"{name}.parquet\", base_folder = None, current_crs = 'epsg:4326')"
    ]
   }
  ],

     "\n",
     "import os\n",
     "os.chdir('../data/')\n",
     "duckdb_install_h3()"
    ]
   },
     "         'CA_Marine_','Release_Ye','ORIG_FID',\n",
     "         'updatetype']\n",
     "\n",
+    "convert_h3(con, s3, folder = folder, file = f\"{name}.parquet\", \n",
+    "           cols = cols, zoom = 12)"
    ]
   },
   {
     "# convert_pmtiles(con, s3, folder = folder, file = f\"{name}.parquet\")\n",
     "# process_vector(s3, folder = folder, file = f\"{name}.shp\", crs=\"EPSG:4326\")\n",
     "\n",
+    "convert_h3(con, s3, folder = folder, file = f\"{name}.parquet\", cols= cols, \n",
+    "           group = 'ecoregion', zoom = 12)\n",
     "\n"
    ]
   },
   {
    "cell_type": "markdown",
+   "id": "e6b708e4-671a-4ffd-9102-6aebb21fcf84",
    "metadata": {},
    "source": [
+    "#### join with newly protected data\n",
+    "- Both datasets need zoom 12 hexes so the newly protected data can be joined with the CA Nature data\n"
    ]
   },
   {
    "source": [
     "con = ibis.duckdb.connect('joined',extensions = [\"spatial\", \"h3\"])\n",
     "set_secrets(con)\n",
+    "zoom = 12\n",
     "\n",
+    "ca_nature_url = f\"s3://public-ca30x30/hex/zoom{zoom}/ca-30x30-cbn.parquet\"\n",
+    "new_lands_url = f\"s3://public-ca30x30/CBN/Progress_data_new_protection/Newly_counted_lands/hex/zoom{zoom}/newly_counted_lands_2024.parquet\"\n",
     "\n",
     "ca_nature = (con.read_parquet(ca_nature_url)\n",
+    "             .mutate(update_type = ibis.literal('d - no update')\n",
+    "            )\n",
     "            )\n",
     "\n",
     "new = (con.read_parquet(new_lands_url)\n",
     "       .mutate(update_type = 'updatetype')\n",
+    "       .select(\"update_type\",\"h12\")\n",
     "      )\n",
     "\n",
+    "joined = (ca_nature.left_join(new,\"h12\")\n",
+    "     .drop('h12_right','update_type')\n",
     "    .rename(update_type = 'update_type_right')\n",
     "    )\n",
     "\n",
+    "name = 'ca30x30cbn_newlyprotected_'\n",
+    "# joined.to_parquet(f\"s3://public-ca30x30/hex/zoom{zoom}/{name}.parquet\")\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "3a0accf9-500d-4430-ba45-8d0a4ad2e43e",
+   "metadata": {},
+   "source": [
+    "Once joined, we can aggregate by \"id\" again and drop the hex-level rows"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "6832e4bc-a359-4674-bad3-13052566176d",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "original_geoms = con.read_parquet(f\"s3://public-ca30x30/ca-30x30-cbn.parquet\")\n",
+    "\n",
+    "new = (\n",
+    "    con.read_parquet(f\"s3://public-ca30x30/hex/zoom12/ca30x30cbn_newlyprotected_*\")\n",
+    "    .drop('acres')\n",
+    "    .mutate(update_type = _.update_type.substitute(\n",
+    "        {'a - newly protected':'update_newly_protected',\n",
+    "        'b - increased management':'update_increased_management',\n",
+    "        'c - data improvement':'update_data_improvement'\n",
+    "        }))\n",
+    "    .mutate(update_type = _.update_type.fill_null('update_none')\n",
+    "           )\n",
+    ")\n",
+    "\n",
+    "# aggregate data\n",
+    "pivot = (\n",
+    "    new.pivot_wider(id_cols='id', names_from='update_type', values_from='id', values_agg='count')\n",
+    "    .mutate(total = _.update_newly_protected+ _.update_increased_management+ _.update_data_improvement + _.update_none)\n",
+    "    .mutate(update_newly_protected = (_.update_newly_protected/_.total).round(4),\n",
+    "            update_increased_management = (_.update_increased_management/_.total).round(4),\n",
+    "            update_data_improvement =( _.update_data_improvement/_.total).round(4),\n",
+    "            update_none = (_.update_none/_.total).round(4)\n",
+    "           )\n",
+    "    .drop(_.total)\n",
+    "    .left_join(original_geoms,'id')\n",
+    "    .drop('id_right')\n",
+    ")\n",
+    "\n",
+    "pivot.to_parquet(f\"s3://public-ca30x30/ca30x30cbn_newlyprotected.parquet\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "e348fc27-bb5e-4fdc-b694-9b499ba71d9d",
+   "metadata": {},
+   "source": [
+    "Making PMTiles"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "9b82a499-8431-4e64-ab97-5062e0f98969",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# upload parquet to minio \n",
+    "ca_geojson = \"ca30x30cbn_newlyprotected.geojson\"\n",
+    "ca_pmtiles = \"ca30x30cbn_newlyprotected.pmtiles\"\n",
+    "\n",
+    "url = f\"s3://public-ca30x30/ca30x30cbn_newlyprotected.parquet\"\n",
+    "#to use PMTiles, need to convert to geojson\n",
+    "ca_geo = con.read_parquet(url)\n",
     "\n",
+    "#can't go directly from parquet -> pmtiles, need to go parquet -> geojson -> pmtiles \n",
+    "ca_geo.execute().to_file(ca_geojson) \n",
+    "pmtiles = to_pmtiles(ca_geojson, ca_pmtiles, options = ['--extend-zooms-if-still-dropping'])\n",
     "\n",
+    "# upload pmtiles to minio\n",
+    "s3_cp(ca_pmtiles, \"s3://public-ca30x30/\"+ca_pmtiles, \"minio\")"
    ]
   }
  ],

preprocess/h3_utils.py CHANGED Viewed

@@ -44,6 +44,7 @@ def h3_from_geom(con, name, cols, save_path, zoom):
         FROM {name}
     )
     ''')
     con.sql(f'''
         SELECT {cols}, UNNEST(h{zoom}) AS h{zoom},
         ST_GeomFromText(h3_cell_to_boundary_wkt(UNNEST(h{zoom}))) AS geom
@@ -52,14 +53,29 @@ def h3_from_geom(con, name, cols, save_path, zoom):
 def compute_grouped(con, name, cols, zoom, group, path):
-    unique_groups = con.table(name).select(group).distinct().execute()[group].tolist()
     # separate data by group
-    for sub in unique_groups:
         sub_name = f"{name}_{re.sub(r'\W+', '_', sub)}"
-        con.raw_sql(f"""
-            CREATE OR REPLACE TEMP TABLE {sub_name} AS
-            SELECT * FROM {name} WHERE {group} = '{sub}'
-        """)
-        save_path = f"s3://{path}/hex/zoom{zoom}/group_{group}/{sub.replace(' ', '')}.parquet"
-        h3_from_geom(con, sub_name, cols, save_path, zoom)

         FROM {name}
     )
     ''')
     con.sql(f'''
         SELECT {cols}, UNNEST(h{zoom}) AS h{zoom},
         ST_GeomFromText(h3_cell_to_boundary_wkt(UNNEST(h{zoom}))) AS geom
 def compute_grouped(con, name, cols, zoom, group, path):
     """
     Convert table `name` to H3 hexes one `group` value at a time, in chunks.

     For every distinct value of column `group`, the matching rows are
     materialised once into a temp table with a fixed row number, then
     sliced into chunks of `chunk_size` rows. Each non-empty chunk is handed
     to `h3_from_geom` together with a save path of the form
     s3://{path}/hex/zoom{zoom}/group_{group}/{sub_name}_chunk{i}.parquet.

     Parameters
     ----------
     con : connection exposing `table`, `raw_sql` and `sql`
         (presumably an ibis DuckDB backend -- confirm against callers).
     name : name of the source table registered on `con`.
     cols : column selection forwarded unchanged to `h3_from_geom`.
     zoom : H3 resolution; used in the save path and forwarded to `h3_from_geom`.
     group : name of the column whose distinct values define the groups.
     path : bucket/prefix placed after "s3://" in the save path.
     """
     chunk_size = 500
     row_col = "_chunk_row"  # helper column used only to slice chunks
     groups = con.table(name).select(group).distinct().execute()[group].tolist()
     # separate data by group
     for sub in groups:
         # Sanitise outside the f-string: a backslash inside an f-string
         # replacement field is a SyntaxError before Python 3.12.
         safe_sub = re.sub(r'\W+', '_', sub)
         sub_name = f"{name}_{safe_sub}"
         # Double embedded single quotes so the value stays a valid SQL literal.
         sub_literal = sub.replace("'", "''")
         # Number the group's rows once. LIMIT/OFFSET without ORDER BY gives no
         # stable row order between queries, so chunks could overlap or skip
         # rows; slicing a materialised row number puts each row in one chunk.
         con.raw_sql(f"""
             CREATE OR REPLACE TEMP TABLE {sub_name} AS
             SELECT *, ROW_NUMBER() OVER () AS {row_col}
             FROM {name}
             WHERE {group} = '{sub_literal}'
         """)
         offset = 0
         i = 0
         # chunk data within groups
         while True:
             print(f'Processing group {sub_name} chunk {i} offset {offset}')
             chunk_name = f"{sub_name}_chunk{i}"
             con.raw_sql(f"""
                 CREATE OR REPLACE TEMP TABLE {chunk_name} AS
                 SELECT * EXCLUDE ({row_col}) FROM {sub_name}
                 WHERE {row_col} > {offset}
                   AND {row_col} <= {offset + chunk_size}
             """)
             # An empty chunk means the group has been fully consumed.
             if con.sql(f"SELECT 1 FROM {chunk_name} LIMIT 1").execute().empty:
                 break
             save_path = f"s3://{path}/hex/zoom{zoom}/group_{group}/{sub_name}_chunk{i}.parquet"
             h3_from_geom(con, chunk_name, cols, save_path, zoom)
             offset += chunk_size
             i += 1