{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "96bbe4c4-a600-437b-b096-dfb4ba2cc8fe",
   "metadata": {},
   "source": [
    "# Split geometries into habitat types and climate zones\n",
    "\n",
    "To assign each feature a habitat type and climate zone, we split up protected areas that span multiple habitat types or climate zones."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "d7150257-5f70-4419-a8fb-63bb12dd0963",
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "import sys\n",
    "\n",
    "from split import *\n",
    "\n",
    "# Put the parent directory on sys.path before the minio_utils import below.\n",
    "base_dir = os.path.abspath(os.path.join(os.getcwd(), '..'))\n",
    "if base_dir not in sys.path:\n",
    "    sys.path.insert(0, base_dir)\n",
    "\n",
    "from minio_utils import *\n",
    "\n",
    "con, _ = connect_minio()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "3b3c5bb2-86d1-419d-8d3d-0a99fe18f442",
   "metadata": {},
   "outputs": [],
   "source": [
    "%%time\n",
    "# Run for all ecoregions + gap codes, one subset per execution of this cell.\n",
    "# If you don't process the data in subsets (gap codes and ecoregions), it'll take a few days and you'll need 64GB+ of memory.\n",
    "SUBSETS_ROOT = 's3://public-ca30x30/CA_Nature/2024/Preprocessing/v3/subsets'\n",
    "\n",
    "# Subset processed by this run: change these two values, then re-run the cell.\n",
    "eco = get_ecoregion(10)\n",
    "label = 'gap2'\n",
    "print(label)\n",
    "print(eco)\n",
    "\n",
    "# Input: the base subset for this gap code and ecoregion.\n",
    "url = f'{SUBSETS_ROOT}/base/{label}/{eco}_epsg3310.parquet'\n",
    "result = split_layer(url, con)\n",
    "\n",
    "# Output: the result of split_layer, saved as parquet.\n",
    "save_url = f'{SUBSETS_ROOT}/split_habitat_climate/{label}/{label}_{eco}_habitat_climate.parquet'\n",
    "result.to_parquet(save_url)\n",
    "\n",
    "# check_results receives both the input and the output location.\n",
    "check_results(con, url, save_url)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.12.10"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}