ca-30x30-cbn / preprocess /h3_utils.py
cassiebuhler's picture
added newly protected areas!
2db4717
from utils import *
import re
def convert_h3(con, s3, folder, file, cols, zoom, group = None, base_folder = "CBN/"):
    """
    Entry point for turning a geometry parquet into H3 hexes.

    Resolves the bucket and object path for `file` through `info`, registers
    the parquet as a table on `con` (hyphens are stripped from the table
    name), and then hexes it either in a single pass (`h3_from_geom`) or, when
    `group` is given, group by group (`compute_grouped`) followed by a merge of
    the per-group outputs into one parquet.

    `cols` may be a list of column names or an already comma-joined string.
    `s3` is accepted but not used inside this function.
    """
    if isinstance(cols, list):
        cols = ", ".join(cols)

    # Without a folder, `info` is asked for the bare file with no base folder.
    info_args = (folder, file, base_folder) if folder else (None, file, None)
    bucket, full_path = info(*info_args)

    directory, filename = os.path.split(full_path)
    stem = os.path.splitext(filename)[0]
    print(f"Processing: {stem}")
    table = stem.replace('-', '')

    if not group:
        con.read_parquet(f"s3://{bucket}/{directory}/{filename}", table_name=table)
        print(f'Computing zoom level {zoom} without grouping.')
        destination = f"s3://{bucket}/{directory}/hex/zoom{zoom}/{stem}.parquet"
        h3_from_geom(con, table, cols, destination, zoom)
        return

    # NOTE(review): the grouped branch reads the input from the bucket root and
    # merges from `s3://{bucket}/hex/...`, while `compute_grouped` is handed
    # `{bucket}/{directory}` as its output prefix. These only line up when
    # `directory` is empty - confirm against `info` and the callers.
    con.read_parquet(f"s3://{bucket}/{stem}.parquet", table_name=table)
    print(f'Computing zoom level {zoom}, grouping the data based on {group}')
    compute_grouped(con, table, cols, zoom, group, path = f"{bucket}/{directory}")

    # Stitch every per-group chunk back into a single parquet.
    merged = con.read_parquet(f"s3://{bucket}/hex/zoom{zoom}/group_{group}/**")
    merged.to_parquet(f"s3://{bucket}/hex/zoom{zoom}/{stem}.parquet")
def h3_from_geom(con, name, cols, save_path, zoom):
    """
    Hexes the geometries of table `name` and writes the result to parquet.

    Step 1 builds temp table `t2`: each geometry is split with ST_Dump, forced
    to 2D, and converted to a list of H3 cells at `zoom`, stored in column
    `h{zoom}` next to `cols`.
    Step 2 unnests that list to one row per cell, attaches the cell boundary
    as `geom`, and writes the rows to `save_path`.
    """
    hex_list_sql = f'''
CREATE OR REPLACE TEMP TABLE t2 AS
SELECT {cols},
h3_polygon_wkt_to_cells_string(ST_Force2D(dump.geom), {zoom}) AS h{zoom}
FROM (
SELECT {cols}, UNNEST(ST_Dump(geom)) AS dump
FROM {name}
)
'''
    con.raw_sql(hex_list_sql)

    per_cell_sql = f'''
SELECT {cols}, UNNEST(h{zoom}) AS h{zoom},
ST_GeomFromText(h3_cell_to_boundary_wkt(UNNEST(h{zoom}))) AS geom
FROM t2
'''
    per_cell = con.sql(per_cell_sql)
    per_cell.to_parquet(save_path)
def compute_grouped(con, name, cols, zoom, group, path, chunk_size=500):
    """
    Computes hexes one group at a time, in fixed-size chunks of rows.

    For every distinct value of `group` in table `name`, rows are paged with
    LIMIT/OFFSET into a temporary table, passed to `h3_from_geom`, and written
    to `s3://{path}/hex/zoom{zoom}/group_{group}/{name}_{value}_chunk{i}.parquet`,
    where `{value}` is the group value with runs of non-word characters
    replaced by `_`.

    Args:
        con: connection exposing `table`, `raw_sql` and `sql`.
        name: name of the source table registered on `con`.
        cols: comma-separated column list forwarded to `h3_from_geom`.
        zoom: zoom level forwarded to `h3_from_geom`.
        group: column whose distinct values define the groups.
        path: `bucket/prefix` the outputs are written under (no `s3://`).
        chunk_size: maximum number of rows per chunk (default 500).

    Raises:
        ValueError: if `chunk_size` is smaller than 1.
    """
    if chunk_size < 1:
        raise ValueError(f"chunk_size must be a positive integer, got {chunk_size!r}")

    groups = con.table(name).select(group).distinct().execute()[group].tolist()

    # separate data by group
    for sub in groups:
        # Sanitised outside the f-string: a backslash inside an f-string
        # expression is a SyntaxError before Python 3.12. str() lets
        # non-string group values (ints, None, ...) through re.sub.
        # NOTE(review): two values that differ only in non-word characters map
        # to the same label and would overwrite each other's files.
        label = re.sub(r'\W+', '_', str(sub))
        sub_name = f"{name}_{label}"

        if sub is None:
            # `= 'None'` would never match NULL rows.
            predicate = f"{group} IS NULL"
        else:
            # Double single quotes so values like O'Brien stay one literal.
            literal = str(sub).replace("'", "''")
            predicate = f"{group} = '{literal}'"

        offset = 0
        i = 0
        # chunk data within groups
        # NOTE(review): paging has no ORDER BY, so it relies on the scan
        # order of `name` being stable between queries - confirm.
        while True:
            print(f'Processing group {sub_name} chunk {i} offset {offset}')
            chunk_name = f"{sub_name}_chunk{i}"
            con.raw_sql(f"""
                CREATE OR REPLACE TEMP TABLE {chunk_name} AS
                SELECT * FROM {name}
                WHERE {predicate}
                LIMIT {chunk_size}
                OFFSET {offset}
            """)
            try:
                # An empty chunk means the group is exhausted.
                if con.sql(f"SELECT 1 FROM {chunk_name} LIMIT 1").execute().empty:
                    break
                save_path = f"s3://{path}/hex/zoom{zoom}/group_{group}/{sub_name}_chunk{i}.parquet"
                h3_from_geom(con, chunk_name, cols, save_path, zoom)
            finally:
                # Drop the chunk table so temp tables do not pile up over
                # the whole run; this also runs on `break` and on errors.
                con.raw_sql(f"DROP TABLE IF EXISTS {chunk_name}")
            offset += chunk_size
            i += 1