File size: 2,861 Bytes
0d2d459
5a3c665
db54b60
49369e2
5a3c665
 
 
 
49369e2
 
 
 
5a3c665
 
 
49369e2
0d2d459
49369e2
 
 
 
 
 
 
 
 
 
 
 
 
5a3c665
49369e2
5a3c665
49369e2
db54b60
5a3c665
db54b60
 
8419f19
 
 
 
 
 
 
db54b60
2db4717
49369e2
 
 
 
 
db54b60
 
49369e2
2db4717
 
49369e2
2db4717
49369e2
2db4717
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
from utils import *
import re

def convert_h3(con, s3, folder, file, cols, zoom, group=None, base_folder="CBN/"):
    """
    Driver function to convert geometries to h3.

    Resolves the source parquet with ``info``, registers it on ``con`` under a
    table name derived from the file name, and writes the hexed result to
    ``hex/zoom{zoom}/{name}.parquet`` next to the source file.

    Args:
        con: Connection exposing ``read_parquet`` (plus whatever
            ``compute_grouped`` / ``h3_from_geom`` call on it).
        s3: Not used by this function; kept so existing callers keep working.
        folder: Folder handed to ``info``. When falsy, ``info`` is called with
            ``None`` for both folder and base folder.
        file: Source file name handed to ``info``.
        cols: Columns to carry through, as a list/tuple of names or an
            already comma-separated string.
        zoom: Zoom level forwarded to the hex computation and used in the
            output path.
        group: Optional column name. When given, the data is hexed group by
            group in chunks and the chunk files are merged afterwards.
        base_folder: Base folder handed to ``info`` when ``folder`` is set.
    """
    cols = ", ".join(cols) if isinstance(cols, (list, tuple)) else cols
    if folder:
        bucket, path = info(folder, file, base_folder)
    else:
        bucket, path = info(None, file, None)
    path, file = os.path.split(path)
    name = os.path.splitext(file)[0]
    print(f"Processing: {name}")
    t_name = name.replace('-', '')

    # Use one prefix for every read and write. Previously the grouped branch
    # read its source and its chunk files from the bucket root while the chunks
    # were written below `path`, and an empty `path` produced "bucket//..." keys.
    root = f"{bucket}/{path}" if path else bucket
    hex_dir = f"s3://{root}/hex/zoom{zoom}"
    out_file = f"{hex_dir}/{name}.parquet"

    if group:
        con.read_parquet(f"s3://{root}/{name}.parquet", table_name=t_name)
        print(f'Computing zoom level {zoom}, grouping the data based on {group}')
        # compute_grouped writes its chunk files to
        # s3://{path}/hex/zoom{zoom}/group_{group}/, so hand it the same root.
        compute_grouped(con, t_name, cols, zoom, group, path=root)
        # Merge every chunk file of this grouping into a single parquet.
        con.read_parquet(f"{hex_dir}/group_{group}/**").to_parquet(out_file)
    else:
        con.read_parquet(f"s3://{root}/{file}", table_name=t_name)
        print(f'Computing zoom level {zoom} without grouping.')
        h3_from_geom(con, t_name, cols, out_file, zoom)
        

    
def h3_from_geom(con, name, cols, save_path, zoom):
    """
    Hex the geometries of table `name` and write the cells to `save_path`.

    Runs in two steps: first the temp table `t2` is (re)built with one row per
    dumped geometry part and a list column `h{zoom}` of cells; then that list
    is unnested to one row per cell, each with its boundary geometry, and the
    result is written as parquet.

    `cols` must already be a comma-separated string of column names.
    """
    # Step 1: split each geometry into its parts, force them to 2D and compute
    # the list of cells covering every part.
    stage_query = f'''
    CREATE OR REPLACE TEMP TABLE t2 AS
    SELECT {cols},
           h3_polygon_wkt_to_cells_string(ST_Force2D(dump.geom), {zoom}) AS h{zoom}
    FROM (
        SELECT {cols}, UNNEST(ST_Dump(geom)) AS dump
        FROM {name}
    )
    '''
    con.raw_sql(stage_query)

    # Step 2: one output row per cell, with the cell boundary as geometry.
    export_query = f'''
        SELECT {cols}, UNNEST(h{zoom}) AS h{zoom},
        ST_GeomFromText(h3_cell_to_boundary_wkt(UNNEST(h{zoom}))) AS geom
        FROM t2
    '''
    cells = con.sql(export_query)
    cells.to_parquet(save_path)


def compute_grouped(con, name, cols, zoom, group, path):
    """
    Computes hexes group by group, in chunks of 500 rows.

    For every distinct value of column `group` in table `name`, the matching
    rows are copied chunk-wise into a temp table, hexed with `h3_from_geom`
    and written to
    ``s3://{path}/hex/zoom{zoom}/group_{group}/{label}_chunk{i}.parquet``.

    Args:
        con: Connection exposing ``table``, ``raw_sql`` and ``sql``.
        name: Name of the source table registered on ``con``.
        cols: Comma-separated column names forwarded to ``h3_from_geom``.
        zoom: Zoom level forwarded to ``h3_from_geom`` and used in the path.
        group: Name of the column to group by.
        path: Bucket plus key prefix (without the ``s3://`` scheme).
    """
    groups = con.table(name).select(group).distinct().execute()[group].tolist()
    chunk_size = 500
    used_names = set()
    # separate data by group
    for sub in groups:
        if sub is None:
            # A NULL group never matches `= '...'`; select it explicitly.
            label = "null"
            predicate = f"{group} IS NULL"
        else:
            text = str(sub)
            # Sanitised outside the f-string: a backslash inside an f-string
            # replacement field is a SyntaxError before Python 3.12.
            label = re.sub(r"\W+", "_", text)
            # Double single quotes so values like O'Brien stay one SQL literal.
            escaped = text.replace("'", "''")
            predicate = f"{group} = '{escaped}'"

        # Different group values can sanitise to the same label; keep the
        # table and file names unique so one group cannot overwrite another.
        base_name = f"{name}_{label}"
        sub_name = base_name
        suffix = 1
        while sub_name in used_names:
            sub_name = f"{base_name}_{suffix}"
            suffix += 1
        used_names.add(sub_name)

        i = 0
        # chunk data within groups
        # NOTE(review): LIMIT/OFFSET without ORDER BY assumes the engine
        # returns rows in the same order on every scan - TODO confirm.
        while True:
            offset = i * chunk_size
            print(f'Processing group {sub_name} chunk {i} offset {offset}')
            chunk_name = f"{sub_name}_chunk{i}"
            con.raw_sql(f"""
                CREATE OR REPLACE TEMP TABLE {chunk_name} AS
                SELECT * FROM {name} 
                WHERE {predicate}
                LIMIT {chunk_size} 
                OFFSET {offset}
            """)
            try:
                if con.sql(f"SELECT 1 FROM {chunk_name} LIMIT 1").execute().empty:
                    break
                save_path = f"s3://{path}/hex/zoom{zoom}/group_{group}/{sub_name}_chunk{i}.parquet"
                h3_from_geom(con, chunk_name, cols, save_path, zoom)
            finally:
                # The chunk table is only needed for this iteration; drop it
                # so temp tables do not pile up over a long run.
                con.raw_sql(f"DROP TABLE IF EXISTS {chunk_name}")
            i += 1