File size: 10,304 Bytes
b0bb178
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
import ibis
from ibis import _
import ibis.selectors as s
import ibis.expr.datatypes as dt
import os

def get_url(folder, file, bucket = 'public-ca30x30', base_folder = 'CBN', method = 'write'):
    if method == 'write':
        minio = 's3://'
    else:
        minio = 'https://minio.carlboettiger.info/'
    if base_folder is None:
        path = os.path.join(bucket,folder,file)
    else:
        path = os.path.join(bucket,base_folder,folder,file)
    url = minio+path
    return url

def get_vector_file(metric):
    # CBN vector data
    amph_richness = get_url('ACE_biodiversity/ACE_amphibian_richness','ACE_amphibian_richness_epsg3310.parquet')
    pct_amph_richness = get_url('ACE_biodiversity/ACE_amphibian_richness','ACE_amphibian_richness_80percent_epsg3310.parquet')
    
    reptile_richness = get_url('ACE_biodiversity/ACE_reptile_richness','ACE_reptile_richness_epsg3310.parquet')
    pct_reptile_richness = get_url('ACE_biodiversity/ACE_reptile_richness','ACE_reptile_richness_80percent_epsg3310.parquet')
    
    bird_richness = get_url('ACE_biodiversity/ACE_bird_richness','ACE_bird_richness_epsg3310.parquet')
    pct_bird_richness = get_url('ACE_biodiversity/ACE_bird_richness','ACE_bird_richness_80percent_epsg3310.parquet')
    
    mammal_richness = get_url('ACE_biodiversity/ACE_mammal_richness','ACE_mammal_richness_epsg3310.parquet')
    pct_mammal_richness = get_url('ACE_biodiversity/ACE_mammal_richness','ACE_mammal_richness_80percent_epsg3310.parquet')
    
    rare_amphibian_richness = get_url('ACE_biodiversity/ACE_rare_amphibian_richness','ACE_rare_amphibian_richness_epsg3310.parquet')
    pct_rare_amphibian_richness = get_url('ACE_biodiversity/ACE_rare_amphibian_richness','ACE_rare_amphibian_richness_95percent_epsg3310.parquet')
    
    rare_reptile_richness = get_url('ACE_biodiversity/ACE_rare_reptile_richness','ACE_rare_reptile_richness_epsg3310.parquet')
    pct_rare_reptile_richness = get_url('ACE_biodiversity/ACE_rare_reptile_richness','ACE_rare_reptile_richness_95percent_epsg3310.parquet')
    
    rare_bird_richness = get_url('ACE_biodiversity/ACE_rare_bird_richness','ACE_rare_bird_richness_epsg3310.parquet')
    pct_rare_bird_richness = get_url('ACE_biodiversity/ACE_rare_bird_richness','ACE_rare_bird_richness_95percent_epsg3310.parquet')
    
    rare_mammal_richness = get_url('ACE_biodiversity/ACE_rare_mammal_richness','ACE_rare_mammal_richness_epsg3310.parquet')
    pct_rare_mammal_richness = get_url('ACE_biodiversity/ACE_rare_mammal_richness','ACE_rare_mammal_richness_95percent_epsg3310.parquet')
    
    endemic_amphibian_richness = get_url('ACE_biodiversity/ACE_endemic_amphibian_richness','ACE_endemic_amphibian_richness_epsg3310.parquet')
    pct_endemic_amphibian_richness = get_url('ACE_biodiversity/ACE_endemic_amphibian_richness','ACE_endemic_amphibian_richness_95percent_epsg3310.parquet')
    
    endemic_reptile_richness = get_url('ACE_biodiversity/ACE_endemic_reptile_richness','ACE_endemic_reptile_richness_epsg3310.parquet')
    pct_endemic_reptile_richness = get_url('ACE_biodiversity/ACE_endemic_reptile_richness','ACE_endemic_reptile_richness_95percent_epsg3310.parquet')
    
    endemic_bird_richness = get_url('ACE_biodiversity/ACE_endemic_bird_richness','ACE_endemic_bird_richness_epsg3310.parquet')
    pct_endemic_bird_richness = get_url('ACE_biodiversity/ACE_endemic_bird_richness','ACE_endemic_bird_richness_95percent_epsg3310.parquet')
    
    endemic_mammal_richness = get_url('ACE_biodiversity/ACE_endemic_mammal_richness','ACE_endemic_mammal_richness_epsg3310.parquet')
    pct_endemic_mammal_richness = get_url('ACE_biodiversity/ACE_endemic_mammal_richness','ACE_endemic_mammal_richness_95percent_epsg3310.parquet')
    
    freshwater_richness = get_url('Freshwater_resources/Freshwater_species_richness','freshwater_species_richness_ds1197_epsg3310.parquet')
    pct_freshwater_richness = get_url('Freshwater_resources/Freshwater_species_richness','freshwater_species_richness_ds1197_80percent_epsg3310.parquet')
    
    wetlands = get_url('Freshwater_resources/Wetlands','CA_wetlands_epsg3310.parquet')
    fire = get_url('Climate_risks/Historical_fire_perimeters','calfire_2023_epsg3310.parquet')
    farmland = get_url('NBS_agriculture/Farmland_all/Farmland','Farmland_2018_epsg3310.parquet')
    grazing = get_url('NBS_agriculture/Farmland_all/Lands_suitable_grazing','Grazing_land_2018_epsg3310.parquet')
    DAC = get_url('Progress_data_new_protection/DAC','DAC_2022_epsg3310.parquet')
    low_income = get_url('Progress_data_new_protection/Low_income_communities','low_income_CalEnviroScreen4_epsg3310.parquet')
    
    pct_newly_protected = get_url('Progress_data_new_protection/Newly_counted_lands/dissolved','newly_protected_2024_union_epsg3310.parquet')
    pct_data_improvement = get_url('Progress_data_new_protection/Newly_counted_lands/dissolved','data_improvement_2024_union_epsg3310.parquet')
    pct_increased_management = get_url('Progress_data_new_protection/Newly_counted_lands/dissolved','increased_management_2024_union_epsg3310.parquet')

    if metric == 'mean':
        names = ['mean_amphibian_richness','mean_reptile_richness',
                 'mean_bird_richness','mean_mammal_richness',
                 'mean_rare_amphibian_richness','mean_rare_reptile_richness',
                 'mean_rare_bird_richness','mean_rare_mammal_richness',
                 'mean_endemic_amphibian_richness','mean_endemic_reptile_richness',
                 'mean_endemic_bird_richness','mean_endemic_mammal_richness',
                 'mean_freshwater_richness']
        
        vectors = [amph_richness, reptile_richness,
                   bird_richness, mammal_richness,
                   rare_amphibian_richness, rare_reptile_richness,
                   rare_bird_richness, rare_mammal_richness,
                   endemic_amphibian_richness,
                   endemic_reptile_richness,
                   endemic_bird_richness,
                   endemic_mammal_richness,
                   freshwater_richness]
        
        cols = ['NtvAmph','NtvRept','NtvBird','NtvMamm',
                'RarAmph','RarRept','RarBird','RarMamm',
                'AmphEndem','ReptEndem','BirdEndem','MammEndem',
                'Freshwater_Species_Count']
        
    elif metric == 'overlap':
        names = ['pct_top_amphibian_richness','pct_top_reptile_richness',
                 'pct_top_bird_richness','pct_top_mammal_richness',
                 'pct_rare_amphibian_richness','pct_rare_reptile_richness',
                 'pct_rare_bird_richness','pct_rare_mammal_richness',
                 'pct_endemic_amphibian_richness','pct_endemic_reptile_richness',
                 'pct_endemic_bird_richness','pct_endemic_mammal_richness',
                 'pct_top_freshwater_richness',
                 'pct_wetlands','pct_fire','pct_farmland','pct_grazing',
                 'pct_disadvantaged_community','pct_low_income_community',
                'pct_newly_protected','pct_data_improvement','pct_increased_management']
    
        vectors = [pct_amph_richness, pct_reptile_richness,
                    pct_bird_richness, pct_mammal_richness,
                    pct_rare_amphibian_richness, pct_rare_reptile_richness,
                    pct_rare_bird_richness, pct_rare_mammal_richness,
                    pct_endemic_amphibian_richness,
                    pct_endemic_reptile_richness,
                    pct_endemic_bird_richness,
                    pct_endemic_mammal_richness,
                    pct_freshwater_richness,
                    wetlands,
                    fire, farmland, grazing,
                    DAC, low_income,
                    pct_newly_protected,
                    pct_data_improvement,
                    pct_increased_management
                  ]
        cols = [None] * len(vectors)
    return names, vectors, cols 


def get_vector_stats(con, index, label, metric):
    names, vectors, cols = get_vector_file(metric)
    name = names[index]
    vector = vectors[index]
    col = cols[index]
    print(label)
    print(name)
    url = f's3://public-ca30x30/CA_Nature/2024/Preprocessing/v3/subsets/split_habitat_climate/{label}_habitat_climate.parquet'
    stats_url = f's3://public-ca30x30/CA_Nature/2024/Preprocessing/v3/stats/{label}/{name}.parquet'
    vector_stats = vector_vector_stats(con, url, vector, metric, col)
    vector_stats = vector_stats.rename(**{name: metric})
    return vector_stats, stats_url


# usage: t.mutate(geom_valid = ST_MakeValid(t.geom))
@ibis.udf.scalar.builtin
def ST_MakeValid(geom) -> dt.geometry:
 ...


def vector_vector_stats(con, base, data_layer, metric, col):
    print(f'metric: {metric}')
    print(f'column name: {col}')
    t1 = con.read_parquet(base).select(_.id, _.sub_id, _.geom)
    if metric == 'mean':
        t2 = con.read_parquet(data_layer).rename(value = col).select(_.geom, _.value)
    else:
        t2 = con.read_parquet(data_layer).select(_.geom)

    t1 = t1.mutate(geom = ST_MakeValid(_.geom))
    t2 = t2.mutate(geom = ST_MakeValid(_.geom))

    stats = (t1
        .left_join(t2, t1.geom.intersects(t2.geom))
        .group_by(t1.id, t1.sub_id, t1.geom)
            )
    if metric == 'overlap':
        stats = (stats.agg(overlap = (
                t1.geom.intersection(t2.geom).area() / t1.geom.area())
                .sum().coalesce(0).round(3) )) # overlap  
    elif metric == 'mean':
        stats = (stats.agg(mean=(
                (t1.geom.intersection(t2.geom).area() / t1.geom.area() * t2.value)
                .sum().coalesce(0).round(3))))
    else:
        print('Select a metric.')
        return 
    #####  for some ACE data, nonconserved areas don't get captured, so we assign it a 0
    ##### only used this for ACE data + nonconserved. 
    # left join to keep all sub_ids
    # non_overlapping = t1.anti_join(stats, 'sub_id')
    # zero rows for non-overlapping sub_ids
    # if metric == 'overlap':
    # zeros = (non_overlapping
    #          .mutate(overlap = 0)
    #          .cast({'overlap':'float64'})
    #         )
    # elif metric == 'mean':
    #     zeros = (non_overlapping
    #              .mutate(mean = 0)
    #              .cast({'mean':'float64'})
    #             )
    # stats = stats.union(zeros)
    return stats