import ibis from ibis import _ import ibis.selectors as s import ibis.expr.datatypes as dt import os def get_url(folder, file, bucket = 'public-ca30x30', base_folder = 'CBN', method = 'write'): if method == 'write': minio = 's3://' else: minio = 'https://minio.carlboettiger.info/' if base_folder is None: path = os.path.join(bucket,folder,file) else: path = os.path.join(bucket,base_folder,folder,file) url = minio+path return url def get_vector_file(metric): # CBN vector data amph_richness = get_url('ACE_biodiversity/ACE_amphibian_richness','ACE_amphibian_richness_epsg3310.parquet') pct_amph_richness = get_url('ACE_biodiversity/ACE_amphibian_richness','ACE_amphibian_richness_80percent_epsg3310.parquet') reptile_richness = get_url('ACE_biodiversity/ACE_reptile_richness','ACE_reptile_richness_epsg3310.parquet') pct_reptile_richness = get_url('ACE_biodiversity/ACE_reptile_richness','ACE_reptile_richness_80percent_epsg3310.parquet') bird_richness = get_url('ACE_biodiversity/ACE_bird_richness','ACE_bird_richness_epsg3310.parquet') pct_bird_richness = get_url('ACE_biodiversity/ACE_bird_richness','ACE_bird_richness_80percent_epsg3310.parquet') mammal_richness = get_url('ACE_biodiversity/ACE_mammal_richness','ACE_mammal_richness_epsg3310.parquet') pct_mammal_richness = get_url('ACE_biodiversity/ACE_mammal_richness','ACE_mammal_richness_80percent_epsg3310.parquet') rare_amphibian_richness = get_url('ACE_biodiversity/ACE_rare_amphibian_richness','ACE_rare_amphibian_richness_epsg3310.parquet') pct_rare_amphibian_richness = get_url('ACE_biodiversity/ACE_rare_amphibian_richness','ACE_rare_amphibian_richness_95percent_epsg3310.parquet') rare_reptile_richness = get_url('ACE_biodiversity/ACE_rare_reptile_richness','ACE_rare_reptile_richness_epsg3310.parquet') pct_rare_reptile_richness = get_url('ACE_biodiversity/ACE_rare_reptile_richness','ACE_rare_reptile_richness_95percent_epsg3310.parquet') rare_bird_richness = get_url('ACE_biodiversity/ACE_rare_bird_richness','ACE_rare_bird_richness_epsg3310.parquet') pct_rare_bird_richness = get_url('ACE_biodiversity/ACE_rare_bird_richness','ACE_rare_bird_richness_95percent_epsg3310.parquet') rare_mammal_richness = get_url('ACE_biodiversity/ACE_rare_mammal_richness','ACE_rare_mammal_richness_epsg3310.parquet') pct_rare_mammal_richness = get_url('ACE_biodiversity/ACE_rare_mammal_richness','ACE_rare_mammal_richness_95percent_epsg3310.parquet') endemic_amphibian_richness = get_url('ACE_biodiversity/ACE_endemic_amphibian_richness','ACE_endemic_amphibian_richness_epsg3310.parquet') pct_endemic_amphibian_richness = get_url('ACE_biodiversity/ACE_endemic_amphibian_richness','ACE_endemic_amphibian_richness_95percent_epsg3310.parquet') endemic_reptile_richness = get_url('ACE_biodiversity/ACE_endemic_reptile_richness','ACE_endemic_reptile_richness_epsg3310.parquet') pct_endemic_reptile_richness = get_url('ACE_biodiversity/ACE_endemic_reptile_richness','ACE_endemic_reptile_richness_95percent_epsg3310.parquet') endemic_bird_richness = get_url('ACE_biodiversity/ACE_endemic_bird_richness','ACE_endemic_bird_richness_epsg3310.parquet') pct_endemic_bird_richness = get_url('ACE_biodiversity/ACE_endemic_bird_richness','ACE_endemic_bird_richness_95percent_epsg3310.parquet') endemic_mammal_richness = get_url('ACE_biodiversity/ACE_endemic_mammal_richness','ACE_endemic_mammal_richness_epsg3310.parquet') pct_endemic_mammal_richness = get_url('ACE_biodiversity/ACE_endemic_mammal_richness','ACE_endemic_mammal_richness_95percent_epsg3310.parquet') freshwater_richness = get_url('Freshwater_resources/Freshwater_species_richness','freshwater_species_richness_ds1197_epsg3310.parquet') pct_freshwater_richness = get_url('Freshwater_resources/Freshwater_species_richness','freshwater_species_richness_ds1197_80percent_epsg3310.parquet') wetlands = get_url('Freshwater_resources/Wetlands','CA_wetlands_epsg3310.parquet') fire = get_url('Climate_risks/Historical_fire_perimeters','calfire_2023_epsg3310.parquet') farmland = get_url('NBS_agriculture/Farmland_all/Farmland','Farmland_2018_epsg3310.parquet') grazing = get_url('NBS_agriculture/Farmland_all/Lands_suitable_grazing','Grazing_land_2018_epsg3310.parquet') DAC = get_url('Progress_data_new_protection/DAC','DAC_2022_epsg3310.parquet') low_income = get_url('Progress_data_new_protection/Low_income_communities','low_income_CalEnviroScreen4_epsg3310.parquet') pct_newly_protected = get_url('Progress_data_new_protection/Newly_counted_lands/dissolved','newly_protected_2024_union_epsg3310.parquet') pct_data_improvement = get_url('Progress_data_new_protection/Newly_counted_lands/dissolved','data_improvement_2024_union_epsg3310.parquet') pct_increased_management = get_url('Progress_data_new_protection/Newly_counted_lands/dissolved','increased_management_2024_union_epsg3310.parquet') if metric == 'mean': names = ['mean_amphibian_richness','mean_reptile_richness', 'mean_bird_richness','mean_mammal_richness', 'mean_rare_amphibian_richness','mean_rare_reptile_richness', 'mean_rare_bird_richness','mean_rare_mammal_richness', 'mean_endemic_amphibian_richness','mean_endemic_reptile_richness', 'mean_endemic_bird_richness','mean_endemic_mammal_richness', 'mean_freshwater_richness'] vectors = [amph_richness, reptile_richness, bird_richness, mammal_richness, rare_amphibian_richness, rare_reptile_richness, rare_bird_richness, rare_mammal_richness, endemic_amphibian_richness, endemic_reptile_richness, endemic_bird_richness, endemic_mammal_richness, freshwater_richness] cols = ['NtvAmph','NtvRept','NtvBird','NtvMamm', 'RarAmph','RarRept','RarBird','RarMamm', 'AmphEndem','ReptEndem','BirdEndem','MammEndem', 'Freshwater_Species_Count'] elif metric == 'overlap': names = ['pct_top_amphibian_richness','pct_top_reptile_richness', 'pct_top_bird_richness','pct_top_mammal_richness', 'pct_rare_amphibian_richness','pct_rare_reptile_richness', 'pct_rare_bird_richness','pct_rare_mammal_richness', 'pct_endemic_amphibian_richness','pct_endemic_reptile_richness', 'pct_endemic_bird_richness','pct_endemic_mammal_richness', 'pct_top_freshwater_richness', 'pct_wetlands','pct_fire','pct_farmland','pct_grazing', 'pct_disadvantaged_community','pct_low_income_community', 'pct_newly_protected','pct_data_improvement','pct_increased_management'] vectors = [pct_amph_richness, pct_reptile_richness, pct_bird_richness, pct_mammal_richness, pct_rare_amphibian_richness, pct_rare_reptile_richness, pct_rare_bird_richness, pct_rare_mammal_richness, pct_endemic_amphibian_richness, pct_endemic_reptile_richness, pct_endemic_bird_richness, pct_endemic_mammal_richness, pct_freshwater_richness, wetlands, fire, farmland, grazing, DAC, low_income, pct_newly_protected, pct_data_improvement, pct_increased_management ] cols = [None] * len(vectors) return names, vectors, cols def get_vector_stats(con, index, label, metric): names, vectors, cols = get_vector_file(metric) name = names[index] vector = vectors[index] col = cols[index] print(label) print(name) url = f's3://public-ca30x30/CA_Nature/2024/Preprocessing/v3/subsets/split_habitat_climate/{label}_habitat_climate.parquet' stats_url = f's3://public-ca30x30/CA_Nature/2024/Preprocessing/v3/stats/{label}/{name}.parquet' vector_stats = vector_vector_stats(con, url, vector, metric, col) vector_stats = vector_stats.rename(**{name: metric}) return vector_stats, stats_url # usage: t.mutate(geom_valid = ST_MakeValid(t.geom)) @ibis.udf.scalar.builtin def ST_MakeValid(geom) -> dt.geometry: ... def vector_vector_stats(con, base, data_layer, metric, col): print(f'metric: {metric}') print(f'column name: {col}') t1 = con.read_parquet(base).select(_.id, _.sub_id, _.geom) if metric == 'mean': t2 = con.read_parquet(data_layer).rename(value = col).select(_.geom, _.value) else: t2 = con.read_parquet(data_layer).select(_.geom) t1 = t1.mutate(geom = ST_MakeValid(_.geom)) t2 = t2.mutate(geom = ST_MakeValid(_.geom)) stats = (t1 .left_join(t2, t1.geom.intersects(t2.geom)) .group_by(t1.id, t1.sub_id, t1.geom) ) if metric == 'overlap': stats = (stats.agg(overlap = ( t1.geom.intersection(t2.geom).area() / t1.geom.area()) .sum().coalesce(0).round(3) )) # overlap elif metric == 'mean': stats = (stats.agg(mean=( (t1.geom.intersection(t2.geom).area() / t1.geom.area() * t2.value) .sum().coalesce(0).round(3)))) else: print('Select a metric.') return ##### for some ACE data, nonconserved areas don't get captured, so we assign it a 0 ##### only used this for ACE data + nonconserved. # left join to keep all sub_ids # non_overlapping = t1.anti_join(stats, 'sub_id') # zero rows for non-overlapping sub_ids # if metric == 'overlap': # zeros = (non_overlapping # .mutate(overlap = 0) # .cast({'overlap':'float64'}) # ) # elif metric == 'mean': # zeros = (non_overlapping # .mutate(mean = 0) # .cast({'mean':'float64'}) # ) # stats = stats.union(zeros) return stats