Spaces:
Running
Running
| from minio.error import S3Error | |
| import zipfile | |
| import os | |
| import subprocess | |
| import geopandas as gpd | |
| import ibis | |
| from ibis import _ | |
| import rasterio | |
| from rasterio.features import shapes | |
| from shapely.geometry import shape | |
| import numpy as np | |
| def info(folder, file, bucket = "public-ca30x30", base_folder = 'CBN-data/'): | |
| """ | |
| Extract minio path to upload/download data | |
| """ | |
| path = os.path.join(base_folder, folder, file) | |
| # path = os.path.join(folder, file) | |
| return bucket, path | |
| def download(s3, folder, file, file_name = None): | |
| """ | |
| Downloading file from minio | |
| """ | |
| if not file_name: | |
| file_name = file | |
| bucket, path = info(folder, file) | |
| s3.fget_object(bucket, path ,file_name) | |
| return | |
| def upload(s3, folder, file): | |
| """ | |
| Uploading file from minio | |
| """ | |
| bucket, path = info(folder, file) | |
| s3.fput_object(bucket, path ,file) | |
| return | |
| def unzip(folder, file): | |
| """ | |
| Unzipping zip files | |
| """ | |
| download(s3, folder, file) | |
| with zipfile.ZipFile(file, 'r') as zip_ref: | |
| zip_ref.extractall() | |
| return | |
| # def process_vector(folder, file, file_name = None, gdf = None, crs="EPSG:3310"): | |
| def process_vector(folder, file, file_name = None, gdf = None, crs="EPSG:4326"): | |
| """ | |
| Driver function to process vectors | |
| """ | |
| if gdf is None: | |
| gdf = gpd.read_file(file) | |
| if gdf.crs != crs: | |
| gdf = gdf.to_crs(crs) | |
| if gdf.geometry.name != 'geom': | |
| gdf = gdf.rename_geometry('geom') | |
| if file_name: | |
| file = file_name | |
| # upload_parquet(folder, file, gdf) | |
| name, ext = os.path.splitext(file) | |
| parquet_file = f"{name}{'.parquet'}" | |
| gdf.to_parquet(parquet_file) | |
| upload(s3, folder, parquet_file) | |
| return | |
| # def upload_parquet(folder, file, gdf): | |
| # """ | |
| # Uploading parquets | |
| # """ | |
| # name, ext = os.path.splitext(file) | |
| # parquet_file = f"{name}{'.parquet'}" | |
| # gdf.to_parquet(parquet_file) | |
| # upload(folder, parquet_file) | |
| # return | |
| def reproject_raster(input_file, crs="EPSG:3310"): | |
| """ | |
| Reproject rasters | |
| """ | |
| suffix = '_processed' | |
| name, ext = os.path.splitext(input_file) | |
| output_file = f"{name}{suffix}{ext}" | |
| command = [ | |
| "gdalwarp", | |
| "-t_srs", crs, | |
| input_file, | |
| output_file | |
| ] | |
| try: | |
| subprocess.run(command, check=True) | |
| print(f"Reprojection successful!") | |
| except subprocess.CalledProcessError as e: | |
| print(f"Error occurred during reprojection: {e}") | |
| return output_file | |
| def make_cog(input_file, crs="EPSG:4326"): | |
| """ | |
| Converting TIF to COGs | |
| """ | |
| suffix = '_COG' | |
| name, ext = os.path.splitext(input_file) | |
| output_file = f"{name}{suffix}{ext}" | |
| command = [ | |
| "gdalwarp", | |
| "-t_srs", crs, | |
| "-of", "COG", | |
| input_file, | |
| output_file | |
| ] | |
| try: | |
| subprocess.run(command, check=True) | |
| print(f"Successful!") | |
| except subprocess.CalledProcessError as e: | |
| print(f"Error occurred during processing: {e}") | |
| return output_file | |
| def make_vector(input_file, crs="EPSG:4326"): | |
| """ | |
| Converting rasters to vector formats in order to convert to h3 | |
| """ | |
| name, ext = os.path.splitext(input_file) | |
| output_file = f"{name}.parquet" | |
| # Open raster | |
| with rasterio.open(input_file) as src: | |
| image = src.read(1) # read first band | |
| mask = image != src.nodata # mask out nodata | |
| results = ( | |
| {"geom": shape(geom), "value": value} | |
| for geom, value in shapes(image, mask=mask, transform=src.transform) | |
| ) | |
| gdf = gpd.GeoDataFrame.from_records(results) | |
| gdf.set_geometry('geom', inplace=True) | |
| gdf['id'] = np.arange(len(gdf)) | |
| gdf.set_crs(src.crs, inplace=True) | |
| if gdf.crs != crs: | |
| gdf.to_crs(crs, inplace=True) | |
| gdf.to_parquet(output_file) | |
| print(gdf) | |
| return output_file | |
| def filter_raster(s3, folder, file, percentile): | |
| """ | |
| Helper function to filter rasteres | |
| """ | |
| with rasterio.open(file) as src: | |
| data = src.read(1) # Read the first band | |
| profile = src.profile | |
| # mask no data values | |
| masked_data = np.ma.masked_equal(data, src.nodata) | |
| # compute percentile/threshold | |
| p = np.percentile(masked_data.compressed(),percentile) | |
| filtered = np.where(data >= p, data, src.nodata) | |
| name, ext = os.path.splitext(file) | |
| new_file = f"{name}{'_'}{percentile}{'percentile'}{ext}" | |
| profile.update(dtype=rasterio.float64) | |
| with rasterio.open(new_file, "w", **profile) as dst: | |
| dst.write(filtered, 1) | |
| process_raster(s3, folder, file) | |
| return | |
| def process_raster(s3, folder, file, file_name = None): | |
| """ | |
| Driver function to process rasters | |
| """ | |
| if file_name: | |
| file = file_name | |
| output_file = reproject_raster(file) | |
| upload(s3, folder, output_file) | |
| output_cog_file = make_cog(output_file) | |
| upload(s3, folder, output_cog_file) | |
| output_vector = make_vector(output_file) | |
| upload(s3, folder, output_vector) | |
| return | |
| def convert_pmtiles(folder, file): | |
| """ | |
| Convert to PMTiles with tippecanoe | |
| """ | |
| name, ext = os.path.splitext(file) | |
| if ext != '.geojson': | |
| con.read_parquet(file).execute().set_crs('epsg:3310').to_crs('epsg:4326').to_file(name+'.geojson') | |
| to_pmtiles(name+'.geojson', name+'.pmtiles', options = ['--extend-zooms-if-still-dropping']) | |
| upload(s3, folder, name+'.pmtiles') | |
| return | |