GeoLLM / Task2 /kunkun_dem.py
Ciallo0d00's picture
Upload folder using huggingface_hub
badcf3c verified
import os
import numpy as np
from scipy import ndimage
# import pandas as pd
from osgeo import gdal, ogr, osr
# 1. 定义输入输出路径
dem_path = 'E:/bang/KUNKUN/data/water_depth/Dem_True/Dem11.tif'
area_dir = 'E:/bang/KUNKUN/data/water_depth/Area'
output_dir = 'E:/bang/KUNKUN/data/water_depth/Results/'
# 2. 读取DEM数据
dem_ds = gdal.Open(dem_path)
# # 查看形状
# print(dem_ds.RasterXSize, dem_ds.RasterYSize)
dem_array = dem_ds.GetRasterBand(1).ReadAsArray()
# 3. 初始化结果存储
results = []
# 4. 遍历所有面积文件
for filename in os.listdir(area_dir):
if filename.startswith('S1B_') and filename.endswith('.tif'):
# # 自定义裁剪范围(UTM坐标)
# min_x = 500000 # 最小东坐标
# max_x = 550000 # 最大东坐标
# min_y = 3100000 # 最小北坐标
# max_y = 3150000 # 最大北坐标
# # 裁剪DEM
# dem_transform = dem_ds.GetGeoTransform()
# # 计算DEM裁剪范围
# dem_min_x = int((min_x - dem_transform[0]) / dem_transform[1])
# dem_max_x = int((max_x - dem_transform[0]) / dem_transform[1])
# dem_min_y = int((max_y - dem_transform[3]) / dem_transform[5])
# dem_max_y = int((min_y - dem_transform[3]) / dem_transform[5])
# # 执行DEM裁剪
# cropped_dem = dem_array[dem_min_y:dem_max_y, dem_min_x:dem_max_x]
# # 裁剪面积数据
# area_ds = gdal.Open(os.path.join(area_dir, filename))
# area_transform = area_ds.GetGeoTransform()
# area_array = area_ds.GetRasterBand(1).ReadAsArray()
# # 计算面积数据裁剪范围
# area_min_x = int((min_x - area_transform[0]) / area_transform[1])
# area_max_x = int((max_x - area_transform[0]) / area_transform[1])
# area_min_y = int((max_y - area_transform[3]) / area_transform[5])
# area_max_y = int((min_y - area_transform[3]) / area_transform[5])
# # 执行面积数据裁剪
# cropped_area = area_array[area_min_y:area_max_y, area_min_x:area_max_x]
# 解析日期
parts = filename.split('_')
year = parts[1]
month = parts[2]
day = parts[3].split('.')[0]
date = f"{year}-{month}-{day}"
print(f"日期:{date}")
# 5. 处理当前日期的面积文件
area_ds = gdal.Open(os.path.join(area_dir, filename))
area_array = area_ds.GetRasterBand(1).ReadAsArray()
# 6. 水体连通性分析
labeled_array, num_features = ndimage.label(area_array)
print(f"湖泊数量:{num_features}")
# 去除小面积子湖
min_area_pixels = int(1000 * 1000 / 88.36) # 1000平方公里转换为像素数
removed_count = 0
for i in range(1, num_features + 1):
size = np.sum(labeled_array == i)
if size < min_area_pixels:
labeled_array[labeled_array == i] = 0
removed_count += 1
# print(f"去除小面积湖泊:{i}")
# 计算剩余湖泊数量
remaining_labels = np.unique(labeled_array)
remaining_count = len(remaining_labels) - 1 # 减去背景值0
print(f"去除小湖数量:{removed_count}")
print(f"剩余湖泊数量:{remaining_count}")
# 8. 重新编号
unique_labels = np.unique(labeled_array)
unique_labels = unique_labels[unique_labels != 0] # 排除背景值0
relabeled_array = np.zeros_like(labeled_array)
new_label = 1
for old_label in unique_labels:
size = np.sum(labeled_array == old_label)
if size >= min_area_pixels: # 再次检查湖泊面积
relabeled_array[labeled_array == old_label] = new_label
new_label += 1
print(f"重新编号:{np.unique(relabeled_array[relabeled_array != 0])}") # 只显示非零值
# 计算边缘高程的中位数
for label in range(1, new_label): # 使用重新编号后的标签
mask = relabeled_array == label
if np.sum(mask) == 0: # 如果没有像素点,跳过
continue
# 提取边界点
structure = ndimage.generate_binary_structure(2, 2) # 8连通
eroded_mask = ndimage.binary_erosion(mask, structure=structure)
boundary_mask = mask & ~eroded_mask # 边界掩码
# 提取边界点的高程值
y_indices, x_indices = np.where(boundary_mask)
if len(y_indices) == 0 or len(x_indices) == 0: # 如果没有边界点,跳过
continue
# 将边界点坐标转换为地理坐标
transform = area_ds.GetGeoTransform()
x_coords = transform[0] + x_indices * transform[1]
y_coords = transform[3] + y_indices * transform[5]
# 将地理坐标转换为DEM的像素坐标
dem_transform = dem_ds.GetGeoTransform()
dem_xs = ((x_coords - dem_transform[0]) / dem_transform[1]).astype(int)
dem_ys = ((y_coords - dem_transform[3]) / dem_transform[5]).astype(int)
# 提取边界点的高程值
boundary_depths = []
for dem_x, dem_y in zip(dem_xs, dem_ys):
if 0 <= dem_x < dem_array.shape[1] and 0 <= dem_y < dem_array.shape[0]:
boundary_depths.append(dem_array[dem_y, dem_x])
# 计算边界高程的中位数
if len(boundary_depths) > 0:
median_boundary_depth = np.nanmedian(boundary_depths)
else:
median_boundary_depth = np.nan
# 存储结果
results.append({
'date': date,
'lake_id': label,
'longitude': np.nanmean(x_coords), # 湖泊边界中心经度
'latitude': np.nanmean(y_coords), # 湖泊边界中心纬度
'depth': median_boundary_depth # 边界高程的中位数
})
print(f"结果:{results}")
# 创建新的数组,初始化为nodata值
nodata_value = -9999 # 设置nodata值
output_array = np.full_like(relabeled_array, nodata_value, dtype=np.float32)
# 将符合标准的湖泊赋值为中位数高程
for result in results:
label = result['lake_id']
median_depth = result['depth']
mask = relabeled_array == label
output_array[mask] = median_depth
# 设置GDAL配置选项,强制使用EPSG官方参数
gdal.SetConfigOption('GTIFF_SRS_SOURCE', 'EPSG')
# 写入新的TIFF文件
driver = gdal.GetDriverByName('GTiff')
output_path = os.path.join(output_dir, f'{date}_processed_B.tif')
# 创建输出文件
output_ds = driver.Create(output_path, relabeled_array.shape[1], relabeled_array.shape[0], 1, gdal.GDT_Float32)
# 设置地理变换和投影
output_ds.SetGeoTransform(area_ds.GetGeoTransform())
output_ds.SetProjection(area_ds.GetProjection())
# 写入数据
output_band = output_ds.GetRasterBand(1)
output_band.WriteArray(output_array)
# 设置nodata值
output_band.SetNoDataValue(nodata_value)
# 刷新缓存
output_ds.FlushCache()
# 关闭文件
output_ds = None
print(f"结果已写入:{output_path}")