数据准备是机器学习的基础,俗话说巧妇难为无米之炊,没有数据的机器学习就是耍流氓。
接下来将使用公众号其他成员分享的内容现学现卖一篇,文章中使用了我们公众号成员推荐的Xarray库、wrf-python库,目的是从WRF模式输出提取出站点在不同高度/等压面数据。
def nearest_position( stn_lat, stn_lon, xlat, xlon ): """获取最临近格点坐标索引 parameters: ---------------------- lon : 单个站点经度 lat : 单个站点纬度 lons : numpy.ndarray网格二维经度坐标 lats : numpy.ndarray网格二维纬度坐标 Return: ----------------------- (xindx,yindx) """ import numpy as np difflat = stn_lat - xlat; difflon = stn_lon - xlon; rad = np.multiply(difflat,difflat)+np.multiply(difflon , difflon)#difflat * difflat + difflon * difflon aa=np.where(rad==np.min(rad)) ind=np.squeeze(np.array(aa)) return tuple(ind)
上述函数是为了得到距离某个站点在wrfout输出的二维经纬度上找到最近的格点索引,比较简单粗暴。实际上也可以用scipy.spatial中的cKDTree来做。
import xarray file_name_nc= 'wrfout_dxx_2009-04-21_06_00_00' ds = xarray.open_dataset(file_name_nc) XLAT_WRF2D = np.squeeze(ds.coords['XLAT'].values) XLONG_WRF2D=np.squeeze(ds.coords['XLONG'].values) ind=nearest_position( 133.001 , 18.1944,XLONG_WRF2D, XLAT_WRF2D) print('距离该站点最近的格点经纬度索引为:',ind) print('距离站点最近格点的经度为:',XLONG_WRF2D[ind[0],ind[1]]) print('距离站点最近格点的纬度为:',XLAT_WRF2D[ind[0],ind[1]]) 距离该站点最近的格点经纬度索引为: (96, 93) 第一个站点的经度为: 133.04703 第一个站点的纬度为: 18.168343 注:站点经纬度坐标是随便写的,以脱敏,如有雷同,纯属巧合。 上面的函数只是针对单个站点。用for循环进行批量操作,可以得到类似如下pandas dataframe结果,命名为get_stn: Station_ID LONG LAT Xidx Yidx 6666666 129.996 33.3586 93.0 72.0 7777777 143.001 34.1944 73.0 63.0 8888888 128.663 22.1430 86.0 57.0 . . . . 有了批量的站点信息,下面编写函数进行wrfout站点信息提取. 第二步 编写 读取站点信息函数def read_wrfout_stns_multilevels(flnm_wrfout,get_stn,features_3D,features_2D,features_special,interp_levels,features_all,interp_type='ght_agl'): ''' 作者:MeteoAI -------------------------------------------------------------------- 参数: flnm_wrfout:单个WRF输出文件名称 get_stn:Pandas dataframe 表格文件,列名为 站点ID ,站点经度, 站点纬度, 最近格点经度索引,最近格点纬度索引 features_3D :wrfout中三维变量,如u,v,T features_2D : wrfout中的二维变量: 如降雨 、短波辐射 features_special: 可以是PBLH等二维变量 interp_levels: 插值层列表, 比如高度层列表[0.001,0.01,0.02,0.3] interp_types :插值种类 插值方式暂时使用两种 高度层垂向插值 ght_agl: above groud level,单位Km 等压面 :pressure 单位hPa 详细信息可以help(wrf.vinterp),有更多的插值种类 ''' import os import arrow import numpy as np import wrf import netCDF4 as nc import xarray as xr import pandas as pd from netCDF4 import Dataset from wrf import getvar, vinterp,interplevel ds = xr.open_dataset(flnm_wrfout) data_nc=nc.Dataset(flnm_wrfout) wrfin =data_nc if interp_type == 'ght_agl': units='Km' name_dim0='height' else: units='hPa' name_dim0='pressure' ######################################## va=getvar(wrfin,"va") ua=getvar(wrfin,"ua") ########################## rh=getvar(wrfin,"rh")# RH_interp=wrf.vinterp(data_nc,np.squeeze(rh.values),interp_type,interp_levels,extrapolate=True) UU_interp=wrf.vinterp(data_nc,np.squeeze(ua.values),interp_type,interp_levels,extrapolate=True) VV_interp=wrf.vinterp(data_nc,np.squeeze(va.values),interp_type,interp_levels,extrapolate=True) PR=ds['PB'] +ds['P'] pblh = getvar(wrfin, "PBLH") tc=getvar(wrfin,"tc")# 摄氏度温度 T_interp=wrf.vinterp(data_nc,np.squeeze(tc.values),interp_type,interp_levels,extrapolate=True) PR_interp=wrf.vinterp(data_nc,np.squeeze(PR.values),interp_type,interp_levels,extrapolate=True)/100# #P_interp myds=xr.Dataset() # myds['UU'] = UU_interp myds['VV'] = VV_interp myds['TA'] =T_interp myds['PR']=PR_interp#np.squeeze((ds['PB']+ds['P'])) myds['RH']=RH_interp myds['RA']=ds['RAINC']+ds['RAINNC']# getvar(wrfin,"pw") # # myds['SW']=ds['SWDOWN'] # pressure is just name , in fact it could alos stands for height in this code if interp_type = ght_agl. myds=myds.rename({"dim_0": name_dim0 , "dim_1": "south_north","dim_2": "west_east"})# myds.coords['Pressure'] = xr.DataArray(interp_levels, dims='pressure') myds.coords['XLAT']=xr.DataArray(np.squeeze(ds.coords['XLAT'])) myds.coords['XLONG']=xr.DataArray(np.squeeze(ds.coords['XLONG'])) XLAT_WRF2D = myds.coords['XLAT'] XLAT_WRF2D = np.squeeze(XLAT_WRF2D.values) XLONG_WRF2D=myds.coords['XLONG'] XLONG_WRF2D=np.squeeze(XLONG_WRF2D.values) df_empty=pd.DataFrame(columns=features_all,index=get_stn.index) interp_levels_index=np.arange(len(interp_levels)) for idx ,value in get_stn.iterrows(): # yidx=int(value[1]) xidx=int(value[0]) #lon_value, lat_value ,xidx ,yidx=nearest_point(lon, lat, XLONG_WRF2D, XLAT_WRF2D) for each_var in features_3D: for each_level_index in interp_levels_index: #print(interp_levels[each_level_index]) if interp_type == 'ght_agl': v=myds[each_var][dict(height=each_level_index,south_north=xidx,west_east=yidx)]#myds['UU'][dict(pressure=1,south_north=6,west_east=7)]# feature_name=each_var+'_'+str(interp_levels[each_level_index])+units df_empty.loc[idx][feature_name]=v.values else: v=myds[each_var][dict(pressure=each_level_index,south_north=xidx,west_east=yidx)]#myds['UU'][dict(pressure=1,south_north=6,west_east=7)]# feature_name=each_var+'_'+str(interp_levels[each_level_index])+units df_empty.loc[idx][feature_name]=v.values for each_var in features_2D: v=myds[each_var][dict(south_north=xidx,west_east=yidx)] #myds['UU'][dict(pressure=1,south_north=6,west_east=7)]# feature_name=each_var df_empty.loc[idx][each_var]=v.values[0] if 'PBLH' in features_special: pblh_v=np.squeeze(pblh[dict(south_north=xidx,west_east=yidx)].values)#JUST A number df_empty.loc[idx]['PBLH']=pblh_v return df_empty 测试interp_levels = [0,0.002,0.004,0.008,0.01,0.015,0.02,0.25,0.50,0.75,1.50,3.00,5.00] ######################################################################### features_special=['PBLH'] features_2D=['RA','SW'] features_3D=['UU','VV','TA','PR','RH']#['UU'] features_km_list=[] for each in features_3D: for km in (interp_levels): features_km_list.append(each+'_'+str(km)+'Km') features_all=[] features_all.extend(features_km_list) features_all.extend(features_special) features_all.extend(features_2D) #print(features_all) stns_info=read_wrfout_stns_multilevels(flnm_wrfout, get_stn, features_3D, features_2D, features_special, interp_levels, features_all, interp_type='ght_agl') stns_info.head(3)
本文分享自微信公众号 - MeteoAI(meteoai),作者:😀小铁柱😀
原文出处及转载信息见文内详细说明,如有侵权,请联系 yunjia_community@tencent.com 删除。
原始发表时间:2019-06-07
本文参与腾讯云自媒体分享计划,欢迎正在阅读的你也加入,一起分享。
我来说两句