xarray+eofs：对EOF的简单实现

用户11172986

发布于 2024-06-20 17:08:50

14100

代码可运行

文章被收录于专栏：气python风雨

运行总次数：0

代码可运行

昨日有读者说想看看EOF和小波分析，近期也在搞xarray的推文，就拿xarray的数据直接做了。

小波分析的话其他博主已经写过pycwt库的实现了，就直接转载了(偷懒)

本想写几种eof库比较的，在pyeof和xeof安装都失败后还是简单写一下eofs

导入库

In [54]:

from eofs.standard import Eof
import xarray as xr
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt


def get_time_space(df, time_dim, lumped_space_dims):
    """
    Get a DataFrame with the dim: [row (e.g.,time), column (e.g, space)]

    Parameters
    ----------

    df : pandas.core.frame.DataFrame 
        | time | lat | lon | PM2.5 |
        | 2011 | 66  | 88  | 22    |
        | 2011 | 66  | 99  | 23    |
        | 2012 | 66  | 88  | 24    |
        | 2012 | 66  | 99  | 25    |
        
 
    time_dim:
        time dim name (str), e.g., "time"
    
    lumped_space_dims: 
        a list of the lumped space, e.g., ["lat","lon"]

    Returns
    -------
    dataframe:  
        with [row (e.g.,time), column (e.g, space)], e.g.,
        | time | loc1    | loc2    |
        |      | (66,88) | (66,99) |
        | 2011 | 22      | 24      |
        | 2012 | 23      | 25      |
    
    """
    return df.set_index([time_dim]+lumped_space_dims).unstack(lumped_space_dims)

数据预处理

In [80]:

import xarray as xr

# 数据
f = '/home/mw/input/cru3546/cru_ts4.07.2021.2022.pre.dat.nc'

# 打开数据集
ds = xr.open_dataset(f)
ds = ds.sel(lat=slice(20, 60), lon=slice(80, 130))
ds

Out[80]:

In [81]:

df = ds.to_dataframe().reset_index() # get df from da
df=df.drop(columns=['stn'])
df

Out[81]:

	lon	lat	time	pre
0	80.25	20.25	2021-01-16	1.800000
1	80.25	20.25	2021-02-15	3.400000
2	80.25	20.25	2021-03-16	31.100000
3	80.25	20.25	2021-04-16	14.000000
4	80.25	20.25	2021-05-16	22.800001
...	...	...	...	...
191995	129.75	59.75	2022-08-16	86.500000
191996	129.75	59.75	2022-09-16	61.400002
191997	129.75	59.75	2022-10-16	34.799999
191998	129.75	59.75	2022-11-16	26.900000
191999	129.75	59.75	2022-12-16	17.700001

192000 rows × 4 columns

In [82]:

df_data = get_time_space(df, time_dim = "time", lumped_space_dims = ["lat","lon"])
display(df_data.head(5))
print("DataFrame Shape:",df_data.shape)

	pre
lat	20.25	20.75	21.25	21.75	22.25	22.75	23.25	23.75	24.25	24.75	...	55.25	55.75	56.25	56.75	57.25	57.75	58.25	58.75	59.25	59.75
lon	80.25	80.25	80.25	80.25	80.25	80.25	80.25	80.25	80.25	80.25	...	129.75	129.75	129.75	129.75	129.75	129.75	129.75	129.75	129.75	129.75
time
2021-01-16	1.800000	1.800000	2.400000	3.700000	5.400000	6.3	5.100000	4.700000	5.300000	5.900000	...	8.900001	9.100000	10.000000	10.000000	10.200000	10.600000	11.300000	11.600000	10.700000	9.800000
2021-02-15	3.400000	3.500000	3.700000	4.700000	6.700000	7.8	5.700000	4.200000	4.400000	4.600000	...	14.200000	15.900001	16.100000	15.500000	14.500000	13.800000	13.500000	13.700000	14.000000	13.700000
2021-03-16	31.100000	35.100002	41.600002	53.400002	75.900002	78.0	69.700005	62.100002	63.700001	60.100002	...	27.900000	31.100000	28.700001	26.700001	22.900000	20.400000	18.300001	16.400000	16.200001	15.400001
2021-04-16	14.000000	9.900001	7.100000	5.600000	6.600000	6.6	4.600000	3.400000	3.800000	4.000000	...	59.000000	62.400002	57.299999	56.299999	50.600002	45.400002	39.400002	27.600000	25.600000	25.300001
2021-05-16	22.800001	20.300001	18.400000	22.800001	27.600000	25.4	19.900000	13.600000	9.500000	6.600000	...	117.900002	126.099998	109.200005	92.800003	72.200005	58.000000	48.100002	34.799999	28.700001	23.300001

5 rows × 8000 columns

DataFrame Shape: (24, 8000)

简单绘图函数

In [86]:

import matplotlib.pyplot as plt

def visualization(da, pcs, eofs_da, evf):
    n = len(pcs[0])  # 获取模态数
    fig, axs = plt.subplots(n+1, 2, figsize=(20, 6*(n+1)))

    da.mean(dim=["lat","lon"]).plot(ax=axs[0, 0])
    axs[0, 0].set_title("average pre")
    da.mean(dim="time").plot(ax=axs[0, 1])
    axs[0, 1].set_title("average pre")

    for i in range(1, n+1):
        pc_i = xr.DataArray(pcs[:,i-1],dims=["time"])
        pc_i = pc_i.assign_coords(time=ds.time)
        eof_i = eofs_da.sel(mode=i-1)
        frac = str((evf[i-1] * 100).round(2))

        axs[i, 0].plot(pc_i)
        axs[i, 0].set_title("PC"+str(i)+" ("+frac+"%)")

        img = eof_i.plot(x='lon',y='lat',ax=axs[i, 1], vmin=-0.75, vmax=0.75, cmap="RdBu_r", add_colorbar=False)
        axs[i, 1].set_title("EOF"+str(i)+" ("+frac+"%)")
        fig.colorbar(img, ax=axs[i, 1])

    plt.tight_layout()
    plt.show()

eof计算与简单绘图

In [87]:

from eofs.standard import Eof
from sklearn.preprocessing import StandardScaler
n=4
# 使用StandardScaler对数据进行标准化处理
scaler = StandardScaler()
df_data_scaled = scaler.fit_transform(df_data.values)

# 创建EOFs对象
solver = Eof(df_data_scaled)

# 计算主分量分数（PCs）
s_pcs = solver.pcs(npcs=4, pcscaling=2)

# 计算模态函数（EOFs）
s_eofs =solver.eofs(neofs=4, eofscaling=2)

s_eofs = s_eofs.reshape(4,100,80)

# 创建xarray数组
lon = ds.lon
lat = ds.lat
mode = np.arange(0,n)
s_eofs_ds = xr.DataArray(s_eofs,coords=[mode,lon, lat], dims=["mode", "lon", "lat"])
# 计算解释方差比例
s_evfs = solver.varianceFraction(neigs=4)


visualization(ds['pre'], s_pcs, s_eofs_ds, s_evfs)

/opt/conda/lib/python3.7/site-packages/sklearn/utils/extmath.py:770: RuntimeWarning: invalid value encountered in true_divide
  updated_mean = (last_sum + new_sum) / updated_sample_count
/opt/conda/lib/python3.7/site-packages/sklearn/utils/extmath.py:709: RuntimeWarning: Degrees of freedom <= 0 for slice.
  result = op(x, *args, **kwargs, dtype=np.float64)

带权重eof

In [91]:

coslat = np.array(np.sqrt(np.cos(np.deg2rad(ds.lat)))).reshape((1,80))


# 创建EOFs对象
solver1 = Eof(df_data_scaled.reshape(24,100,80),weights = coslat)

# 计算主分量分数（PCs）
s_pcs1 = solver1.pcs(npcs=4, pcscaling=2)

# 计算模态函数（EOFs）
s_eofs1 =solver.eofs(neofs=4, eofscaling=2)

s_eofs1 = s_eofs.reshape(4,100,80)

# 创建xarray数组
lon = ds.lon
lat = ds.lat
mode = np.arange(0,n)
s_eofs_ds1 = xr.DataArray(s_eofs1,coords=[mode,lon, lat], dims=["mode", "lon", "lat"])
# 计算解释方差比例
s_evfs1 = solver.varianceFraction(neigs=4)


visualization(ds['pre'], s_pcs1, s_eofs_ds1, s_evfs1)