import datetime

import numpy as np
import pandas as pd
# Widen pandas' console display so wide frames print without wrapping.
pd.set_option('display.width', 1000)

url = 'https://raw.githubusercontent.com/guipsamora/pandas_exercises/master/06_Stats/Wind_Stats/wind.data'

# The file is whitespace-delimited; a raw string keeps the regex escape
# intact. The first three columns are merged and parsed into one date column.
# NOTE(review): the nested-list form of parse_dates is deprecated in recent
# pandas releases -- confirm against the installed version.
data = pd.read_table(url, sep=r'\s+', parse_dates=[[0, 1, 2]])
print(data.head())
def fix_century(x):
    """Return *x* as a ``datetime.date``, moving late years back a century.

    Years greater than 1989 are shifted back by 100 years; years up to and
    including 1989 are kept as they are. Presumably this undoes two-digit
    years that were parsed into the wrong century -- verify against the data.

    Args:
        x: Any object exposing ``year``, ``month`` and ``day`` attributes.

    Returns:
        A ``datetime.date`` with the (possibly adjusted) year and the same
        month and day as *x*.
    """
    year = x.year - 100 if x.year > 1989 else x.year
    return datetime.date(year, x.month, x.day)
# Correct the year of every parsed date.
data['Yr_Mo_Dy'] = data['Yr_Mo_Dy'].apply(fix_century)
print(data)

# fix_century returns plain datetime.date objects, so convert the column back
# to pandas datetimes before using it as the index.
data['Yr_Mo_Dy'] = pd.to_datetime(data['Yr_Mo_Dy'])
data = data.set_index('Yr_Mo_Dy')
print(data.head(5))
对应每一个location 一共有多少个缺失值
# Number of missing values in each column.
missing_per_location = data.isnull().sum()
print(missing_per_location)
对应每一个location,一共有多少完整的数据值
shape[0]获取行数,shape[1]获取列数
# Non-missing values per column: the number of rows (shape[0]) minus the
# per-column count of missing entries. shape[1] is the number of columns and
# would give a meaningless result here.
print(data.shape[0] - data.isnull().sum())
对于全体数据,计算风速的平均值
# Mean over every recorded value: the grand total divided by the number of
# non-missing readings. Averaging the per-column means instead would weight
# columns unequally whenever their missing-value counts differ.
print(data.sum().sum() / data.notnull().sum().sum())
创建一个名为loc_stats的数据框去计算并存储每一个location的最小值、最大值、平均值、标准差。
# Per-column summary statistics: each reduction runs down the rows, so the
# resulting frame has one row per column of `data`.
loc_stats = pd.DataFrame({
    'min': data.min(),
    'max': data.max(),
    'mean': data.mean(),
    'std': data.std(),
})
print(loc_stats)
# Create a DataFrame named day_stats that stores, for each row (day), the
# minimum, maximum, mean and standard deviation of wind speed across all
# locations. axis=1 makes every reduction run across the columns of a row.
day_stats = pd.DataFrame({
    'min': data.min(axis=1),
    'max': data.max(axis=1),
    'mean': data.mean(axis=1),
    'std': data.std(axis=1),
})
print(day_stats)
对于每一个location,计算一月份的平均风速
# Expose the index and its calendar parts as ordinary columns so they can be
# referenced by name inside DataFrame.query expressions.
data['date'] = data.index
data['month'] = data.index.month
data['year'] = data.index.year
data['day'] = data.index.day

# Mean of the columns RPT through MAL over all January rows.
january_winds = data.query('month == 1')
print(january_winds.loc[:, 'RPT':'MAL'].mean())
对于数据记录按年频率取样
# Yearly sample: keep only the rows dated on the first day of January.
yearly_sample = data.query('month == 1 and day == 1')
print(yearly_sample)
按月频率取样
# Monthly sample: keep one row per month, the one dated on the 1st.
# Selecting days 1 through 5 would return five rows per month rather than a
# monthly-frequency sample.
print(data.query('day == 1'))
本文由 所长 创作,采用 知识共享署名4.0 国际许可协议进行许可 本站文章除注明转载/出处外,均为本站原创或翻译,转载前请务必署名 最后编辑时间为: Apr 23, 2018 at 11:25 am