前往小程序,Get更优阅读体验!
立即前往
首页
学习
活动
专区
工具
TVP
发布
社区首页 >专栏 >python学习之pandas

python学习之pandas

作者头像
py3study
发布2020-01-15 17:27:10
9400
发布2020-01-15 17:27:10
举报
文章被收录于专栏:python3

# Pandas
#
# 1. Pandas is a Python data-analysis package created to solve data-analysis
#    tasks.
# 2. Pandas incorporates a large number of libraries and standard data models
#    and provides the tools needed to operate on data sets efficiently.
# 3. Pandas provides many functions and methods for processing data quickly
#    and conveniently.
# 4. Pandas structures are dictionary-like and built on NumPy, which makes
#    NumPy-centred applications simpler.
#
# The imports must sit on their own lines: when they share a line with the
# leading comment they are never executed and every later use of pd / np fails.
import pandas as pd
import numpy as np

# 4. Pandas data structures
# 4.1 Series

# A Series prints with its index on the left and its values on the right.
s = pd.Series([1, 2, 3, np.nan, 5, 6])
print(s)

# 4.2 DataFrame
# A DataFrame is a tabular data structure holding an ordered set of columns,
# each of which may have a different value type. It has both a row index and a
# column index and can be viewed as a dictionary of Series.

# Six consecutive days used as the row index
dates = pd.date_range(start='20180310', periods=6)

# 6x4 table of random values with columns labelled A-D
df = pd.DataFrame(
    data=np.random.rand(6, 4),
    index=dates,
    columns=list('ABCD'),
)
print(df)

# Select a single column by label
print(df.loc[:, 'B'])

# Build a DataFrame from explicit per-column data; the scalar entries (A and F)
# are repeated for each of the four rows.
df_1 = pd.DataFrame(
    dict(
        A=1.,
        B=pd.date_range(start='20180923', periods=4),
        D=np.full(4, 2, dtype='int32'),
        E=pd.Categorical(['test', 'train', 'test', 'train']),
        F='foo',
    )
)

print(df_1)

# dtype of each column
print(df_1.dtypes)

# Row labels
print(df_1.index)

# Column labels
print(df_1.columns)

# Underlying values as an array
print(df_1.values)

# Summary statistics
print(df_1.describe())

# Transposed table (rows and columns swapped)
print(df_1.transpose())

# axis=1 sorts by column label; ascending=False puts the labels in reverse order
print(df_1.sort_index(axis='columns', ascending=False))

# Sort the rows by the values in column E
print(df_1.sort_values(by='E'))

# Selecting data with pandas
dates = pd.date_range(start='20180924', periods=6)
df = pd.DataFrame(
    data=np.random.rand(6, 4),
    index=dates,
    columns=list('ABCD'),
)
print(df)

# First: positional slice of the first three rows.
# Second: label slice over a date range of the index.
print(df.iloc[0:3], df.loc['20180910':'20180926'])

# Select by row label, restricted to columns A and B
print(df.loc['20180924', ['A', 'B']])

# Single value at row position 3, column position 1 (both zero-based)
print(df.iat[3, 1])

# Row positions 3-4 and column positions 0-1 (end of each slice excluded)
print(df.iloc[3:5, 0:2])

# Non-contiguous selection by position
print(df.iloc[[1, 2, 4], [0, 2]])

# Keep only the rows whose value in column A is greater than 0
print(df.loc[df['A'] > 0])

# Setting values with pandas
datas = pd.date_range('20180310', periods=6)
df = pd.DataFrame(np.arange(24).reshape(6, 4), index=datas, columns=['A', 'B', 'C', 'D'])
print(df)

# Overwrite single cells: one by integer position, one by row/column label
df.iloc[2, 2] = 999
df.loc['2018-03-15', 'D'] = 999
print(df)

# A boolean row mask on the left-hand side assigns to every column of the
# matching rows, so each row where A > 0 becomes 999 in all columns
df[df.A > 0] = 999
print(df)

# Add a column filled with NaN. np.nan is the canonical spelling; the np.NAN
# alias was removed in NumPy 2.0.
df['F'] = np.nan
print(df)

# Add a column from a Series; its values are aligned on the shared date index
df['E'] = pd.Series([1, 2, 3, 4, 5, 6], index=pd.date_range('20180310', periods=6))
print(df)

# 7. Handling missing data with pandas
dates = pd.date_range('20180310', periods=6)
df = pd.DataFrame(np.arange(24).reshape(6, 4), index=dates, columns=['A', 'B', 'C', 'D'])

# NaN cannot be stored in an integer column. Cast the two columns that receive
# NaN to float explicitly so the assignments below do not depend on implicit
# upcasting during item assignment.
df = df.astype({'B': 'float64', 'C': 'float64'})
df.iloc[0, 1] = np.nan
df.iloc[1, 2] = np.nan
print(df)

# axis=0 operates on rows, axis=1 on columns.
# how='any' drops a row if it contains at least one NaN;
# how='all' drops it only if every value is NaN.
print(df.dropna(axis=0, how='any'))

# Replace every NaN with 0
print(df.fillna(value=0))

# Boolean table: True where the value is NaN, False otherwise
print(pd.isnull(df))

# Single boolean: does the data contain any NaN at all?
print(df.isnull().values.any())

# 8. Importing and exporting data with pandas

# Load the CSV file into a DataFrame
data = pd.read_csv(filepath_or_buffer='test1.csv')

# Save the loaded data as a pickle file
data.to_pickle(path='test.pickle')

# 9. Combining data with pandas

# Three 3x4 frames filled with 0, 1 and 2 respectively
df1 = pd.DataFrame(np.zeros((3, 4)), columns=list('abcd'))
df2 = pd.DataFrame(np.ones((3, 4)), columns=list('abcd'))
df3 = pd.DataFrame(np.full((3, 4), 2.0), columns=list('abcd'))

# axis=0 stacks the frames row-wise (axis=1 would place them side by side);
# ignore_index=True renumbers the resulting rows 0-8
res = pd.concat(objs=[df1, df2, df3], axis='index', ignore_index=True)
print(res)

# Concatenating with a join on the row index
df1 = pd.DataFrame(np.ones((3, 4)) * 0, columns=['a', 'b', 'c', 'd'], index=[1, 2, 3])
df2 = pd.DataFrame(np.ones((3, 4)) * 1, columns=['a', 'b', 'c', 'd'], index=[2, 3, 4])
print(df1)
print(df2)

# Side-by-side concatenation keeping the union of both row indexes;
# rows missing from one frame are filled with NaN
res = pd.concat([df1, df2], axis=1, join='outer')
print(res)

# Keep only df1's row labels; labels absent from df2 are filled with NaN.
# The join_axes argument was removed from pd.concat, so df2 is reindexed to
# df1's index before concatenating instead.
res = pd.concat([df1, df2.reindex(df1.index)], axis=1)
print(res)

# Appending rows
df1 = pd.DataFrame(np.ones((3, 4)) * 0, columns=['a', 'b', 'c', 'd'])
df2 = pd.DataFrame(np.ones((3, 4)) * 1, columns=['a', 'b', 'c', 'd'])
df3 = pd.DataFrame(np.ones((3, 4)) * 2, columns=['a', 'b', 'c', 'd'])
s1 = pd.Series([1, 2, 3, 4], index=['a', 'b', 'c', 'd'])

# DataFrame.append was removed in pandas 2.0; pd.concat is its replacement.

# Put df2 below df1 and renumber the index
res = pd.concat([df1, df2], ignore_index=True)
print(res)

# Put s1 below df1 as one extra row and renumber the index. The Series is
# turned into a one-row frame first so its labels line up with the columns.
res = pd.concat([df1, s1.to_frame().T], ignore_index=True)
print(res)

# Combining frames with merge

# Merge on a single key column
left = pd.DataFrame(
    dict(
        key=['k1', 'k2', 'k3', 'k4'],
        A=['A1', 'A2', 'A3', 'A4'],
        B=['B1', 'B2', 'B3', 'B4'],
    )
)
print(left)

right = pd.DataFrame(
    dict(
        key=['k1', 'k2', 'k3', 'k4'],
        C=['C1', 'C2', 'C3', 'C4'],
        D=['D1', 'D2', 'D3', 'D4'],
    )
)
print(right)

# Rows are paired up wherever the 'key' values match
res = left.merge(right, on='key')
print(res)

# Merge on two key columns
left = pd.DataFrame(
    dict(
        key=['k0', 'k0', 'k1', 'k2'],
        key2=['k0', 'k1', 'k0', 'k1'],
        A=['A1', 'A2', 'A3', 'A4'],
        B=['B1', 'B2', 'B3', 'B4'],
    )
)

right = pd.DataFrame(
    dict(
        key=['k0', 'k1', 'k1', 'k2'],
        key2=['k0', 'k0', 'k0', 'k0'],
        C=['C1', 'C2', 'C3', 'C4'],
        D=['D1', 'D2', 'D3', 'D4'],
    )
)
print(left)
print(right)

# how='inner' keeps only the (key, key2) pairs present in both frames;
# the other options are 'outer', 'left' and 'right'
res = left.merge(right, how='inner', on=['key', 'key2'])
print(res)

# Merging with an indicator column
# Each statement sits on its own line; several statements sharing one line
# without separators is a syntax error.
df1 = pd.DataFrame({'col1': [0, 1], 'col_left': ['a', 'b']})
df2 = pd.DataFrame({'col1': [1, 2, 2], 'col_right': [2, 2, 2]})
print(df1)
print(df2)

# Merge on col1; indicator=True adds a '_merge' column recording whether each
# row came from the left frame only, the right frame only, or both
res = pd.merge(df1, df2, on='col1', how='outer', indicator=True)
print(res)

# Passing a string instead of True gives the indicator column a custom name
res = pd.merge(df1, df2, on='col1', how='outer', indicator='indicator_column')
print(res)

本文参与 腾讯云自媒体同步曝光计划,分享自作者个人站点/博客。
原始发表:2019/06/21 ,如有侵权请联系 cloudcommunity@tencent.com 删除

本文分享自 作者个人站点/博客 前往查看

如有侵权,请联系 cloudcommunity@tencent.com 删除。

本文参与 腾讯云自媒体同步曝光计划  ,欢迎热爱写作的你一起参与!

评论
登录后参与评论
0 条评论
热度
最新
推荐阅读
目录
  • s = pd.Series([1,2,3,np.nan,5,6])#索引在左边值在右边
  • print(s)
  • dates = pd.date_range('20180310',periods = 6)
  • df = pd.DataFrame(np.random.rand(6,4),index=dates,columns=['A','B','C','D'])
  • print(df)
  • print(df['B'])
  • df_1 = pd.DataFrame({
  • 'A':1.,
  • 'B':pd.date_range('20180923',periods=4),
  • 'D':np.array([2]*4,dtype='int32'),
  • 'E':pd.Categorical(['test','train','test','train']),
  • 'F':'foo'
  • })
  • print(df_1)
  • print(df_1.dtypes)
  • print(df_1.index)#行的序号
  • print(df_1.columns)#列的序号
  • print(df_1.values)#把每个值进行打印
  • print(df_1.describe())#数字总结
  • print(df_1.T)#数字反转
  • print(df_1.sort_index(axis=1,ascending=False))#axis等于按第一列排序,如ABCDEFG,然后ascending倒序进行显示
  • print(df_1.sort_values(by='E'))#按值进行排列
  • dates = pd.date_range('20180924',periods=6)
  • df = pd.DataFrame(np.random.rand(6,4),index=dates,columns=['A','B','C','D'])
  • print(df)
  • print(df[0:3],df['20180910':'20180926'])#第一次切片选择,第二次按照筛选条件选择
  • print(df.loc['20180924',['A','B']])#按照行标签进行选择
  • print(df.iloc[3,1])#输出第三行第一列的数据
  • print(df.iloc[3:5,0:2])#3,5行,0,3列
  • print(df.iloc[[1,2,4],[0,2]])#不连续筛选
  • print(df[df.A > 0])#筛选出df.A大于0的元素
  • datas = pd.date_range('20180310',periods=6)
  • df = pd.DataFrame(np.arange(24).reshape(6,4),index=datas,columns=['A','B','C','D'])
  • print(df)
  • df.iloc[2,2] = 999
  • df.loc['2018-03-15','D'] = 999
  • print(df)
  • df[df.A > 0] = 999#A列大于0的为999???
  • print(df)
  • df['F'] = np.NAN
  • print(df)
  • df['E'] = pd.Series([1,2,3,4,5,6],index=pd.date_range('20180310',periods=6))#添加一列
  • print(df)
  • dates = pd.date_range('20180310',periods=6)
  • df = pd.DataFrame(np.arange(24).reshape(6,4),index=dates,columns=['A','B','C','D'])
  • df.iloc[0,1]=np.nan
  • df.iloc[1,2]=np.nan
  • print(df)
  • print(df.dropna(axis=0,how='any'))#0对行进行操作 1对列进行操作 any:只要存在NaN即可drop掉 all:必须全部是NaN才可drop
  • print(df.fillna(value=0))#将NaN值替换为0
  • print(pd.isnull(df))#是nan为true不是nan为false
  • print(np.any(df.isnull()))#判断数据中是否存在nanz值
  • data = pd.read_csv('test1.csv')
  • data.to_pickle('test.pickle')#将资料存取成pickle文件
  • df1 = pd.DataFrame(np.ones((3,4))*0,columns=['a','b','c','d'])
  • df2 = pd.DataFrame(np.ones((3,4))*1,columns=['a','b','c','d'])
  • df3 = pd.DataFrame(np.ones((3,4))*2,columns=['a','b','c','d'])
  • res = pd.concat([df1,df2,df3],axis=0,ignore_index=True)#0表示行合并,1表示列合并,ingnore_index重置序列index index变为1-8
  • print(res)
  • df1 = pd.DataFrame(np.ones((3,4))*0,columns=['a','b','c','d'],index=[1,2,3])
  • df2 = pd.DataFrame(np.ones((3,4))*1,columns=['a','b','c','d'],index=[2,3,4])
  • print(df1)
  • print(df2)
  • res = pd.concat([df1,df2],axis=1,join='outer')#行往外合并
  • print(res)
  • res = pd.concat([df1,df2],axis=1,join_axes=[df1.index])#以df1的序列进行合并,df2中没有的序列NAN值填充
  • print(res)
  • df1 = pd.DataFrame(np.ones((3,4))*0,columns=['a','b','c','d'])
  • df2 = pd.DataFrame(np.ones((3,4))*1,columns=['a','b','c','d'])
  • df3 = pd.DataFrame(np.ones((3,4))*2,columns=['a','b','c','d'])
  • s1 = pd.Series([1,2,3,4],index=['a','b','c','d'])
  • res = df1.append(df2,ignore_index=True)#将df2合并到df1下面并重置index
  • print(res)
  • res = df1.append(s1,ignore_index=True)#将s1合并到df1下面并重置index
  • print(res)
  • left = pd.DataFrame({
  • 'key':['k1','k2','k3','k4'],
  • 'A':['A1','A2','A3','A4'],
  • 'B':['B1','B2','B3','B4']
  • })
  • print(left)
  • right = pd.DataFrame({
  • 'key':['k1','k2','k3','k4'],
  • 'C':['C1','C2','C3','C4'],
  • 'D':['D1','D2','D3','D4']
  • })
  • print(right)
  • res = pd.merge(left,right,on = 'key')
  • print(res)
  • left = pd.DataFrame({
  • 'key':['k0','k0','k1','k2'],
  • 'key2':['k0','k1','k0','k1'],
  • 'A':['A1','A2','A3','A4'],
  • 'B':['B1','B2','B3','B4']
  • })
  • right = pd.DataFrame({
  • 'key':['k0','k1','k1','k2'],
  • 'key2':['k0','k0','k0','k0'],
  • 'C':['C1','C2','C3','C4'],
  • 'D':['D1','D2','D3','D4']
  • })
  • print(left)
  • print(right)
  • res = pd.merge(left,right,on=['key','key2'],how='inner')#how = outer left right
  • print(res)
  • res = pd.merge(df1,df2,on='col1',how='outer',indicator=True)#依据col1进行合并 并启用indicator = True输出没想合并式
  • print(res)
领券
问题归档专栏文章快讯文章归档关键词归档开发者手册归档开发者手册 Section 归档