版权声明:本文为博主原创文章,遵循 CC 4.0 BY-SA 版权协议,转载请附上原文出处链接和本声明。
本文链接:https://blog.csdn.net/weixin_44580977/article/details/101997962
'''
# 本节讲解如何使用 pandas_datareader 以及 tushare 包(普通版和 pro 版)获取股票数据
import pandas_datareader.data as web
import datetime
# 获取上证指数自2017年1月1日至今的交易数据
df_stockload = web.DataReader("000001.SS", "yahoo", datetime.datetime(2017,1,1), datetime.date.today())
print(df_stockload.head()) # 查看前几行
"""
High Low Open Close Volume Adj Close
Date
2017-01-03 3136.5 3105.3 3105.3 3135.9 141600 3135.9
2017-01-04 3160.1 3130.1 3133.8 3158.8 167900 3158.8
2017-01-05 3168.5 3154.3 3157.9 3165.4 174700 3165.4
2017-01-06 3172.0 3153.0 3163.8 3154.3 183700 3154.3
2017-01-09 3173.1 3147.7 3148.5 3171.2 171700 3171.2
"""
print(df_stockload.tail()) # 查看末尾几行
"""
High Low Open Close Volume Adj Close
Date
2019-03-04 3090.8 3006.9 3015.9 3027.6 525600 3027.6
2019-03-05 3055.0 3009.4 3019.9 3054.2 424100 3054.2
2019-03-06 3103.8 3050.1 3060.4 3102.1 555000 3102.1
2019-03-07 3129.9 3075.0 3103.7 3106.4 583800 3106.4
2019-03-08 3075.0 2969.6 3038.3 2969.9 577900 2969.9
"""
print (df_stockload.columns)#查看列索引信息
"""
Index(['High', 'Low', 'Open', 'Close', 'Volume', 'Adj Close'], dtype='object')
"""
print (df_stockload.index)#查看行索引信息
"""
DatetimeIndex(['2017-01-03', '2017-01-04', '2017-01-05', '2017-01-06',
'2017-01-09', '2017-01-10', '2017-01-11', '2017-01-12',
'2017-01-13', '2017-01-16',
...
'2019-02-25', '2019-02-26', '2019-02-27', '2019-02-28',
'2019-03-01', '2019-03-04', '2019-03-05', '2019-03-06',
'2019-03-07', '2019-03-08'],
dtype='datetime64[ns]', name='Date', length=530, freq=None)
"""
print(df_stockload.shape)#查看形状
"""
(530, 6)
"""
# 接下来查看各列交易数据描述性的统计信息,如最小值、最大值、均值、标准差等
print (df_stockload.describe())#查看各列数据描述性统计
"""
High Low Open Close Volume Adj Close
count 530.0 530.0 530.0 530.0 530.0 530.0
mean 3079.9 3044.5 3061.5 3064.8 172601.3 3064.8
std 281.0 286.0 283.5 284.1 62788.0 284.1
min 2488.5 2440.9 2446.0 2464.4 88200.0 2464.4
25% 2802.3 2754.8 2776.8 2786.1 134200.0 2786.1
50% 3164.6 3134.3 3148.2 3153.9 159200.0 3153.9
75% 3290.1 3261.5 3274.5 3275.9 193600.0 3275.9
max 3587.0 3534.2 3563.6 3559.5 583800.0 3559.5
"""
# 接下来查看交易数据概览信息,如每列数据的类型、个数、是否存在缺失等等
print(df_stockload.info())#查看缺失及每列数据类型
"""
<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 530 entries, 2017-01-03 to 2019-03-08
Data columns (total 6 columns):
High 530 non-null float64
Low 530 non-null float64
Open 530 non-null float64
Close 530 non-null float64
Volume 530 non-null int64
Adj Close 530 non-null float64
dtypes: float64(5), int64(1)
memory usage: 29.0 KB
None
"""
import matplotlib.pyplot as plt
#绘制收盘价
df_stockload.Close.plot(c='b')
plt.legend(['Close','30ave','60ave'],loc='best')
plt.show()
'''
'''
# get_hist_data api讲解
import datetime
import pandas as pd
import tushare as ts
df_sh=ts.get_hist_data('sh',start='2017-01-01',end=datetime.datetime.now().strftime('%Y-%m-%d'))
print(df_sh.info())#查看交易数据概览信息
"""
<class 'pandas.core.frame.DataFrame'>
Index: 530 entries, 2019-03-08 to 2017-01-03
Data columns (total 13 columns):
open 530 non-null float64
high 530 non-null float64
close 530 non-null float64
low 530 non-null float64
volume 530 non-null float64
price_change 530 non-null float64
p_change 530 non-null float64
ma5 530 non-null float64
ma10 530 non-null float64
ma20 530 non-null float64
v_ma5 530 non-null float64
v_ma10 530 non-null float64
v_ma20 530 non-null float64
dtypes: float64(13)
memory usage: 58.0+ KB
None
"""
print(df_sh.axes)# 查看行和列的轴标签
"""
[Index(['2019-03-08', '2019-03-07', '2019-03-06', '2019-03-05', '2019-03-04',
'2019-03-01', '2019-02-28', '2019-02-27', '2019-02-26', '2019-02-25',
...
'2017-01-16', '2017-01-13', '2017-01-12', '2017-01-11', '2017-01-10',
'2017-01-09', '2017-01-06', '2017-01-05', '2017-01-04', '2017-01-03'],
dtype='object', name='date', length=530), Index(['open', 'high', 'close', 'low', 'volume', 'price_change', 'p_change',
'ma5', 'ma10', 'ma20', 'v_ma5', 'v_ma10', 'v_ma20'],
dtype='object')]
"""
# 将行字符类型索引转换成 datetime 类型
df_sh.index = pd.to_datetime(df_sh.index)
df_sh.sort_index(inplace=True)
print(df_sh.axes)# 查看行和列的轴标签
"""
[DatetimeIndex(['2017-01-03', '2017-01-04', '2017-01-05', '2017-01-06',
'2017-01-09', '2017-01-10', '2017-01-11', '2017-01-12',
'2017-01-13', '2017-01-16',
...
'2019-02-25', '2019-02-26', '2019-02-27', '2019-02-28',
'2019-03-01', '2019-03-04', '2019-03-05', '2019-03-06',
'2019-03-07', '2019-03-08'],
dtype='datetime64[ns]', name='date', length=530, freq=None), Index(['open', 'high', 'close', 'low', 'volume', 'price_change', 'p_change',
'ma5', 'ma10', 'ma20', 'v_ma5', 'v_ma10', 'v_ma20'],
dtype='object')]
"""
# 由以上输出可知,get_hist_data()接口的优点是除返回基本的开盘价(open)、最高价(high)、收盘价(close)等数据外,
# 还包括涨跌额(price_change)、涨跌幅(p_change)以及5/10/20日均价(ma5/ma10/ma20)和均量(v_ma5/v_ma10/v_ma20)等指标数据;
# 它的缺点是不能获取股票自上市以来的所有日线数据。
# 于是另一个接口get_k_data()登场了,它获取数据的速度更快些,
# 并且可以返回每一只股票从上市开始到当前交易日的所有日线数据。
'''
'''
# get_k_data api 讲解
import datetime
import pandas as pd
import tushare as ts
# get_hist_data 虽然指定了 start='2008-01-01',但并未返回2008年的数据(见下方 tail() 输出,最早只到2016-09-07)
df_sh = ts.get_hist_data('sh', start='2008-01-01', end=datetime.datetime.now().strftime('%Y-%m-%d'))
print(df_sh.head())
"""
open high close ... v_ma5 v_ma10 v_ma20
date ...
2019-03-08 3038.3 3075.1 2969.9 ... 5.3e+06 4.9e+06 3.7e+06
2019-03-07 3103.7 3129.9 3106.4 ... 4.9e+06 4.7e+06 3.4e+06
2019-03-06 3060.4 3103.8 3102.1 ... 4.4e+06 4.4e+06 3.2e+06
2019-03-05 3019.9 3055.0 3054.2 ... 4.2e+06 4.1e+06 3.0e+06
2019-03-04 3015.9 3090.8 3027.6 ... 4.5e+06 3.9e+06 2.9e+06
[5 rows x 13 columns]
"""
print(df_sh.tail())
"""
open high close ... v_ma5 v_ma10 v_ma20
date ...
2016-09-13 3025.0 3029.7 3023.5 ... 1.7e+06 1.7e+06 1.7e+06
2016-09-12 3037.5 3040.9 3022.0 ... 1.7e+06 1.7e+06 1.7e+06
2016-09-09 3095.4 3101.8 3078.8 ... 1.6e+06 1.6e+06 1.6e+06
2016-09-08 3089.9 3096.8 3095.9 ... 1.7e+06 1.7e+06 1.7e+06
2016-09-07 3091.3 3105.7 3091.9 ... 1.9e+06 1.9e+06 1.9e+06
[5 rows x 13 columns]
"""
# get_k_data 在相同的起止日期参数下,返回了自2008-01-02起的数据
df_sh = ts.get_k_data('sh', start='2008-01-01', end=datetime.datetime.now().strftime('%Y-%m-%d'))
print(df_sh.head())
"""
date open close ... low volume code
0 2008-01-02 5265.0 5272.8 ... 5201.9 7.7e+07 sh
1 2008-01-03 5269.8 5319.9 ... 5211.1 9.9e+07 sh
2 2008-01-04 5328.4 5361.6 ... 5318.5 8.1e+07 sh
3 2008-01-07 5357.4 5393.3 ... 5332.6 9.2e+07 sh
4 2008-01-08 5414.6 5386.5 ... 5344.6 1.0e+08 sh
[5 rows x 7 columns]
"""
print(df_sh.tail())
"""
date open close ... low volume code
2715 2019-03-04 3015.9 3027.6 ... 3006.9 5.3e+08 sh
2716 2019-03-05 3019.9 3054.2 ... 3009.4 4.2e+08 sh
2717 2019-03-06 3060.4 3102.1 ... 3050.1 5.6e+08 sh
2718 2019-03-07 3103.7 3106.4 ... 3075.0 5.8e+08 sh
2719 2019-03-08 3038.3 2969.9 ... 2969.6 5.8e+08 sh
[5 rows x 7 columns]
"""
"""
差异化分析发现get_k_data()接口返回的交易数据,
索引为序号而非交易日期,
因此我们需要进行简单的处理使它与get_hist_data()接口返回的交易数据在格式上兼容,
此处使用to_datetime()方法将date列交易日期替换为行索引,
然后使用drop()方法将date列数据删除,
以避免交易日期重复显示,如下所示:
"""
# 用 to_datetime 将 date 列转换为 datetime 类型,并将其赋值为行索引
df_sh.index = pd.to_datetime(df_sh.date)
# 删除掉冗余的date列
df_sh.drop(axis=1, columns='date', inplace=True)
print(df_sh.head())
"""
open close high low volume code
date
2008-01-02 5265.0 5272.8 5295.0 5201.9 7.7e+07 sh
2008-01-03 5269.8 5319.9 5321.5 5211.1 9.9e+07 sh
2008-01-04 5328.4 5361.6 5372.5 5318.5 8.1e+07 sh
2008-01-07 5357.4 5393.3 5403.4 5332.6 9.2e+07 sh
2008-01-08 5414.6 5386.5 5480.1 5344.6 1.0e+08 sh
"""
import matplotlib.pyplot as plt
df_sh.close.plot(c='b')
plt.legend(['close'], loc='best')
plt.show()
'''
'''
# tushare pro 获取数据
# 使用前需先注册账号并设置 token
import pandas as pd
import tushare as ts
"""
关于pro版本,使用前需要登录官网注册账号以获取token,
注册地址:https://tushare.pro/register 。
不过部分接口设置了权限,需要达到一定的积分才能使用,
比如获取指数每日行情的index_daily()接口,用户需要累积200积分才有权限调取。
daily()接口用于获取常用的股票行情数据,该接口无权限要求,
下面以daily()接口为例展示tushare pro获取数据的方法,如下所示:
"""
#设置token
token='your token'
pro = ts.pro_api(token)#初始化pro接口
#获取平安银行日行情数据
pa=pro.daily(ts_code='000001.SZ', start_date='20180101',
end_date='20190101')
print(pa.head())
"""
ts_code trade_date open ... pct_chg vol amount
0 000001.SZ 20181228 9.3 ... 1.1 576604.0 541571.0
1 000001.SZ 20181227 9.4 ... -0.2 624593.3 586343.8
2 000001.SZ 20181226 9.3 ... -0.4 421140.6 393215.1
3 000001.SZ 20181225 9.3 ... -0.8 586615.4 545235.6
4 000001.SZ 20181224 9.4 ... -0.3 509117.7 477186.9
"""
"""
对比可知,daily()接口的主要输入参数与旧版get_k_data()和get_hist_data()接口大体相同,
包括股票代码、开始日期、结束日期。daily()接口返回的数据格式与get_k_data()接口大体相同,
我们可以使用上文处理get_k_data()接口的方法来调整daily()接口返回的数据格式。
对于将序号形式的索引转换为交易日期形式的索引,
此处介绍另一种方法。DatetimeIndex()也可以将字符类型转化成datetime64类型,
等同于to_datetime()的效果。set_index()将指定的列作为索引,
并且可以通过drop参数删除该列,以避免重复。如下所示:
"""
pa.trade_date = pd.DatetimeIndex(pa.trade_date)
pa.set_index("trade_date", drop=True, inplace=True)
print(pa.axes)
"""
[DatetimeIndex(['2018-12-28', '2018-12-27', '2018-12-26', '2018-12-25',
'2018-12-24', '2018-12-21', '2018-12-20', '2018-12-19',
'2018-12-18', '2018-12-17',
...
'2018-01-15', '2018-01-12', '2018-01-11', '2018-01-10',
'2018-01-09', '2018-01-08', '2018-01-05', '2018-01-04',
'2018-01-03', '2018-01-02'],
dtype='datetime64[ns]', name='trade_date', length=243, freq=None), Index(['ts_code', 'open', 'high', 'low', 'close', 'pre_close', 'change',
'pct_chg', 'vol', 'amount'],
dtype='object')]
"""
print(pa.head())
"""
ts_code open high ... pct_chg vol amount
trade_date ...
2018-12-28 000001.SZ 9.3 9.5 ... 1.1 576604.0 541571.0
2018-12-27 000001.SZ 9.4 9.5 ... -0.2 624593.3 586343.8
2018-12-26 000001.SZ 9.3 9.4 ... -0.4 421140.6 393215.1
2018-12-25 000001.SZ 9.3 9.4 ... -0.8 586615.4 545235.6
2018-12-24 000001.SZ 9.4 9.4 ... -0.3 509117.7 477186.9
[5 rows x 10 columns]
"""
'''