这是奔跑的键盘侠的第110篇文章
感觉写着写着就有点超纲了
本来是要做入门学习的分享,结果一发不可收拾,事情有点搞大了
但是既然硬着头皮开动了,总要善终吧?
其实,挺多事情,都被我半途而废、有始无终,以至于现在很多方面只是一知半解。
索性,感觉头大的小伙伴,可以留着以后慢慢消化。
好了,继续接下来的表演。
上期的目录,再补充一下:
├── README
├── MyQuant_v1 #量化分析程序目录
├── __init__.py
├── data #数据处理目录
│ ├── __init__.py
│ ├── basic_crawler.py# 爬取股票基础信息存入MongoDB数据库.
│ └── data_crawler.py #爬取指数、股票数据
├── util # 公用程序
│ ├── __init__.py
│ ├── stock_util.py#获取股票交易日前,所有股票代码
│ └── database.py #链接数据库
├── backtest #回测
│ ├── __init__.py
│ └── _backtest_ #计划写一下回测走势图
├── factor #因子
│ ├── __init__.py
│ └── _factor_.py #不准备开发
├── strategy #策略
│ ├── __init__.py
│ └── _strategy_ #计划简单写个,主要用于回测
├── trading #交易
│ ├── __init__.py
│ └── _trading_ #不准备开发
└── log #日志目录
├── __init__.py
├── backtest.log #不准备开发
└── transactions.log#不准备开发
打开pycharm,再随便运行一下上次的代码,继续爬取一些数据,结果一运行就报错
提示mongodb连接不上……
赶快登录终端看了一眼:
Last login: Sun Jul 14 08:52:32 on ttys000
(base) wangchaodeMacBook-Air:~ Ed_Frey$ mongo
MongoDB shell version v4.0.10
connecting to: mongodb://127.0.0.1:27017/?gssapiServiceName=mongodb
2019-07-14T08:54:41.743+0800 E QUERY [js] Error: couldn't connect to server 127.0.0.1:27017, connection attempt failed: SocketException: Error connecting to 127.0.0.1:27017 :: caused by :: Connection refused :
connect@src/mongo/shell/mongo.js:344:17
@(connect):2:6
exception: connect failed
百度搜索了半天,找到对症的帖子,大概是说上次没有正确地退出mongodb,要重新使用Run MongoDB里面的命令(上一篇安装mongodb里面有贴过),才能正常登入。
至于如何正常退出,要使用exit命令退出,而不能直接点关闭窗口,这……
1
basic_crawler.py
#!/usr/bin/env python3.6
# -*- coding: utf-8 -*-
# @Time : 2019-07-13 18:12
# @Author : Ed Frey
# @File : basic_crawler.py
# @Software: PyCharm
# -*- coding: utf-8 -*-
from pymongo import UpdateOne
from util.database import DB_CONN
from util.stock_util import get_trading_dates
import tushare as ts
from datetime import datetime, timedelta
"""
to get basic data from tushare and save it to MongoDB.
"""
class BasicCrawler:
    """Crawl daily stock basics from tushare and upsert them into MongoDB.

    Documents are written to the ``basic`` collection of ``DB_CONN`` and are
    keyed by the ``(code, date)`` pair, so re-running a date updates the
    existing documents instead of duplicating them.
    """

    # Raw share counts above this value are scaled by 1E4, values at or below
    # it by 1E8. Per the original author's note the source mixes units: the
    # largest A-share issuer (ICBC) has about 3564 x 1E8 shares and the
    # smallest (Defang Nano) about 4274 x 1E4, so 4000 separates the two.
    _SHARE_UNIT_THRESHOLD = 4000

    def __init__(self):
        self.db = DB_CONN['basic']

    def crawl_basic(self, begin_date=None, end_date=None):
        """Crawl stock basics for every trading date in a date range.

        :param begin_date: first date, 'YYYY-MM-DD'; defaults to yesterday
        :param end_date: last date, 'YYYY-MM-DD'; defaults to yesterday
        :return: None
        """
        yesterday = (datetime.now() - timedelta(days=1)).strftime('%Y-%m-%d')
        if begin_date is None:
            begin_date = yesterday
        if end_date is None:
            end_date = yesterday

        for date in get_trading_dates(begin_date, end_date):
            try:
                self.crawl_basic_at_date(date)
            except Exception as exc:
                # Best effort: one failing date must not stop the remaining
                # dates, but Ctrl-C / SystemExit are no longer swallowed and
                # the underlying error is reported.
                print(r"ERROR to crawl stocks' information,date:%s" % date, flush=True)
                print(repr(exc), flush=True)

    def crawl_basic_at_date(self, date):
        """Fetch stock basics for ``date`` and bulk-upsert them.

        :param date: date string handed to ``ts.get_stock_basics`` and stored
            on every saved document
        :return: None
        """
        # Original author's note: when date is None tushare is expected to
        # return the last trading date's data.
        df_basics = ts.get_stock_basics(date)
        # Nothing was returned for this date, so there is nothing to store.
        if df_basics is None:
            return

        update_requests = []
        for code in set(df_basics.index):
            doc = dict(df_basics.loc[code])
            try:
                # Convert "20180101" into "2018-01-01".
                time_to_market = datetime \
                    .strptime(str(doc['timeToMarket']), '%Y%m%d') \
                    .strftime('%Y-%m-%d')
                totals = self._to_share_count(doc['totals'])
                outstanding = self._to_share_count(doc['outstanding'])
                # A date field is stored because there is one document per
                # stock per day.
                doc.update({
                    'code': code,
                    'date': date,
                    'timeToMarket': time_to_market,
                    'outstanding': outstanding,
                    'totals': totals
                })
                update_requests.append(
                    UpdateOne(
                        {'code': code, 'date': date},
                        {'$set': doc}, upsert=True))
            except Exception as exc:
                # Skip only the offending stock and keep processing the rest.
                print('发生异常,股票代码:%s,日期:%s' % (code, date), flush=True)
                print(doc, flush=True)
                print(repr(exc), flush=True)

        if update_requests:
            update_result = self.db.bulk_write(update_requests, ordered=False)
            print('抓取股票基本信息,日期:%s, 插入:%4d条,更新:%4d条' %
                  (date, update_result.upserted_count, update_result.modified_count), flush=True)

    @classmethod
    def _to_share_count(cls, raw_value):
        """Scale a raw share figure by 1E4 or 1E8 depending on its size."""
        value = float(raw_value)
        if value > cls._SHARE_UNIT_THRESHOLD:
            return value * 1E4
        return value * 1E8
2
stock_util.py
#!/usr/bin/env python3.6
# -*- coding: utf-8 -*-
# @Time : 2019-07-13 18:19
# @Author : Ed Frey
# @File : stock_util.py
# @Software: PyCharm
from pymongo import ASCENDING
from util.database import DB_CONN
from datetime import datetime, timedelta
def get_trading_dates(begin_date=None, end_date=None):
    """
    Return the dates stored for code '000001' within a date range.

    The range is inclusive on both ends and the result is sorted ascending.
    :param begin_date: first date, 'YYYY-MM-DD'; defaults to 365 days ago
    :param end_date: last date, 'YYYY-MM-DD'; defaults to today
    :return: list of date strings
    """
    date_format = '%Y-%m-%d'
    today = datetime.now()
    if begin_date is None:
        begin_date = (today - timedelta(days=365)).strftime(date_format)
    if end_date is None:
        end_date = today.strftime(date_format)

    # NOTE(review): this filters the `basic` collection on `index: True`;
    # confirm that collection really holds index rows for code '000001'.
    query = {
        'code': '000001',
        'date': {'$gte': begin_date, '$lte': end_date},
        'index': True,
    }
    rows = DB_CONN.basic.find(
        query,
        projection={'date': True, '_id': False},
        sort=[('date', ASCENDING)])

    trading_dates = []
    for row in rows:
        trading_dates.append(row['date'])
    return trading_dates
def get_all_codes(date=None, max_lookback_days=365):
    """
    Return the stock codes stored in the ``basic`` collection for a date.

    If no codes are stored for ``date`` (for example it is not a trading
    day), step back one calendar day at a time until a day with codes is
    found or ``max_lookback_days`` earlier days have been tried.
    :param date: date string 'YYYY-MM-DD'; defaults to today
    :param max_lookback_days: how many earlier days to try before giving up;
        bounds the search so an empty collection cannot loop forever
    :return: a list of stock codes, empty if nothing was found in the window
    """
    date_format = '%Y-%m-%d'
    if date is None:
        current_day = datetime.now()
        date = current_day.strftime(date_format)
    else:
        try:
            # Step back from the requested date, not from today.
            current_day = datetime.strptime(date, date_format)
        except (TypeError, ValueError):
            # Unparseable date: query it as given, then fall back relative
            # to today as the previous implementation did.
            current_day = datetime.now()

    for _ in range(max_lookback_days + 1):
        code_cursor = DB_CONN.basic.find(
            {'date': date},
            projection={'code': True, '_id': False})
        codes = [x['code'] for x in code_cursor]
        if codes:
            return codes
        current_day = current_day - timedelta(days=1)
        date = current_day.strftime(date_format)
    return []
反正吧,我看着也有点头大了