建表语句, 以其中部分字段为例:
CREATE TABLE `stockmarket` (
`date` varchar(12) NOT NULL DEFAULT '' COMMENT '时间',
`stockCode` varchar(100) NOT NULL DEFAULT '' COMMENT '股票代码',
`stockName` varchar(100) DEFAULT NULL COMMENT '股票名字',
`close` decimal(19,2) DEFAULT NULL COMMENT '闭市价',
`high` decimal(19,2) DEFAULT NULL COMMENT '最高',
`low` decimal(19,2) DEFAULT NULL COMMENT '最低',
`amplitudeRatio` decimal(19,2) DEFAULT NULL COMMENT '振幅',
`turnoverRatio` decimal(19,2) DEFAULT NULL COMMENT '换手率',
`preClose` decimal(19,2) DEFAULT NULL COMMENT '昨收',
`open` decimal(19,2) DEFAULT NULL COMMENT '开盘价',
PRIMARY KEY (`date`,`stockCode`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8;
配置json数据到.json文件, 用于读取配置信息,进行数据库连接
"stockMarket":{
"host":"localhost",
"port":3326,
"user":"root",
"password":"password",
"database":"stockMarket",
"charset":"utf8"
}
涉及到的python库
import re,pymysql,json,time,requests
代码编写
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# @Author : Torre Yang Edit with Python3.6
# @Email : klyweiwei@163.com
# @Time : 2018/6/28 10:50
# 定时 爬取每日股票行情数据;
# 股票数据内容:
import getSoup
import pymysql
import os
import re
import json
import requestsimport connect_dataBase
import time
# db连接
connectDB = connect_dataBase.ConnectDatabase()
get_conf = connectDB.get_conf('databases_conf.json')
conn, cur = connectDB.connect_db(get_conf["stockMarket"]["host"], get_conf["stockMarket"]["user"],
get_conf["stockMarket"]["password"], get_conf["stockMarket"]["database"], get_conf["stockMarket"]["port"])
# 第一步, 通过东方财富网 获取 上海/深圳 所有股票的 股票代码, 存储到list中
url = 'http://quote.eastmoney.com/stocklist.html#'
soup = getSoup.getSoup(url)
uls = soup.select('div#quotesearch li')
# 正则表达式获取所有的股票代码
re1 = re.compile(r'href="http://quote.eastmoney.com/(.+?).html"')
stockCodes = re1.findall(str(uls))
# print(stockCodes)
# 第二步, 将股票代码加入到 股票搜索 的网址中
stockValues = []
for stockCode in stockCodes:
# url = 'https://gupiao.baidu.com/stock/'+stockCode+'.html'
url = 'https://gupiao.baidu.com/api/rails/stockbasicbatch?from=pc&os_ver=1&cuid=xxx&vv=100&format=json&stock_code='+stockCode+''
# print(url)
# url = 'https://gupiao.baidu.com/api/rails/stockbasicbatch?from=pc&os_ver=1&cuid=xxx&vv=100&format=json&stock_code=sh201003'
response = requests.get(url)
response.raise_for_status()
res = response.content
try:
JsonDatas = json.loads(res, encoding='utf-8')
except:
print('解析为空')
datas = JsonDatas['data']
)
for data in datas:
# 添加当天日期(交易日)
date = time.strftime("%Y-%m-%d", time.localtime())
stockCode = data['stockCode']
stockName = data['stockName']
close = data['close']
high = data['high']
low = data['low']
amplitudeRatio = data['amplitudeRatio']
turnoverRatio = data['turnoverRatio']
preClose = data['preClose']
open = data['open']
sql = 'insert into stockmarket(date,stockCode,stockName,close,high,low,amplitudeRatio,turnoverRatio,preClose,open)values("'+str(date)+'","'+str(stockCode)+'","'+str(stockName)+'","'+str(close)+'","'+str(high)+'","'+str(low)+'","'+str(amplitudeRatio)+'","'+str(turnoverRatio)+'","'+str(preClose)+'","'+str(open)+'")'
print(sql)
if 'None' in sql:
print('jump this data')
else:
try:
connectDB.get_fetch(conn, cur, sql)
except:
print('数据异常, 跳过')
print('采集数据完毕')
远程ssh配置,配置定时任务(tip:建议晚上进行采集(或闭市时间),因为交易时间,股票的数据在动态变化)
Jenkins> 系统配置>ssh remote hosts (我是装的虚拟机,centos7版本,已经配置好了JDK,python3,mysql,tomcat等常用软件服务)
源码地址:https://github.com/Testworm/stockMarket.git