前往小程序,Get更优阅读体验!
立即前往
首页
学习
活动
专区
工具
TVP
发布
社区首页 >专栏 >python 爬虫示例--基金查询demo

python 爬虫示例--基金查询demo

作者头像
用户6021899
发布2019-11-05 00:56:52
1.3K0
发布2019-11-05 00:56:52
举报
文章被收录于专栏:Python编程 pyqt matplotlib

这两天试着学了一下爬虫,刚学会了爬取静态网页,就趁热现学现卖,做了一个基金查询的demo。

基金数据来自网易财经基金页面,其URL格式为:

代码语言:python
复制
"http://quotes.money.163.com/fund/jzzs_{code}_{page}.html?start={start}&end={end}&sort=TDATE&order=desc".format(
            code=code,page="0",start=start,end=end)
如 "http://quotes.money.163.com/fund/jzzs_001630_0.html?start=2009-02-22&end=2019-10-29&sort=TDATE&order=desc" 

其中code为基金代码,例如"001630";start和end为起始日期和截止日期,格式为 "yyyy-MM-dd"

爬取到的基金净值数据用 PyQt5 的表格控件(QTableWidget)展示,并用 matplotlib 绘制净值走势图,嵌入到 UI 界面中。

通过基金代码查询到的新基金,其名称和代码会保存到文件中,下次打开程序时由下拉框自动加载。

代码如下:

代码语言:python
复制
import sys
from PyQt5.QtWidgets import *
from PyQt5.QtGui import QColor, QFont, QIcon,QPixmap,QRegExpValidator
from PyQt5.QtCore import Qt, QSize,QDate,QRegExp
import pickle
import requests
import re
from bs4 import BeautifulSoup
from matplotlib import pyplot as plt
from matplotlib.backends.backend_qt5agg import FigureCanvasQTAgg,NavigationToolbar2QT
from matplotlib.figure import Figure
#import numpy as np

class Canvas(FigureCanvasQTAgg):
    """Matplotlib canvas holding a single axes, embeddable in a Qt layout."""

    def __init__(self, parent=None, width=5, height=4, dpi=100):
        """Create the figure and its only subplot.

        Args:
            parent: Qt parent widget for the canvas (may be None).
            width: Figure width in inches.
            height: Figure height in inches.
            dpi: Figure resolution in dots per inch.
        """
        fig = Figure(figsize=(width, height), dpi=dpi)
        fig.set_tight_layout(True)
        self.axes = fig.add_subplot(111)
        self._style_ticks()
        FigureCanvasQTAgg.__init__(self, fig)  # initialise the Qt canvas base class
        self.setParent(parent)
        FigureCanvasQTAgg.updateGeometry(self)

    def _style_ticks(self):
        """Show x tick labels (dates) rotated by 90 degrees, ticks pointing in."""
        self.axes.tick_params(axis='x',rotation =90,direction="in")

    def update_figure(self, x ,y,title):
        """Redraw the plot with x and y drawn in reverse order.

        The inputs are copied before being reversed, so the caller's
        sequences are left untouched.

        Args:
            x: Sequence of x values (date strings in this program).
            y: Sequence of y values, same length as x.
            title: Title shown above the plot.
        """
        xs = list(x)[::-1]
        ys = list(y)[::-1]
        self.axes.cla()  # remove the previous plot
        # cla() resets the tick parameters to their defaults, so the
        # rotation chosen in __init__ has to be applied again.
        self._style_ticks()
        self.axes.set_title(title,fontsize=18)
        self.axes.plot(xs,ys)
        self.axes.scatter(xs,ys,  marker ='o')
        self.axes.set_ylabel("基金净值[元]")
        self.axes.grid(lw=0.5,ls="--",alpha=0.5)
        self.draw()  # repaint the canvas
 
class MainWindow(QMainWindow):
    """Main window of the fund lookup tool.

    Downloads the net-value history of a fund from the NetEase Finance
    pages, shows it in a table and plots it on an embedded matplotlib
    canvas. The codes and names of funds that were looked up are kept in
    a pickle file so the combo box can offer them on the next start.
    """

    # File holding the pickled {fund code: fund name} dictionary.
    FUNDS_FILE = "info.obj"
    # Page URL; ``page`` is zero-based, dates use the yyyy-MM-dd format.
    URL_TEMPLATE = ("http://quotes.money.163.com/fund/jzzs_{code}_{page}.html"
                    "?start={start}&end={end}&sort=TDATE&order=desc")

    def __init__(self, parent = None):
        """Load the saved funds and build all widgets, menus and toolbars."""
        super().__init__(parent)
        self.funds = self._load_funds()
        self.code = None  # no fund chosen until the user selects or types one
        self.setWindowTitle("A股基金查询工具【数据来源于网易财经,python爬虫demo】")
        self.create_table()
        self.create_canvas()
        self.setup_centralWidget()
        self.createActions()
        self.setup_toolBar()
        self.setup_menuBar()
        self.statusBar().showMessage("ready")

    def _load_funds(self):
        """Return the saved {code: name} dictionary, or {} if none is usable.

        The file does not exist before the program has been closed once,
        so a missing (or truncated/corrupt) file is not an error.
        """
        # NOTE: pickle can execute arbitrary code while loading; only ever
        # point FUNDS_FILE at a file written by this program.
        try:
            with open(self.FUNDS_FILE, "rb") as fh:
                funds = pickle.load(fh)
        except (FileNotFoundError, EOFError, pickle.UnpicklingError):
            return {}
        return funds if isinstance(funds, dict) else {}

    def create_table(self):
        """Create the read-only, sortable four-column result table."""
        self.table = QTableWidget()
        self.table.setEditTriggers(QAbstractItemView.NoEditTriggers)
        header_labels = ["公布日期", "单位净值","累计净值","增长率"]
        self.table.setColumnCount(len(header_labels))
        self.rows=100  # minimum number of rows shown
        self.table.setRowCount(self.rows)
        self.headerWidth = (100,80,80,80)

        self.table.setSortingEnabled (True)
        self.table.horizontalHeader().setStyleSheet("QHeaderView::section{background-color:rgb(180,180,250);}")
        # Apply every configured width, including the last column.
        for column, width in enumerate(self.headerWidth):
            self.table.setColumnWidth (column,width)

        self.table.setHorizontalHeaderLabels(header_labels)

    def update_table(self):
        """Fill the table from self.date, self.net, self.acc_net and self.rate."""
        rows = len(self.rate)
        # With sorting enabled Qt may move a row as soon as one of its
        # cells is set, so later cells would land in the wrong row.
        # Sorting is therefore switched off while the table is filled.
        was_sorting = self.table.isSortingEnabled()
        self.table.setSortingEnabled(False)
        self.table.clearContents()
        # Grow for long results and shrink back afterwards, but never
        # below the initial number of rows.
        self.table.setRowCount(max(rows, self.rows))
        records = zip(self.date, self.net, self.acc_net, self.rate)
        for row, (date, net, acc_net, rate) in enumerate(records):
            for column, text in enumerate((date, str(net), str(acc_net), rate)):
                item = QTableWidgetItem(text)
                item.setTextAlignment(Qt.AlignHCenter |Qt.AlignVCenter)
                if column == 3:
                    # The growth rate stays text: negative values are
                    # shown in green, everything else in red.
                    colour = "green" if text.startswith("-") else "red"
                    item.setForeground(QColor(colour))
                self.table.setItem(row, column, item)
        self.table.setSortingEnabled(was_sorting)

    def create_canvas(self):
        """Create the matplotlib canvas used by the plot tab."""
        self.canvas = Canvas(self)

    def setup_centralWidget(self):
        """Build the central tab widget with a table tab and a plot tab."""
        self.tabWidget = QTabWidget()
        self.tabWidget.addTab(self.table,"Table ")

        vlayout = QVBoxLayout()
        navigation_toolbar = NavigationToolbar2QT(self.canvas, self)
        vlayout.addWidget(self.canvas)
        vlayout.addWidget(navigation_toolbar)
        plotWidget = QWidget()
        plotWidget.setLayout(vlayout)

        self.tabWidget.addTab(plotWidget,"Plot")
        self.tabWidget.setCurrentIndex(1)  # start on the plot tab
        self.setCentralWidget(self.tabWidget)

    def createActions(self):
        """Create the exit, query and about actions and connect their slots."""
        self.exitAction = QAction("E&xit",self)
        self.exitAction.triggered.connect(self.close)
        self.queryAction = QAction("查询",self)
        self.queryAction.triggered.connect(self.query)

        self.helpAboutAction = QAction("About",self)
        self.helpAboutAction.setShortcut("Ctrl+H")
        self.helpAboutAction.triggered.connect(self.showAboutDlg)

    def setup_menuBar(self):
        """Create the File and Help menus."""
        fileMenu = self.menuBar().addMenu("&File")
        fileMenu.addAction(self.exitAction)

        helpMenu = self.menuBar().addMenu("&Help")
        helpMenu.addAction(self.helpAboutAction)

    def showAboutDlg(self):
        """Show the about dialog."""
        QMessageBox.about(self,u"title",
                          u"Version:  0.1\n"
                          u"author:  wsp")

    def name_selected(self):
        """Take the fund code from the combo box entry ("<code> <name>")."""
        self.name = self.comboName.currentText()
        # An empty combo box yields "", which must not count as a code.
        self.code = self.name.split(" ")[0] or None
        print(self.name,self.code)

    def closeEvent(self, event):
        """Ask for confirmation, then save the known funds and close."""
        reply = QMessageBox.question(self, '提示',"是否要退出程序?",
                                               QMessageBox.Yes | QMessageBox.No,QMessageBox.No)
        if reply == QMessageBox.Yes:
            # The with-block guarantees the data is flushed and the file closed.
            with open(self.FUNDS_FILE, "wb") as fh:
                pickle.dump(self.funds, fh)
            event.accept()
        else:
            event.ignore()

    @staticmethod
    def download(url,user_agent='wswp',num_retries=2,proxies=None,timeout=10):
        """Download a page and return its text, or None on failure.

        Args:
            url: Address to fetch.
            user_agent: Value sent in the User-Agent header.
            num_retries: How many extra attempts to make after a 5xx reply.
            proxies: Optional proxies mapping passed on to requests.
            timeout: Seconds to wait for the server before giving up.

        Returns:
            The response body as text, or None if the request failed or
            the server answered with a status code of 400 or above.
        """
        headers = {'User-Agent' : user_agent}
        retries_left = num_retries
        while True:
            print("Downloading: ", url)
            try:
                resp = requests.get(url, headers=headers, proxies=proxies,
                                    timeout=timeout)
            except requests.exceptions.RequestException as e:
                print('Download error: ' ,e)
                return None
            if resp.status_code < 400:
                return resp.text
            print("Download error: ", resp.text)
            # Only server-side (5xx) errors are worth another attempt.
            if retries_left <= 0 or not 500 <= resp.status_code < 600:
                return None
            retries_left -= 1

    def _fetch_soup(self, code, page, start, end):
        """Download one result page and return it parsed, or None on failure."""
        url = self.URL_TEMPLATE.format(code=code,page=str(page),start=start,end=end)
        html = self.download(url)
        if html is None:
            return None
        return BeautifulSoup(html, 'html.parser')

    @staticmethod
    def _page_count(soup):
        """Return the number of result pages announced by the pager.

        Falls back to 1 when the pager block is missing, has no links, or
        its second-to-last link is not a number.
        """
        pager = soup.find(name="div", attrs = {"class": "mod_pages"})
        if pager is None:
            return 1
        links = pager.find_all(name="a")
        if not links:
            return 1
        try:
            # presumably the last link is "next page" and the one before it
            # carries the highest page number; verify against the site
            return int(links[-2].text)
        except (IndexError, ValueError):
            return 1

    @staticmethod
    def _to_float(text):
        """Convert a cell to float; cells that are not numeric become NaN."""
        try:
            return float(text)
        except ValueError:
            return float("nan")

    def _append_rows(self, soup):
        """Append every complete group of four <td> cells to the result lists.

        The cells are read in document order as date, unit net value,
        accumulated net value and growth rate.
        """
        cells = [td.text.strip() for td in soup.find_all("td")]
        usable = len(cells) - len(cells) % 4  # ignore an incomplete last group
        for first in range(0, usable, 4):
            date, net, acc_net, rate = cells[first:first + 4]
            self.date.append(date)
            self.net.append(self._to_float(net))
            self.acc_net.append(self._to_float(acc_net))
            self.rate.append(rate)

    def query(self):
        """Fetch all pages for the chosen fund and date range and show them."""
        if not self.code:
            QMessageBox.critical(self, "错误", "基金代码为空或格式错误!")
            self.codeInput.setFocus()
            return

        code = self.code
        start = self.start.text()
        end = self.end.text()
        soup = self._fetch_soup(code, 0, start, end)
        title = soup.find(name="title") if soup is not None else None
        if title is None:
            QMessageBox.critical(self, "错误", "爬不到有效信息,请检查基金代码是否有误!")
            return
        # The fund name is the part of the page title before the first "_".
        self.name = title.text.split("_")[0]
        pages = self._page_count(soup)
        print("pages:", pages)

        self.date, self.net, self.acc_net, self.rate = [], [], [],[]
        self._append_rows(soup)
        for page in range(1,pages):
            soup = self._fetch_soup(code, page, start, end)
            if soup is None:
                # Keep what was fetched so far and tell the user.
                self.statusBar().showMessage("部分页面下载失败,数据可能不完整")
                break
            self._append_rows(soup)

        self.update_table()
        # Copies are passed so plotting can never reorder the table data.
        self.canvas.update_figure(x=list(self.date) ,y =list(self.net),title="%s (%s) 净值走势"%(self.name,code))
        itemText = code+" "+self.name
        if code not in self.funds:
            self.funds[code] = self.name
            self.comboName.addItem(itemText)
        self.comboName.setCurrentText(itemText)

    def codeInputFinished(self):
        """Take the fund code typed into the line edit."""
        self.code = self.codeInput.text() or None

    def setup_toolBar(self):
        """Build the toolbar: fund chooser, code input, date range, query button."""
        label0 = QLabel("选择基金:")
        self.comboName = QComboBox()
        for fund_code, fund_name in sorted(self.funds.items()):
            self.comboName.addItem(fund_code+" "+fund_name)
        self.comboName.currentIndexChanged[int].connect(self.name_selected)
        self.comboName.setStatusTip("选择基金")
        label_ = QLabel("   基金代码:")
        self.codeInput = QLineEdit()
        # Raw string: "\d" in a normal string is an invalid escape sequence.
        regExp = QRegExp(r"^\d{6}$")
        validator = QRegExpValidator(regExp)
        self.codeInput.setValidator(validator)
        self.codeInput.setFixedWidth(50)
        self.codeInput.editingFinished.connect(self.codeInputFinished)

        label1 = QLabel("   起始日期")
        self.start= QDateEdit()
        self.start.setCalendarPopup(True)
        self.start.setDisplayFormat("yyyy-MM-dd")
        label2 = QLabel("   截止日期")
        self.end= QDateEdit()
        self.end.setCalendarPopup(True)
        self.end.setDisplayFormat("yyyy-MM-dd")
        today = QDate.currentDate()
        self.start.setMaximumDate(today)  # not later than today
        self.start.setDate(today.addMonths (-3))  # default range: last 3 months
        self.end.setDate(today)
        self.end.setMaximumDate(today)

        toolbar0 = self.addToolBar("选择")
        toolbar0.addWidget(label0)
        toolbar0.addWidget(self.comboName)
        toolbar0.addWidget(label_)
        toolbar0.addWidget(self.codeInput)
        toolbar0.addWidget(label1)
        toolbar0.addWidget(self.start)
        toolbar0.addWidget(label2)
        toolbar0.addWidget(self.end)
        toolbar0.addSeparator()
        self.queryButton = QPushButton("查询")
        self.queryButton.clicked.connect(self.query)
        toolbar0.addWidget(self.queryButton)
        toolbar0.addSeparator()
    
       
if __name__ == '__main__':
    # Start the Qt application, show the main window and pass the event
    # loop's exit status on to the interpreter.
    application = QApplication(sys.argv)
    main_window = MainWindow()
    main_window.show()
    exit_status = application.exec_()
    sys.exit(exit_status)
本文参与 腾讯云自媒体同步曝光计划,分享自微信公众号。
原始发表:2019-10-30,如有侵权请联系 cloudcommunity@tencent.com 删除

本文分享自 Python可视化编程机器学习OpenCV 微信公众号,前往查看

如有侵权,请联系 cloudcommunity@tencent.com 删除。

本文参与 腾讯云自媒体同步曝光计划  ,欢迎热爱写作的你一起参与!

评论
登录后参与评论
0 条评论
热度
最新
推荐阅读
领券
问题归档专栏文章快讯文章归档关键词归档开发者手册归档开发者手册 Section 归档