python 爬虫示例–基金查询demo

  • 2019 年 11 月 5 日
  • 筆記

这两天试着学了一下爬虫,刚学会了爬取静态网页,就趁热现学现卖,做了一个基金查询的demo。

基金数据来自网易财经基金页面,其URL格式为:

"http://quotes.money.163.com/fund/jzzs_{code}_{page}.html?start={start}&end={end}&sort=TDATE&order=desc".format(              code=code,page="0",start=start,end=end)  如 "http://quotes.money.163.com/fund/jzzs_001630_0.html?start=2009-02-22&end=2019-10-29&sort=TDATE&order=desc" 

其中 code 为基金代码,例如 "001630";page 为页码,从 0 开始;start 和 end 分别为起始日期和截止日期,格式为 "yyyy-MM-dd"。

爬取到的基金净值数据用 PyQt5 的表格控件展示,并用 matplotlib 绘制成图,嵌入 UI 界面。

通过基金代码查询到的新的基金的名称和代码信息会存入文件,以供下次打开程序时程序下拉框自动加载。

代码如下:

import sys
from PyQt5.QtWidgets import *
from PyQt5.QtGui import QColor, QFont, QIcon, QPixmap, QRegExpValidator
from PyQt5.QtCore import Qt, QSize, QDate, QRegExp
import pickle
import requests
import re
from bs4 import BeautifulSoup
from matplotlib import pyplot as plt
from matplotlib.backends.backend_qt5agg import FigureCanvasQTAgg, NavigationToolbar2QT
from matplotlib.figure import Figure

# File that persists the {fund code: fund name} dictionary between runs.
FUNDS_FILE = "info.obj"

# NetEase Finance net-value history page; {page} is a zero-based page index.
URL_TEMPLATE = (
    "http://quotes.money.163.com/fund/jzzs_{code}_{page}.html"
    "?start={start}&end={end}&sort=TDATE&order=desc"
)

# Seconds to wait for the server; without a timeout a stalled request would
# block the GUI thread forever.
REQUEST_TIMEOUT = 10


class Canvas(FigureCanvasQTAgg):
    """Matplotlib canvas with a single axes, embeddable in a Qt layout."""

    def __init__(self, parent=None, width=5, height=4, dpi=100):
        """Create the figure.

        Args:
            parent: Qt parent widget.
            width: figure width in inches.
            height: figure height in inches.
            dpi: dots per inch.
        """
        fig = Figure(figsize=(width, height), dpi=dpi)
        fig.set_tight_layout(True)
        axes = fig.add_subplot(111)
        FigureCanvasQTAgg.__init__(self, fig)
        self.axes = axes
        self._style_ticks()
        self.setParent(parent)
        FigureCanvasQTAgg.updateGeometry(self)

    def _style_ticks(self):
        """Show the date labels on the x axis rotated by 90 degrees."""
        self.axes.tick_params(axis='x', rotation=90, direction="in")

    def update_figure(self, x, y, title):
        """Redraw the curve.

        Args:
            x: dates, newest first (the order in which they were scraped).
            y: net values matching ``x``.
            title: plot title.

        The inputs are plotted oldest-first; copies are reversed so the
        caller's lists are left untouched.
        """
        xs = list(reversed(x))
        ys = list(reversed(y))
        self.axes.cla()  # drop whatever was drawn before
        # cla() may reset tick settings depending on the matplotlib version,
        # so apply them again.
        self._style_ticks()
        self.axes.set_title(title, fontsize=18)
        self.axes.plot(xs, ys)
        self.axes.scatter(xs, ys, marker='o')
        self.axes.set_ylabel("基金净值[元]")
        self.axes.grid(lw=0.5, ls="--", alpha=0.5)
        self.draw()


class MainWindow(QMainWindow):
    """Main window: toolbar for choosing a fund, result table and plot tab."""

    def __init__(self, parent=None):
        super().__init__(parent)
        self.code = None
        self.name = ""
        self.date, self.net, self.acc_net, self.rate = [], [], [], []
        self.funds = self._load_funds()
        self.setWindowTitle("A股基金查询工具【数据来源于网易财经,python爬虫demo】")
        self.create_table()
        self.create_canvas()
        self.setup_centralWidget()
        self.createActions()
        self.setup_toolBar()
        self.setup_menuBar()
        self.statusBar().showMessage("ready")

    @staticmethod
    def _load_funds():
        """Return the saved {code: name} dict, or an empty dict if unavailable.

        NOTE: pickle must only be used on files this program wrote itself;
        unpickling a file from an untrusted source can execute arbitrary code.
        """
        try:
            with open(FUNDS_FILE, "rb") as fh:
                funds = pickle.load(fh)
        except (OSError, EOFError, pickle.UnpicklingError):
            # First run, or an unreadable/corrupt file: start with no funds.
            return {}
        return funds if isinstance(funds, dict) else {}

    @staticmethod
    def _page_url(code, page, start, end):
        """Build the URL of one result page for a fund and date range."""
        return URL_TEMPLATE.format(code=code, page=str(page), start=start, end=end)

    @staticmethod
    def _parse_rows(soup):
        """Extract (date, net, accumulated net, growth rate) rows from a page.

        Every four consecutive <td> cells form one record. Incomplete trailing
        groups and records whose net values are not numeric are skipped.
        """
        cells = [td.text.strip() for td in soup.find_all("td")]
        rows = []
        for k in range(0, len(cells) - 3, 4):
            day, net, acc_net, rate = cells[k:k + 4]
            try:
                rows.append((day, float(net), float(acc_net), rate))
            except ValueError:
                continue
        return rows

    def create_table(self):
        """Create the read-only, sortable result table."""
        self.table = QTableWidget()
        self.table.setEditTriggers(QAbstractItemView.NoEditTriggers)
        HorizontalHeaderLabels = ["公布日期", "单位净值", "累计净值", "增长率"]
        columns = len(HorizontalHeaderLabels)
        self.table.setColumnCount(columns)
        self.rows = 100
        self.table.setRowCount(self.rows)
        self.headerWidth = (100, 80, 80, 80)

        self.table.setSortingEnabled(True)
        self.table.horizontalHeader().setStyleSheet(
            "QHeaderView::section{background-color:rgb(180,180,250);}")
        # The last column keeps its default width.
        for i in range(columns - 1):
            self.table.setColumnWidth(i, self.headerWidth[i])

        self.table.setHorizontalHeaderLabels(HorizontalHeaderLabels)

    def update_table(self):
        """Fill the table from self.date / self.net / self.acc_net / self.rate."""
        records = list(zip(self.date, self.net, self.acc_net, self.rate))
        centered = Qt.AlignHCenter | Qt.AlignVCenter

        # With sorting enabled Qt may move a row as soon as one of its items is
        # set, scattering the remaining cells; switch it off while filling.
        self.table.setSortingEnabled(False)
        self.table.clearContents()
        self.table.setRowCount(max(len(records), self.rows))
        for row, (day, net, acc_net, rate) in enumerate(records):
            for col, text in enumerate((day, str(net), str(acc_net), rate)):
                item = QTableWidgetItem(text)
                item.setTextAlignment(centered)
                if col == 3:
                    # Falling funds are shown in green, everything else in red.
                    color = "green" if text.startswith("-") else "red"
                    item.setForeground(QColor(color))
                self.table.setItem(row, col, item)
        self.table.setSortingEnabled(True)

    def create_canvas(self):
        """Create the plot canvas."""
        self.canvas = Canvas(self)

    def setup_centralWidget(self):
        """Build the central tab widget holding the table and the plot."""
        self.tabWidget = QTabWidget()
        self.tabWidget.addTab(self.table, "Table ")

        vlayout = QVBoxLayout()
        Navigation_toolbar = NavigationToolbar2QT(self.canvas, self)
        vlayout.addWidget(self.canvas)
        vlayout.addWidget(Navigation_toolbar)
        plotWidget = QWidget()
        plotWidget.setLayout(vlayout)

        self.tabWidget.addTab(plotWidget, "Plot")
        self.tabWidget.setCurrentIndex(1)
        self.setCentralWidget(self.tabWidget)

    def createActions(self):
        """Create the actions used by the menus."""
        self.exitAction = QAction("E&xit", self)
        self.exitAction.triggered.connect(self.close)
        self.queryAction = QAction("查询", self)
        self.queryAction.triggered.connect(self.query)

        self.helpAboutAction = QAction("About", self)
        self.helpAboutAction.setShortcut("Ctrl+H")
        self.helpAboutAction.triggered.connect(self.showAboutDlg)

    def setup_menuBar(self):
        """Populate the File and Help menus."""
        fileMenu = self.menuBar().addMenu("&File")
        fileMenu.addAction(self.exitAction)

        helpMenu = self.menuBar().addMenu("&Help")
        helpMenu.addAction(self.helpAboutAction)

    def showAboutDlg(self):
        """Show the About dialog."""
        QMessageBox.about(self, u"title",
                          u"Version:  0.1\n"
                          u"author:  wsp")

    def name_selected(self):
        """Take the fund code from the combo box entry ("<code> <name>")."""
        self.name = self.comboName.currentText()
        # An empty combo box yields "", which must count as "no code".
        self.code = self.name.split(" ")[0] or None
        print(self.name, self.code)

    def closeEvent(self, event):
        """Ask for confirmation and save the known funds before closing."""
        reply = QMessageBox.question(self, '提示', "是否要退出程序?",
                                     QMessageBox.Yes | QMessageBox.No, QMessageBox.No)
        if reply == QMessageBox.Yes:
            with open(FUNDS_FILE, "wb") as fh:
                pickle.dump(self.funds, fh)
            event.accept()
        else:
            event.ignore()

    @staticmethod
    def download(url, user_agent='wswp', num_retries=2, proxies=None):
        """Fetch a page and return its text, or None on failure.

        Args:
            url: address to fetch.
            user_agent: value of the User-Agent header.
            num_retries: how many more attempts to make after a 5xx response.
            proxies: optional proxies mapping passed to requests.

        Returns:
            The response body, or None if the request failed or the server
            answered with a 4xx/5xx status.
        """
        print("Downloading: ", url)
        headers = {'User-Agent': user_agent}
        try:
            resp = requests.get(url, headers=headers, proxies=proxies,
                                timeout=REQUEST_TIMEOUT)
        except requests.exceptions.RequestException as e:
            print('Download error: ', e)
            return None

        if resp.status_code >= 400:
            print("Download error: ", resp.text)
            # Server-side errors may be transient, so try again a few times.
            if num_retries > 0 and 500 <= resp.status_code < 600:
                return MainWindow.download(url, user_agent, num_retries - 1, proxies)
            return None
        return resp.text

    def query(self):
        """Scrape all pages for the current fund and refresh table and plot."""
        if not self.code:
            QMessageBox.critical(self, "错误", "基金代码为空或格式错误!")
            self.codeInput.setFocus()
            return

        code = self.code
        start = self.start.text()
        end = self.end.text()

        html = self.download(self._page_url(code, 0, start, end))
        if html is None:
            QMessageBox.critical(self, "错误", "爬不到有效信息,请检查基金代码是否有误!")
            return
        soup = BeautifulSoup(html, 'html.parser')

        # The fund name is the part of the page title before the first "_".
        title = soup.find(name="title")
        if title is None:
            QMessageBox.critical(self, "错误", "爬不到有效信息,请检查基金代码是否有误!")
            return
        name = title.text.split("_")[0]

        # The second-to-last link of the pager carries the number of pages;
        # anything unexpected is treated as a single page.
        pages = 1
        pager = soup.find(name="div", attrs={"class": "mod_pages"})
        if pager is not None:
            links = pager.find_all(name="a")
            try:
                pages = int(links[-2].text)
            except (IndexError, ValueError):
                pages = 1
        print("pages:", pages)

        rows = self._parse_rows(soup)
        for page in range(1, pages):
            html = self.download(self._page_url(code, page, start, end))
            if html is None:
                # Keep what was collected so far rather than aborting.
                continue
            rows.extend(self._parse_rows(BeautifulSoup(html, 'html.parser')))

        self.name = name
        self.date = [r[0] for r in rows]
        self.net = [r[1] for r in rows]
        self.acc_net = [r[2] for r in rows]
        self.rate = [r[3] for r in rows]

        self.update_table()
        self.canvas.update_figure(x=self.date, y=self.net,
                                  title="%s (%s) 净值走势" % (name, code))
        itemText = code + " " + name
        if code not in self.funds:
            self.funds[code] = name
            self.comboName.addItem(itemText)
        self.comboName.setCurrentText(itemText)

    def codeInputFinished(self):
        """Use the code typed into the line edit as the current fund code."""
        self.code = self.codeInput.text()

    def setup_toolBar(self):
        """Build the toolbar: fund selector, code input, date range, query button."""
        label0 = QLabel("选择基金:")
        self.comboName = QComboBox()
        for code, name in sorted(self.funds.items()):
            self.comboName.addItem(code + " " + name)
        self.comboName.currentIndexChanged[int].connect(self.name_selected)
        self.comboName.setStatusTip("选择基金")
        if self.comboName.count() > 0:
            # The combo already shows a fund, so make it the current code too.
            self.name_selected()

        label_ = QLabel("   基金代码:")
        self.codeInput = QLineEdit()
        # Fund codes consist of exactly six digits.
        regExp = QRegExp(r"^\d{6}$")
        validator = QRegExpValidator(regExp)
        self.codeInput.setValidator(validator)
        self.codeInput.setFixedWidth(50)
        self.codeInput.editingFinished.connect(self.codeInputFinished)

        label1 = QLabel("   起始日期")
        self.start = QDateEdit()
        self.start.setCalendarPopup(True)
        self.start.setDisplayFormat("yyyy-MM-dd")
        label2 = QLabel("   截止日期")
        self.end = QDateEdit()
        self.end.setCalendarPopup(True)
        self.end.setDisplayFormat("yyyy-MM-dd")
        today = QDate.currentDate()
        self.start.setMaximumDate(today)  # never later than today
        self.start.setDate(today.addMonths(-3))  # default range: last 3 months
        self.end.setDate(today)
        self.end.setMaximumDate(today)

        toolbar0 = self.addToolBar("选择")
        toolbar0.addWidget(label0)
        toolbar0.addWidget(self.comboName)
        toolbar0.addWidget(label_)
        toolbar0.addWidget(self.codeInput)
        toolbar0.addWidget(label1)
        toolbar0.addWidget(self.start)
        toolbar0.addWidget(label2)
        toolbar0.addWidget(self.end)
        toolbar0.addSeparator()
        self.queryButton = QPushButton("查询")
        self.queryButton.clicked.connect(self.query)
        toolbar0.addWidget(self.queryButton)
        toolbar0.addSeparator()


if __name__ == '__main__':
    app = QApplication(sys.argv)
    mw = MainWindow()
    mw.show()
    sys.exit(app.exec_())