python 爬虫示例–基金查询demo
- 2019 年 11 月 5 日
- 筆記
这两天试着学了一下爬虫,刚学会了爬取静态网页,就趁热现学现卖,做了一个基金查询的demo。

基金数据来自网易财经基金页面,其URL格式为:
"http://quotes.money.163.com/fund/jzzs_{code}_{page}.html?start={start}&end={end}&sort=TDATE&order=desc".format( code=code,page="0",start=start,end=end) 如 "http://quotes.money.163.com/fund/jzzs_001630_0.html?start=2009-02-22&end=2019-10-29&sort=TDATE&order=desc"
其中code为基金代码,例如"001630";start和end为起始日期和截止日期,格式为 "yyyy-MM-dd"
爬取到的基金净值数据用 PyQt 的表格控件(QTableWidget)展示,再用 matplotlib 绘制净值走势图并嵌入 UI 界面。

通过基金代码查询到的新基金,其名称和代码信息会存入文件,以供下次打开程序时由下拉框自动加载。
代码如下:
"""Fund net-value query demo.

Scrapes daily net-value pages from NetEase Finance, shows the rows in a Qt
table and plots the unit net value with matplotlib embedded in the window.
Fund codes/names seen so far are persisted with pickle and reloaded into the
fund combo box on the next start.
"""
import sys
import pickle
import re

from PyQt5.QtWidgets import *
from PyQt5.QtGui import QColor, QFont, QIcon, QPixmap, QRegExpValidator
from PyQt5.QtCore import Qt, QSize, QDate, QRegExp
import requests
from bs4 import BeautifulSoup
from matplotlib import pyplot as plt
from matplotlib.backends.backend_qt5agg import FigureCanvasQTAgg, NavigationToolbar2QT
from matplotlib.figure import Figure

# File in which the {code: name} dictionary is persisted between runs.
INFO_FILE = "info.obj"
# Page URL; `page` is zero-based, `start`/`end` are "yyyy-MM-dd" dates.
URL_TEMPLATE = (
    "http://quotes.money.163.com/fund/jzzs_{code}_{page}.html"
    "?start={start}&end={end}&sort=TDATE&order=desc"
)
# Seconds to wait for the server before giving up (requests has no default).
REQUEST_TIMEOUT = 10
# Each data row on the page consists of this many <td> cells.
CELLS_PER_ROW = 4


class Canvas(FigureCanvasQTAgg):
    """Matplotlib canvas widget that draws the net-value curve of one fund."""

    def __init__(self, parent=None, width=5, height=4, dpi=100):
        """Create the figure (size in inches, `dpi` pixels per inch) and its single axes."""
        fig = Figure(figsize=(width, height), dpi=dpi)
        fig.set_tight_layout(True)
        self.axes = fig.add_subplot(111)
        self._style_ticks()
        FigureCanvasQTAgg.__init__(self, fig)
        self.setParent(parent)
        FigureCanvasQTAgg.updateGeometry(self)

    def _style_ticks(self):
        """Show the x tick labels (dates) rotated by 90 degrees, ticks pointing inwards."""
        self.axes.tick_params(axis='x', rotation=90, direction="in")

    def update_figure(self, x, y, title):
        """Redraw the plot.

        `x` and `y` are sequences of equal length in the order delivered by
        the site (`order=desc` in the URL); they are plotted in reverse so
        the curve runs in the opposite order. The inputs are NOT modified.
        """
        # Work on reversed copies: the caller's lists also feed the table.
        xs = list(reversed(x))
        ys = list(reversed(y))
        self.axes.cla()
        # cla() clears the axes, so re-apply the tick styling afterwards.
        self._style_ticks()
        self.axes.set_title(title, fontsize=18)
        self.axes.plot(xs, ys)
        self.axes.scatter(xs, ys, marker='o')
        self.axes.set_ylabel("基金净值[元]")
        self.axes.grid(lw=0.5, ls="--", alpha=0.5)
        self.draw()


class MainWindow(QMainWindow):
    """Main window: toolbar with query inputs, a table tab and a plot tab."""

    def __init__(self, parent=None):
        super().__init__(parent)
        self.funds = self._load_funds()
        self.setWindowTitle("A股基金查询工具【数据来源于网易财经,python爬虫demo】")
        self.create_table()
        self.create_canvas()
        self.setup_centralWidget()
        self.createActions()
        self.setup_toolBar()
        self.setup_menuBar()
        self.statusBar().showMessage("ready")
        self.code = None

    @staticmethod
    def _load_funds():
        """Return the persisted {code: name} dict, or an empty dict if unavailable.

        NOTE(security): pickle must only be used on files this program wrote
        itself; never point INFO_FILE at untrusted data.
        """
        try:
            with open(INFO_FILE, "rb") as fh:
                return pickle.load(fh)
        except (FileNotFoundError, EOFError, pickle.UnpicklingError):
            # First run or damaged file: start with an empty fund list.
            return {}

    def create_table(self):
        """Build the read-only, sortable 4-column result table."""
        self.table = QTableWidget()
        self.table.setEditTriggers(QAbstractItemView.NoEditTriggers)
        HorizontalHeaderLabels = ["公布日期", "单位净值", "累计净值", "增长率"]
        columns = len(HorizontalHeaderLabels)
        self.table.setColumnCount(columns)
        self.rows = 100  # minimum number of rows shown
        self.table.setRowCount(self.rows)
        self.headerWidth = (100, 80, 80, 80)
        self.table.setSortingEnabled(True)
        self.table.horizontalHeader().setStyleSheet(
            "QHeaderView::section{background-color:rgb(180,180,250);}")
        # One width per column (the original loop skipped the last column).
        for col, width in enumerate(self.headerWidth[:columns]):
            self.table.setColumnWidth(col, width)
        self.table.setHorizontalHeaderLabels(HorizontalHeaderLabels)

    def update_table(self):
        """Fill the table from self.date / self.net / self.acc_net / self.rate."""
        # Sorting must be off while inserting, otherwise Qt may move a row
        # between two setItem() calls and cells end up in the wrong row.
        self.table.setSortingEnabled(False)
        self.table.clearContents()
        self.table.setRowCount(max(len(self.rate), self.rows))
        centered = Qt.AlignHCenter | Qt.AlignVCenter
        records = zip(self.date, self.net, self.acc_net, self.rate)
        for row, (date, net, acc_net, rate) in enumerate(records):
            for col, text in enumerate((date, str(net), str(acc_net), rate)):
                item = QTableWidgetItem(text)
                item.setTextAlignment(centered)
                if col == 3:
                    # Growth rate is kept as text; a leading '-' is shown in
                    # green, anything else in red.
                    colour = "green" if rate.startswith("-") else "red"
                    item.setForeground(QColor(colour))
                self.table.setItem(row, col, item)
        self.table.setSortingEnabled(True)

    def create_canvas(self):
        """Create the embedded matplotlib canvas."""
        self.canvas = Canvas(self)

    def setup_centralWidget(self):
        """Put the table and the plot (with navigation toolbar) into a tab widget."""
        self.tabWidget = QTabWidget()
        self.tabWidget.addTab(self.table, "Table ")
        vlayout = QVBoxLayout()
        navigation_toolbar = NavigationToolbar2QT(self.canvas, self)
        vlayout.addWidget(self.canvas)
        vlayout.addWidget(navigation_toolbar)
        plotWidget = QWidget()
        plotWidget.setLayout(vlayout)
        self.tabWidget.addTab(plotWidget, "Plot")
        self.tabWidget.setCurrentIndex(1)
        self.setCentralWidget(self.tabWidget)

    def createActions(self):
        """Create the exit, query and about actions."""
        self.exitAction = QAction("E&xit", self)
        self.exitAction.triggered.connect(self.close)
        self.queryAction = QAction("查询", self)
        self.queryAction.triggered.connect(self.query)
        self.helpAboutAction = QAction("About", self)
        self.helpAboutAction.setShortcut("Ctrl+H")
        self.helpAboutAction.triggered.connect(self.showAboutDlg)

    def setup_menuBar(self):
        """Create the File and Help menus."""
        fileMenu = self.menuBar().addMenu("&File")
        fileMenu.addAction(self.exitAction)
        helpMenu = self.menuBar().addMenu("&Help")
        helpMenu.addAction(self.helpAboutAction)

    def showAboutDlg(self):
        """Show the About dialog."""
        # The newline escape had lost its backslash ("0.1n").
        QMessageBox.about(self, "title",
                          "Version: 0.1\n"
                          "author: wsp")

    def name_selected(self):
        """Slot: take the fund code from the combo entry ("<code> <name>")."""
        self.name = self.comboName.currentText()
        self.code = self.name.split(" ")[0]
        print(self.name, self.code)

    def closeEvent(self, event):
        """Ask for confirmation; on Yes persist the fund dictionary and close."""
        reply = QMessageBox.question(self, '提示', "是否要退出程序?",
                                     QMessageBox.Yes | QMessageBox.No,
                                     QMessageBox.No)
        if reply == QMessageBox.Yes:
            with open(INFO_FILE, "wb") as fh:
                pickle.dump(self.funds, fh)
            event.accept()
        else:
            event.ignore()

    @staticmethod
    def download(url, user_agent='wswp', num_retries=2, proxies=None):
        """Fetch `url` and return the response text, or None on failure.

        A 5xx response is retried up to `num_retries` more times; any other
        HTTP error (>= 400) or a network-level error returns None at once.
        """
        print("Downloading: ", url)
        headers = {'User-Agent': user_agent}
        try:
            resp = requests.get(url, headers=headers, proxies=proxies,
                                timeout=REQUEST_TIMEOUT)
        except requests.exceptions.RequestException as e:
            print('Download error: ', e)
            return None
        if resp.status_code < 400:
            return resp.text
        print("Download error: ", resp.text)
        if num_retries > 0 and 500 <= resp.status_code < 600:
            # Static method: the recursive call must be qualified.
            return MainWindow.download(url, user_agent, num_retries - 1, proxies)
        return None

    @staticmethod
    def _parse_rows(soup):
        """Split all <td> texts of a page into (dates, nets, acc_nets, rates).

        Cells are consumed in groups of CELLS_PER_ROW; a trailing incomplete
        group is dropped so the four lists always have equal length.
        Raises ValueError if a net-value cell is not a number.
        """
        cells = [td.text for td in soup.find_all("td")]
        usable = len(cells) - len(cells) % CELLS_PER_ROW
        dates, nets, acc_nets, rates = [], [], [], []
        for first in range(0, usable, CELLS_PER_ROW):
            date, net, acc_net, rate = cells[first:first + CELLS_PER_ROW]
            dates.append(date)
            nets.append(float(net))
            acc_nets.append(float(acc_net))
            rates.append(rate)
        return dates, nets, acc_nets, rates

    @staticmethod
    def _page_count(soup):
        """Return the number of result pages announced by the pager (at least 1)."""
        pager = soup.find(name="div", attrs={"class": "mod_pages"})
        anchors = pager.find_all(name="a") if pager is not None else []
        if len(anchors) < 2:
            return 1
        try:
            # The second-to-last link carries the highest page number.
            return int(anchors[-2].text)
        except ValueError:
            return 1

    def query(self):
        """Download all pages for the selected fund and date range, then refresh the UI."""
        code = self.code
        if not code or re.fullmatch(r"\d{6}", code) is None:
            QMessageBox.critical(self, "错误", "基金代码为空或格式错误!")
            self.codeInput.setFocus()
            return
        start = self.start.text()
        end = self.end.text()

        html = self.download(
            URL_TEMPLATE.format(code=code, page="0", start=start, end=end))
        if html is None:
            QMessageBox.critical(self, "错误", "爬不到有效信息,请检查基金代码是否有误!")
            return
        soup = BeautifulSoup(html, 'html.parser')

        # The fund name is the part of the page title before the first "_".
        title_tag = soup.find(name="title")
        self.name = title_tag.text.split("_")[0] if title_tag is not None else code

        pages = self._page_count(soup)
        print("pages:", pages)

        dates, nets, acc_nets, rates = self._parse_rows(soup)
        for page in range(1, pages):
            html = self.download(
                URL_TEMPLATE.format(code=code, page=str(page), start=start, end=end))
            if html is None:
                # Skip a page that could not be fetched instead of crashing.
                continue
            more = self._parse_rows(BeautifulSoup(html, 'html.parser'))
            dates.extend(more[0])
            nets.extend(more[1])
            acc_nets.extend(more[2])
            rates.extend(more[3])
        self.date, self.net, self.acc_net, self.rate = dates, nets, acc_nets, rates

        self.update_table()
        self.canvas.update_figure(x=self.date, y=self.net,
                                  title="%s (%s) 净值走势" % (self.name, code))

        # Remember a fund seen for the first time and select it in the combo.
        itemText = code + " " + self.name
        if code not in self.funds:
            self.funds[code] = self.name
            self.comboName.addItem(itemText)
        self.comboName.setCurrentText(itemText)

    def codeInputFinished(self):
        """Slot: take the fund code typed into the line edit."""
        self.code = self.codeInput.text()

    def setup_toolBar(self):
        """Build the toolbar: fund combo, code input, date range and query button."""
        label0 = QLabel("选择基金:")
        self.comboName = QComboBox()
        for fund_code, fund_name in sorted(self.funds.items()):
            self.comboName.addItem(fund_code + " " + fund_name)
        # Connected after filling so that start-up does not trigger the slot.
        self.comboName.currentIndexChanged[int].connect(self.name_selected)
        self.comboName.setStatusTip("选择基金")

        label_ = QLabel(" 基金代码:")
        self.codeInput = QLineEdit()
        # Exactly six digits (the pattern had lost its backslash: "^d{6}$").
        validator = QRegExpValidator(QRegExp(r"^\d{6}$"))
        self.codeInput.setValidator(validator)
        self.codeInput.setFixedWidth(50)
        self.codeInput.editingFinished.connect(self.codeInputFinished)

        label1 = QLabel(" 起始日期")
        self.start = QDateEdit()
        self.start.setCalendarPopup(True)
        self.start.setDisplayFormat("yyyy-MM-dd")
        label2 = QLabel(" 截止日期")
        self.end = QDateEdit()
        self.end.setCalendarPopup(True)
        self.end.setDisplayFormat("yyyy-MM-dd")

        # Default range: the last three months, never later than today.
        today = QDate.currentDate()
        self.start.setMaximumDate(today)
        self.start.setDate(today.addMonths(-3))
        self.end.setDate(today)
        self.end.setMaximumDate(today)

        toolbar0 = self.addToolBar("选择")
        for widget in (label0, self.comboName, label_, self.codeInput,
                       label1, self.start, label2, self.end):
            toolbar0.addWidget(widget)
        toolbar0.addSeparator()
        self.queryButton = QPushButton("查询")
        self.queryButton.clicked.connect(self.query)
        toolbar0.addWidget(self.queryButton)
        toolbar0.addSeparator()


def main():
    """Start the Qt application and show the main window."""
    app = QApplication(sys.argv)
    mw = MainWindow()
    mw.show()
    sys.exit(app.exec_())


if __name__ == '__main__':
    main()