python 爬蟲示例–基金查詢demo
- 2019 年 11 月 5 日
- 筆記
這兩天試著學了一下爬蟲,剛學會了爬取靜態網頁,就趁熱現學現賣,做了一個基金查詢的demo。

基金數據來自網易財經基金頁面,其URL格式為:
"http://quotes.money.163.com/fund/jzzs_{code}_{page}.html?start={start}&end={end}&sort=TDATE&order=desc".format( code=code,page="0",start=start,end=end) 如 "http://quotes.money.163.com/fund/jzzs_001630_0.html?start=2009-02-22&end=2019-10-29&sort=TDATE&order=desc"
其中 code 為基金代碼,例如 "001630";start 和 end 分別為起始日期和截止日期,格式為 "yyyy-MM-dd"。
爬取到的基金淨值數據用 PyQt 的表格控制項展示,再用 matplotlib 將數據繪圖,嵌入 UI 介面。

通過基金代碼查詢到的新基金,其名稱和代碼資訊會存入文件,以供下次打開程式時由下拉框自動載入。
程式碼如下:
# Fund net-value query demo: scrapes daily net-value tables from the NetEase
# finance fund pages, shows them in a PyQt5 table and plots them with
# matplotlib embedded in the main window.
import sys
import pickle
import re

from PyQt5.QtWidgets import *
from PyQt5.QtGui import QColor, QFont, QIcon, QPixmap, QRegExpValidator
from PyQt5.QtCore import Qt, QSize, QDate, QRegExp
import requests
from bs4 import BeautifulSoup
from matplotlib import pyplot as plt
from matplotlib.backends.backend_qt5agg import FigureCanvasQTAgg, NavigationToolbar2QT
from matplotlib.figure import Figure
#import numpy as np

# File in which the {fund code: fund name} dictionary is persisted.
FUND_FILE = "info.obj"

# URL pattern of one result page; pages are numbered from 0.
URL_TEMPLATE = (
    "http://quotes.money.163.com/fund/jzzs_{code}_{page}.html"
    "?start={start}&end={end}&sort=TDATE&order=desc"
)

# Seconds to wait for the server before giving up, so a stalled request
# cannot block the GUI thread forever.
REQUEST_TIMEOUT = 10


def _to_float(text):
    """Convert a table cell to float, returning NaN when it is not numeric."""
    try:
        return float(text)
    except ValueError:
        return float("nan")


def _parse_rows(soup):
    """Extract (date, net, acc_net, rate) tuples from a parsed result page.

    Every ``<td>`` of the page is taken and grouped in fours, in document
    order; a trailing incomplete group is dropped.
    NOTE(review): this assumes the data table is the only table on the
    page -- confirm against the live markup.
    """
    cells = [td.text.strip() for td in soup.find_all("td")]
    rows = []
    for k in range(0, len(cells) - 3, 4):
        date, net, acc_net, rate = cells[k:k + 4]
        rows.append((date, _to_float(net), _to_float(acc_net), rate))
    return rows


class Canvas(FigureCanvasQTAgg):
    """Matplotlib canvas holding a single axes, embeddable as a Qt widget."""

    def __init__(self, parent=None, width=5, height=4, dpi=100):
        """Create the figure (size in inches, resolution in dpi) and its axes."""
        fig = Figure(figsize=(width, height), dpi=dpi)
        fig.set_tight_layout(True)
        self.axes = fig.add_subplot(111)
        self._style_ticks()
        FigureCanvasQTAgg.__init__(self, fig)  # base-class initialisation
        self.setParent(parent)
        FigureCanvasQTAgg.updateGeometry(self)

    def _style_ticks(self):
        """Show the x tick labels (dates) rotated by 90 degrees."""
        self.axes.tick_params(axis='x', rotation=90, direction="in")

    def update_figure(self, x, y, title):
        """Redraw the plot for the given series.

        Args:
            x: x values (date strings), newest first.
            y: y values (net values), same order and length as ``x``.
            title: title shown above the plot.

        The series are plotted oldest-first. The inputs are copied, not
        reversed in place, so the caller's lists are left untouched.
        """
        xs = list(reversed(x))
        ys = list(reversed(y))
        self.axes.cla()  # remove whatever was drawn before
        # Re-apply after cla(), which can reset tick settings depending on
        # the matplotlib version.
        self._style_ticks()
        self.axes.set_title(title, fontsize=18)
        self.axes.plot(xs, ys)
        self.axes.scatter(xs, ys, marker='o')
        self.axes.set_ylabel("基金凈值[元]")
        self.axes.grid(lw=0.5, ls="--", alpha=0.5)
        self.draw()


class MainWindow(QMainWindow):
    """Main window: toolbar with query inputs, a table tab and a plot tab."""

    def __init__(self, parent=None):
        super().__init__(parent)
        # Set before the widgets exist so slots fired during set-up find them.
        self.code = None
        self.name = None
        self.date, self.net, self.acc_net, self.rate = [], [], [], []
        self.funds = self._load_funds()
        self.setWindowTitle("A股基金查詢工具【數據來源於網易財經,python爬蟲demo】")
        self.create_table()
        self.create_canvas()
        self.setup_centralWidget()
        #self.setWindowIcon(QIcon(":ICON/ICON/retest.png"))
        self.createActions()
        self.setup_toolBar()
        self.setup_menuBar()
        self.statusBar().showMessage("ready")

    @staticmethod
    def _load_funds(path=FUND_FILE):
        """Load the saved {code: name} dictionary; empty dict if unavailable.

        NOTE(security): pickle can execute arbitrary code while loading, so
        the file must only ever be one this program wrote itself.
        """
        try:
            with open(path, "rb") as fh:
                funds = pickle.load(fh)
        except (OSError, EOFError, pickle.UnpicklingError):
            # First launch (no file yet) or unreadable file: start empty.
            return {}
        return funds if isinstance(funds, dict) else {}

    def create_table(self):
        """Create the read-only, sortable four-column result table."""
        self.table = QTableWidget()
        self.table.setEditTriggers(QAbstractItemView.NoEditTriggers)
        HorizontalHeaderLabels = ["公布日期", "單位凈值", "累計凈值", "增長率"]
        columns = len(HorizontalHeaderLabels)
        self.table.setColumnCount(columns)
        self.rows = 100  # minimum number of rows kept in the table
        self.table.setRowCount(self.rows)
        self.headerWidth = (100, 80, 80, 80)
        self.table.setSortingEnabled(True)
        self.table.horizontalHeader().setStyleSheet(
            "QHeaderView::section{background-color:rgb(180,180,250);}")
        # Only the first columns-1 widths are applied; the last column keeps
        # its default width.
        for i in range(columns - 1):
            self.table.setColumnWidth(i, self.headerWidth[i])
        self.table.setHorizontalHeaderLabels(HorizontalHeaderLabels)

    def update_table(self):
        """Fill the table from self.date / self.net / self.acc_net / self.rate."""
        table = self.table
        centered = Qt.AlignHCenter | Qt.AlignVCenter
        # Sorting must be off while inserting, otherwise Qt may move a row
        # after its first cell is set and later cells land in the wrong row.
        was_sorting = table.isSortingEnabled()
        table.setSortingEnabled(False)
        table.clearContents()
        table.setRowCount(max(len(self.rate), self.rows))
        records = zip(self.date, self.net, self.acc_net, self.rate)
        for row, (date, net, acc_net, rate) in enumerate(records):
            # The rate stays text (it carries the sign and the percent mark).
            for col, text in enumerate((date, str(net), str(acc_net), rate)):
                item = QTableWidgetItem(text)
                item.setTextAlignment(centered)
                if col == 3:
                    # Falling rates are shown in green, all others in red.
                    colour = "green" if text.startswith("-") else "red"
                    item.setForeground(QColor(colour))
                table.setItem(row, col, item)
        table.setSortingEnabled(was_sorting)

    def create_canvas(self):
        """Create the matplotlib canvas used by the plot tab."""
        self.canvas = Canvas(self)

    def setup_centralWidget(self):
        """Build the central tab widget: table tab and plot tab."""
        self.tabWidget = QTabWidget()
        self.tabWidget.addTab(self.table, "Table ")
        vlayout = QVBoxLayout()
        Navigation_toolbar = NavigationToolbar2QT(self.canvas, self)
        vlayout.addWidget(self.canvas)
        vlayout.addWidget(Navigation_toolbar)
        plotWidget = QWidget()
        plotWidget.setLayout(vlayout)
        self.tabWidget.addTab(plotWidget, "Plot")
        self.tabWidget.setCurrentIndex(1)  # start on the plot tab
        self.setCentralWidget(self.tabWidget)

    def createActions(self):
        """Create the exit, query and about actions."""
        self.exitAction = QAction("E&xit", self)
        self.exitAction.triggered.connect(self.close)
        self.queryAction = QAction("查詢", self)
        self.queryAction.triggered.connect(self.query)
        self.helpAboutAction = QAction("About", self)
        self.helpAboutAction.setShortcut("Ctrl+H")
        self.helpAboutAction.triggered.connect(self.showAboutDlg)

    def setup_menuBar(self):
        """Populate the File and Help menus."""
        fileMenu = self.menuBar().addMenu("&File")
        fileMenu.addAction(self.exitAction)
        helpMenu = self.menuBar().addMenu("&Help")
        helpMenu.addAction(self.helpAboutAction)

    def showAboutDlg(self):
        """Show the about box."""
        # "\n" separates version and author (the escape had been lost).
        QMessageBox.about(self, u"title", u"Version: 0.1\n" u"author: wsp")

    def name_selected(self):
        """Take the fund code from the combo-box entry ("<code> <name>")."""
        self.name = self.comboName.currentText()
        self.code = self.name.split(" ")[0]
        print(self.name, self.code)

    def closeEvent(self, event):
        """Ask for confirmation; on Yes save the fund dictionary and close."""
        reply = QMessageBox.question(
            self, '提示', "是否要退出程式?",
            QMessageBox.Yes | QMessageBox.No, QMessageBox.No)
        if reply == QMessageBox.Yes:
            try:
                with open(FUND_FILE, "wb") as fh:
                    pickle.dump(self.funds, fh)
            except OSError as e:
                # Failing to save must not prevent the window from closing.
                print("Save error: ", e)
            event.accept()
        else:
            event.ignore()

    @staticmethod
    def download(url, user_agent='wswp', num_retries=2, proxies=None):
        """Fetch ``url`` and return the response text, or None on failure.

        Args:
            url: address to fetch.
            user_agent: value sent as the User-Agent header.
            num_retries: how many more attempts are made after a 5xx reply.
            proxies: optional proxies mapping passed through to requests.

        Returns:
            The body text for a status below 400; None for a network error
            or an error status that is still failing after the retries.
        """
        print("Downloading: ", url)
        headers = {'User-Agent': user_agent}
        try:
            resp = requests.get(url, headers=headers, proxies=proxies,
                                timeout=REQUEST_TIMEOUT)
        except requests.exceptions.RequestException as e:
            print('Download error: ', e)
            return None
        if resp.status_code < 400:
            return resp.text
        print("Download error: ", resp.text)
        if num_retries > 0 and 500 <= resp.status_code < 600:
            # Server-side errors may be transient: try again.
            return MainWindow.download(url, user_agent, num_retries - 1, proxies)
        return None

    def query(self):
        """Download all result pages for the current fund and date range,
        then refresh the table, the plot and the fund combo box."""
        if not self.code:
            QMessageBox.critical(self, "錯誤", "基金程式碼為空或格式錯誤!")
            self.codeInput.setFocus()
            return
        code = self.code
        start = self.start.text()
        end = self.end.text()

        html = self.download(
            URL_TEMPLATE.format(code=code, page="0", start=start, end=end))
        if html is None:
            QMessageBox.critical(self, "錯誤", "爬不到有效資訊,請檢查基金程式碼是否有誤!")
            return
        soup = BeautifulSoup(html, 'html.parser')

        # Fund name: the part of the page title before the first "_".
        title_tag = soup.find(name="title")
        name = title_tag.text.split("_")[0] if title_tag is not None else code

        # Page count: text of the second-to-last link in the pager; a page
        # without pager links is treated as a single page.
        pager = soup.find(name="div", attrs={"class": "mod_pages"})
        links = pager.find_all(name="a") if pager is not None else []
        try:
            pages = int(links[-2].text) if links else 1
        except (IndexError, ValueError):
            pages = 1
        print("pages:", pages)

        rows = _parse_rows(soup)
        for page in range(1, pages):
            html = self.download(
                URL_TEMPLATE.format(code=code, page=str(page), start=start, end=end))
            if html is None:
                # Keep what was fetched so far instead of crashing.
                self.statusBar().showMessage(
                    "page %d download failed; showing partial data" % page)
                break
            rows.extend(_parse_rows(BeautifulSoup(html, 'html.parser')))

        self.name = name
        self.date = [r[0] for r in rows]
        self.net = [r[1] for r in rows]      # unit net value
        self.acc_net = [r[2] for r in rows]  # accumulated net value
        self.rate = [r[3] for r in rows]

        self.update_table()
        self.canvas.update_figure(
            x=self.date, y=self.net, title="%s (%s) 凈值走勢" % (name, code))

        # Remember a newly seen fund and make it the current combo entry.
        itemText = code + " " + name
        if code not in self.funds:
            self.funds[code] = name
            self.comboName.addItem(itemText)
        self.comboName.setCurrentText(itemText)

    def codeInputFinished(self):
        """Take the fund code typed into the line edit."""
        self.code = self.codeInput.text()

    def setup_toolBar(self):
        """Build the toolbar: fund combo, code input, date range, query button."""
        label0 = QLabel("選擇基金:")
        self.comboName = QComboBox()
        for code, name in sorted(self.funds.items()):
            self.comboName.addItem(code + " " + name)
        self.comboName.currentIndexChanged[int].connect(self.name_selected)
        self.comboName.setStatusTip("選擇基金")

        label_ = QLabel(" 基金程式碼:")
        self.codeInput = QLineEdit()
        # Exactly six digits ("\d" -- the backslash had been lost).
        regExp = QRegExp(r"^\d{6}$")
        validator = QRegExpValidator(regExp, self.codeInput)
        self.codeInput.setValidator(validator)
        self.codeInput.setFixedWidth(50)
        self.codeInput.editingFinished.connect(self.codeInputFinished)

        label1 = QLabel(" 起始日期")
        self.start = QDateEdit()
        self.start.setCalendarPopup(True)
        self.start.setDisplayFormat("yyyy-MM-dd")
        label2 = QLabel(" 截止日期")
        self.end = QDateEdit()
        self.end.setCalendarPopup(True)
        self.end.setDisplayFormat("yyyy-MM-dd")
        today = QDate.currentDate()
        self.start.setMaximumDate(today)          # never later than today
        self.start.setDate(today.addMonths(-3))   # default: three months back
        self.end.setDate(today)
        self.end.setMaximumDate(today)

        toolbar0 = self.addToolBar("選擇")
        toolbar0.addWidget(label0)
        toolbar0.addWidget(self.comboName)
        toolbar0.addWidget(label_)
        toolbar0.addWidget(self.codeInput)
        toolbar0.addWidget(label1)
        toolbar0.addWidget(self.start)
        toolbar0.addWidget(label2)
        toolbar0.addWidget(self.end)
        toolbar0.addSeparator()
        self.queryButton = QPushButton("查詢")
        self.queryButton.clicked.connect(self.query)
        toolbar0.addWidget(self.queryButton)
        toolbar0.addSeparator()

        # The combo already displays its first entry, but no change signal
        # was emitted for it; adopt it so an immediate query uses that fund.
        if self.comboName.count():
            self.name_selected()


def main():
    """Create the application, show the main window and run the event loop."""
    app = QApplication(sys.argv)
    mw = MainWindow()
    mw.show()
    sys.exit(app.exec_())


if __name__ == '__main__':
    main()