用Simple HTML DOM自动查高考分数与录取情况
- 2019 年 10 月 6 日
- 筆記
这个程序原本是做来给自己用的,免去了查成绩时手抖、心跳加速、把信息打错等各种问题;页面改动后会直接发送邮件到自己的邮箱里,不看也不行→_→哈哈哈。 Simple HTML DOM:http://simplehtmldom.sourceforge.net/ 。我这里用的查询地址是广州招考的,查询时没有验证码,也就省去了验证码识别的问题。
先模拟一次提交,找到提交数据。
用Curl模拟提交
// Cookie jar file that receives the cookies set by the login POST.
// Created in the system temp directory so it does not depend on a relative "tmp" folder existing.
$cookie_file = tempnam(sys_get_temp_dir(), "cookie");

/**
 * Sends an HTTP POST with cURL and saves the cookies it receives into $cookie_file.
 *
 * @param string       $url  Address to submit to.
 * @param array|string $data Form fields to send as the request body.
 * @return string|false Response body, or false when the request fails.
 */
function postData($url, $data)
{
    global $cookie_file;

    $ch = curl_init();
    $timeout = 300;
    curl_setopt($ch, CURLOPT_URL, $url);
    curl_setopt($ch, CURLOPT_POST, true);
    curl_setopt($ch, CURLOPT_POSTFIELDS, $data);
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1); // return the body instead of printing it
    curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, $timeout);
    curl_setopt($ch, CURLOPT_COOKIEJAR, $cookie_file);
    $handles = curl_exec($ch);
    curl_close($ch);

    return $handles;
}

$url = 'http://gaokao.gzzk.cn/mopub_login3.aspx';

// Load the login form first: its hidden fields have to be posted back together with the credentials.
$contents = file_get_contents($url);
$html = str_get_html($contents);
$__VIEWSTATE = $html->find('#__VIEWSTATE', 0)->attr['value'];
$__VIEWSTATEGENERATOR = $html->find('#__VIEWSTATEGENERATOR', 0)->attr['value'];
$__EVENTVALIDATION = $html->find('#__EVENTVALIDATION', 0)->attr['value'];

// Deliberately not called $argv: PHP reserves that name for the script's arguments.
$postFields = array(
    '__EVENTTARGET' => "LoginButton",
    '__EVENTARGUMENT' => "",
    '__VIEWSTATE' => $__VIEWSTATE,
    '__VIEWSTATEGENERATOR' => $__VIEWSTATEGENERATOR,
    '__EVENTVALIDATION' => $__EVENTVALIDATION,
    'text_biaoshi' => "", // candidate number
    'text_mima' => ''     // password
);
$rdata = postData($url, $postFields);
测试发现还有来源验证(Referer 检查),于是在 postData 函数里多加一行设置 Referer:curl_setopt($ch, CURLOPT_REFERER, "http://gaokao.gzzk.cn/mopub_login3.aspx");
测试成功,还有一层跳转
图中的链接对应结果页。
// The login response is only an intermediate page; its first link leads to the result page.
$html2 = str_get_html($rdata);
$resultLink = $html2->find('a', 0);
$newurl = "http://gaokao.gzzk.cn" . $resultLink->attr['href'];

// Request the result page through getData().
$contents2 = getData($newurl);

![](https://tlingc.ocdn.wuicent.com/2017/07/TIM截图20170722165515.jpg)
成功!再把成绩表格选取出来,并添加MD5验证,如果MD5没有变,则每隔5秒刷新,在页面改动后,把这个选取出来的表格作为邮件正文发送到我的邮箱即可。
// Parse the result page and pick out the element with id "td_center".
$html3 = str_get_html($contents2);
$scoreCell = $html3->find('#td_center', 0);
$result = $scoreCell->innertext; // $result -> the score table
echo $result;
完整源码
<html>
<head>
<meta http-equiv="content-type" content="text/html;charset=utf-8">
</head>
<body>
<?php
// Logs in to the score query page, fetches the result page and compares the MD5 of its
// #td_center element with a known value. While nothing has changed (or a request fails),
// the page reloads itself every 5 seconds.

require_once "simple_html_dom.php";

// Cookie jar shared by postData() (writes it) and getData() (reads it).
$cookie_file = tempnam(sys_get_temp_dir(), "cookie");

// Every run creates a fresh jar and the page reloads every 5 seconds,
// so delete the file on exit instead of letting temp files pile up.
register_shutdown_function(function () use ($cookie_file) {
    if (is_string($cookie_file) && is_file($cookie_file)) {
        unlink($cookie_file);
    }
});

/**
 * Sends an HTTP POST with cURL and saves the cookies it receives into $cookie_file.
 *
 * @param string       $url  Address to submit to.
 * @param array|string $data Form fields to send as the request body.
 * @return string|false Response body, or false when the request fails.
 */
function postData($url, $data)
{
    global $cookie_file;

    $ch = curl_init();
    $timeout = 300;
    curl_setopt($ch, CURLOPT_URL, $url);
    // The site checks where the request comes from, so present the login page as the referer.
    curl_setopt($ch, CURLOPT_REFERER, "http://gaokao.gzzk.cn/mopub_login3.aspx");
    curl_setopt($ch, CURLOPT_POST, true);
    curl_setopt($ch, CURLOPT_POSTFIELDS, $data);
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1); // return the body instead of printing it
    curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, $timeout);
    curl_setopt($ch, CURLOPT_TIMEOUT, $timeout); // also bound the whole transfer so a stalled response cannot block polling
    curl_setopt($ch, CURLOPT_COOKIEJAR, $cookie_file);
    $handles = curl_exec($ch);
    curl_close($ch);

    return $handles;
}

/**
 * Sends an HTTP GET with cURL, attaching the cookies stored in $cookie_file.
 *
 * @param string $url Address to fetch.
 * @return string|false Response body, or false when the request fails.
 */
function getData($url)
{
    global $cookie_file;

    $ch = curl_init();
    $timeout = 300;
    curl_setopt($ch, CURLOPT_URL, $url);
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
    curl_setopt($ch, CURLOPT_HEADER, 0); // body only, no response headers
    curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, $timeout);
    curl_setopt($ch, CURLOPT_TIMEOUT, $timeout);
    curl_setopt($ch, CURLOPT_COOKIEFILE, $cookie_file);
    $handles = curl_exec($ch);
    curl_close($ch);

    return $handles;
}

/**
 * Reads the "value" attribute of the first element matching $selector.
 *
 * @param object $dom      Parsed document returned by str_get_html().
 * @param string $selector Selector of the input element.
 * @return string|null The attribute value, or null when the element or attribute is missing.
 */
function hiddenFieldValue($dom, $selector)
{
    $node = $dom->find($selector, 0);
    if (!$node || !isset($node->attr['value'])) {
        return null;
    }

    return $node->attr['value'];
}

/**
 * Parses an HTTP response body, tolerating failed requests.
 *
 * @param string|false $body Response body, or false from a failed request.
 * @return object|null Parsed document, or null when there is nothing parseable.
 */
function parseResponse($body)
{
    if (!is_string($body) || $body === '') {
        return null;
    }
    $dom = str_get_html($body);

    return $dom ? $dom : null;
}

/**
 * Logs in and downloads the result page.
 *
 * @param string $loginUrl    Address of the login form.
 * @param string $candidateId Candidate number (form field text_biaoshi).
 * @param string $password    Password (form field text_mima).
 * @return object|null Parsed result page, or null when any step fails.
 */
function fetchResultPage($loginUrl, $candidateId, $password)
{
    // Load the login form first: its hidden fields have to be posted back with the credentials.
    $loginDom = parseResponse(file_get_contents($loginUrl));
    if ($loginDom === null) {
        return null;
    }

    $viewState = hiddenFieldValue($loginDom, '#__VIEWSTATE');
    $viewStateGenerator = hiddenFieldValue($loginDom, '#__VIEWSTATEGENERATOR');
    $eventValidation = hiddenFieldValue($loginDom, '#__EVENTVALIDATION');
    if ($viewState === null || $viewStateGenerator === null || $eventValidation === null) {
        return null;
    }

    // Deliberately not called $argv: PHP reserves that name for the script's arguments.
    $formFields = array(
        '__EVENTTARGET' => "LoginButton",
        '__EVENTARGUMENT' => "",
        '__VIEWSTATE' => $viewState,
        '__VIEWSTATEGENERATOR' => $viewStateGenerator,
        '__EVENTVALIDATION' => $eventValidation,
        'text_biaoshi' => $candidateId,
        'text_mima' => $password,
    );

    // The login response is only an intermediate page; its first link leads to the result page.
    $redirectDom = parseResponse(postData($loginUrl, $formFields));
    $resultLink = $redirectDom ? $redirectDom->find('a', 0) : null;
    if (!$resultLink || !isset($resultLink->attr['href'])) {
        return null;
    }

    return parseResponse(getData("http://gaokao.gzzk.cn" . $resultLink->attr['href']));
}

/**
 * Makes the browser reload this page in 5 seconds.
 *
 * @return void
 */
function scheduleRefresh()
{
    echo '<meta http-equiv="refresh" content="5;url=checker.php">';
}

$url = 'http://gaokao.gzzk.cn/mopub_login3.aspx';
$candidateId = ""; // candidate number
$password = '';    // password
$originmd5 = "";   // MD5 of #td_center before the scores are published

$html3 = fetchResultPage($url, $candidateId, $password);
$scoreCell = $html3 ? $html3->find('#td_center', 0) : null;

if (!$scoreCell) {
    // A failed request or an unexpected page must not end the polling loop: try again.
    echo "查询失败,5 秒后重试。";
    scheduleRefresh();
} else {
    $result = $scoreCell->innertext; // $result -> the score table
    echo $result;

    $currentmd5 = md5($result);
    // Strict comparison: with != two different hashes that both look like "0e<digits>" compare equal.
    if ($currentmd5 !== $originmd5) {
        echo "页面发生改动!";

        $tableList = $html3->find('#table_list', 0);
        $html4 = $tableList ? parseResponse($tableList->innertext) : null;
        $totalCell = $html4 ? $html4->find('td.mytdfenge1', 23) : null;
        $totalmark = $totalCell ? $totalCell->innertext : null; // total score; null when the cell is missing

        // TODO: act on the change here, e.g. mail $result to yourself.
    } else {
        scheduleRefresh();
    }
}
?>
</body>
</html>