python爬蟲之驗證碼識別

  • 2019 年 10 月 10 日
  • 筆記

版權聲明:本文為博主原創文章,遵循 CC 4.0 BY-SA 版權協議,轉載請附上原文出處鏈接和本聲明。

本文鏈接:https://blog.csdn.net/weixin_40313634/article/details/84574797

環境:

  1. Sublime Text: https://download.sublimetext.com/Sublime%20Text%20Build%203176%20x64%20Setup.exe
  2. python: https://www.python.org/ftp/python/3.7.0/python-3.7.0-amd64.exe
  3. OCR識別庫工具:https://digi.bib.uni-mannheim.de/tesseract/tesseract-ocr-setup-3.05.01.exe
  4. 圖形驗證碼:https://github.com/Python3WebSpider/CrackImageCode/archive/master.zip
  5. 滑動驗證碼:https://github.com/Python3WebSpider/CrackGeetest/archive/master.zip 平台:https://passport.cnblogs.com/user/signin 例子:https://www.cnblogs.com/moning/p/8318475.html

# Recognise a simple image captcha: binarise the picture, then hand it to
# Tesseract through the tesserocr binding.
import tesserocr
from PIL import Image

# Open the captcha and convert it to 8-bit greyscale.  The context manager
# closes the underlying file once the converted copy has been created.
with Image.open('code2.jpg') as source:
    image = source.convert('L')

# Grey levels below the threshold map to 0, every other level maps to 1.
threshold = 127
table = [0 if level < threshold else 1 for level in range(256)]

# Apply the lookup table and produce a 1-bit (mode '1') image.
image = image.point(table, '1')
image.show()

result = tesserocr.image_to_text(image)
print(result)

import time
from io import BytesIO

from PIL import Image
from selenium import webdriver
from selenium.common.exceptions import TimeoutException
from selenium.webdriver import ActionChains
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait

# Login credentials typed into the form by CrackGeetest.open().
# NOTE(review): the e-mail value looks like a placeholder left behind by
# address obfuscation -- replace it with a real account before running.
EMAIL = '[email protected]'
PASSWORD = ''
# Number of pixels subtracted from the detected gap position before dragging.
BORDER = 6
# Initial left offset, in pixels, for the gap search.
INIT_LEFT = 60

class CrackGeetest():
    """Drive Chrome through the Geetest login page and solve its slider captcha.

    The flow implemented by :meth:`crack` is: open the page and fill in the
    credentials, trigger the captcha, capture it before and after the gap is
    shown, locate the gap by comparing the two captures, drag the slider
    along a generated track and finally submit the login form.
    """

    def __init__(self):
        """Start a Chrome session and store the URL, wait helper and credentials."""
        self.url = 'https://account.geetest.com/login'
        self.browser = webdriver.Chrome()
        # Every explicit wait in this class times out after 20 seconds.
        self.wait = WebDriverWait(self.browser, 20)
        self.email = EMAIL
        self.password = PASSWORD

    def __del__(self):
        """Shut the browser session down when the object is collected."""
        # ``browser`` is missing when __init__ failed before assigning it.
        browser = getattr(self, 'browser', None)
        if browser is not None:
            # quit() ends the whole driver session; close() would only close
            # the current window.
            browser.quit()

    def get_geetest_button(self):
        """
        Wait for the initial verification button and return it.
        :return: the clickable element with class ``geetest_radar_tip``
        """
        button = self.wait.until(EC.element_to_be_clickable((By.CLASS_NAME, 'geetest_radar_tip')))
        return button

    def get_position(self):
        """
        Locate the captcha image on the page.
        :return: tuple ``(top, bottom, left, right)`` built from the element's
                 location and size
        """
        img = self.wait.until(EC.presence_of_element_located((By.CLASS_NAME, 'geetest_canvas_img')))
        # Fixed pause before reading the geometry (presumably to let the
        # captcha finish rendering -- confirm).
        time.sleep(2)
        location = img.location
        size = img.size
        top = location['y']
        bottom = location['y'] + size['height']
        left = location['x']
        right = location['x'] + size['width']
        return (top, bottom, left, right)

    def get_screenshot(self):
        """
        Take a screenshot of the current page.
        :return: the screenshot as a PIL image
        """
        screenshot = self.browser.get_screenshot_as_png()
        screenshot = Image.open(BytesIO(screenshot))
        return screenshot

    def get_slider(self):
        """
        Wait for the slider handle and return it.
        :return: the clickable element with class ``geetest_slider_button``
        """
        slider = self.wait.until(EC.element_to_be_clickable((By.CLASS_NAME, 'geetest_slider_button')))
        return slider

    def get_geetest_image(self, name='captcha.png'):
        """
        Crop the captcha out of a page screenshot and save it to disk.
        :param name: file name the cropped image is saved under
        :return: the cropped captcha image
        """
        top, bottom, left, right = self.get_position()
        print('驗證碼位置', top, bottom, left, right)
        screenshot = self.get_screenshot()
        captcha = screenshot.crop((left, top, right, bottom))
        captcha.save(name)
        return captcha

    def open(self):
        """
        Open the login page and type the e-mail and password into the form.
        :return: None
        """
        self.browser.get(self.url)
        email = self.wait.until(EC.presence_of_element_located((By.ID, 'email')))
        password = self.wait.until(EC.presence_of_element_located((By.ID, 'password')))
        email.send_keys(self.email)
        password.send_keys(self.password)

    def get_gap(self, image1, image2, start=None):
        """
        Find the x offset of the gap by comparing two captcha images.

        Columns are scanned left to right beginning at ``start``; the first
        column containing a pixel that differs between the images is returned.
        :param image1: image without the gap
        :param image2: image with the gap
        :param start: first column to scan; defaults to ``INIT_LEFT``
        :return: x offset of the first differing column, or ``start`` when
                 no difference is found
        """
        if start is None:
            start = INIT_LEFT
        # Load the pixel accessors once instead of once per compared pixel.
        pixels1 = image1.load()
        pixels2 = image2.load()
        width, height = image1.size
        for x in range(start, width):
            for y in range(height):
                if not self._pixels_match(pixels1[x, y], pixels2[x, y]):
                    return x
        return start

    @staticmethod
    def _pixels_match(pixel1, pixel2, threshold=60):
        """Return True when the first three channels each differ by less than ``threshold``."""
        return all(abs(a - b) < threshold for a, b in zip(pixel1[:3], pixel2[:3]))

    def is_pixel_equal(self, image1, image2, x, y):
        """
        Tell whether two images have (nearly) the same pixel at one position.
        :param image1: first image
        :param image2: second image
        :param x: x coordinate
        :param y: y coordinate
        :return: True when the first three channels each differ by less than 60
        """
        pixel1 = image1.load()[x, y]
        pixel2 = image2.load()[x, y]
        return self._pixels_match(pixel1, pixel2)

    def get_track(self, distance):
        """
        Build the list of per-step x offsets used to drag the slider.

        The motion accelerates (a = 2) over the first 4/5 of the distance and
        decelerates (a = -3) afterwards, evaluated in time steps of 0.2.
        Offsets are derived from the rounded cumulative position, so the
        track sums to exactly ``round(distance)`` instead of drifting through
        per-step rounding or overshooting on the last step.
        :param distance: total offset to cover
        :return: list of integer offsets; empty when ``distance`` <= 0
        """
        track = []
        # Exact (float) position reached so far.
        current = 0
        # Whole pixels already emitted into the track.
        travelled = 0
        # Position at which the motion switches to deceleration.
        mid = distance * 4 / 5
        # Duration of one simulation step.
        t = 0.2
        # Current velocity.
        v = 0

        while current < distance:
            a = 2 if current < mid else -3
            v0 = v
            # v = v0 + a*t
            v = v0 + a * t
            # x = v0*t + 1/2 * a * t^2
            move = v0 * t + 1 / 2 * a * t * t
            # Clamp so the final step lands on the target, not past it.
            current = min(current + move, distance)
            step = round(current) - travelled
            travelled += step
            track.append(step)
        return track

    def move_to_gap(self, slider, track):
        """
        Drag the slider along the given track and release it.
        :param slider: slider element
        :param track: list of x offsets, applied one after another
        :return: None
        """
        ActionChains(self.browser).click_and_hold(slider).perform()
        for x in track:
            ActionChains(self.browser).move_by_offset(xoffset=x, yoffset=0).perform()
        # Hold briefly at the end position before letting go.
        time.sleep(0.5)
        ActionChains(self.browser).release().perform()

    def login(self):
        """
        Click the login button, wait ten seconds and print a success message.
        :return: None
        """
        submit = self.wait.until(EC.element_to_be_clickable((By.CLASS_NAME, 'login-btn')))
        submit.click()
        time.sleep(10)
        print('登錄成功')

    def _attempt_crack(self):
        """Run one full captcha attempt; raise TimeoutException if it is not confirmed."""
        # Enter user name and password.
        self.open()
        # Click the verification button.
        button = self.get_geetest_button()
        button.click()
        # Capture the captcha before the gap is shown.
        image1 = self.get_geetest_image('captcha1.png')
        # Click the slider so that the gap appears.
        slider = self.get_slider()
        slider.click()
        # Capture the captcha with the gap.
        image2 = self.get_geetest_image('captcha2.png')
        # Locate the gap.
        gap = self.get_gap(image1, image2)
        print('缺口位置', gap)
        # Subtract the BORDER offset from the detected position.
        gap -= BORDER
        # Build the movement track and drag the slider along it.
        track = self.get_track(gap)
        print('滑動軌跡', track)
        self.move_to_gap(slider, track)

        # NOTE(review): the expected text must match the page's wording
        # exactly -- confirm it against the live site.
        success = self.wait.until(
            EC.text_to_be_present_in_element((By.CLASS_NAME, 'geetest_success_radar_tip_content'), '驗證成功'))
        print(success)

    def crack(self, max_attempts=3):
        """
        Solve the captcha, retrying on failure, then log in.

        ``WebDriverWait.until`` raises ``TimeoutException`` instead of
        returning a falsy value, so a failed attempt is detected by catching
        that exception rather than by testing the return value.
        :param max_attempts: maximum number of attempts (at least 1)
        :return: None
        :raises ValueError: if ``max_attempts`` is smaller than 1
        :raises TimeoutException: if every attempt times out
        """
        if max_attempts < 1:
            raise ValueError('max_attempts must be at least 1')
        last_error = None
        for _ in range(max_attempts):
            try:
                self._attempt_crack()
            except TimeoutException as error:
                # Attempt failed or the page did not respond; start over.
                last_error = error
                continue
            self.login()
            return
        raise last_error

# Run the captcha solver only when the file is executed as a script.
if __name__ == '__main__':
    crack = CrackGeetest()
    crack.crack()