OpenCV圖像處理以及人臉識別

  • 2019 年 11 月 10 日
  • 筆記

OpenCV基礎

OpenCV是一個開源的計算機視覺庫。提供了很多圖像處理常用的工具

批註:本文所有圖片數據都在我的GitHub倉庫

讀取圖片並顯示

import numpy as np
import cv2 as cv

# Load the sample image from disk, then show it in a window titled 'Original'.
original = cv.imread('../machine_learning_date/forest.jpg')
cv.imshow('Original', original)

顯示圖片某個顏色通道的圖像

# For each colour channel, start from an all-zero image with the same shape as
# the original and copy in only that channel, so the other two stay black.
# Channel index 0 is blue, 1 is green and 2 is red.
blue = np.zeros_like(original)
blue[..., 0] = original[..., 0]
cv.imshow('Blue', blue)

green = np.zeros_like(original)
green[..., 1] = original[..., 1]
cv.imshow('Green', green)

red = np.zeros_like(original)
red[..., 2] = original[..., 2]
cv.imshow('Red', red)

  

圖像剪裁

# Crop the central region: the box runs from 1/4 to 3/4 of the width and height.
h, w = original.shape[:2]        # (397, 600) for the sample image
l, t = w // 4, h // 4            # top-left corner of the crop box
r, b = w * 3 // 4, h * 3 // 4    # bottom-right corner of the crop box
cropped = original[t:b, l:r]     # rows are indexed first (top:bottom), then columns
cv.imshow('Cropped', cropped)

圖像縮放

cv2.resize(src,dsize,dst=None,fx=None,fy=None,interpolation=None)

參數

  • src:原圖
  • dsize:輸出圖像尺寸
  • fx:沿水平軸的比例因子
  • fy:沿垂直軸的比例因子
  • interpolation:插值方法
# Shrink to a quarter of the input width and height by giving an explicit size.
quarter_size = (w // 4, h // 4)
scaled1 = cv.resize(original, quarter_size, interpolation=cv.INTER_LINEAR)
cv.imshow('Scaled1', scaled1)

# Scale back up by passing no explicit size and a factor of 4 along x and y.
scaled2 = cv.resize(scaled1, None, fx=4, fy=4, interpolation=cv.INTER_LINEAR)
cv.imshow('Scaled2', scaled2)

# Block until the user presses a key (for example Esc).
cv.waitKey()

圖像文件保存

# Write the blue-channel image built earlier to a JPEG file.
# NOTE(review): this path uses '../ml_data/' while the images above are read
# from '../machine_learning_date/' — confirm which directory is intended.
cv.imwrite('../ml_data/blue.jpg', blue)

邊緣檢測

物體的邊緣檢測是物體識別常用的手段。邊緣檢測常用亮度梯度方法。通過識別亮度梯度變化最大的像素點從而檢測出物體的邊緣。

import cv2 as cv

# Read the image directly as single-channel grayscale and display it.
original = cv.imread('../machine_learning_date/chair.jpg',
                     cv.IMREAD_GRAYSCALE)
cv.imshow('Original', original)

索貝爾邊緣識別

cv.Sobel(original, cv.CV_64F, 1, 0, ksize=5)

參數

  • src:源圖像
  • ddepth:cv.CV_64F:卷積運算使用數據類型為64位浮點型(保證微分的精度)
  • dx:1表示取水平方向索貝爾偏微分
  • dy:0表示不取垂直方向索貝爾偏微分
  • ksize:卷積核為5*5的方陣

水平方向索貝爾偏微分

# First-order Sobel derivative along x only (dx=1, dy=0), with a 5x5 kernel
# and 64-bit float output.
hsobel = cv.Sobel(original, cv.CV_64F, dx=1, dy=0, ksize=5)
cv.imshow('H-Sobel', hsobel)

垂直方向索貝爾偏微分

# First-order Sobel derivative along y only (dx=0, dy=1), with a 5x5 kernel
# and 64-bit float output.
vsobel = cv.Sobel(original, cv.CV_64F, dx=0, dy=1, ksize=5)
cv.imshow('V-Sobel', vsobel)

水平和垂直方向索貝爾偏微分

# Sobel with a derivative order of 1 in both x and y, 5x5 kernel, float64 output.
# NOTE(review): with dx=1 and dy=1 OpenCV evaluates the mixed x/y derivative,
# not the sum of the horizontal and vertical gradients — confirm this matches
# what the section intends to show.
sobel = cv.Sobel(original, cv.CV_64F, dx=1, dy=1, ksize=5)
cv.imshow('Sobel', sobel)

拉普拉斯邊緣識別

cv.Laplacian(original, cv.CV_64F)

# Laplacian edge response, computed with 64-bit float output.
laplacian = cv.Laplacian(original, ddepth=cv.CV_64F)
cv.imshow('Laplacian', laplacian)

Canny邊緣識別

cv.Canny(original, 50, 240)

  • image:輸入圖像
  • threshold1:50,滯後閾值處理的低閾值(梯度低於此值的像素不視為邊緣)

  • threshold2:240,滯後閾值處理的高閾值(梯度高於此值的像素確定為邊緣;介於兩個閾值之間的像素僅在與確定邊緣相連時保留)
# Canny edge detection with a lower threshold of 50 and an upper threshold of 80.
canny = cv.Canny(original, threshold1=50, threshold2=80)
cv.imshow('Canny', canny)

# Keep the windows open until a key is pressed.
cv.waitKey()

亮度提升

OpenCV提供了直方圖均衡化的方式實現亮度提升,更有利於邊緣識別與物體識別模型的訓練。

彩色圖轉為灰度圖

gray = cv.cvtColor(original, cv.COLOR_BGR2GRAY)

直方圖均衡化

equalized_gray = cv.equalizeHist(gray)

案例:

讀取圖像

import cv2 as cv

# Load the colour sample image and display it unmodified.
original = cv.imread('../machine_learning_date/sunrise.jpg')
cv.imshow('Original', original)

彩色圖轉為灰度圖

# Convert the BGR colour image to a single-channel grayscale image.
gray = cv.cvtColor(original, code=cv.COLOR_BGR2GRAY)
cv.imshow('Gray', gray)

灰度圖直方圖均衡化

# Histogram-equalise the grayscale image and show the result.
equalized_gray = cv.equalizeHist(gray)
cv.imshow('Equalized Gray', equalized_gray)

YUV:亮度(Y)與兩個色度分量(U、V)

# Convert BGR -> YUV, histogram-equalise each of the three channels in place
# (0 = Y luma, 1 = U chroma, 2 = V chroma), then convert back to BGR.
# NOTE(review): equalising the U and V channels also shifts colours; commonly
# only the Y channel is equalised — confirm this is intended.
yuv = cv.cvtColor(original, cv.COLOR_BGR2YUV)
for channel in range(3):
    yuv[..., channel] = cv.equalizeHist(yuv[..., channel])
equalized_color = cv.cvtColor(yuv, cv.COLOR_YUV2BGR)
cv.imshow('Equalized Color', equalized_color)
cv.waitKey()

角點檢測

平直稜線的交匯點(顏色梯度方向改變的像素點的位置)

Harris角點檢測器

gray = cv.cvtColor(original, cv.COLOR_BGR2GRAY)

corners = cv.cornerHarris(gray, 7, 5, 0.04)

  • src:輸入單通道8位或浮點圖像。
  • blockSize:角點檢測區域大小
  • ksize:Sobel求導中使用的窗口大小
  • k:Harris角點響應函數 R = det(M) − k·(trace(M))² 中的經驗常數,一般取[0.04, 0.06]

案例:

import cv2 as cv

original = cv.imread('../machine_learning_date/box.png')
cv.imshow('Original', original)

# Work on a grayscale copy to reduce the amount of computation.
gray = cv.cvtColor(original, cv.COLOR_BGR2GRAY)
cv.imshow('Gray', gray)

# Harris corner response map.
corners = cv.cornerHarris(gray, blockSize=7, ksize=5, k=0.04)

# Overlay: paint every pixel whose response exceeds 1% of the maximum response
# red ([0, 0, 255] in BGR order) on a copy of the original image.
response_floor = corners.max() * 0.01
mixture = original.copy()
mixture[corners > response_floor] = [0, 0, 255]
cv.imshow('Corner', mixture)
cv.waitKey()

圖像識別

特徵點檢測

常用特徵點檢測有:STAR特徵點檢測 / SIFT特徵點檢測

特徵點檢測結合了 邊緣檢測 與 角點檢測 從而識別出圖形的特徵點

STAR特徵點檢測相關API如下:

star = cv.xfeatures2d.StarDetector_create()  # 創建STAR特徵點檢測器

keypoints = star.detect(gray)     # 檢測出gray圖像所有的特徵點

把所有的特徵點繪製在mixture圖像中

cv.drawKeypoints(original, keypoints, mixture, flags=cv.DRAW_MATCHES_FLAGS_DRAW_RICH_KEYPOINTS)

參數:

  • image:原圖片
  • keypoints:源圖像中的關鍵點
  • outImage:輸出圖片
  • flags:標誌設置圖形特徵

案例:

import cv2 as cv

original = cv.imread('../machine_learning_date/table.jpg')

# Detect on a grayscale copy to reduce the amount of computation.
gray = cv.cvtColor(original, cv.COLOR_BGR2GRAY)
cv.imshow('Gray', gray)

# Create a STAR feature detector and find all keypoints in the gray image.
star = cv.xfeatures2d.StarDetector_create()
keypoints = star.detect(gray)

# Draw every keypoint into `mixture`, a copy of the original image.
mixture = original.copy()
rich_flag = cv.DRAW_MATCHES_FLAGS_DRAW_RICH_KEYPOINTS
cv.drawKeypoints(original, keypoints, mixture, flags=rich_flag)
cv.imshow('Mixture', mixture)
cv.waitKey()

 

SIFT特徵點檢測相關API:

sift = cv.xfeatures2d.SIFT_create()   # 創建SIFT特徵點檢測器
keypoints = sift.detect(gray)        # 檢測出gray圖像所有的特徵點

案例:

import cv2 as cv

original = cv.imread('../machine_learning_date/table.jpg')
gray = cv.cvtColor(original, cv.COLOR_BGR2GRAY)
cv.imshow('Gray', gray)

# Create a SIFT feature detector and find all keypoints in the gray image.
sift = cv.xfeatures2d.SIFT_create()
keypoints = sift.detect(gray)

# Draw every keypoint into `mixture`, a copy of the original image.
mixture = original.copy()
rich_flag = cv.DRAW_MATCHES_FLAGS_DRAW_RICH_KEYPOINTS
cv.drawKeypoints(original, keypoints, mixture, flags=rich_flag)
cv.imshow('Mixture', mixture)
cv.waitKey()

  

特徵值矩陣

圖像特徵值矩陣(描述)記錄了圖像的特徵點以及每個特徵點的梯度信息,相似圖像的特徵值矩陣也相似。這樣只要有足夠多的樣本,就可以基於隱馬爾科夫模型進行圖像內容的識別。

特徵值矩陣相關API:

sift = cv.xfeatures2d.SIFT_create()  keypoints = sift.detect(gray)  _, desc = sift.compute(gray, keypoints)

案例:

import cv2 as cv
import matplotlib.pyplot as plt

original = cv.imread('../machine_learning_date/table.jpg')
gray = cv.cvtColor(original, cv.COLOR_BGR2GRAY)
cv.imshow('Gray', gray)

# Detect the SIFT keypoints, then compute one descriptor row per keypoint.
sift = cv.xfeatures2d.SIFT_create()
keypoints = sift.detect(gray)
_, desc = sift.compute(gray, keypoints)
print(desc.shape)       # (454, 128) for the author's sample image

# desc has one row per keypoint, so desc.T has one row per descriptor
# dimension. matshow draws rows along the vertical axis and columns along the
# horizontal axis, hence x indexes the keypoints (samples) and y indexes the
# descriptor dimensions (features).
plt.matshow(desc.T, cmap='jet', fignum='Description')
plt.title('Description')
plt.xlabel('Sample')
plt.ylabel('Feature')
plt.tick_params(which='both', top=False, labeltop=False,
                labelbottom=True, labelsize=10)
plt.show()

物體識別

1、讀取training文件夾中的訓練圖片樣本,每個圖片對應一個desc矩陣,每個desc都有一個類別(car)

2、把所有類別為car的desc合併在一起,形成訓練集

| desc |       |
| desc | car |
| desc | |
.....

  由上述訓練集樣本可以訓練一個用於匹配car的HMM。

3、訓練3個HMM分別對應每個物體類別。 保存在列表中。

4、讀取testing文件夾中的測試樣本,整理測試樣本

| desc | car   |
| desc | moto |

5、針對每一個測試樣本:

  1. 分別使用3個HMM模型,對測試樣本計算score得分。
  2. 取3個模型中得分最高的模型所屬類別作為預測類別。
import os
import numpy as np
import cv2 as cv
import hmmlearn.hmm as hl


def search_files(directory):
    """Collect the ``.jpg`` files below *directory*, grouped by folder name.

    Args:
        directory: Root directory to walk recursively.

    Returns:
        A dict mapping each label (the name of the folder that directly
        contains the file) to the list of ``.jpg`` paths found in it.
    """
    directory = os.path.normpath(directory)

    objects = {}
    for curdir, _subdirs, files in os.walk(directory):
        label = os.path.basename(curdir)
        for name in files:
            if name.endswith('.jpg'):
                objects.setdefault(label, []).append(
                    os.path.join(curdir, name))
    return objects


def _describe_images(filenames, sift):
    """Return the SIFT descriptors of all *filenames* stacked row-wise.

    Every image is converted to grayscale and rescaled so that its shorter
    side is 200 pixels before detection. The same preprocessing is used for
    training and testing images so that both are described consistently.

    Raises:
        IOError: If an image file cannot be read.
        ValueError: (from ``np.concatenate``) if no image yields descriptors.
    """
    blocks = []
    for filename in filenames:
        image = cv.imread(filename)
        if image is None:
            # imread signals failure by returning None instead of raising.
            raise IOError('cannot read image: {}'.format(filename))
        gray = cv.cvtColor(image, cv.COLOR_BGR2GRAY)
        h, w = gray.shape[:2]
        f = 200 / min(h, w)
        gray = cv.resize(gray, None, fx=f, fy=f)
        keypoints = sift.detect(gray)
        _, desc = sift.compute(gray, keypoints)
        # Skip images without keypoints: there is nothing to stack for them.
        if desc is not None and len(desc) > 0:
            blocks.append(desc)
    # One concatenation instead of re-copying the array after every image.
    return np.concatenate(blocks, axis=0)


# One SIFT detector is shared by every image.
sift = cv.xfeatures2d.SIFT_create()

# Load the training samples: one stacked descriptor matrix per label.
train_objects = search_files('../machine_learning_date/objects/training')
train_x, train_y = [], []
for label, filenames in train_objects.items():
    train_x.append(_describe_images(filenames, sift))
    train_y.append(label)

# Train one Gaussian HMM per label and keep the fitted models by label.
models = {}
for descs, label in zip(train_x, train_y):
    model = hl.GaussianHMM(n_components=4, covariance_type='diag', n_iter=100)
    models[label] = model.fit(descs)

# Load the testing samples with the same preprocessing as the training set.
test_objects = search_files('../machine_learning_date/objects/testing')
test_x, test_y = [], []
for label, filenames in test_objects.items():
    test_x.append(_describe_images(filenames, sift))
    test_y.append(label)

# Score every test sample against every model; the label of the model with
# the highest score is the prediction.
pred_y = []
for descs, test_label in zip(test_x, test_y):
    best_score, best_label = None, None
    for pred_label, model in models.items():
        score = model.score(descs)
        if best_score is None or best_score < score:
            best_score, best_label = score, pred_label
        print(test_label, '->', pred_label, score)
        # Output recorded by the author when the test images were NOT yet
        # rescaled; the numbers depend on the data and will differ now:
        # airplane -> airplane -373374.23370679974
        # airplane -> car -374022.20182585815
        # airplane -> motorbike -374127.46289302857
        # car -> airplane -163638.3153800373
        # car -> car -163691.52001099114
        # car -> motorbike -164410.0557508754
        # motorbike -> airplane -467472.6294620241
        # motorbike -> car -470149.6143097087
        # motorbike -> motorbike -464606.0040570249
    pred_y.append(best_label)

print(test_y)       # author's run: ['airplane', 'car', 'motorbike']
print(pred_y)       # author's run: ['airplane', 'airplane', 'motorbike']

人臉識別

人臉識別與圖像識別的區別在於人臉識別需要識別出兩個人的不同點。

視頻捕捉

通過OpenCV訪問視頻捕捉設備(視頻頭),從而獲取圖像幀。

視頻捕捉相關API:

import cv2 as cv

# Open the video capture device with index 0.
video_capture = cv.VideoCapture(0)
try:
    # read() returns a (success flag, frame) pair; show the frame only when
    # one was actually captured.
    ok, frame = video_capture.read()
    if ok:
        cv.imshow('VideoCapture', frame)
finally:
    # Release the capture device and destroy all OpenCV windows, even if an
    # error occurred above.
    video_capture.release()
    cv.destroyAllWindows()

案例:

import cv2 as cv

# Open the video capture device with index 0.
video_capture = cv.VideoCapture(0)

try:
    while True:
        # read() returns a (success flag, frame) pair; stop when no frame
        # could be captured instead of passing an invalid frame to imshow.
        ok, frame = video_capture.read()
        if not ok:
            break
        cv.imshow('frame', frame)
        # Refresh roughly every 33 ms; key code 27 (Esc) leaves the loop.
        if cv.waitKey(33) == 27:
            break
finally:
    # Always free the device and close the windows, also on errors.
    video_capture.release()
    cv.destroyAllWindows()

人臉定位

哈爾級聯人臉定位

import cv2 as cv

# Build a Haar cascade face detector from its feature description file.
fd = cv.CascadeClassifier('../data/haar/face.xml')

# Detect all face regions in an image (`frame` is an image obtained
# beforehand, e.g. a frame read from a capture device).
#   1.3: scaleFactor  - how much the image is shrunk at each image scale
#   5:   minNeighbors - how many neighbouring detections a candidate
#                       rectangle needs in order to be kept
# Returns:
#   faces: list of face rectangles [(l, t, w, h), ...]
faces = fd.detectMultiScale(frame, 1.3, 5)

# Nothing can be drawn when no face was found. len() is used because the
# result may be an array, whose truth value is ambiguous.
if len(faces) > 0:
    face = faces[0]                     # the first face
    l, t, w, h = face                   # left, top, width, height
    a, b = int(w / 2), int(h / 2)       # half-width and half-height
    # Draw an ellipse inscribed in the face rectangle.
    cv.ellipse(
        frame,              # image to draw on
        (l + a, t + b),     # centre of the ellipse
        (a, b),             # half-axis lengths
        0,                  # rotation angle of the ellipse
        0, 360,             # start angle, end angle
        (255, 0, 255),      # colour
        2                   # line thickness
    )

案例:

import cv2 as cv


def _outline(image, boxes, color):
    """Draw an ellipse inscribed in each (l, t, w, h) box onto *image* in place."""
    for l, t, w, h in boxes:
        a, b = int(w / 2), int(h / 2)
        cv.ellipse(image, (l + a, t + b), (a, b), 0, 0, 360, color, 2)


# Haar cascade detectors for faces, eyes and noses.
fd = cv.CascadeClassifier('../../data/haar/face.xml')
ed = cv.CascadeClassifier('../../data/haar/eye.xml')
nd = cv.CascadeClassifier('../../data/haar/nose.xml')

vc = cv.VideoCapture(0)
try:
    while True:
        # Stop when no frame could be captured instead of passing an invalid
        # frame on to the detectors.
        ok, frame = vc.read()
        if not ok:
            break
        for l, t, w, h in fd.detectMultiScale(frame, 1.3, 5):
            _outline(frame, [(l, t, w, h)], (255, 0, 255))
            # `face` is a slice of `frame`, so drawing on it also marks the
            # frame; eye/nose boxes are relative to this face region.
            face = frame[t:t + h, l:l + w]
            _outline(face, ed.detectMultiScale(face, 1.3, 5), (0, 255, 0))
            _outline(face, nd.detectMultiScale(face, 1.3, 5), (0, 255, 255))
        cv.imshow('VideoCapture', frame)
        # Refresh roughly every 33 ms; key code 27 (Esc) leaves the loop.
        if cv.waitKey(33) == 27:
            break
finally:
    # Always free the device and close the windows, also on errors.
    vc.release()
    cv.destroyAllWindows()

人臉識別

簡單人臉識別:OpenCV的LBPH(局部二值模式直方圖)

  1. 讀取樣本圖片數據,整理圖片的路徑列表
  2. 讀取每張圖片,基於haar裁剪每張人臉,把人臉數據放入train_x,作為訓練數據。在整理train_y時,由於Bob、Sala、Roy是字符串,需要把字符串做一個標籤編碼 LabelEncoder
  3. 遍歷訓練集,把訓練集交給LBPH人臉識別模型進行訓練。
  4. 讀取測試集數據,整理圖片的路徑列表
  5. 遍歷每張圖片,把圖片中的人臉使用相同的方式裁剪,把人臉數據交給LBPH模型進行類別預測,得到預測結果。
  6. 以圖像的方式輸出結果。
# -*- coding: utf-8 -*-
import os
import numpy as np
import cv2 as cv
import sklearn.preprocessing as sp

# Haar cascade face detector, used to crop the faces out of every picture.
fd = cv.CascadeClassifier('../machine_learning_date/haar/face.xml')


def search_faces(directory):
    """Collect the ``.jpg`` files below *directory*, grouped by folder name.

    Args:
        directory: Root directory to walk recursively.

    Returns:
        A dict mapping each label (the name of the folder that directly
        contains the file) to the list of ``.jpg`` paths found in it.
    """
    directory = os.path.normpath(directory)

    faces = {}
    for curdir, _subdirs, files in os.walk(directory):
        label = os.path.basename(curdir)
        for jpeg in files:
            if jpeg.endswith('.jpg'):
                faces.setdefault(label, []).append(os.path.join(curdir, jpeg))
    return faces


train_faces = search_faces('../machine_learning_date/faces/training')
# The labels are strings, so encode them as integer codes for the recognizer.
codec = sp.LabelEncoder()
codec.fit(list(train_faces.keys()))

train_x, train_y = [], []
for label, filenames in train_faces.items():
    code = codec.transform([label])[0]      # encode once per label
    for filename in filenames:
        image = cv.imread(filename)
        gray = cv.cvtColor(image, cv.COLOR_BGR2GRAY)
        faces = fd.detectMultiScale(gray, 1.1, 2, minSize=(100, 100))
        for l, t, w, h in faces:
            train_x.append(gray[t:t + h, l:l + w])
            train_y.append(code)
train_y = np.array(train_y)
# Training set layout (one cropped face per row):
#    train_x  train_y
#  -------------------
#  | face    | 0     |
#  -------------------
#  | face    | 1     |
#  -------------------
#  | face    | 2     |
#  -------------------
#  | face    | 1     |
#  -------------------

# Local Binary Patterns Histograms (LBPH) face recognizer.
model = cv.face.LBPHFaceRecognizer_create()
model.train(train_x, train_y)

# Testing. The test pictures are read from the same data root as the cascade
# file and the training set (the original pointed at '../ml_data/...').
test_faces = search_faces(
    '../machine_learning_date/faces/testing')
test_x, test_y, test_z = [], [], []
for label, filenames in test_faces.items():
    code = codec.transform([label])[0]
    for filename in filenames:
        image = cv.imread(filename)
        gray = cv.cvtColor(image, cv.COLOR_BGR2GRAY)
        faces = fd.detectMultiScale(gray, 1.1, 2, minSize=(100, 100))
        for l, t, w, h in faces:
            test_x.append(gray[t:t + h, l:l + w])
            test_y.append(code)
            # Mark the detected face on the colour image used for display.
            a, b = int(w / 2), int(h / 2)
            cv.ellipse(image, (l + a, t + b), (a, b), 0, 0, 360, (255, 0, 255), 2)
            test_z.append(image)
test_y = np.array(test_y)

# predict() returns a tuple; element 0 is the predicted label code.
pred_test_y = [model.predict(face)[0] for face in test_x]

print(codec.inverse_transform(test_y))
print(codec.inverse_transform(pred_test_y))

# Cycle through the annotated test pictures, one per second, until Esc (27)
# is pressed. Without any test face there is nothing to show, and the loop
# could never receive a key press, so it is skipped entirely in that case.
escape = False
while test_z and not escape:
    for code, pred_code, image in zip(test_y, pred_test_y, test_z):
        label, pred_label = codec.inverse_transform([code, pred_code])
        text = '{} {} {}'.format(label, '==' if code == pred_code else '!=', pred_label)
        cv.putText(image, text, (10, 60), cv.FONT_HERSHEY_SIMPLEX, 2, (255, 255, 255), 6)
        cv.imshow('Recognizing...', image)
        if cv.waitKey(1000) == 27:
            escape = True
            break