python?OpenCV計算圖片相似度的5種算法

更新時間：2022年08月03日 11:42:16 作者：Waldenz

本文主要介紹了python?OpenCV計算圖片相似度的5種算法，文中通過示例代碼介紹的非常詳細(xì)，對大家的學(xué)習(xí)或者工作具有一定的參考學(xué)習(xí)價值，需要的朋友們下面隨著小編來一起學(xué)習(xí)學(xué)習(xí)吧

5種算法

值哈希算法、差值哈希算法和感知哈希算法都是值越小，相似度越高，取值為0-64，即漢明距離中，64位的hash值有多少不同。 三直方圖和單通道直方圖的值為0-1，值越大，相似度越高。

源代碼如下：

import cv2
import numpy as np
from PIL import Image
import requests
from io import BytesIO
import matplotlib
matplotlib.use('TkAgg')
import matplotlib.pyplot as plt
 
 
def aHash(img):
    # 均值哈希算法
    # 縮放為8*8
    img = cv2.resize(img, (8, 8))
    # 轉(zhuǎn)換為灰度圖
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    # s為像素和初值為0，hash_str為hash值初值為''
    s = 0
    hash_str = ''
    # 遍歷累加求像素和
    for i in range(8):
        for j in range(8):
            s = s+gray[i, j]
    # 求平均灰度
    avg = s/64
    # 灰度大于平均值為1相反為0生成圖片的hash值
    for i in range(8):
        for j in range(8):
            if gray[i, j] > avg:
                hash_str = hash_str+'1'
            else:
                hash_str = hash_str+'0'
    return hash_str
 
 
def dHash(img):
    # 差值哈希算法
    # 縮放8*8
    img = cv2.resize(img, (9, 8))
    # 轉(zhuǎn)換灰度圖
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    hash_str = ''
    # 每行前一個像素大于后一個像素為1，相反為0，生成哈希
    for i in range(8):
        for j in range(8):
            if gray[i, j] > gray[i, j+1]:
                hash_str = hash_str+'1'
            else:
                hash_str = hash_str+'0'
    return hash_str
 
 
def pHash(img):
    # 感知哈希算法
    # 縮放32*32
    img = cv2.resize(img, (32, 32))   # , interpolation=cv2.INTER_CUBIC
 
    # 轉(zhuǎn)換為灰度圖
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    # 將灰度圖轉(zhuǎn)為浮點型，再進(jìn)行dct變換
    dct = cv2.dct(np.float32(gray))
    # opencv實現(xiàn)的掩碼操作
    dct_roi = dct[0:8, 0:8]
 
    hash = []
    avreage = np.mean(dct_roi)
    for i in range(dct_roi.shape[0]):
        for j in range(dct_roi.shape[1]):
            if dct_roi[i, j] > avreage:
                hash.append(1)
            else:
                hash.append(0)
    return hash
 
 
def calculate(image1, image2):
    # 灰度直方圖算法
    # 計算單通道的直方圖的相似值
    hist1 = cv2.calcHist([image1], [0], None, [256], [0.0, 255.0])
    hist2 = cv2.calcHist([image2], [0], None, [256], [0.0, 255.0])
    # 計算直方圖的重合度
    degree = 0
    for i in range(len(hist1)):
        if hist1[i] != hist2[i]:
            degree = degree + \
                (1 - abs(hist1[i] - hist2[i]) / max(hist1[i], hist2[i]))
        else:
            degree = degree + 1
    degree = degree / len(hist1)
    return degree
 
 
def classify_hist_with_split(image1, image2, size=(256, 256)):
    # RGB每個通道的直方圖相似度
    # 將圖像resize后，分離為RGB三個通道，再計算每個通道的相似值
    image1 = cv2.resize(image1, size)
    image2 = cv2.resize(image2, size)
    sub_image1 = cv2.split(image1)
    sub_image2 = cv2.split(image2)
    sub_data = 0
    for im1, im2 in zip(sub_image1, sub_image2):
        sub_data += calculate(im1, im2)
    sub_data = sub_data / 3
    return sub_data
 
 
def cmpHash(hash1, hash2):
    # Hash值對比
    # 算法中1和0順序組合起來的即是圖片的指紋hash。順序不固定，但是比較的時候必須是相同的順序。
    # 對比兩幅圖的指紋，計算漢明距離，即兩個64位的hash值有多少是不一樣的，不同的位數(shù)越小，圖片越相似
    # 漢明距離：一組二進(jìn)制數(shù)據(jù)變成另一組數(shù)據(jù)所需要的步驟，可以衡量兩圖的差異，漢明距離越小，則相似度越高。漢明距離為0，即兩張圖片完全一樣
    n = 0
    # hash長度不同則返回-1代表傳參出錯
    if len(hash1) != len(hash2):
        return -1
    # 遍歷判斷
    for i in range(len(hash1)):
        # 不相等則n計數(shù)+1，n最終為相似度
        if hash1[i] != hash2[i]:
            n = n + 1
    return n
 
 
def getImageByUrl(url):
    # 根據(jù)圖片url 獲取圖片對象
    html = requests.get(url, verify=False)
    image = Image.open(BytesIO(html.content))
    return image
 
 
def PILImageToCV():
    # PIL Image轉(zhuǎn)換成OpenCV格式
    path = "/Users/waldenz/Documents/Work/doc/TestImages/t3.png"
    img = Image.open(path)
    plt.subplot(121)
    plt.imshow(img)
    print(isinstance(img, np.ndarray))
    img = cv2.cvtColor(np.asarray(img), cv2.COLOR_RGB2BGR)
    print(isinstance(img, np.ndarray))
    plt.subplot(122)
    plt.imshow(img)
    plt.show()
 
 
def CVImageToPIL():
    # OpenCV圖片轉(zhuǎn)換為PIL image
    path = "/Users/waldenz/Documents/Work/doc/TestImages/t3.png"
    img = cv2.imread(path)
    # cv2.imshow("OpenCV",img)
    plt.subplot(121)
    plt.imshow(img)
 
    img2 = Image.fromarray(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
    plt.subplot(122)
    plt.imshow(img2)
    plt.show()
 
def bytes_to_cvimage(filebytes):
    # 圖片字節(jié)流轉(zhuǎn)換為cv image
    image = Image.open(filebytes)
    img = cv2.cvtColor(np.asarray(image), cv2.COLOR_RGB2BGR)
    return img
 
def runAllImageSimilaryFun(para1, para2):
    # 均值、差值、感知哈希算法三種算法值越小，則越相似,相同圖片值為0
    # 三直方圖算法和單通道的直方圖 0-1之間，值越大，越相似。 相同圖片為1
 
    # t1,t2   14;19;10;  0.70;0.75
    # t1,t3   39 33 18   0.58 0.49
    # s1,s2  7 23 11     0.83 0.86  挺相似的圖片
    # c1,c2  11 29 17    0.30 0.31
 
    if para1.startswith("http"):
         # 根據(jù)鏈接下載圖片，并轉(zhuǎn)換為opencv格式
        img1 = getImageByUrl(para1)
        img1 = cv2.cvtColor(np.asarray(img1), cv2.COLOR_RGB2BGR)
 
        img2 = getImageByUrl(para2)
        img2 = cv2.cvtColor(np.asarray(img2), cv2.COLOR_RGB2BGR)
    else:
        # 通過imread方法直接讀取物理路徑
        img1 = cv2.imread(para1)
        img2 = cv2.imread(para2)
 
    hash1 = aHash(img1)
    hash2 = aHash(img2)
    n1 = cmpHash(hash1, hash2)
    print('均值哈希算法相似度aHash：', n1)
 
    hash1 = dHash(img1)
    hash2 = dHash(img2)
    n2 = cmpHash(hash1, hash2)
    print('差值哈希算法相似度dHash：', n2)
 
    hash1 = pHash(img1)
    hash2 = pHash(img2)
    n3 = cmpHash(hash1, hash2)
    print('感知哈希算法相似度pHash：', n3)
 
    n4 = classify_hist_with_split(img1, img2)
    print('三直方圖算法相似度：', n4)
 
    n5 = calculate(img1, img2)
    print("單通道的直方圖", n5)
    print("%d %d %d %.2f %.2f " % (n1, n2, n3, round(n4[0], 2), n5[0]))
    print("%.2f %.2f %.2f %.2f %.2f " % (1-float(n1/64), 1 -
                                         float(n2/64), 1-float(n3/64), round(n4[0], 2), n5[0]))
 
    plt.subplot(121)
    plt.imshow(Image.fromarray(cv2.cvtColor(img1, cv2.COLOR_BGR2RGB)))
    plt.subplot(122)
    plt.imshow(Image.fromarray(cv2.cvtColor(img2, cv2.COLOR_BGR2RGB)))
    plt.show()
 
if __name__ == "__main__":
    p1="https://ww3.sinaimg.cn/bmiddle/007INInDly1g336j2zziwj30su0g848w.jpg"
    p2="https://ww2.sinaimg.cn/bmiddle/007INInDly1g336j10d32j30vd0hnam6.jpg"
    runAllImageSimilaryFun(p1,p2)

以下為測試了一些圖片的相似度值。

五個值分別代表均值哈希算法相似度、差值哈希算法相似度、感知哈希算法相似度、三直方算法相似度、單通道直方圖相似度