import matplotlib.pyplot as plt
import keras_ocr
 
# Create the keras-ocr pipeline object used for recognition below.
pipeline = keras_ocr.pipeline.Pipeline()
 
# Read the image from an image path (a jpg/png file or an image URL).
# NOTE(review): image_path is not defined in this snippet; set it beforehand.
img = keras_ocr.tools.read(image_path)
 
# recognize() is given a list of images; prediction_groups[0] holds the
# (word, box) tuples found in the first (here: only) image.
prediction_groups = pipeline.recognize([img])
 
# Draw the image together with its annotations and boxes.
keras_ocr.tools.drawAnnotations(image=img, predictions=prediction_groups[0])

如果我們看一下prediction_groups，我們會看到每個元素對應一組坐標。

例如，prediction_groups[0][10]如下：

('tuesday',
 array([[ 986.2778 ,  625.07764],
        [1192.3856 ,  622.7086 ],
        [1192.8888 ,  666.4836 ],
        [ 986.78094,  668.8526 ]], dtype=float32))

數組的第一個元素對應左上角的坐標，第二個元素對應右上角，第三個元素是右下角，而第四個元素是左下角。

cv2修復函數

使用OpenCV應用修復算法時，需要提供兩幅圖像：

輸入圖像，包含我們要刪除的文本。

遮罩圖像，它顯示圖像中要刪除的文本在哪里。第二個圖像的尺寸應與輸入的尺寸相同。

cv2具有兩種修復算法，并允許應用矩形、圓形或線遮罩（請參考OpenCV文檔）。

在這種情況下，我決定使用線遮罩，因為它們能更靈活地覆蓋不同方向的文本（矩形遮罩只適用于平行或垂直于x軸的單詞，圓形遮罩將覆蓋比較大的區域）。

為了應用遮罩，我們需要提供線的起點和終點坐標以及線的厚度：

起點將是框的左上角和左下角之間的中點，終點將是右上角和右下角之間的中點。

對于厚度，我們將計算左上角和左下角之間的線長度。

import math
import numpy as np
 
def midpoint(x1, y1, x2, y2):
    """Return the integer midpoint between (x1, y1) and (x2, y2).

    Each coordinate pair is averaged and then passed through ``int()``,
    which drops any fractional part (truncation toward zero).

    Returns:
        A ``(x_mid, y_mid)`` tuple of ints.
    """
    half_x = (x1 + x2) / 2
    half_y = (y1 + y2) / 2
    return int(half_x), int(half_y)
 
# Example of a line mask for the word "Tuesday".
box = prediction_groups[0][10]
# box[0] is the recognized word; box[1] holds the four corner points (x, y).
x0, y0 = box[1][0]
x1, y1 = box[1][1] 
x2, y2 = box[1][2]
x3, y3 = box[1][3] 
# Line endpoints: midpoint of corners 1 and 2, and midpoint of corners 0 and 3.
x_mid0, y_mid0 = midpoint(x1, y1, x2, y2)
# NOTE: "y_mi1" (sic) is the spelling the later cv2.line call relies on.
x_mid1, y_mi1 = midpoint(x0, y0, x3, y3)
# Line thickness: Euclidean distance between corners 1 and 2, truncated to int.
thickness = int(math.sqrt( (x2 - x1)**2 + (y2 - y1)**2 ))

現在我們可以創建我們的遮罩：

# cv2 is used from here on but none of the earlier snippets import it.
import cv2

# Single-channel uint8 mask with the image's height and width, all zeros.
mask = np.zeros(img.shape[:2], dtype="uint8")
# Mark the word's region by drawing a line with value 255 between the two
# midpoints, using the thickness computed above.
cv2.line(mask, (x_mid0, y_mid0), (x_mid1, y_mi1), 255, thickness)

我們還可以檢查遮罩區域，確保其正常工作。

# Keep only the image pixels where the mask is non-zero, then display the
# result to visually check the masked region.
masked = cv2.bitwise_and(img, img, mask=mask)
plt.imshow(masked)

最后，我們可以修復圖像。

在這種情況下，我們將使用cv2.INPAINT_NS，指的是“Navier-Stokes, Fluid Dynamics, and Image and Video Inpainting”一文中描述的修復算法。

# Inpaint the region marked in the mask. The third argument (7) is the
# inpaint radius; cv2.INPAINT_NS selects the Navier-Stokes based method.
img_inpainted = cv2.inpaint(img, mask, 7, cv2.INPAINT_NS)
# Display the inpainted result.
plt.imshow(img_inpainted)

正如你所看到的，“Tuesday”已從圖片中刪除。

匯總

現在，讓我們把它總結起來，創建一個函數來去除任何圖像中的文本。

我們只需要生成框列表，并迭代每個(gè)文本框。

import matplotlib.pyplot as plt
import keras_ocr
import cv2
import math
import numpy as np
 
def midpoint(x1, y1, x2, y2):
    """Return the integer midpoint between (x1, y1) and (x2, y2).

    The x and y sums are each halved and passed through ``int()``, which
    drops any fractional part (truncation toward zero).

    Returns:
        A ``(x_mid, y_mid)`` tuple of ints.
    """
    return tuple(int(pair_sum / 2) for pair_sum in (x1 + x2, y1 + y2))
 
# Create the keras-ocr pipeline once at module level; presumably this is the
# object to pass as the `pipeline` argument of inpaint_text() below.
pipeline = keras_ocr.pipeline.Pipeline()
 
def inpaint_text(img_path, pipeline):
    """Remove every word detected by the OCR pipeline from an image.

    A line mask is drawn over each detected word box, and the image is then
    inpainted once using the complete mask.

    Args:
        img_path: Image location handed to ``keras_ocr.tools.read``.
        pipeline: Object whose ``recognize`` method takes a list of images
            and returns, per image, a list of ``(word, box)`` tuples
            (e.g. ``keras_ocr.pipeline.Pipeline()``).

    Returns:
        The inpainted image array, or the image as read when nothing was
        marked in the mask.
    """
    img = keras_ocr.tools.read(img_path)

    # recognize() returns one prediction list per input image; only one
    # image is passed, so take the first list.
    predictions = pipeline.recognize([img])[0]
    mask = np.zeros(img.shape[:2], dtype="uint8")

    for box in predictions:
        # box[1] holds the four corner points of the word box.
        (x0, y0), (x1, y1), (x2, y2), (x3, y3) = box[1]

        # The mask line runs between the midpoints of two opposite edges
        # (corners 1-2 and corners 0-3).
        start = midpoint(x1, y1, x2, y2)
        end = midpoint(x0, y0, x3, y3)

        # Thickness is the length of the 1-2 edge; keep it at least 1 px so
        # a degenerate box still yields a valid line width.
        thickness = max(1, int(math.hypot(x2 - x1, y2 - y1)))
        cv2.line(mask, start, end, 255, thickness)

    if not mask.any():
        # Nothing to remove: skip the inpainting pass entirely.
        return img

    # Inpaint once with the full mask. Doing this inside the loop would
    # re-inpaint every previously masked region for each additional box.
    return cv2.inpaint(img, mask, 7, cv2.INPAINT_NS)

以下是最終結果（之前和之后）：

另外兩個例子：

請注意，如果要保存圖像，需要將其轉換為RGB格式，否則顏色會反轉！

# Swap the red and blue channels before saving: cv2.imwrite expects BGR
# channel order. NOTE(review): assumes img is in RGB order, as displayed
# with plt.imshow in the earlier snippets; confirm for your input.
img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
# Plain ASCII quotes are required here; typographic quotes are a SyntaxError.
cv2.imwrite('text_free_image.jpg', img_rgb)

如果你只對刪除某些單詞感興趣，則可以包括一個if條件，如下所示：

給出一個要刪除的單詞列表：

# Words to remove from the image; compared against the recognized word text.
remove_list = ['tuesday', 'monday']

我們可以在for循環中包含if條件

def inpaint_text(img_path, remove_list, pipeline):
    """Remove only the listed words from an image.

    A line mask is drawn over each detected word box whose recognized text
    appears in ``remove_list``, and the image is then inpainted once using
    the complete mask.

    Args:
        img_path: Image location handed to ``keras_ocr.tools.read``.
        remove_list: Collection of word strings to remove; each is compared
            for equality against the recognized word.
        pipeline: Object whose ``recognize`` method takes a list of images
            and returns, per image, a list of ``(word, box)`` tuples
            (e.g. ``keras_ocr.pipeline.Pipeline()``).

    Returns:
        The inpainted image array, or the image as read when nothing was
        marked in the mask.
    """
    img = keras_ocr.tools.read(img_path)

    # recognize() returns one prediction list per input image; only one
    # image is passed, so take the first list.
    predictions = pipeline.recognize([img])[0]
    mask = np.zeros(img.shape[:2], dtype="uint8")

    # Build the lookup set once instead of scanning the list per word.
    targets = set(remove_list)

    for box in predictions:
        if box[0] not in targets:
            continue

        # box[1] holds the four corner points of the word box.
        (x0, y0), (x1, y1), (x2, y2), (x3, y3) = box[1]

        # The mask line runs between the midpoints of two opposite edges
        # (corners 1-2 and corners 0-3).
        start = midpoint(x1, y1, x2, y2)
        end = midpoint(x0, y0, x3, y3)

        # Thickness is the length of the 1-2 edge; keep it at least 1 px so
        # a degenerate box still yields a valid line width.
        thickness = max(1, int(math.hypot(x2 - x1, y2 - y1)))
        cv2.line(mask, start, end, 255, thickness)

    if not mask.any():
        # No listed word was marked: skip the inpainting pass entirely.
        return img

    # Inpaint once with the full mask. Doing this inside the loop would
    # re-inpaint every previously masked region for each additional match.
    return cv2.inpaint(img, mask, 7, cv2.INPAINT_NS)

當然，這只是一個快速的例子，說明了如何對特定的單詞列表進行修復。

結尾

在這篇文章中，我們討論了如何實現一種算法來自動刪除圖像中的文本，該算法使用一個預訓練好的OCR模型（使用Keras）和一個使用cv2的修復算法。該算法似乎可以很好地從圖像中快速刪除文本，而無需為此特定任務訓練模型。

當文本框靠近其他對象時，它通常表現不好，因為它可能會扭曲周圍環境。

以上就是Python使用Keras OCR實現從圖像中刪除文本的詳細內容，更多關于Python圖像刪除文本的資料請關注腳本之家其它相關文章！

您可能感興趣的文章: