Python + OpenCV in Practice: Building a Document Scanner
1. Results
Webcam scan:
Image scan:
The final saved scan:
(video)
(image)
2. Project Setup
Today's project needs only two .py files. One of them is a helper file whose functions are already written for you; we simply import it in the main .py file, so I won't explain it line by line here. If you want to know how its functions work, please study it on your own.
utlis.py, the helper file to add:
import cv2
import numpy as np


# TO STACK ALL THE IMAGES IN ONE WINDOW
def stackImages(imgArray, scale, lables=[]):
    rows = len(imgArray)
    cols = len(imgArray[0])
    rowsAvailable = isinstance(imgArray[0], list)
    width = imgArray[0][0].shape[1]
    height = imgArray[0][0].shape[0]
    if rowsAvailable:
        for x in range(0, rows):
            for y in range(0, cols):
                imgArray[x][y] = cv2.resize(imgArray[x][y], (0, 0), None, scale, scale)
                if len(imgArray[x][y].shape) == 2:
                    imgArray[x][y] = cv2.cvtColor(imgArray[x][y], cv2.COLOR_GRAY2BGR)
        imageBlank = np.zeros((height, width, 3), np.uint8)
        hor = [imageBlank] * rows
        hor_con = [imageBlank] * rows
        for x in range(0, rows):
            hor[x] = np.hstack(imgArray[x])
            hor_con[x] = np.concatenate(imgArray[x])
        ver = np.vstack(hor)
        ver_con = np.concatenate(hor)
    else:
        for x in range(0, rows):
            imgArray[x] = cv2.resize(imgArray[x], (0, 0), None, scale, scale)
            if len(imgArray[x].shape) == 2:
                imgArray[x] = cv2.cvtColor(imgArray[x], cv2.COLOR_GRAY2BGR)
        hor = np.hstack(imgArray)
        hor_con = np.concatenate(imgArray)
        ver = hor
    if len(lables) != 0:
        # Draw a white label box with text in the top-left corner of every tile
        eachImgWidth = int(ver.shape[1] / cols)
        eachImgHeight = int(ver.shape[0] / rows)
        for d in range(0, rows):
            for c in range(0, cols):
                cv2.rectangle(ver, (c * eachImgWidth, eachImgHeight * d),
                              (c * eachImgWidth + len(lables[d][c]) * 13 + 27, 30 + eachImgHeight * d),
                              (255, 255, 255), cv2.FILLED)
                cv2.putText(ver, lables[d][c], (eachImgWidth * c + 10, eachImgHeight * d + 20),
                            cv2.FONT_HERSHEY_COMPLEX, 0.7, (255, 0, 255), 2)
    return ver


# SORT 4 CORNER POINTS INTO THE ORDER: TOP-LEFT, TOP-RIGHT, BOTTOM-LEFT, BOTTOM-RIGHT
def reorder(myPoints):
    myPoints = myPoints.reshape((4, 2))
    myPointsNew = np.zeros((4, 1, 2), dtype=np.int32)
    add = myPoints.sum(1)
    myPointsNew[0] = myPoints[np.argmin(add)]   # smallest x+y -> top-left
    myPointsNew[3] = myPoints[np.argmax(add)]   # largest x+y -> bottom-right
    diff = np.diff(myPoints, axis=1)
    myPointsNew[1] = myPoints[np.argmin(diff)]  # smallest y-x -> top-right
    myPointsNew[2] = myPoints[np.argmax(diff)]  # largest y-x -> bottom-left
    return myPointsNew


# FIND THE LARGEST 4-CORNERED CONTOUR (THE DOCUMENT OUTLINE)
def biggestContour(contours):
    biggest = np.array([])
    max_area = 0
    for i in contours:
        area = cv2.contourArea(i)
        if area > 5000:
            peri = cv2.arcLength(i, True)
            approx = cv2.approxPolyDP(i, 0.02 * peri, True)
            if area > max_area and len(approx) == 4:
                biggest = approx
                max_area = area
    return biggest, max_area


# DRAW THE DOCUMENT OUTLINE BETWEEN THE 4 REORDERED CORNERS
def drawRectangle(img, biggest, thickness):
    cv2.line(img, (biggest[0][0][0], biggest[0][0][1]), (biggest[1][0][0], biggest[1][0][1]), (0, 255, 0), thickness)
    cv2.line(img, (biggest[0][0][0], biggest[0][0][1]), (biggest[2][0][0], biggest[2][0][1]), (0, 255, 0), thickness)
    cv2.line(img, (biggest[3][0][0], biggest[3][0][1]), (biggest[2][0][0], biggest[2][0][1]), (0, 255, 0), thickness)
    cv2.line(img, (biggest[3][0][0], biggest[3][0][1]), (biggest[1][0][0], biggest[1][0][1]), (0, 255, 0), thickness)
    return img


def nothing(x):
    pass


# WINDOW WITH TWO TRACKBARS FOR THE CANNY THRESHOLDS
def initializeTrackbars(intialTracbarVals=0):
    cv2.namedWindow("Trackbars")
    cv2.resizeWindow("Trackbars", 360, 240)
    cv2.createTrackbar("Threshold1", "Trackbars", 200, 255, nothing)
    cv2.createTrackbar("Threshold2", "Trackbars", 200, 255, nothing)


# READ THE CURRENT CANNY THRESHOLD VALUES
def valTrackbars():
    Threshold1 = cv2.getTrackbarPos("Threshold1", "Trackbars")
    Threshold2 = cv2.getTrackbarPos("Threshold2", "Trackbars")
    src = Threshold1, Threshold2
    return src
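To make the corner convention concrete, here is a small usage sketch of reorder() with made-up corner values: it sorts four arbitrary corner points into the order top-left, top-right, bottom-left, bottom-right (the smallest and largest coordinate sums pick the top-left and bottom-right corners, and the y-x difference separates the other two).

import numpy as np
import utlis

# Four corners of a tilted document in arbitrary order (hypothetical values),
# in the same (4, 1, 2) shape that biggestContour() returns
corners = np.array([[[310, 40]], [[60, 70]], [[330, 420]], [[40, 400]]], dtype=np.int32)

print(utlis.reorder(corners).reshape(4, 2))
# [[ 60  70]    top-left
#  [310  40]    top-right
#  [ 40 400]    bottom-left
#  [330 420]]   bottom-right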
3. Code Walkthrough
import cv2
import numpy as np
import utlis

########################################################################
webCamFeed = True          # True: scan from the webcam; False: scan pathImage
pathImage = "1.jpg"
cap = cv2.VideoCapture(1)  # camera index; use 0 if you only have one camera
cap.set(10, 160)           # property 10 = brightness
heightImg = 640
widthImg = 480
########################################################################

utlis.initializeTrackbars()
count = 0

while True:
    if webCamFeed:
        ret, img = cap.read()
    else:
        img = cv2.imread(pathImage)
    img = cv2.resize(img, (widthImg, heightImg))
    imgBlank = np.zeros((heightImg, widthImg, 3), np.uint8)
    imgGray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    imgBlur = cv2.GaussianBlur(imgGray, (5, 5), 1)             # Gaussian blur
    thres = utlis.valTrackbars()                               # read the trackbar threshold values
    imgThreshold = cv2.Canny(imgBlur, thres[0], thres[1])      # Canny edge detection
    kernel = np.ones((5, 5))
    imgDial = cv2.dilate(imgThreshold, kernel, iterations=2)   # dilation
    imgThreshold = cv2.erode(imgDial, kernel, iterations=1)    # erosion

    # Find all contours
    imgContours = img.copy()
    imgBigContour = img.copy()
    contours, hierarchy = cv2.findContours(imgThreshold, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    cv2.drawContours(imgContours, contours, -1, (0, 255, 0), 10)  # draw all detected contours

    # Find the biggest contour
    biggest, maxArea = utlis.biggestContour(contours)
    if biggest.size != 0:
        biggest = utlis.reorder(biggest)
        cv2.drawContours(imgBigContour, biggest, -1, (0, 255, 0), 20)  # draw the biggest contour
        imgBigContour = utlis.drawRectangle(imgBigContour, biggest, 2)
        pts1 = np.float32(biggest)                                              # source points for the warp
        pts2 = np.float32([[0, 0], [widthImg, 0], [0, heightImg], [widthImg, heightImg]])  # destination points
        matrix = cv2.getPerspectiveTransform(pts1, pts2)
        imgWarpColored = cv2.warpPerspective(img, matrix, (widthImg, heightImg))

        # Remove 20 pixels from each side
        imgWarpColored = imgWarpColored[20:imgWarpColored.shape[0] - 20, 20:imgWarpColored.shape[1] - 20]
        imgWarpColored = cv2.resize(imgWarpColored, (widthImg, heightImg))

        # Apply adaptive threshold
        imgWarpGray = cv2.cvtColor(imgWarpColored, cv2.COLOR_BGR2GRAY)
        imgAdaptiveThre = cv2.adaptiveThreshold(imgWarpGray, 255, 1, 1, 7, 2)
        imgAdaptiveThre = cv2.bitwise_not(imgAdaptiveThre)
        imgAdaptiveThre = cv2.medianBlur(imgAdaptiveThre, 3)

        # Image array for display
        imageArray = ([img, imgGray, imgThreshold, imgContours],
                      [imgBigContour, imgWarpColored, imgWarpGray, imgAdaptiveThre])
    else:
        imageArray = ([img, imgGray, imgThreshold, imgContours],
                      [imgBlank, imgBlank, imgBlank, imgBlank])

    # Display labels
    lables = [["Original", "Gray", "Threshold", "Contours"],
              ["Biggest Contour", "Warp Perspective", "Warp Gray", "Adaptive Threshold"]]
    stackedImage = utlis.stackImages(imageArray, 0.75, lables)
    cv2.imshow("Result", stackedImage)

    # Save the image when the 's' key is pressed; quit on Esc
    key = cv2.waitKey(1) & 0xFF
    if key == ord('s'):
        cv2.imwrite("Scanned/myImage" + str(count) + ".jpg", imgWarpColored)
        cv2.rectangle(stackedImage, ((int(stackedImage.shape[1] / 2) - 230), int(stackedImage.shape[0] / 2) + 50),
                      (1100, 350), (0, 255, 0), cv2.FILLED)
        cv2.putText(stackedImage, "Scan Saved", (int(stackedImage.shape[1] / 2) - 200, int(stackedImage.shape[0] / 2)),
                    cv2.FONT_HERSHEY_DUPLEX, 3, (0, 0, 255), 5, cv2.LINE_AA)
        cv2.imshow('Result', stackedImage)
        cv2.waitKey(300)
        count += 1
    elif key == 27:
        break
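One practical note: cv2.imwrite() does not create missing folders, it simply returns False, so if no files appear after you press s, make sure the Scanned folder exists next to Main.py. A minimal guard, assuming that same folder name, could go near the top of Main.py:

import os

# cv2.imwrite() will not create the output folder for us, so create it up front
os.makedirs("Scanned", exist_ok=True)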
What I will walk through today is the main file, Main.py. Honestly, that puts a little pressure on me: this project touches core OpenCV concepts, and I had to look some of them up myself, since things learned a while ago fade and I am still just getting started. I will do my best to explain it clearly.
Note: I use the webcam as the example; reading from an image file works in exactly the same way.
- First, look at the block between the rows of # characters. That is where everything starts: the variable webCamFeed decides whether to read from the webcam, followed by the camera brightness and the output width and height. utlis.initializeTrackbars() is the trackbar-initialization function from utlis.py.
- Next, the image is processed step by step: resize, grayscale conversion, Gaussian blur, Canny edge detection, dilation, then erosion.
- After that, we find every contour the image yields, pick out the biggest one and draw it. That biggest contour supplies the four corner points of the scanned document, i.e. the warp points. cv2.getPerspectiveTransform() computes the perspective matrix between those corners and the output rectangle, and cv2.warpPerspective() produces the warped image. If you run the project up to this point, you will notice a little of the table colour still showing at the edges, so we trim 20 pixels from every side and then apply an adaptive threshold to clean up the result and make the black text stand out (see the standalone sketch after this list).
- Then we assemble the arguments utlis.stackImages() needs: the images (as a nested list), the scale, and the labels (also a nested list; the program runs fine without them), and show everything in one window.
- Finally, if you are happy with the preview, press the s key to save the scan; a rectangle with the text "Scan Saved" appears in the middle of the window. Press Esc to quit the program.
4. Project Resources
5. Project Summary and Evaluation
This is a good project. Normally, to get this kind of result, a corrected and clean document scan, you would need a paid membership, points to redeem, or ads to sit through. If the scanned document is not sharp enough, switch to a more suitable resolution. In my view it is genuinely practical. I had originally planned a face-recognition project for today, but I could not get past some package-installation errors for a long time (they are solved now); document scanning was meant to be tomorrow's project and was finished in a hurry today, so I hope you have fun with it!
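If the webcam feed itself is the limiting factor, one thing to try is requesting a larger capture resolution before the main loop; whether the camera honours the request depends on the hardware, and the 1280x720 values below are only an example.

import cv2

cap = cv2.VideoCapture(1)                    # same camera index as Main.py
cap.set(cv2.CAP_PROP_FRAME_WIDTH, 1280)      # requested frame width (example value)
cap.set(cv2.CAP_PROP_FRAME_HEIGHT, 720)      # requested frame height (example value)
cap.set(cv2.CAP_PROP_BRIGHTNESS, 160)        # same brightness as cap.set(10, 160)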
到此這篇關于Python+OpenCV實戰(zhàn)之實現(xiàn)文檔掃描的文章就介紹到這了,更多相關Python OpenCV文檔掃描內(nèi)容請搜索腳本之家以前的文章或繼續(xù)瀏覽下面的相關文章希望大家以后多多支持腳本之家!