快捷導航

Python實現快速將pdf文件剪切成多個圖片

更新時間：2024年01月31日 14:18:17 作者：搬磚的詩人Z

這篇文章主要為大家詳細介紹了如何使用Python實現快速將pdf文件剪切成多個圖片，文中的示例代碼講解詳細，有需要的小伙伴可以跟隨小編一起學習一下。

這里提供一個打包好的剪切包，Windows環境打開即可運行

import os
import fitz
import glob
import requests


# NOTE(review): NLS_LANG looks like the Oracle client locale/charset variable;
# nothing in the visible code reads it -- confirm whether it is still needed.
os.environ['NLS_LANG'] = 'SIMPLIFIED CHINESE_CHINA.UTF8'

# Cut a PDF into one PNG image per page.
def CutPdf(pdfPath, savePath, pdfSavePath):
    """Render each page of a PDF to a PNG and re-wrap it as a one-page PDF.

    Args:
        pdfPath: Path of the source PDF.
        savePath: Prefix for the PNG files; page ``n`` (1-based) is written
            to ``savePath + "<n>.png"``.
        pdfSavePath: Prefix for the per-page PDFs; page ``n`` is written to
            ``pdfSavePath + "<n>.pdf"``.  It is also handed to
            ``frompic2pdf`` as the output directory.

    Returns:
        None.  Any exception is caught and reported with ``print`` so that a
        single bad document does not stop the caller's loop.
    """
    try:
        rotate = 0
        # A zoom factor of 2 on each axis yields four times as many pixels.
        zoom_x = 2.0
        zoom_y = 2.0
        # The matrix does not depend on the page, so build it once.
        trans = fitz.Matrix(zoom_x, zoom_y).prerotate(rotate)

        # The context manager closes the document even when a page fails.
        with fitz.open(pdfPath) as doc:
            for pg in range(doc.page_count):
                page = doc[pg]
                pm = page.get_pixmap(matrix=trans, alpha=False)
                imagePath = savePath + '%s.png' % str(pg + 1)
                pm.save(imagePath)  # final PNG location
                newpdfPath = pdfSavePath + '%s.pdf' % str(pg + 1)
                print("剪切一張圖片：" + str(pg + 1))
                frompic2pdf(imagePath, pdfSavePath, newpdfPath)
                print("轉(zhuǎn)換成功一張pdf：" + str(pg + 1))
    except Exception as e:
        print('CutPdf出現(xiàn)異常:' + str(e))

# Convert an image into a PDF.
def frompic2pdf(img_path, pdfSavePath, pdf_path):
    """Wrap the image(s) matched by ``img_path`` in a single-page PDF.

    Args:
        img_path: Image path or glob pattern; matches are processed in
            sorted order.
        pdfSavePath: Output directory; created when at least one image
            matched and the directory does not exist yet.
        pdf_path: Path of the PDF to write.  Every matched image is saved to
            this same path, so with several matches the last one wins.

    Returns:
        None.  Nothing is written when the pattern matches no file.
    """
    matches = sorted(glob.glob(img_path))
    if not matches:
        return

    os.makedirs(pdfSavePath, exist_ok=True)

    for img in matches:
        # Open the image as a document and convert it to PDF bytes.
        with fitz.open(img) as imgdoc:
            pdfbytes = imgdoc.convert_to_pdf()
        # Copy the converted page into an empty document and save it; the
        # context managers close every handle even if saving fails.
        with fitz.open("pdf", pdfbytes) as imgpdf, fitz.open() as doc:
            doc.insert_pdf(imgpdf)
            doc.save(pdf_path)


def main():
    """Cut every file found in the ``cards`` directory.

    For a file ``<stem>.<ext>`` the page images go to
    ``images/<stem>/<stem><n>.png`` and the per-page PDFs to
    ``pdf/<stem>/<n>.pdf``.
    """
    pdfUrl = "cards"
    for item in os.listdir(pdfUrl):
        print("開始裁剪" + item)
        url = pdfUrl + "/" + item

        # splitext drops only the final extension, so "a.b.pdf" and "a.c.pdf"
        # no longer collapse into the same "a" output folder.
        stem = os.path.splitext(item)[0]
        foldStr = "images/" + stem
        url2 = foldStr + "/" + stem
        url3 = "pdf/" + stem + "/"
        os.makedirs(foldStr, exist_ok=True)

        CutPdf(url, url2, url3)
        print("完成裁剪")


if __name__ == "__main__":
    main()

知識補充

除了上文的方法，小編還為大家整理了其他Python裁剪pdf文件為圖片的方法，希望對大家有所幫助。

方法一：

剪裁PDF頁面

在剪裁PDF頁面之前，我們需要獲取頁面的尺寸，并計算剪裁區域的位置和大小。下面是剪裁PDF頁面的代碼：

# NOTE(review): `page` is not defined in this snippet; the attribute names
# used below (mediaBox / trimBox / cropBox) suggest a PyPDF2 page object from
# the pre-3.0 API -- confirm against the surrounding program.

# Read the page size from its media box (neither value is used again in
# this snippet).
page_width = page.mediaBox.getWidth()
page_height = page.mediaBox.getHeight()

# Position and size of the region to keep
x = 100  # x coordinate of the starting corner
y = 100  # y coordinate of the starting corner
width = 200  # width of the crop region
height = 300  # height of the crop region

# Crop the page: apply the same rectangle to the trim box and the crop box
page.trimBox.lowerLeft = (x, y)
page.trimBox.upperRight = (x + width, y + height)
page.cropBox.lowerLeft = (x, y)
page.cropBox.upperRight = (x + width, y + height)

代碼解析：

使用mediaBox屬性獲取頁面的寬度和高度。
定義剪裁區域的位置和大小，可以根據實際需求進行調整。
使用trimBox屬性設置剪裁后的頁面的邊界框。
使用cropBox屬性設置剪裁后的頁面的裁剪框。

保存剪裁后的圖片

最后，我們需要將剪裁后的頁面保存為圖片。這可以通過使用PyPDF2庫和Pillow庫來實現。下面是保存剪裁后的圖片的代碼：

import io

from PIL import Image

# Save the cropped page as an image.
# NOTE(review): `page` is not defined in this snippet, and
# `extract_xobject()` could not be confirmed as a PyPDF2 page method --
# verify before relying on this.
# Decode the embedded image first so that a failed extraction does not leave
# an empty output.jpg behind.
page_data = page.extract_xobject().get('/Im0').getData()
image = Image.open(io.BytesIO(page_data))
with open('output.jpg', 'wb') as image_file:
    image.save(image_file, 'JPEG')

首先，導入Pillow庫中的Image模塊，用于處理圖像。

使用extract_xobject()方法從剪裁后的頁面中提取圖像數據。
使用get('/Im0')方法獲取圖像數據。
使用getData()方法獲取圖像的二進制數據。
使用Image.open()方法打開圖像數據。
使用save()方法保存圖像數據為JPEG格式的圖片。
'output.jpg'為保存的圖片路徑，可以根據需要修改。

方法二：

python分割pdf

from PyPDF2 import PdfFileReader, PdfFileWriter
 
 
# Split a PDF file into several PDF files.
def split_pdf(read_file, out_detail, out_dir=None):
    """Split ``read_file`` into several PDFs as described by ``out_detail``.

    Each non-blank line of ``out_detail`` holds two whitespace-separated
    fields: a 1-based inclusive page range ``START-END`` and an output file
    name without extension, for example ``1-3 chapter1``.

    Args:
        read_file: Path of the PDF to split.
        out_detail: Path of the UTF-8 text file listing the splits.
        out_dir: Directory for the generated PDFs.  When ``None`` (the
            default) the original hard-coded Windows location is used
            unchanged.

    Returns:
        None.  Errors are reported with ``print`` instead of being raised; a
        range that runs past the last page skips only that output file.

    NOTE(review): ``PdfFileReader`` / ``PdfFileWriter`` appear to be the
    PyPDF2 < 3.0 names -- confirm the installed version provides them.
    """
    # Local import: this script's own import line only pulls in PyPDF2.
    import os

    try:
        # ``with`` closes the input even on failure.  The original closed it
        # in ``finally``, which itself raised when ``open`` had failed.
        with open(read_file, 'rb') as fp_read_file:
            pdf_input = PdfFileReader(fp_read_file)  # parse the PDF to split
            page_count = pdf_input.getNumPages()  # number of pages
            print(page_count)

            with open(out_detail, 'r', encoding='utf-8') as fp:
                for detail in fp:  # one split rule per line
                    if not detail.strip():
                        # A blank line is not a rule; do not abort the run.
                        continue
                    pages, write_file = detail.split()
                    if out_dir is None:
                        pdf_file = r'C:\Users\GZTSALFIEL\Desktop\excel問題文件\pdf\\'+f'{write_file}.pdf'
                    else:
                        pdf_file = os.path.join(out_dir, f'{write_file}.pdf')
                    start_page, end_page = map(int, pages.split('-'))
                    # Convert to a zero-based index; the progress messages
                    # below therefore show the zero-based start.
                    start_page -= 1
                    try:
                        print(f'開始分割{start_page}頁-{end_page}頁，保存為{pdf_file}......')
                        pdf_output = PdfFileWriter()
                        for i in range(start_page, end_page):
                            pdf_output.addPage(pdf_input.getPage(i))
                        with open(pdf_file, 'wb') as sub_fp:
                            pdf_output.write(sub_fp)
                        print(f'完成分割{start_page}頁-{end_page}頁，保存為{pdf_file}!')
                    except IndexError:
                        print('分割頁數(shù)超過了PDF的頁數(shù)')
    except Exception as e:
        print(e)
 
 
 
# def main():
#     fire.Fire(split_pdf)
#
# if __name__ == '__main__':
#     main()
 
# Example invocation: split the PDF according to the rules in consult.txt.
split_pdf(r'..\pdfdocement\myfile.pdf', 'consult.txt')

到此這篇關于Python實現快速將pdf文件剪切成多個圖片的文章就介紹到這了，更多相關Python將pdf剪切圖片內容請搜索腳本之家以前的文章或繼續瀏覽下面的相關文章，希望大家以后多多支持腳本之家！

您可能感興趣的文章: