基于Python編寫一個PDF轉換工具箱
1.簡介
使用Python自寫的pdf工具箱,包括pdf轉word,圖片,合并,頁面拆分,頁面刪除,頁面提取、
轉換word,圖片功能,支持文件拖入。
2.功能介紹
合并:添加順序就是合并順序,可多次添加。
拆分:將輸入頁碼的范圍拆分成每個獨立的pdf,單次可輸入多個范圍。
刪除:將輸入頁碼的范圍刪除,單次可輸入多個范圍,保存刪除后的文件。
提取:將輸入頁碼的范圍提取成獨立的pdf,單次可輸入多個范圍。
3.運行效果
4.相關(guān)源碼
import os
import re
import sys

from PyQt5.QtWidgets import (
    QApplication,
    QMainWindow,
    QPushButton,
    QVBoxLayout,
    QWidget,
    QFileDialog,
    QListWidget,
    QMessageBox,
    QLineEdit,
    QHBoxLayout,
)
from PyQt5.QtCore import QThread, pyqtSignal
from PyPDF2 import PdfReader, PdfWriter, PdfMerger
from pdf2docx import Converter
import fitz  # PyMuPDF; used to render PDF pages as JPG images


class CustomListWidget(QListWidget):
    """List widget that accepts PDF files dragged onto it.

    Dropped files are forwarded to ``parent.addPDFFile``; the parent is
    therefore expected to provide that method.
    """

    def __init__(self, parent=None):
        super().__init__(parent)
        self.setAcceptDrops(True)
        self.parentWindow = parent

    @staticmethod
    def _is_pdf(url):
        """Return True when the URL text ends with ``.pdf`` (case-insensitive)."""
        return url.toString().lower().endswith('.pdf')

    def _accept_if_pdf(self, event):
        """Accept the drag event when at least one dragged URL is a PDF."""
        if any(self._is_pdf(url) for url in event.mimeData().urls()):
            event.acceptProposedAction()

    def dragEnterEvent(self, event):
        self._accept_if_pdf(event)

    def dragMoveEvent(self, event):
        self._accept_if_pdf(event)

    def dropEvent(self, event):
        """Add every dropped PDF to the parent window's file list."""
        pdf_files = [
            url.toLocalFile()
            for url in event.mimeData().urls()
            if self._is_pdf(url)
        ]
        for path in pdf_files:
            self.parentWindow.addPDFFile(path)
        if pdf_files:
            # Tell the drag source the drop was actually consumed.
            event.acceptProposedAction()


class Worker(QThread):
    """Background thread that runs one PDF operation.

    Args:
        pdf_files: Paths of the input PDFs. The list is copied so later
            changes made by the GUI cannot affect a running job.
        range_str: Page ranges such as ``"1,3-4,8-15"`` (1-based, inclusive);
            used by the split, delete and extract operations.
        save_path: Output file (merge, delete) or output folder
            (split, extract, jpg, word).
        operation: One of ``'merge'``, ``'split'``, ``'delete'``,
            ``'extract'``, ``'jpg'``, ``'word'``. Any other value makes
            ``run`` a no-op.

    Signals:
        finished(str): Emitted with a success message.
        error(str): Emitted with the text of any exception raised.
    """

    # NOTE(review): this name also exists as a built-in, argument-less signal
    # on QThread; it is kept because callers connect to ``finished`` expecting
    # the message string.
    finished = pyqtSignal(str)
    error = pyqtSignal(str)

    def __init__(self, pdf_files, range_str=None, save_path=None, operation=None):
        super().__init__()
        self.pdf_files = list(pdf_files)
        self.range_str = range_str
        self.save_path = save_path
        self.operation = operation

    def run(self):
        """Dispatch to the requested operation, reporting failures via ``error``."""
        handlers = {
            'merge': self.merge_pdfs,
            'split': self.split_pdfs,
            'delete': self.delete_pages,
            'extract': self.extract_pages,
            'jpg': self.pdf_to_jpg,
            'word': self.pdf_to_word,
        }
        handler = handlers.get(self.operation)
        if handler is None:
            return
        try:
            handler()
        except Exception as e:
            # Thread boundary: surface any failure to the GUI instead of
            # letting the exception die inside the worker thread.
            self.error.emit(str(e))

    def _load_ranges(self, reader):
        """Parse ``self.range_str`` and check it against the document length.

        Raises:
            ValueError: If a range is malformed or goes past the last page.
        """
        ranges = self.parse_ranges(self.range_str)
        page_count = len(reader.pages)
        for _, end_page in ranges:
            if end_page >= page_count:
                raise ValueError(
                    f'頁碼超出范圍: 文件共 {page_count} 頁,但輸入了第 {end_page + 1} 頁'
                )
        return ranges

    def merge_pdfs(self):
        """Concatenate all input PDFs, in list order, into ``save_path``."""
        merger = PdfMerger()
        try:
            for pdf in self.pdf_files:
                merger.append(pdf)
            merger.write(self.save_path)
        finally:
            merger.close()
        self.finished.emit('PDF文件已成功合并。')

    def split_pdfs(self):
        """Write every page in the requested ranges as its own PDF file."""
        reader = PdfReader(self.pdf_files[0])
        ranges = self._load_ranges(reader)
        os.makedirs(self.save_path, exist_ok=True)  # make sure the target folder exists
        file_index = 1  # running counter so every output name is unique
        for start_page, end_page in ranges:
            for page_num in range(start_page, end_page + 1):
                writer = PdfWriter()
                writer.add_page(reader.pages[page_num])
                split_save_path = os.path.join(
                    self.save_path, f'split_page_{file_index}.pdf'
                )
                with open(split_save_path, 'wb') as f:
                    writer.write(f)
                file_index += 1
        self.finished.emit('PDF文件已成功拆分并保存。')

    def delete_pages(self):
        """Write a copy of the first input PDF without the requested pages."""
        reader = PdfReader(self.pdf_files[0])
        ranges = self._load_ranges(reader)
        pages_to_delete = {
            page for start, end in ranges for page in range(start, end + 1)
        }
        writer = PdfWriter()
        for index, page in enumerate(reader.pages):
            if index not in pages_to_delete:
                writer.add_page(page)
        with open(self.save_path, 'wb') as f:
            writer.write(f)
        self.finished.emit('指定頁面已從PDF中刪除。')

    def extract_pages(self):
        """Write one PDF per requested range into the ``save_path`` folder."""
        reader = PdfReader(self.pdf_files[0])
        ranges = self._load_ranges(reader)
        os.makedirs(self.save_path, exist_ok=True)  # create the folder once, up front
        for i, (start_page, end_page) in enumerate(ranges, start=1):
            writer = PdfWriter()
            for page_num in range(start_page, end_page + 1):
                writer.add_page(reader.pages[page_num])
            extract_save_path = os.path.join(self.save_path, f'extract_{i}.pdf')
            with open(extract_save_path, 'wb') as f:
                writer.write(f)
        self.finished.emit('指定頁面已從PDF中提取。')

    def pdf_to_jpg(self):
        """Render each page of each input PDF to ``<save_path>/<pdf name>/Page_N.jpg``."""
        trans = fitz.Matrix(2, 2)  # 2x zoom on both axes
        for file in self.pdf_files:
            img_folder = os.path.join(
                self.save_path, os.path.splitext(os.path.basename(file))[0]
            )
            os.makedirs(img_folder, exist_ok=True)
            # The context manager closes the document even if rendering fails.
            with fitz.open(file) as pdf:
                for pg in range(pdf.page_count):
                    pm = pdf[pg].get_pixmap(matrix=trans, alpha=False)
                    pm.save(os.path.join(img_folder, f'Page_{pg + 1}.jpg'))
        self.finished.emit('PDF文件已成功轉換為圖片。')

    def pdf_to_word(self):
        """Convert each input PDF to a .docx file.

        Output goes into ``save_path`` when one was given; otherwise next to
        the source PDF.
        """
        for file in self.pdf_files:
            base_name = os.path.splitext(os.path.basename(file))[0] + '.docx'
            target_dir = self.save_path or os.path.dirname(file)
            if self.save_path:
                os.makedirs(target_dir, exist_ok=True)
            docx_name = os.path.join(target_dir, base_name)
            cv = Converter(file)
            try:
                cv.convert(docx_name, start=0, end=None)
            finally:
                cv.close()
        self.finished.emit('PDF文件已成功轉換為Word文檔。')

    def parse_ranges(self, ranges_str):
        """Turn ``"1,3-4,8-15"`` into zero-based inclusive ``(start, end)`` tuples.

        Items may be separated by an ASCII or a full-width comma; surrounding
        whitespace and empty items (e.g. a trailing comma) are ignored.

        Raises:
            ValueError: If nothing usable was entered, an item is not a
                number or ``a-b`` pair, a page number is below 1, or a range
                runs backwards.
        """
        ranges = []
        for raw_part in re.split(r'[,,]', ranges_str or ''):
            part = raw_part.strip()
            if not part:
                continue
            first, dash, last = part.partition('-')
            try:
                start_page = int(first)
                end_page = int(last) if dash else start_page
            except ValueError:
                raise ValueError(f'無效的頁碼范圍: {part}') from None
            # Page 0 would become index -1 and silently pick the last page.
            if start_page < 1 or end_page < start_page:
                raise ValueError(f'無效的頁碼范圍: {part}')
            ranges.append((start_page - 1, end_page - 1))
        if not ranges:
            raise ValueError('請輸入頁碼范圍。')
        return ranges


class PDFMergerApp(QMainWindow):
    """Main window: a PDF file list plus buttons for each toolbox operation."""

    def __init__(self):
        super().__init__()
        self.pdf_files = []
        self._worker = None  # the most recently started Worker, if any
        self.initUI()

    def initUI(self):
        """Build the widgets and wire every button to its handler."""
        self.setWindowTitle('PDF 工具箱')
        self.setGeometry(100, 100, 800, 600)
        mainLayout = QVBoxLayout()

        self.addButton = QPushButton('添加 PDF', self)
        self.addButton.clicked.connect(self.addPDF)
        mainLayout.addWidget(self.addButton)

        self.listWidget = CustomListWidget(self)
        mainLayout.addWidget(self.listWidget)

        deleteLayout = QHBoxLayout()
        self.removeButton = QPushButton('刪除選定', self)
        self.removeButton.clicked.connect(self.removeSelected)
        deleteLayout.addWidget(self.removeButton)
        self.removeAllButton = QPushButton('刪除全部', self)
        self.removeAllButton.clicked.connect(self.removeAll)
        deleteLayout.addWidget(self.removeAllButton)
        mainLayout.addLayout(deleteLayout)

        convertLayout = QHBoxLayout()
        self.convertJPGButton = QPushButton('轉換為圖片', self)
        self.convertJPGButton.clicked.connect(self.convertToJPG)
        convertLayout.addWidget(self.convertJPGButton)
        self.convertWordButton = QPushButton('轉換為Word', self)
        self.convertWordButton.clicked.connect(self.convertToWord)
        convertLayout.addWidget(self.convertWordButton)
        mainLayout.addLayout(convertLayout)

        self.mergeButton = QPushButton('合并 PDFs', self)
        self.mergeButton.clicked.connect(self.mergePDFs)
        mainLayout.addWidget(self.mergeButton)

        splitLayout = QHBoxLayout()
        self.splitInput = QLineEdit(self)
        self.splitInput.setPlaceholderText('輸入拆分頁碼范圍可輸入多個范圍,如1,3-4,8-15')
        splitLayout.addWidget(self.splitInput)
        self.splitButton = QPushButton('拆分頁面', self)
        self.splitButton.clicked.connect(self.splitPDF)
        splitLayout.addWidget(self.splitButton)
        mainLayout.addLayout(splitLayout)

        deletePageLayout = QHBoxLayout()
        self.deleteInput = QLineEdit(self)
        self.deleteInput.setPlaceholderText('輸入刪除頁碼范圍可輸入多個范圍,如1,3-4,8-15')
        deletePageLayout.addWidget(self.deleteInput)
        self.deleteButton = QPushButton('刪除頁面', self)
        self.deleteButton.clicked.connect(self.deletePages)
        deletePageLayout.addWidget(self.deleteButton)
        mainLayout.addLayout(deletePageLayout)

        extractLayout = QHBoxLayout()
        self.extractInput = QLineEdit(self)
        self.extractInput.setPlaceholderText('輸入提取頁碼范圍可輸入多個范圍,如1,3-4,8-15')
        extractLayout.addWidget(self.extractInput)
        self.extractButton = QPushButton('提取頁面', self)
        self.extractButton.clicked.connect(self.extractPages)
        extractLayout.addWidget(self.extractButton)
        mainLayout.addLayout(extractLayout)

        container = QWidget()
        container.setLayout(mainLayout)
        self.setCentralWidget(container)

    def addPDF(self):
        """Let the user pick PDFs with a file dialog and add them to the list."""
        files, _ = QFileDialog.getOpenFileNames(self, '打開文件', '', 'PDF files (*.pdf)')
        for file_path in files:
            self.addPDFFile(file_path)

    def addPDFFile(self, file_path):
        """Add one path to the list, ignoring empty paths and duplicates."""
        if file_path and file_path not in self.pdf_files:
            self.pdf_files.append(file_path)
            self.listWidget.addItem(file_path)

    def removeSelected(self):
        """Remove the selected entries from both the list widget and ``pdf_files``."""
        for item in self.listWidget.selectedItems():
            self.pdf_files.remove(item.text())
            self.listWidget.takeItem(self.listWidget.row(item))

    def removeAll(self):
        """Empty the file list."""
        self.pdf_files.clear()
        self.listWidget.clear()

    def _isBusy(self):
        """Warn and return True while a previously started Worker is still running."""
        if self._worker is not None and self._worker.isRunning():
            QMessageBox.warning(self, "錯誤", "上一個操作仍在進行中,請稍候。")
            return True
        return False

    def _hasFiles(self):
        """Warn and return False when no PDF has been added yet."""
        if not self.pdf_files:
            QMessageBox.warning(self, "錯誤", "請先添加PDF文件。")
            return False
        return True

    def _rangeFrom(self, line_edit, single_file_message):
        """Return the page-range text for a single-file operation, or ''.

        Warns (and returns '') when a job is running, when the list does not
        hold exactly one PDF, or when the input field is empty.
        """
        if self._isBusy():
            return ''
        if len(self.pdf_files) != 1:
            QMessageBox.warning(self, "錯誤", single_file_message)
            return ''
        range_str = line_edit.text().strip()
        if not range_str:
            QMessageBox.warning(self, "錯誤", "請輸入頁碼范圍。")
        return range_str

    def _startWorker(self, operation, save_path, range_str=None):
        """Create a Worker for ``operation``, connect its signals and start it."""
        worker = Worker(
            self.pdf_files,
            range_str=range_str,
            save_path=save_path,
            operation=operation,
        )
        worker.finished.connect(self.onFinished)
        worker.error.connect(self.onError)
        # Keep a reference so the thread object is not garbage-collected
        # while running.
        self._worker = worker
        # Kept for backward compatibility with the original attribute name.
        # NOTE(review): QObject defines a ``thread()`` method, which this
        # instance attribute shadows — confirm nothing relies on that method.
        self.thread = worker
        worker.start()

    def mergePDFs(self):
        """Merge all listed PDFs into a file chosen by the user."""
        if self._isBusy() or not self._hasFiles():
            return
        save_path, _ = QFileDialog.getSaveFileName(self, '保存文件', '', 'PDF files (*.pdf)')
        if save_path:
            self._startWorker('merge', save_path)

    def splitPDF(self):
        """Split the requested pages of the single listed PDF into a chosen folder."""
        range_str = self._rangeFrom(self.splitInput, "請只選擇一個PDF文件進行拆分。")
        if not range_str:
            return
        folder_path = self.getFolderName()
        if folder_path:
            self._startWorker('split', folder_path, range_str)

    def deletePages(self):
        """Save a copy of the single listed PDF with the requested pages removed."""
        range_str = self._rangeFrom(self.deleteInput, "請只選擇一個PDF文件進行刪除操作。")
        if not range_str:
            return
        save_path = QFileDialog.getSaveFileName(self, '保存文件', '', 'PDF files (*.pdf)')[0]
        if save_path:
            self._startWorker('delete', save_path, range_str)

    def extractPages(self):
        """Extract each requested range of the single listed PDF into a chosen folder."""
        range_str = self._rangeFrom(self.extractInput, "請只選擇一個PDF文件進行提取操作。")
        if not range_str:
            return
        # Worker.extract_pages writes extract_N.pdf files into a directory,
        # so a folder (not a file name) has to be selected here.
        folder_path = QFileDialog.getExistingDirectory(self, "選擇保存提取文件的位置")
        if folder_path:
            self._startWorker('extract', folder_path, range_str)

    def convertToJPG(self):
        """Render all listed PDFs to JPG images inside a chosen folder."""
        if self._isBusy() or not self._hasFiles():
            return
        save_path = QFileDialog.getExistingDirectory(self, "選擇保存圖片的位置")
        if save_path:
            self._startWorker('jpg', save_path)

    def convertToWord(self):
        """Convert all listed PDFs to .docx files inside a chosen folder."""
        if self._isBusy() or not self._hasFiles():
            return
        save_path = QFileDialog.getExistingDirectory(self, "選擇保存Word的位置")
        if save_path:
            self._startWorker('word', save_path)

    def getFolderName(self):
        """Ask for the folder that receives split files; '' when cancelled."""
        folder_path = QFileDialog.getExistingDirectory(self, "選擇保存拆分文件的位置")
        return folder_path

    def onFinished(self, message):
        """Show the success message, then reset the file list and inputs."""
        QMessageBox.information(self, "操作完成", message)
        self.clear_pdf_list()
        self.clear_text_inputs()

    def onError(self, error_message):
        """Show the failure message; the file list and inputs are left untouched."""
        QMessageBox.warning(self, "操作失敗", error_message)

    def clear_pdf_list(self):
        """Empty the file list."""
        self.pdf_files.clear()
        self.listWidget.clear()

    def clear_text_inputs(self):
        """Clear every page-range QLineEdit."""
        self.splitInput.clear()
        self.deleteInput.clear()
        self.extractInput.clear()


def main():
    """Create the application, show the main window and run the event loop."""
    app = QApplication(sys.argv)
    ex = PDFMergerApp()
    ex.show()
    sys.exit(app.exec_())


if __name__ == '__main__':
    main()
到此這篇關于基于Python編寫一個PDF轉換工具箱的文章就介紹到這了,更多相關Python PDF轉換內容請搜索腳本之家以前的文章或繼續瀏覽下面的相關文章,希望大家以后多多支持腳本之家!
相關文章
Python restful框架接口開發實現
這篇文章主要介紹了Python restful框架接口開發實現,文中通過示例代碼介紹的非常詳細,對大家的學習或者工作具有一定的參考學習價值,需要的朋友可以參考下 2020-04-04
使用Python和Selenium構建一個自動化圖像引擎
這篇文章主要為大家詳細介紹了如何使用Python和Selenium庫構建一個自動化圖像引擎,能夠根據指定參數自動截取網頁快照,并將生成的圖片存儲到云端,需要的可以參考下 2024-12-12