快捷導航

基于Python開發Office文檔圖片提取器

更新時間：2025年01月19日 09:53:00 作者：黑客白澤

這篇文章主要為大家詳細介紹了一個基于PyQt5開發的桌面應用,可以實現Office文檔圖片提取功能,文中的示例代碼講解詳細,需要的可以參考一下

1. 簡介

本程序是一個基于PyQt5開發的桌面應用，名為“Office文檔圖片批量提取工具”。其主要功能是從各種Office文檔（包括.doc、.docx、.xls、.xlsx、.ppt、.pptx）中提取圖片，并將提取的圖片保存在指定的目錄中。用戶可以通過該工具批量操作多個文件，并查看提取日志。

功能簡介

1.文件選擇：

用戶可以通過點擊“添加文件”按鈕選擇多個Office文件（.doc、.docx、.xls、.xlsx、.ppt、.pptx）。
支持拖拽文件到文件列表中進行快速添加。
用戶還可以通過“移除選中”按鈕刪除選中的文件。

2.輸出目錄選擇：

用戶可以選擇保存提取圖片的輸出目錄，確保所有提取的圖片都能保存在指定位置。

3.提取功能：

點擊“提取圖片”按鈕后，程序會根據文件類型自動調用相應的函數提取圖片。
支持從.docx、.xlsx、.pptx等文件格式中提取圖片。
對于.doc、.xls、.ppt文件，使用win32com.client處理，這需要在Windows環境下運行。

4.日志記錄：

提取過程中的每一條記錄（包括文件名、圖片名稱、保存路徑）都會被記錄在提取日志表格中，方便用戶查看提取結果。
如果某個文件沒有找到圖片，或者發生錯誤，日志中會顯示相應的提示。

5.用戶界面：

用戶界面簡潔，分為多個區域：文件選擇框、輸出目錄設置、提取日志框。
支持水平和垂直布局，確保界面簡潔且功能齊全。

6.錯誤處理：

如果沒有添加任何文件或沒有選擇輸出目錄，程序會彈出警告框提示用戶。
在提取過程中，任何錯誤都會被捕獲并記錄到日志中，用戶能夠快速查看錯誤原因。

技術實現

PyQt5：用于構建用戶界面。

docx、openpyxl、python-pptx：用于處理不同類型的Office文檔。

Pillow：用于圖像處理和保存。

win32com.client：用于處理Windows特定的.doc、.ppt文件。

QTableWidget：用于顯示提取日志。

該程序使用戶能夠高效、批量地從多種Office文檔中提取圖像，并對提取結果進行查看和管理。

2. 運行效果

3. 相關源碼

import sys
import os
from PyQt5.QtWidgets import QApplication, QWidget, QVBoxLayout, QHBoxLayout, QGridLayout, QPushButton, QLabel, QLineEdit, QFileDialog, QListWidget, QTableWidget, QTableWidgetItem, QHeaderView, QAbstractItemView, QGroupBox, QMessageBox, QSpacerItem, QSizePolicy
from PyQt5.QtCore import Qt
from PyQt5.QtGui import QDragEnterEvent, QDropEvent
from docx import Document
from PIL import Image
from io import BytesIO
from openpyxl import load_workbook
from pptx import Presentation
import win32com.client  # 用于處理 .doc 和 .ppt 文件（Windows特定）


class OptimizedUI(QWidget):
    def __init__(self):
        """Create the main window, set its geometry and title, then build the UI."""
        super().__init__()

        # Initial placement and size: x=100, y=100, 800x600.
        self.setGeometry(100, 100, 800, 600)
        self.setWindowTitle("Office文檔圖片提取器")

        self.init_ui()

    def init_ui(self):
        """Build the window's widgets and connect their signals.

        The window is one vertical stack: the selected-file list, the
        add/remove button row, the output-directory picker, the extraction
        log table and the extract button. Dropping files onto the window is
        enabled at the end.
        """
        root_layout = QVBoxLayout(self)

        # Selected-file list (multi-select, scroll bar always visible).
        files_box = QGroupBox("選擇的文件")
        files_box_layout = QVBoxLayout()
        self.file_list = QListWidget()
        self.file_list.setSelectionMode(QAbstractItemView.MultiSelection)
        self.file_list.setVerticalScrollBarPolicy(Qt.ScrollBarAlwaysOn)
        files_box_layout.addWidget(self.file_list)
        files_box.setLayout(files_box_layout)

        # Add / remove buttons, laid out side by side.
        add_btn = QPushButton("添加文件")
        remove_btn = QPushButton("移除選中")
        add_btn.clicked.connect(self.browse_files)
        remove_btn.clicked.connect(self.remove_files)
        buttons_row = QHBoxLayout()
        for button in (add_btn, remove_btn):
            buttons_row.addWidget(button)

        # Output directory: editable path plus a browse button.
        output_box = QGroupBox("輸出目錄")
        output_row = QHBoxLayout()
        self.output_folder = QLineEdit()
        pick_dir_btn = QPushButton("瀏覽")
        pick_dir_btn.clicked.connect(self.browse_output_folder)
        output_row.addWidget(self.output_folder)
        output_row.addWidget(pick_dir_btn)
        output_box.setLayout(output_row)

        # Extraction log: three equally stretched columns.
        log_box = QGroupBox("提取日志")
        log_box_layout = QVBoxLayout()
        self.log_table = QTableWidget(0, 3)
        self.log_table.setHorizontalHeaderLabels(["文件名", "圖片名稱", "保存路徑"])
        header = self.log_table.horizontalHeader()
        for column in range(3):
            header.setSectionResizeMode(column, QHeaderView.Stretch)
        log_box_layout.addWidget(self.log_table)
        log_box.setLayout(log_box_layout)

        # Button that starts the extraction.
        extract_btn = QPushButton("提取圖片")
        extract_btn.clicked.connect(self.extract_and_log)

        # Assemble the window top to bottom.
        root_layout.addWidget(files_box)
        root_layout.addLayout(buttons_row)
        root_layout.addWidget(output_box)
        root_layout.addWidget(log_box)
        root_layout.addWidget(extract_btn)

        self.setLayout(root_layout)

        # Allow files to be dragged onto the window.
        self.setAcceptDrops(True)

    def browse_files(self):
        """Show a multi-select file dialog and append the chosen files to the list."""
        name_filter = "Office Files (*.doc *.docx *.xls *.xlsx *.ppt *.pptx)"
        chosen_paths, _selected_filter = QFileDialog.getOpenFileNames(self, "選擇文件", "", name_filter)
        if not chosen_paths:
            # Nothing was chosen; leave the list unchanged.
            return
        self.file_list.addItems(chosen_paths)

    def remove_files(self):
        """Remove every currently selected entry from the file list."""
        file_list = self.file_list
        for selected in file_list.selectedItems():
            # Rows shift as items are taken, so look the row up each time.
            row_index = file_list.row(selected)
            file_list.takeItem(row_index)

    def browse_output_folder(self):
        """Show a directory picker and copy the chosen path into the output field."""
        chosen_dir = QFileDialog.getExistingDirectory(self, "選擇輸出目錄")
        if not chosen_dir:
            # Nothing was chosen; keep whatever the field already holds.
            return
        self.output_folder.setText(chosen_dir)

    def extract_and_log(self):
        """Extract images from every listed file and show the results in the log table.

        Shows a warning and returns early when no file is listed, when no
        output directory is given, or when the output directory cannot be
        created. Otherwise each result produced by ``extract_images`` becomes
        one log row ``(file_path, image_name, image_path_or_message)``; a
        result without an image name is logged with ``"N/A"``.

        Any exception raised while processing one file is turned into an
        error row for that file so the remaining files are still processed.
        """
        files = [self.file_list.item(i).text() for i in range(self.file_list.count())]
        # The field is editable, so tolerate stray whitespace around the path.
        output_dir = self.output_folder.text().strip()

        if not files:
            QMessageBox.warning(self, "警告", "請至少添加一個文件。")
            return

        if not output_dir:
            QMessageBox.warning(self, "警告", "請選擇輸出目錄。")
            return

        # A typed path may not exist yet; create it before any image is saved.
        try:
            os.makedirs(output_dir, exist_ok=True)
        except OSError as e:
            QMessageBox.warning(self, "警告", f"錯誤：{str(e)}")
            return

        log_data = []  # rows for the log table

        for file_path in files:
            try:
                for image_name, image_path in self.extract_images(file_path, output_dir):
                    log_data.append((file_path, image_name or "N/A", image_path))
            except Exception as e:
                # Per-file boundary: record the failure and continue with the
                # next file instead of letting the exception escape the slot.
                log_data.append((file_path, "N/A", f"錯誤：{str(e)}"))

        self.update_log_table(log_data)

    def update_log_table(self, log_data):
        """Replace the log table's contents with one row per entry of ``log_data``.

        Each entry is a sequence of cell texts, written left to right.
        """
        table = self.log_table
        table.setRowCount(0)  # drop all existing rows
        for entry in log_data:
            row_index = table.rowCount()
            table.insertRow(row_index)
            for column_index, text in enumerate(entry):
                table.setItem(row_index, column_index, QTableWidgetItem(text))

    def extract_images(self, file_path, output_dir):
        """Call the extractor that matches the file's extension.

        The extension is matched case-insensitively, so ``REPORT.DOCX`` is
        handled exactly like ``report.docx``.

        Args:
            file_path: Path of the Office document.
            output_dir: Directory the extracted images are written to.

        Returns:
            Whatever the selected ``extract_images_from_*`` method returns.

        Raises:
            ValueError: If the extension is not one of .docx, .xlsx, .pptx,
                .doc, .xls or .ppt.
        """
        extractors = {
            ".docx": self.extract_images_from_docx,
            ".xlsx": self.extract_images_from_xlsx,
            ".pptx": self.extract_images_from_pptx,
            ".doc": self.extract_images_from_doc,
            ".xls": self.extract_images_from_xls,
            ".ppt": self.extract_images_from_ppt,
        }
        lowered_path = file_path.lower()
        for suffix, extractor in extractors.items():
            if lowered_path.endswith(suffix):
                # Pass the original path through; only the comparison is lowercased.
                return extractor(file_path, output_dir)
        raise ValueError("不支持的文件類型")

    def extract_images_from_docx(self, file_path, output_dir):
        """Extract the embedded images of a .docx file into ``output_dir``.

        Images are found through the main document part's relationships,
        selected by relationship type. Linked (external) pictures are skipped
        because they have no embedded part. The stored bytes are written
        unchanged rather than re-encoded, so formats PIL cannot write are
        still saved and nothing is lost to recompression.

        Files are named ``<document name>_image<N>.<ext>``. ``ext`` is the
        format PIL detects, falling back to the extension of the part name
        inside the package when PIL cannot identify the data.

        Yields:
            ``(image_name, image_path)`` for every saved image;
            ``(None, message)`` when no image is found or an error occurs.
        """
        try:
            doc = Document(file_path)
            base_name = os.path.splitext(os.path.basename(file_path))[0]
            image_count = 0
            for rel in doc.part.rels.values():
                if rel.is_external or not rel.reltype.endswith("/image"):
                    continue

                part = rel.target_part
                image_data = part.blob

                # PIL is used only to detect the format, never to re-encode.
                try:
                    with Image.open(BytesIO(image_data)) as image:
                        ext = (image.format or "").lower()
                except (OSError, ValueError):
                    ext = ""
                if not ext:
                    part_ext = os.path.splitext(str(part.partname))[1]
                    ext = part_ext.lstrip(".").lower() or "bin"

                image_count += 1
                image_name = f"{base_name}_image{image_count}.{ext}"
                image_path = os.path.join(output_dir, image_name)
                with open(image_path, "wb") as out_file:
                    out_file.write(image_data)

                yield image_name, image_path

            if image_count == 0:
                yield None, "未找到圖片。"

        except Exception as e:
            # Per-file boundary: report the failure as a log row.
            yield None, f"錯誤：{str(e)}"

    def extract_images_from_xlsx(self, file_path, output_dir):
        """Extract the images of every sheet of an .xlsx file into ``output_dir``.

        openpyxl keeps each image's source in ``image.ref``, which may be a
        file-like object, a path or raw bytes; all three are handled here.
        The stored bytes are written unchanged, and PIL is used only to
        detect the format for the file extension.

        Files are named ``<workbook name>_image<N>.<ext>``, numbered across
        all sheets.

        Yields:
            ``(image_name, image_path)`` for every saved image;
            ``(None, message)`` when no image is found or an error occurs.
        """
        try:
            workbook = load_workbook(file_path)
            base_name = os.path.splitext(os.path.basename(file_path))[0]
            image_count = 0
            for sheet_name in workbook.sheetnames:
                # Not every sheet type is guaranteed to expose ``_images``
                # (e.g. chartsheets), hence the default.
                for image in getattr(workbook[sheet_name], "_images", []):
                    ref = image.ref
                    if isinstance(ref, (bytes, bytearray)):
                        image_data = bytes(ref)
                    elif hasattr(ref, "read"):
                        ref.seek(0)  # the stream may already have been read
                        image_data = ref.read()
                    else:
                        with open(ref, "rb") as source:
                            image_data = source.read()

                    with Image.open(BytesIO(image_data)) as img:
                        ext = img.format.lower()

                    image_count += 1
                    image_name = f"{base_name}_image{image_count}.{ext}"
                    image_path = os.path.join(output_dir, image_name)
                    with open(image_path, "wb") as out_file:
                        out_file.write(image_data)

                    yield image_name, image_path

            if image_count == 0:
                yield None, "未找到圖片。"

        except Exception as e:
            # Per-file boundary: report the failure as a log row.
            yield None, f"錯誤：{str(e)}"

    def extract_images_from_pptx(self, file_path, output_dir):
        """Extract the pictures of every slide of a .pptx file into ``output_dir``.

        Picture shapes are collected from each slide, including pictures
        nested inside group shapes. Each picture's stored bytes are written
        unchanged, with the extension python-pptx reports for the image, so
        formats PIL cannot write are still saved. A picture shape without
        embedded image data is skipped.

        Files are named ``<presentation name>_image<N>.<ext>``.

        Yields:
            ``(image_name, image_path)`` for every saved picture;
            ``(None, message)`` when no picture is found or an error occurs.
        """
        group_type = 6  # MSO_SHAPE_TYPE.GROUP
        picture_type = 13  # MSO_SHAPE_TYPE.PICTURE

        def iter_pictures(shapes):
            """Yield picture shapes, descending into group shapes."""
            for shape in shapes:
                if shape.shape_type == group_type:
                    yield from iter_pictures(shape.shapes)
                elif shape.shape_type == picture_type:
                    yield shape

        try:
            prs = Presentation(file_path)
            base_name = os.path.splitext(os.path.basename(file_path))[0]
            image_count = 0

            for slide in prs.slides:
                for shape in iter_pictures(slide.shapes):
                    try:
                        picture = shape.image
                    except ValueError:
                        # No embedded image behind this shape (presumably a
                        # linked picture); there is nothing to save.
                        continue

                    image_count += 1
                    ext = picture.ext.lower()
                    image_name = f"{base_name}_image{image_count}.{ext}"
                    image_path = os.path.join(output_dir, image_name)
                    with open(image_path, "wb") as out_file:
                        out_file.write(picture.blob)

                    yield image_name, image_path

            if image_count == 0:
                yield None, "未找到圖片。"

        except Exception as e:
            # Per-file boundary: report the failure as a log row.
            yield None, f"錯誤：{str(e)}"

    def extract_images_from_doc(self, file_path, output_dir):
        """Extract the images of a legacy .doc file (Windows only, needs Word).

        Word's object model does not expose a picture's bytes directly, so
        the document is opened through COM, saved as a temporary .docx with
        the same base name, and that copy is handed to
        ``extract_images_from_docx``. The saved image names therefore still
        start with the original document's name.

        Word is always closed again, including when opening or converting
        fails, and the temporary copy is removed afterwards.

        Yields:
            ``(image_name, image_path)`` for every saved image;
            ``(None, message)`` when no image is found or an error occurs.
        """
        import shutil
        import tempfile

        wd_format_xml_document = 12  # WdSaveFormat.wdFormatXMLDocument (.docx)

        try:
            base_name = os.path.splitext(os.path.basename(file_path))[0]
            temp_dir = tempfile.mkdtemp()
            try:
                converted_path = os.path.join(temp_dir, base_name + ".docx")

                word = None
                doc = None
                try:
                    word = win32com.client.Dispatch("Word.Application")
                    # Word resolves relative paths against its own working
                    # directory, so always pass an absolute path.
                    doc = word.Documents.Open(os.path.abspath(file_path))
                    doc.SaveAs2(converted_path, wd_format_xml_document)
                finally:
                    try:
                        if doc is not None:
                            doc.Close(False)  # do not save changes to the source
                    finally:
                        if word is not None:
                            word.Quit()

                yield from self.extract_images_from_docx(converted_path, output_dir)
            finally:
                # Best-effort cleanup; a leftover temp file must not turn a
                # successful extraction into an error row.
                shutil.rmtree(temp_dir, ignore_errors=True)

        except Exception as e:
            # Per-file boundary: report the failure as a log row.
            yield None, f"錯誤：{str(e)}"

    def extract_images_from_xls(self, file_path, output_dir):
        """Report that image extraction from legacy .xls files is not supported.

        The workbook is still opened with ``xlrd`` so that an unreadable file
        (or a missing ``xlrd`` installation) is reported as an error rather
        than as "not supported". Nothing is written to ``output_dir``.

        Yields:
            ``(None, message)`` — either the "not supported" notice or an
            error description.
        """
        try:
            import xlrd

            # Opened only to validate the file; the workbook is not used.
            xlrd.open_workbook(file_path)
            yield None, "暫不支持的圖片提取。"

        except Exception as e:
            # Per-file boundary: report the failure as a log row.
            yield None, f"錯誤：{str(e)}"

    def extract_images_from_ppt(self, file_path, output_dir):
        """Extract the pictures of a legacy .ppt file (Windows only, needs PowerPoint).

        PowerPoint's object model does not expose a picture's bytes directly,
        so the presentation is opened through COM, saved as a temporary .pptx
        with the same base name, and that copy is handed to
        ``extract_images_from_pptx``. The saved image names therefore still
        start with the original presentation's name.

        PowerPoint is always closed again, including when opening or
        converting fails, and the temporary copy is removed afterwards.

        Yields:
            ``(image_name, image_path)`` for every saved picture;
            ``(None, message)`` when no picture is found or an error occurs.
        """
        import shutil
        import tempfile

        pp_save_as_open_xml = 24  # PpSaveAsFileType.ppSaveAsOpenXMLPresentation (.pptx)

        try:
            base_name = os.path.splitext(os.path.basename(file_path))[0]
            temp_dir = tempfile.mkdtemp()
            try:
                converted_path = os.path.join(temp_dir, base_name + ".pptx")

                ppt = None
                presentation = None
                try:
                    ppt = win32com.client.Dispatch("PowerPoint.Application")
                    # Open(FileName, ReadOnly, Untitled, WithWindow): absolute
                    # path, read-only, keep the file name, no visible window.
                    presentation = ppt.Presentations.Open(
                        os.path.abspath(file_path), True, False, False
                    )
                    presentation.SaveAs(converted_path, pp_save_as_open_xml)
                finally:
                    try:
                        if presentation is not None:
                            presentation.Close()
                    finally:
                        if ppt is not None:
                            ppt.Quit()

                yield from self.extract_images_from_pptx(converted_path, output_dir)
            finally:
                # Best-effort cleanup; a leftover temp file must not turn a
                # successful extraction into an error row.
                shutil.rmtree(temp_dir, ignore_errors=True)

        except Exception as e:
            # Per-file boundary: report the failure as a log row.
            yield None, f"錯誤：{str(e)}"

    def dragEnterEvent(self, event):
        """Accept a drag entering the window when it carries URLs."""
        mime = event.mimeData()
        if not mime.hasUrls():
            # The event is left unaccepted.
            return
        event.acceptProposedAction()

    def dropEvent(self, event):
        """Append every dropped URL that resolves to an existing local file."""
        dropped_paths = (url.toLocalFile() for url in event.mimeData().urls())
        for local_path in dropped_paths:
            # Directories and URLs that are not existing files are ignored.
            if os.path.isfile(local_path):
                self.file_list.addItem(local_path)

# Launch the application when the file is run as a script.
if __name__ == "__main__":
    qt_app = QApplication(sys.argv)
    main_window = OptimizedUI()
    main_window.show()
    # Run the Qt event loop and use its result as the process exit code.
    exit_code = qt_app.exec_()
    sys.exit(exit_code)

到此這篇關于基于Python開發Office文檔圖片提取器的文章就介紹到這了,更多相關Python Office文檔圖片提取器內容請搜索腳本之家以前的文章或繼續瀏覽下面的相關文章希望大家以后多多支持腳本之家！

您可能感興趣的文章: