使用Python實現分組數據并保存到單獨的文件中

更新時間：2024年04月09日 08:17:33 作者：懸崖上的金魚

當處理大型數據集時,通常需要將數據分組,并將每個分組的數據保存到單獨的文件中,本文將使用 Python 中的 pandas 庫來實現這一目標,需要的可以參考下

Python分組數據并保存到單獨的文件中

步驟 1: 導入所需的庫

import os
import pandas as pd

步驟 2: 讀取 Excel 數據

# Read the source workbook into `df` (the path is hard-coded for this example).
df = pd.read_excel("C:\\Users\\liuchunlin2\\Desktop\\新建XLSX 工作表.xlsx")

步驟 3: 根據指定字段分組數據

# Group the rows by the school, class and teacher columns.
# NOTE(review): the names must match the workbook's header cells exactly —
# confirm them against the actual spreadsheet.
grouped = df.groupby(['學校', '班級', '老師'])

步驟 4: 創建保存拆分數據的文件夾

# Output directory for the per-group workbooks. The name must not contain
# characters such as "?" that Windows rejects in path components.
folder_path = "C:\\Users\\liuchunlin2\\Desktop\\拆分數據"
# exist_ok=True makes this a no-op when the directory already exists.
os.makedirs(folder_path, exist_ok=True)

步驟 5: 遍歷分組數據并保存到不同的 Excel 文件中

# Write every (school, class, teacher) group to its own workbook inside
# folder_path, named "<school>_<class>_<teacher>.xlsx".
for (school, grade, teacher), group in grouped:
    target = os.path.join(folder_path, f"{school}_{grade}_{teacher}.xlsx")
    group.to_excel(target, index=False)

創建一個簡單的圖形用戶界面，用于選擇 Excel 文件并指定分組列，然后將數據按照分組保存到不同的 Excel 文件中

步驟 1: 導入所需的庫

import tkinter as tk  # 導(dǎo)入 tkinter 模塊，用于創(chuàng)建圖形用戶界面
from tkinter import filedialog  # 導(dǎo)入 filedialog 子模塊，用于打開文件對(duì)話框
import pandas as pd  # 導(dǎo)入 pandas 庫(kù)，用于數(shù)據(jù)處理
import os  # 導(dǎo)入 os 模塊，用于文件和目錄操作

步驟 2: 定義函數，用于打開文件對話框并選擇 Excel 文件路徑

def browse_file():
    """Let the user pick an .xlsx file and show its path in the file entry.

    Opens a file-open dialog filtered to ``*.xlsx``. If the dialog is
    dismissed without a selection, the entry box is left unchanged.
    """
    filepath = filedialog.askopenfilename(filetypes=[("Excel files", "*.xlsx")])
    if not filepath:
        # The dialog yields an empty value on cancel; keep the previously
        # entered path instead of wiping it.
        return
    # Replace whatever is currently in the entry box with the chosen path.
    file_entry.delete(0, tk.END)
    file_entry.insert(0, filepath)

步驟 3: 定義函數，用于處理數據并將其按指定列分組保存為多個 Excel 文件

def process_data():
    """Split the selected workbook into one .xlsx file per group.

    Reads the file path from ``file_entry`` and the non-empty column names
    from ``column_entries``, groups the sheet by those columns and writes
    each group to ``Splitted_Data/<key1>_<key2>....xlsx`` (relative to the
    current working directory). The outcome, or the error text if anything
    fails, is shown in ``result_label``.
    """
    input_file = file_entry.get()
    group_columns = [entry.get() for entry in column_entries if entry.get()]

    # Both a file path and at least one column name are required.
    if not input_file or not group_columns:
        result_label.config(text="Please provide input file path and group columns.")
        return

    try:
        df = pd.read_excel(input_file)
        grouped = df.groupby(group_columns)

        # exist_ok=True avoids the separate existence check.
        folder_name = "Splitted_Data"
        os.makedirs(folder_name, exist_ok=True)

        for name, group in grouped:
            # Depending on the pandas version, a single grouping column yields
            # either a bare value or a 1-tuple. Joining a bare string would
            # split it into characters ("a_b_c"), so normalise to a tuple.
            key_parts = name if isinstance(name, tuple) else (name,)
            # Keys may be numbers or dates, which str.join() rejects.
            stem = "_".join(str(part) for part in key_parts)
            group.to_excel(os.path.join(folder_name, f"{stem}.xlsx"), index=False)

        result_label.config(text="Data processing completed successfully.")
    except Exception as e:
        # GUI boundary: report any failure to the user instead of crashing
        # the callback.
        result_label.config(text=f"Error occurred: {str(e)}")

步驟 4: 創建 tkinter 窗口對象并設置標題

# Create the main application window; all widgets below use it as their parent.
root = tk.Tk()
root.title("Excel Data Grouping Tool")  # set the window title

步驟 5: 創建標簽和輸入框，用于顯示和輸入 Excel 文件路徑

# Row 0: caption, the file-path entry box, and a Browse button wired to browse_file.
file_label = tk.Label(root, text="Excel File Path:")
file_label.grid(row=0, column=0, padx=5, pady=5, sticky="w")
file_entry = tk.Entry(root, width=50)
file_entry.grid(row=0, column=1, padx=5, pady=5, sticky="we")
browse_button = tk.Button(root, text="Browse", command=browse_file)
browse_button.grid(row=0, column=2, padx=5, pady=5)

步驟 6: 創建標簽、輸入框和按鈕，用于指定分組列名

# Row 1: caption and the first group-column entry box.
column_label = tk.Label(root, text="Group Columns:")
column_label.grid(row=1, column=0, padx=5, pady=5, sticky="w")
column_entry = tk.Entry(root, width=50)
column_entry.grid(row=1, column=1, padx=5, pady=5, sticky="we")
# column_entries holds every group-column entry box; it starts with the first one.
column_entries = [column_entry]

# The lambda defers the name lookup until the button is clicked, because
# add_column_entry is defined further down the script.
add_column_button = tk.Button(root, text="Add Column", command=lambda: add_column_entry())
add_column_button.grid(row=1, column=2, padx=5, pady=5)

步驟 7: 創建函數，用于添加新的分組列輸入框

def add_column_entry():
    """Append one more group-column entry box below the existing ones.

    The new box is placed in column 1 on the row after the last entry box,
    and the result label is moved down so that the growing list of entry
    boxes never lands on the label's grid row.
    """
    new_column_entry = tk.Entry(root, width=50)
    new_column_entry.grid(row=len(column_entries) + 1, column=1, padx=5, pady=5, sticky="we")
    column_entries.append(new_column_entry)
    # The label is gridded once at a fixed row when the window is built;
    # without re-gridding it, the third added box would share that row and
    # overlap it.
    result_label.grid(row=len(column_entries) + 3, column=0, columnspan=3, padx=5, pady=5)

步驟 8: 創建按鈕，用于處理數據

# Button that runs process_data when clicked.
process_button = tk.Button(root, text="Process Data", command=process_data)
process_button.grid(row=2, column=2, padx=5, pady=10, sticky="e")  # anchored to the right edge of its cell

步驟 9: 創建標簽，用于顯示處理結果信息

# Status label spanning all three columns; process_data writes its messages here.
# Its row is computed once, from the number of entry boxes present at this point.
result_label = tk.Label(root, text="")
result_label.grid(row=len(column_entries) + 3, column=0, columnspan=3, padx=5, pady=5)

步驟 10: 啟動主事件循環

# Start the Tk main event loop.
root.mainloop()

完整代碼

import tkinter as tk  # 導(dǎo)入 tkinter 模塊，用于創(chuàng)建圖形用戶界面
from tkinter import filedialog  # 導(dǎo)入 filedialog 子模塊，用于打開文件對(duì)話框
import pandas as pd  # 導(dǎo)入 pandas 庫(kù)，用于數(shù)據(jù)處理
import os  # 導(dǎo)入 os 模塊，用于文件和目錄操作

def browse_file():
    """Let the user pick an .xlsx file and show its path in the file entry.

    Opens a file-open dialog filtered to ``*.xlsx``. If the dialog is
    dismissed without a selection, the entry box is left unchanged.
    """
    filepath = filedialog.askopenfilename(filetypes=[("Excel files", "*.xlsx")])
    if not filepath:
        # The dialog yields an empty value on cancel; keep the previously
        # entered path instead of wiping it.
        return
    # Replace whatever is currently in the entry box with the chosen path.
    file_entry.delete(0, tk.END)
    file_entry.insert(0, filepath)

def process_data():
    """Split the selected workbook into one .xlsx file per group.

    Reads the file path from ``file_entry`` and the non-empty column names
    from ``column_entries``, groups the sheet by those columns and writes
    each group to ``Splitted_Data/<key1>_<key2>....xlsx`` (relative to the
    current working directory). The outcome, or the error text if anything
    fails, is shown in ``result_label``.
    """
    input_file = file_entry.get()
    group_columns = [entry.get() for entry in column_entries if entry.get()]

    # Both a file path and at least one column name are required.
    if not input_file or not group_columns:
        result_label.config(text="Please provide input file path and group columns.")
        return

    try:
        df = pd.read_excel(input_file)
        grouped = df.groupby(group_columns)

        # exist_ok=True avoids the separate existence check.
        folder_name = "Splitted_Data"
        os.makedirs(folder_name, exist_ok=True)

        for name, group in grouped:
            # Depending on the pandas version, a single grouping column yields
            # either a bare value or a 1-tuple. Joining a bare string would
            # split it into characters ("a_b_c"), so normalise to a tuple.
            key_parts = name if isinstance(name, tuple) else (name,)
            # Keys may be numbers or dates, which str.join() rejects.
            stem = "_".join(str(part) for part in key_parts)
            group.to_excel(os.path.join(folder_name, f"{stem}.xlsx"), index=False)

        result_label.config(text="Data processing completed successfully.")
    except Exception as e:
        # GUI boundary: report any failure to the user instead of crashing
        # the callback.
        result_label.config(text=f"Error occurred: {str(e)}")

# Create the main application window; all widgets below use it as their parent.
root = tk.Tk()
root.title("Excel Data Grouping Tool")  # set the window title

# Row 0: caption, the file-path entry box, and a Browse button wired to browse_file.
file_label = tk.Label(root, text="Excel File Path:")
file_label.grid(row=0, column=0, padx=5, pady=5, sticky="w")
file_entry = tk.Entry(root, width=50)
file_entry.grid(row=0, column=1, padx=5, pady=5, sticky="we")
browse_button = tk.Button(root, text="Browse", command=browse_file)
browse_button.grid(row=0, column=2, padx=5, pady=5)

# Row 1: caption and the first group-column entry box.
column_label = tk.Label(root, text="Group Columns:")
column_label.grid(row=1, column=0, padx=5, pady=5, sticky="w")
column_entry = tk.Entry(root, width=50)
column_entry.grid(row=1, column=1, padx=5, pady=5, sticky="we")
# column_entries holds every group-column entry box; it starts with the first one.
column_entries = [column_entry]

# The lambda defers the name lookup until the button is clicked, because
# add_column_entry is defined further down the script.
add_column_button = tk.Button(root, text="Add Column", command=lambda: add_column_entry())
add_column_button.grid(row=1, column=2, padx=5, pady=5)

def add_column_entry():
    """Append one more group-column entry box below the existing ones.

    The new box is placed in column 1 on the row after the last entry box,
    and the result label is moved down so that the growing list of entry
    boxes never lands on the label's grid row.
    """
    new_column_entry = tk.Entry(root, width=50)
    new_column_entry.grid(row=len(column_entries) + 1, column=1, padx=5, pady=5, sticky="we")
    column_entries.append(new_column_entry)
    # The label is gridded once at a fixed row when the window is built;
    # without re-gridding it, the third added box would share that row and
    # overlap it.
    result_label.grid(row=len(column_entries) + 3, column=0, columnspan=3, padx=5, pady=5)

# Button that runs process_data when clicked.
process_button = tk.Button(root, text="Process Data", command=process_data)
process_button.grid(row=2, column=2, padx=5, pady=10, sticky="e")  # anchored to the right edge of its cell

# Status label spanning all three columns; process_data writes its messages here.
# Its row is computed once, from the number of entry boxes present at this point.
result_label = tk.Label(root, text="")
result_label.grid(row=len(column_entries) + 3, column=0, columnspan=3, padx=5, pady=5)

# Start the Tk main event loop.
root.mainloop()

以上就是使用Python實現分組數據并保存到單獨的文件中的詳細內容，更多關于Python分組數據的資料請關注腳本之家其它相關文章！

您可能感興趣的文章: