Python查找大文件的實用腳本分享

更新時間：2024年11月04日 11:09:10 作者：zhongcx01

這篇文章主要為大家分享一個利用Python查找大文件的實用腳本,文中的示例代碼講解詳細,感興趣的小伙伴可以跟隨小編一起學習一下

C盤滿了，寫了一個python腳本，2分多鐘能找到比較大的文件，然后手動刪除或者遷移D盤，最后發現是微信小程序開發工具緩存文件太多了，騰出來10個G念頭通達了，這里備份一下腳本。

運行工具：PyCharm 2024.1.3 (Community Edition)

完整代碼

import os
import threading
import time
import sys
from threading import Event
 
def is_large(file_path, threshold_mb):
    """Return True when the file at ``file_path`` is larger than ``threshold_mb`` MiB.

    Args:
        file_path: Path of the file to measure.
        threshold_mb: Size limit in units of 1024 * 1024 bytes.

    Raises:
        OSError: If the file cannot be stat'ed.
    """
    bytes_per_mb = 1024 * 1024
    size_in_mb = os.stat(file_path).st_size / bytes_per_mb
    return size_in_mb > threshold_mb
 
def show_loading_animation(stop_event, interval=0.5):
    """Draw a simple text spinner on stdout until ``stop_event`` is set.

    The frames '.', '..', '...', '....' are shown in turn on a single line
    (rewritten with a carriage return). The loop ends as soon as the event is
    set, so the caller's ``join()`` returns promptly whatever the scan duration.

    Args:
        stop_event: A ``threading.Event`` (or compatible object providing
            ``is_set()`` and ``wait(timeout)``) that signals the animation
            to stop.
        interval: Seconds each frame stays on screen.
    """
    loading_chars = ['.', '..', '...', '....']
    frame_width = len(loading_chars[-1])
    frame_index = 0

    while not stop_event.is_set():
        frame = loading_chars[frame_index % len(loading_chars)]
        # Pad to the widest frame so a short frame erases the dots left
        # behind by the longer frame drawn before it.
        sys.stdout.write('\r正在查找大文件... ' + frame.ljust(frame_width))
        sys.stdout.flush()
        frame_index += 1
        # Event.wait is an interruptible sleep: it returns True the moment
        # the event is set instead of sleeping out the full interval.
        if stop_event.wait(interval):
            break
 
def filter_files(files, skip_file_keywords, include_file_keywords, extension=None):
    """Filter file names by skip/include keywords and an optional extension.

    A name is kept when it contains none of the skip keywords, contains at
    least one include keyword (if any are given), and ends with
    ``'.' + extension`` (if an extension is given). Matching is plain,
    case-sensitive substring matching.

    Empty keywords are ignored: ``''`` is a substring of every name, so a
    list such as ``['']`` (what ``''.split(',')`` produces) would otherwise
    reject every file.

    Args:
        files: Iterable of file names.
        skip_file_keywords: Keywords that exclude a file; may be empty or None.
        include_file_keywords: Keywords of which at least one must match;
            may be empty or None to accept everything.
        extension: Extension without the leading dot, or None for no filter.

    Returns:
        A list of the names that pass all filters, in their original order.
    """
    skip_keywords = [kw for kw in (skip_file_keywords or ()) if kw]
    include_keywords = [kw for kw in (include_file_keywords or ()) if kw]
    suffix = None if extension is None else '.' + extension

    def keep(name):
        if any(kw in name for kw in skip_keywords):
            return False
        if include_keywords and not any(kw in name for kw in include_keywords):
            return False
        return suffix is None or name.endswith(suffix)

    return [name for name in files if keep(name)]
 
def filter_dirs(dirs, skip_dir_keywords, include_dir_keywords):
    """Filter directory names by skip/include keywords.

    A directory is kept when it contains none of the skip keywords and, if
    include keywords are given, at least one of them. Matching is plain,
    case-sensitive substring matching.

    Empty keywords are ignored: ``''`` is a substring of every name, so a
    list such as ``['']`` (what ``''.split(',')`` produces) would otherwise
    reject every directory.

    Args:
        dirs: Iterable of directory names.
        skip_dir_keywords: Keywords that exclude a directory; may be empty
            or None.
        include_dir_keywords: Keywords of which at least one must match;
            may be empty or None to accept everything.

    Returns:
        A list of the directory names that pass, in their original order.
    """
    skip_keywords = [kw for kw in (skip_dir_keywords or ()) if kw]
    include_keywords = [kw for kw in (include_dir_keywords or ()) if kw]

    kept = []
    for dir_name in dirs:
        if any(kw in dir_name for kw in skip_keywords):
            continue
        if include_keywords and not any(kw in dir_name for kw in include_keywords):
            continue
        kept.append(dir_name)
    return kept
 
def get_all_large_files_with_loading(dir_path, threshold_mb, skip_dir_keywords, skip_file_keywords, include_dir_keywords, include_file_keywords, extension=None, interval=0.5):
    """Walk ``dir_path`` and report every file larger than ``threshold_mb``.

    A loading animation runs in a background thread while the tree is being
    walked. When the walk ends, the matches are printed from largest to
    smallest, followed by the elapsed time.

    Args:
        dir_path: Root directory of the search.
        threshold_mb: Size threshold in units of 1024 * 1024 bytes.
        skip_dir_keywords: Passed to ``filter_dirs`` as its skip keywords.
        skip_file_keywords: Passed to ``filter_files`` as its skip keywords.
        include_dir_keywords: Passed to ``filter_dirs`` as its include keywords.
        include_file_keywords: Passed to ``filter_files`` as its include keywords.
        extension: Optional extension (without dot) passed to ``filter_files``.
        interval: Frame interval passed to ``show_loading_animation``.

    Returns:
        The list of ``{'path': str, 'size': float}`` dicts (size in MB),
        sorted by size in descending order.
    """
    start_time = time.time()
    stop_event = Event()
    large_files = []
    bytes_per_mb = 1024 * 1024

    loading_thread = threading.Thread(
        target=show_loading_animation,
        args=(stop_event, interval),
        daemon=True,
    )
    loading_thread.start()

    try:
        for root, dirs, files in os.walk(dir_path):
            # Slice assignment mutates the list os.walk holds, so filtered-out
            # directories are never descended into.
            dirs[:] = filter_dirs(dirs, skip_dir_keywords, include_dir_keywords)
            for file in filter_files(files, skip_file_keywords, include_file_keywords, extension):
                full_path = os.path.join(root, file)
                try:
                    # Stat once: the size used for the comparison is the size
                    # that gets reported, even if the file changes meanwhile.
                    size_mb = os.path.getsize(full_path) / bytes_per_mb
                except OSError as e:
                    print(f"警告訪問文件出錯 {full_path} 出錯信息: {e}")
                    continue
                if size_mb > threshold_mb:
                    large_files.append({'path': full_path, 'size': size_mb})
    finally:
        # Always stop the animation, even if the walk itself fails.
        stop_event.set()
        loading_thread.join()

    large_files.sort(key=lambda x: x['size'], reverse=True)
    for file_info in large_files:
        print(f"文件路徑: {file_info['path']} | 文件大小: {file_info['size']:.2f} MB")

    end_time = time.time()
    print(f"\n查找共耗時: {end_time - start_time:.2f} 秒")
    return large_files
 
def _split_keywords(raw):
    """Split a comma-separated keyword string, dropping blank entries."""
    # ''.split(',') yields [''], and '' is a substring of every name, so
    # blank entries must be removed or "just press Enter" filters everything.
    return [part.strip() for part in raw.split(',') if part.strip()]


def main():
    """Prompt for the search parameters and run the large-file search.

    Reads the directory, the size threshold, the skip/include keywords and
    an optional extension from standard input. Pressing Enter at a keyword
    prompt means "no keywords". An invalid threshold or an OSError raised
    by the search is reported on stdout instead of propagating.
    """
    dir_path = input("請輸入要檢查的目錄路徑: ")
    try:
        threshold_mb = float(input("請輸入文件大小閾值(單位: MB): "))
        skip_dir_keywords = _split_keywords(input("請輸入要跳過的文件夾名關鍵詞，用逗號分隔(直接回車跳過，推薦modules,~~,.gradle): "))
        skip_file_keywords = _split_keywords(input("請輸入要跳過的文件名關鍵詞，用逗號分隔(直接回車跳過，推薦$): "))
        include_dir_keywords = _split_keywords(input("請輸入要包含的文件夾名關鍵詞，用逗號分隔(直接回車跳過): "))
        include_file_keywords = _split_keywords(input("請輸入要包含的文件名關鍵詞，用逗號分隔(直接回車跳過): "))
        # Accept "txt", ".txt" and " txt " alike; an empty answer means no filter.
        extension = input("請輸入要篩選的文件擴展名(例如：txt，可選，直接回車跳過): ").strip().strip('.') or None
        get_all_large_files_with_loading(dir_path, threshold_mb, skip_dir_keywords, skip_file_keywords, include_dir_keywords, include_file_keywords, extension)
        print("搜索結束.")
    except ValueError:
        print("錯誤：請輸入有效的數字作為文件大小閾值.")
    except OSError as e:
        print(e)


if __name__ == '__main__':
    main()

方法補充

除了上文的方法，小編還為大家整理了其他Python查找大文件的方法，希望對大家有所幫助

完整代碼如下

#! python3
#chapter09-test02.py - 找出一個文件夾內的大文件，并打印出大文件的絕對路徑
#-----為了防止運行時間過長，我把程序設置為了只收集前100個超過size的文件，它們并不是最大的100個
 
import os,pprint,sys
import timeit,time
 
 
#Decorator -- reports how long each call of the wrapped function took
def colocked_decorator(func):
    """Wrap ``func`` so every call prints its duration, arguments and result.

    The printed line has the form ``[<seconds>s] name(args) -> result``.

    Args:
        func: The callable to time.

    Returns:
        A wrapper that forwards all arguments to ``func`` and returns its
        result unchanged.
    """
    # Local import so this block does not depend on the file-level imports.
    import functools

    @functools.wraps(func)  # keep func's __name__ / __doc__ on the wrapper
    def colock(*args, **kwargs):
        startTime = timeit.default_timer()
        result = func(*args, **kwargs)  # run the wrapped function
        spendTime = timeit.default_timer() - startTime
        name = func.__name__
        # repr() each argument individually to rebuild a readable call string
        arg_parts = [repr(arg) for arg in args]
        arg_parts.extend('%s=%r' % (key, value) for key, value in kwargs.items())
        arg_str = ','.join(arg_parts)
        # The tuple on the right must supply exactly one value per placeholder.
        print('[%0.7fs] %s(%s) -> %r' % (spendTime, name, arg_str, result))
        return result
    return colock
 
#Find big files inside the given folder.
#Returns a list with the absolute paths of the big files found.
#folder - folder to search (walked recursively)
#size   - threshold in bytes; files strictly larger than this are "big"
#limit  - maximum number of paths to collect before the search stops
@colocked_decorator
def findBigFile(folder, size, limit=100):
    """Collect the absolute paths of up to ``limit`` files larger than ``size``.

    The walk stops as soon as ``limit`` matches have been collected, so the
    result holds the first matches encountered, not the largest files.
    Files whose size cannot be read are reported on stderr and skipped.

    Args:
        folder: Root folder to walk.
        size: Threshold in bytes, compared against ``os.path.getsize``.
        limit: Maximum number of paths to return.

    Returns:
        A list of absolute file paths.
    """
    bigFileAbs = []
    for foldername, subfolders, filenames in os.walk(folder):
        for filename in filenames:
            # Stop walking once the cap is reached; scanning the rest of the
            # tree could not add anything to the result.
            if len(bigFileAbs) >= limit:
                return bigFileAbs
            # getsize() needs the full path, not just the bare file name
            fileAbs = os.path.join(foldername, filename)
            try:
                fileSize = os.path.getsize(fileAbs)
            except OSError as err:
                # One unreadable file must not abort the whole scan.
                print('skipping %s: %s' % (fileAbs, err), file=sys.stderr)
                continue
            if fileSize > size:
                bigFileAbs.append(os.path.abspath(fileAbs))
    return bigFileAbs
 
#Helper that renders a byte count with a KB/MB/... unit; not used by this script.
#size - a file size in bytes, e.g. as returned by os.path.getsize()
#is_1024_byte - True converts in steps of 1024, False in steps of 1000 (default: 1024)
#Unit suffixes, keyed by the step size they belong to
SUFFIXES = {1000:['KB', 'MB', 'GB', 'TB', 'PB', 'EB', 'ZB', 'YB'],
            1024:['KiB', 'MiB', 'GiB', 'TiB', 'PiB', 'EiB', 'ZiB', 'YiB']}
def humanReadable_size(size,is_1024_byte=True):
    """Format a byte count as a string with one decimal and a unit suffix.

    Args:
        size: Size in bytes.
        is_1024_byte: When True (default) use multiples of 1024 and the
            KiB/MiB/... suffixes; when False use multiples of 1000 and the
            KB/MB/... suffixes.

    Returns:
        A string such as ``'1.5KiB'``. The smallest unit is KB/KiB, so sizes
        below one unit are rendered as a fraction (e.g. ``'0.5KiB'``).

    Raises:
        ValueError: If the size is too large for the biggest suffix.
    """
    mutiple=1024 if is_1024_byte else 1000
    #Divide step by step, moving to the next larger unit on each iteration
    for suffix in SUFFIXES[mutiple]:
        size/=mutiple
        #Stop once the value no longer fills a whole next-larger unit
        if size<mutiple:
            return '{0:.1f}{1}'.format(size,suffix)
    raise ValueError('number too large')
 
         
 
# Raw string: '\D' is not a valid escape sequence, and a plain literal
# triggers an invalid-escape warning on current Python versions.
path=r'F:\DCIM'
size=1000000    # threshold in bytes, compared against os.path.getsize()
# Only scan when the path actually exists
if os.path.exists(path):
    resultList=findBigFile(path,size)
    pprint.pprint(resultList)
else:
    print('You enter path does not exist')
    sys.exit()

到此這篇關于Python查找大文件的實用腳本分享的文章就介紹到這了,更多相關Python查找大文件內容請搜索腳本之家以前的文章或繼續瀏覽下面的相關文章，希望大家以后多多支持腳本之家！

您可能感興趣的文章: