Python處理.nfo文件格式的完整教程

更新時間：2025年07月24日 10:07:02 作者：Yant224

.nfo 文件是媒體文件的元數據容器,通常用于存儲電影、電視劇、音樂等多媒體信息的結構化數據,它們本質上是 XML 格式的文本文件,包含如標題、演員、劇情簡介等關鍵信息,本文給大家詳細介紹了Python處理.nfo文件格式的完整教程,需要的朋友可以參考下

掌握媒體元數據文件的操作技巧

一、什么是 .nfo 文件？

.nfo 文件是媒體文件的元數據容器，通常用于存儲電影、電視劇、音樂等多媒體信息的結構化數據。它們本質上是 XML 格式的文本文件，包含如標題、演員、劇情簡介等關鍵信息。

典型 .nfo 文件結構

<?xml version="1.0" encoding="UTF-8"?>
<movie>
  <title>黑客帝國</title>
  <originaltitle>The Matrix</originaltitle>
  <year>1999</year>
  <plot>一名年輕的網絡黑客發現看似正常的現實世界實際上是由名為"矩陣"的計算機人工智能系統控制的...</plot>
  <director>莉莉·沃卓斯基</director>
  <rating>8.7</rating>
  <genre>科幻</genre>
  <genre>動作</genre>
  <actor>
    <name>基努·里維斯</name>
    <role>尼奧</role>
    <thumb>https://example.com/keanu.jpg</thumb>
  </actor>
</movie>

二、核心庫介紹

1. 標準庫解決方案

import xml.etree.ElementTree as ET

2. 第三方庫推薦

pip install lxml beautifulsoup4 pynfo

三、完整處理流程

1. 讀取 .nfo 文件

def read_nfo(file_path):
    """Parse the .nfo file at *file_path* and return its root element.

    Returns:
        The root element of the parsed XML document, or ``None`` when the
        file does not exist or is not well-formed XML. In both failure
        cases a message is printed instead of an exception being raised.
    """
    try:
        document = ET.parse(file_path)
    except FileNotFoundError:
        print(f"文件不存在: {file_path}")
        return None
    except ET.ParseError as e:
        print(f"解析錯(cuò)誤: {e}")
        return None

    return document.getroot()

# Usage example
movie_nfo = read_nfo("The.Matrix.nfo")

2. 提取基本信息

def extract_movie_info(root):
    """Collect the basic movie fields from a parsed .nfo root element.

    Args:
        root: Root element of a parsed .nfo document, or ``None`` (the
            value ``read_nfo`` returns when loading fails).

    Returns:
        A dict with the keys ``title``, ``year``, ``plot``, ``director``,
        ``rating`` (text of the matching child, or ``None`` when absent),
        ``genres`` (list of the text of every ``<genre>`` child) and
        ``actors`` (list of ``{'name', 'role', 'thumb'}`` dicts, one per
        ``<actor>`` child). Returns ``None`` when *root* is ``None`` or
        its tag is not ``movie``.
    """
    # A failed load hands us None; treat that like any other non-movie
    # input rather than raising AttributeError on ``root.tag``.
    if root is None or root.tag != 'movie':
        return None

    return {
        'title': root.findtext('title'),
        'year': root.findtext('year'),
        'plot': root.findtext('plot'),
        'director': root.findtext('director'),
        'rating': root.findtext('rating'),
        'genres': [genre.text for genre in root.findall('genre')],
        'actors': [
            {
                'name': actor.findtext('name'),
                'role': actor.findtext('role'),
                'thumb': actor.findtext('thumb'),
            }
            for actor in root.findall('actor')
        ],
    }

# Usage example
# read_nfo returns None when the file is missing or malformed, and
# extract_movie_info returns None for non-<movie> documents, so guard both
# steps before subscripting the result.
movie_info = extract_movie_info(movie_nfo) if movie_nfo is not None else None
if movie_info is not None:
    print(f"電影標(biāo)題: {movie_info['title']}")

3. 修改 .nfo 文件

def update_nfo_rating(file_path, new_rating):
    """Set the ``<rating>`` of the .nfo file at *file_path* to *new_rating*.

    The first ``<rating>`` child of the root is reused when present;
    otherwise one is appended to the root. The value is stored as
    ``str(new_rating)`` and the file is rewritten in place as UTF-8 with
    an XML declaration.
    """
    document = ET.parse(file_path)
    movie = document.getroot()

    rating_node = movie.find('rating')
    rating_node = ET.SubElement(movie, 'rating') if rating_node is None else rating_node
    rating_node.text = str(new_rating)

    # Persist the change back to the same path.
    document.write(file_path, encoding='utf-8', xml_declaration=True)

# Usage example
update_nfo_rating("The.Matrix.nfo", 9.2)

4. 創建新的 .nfo 文件

def create_nfo_file(file_path, movie_data):
    """Build a ``<movie>`` document from *movie_data* and write it to *file_path*.

    Args:
        file_path: Destination path of the .nfo file.
        movie_data: Mapping with the keys ``title``, ``year``, ``plot``,
            ``genres`` (iterable of genre strings) and ``actors`` (iterable
            of mappings that each have ``name`` and ``role``).

    The file is written as UTF-8 with an XML declaration and a
    confirmation line is printed afterwards.
    """
    def add_text_child(parent, tag, text):
        # Append <tag>text</tag> under parent.
        child = ET.SubElement(parent, tag)
        child.text = text

    root = ET.Element('movie')

    # Scalar fields; only the year is converted to a string.
    add_text_child(root, 'title', movie_data['title'])
    add_text_child(root, 'year', str(movie_data['year']))
    add_text_child(root, 'plot', movie_data['plot'])

    # One <genre> element per genre.
    for genre_name in movie_data['genres']:
        add_text_child(root, 'genre', genre_name)

    # One <actor> element per cast member, holding <name> and <role>.
    for cast_member in movie_data['actors']:
        actor_node = ET.SubElement(root, 'actor')
        add_text_child(actor_node, 'name', cast_member['name'])
        add_text_child(actor_node, 'role', cast_member['role'])

    ET.ElementTree(root).write(file_path, encoding='utf-8', xml_declaration=True)
    print(f"已創(chuàng)建 .nfo 文件: {file_path}")

# Usage example: describe a movie as a plain dict and write it out as a
# new .nfo file.
new_movie = {
    'title': '盜夢(mèng)空間',
    'year': 2010,
    'plot': '一群能夠潛入他人夢(mèng)境竊取思想的盜賊...',
    'genres': ['科幻', '驚悚'],
    'actors': [
        {'name': '萊昂納多·迪卡普里奧', 'role': '科布'},
        {'name': '約瑟夫·高登-萊維特', 'role': '亞瑟'}
    ]
}
create_nfo_file("Inception.nfo", new_movie)

四、高級處理技巧

1. 使用 lxml 處理複雜文件

from lxml import etree

def parse_with_lxml(file_path):
    """Parse an .nfo file with lxml and print the roles of one named actor.

    The parser is created with ``remove_blank_text=True``. An XPath query
    then selects every ``<actor>`` whose ``<name>`` child equals the
    hard-coded actor name, and the first text node of each match's
    ``<role>`` is printed.

    Returns:
        The parsed lxml tree.
    """
    tree = etree.parse(file_path, etree.XMLParser(remove_blank_text=True))

    # XPath query for the matching <actor> elements anywhere in the tree.
    matching_actors = tree.getroot().xpath('//actor[name="萊昂納多·迪卡普里奧"]')
    for actor in matching_actors:
        print(f"角色: {actor.xpath('role/text()')[0]}")

    return tree

# Namespace-aware variant
def parse_with_namespace(file_path):
    """Print the text of the first ``<title>`` in the example namespace.

    The prefix ``ns`` is bound to ``http://www.example.com/nfo`` for the
    XPath lookup; the first matching element's text is printed.
    """
    document = etree.parse(file_path)
    prefix_map = {'ns': 'http://www.example.com/nfo'}

    matches = document.xpath('//ns:title', namespaces=prefix_map)
    title = matches[0].text
    print(f"帶命名空間的標(biāo)題: {title}")

2. 處理非標準 .nfo 文件

def handle_non_standard_nfo(file_path):
    """Leniently parse an .nfo file that may not be well-formed XML.

    The file is read as UTF-8 with undecodable bytes dropped, bare ``&``
    characters are escaped, and the result is parsed with BeautifulSoup's
    ``lxml-xml`` parser.

    Returns:
        A dict with ``title`` (text of the first ``<title>`` tag, or
        ``None`` when there is none) and ``soup`` (the BeautifulSoup
        object, for further processing by the caller).
    """
    import re

    from bs4 import BeautifulSoup

    with open(file_path, 'r', encoding='utf-8', errors='ignore') as f:
        content = f.read()

    # Escape only ampersands that do not already start an entity or
    # character reference. A blanket replace would turn an existing
    # "&amp;" into "&amp;amp;" and corrupt text that was already correct.
    content = re.sub(
        r'&(?!(?:[A-Za-z_][\w.-]*|#[0-9]+|#[xX][0-9A-Fa-f]+);)',
        '&amp;',
        content,
    )

    soup = BeautifulSoup(content, 'lxml-xml')

    # Look the tag up once instead of searching the document twice.
    title_tag = soup.find('title')
    title = title_tag.text if title_tag is not None else None

    return {
        'title': title,
        'soup': soup  # returned so callers can extract further fields
    }

3. 批量處理 .nfo 文件

import os
from pathlib import Path

def batch_process_nfo(directory):
    """Extract movie info from every ``*.nfo`` file directly inside *directory*.

    Returns:
        A list of ``(file_name, info)`` tuples where *info* is whatever
        ``extract_movie_info`` returns for that file's root element. Files
        that raise during parsing or extraction are reported with a
        printed message and left out of the result.
    """
    collected = []

    for nfo_file in Path(directory).glob('*.nfo'):
        try:
            movie_root = ET.parse(nfo_file).getroot()
            entry = (nfo_file.name, extract_movie_info(movie_root))
        except Exception as e:
            print(f"處理 {nfo_file} 失敗: {e}")
        else:
            collected.append(entry)

    return collected

# 使用 pandas 導出結果
import pandas as pd

def export_to_csv(nfo_dir, output_file):
    """Write file name, title, year and rating of each .nfo file to a CSV.

    Args:
        nfo_dir: Directory handed to ``batch_process_nfo``.
        output_file: Destination CSV path; written without the index column.

    Entries whose extracted info is ``None`` (``extract_movie_info``
    returns ``None`` for documents whose root is not ``<movie>``) are
    skipped instead of raising ``TypeError`` when subscripted.
    """
    columns = ['file', 'title', 'year', 'rating']
    rows = [
        {
            'file': file_name,
            'title': info['title'],
            'year': info['year'],
            'rating': info['rating'],
        }
        for file_name, info in batch_process_nfo(nfo_dir)
        if info is not None
    ]

    # Passing ``columns`` keeps the header row even when no file qualified.
    df = pd.DataFrame(rows, columns=columns)
    df.to_csv(output_file, index=False)

五、實際應用案例

1. 媒體庫元數據同步

def sync_with_media_library(nfo_dir, media_dir):
    """Copy title/director from each .nfo file into the matching media file's tags.

    For every ``*.nfo`` file in *nfo_dir* the ``<title>`` is read, the
    corresponding media file is looked up in *media_dir*, and the file's
    ``title`` tag (and ``artist`` tag, from ``<director>``) is updated
    through mutagen.

    Files are skipped when they have no title, when no media file is
    found, or when mutagen cannot identify the media file's format.
    """
    for nfo_file in Path(nfo_dir).glob('*.nfo'):
        root = ET.parse(nfo_file).getroot()
        title = root.findtext('title')
        if not title:
            # Nothing to match on and nothing meaningful to write.
            continue

        # NOTE(review): find_media_file is not defined in the visible
        # source; it is presumed to return a path or a falsy value. Confirm
        # that it exists before relying on this function.
        media_file = find_media_file(media_dir, title)
        if not media_file:
            continue

        # Imported lazily so mutagen is only required when a match exists.
        from mutagen import File

        # easy=True selects mutagen's human-readable key interface so that
        # 'title' / 'artist' are valid keys for ID3 and MP4 tags as well.
        audio = File(media_file, easy=True)
        if audio is None:
            # mutagen.File returns None when it cannot identify the type.
            continue

        audio['title'] = title
        director = root.findtext('director')
        if director:
            # Avoid writing None when the .nfo has no <director>.
            audio['artist'] = director
        audio.save()

2. 生成 HTML 報告

def generate_html_report(nfo_files, output_file):
    """Render title/year/director/rating of each .nfo file as an HTML table.

    Args:
        nfo_files: Iterable of .nfo paths; each is loaded with ``read_nfo``
            and skipped when that returns ``None``.
        output_file: Path of the HTML file to write (UTF-8).

    Field values are HTML-escaped before being inserted, and a missing
    field renders as an empty cell.
    """
    from html import escape

    # Collect fragments and join once instead of growing a string in a loop.
    parts = ["""
    <html>
    <head>
        <title>媒體庫報(bào)告</title>
        <style>
            table { border-collapse: collapse; width: 100%; }
            th, td { border: 1px solid #ddd; padding: 8px; }
            tr:nth-child(even) { background-color: #f2f2f2; }
        </style>
    </head>
    <body>
        <h1>媒體庫報(bào)告</h1>
        <table>
            <tr>
                <th>標(biāo)題</th>
                <th>年份</th>
                <th>導(dǎo)演</th>
                <th>評(píng)分</th>
            </tr>
    """]

    for nfo_file in nfo_files:
        root = read_nfo(nfo_file)
        # Compare with None explicitly: an Element without child elements
        # is falsy, so a plain truth test would also drop valid documents.
        if root is None:
            continue

        # Metadata text is untrusted; escape it so markup characters in a
        # title cannot break or inject into the report.
        title, year, director, rating = (
            escape(root.findtext(tag) or '')
            for tag in ('title', 'year', 'director', 'rating')
        )
        parts.append(f"""
            <tr>
                <td>{title}</td>
                <td>{year}</td>
                <td>{director}</td>
                <td>{rating}</td>
            </tr>
            """)

    parts.append("""
        </table>
    </body>
    </html>
    """)

    with open(output_file, 'w', encoding='utf-8') as f:
        f.write(''.join(parts))

3. 自動下載缺失信息

import requests
from bs4 import BeautifulSoup

def enrich_nfo_info(file_path):
    """Fill in a missing ``<plot>`` / ``<rating>`` from an online search.

    The file's ``<title>`` is used as the search query. When the response
    contains at least one result, the first result's ``summary`` and
    ``rating.average`` are written into ``<plot>`` and ``<rating>``, but
    only where those elements are absent or empty. The file is rewritten
    only if something actually changed.

    Returns:
        ``None``. Returns early when the file has no title or the search
        yields no result.

    Raises:
        requests.RequestException: On network failure, timeout or a
            non-success HTTP status.
    """
    tree = ET.parse(file_path)
    root = tree.getroot()

    title = root.findtext('title')
    if not title:
        return

    # Let requests build the query string so spaces, '&' and non-ASCII
    # titles are URL-encoded correctly; bound the wait with a timeout.
    response = requests.get(
        "https://api.douban.com/v2/movie/search",
        params={'q': title},
        timeout=10,
    )
    response.raise_for_status()
    data = response.json()

    # NOTE(review): the 'movies' / 'summary' / 'rating.average' response
    # layout is taken from the original code; confirm against the API.
    movies = data.get('movies')
    if not movies:
        return
    movie_data = movies[0]

    def fill_if_empty(tag, value):
        """Set <tag> when it is absent or empty; return True if changed."""
        if value is None or root.findtext(tag):
            return False
        # Reuse an existing empty element rather than appending a duplicate.
        elem = root.find(tag)
        if elem is None:
            elem = ET.SubElement(root, tag)
        elem.text = str(value)
        return True

    plot_changed = fill_if_empty('plot', movie_data.get('summary'))
    rating_changed = fill_if_empty(
        'rating', (movie_data.get('rating') or {}).get('average')
    )

    if plot_changed or rating_changed:
        tree.write(file_path, encoding='utf-8', xml_declaration=True)

六、常見問題解決方案

1. 編碼問題處理

def read_nfo_with_encoding(file_path, encodings=('utf-8', 'gbk', 'iso-8859-1')):
    """Parse an .nfo file, trying several text encodings in order.

    Args:
        file_path: Path of the .nfo file.
        encodings: Candidate encodings, tried in order. The first one that
            decodes the file without error is used. The default matches
            the original hard-coded list.

    Returns:
        The root element parsed from the decoded text. If none of the
        candidates can decode the file, the raw bytes are handed to the
        XML parser instead.

    Raises:
        xml.etree.ElementTree.ParseError: If the content is not
            well-formed XML.
    """
    # Read once and decode in memory instead of reopening the file for
    # every candidate encoding.
    with open(file_path, 'rb') as f:
        raw = f.read()

    for enc in encodings:
        try:
            content = raw.decode(enc)
        except UnicodeDecodeError:
            continue
        return ET.fromstring(content)

    # Only reached when no candidate decodes the data. With the default
    # list that never happens, because iso-8859-1 accepts any byte sequence.
    return ET.fromstring(raw)

2. 處理特殊字符

def sanitize_nfo_content(content):
    """Escape the five XML special characters in *content*.

    ``&``, ``<``, ``>``, ``"`` and ``'`` are replaced by ``&amp;``,
    ``&lt;``, ``&gt;``, ``&quot;`` and ``&apos;`` respectively. Every
    ``&`` is escaped, including one that already begins an entity.
    """
    # A single translate pass gives the same result as replacing '&' first
    # and the other characters afterwards, since none of the inserted
    # entities contains '<', '>' or a quote.
    entity_table = str.maketrans({
        '&': '&amp;',
        '<': '&lt;',
        '>': '&gt;',
        '"': '&quot;',
        "'": '&apos;',
    })
    return content.translate(entity_table)

3. 驗證 .nfo 文件結構

def validate_nfo(file_path, schema_path='nfo_schema.xsd'):
    """Validate an .nfo file against an XML Schema using lxml.

    Args:
        file_path: Path of the .nfo file to check.
        schema_path: Path of the XSD file; defaults to ``nfo_schema.xsd``.

    Returns:
        ``True`` when the document validates, ``False`` otherwise. The
        outcome is printed; on failure each entry of the schema's error
        log is printed with its line number.
    """
    from lxml import etree

    xml_doc = etree.parse(file_path)
    schema = etree.XMLSchema(etree.parse(schema_path))

    if not schema.validate(xml_doc):
        print("文件無效:")
        for error in schema.error_log:
            print(f"行 {error.line}: {error.message}")
        return False

    print("文件有效")
    return True

七、完整工具類實現

class NfoProcessor:
    """Load, inspect, edit and save a single .nfo document.

    The processor can be created with a file path, which is loaded
    immediately, or without one. When no file is loaded, the read
    accessors report "nothing there" and the first write operation
    starts a fresh ``<movie>`` document.
    """

    # Root tag used when a document is started from scratch.
    _DEFAULT_ROOT_TAG = 'movie'

    def __init__(self, file_path=None):
        """Create the processor and load *file_path* when one is given."""
        self.file_path = file_path
        self.tree = None
        self.root = None

        if file_path:
            self.load(file_path)

    def load(self, file_path):
        """Parse *file_path* and make it the current document.

        The standard-library parser is tried first. If it reports a parse
        error, lxml's recovering parser is tried as a fallback.

        Raises:
            ValueError: If the file cannot be parsed by either parser.
        """
        self.file_path = file_path
        try:
            tree = ET.parse(file_path)
        except ET.ParseError as parse_error:
            tree = self._recover_with_lxml(file_path, parse_error)
        self.tree = tree
        self.root = tree.getroot()

    @staticmethod
    def _recover_with_lxml(file_path, parse_error):
        """Re-parse a malformed file with lxml and return a stdlib tree."""
        try:
            from lxml import etree
        except ImportError:
            # Without lxml there is no fallback; report the real parse
            # problem rather than the missing optional dependency.
            raise ValueError(f"無法解析文件: {parse_error}") from parse_error

        try:
            parser = etree.XMLParser(recover=True)
            recovered = etree.parse(file_path, parser).getroot()
            if recovered is None:
                raise ValueError("no recoverable root element")
            # Round-trip through serialized XML so the rest of the class
            # always works on stdlib elements; ET.SubElement cannot attach
            # children to lxml elements.
            return ET.ElementTree(ET.fromstring(etree.tostring(recovered)))
        except Exception as e:
            raise ValueError(f"無法解析文件: {e}") from e

    def _ensure_root(self):
        """Return the root element, creating an empty document if needed."""
        if self.root is None:
            self.root = ET.Element(self._DEFAULT_ROOT_TAG)
            self.tree = ET.ElementTree(self.root)
        return self.root

    def save(self, file_path=None):
        """Write the document as UTF-8 with an XML declaration.

        Args:
            file_path: Destination; defaults to the path last loaded.

        Raises:
            ValueError: If no destination is known or there is no document.
        """
        save_path = file_path or self.file_path
        if not save_path:
            raise ValueError("未指定保存路徑")

        if self.tree is None:
            raise ValueError("沒有可保存的數(shù)據(jù)")

        self.tree.write(save_path, encoding='utf-8', xml_declaration=True)

    def get_value(self, path):
        """Return the text of the first element at *path*, or ``None``.

        ``None`` is returned when the element is missing or when no
        document has been loaded or created yet.
        """
        if self.root is None:
            return None
        elem = self.root.find(path)
        return elem.text if elem is not None else None

    def set_value(self, path, value):
        """Set the text of the element at *path* to ``str(value)``.

        *path* is a ``/``-separated chain of tag names relative to the
        root; missing elements along the way are created.
        """
        current = self._ensure_root()

        # Walk down the path, creating each missing step.
        for part in path.split('/'):
            elem = current.find(part)
            if elem is None:
                elem = ET.SubElement(current, part)
            current = elem

        current.text = str(value)

    def get_actors(self):
        """Return one ``{'name', 'role', 'thumb'}`` dict per ``<actor>``."""
        if self.root is None:
            return []
        return [
            {
                'name': actor.findtext('name'),
                'role': actor.findtext('role'),
                'thumb': actor.findtext('thumb')
            }
            for actor in self.root.findall('actor')
        ]

    def add_actor(self, name, role, thumb=None):
        """Append an ``<actor>`` with name, role and optional thumb URL."""
        actor = ET.SubElement(self._ensure_root(), 'actor')
        ET.SubElement(actor, 'name').text = name
        ET.SubElement(actor, 'role').text = role
        if thumb:
            ET.SubElement(actor, 'thumb').text = thumb

    def to_dict(self):
        """Return the main fields of the document as a plain dict."""
        genres = (
            [g.text for g in self.root.findall('genre')]
            if self.root is not None
            else []
        )
        return {
            'title': self.get_value('title'),
            'year': self.get_value('year'),
            'plot': self.get_value('plot'),
            'director': self.get_value('director'),
            'rating': self.get_value('rating'),
            'genres': genres,
            'actors': self.get_actors()
        }

# Usage example: load a file, read one field, change the rating, add a
# cast member and write the result back to the same path.
processor = NfoProcessor("The.Matrix.nfo")
print(processor.get_value('title'))  # prints the <title> text, e.g. 黑客帝國
processor.set_value('rating', 9.0)
processor.add_actor('凱瑞-安·莫斯', '崔妮蒂')
processor.save()

八、總結與最佳實踐

核心處理流程

讀取：使用 xml.etree.ElementTree 或 lxml 解析文件
提取：使用 find() 和 findall() 獲取數據
修改：直接操作 XML 元素樹
創建：使用 Element 和 SubElement 構建結構
保存：使用 write() 方法寫入文件

最佳實(shí)踐建議

編碼處理：始終指定 UTF-8 編碼
錯誤處理：添加異常捕獲處理格式錯誤
備份文件：修改前創建備份
使用 lxml：處理複雜文件時選擇 lxml
驗證結構：使用 XML Schema 驗證文件

欧美bbbwbbbw肥妇,免费乱码人妻系列日韩,一级黄片

Python處理.nfo文件格式的完整教程

目錄

一、什么是 .nfo 文件？

典型 .nfo 文件結(jié)構(gòu)

二、核心庫介紹

1. 標(biāo)準(zhǔn)庫解決方案

2. 第三方庫推薦

三、完整處理流程

1. 讀取 .nfo 文件

2. 提取基本信息

3. 修改 .nfo 文件

4. 創(chuàng)建新的 .nfo 文件

四、高級(jí)處理技巧

1. 使用 lxml 處理復(fù)雜文件

2. 處理非標(biāo)準(zhǔn) .nfo 文件

3. 批量處理 .nfo 文件

五、實(shí)際應(yīng)用案例

1. 媒體庫元數(shù)據(jù)同步

2. 生成 HTML 報(bào)告

3. 自動(dòng)下載缺失信息

六、常見問題解決方案

1. 編碼問題處理

2. 處理特殊字符

3. 驗(yàn)證 .nfo 文件結(jié)構(gòu)

七、完整工具類實(shí)現(xiàn)

八、總結(jié)與最佳實(shí)踐

核心處理流程

最佳實(shí)踐建議

推薦工具

相關(guān)文章

最新評(píng)論

大家感興趣的內(nèi)容

最近更新的內(nèi)容

常用在線小工具

欧美bbbwbbbw肥妇,免费乱码人妻系列日韩,一级黄片

Python處理.nfo文件格式的完整教程

目錄

一、什么是 .nfo 文件？

典型 .nfo 文件結(jié)構(gòu)

二、核心庫介紹

1. 標(biāo)準(zhǔn)庫解決方案

2. 第三方庫推薦

三、完整處理流程

1. 讀取 .nfo 文件

2. 提取基本信息

3. 修改 .nfo 文件

4. 創(chuàng)建新的 .nfo 文件

四、高級(jí)處理技巧

1. 使用 lxml 處理復(fù)雜文件

2. 處理非標(biāo)準(zhǔn) .nfo 文件

3. 批量處理 .nfo 文件

五、實(shí)際應(yīng)用案例

1. 媒體庫元數(shù)據(jù)同步

2. 生成 HTML 報(bào)告

3. 自動(dòng)下載缺失信息

六、常見問題解決方案

1. 編碼問題處理

2. 處理特殊字符

3. 驗(yàn)證 .nfo 文件結(jié)構(gòu)

七、完整工具類實(shí)現(xiàn)

八、總結(jié)與最佳實(shí)踐

核心處理流程

最佳實(shí)踐建議

推薦工具

相關(guān)文章

最新評(píng)論

大家感興趣的內(nèi)容

最近更新的內(nèi)容

常用在線小工具

一、什么是 .nfo 文件？

二、核心庫介紹

三、完整處理流程

五、實(shí)際應(yīng)用案例

六、常見問題解決方案

八、總結(jié)與最佳實(shí)踐