# Export HTML (a URL, an HTML file, or a markup string) to a PDF file.
def _classify_pdf_source(item):
    """Return 'url', 'file' or 'string' for one conversion source.

    The decision is made on the shape of the value rather than on loose
    substring matches, so markup that merely mentions "http" or "html"
    is still treated as markup.
    """
    if hasattr(item, 'read'):
        # An already-opened file object is handed to the file converter.
        return 'file'
    text = str(item).strip()
    if text.startswith('<'):
        # Looks like markup, even if it contains links or an <html> tag.
        return 'string'
    lowered = text.lower()
    if lowered.startswith(('http://', 'https://')):
        return 'url'
    if lowered.endswith(('.html', '.htm', '.xhtml')):
        return 'file'
    return 'string'


def html_to_pdf(html_path, pdf_path='.\\pdf_new.pdf', html_encoding='UTF-8',
                path_wkpdf=r'.\Tools\wkhtmltopdf.exe'):
    """Convert a web page, an HTML file, or an HTML string to a PDF file.

    The source kind is detected per item:

    * starts with ``http://`` or ``https://``      -> ``pdfkit.from_url``
    * ends with ``.html`` / ``.htm`` / ``.xhtml``,
      or is an object with a ``read`` attribute     -> ``pdfkit.from_file``
    * anything else (including text starting
      with ``<``)                                   -> ``pdfkit.from_string``

    :param html_path: a single URL, file path or HTML string, or a
        list/tuple of URLs or of file paths (all items must be of the
        same kind).
    :param pdf_path: path of the PDF file to write.
    :param html_encoding: encoding the HTML was saved in; passed to
        wkhtmltopdf as its ``encoding`` option.
    :param path_wkpdf: path of the wkhtmltopdf executable.
    :return: None
    :raises ValueError: if ``html_path`` is an empty list, mixes URLs with
        files, or is a list of raw HTML strings.

    Errors raised by pdfkit itself propagate unchanged.
    """
    is_batch = isinstance(html_path, (list, tuple))
    sources = list(html_path) if is_batch else [html_path]
    if not sources:
        raise ValueError('html_path must not be an empty list')

    kinds = {_classify_pdf_source(item) for item in sources}
    if len(kinds) != 1:
        raise ValueError(
            'html_path mixes different source kinds: %s' % sorted(kinds))
    kind = kinds.pop()
    if is_batch and kind == 'string':
        raise ValueError(
            'a list is only supported for URLs or HTML file paths')

    cfg = pdfkit.configuration(wkhtmltopdf=path_wkpdf)
    options = {
        "encoding": html_encoding,  # must match how the HTML was saved
    }
    source = sources if is_batch else html_path

    if kind == 'url':
        # Let wkhtmltopdf fetch the page(s) and render them.
        print('http=>pdf')
        pdfkit.from_url(source, pdf_path, options=options, configuration=cfg)
    elif kind == 'file':
        # One HTML file, or several files merged into a single PDF.
        print('html,list=>pdf' if is_batch else 'html,str=>pdf')
        pdfkit.from_file(source, pdf_path, options=options, configuration=cfg)
    else:
        # Render the given markup/text directly.
        print('from_string=>pdf')
        pdfkit.from_string(source, pdf_path, options=options,
                           configuration=cfg)

所需要用的附件程序：

wkhtmltopdf.exe

下載地址

二、html網頁文件轉png

# Export HTML (a URL, an HTML file, or a markup string) to an image file.
def _classify_image_source(item):
    """Return 'url', 'file' or 'string' for one conversion source.

    The decision is made on the shape of the value rather than on loose
    substring matches, so markup that merely mentions "http" or "html"
    is still treated as markup.
    """
    if hasattr(item, 'read'):
        # An already-opened file object is handed to the file converter.
        return 'file'
    text = str(item).strip()
    if text.startswith('<'):
        # Looks like markup, even if it contains links or an <html> tag.
        return 'string'
    lowered = text.lower()
    if lowered.startswith(('http://', 'https://')):
        return 'url'
    if lowered.endswith(('.html', '.htm', '.xhtml')):
        return 'file'
    return 'string'


def html_to_png(html_path, pdf_path='.\\pdf_new.pdf', html_encoding='UTF-8',
                path_wkpdf=r'.\Tools\wkhtmltoimage.exe'):
    """Convert a web page, an HTML file, or an HTML string to an image.

    The source kind is detected per item:

    * starts with ``http://`` or ``https://``      -> ``imgkit.from_url``
    * ends with ``.html`` / ``.htm`` / ``.xhtml``,
      or is an object with a ``read`` attribute     -> ``imgkit.from_file``
    * anything else (including text starting
      with ``<``)                                   -> ``imgkit.from_string``

    :param html_path: a single URL, file path or HTML string, or a
        list/tuple of URLs or of file paths (all items must be of the
        same kind).
    :param pdf_path: path of the image file to write. The parameter name
        and its default are kept for backward compatibility.
        NOTE(review): the default name ends in ".pdf" although an image is
        produced; callers should pass an explicit image path such as
        "out.png".
    :param html_encoding: encoding the HTML was saved in; passed to
        wkhtmltoimage as its ``encoding`` option.
    :param path_wkpdf: path of the wkhtmltoimage executable.
    :return: None
    :raises ValueError: if ``html_path`` is an empty list, mixes URLs with
        files, or is a list of raw HTML strings.

    Errors raised by imgkit itself propagate unchanged.
    """
    is_batch = isinstance(html_path, (list, tuple))
    sources = list(html_path) if is_batch else [html_path]
    if not sources:
        raise ValueError('html_path must not be an empty list')

    kinds = {_classify_image_source(item) for item in sources}
    if len(kinds) != 1:
        raise ValueError(
            'html_path mixes different source kinds: %s' % sorted(kinds))
    kind = kinds.pop()
    if is_batch and kind == 'string':
        raise ValueError(
            'a list is only supported for URLs or HTML file paths')

    cfg = imgkit.config(wkhtmltoimage=path_wkpdf)
    options = {
        "encoding": html_encoding,  # must match how the HTML was saved
    }
    source = sources if is_batch else html_path

    if kind == 'url':
        # Let wkhtmltoimage fetch the page(s) and render them.
        print('http=>png')
        imgkit.from_url(source, pdf_path, options=options, config=cfg)
    elif kind == 'file':
        # One HTML file, or a list of HTML files.
        print('html,list=>png' if is_batch else 'html,str=>png')
        imgkit.from_file(source, pdf_path, options=options, config=cfg)
    else:
        # Render the given markup/text directly.
        print('from_string=>png')
        imgkit.from_string(source, pdf_path, options=options, config=cfg)

所需要用的附件程序：

wkhtmltoimage.exe

下載地址

到此這篇關于Python實現網頁文件轉PDF文件和PNG圖片的示例代碼的文章就介紹到這了，更多相關Python網頁文件轉PDF、PNG內容請搜索腳本之家以前的文章或繼續瀏覽下面的相關文章，希望大家以后多多支持腳本之家！

您可能感興趣的文章: