Java實現Word/Pdf/TXT轉html的示例
引言:
最近公司在做一個教育培訓學習及在線考試的項目,本人主要從事網絡課程模塊,主要做課程分類,課程,課件的創建及在線學習和統計的功能,因為課件涉及到多種類型,像視頻,音頻,圖文,外部鏈接及文檔類型.其中就涉及到一個問題,就是文檔型課件課程在網頁上的展示和學習問題,因為要在線統計學習的課程,學習的人員,學習的時長,所以不能像傳統做法將文檔下載到本地學習,那樣就不受系統控制了,所以最終的方案是,在上傳文檔型課件的時候,將其文件對應的轉換成HTML文件,以便在網頁上能夠瀏覽學習
下邊主要針對word,pdf和txt文本文件進行轉換
一:Java實現將word轉換為html
1:引入依賴
<dependency> <groupId>fr.opensagres.xdocreport</groupId> <artifactId>fr.opensagres.xdocreport.document</artifactId> <version>1.0.5</version> </dependency> <dependency> <groupId>fr.opensagres.xdocreport</groupId> <artifactId>org.apache.poi.xwpf.converter.xhtml</artifactId> <version>1.0.5</version> </dependency> <dependency> <groupId>org.apache.poi</groupId> <artifactId>poi</artifactId> <version>3.12</version> </dependency> <dependency> <groupId>org.apache.poi</groupId> <artifactId>poi-scratchpad</artifactId> <version>3.12</version> </dependency>
2:代碼demo
package com.svse.controller;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.text.SimpleDateFormat;
import java.util.Date;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.transform.OutputKeys;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerException;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
import org.apache.poi.hwpf.HWPFDocument;
import org.apache.poi.hwpf.converter.PicturesManager;
import org.apache.poi.hwpf.converter.WordToHtmlConverter;
import org.apache.poi.hwpf.usermodel.PictureType;
import org.apache.poi.xwpf.converter.core.BasicURIResolver;
import org.apache.poi.xwpf.converter.core.FileImageExtractor;
import org.apache.poi.xwpf.converter.core.FileURIResolver;
import org.apache.poi.xwpf.converter.core.IURIResolver;
import org.apache.poi.xwpf.converter.core.IXWPFConverter;
import org.apache.poi.xwpf.converter.xhtml.XHTMLConverter;
import org.apache.poi.xwpf.converter.xhtml.XHTMLOptions;
import org.apache.poi.xwpf.usermodel.XWPFDocument;
import org.w3c.dom.Document;
/**
* word 轉換成html
*/
public class TestWordToHtml {
public static final String STORAGEPATH="C://works//files//";
public static final String IP="192.168.30.222";
public static final String PORT="8010";
public static void main(String[] args) throws IOException, TransformerException, ParserConfigurationException {
TestWordToHtml wt=new TestWordToHtml();
//wt.Word2003ToHtml("甲骨文考證.doc");
wt.Word2007ToHtml("甲骨文考證.docx");
}
/**
* 2003版本word轉換成html
* @throws IOException
* @throws TransformerException
* @throws ParserConfigurationException
*/
public void Word2003ToHtml(String fileName) throws IOException, TransformerException, ParserConfigurationException {
final String imagepath = STORAGEPATH+"fileImage/";//解析時候如果doc文件中有圖片 圖片會保存在此路徑
final String strRanString=getRandomNum();
String filepath =STORAGEPATH;
String htmlName =fileName.substring(0, fileName.indexOf("."))+ "2003.html";
final String file = filepath + fileName;
InputStream input = new FileInputStream(new File(file));
HWPFDocument wordDocument = new HWPFDocument(input);
WordToHtmlConverter wordToHtmlConverter = new WordToHtmlConverter(DocumentBuilderFactory.newInstance().newDocumentBuilder().newDocument());
//設置圖片存放的位置
wordToHtmlConverter.setPicturesManager(new PicturesManager() {
public String savePicture(byte[] content, PictureType pictureType, String suggestedName, float widthInches, float heightInches) {
File imgPath = new File(imagepath);
if(!imgPath.exists()){//圖片目錄不存在則創(chuàng)建
imgPath.mkdirs();
}
File file = new File(imagepath +strRanString+suggestedName);
try {
OutputStream os = new FileOutputStream(file);
os.write(content);
os.close();
} catch (FileNotFoundException e) {
e.printStackTrace();
} catch (IOException e) {
e.printStackTrace();
}
return "http://"+IP+":"+PORT+"http://uploadFile/fileImage/"+strRanString+suggestedName;
// return imagepath +strRanString+suggestedName;
}
});
//解析word文檔
wordToHtmlConverter.processDocument(wordDocument);
Document htmlDocument = wordToHtmlConverter.getDocument();
File htmlFile = new File(filepath +strRanString+htmlName);
OutputStream outStream = new FileOutputStream(htmlFile);
DOMSource domSource = new DOMSource(htmlDocument);
StreamResult streamResult = new StreamResult(outStream);
TransformerFactory factory = TransformerFactory.newInstance();
Transformer serializer = factory.newTransformer();
serializer.setOutputProperty(OutputKeys.ENCODING, "utf-8");
serializer.setOutputProperty(OutputKeys.INDENT, "yes");
serializer.setOutputProperty(OutputKeys.METHOD, "html");
serializer.transform(domSource, streamResult);
outStream.close();
System.out.println("生成html文件路徑:"+ "http://"+IP+":"+PORT+"http://uploadFile/"+strRanString+htmlName);
}
/**
* 2007版本word轉換成html
* @throws IOException
*/
public void Word2007ToHtml(String fileName) throws IOException {
final String strRanString=getRandomNum();
String filepath = STORAGEPATH+strRanString;
String htmlName =fileName.substring(0, fileName.indexOf("."))+ "2007.html";
File f = new File(STORAGEPATH+fileName);
if (!f.exists()) {
System.out.println("Sorry File does not Exists!");
} else {
if (f.getName().endsWith(".docx") || f.getName().endsWith(".DOCX")) {
try {
// 1) 加載word文檔生成 XWPFDocument對象
InputStream in = new FileInputStream(f);
XWPFDocument document = new XWPFDocument(in);
// 2) 解析 XHTML配置 (這里設置IURIResolver來設置圖片存放的目錄)
File imageFolderFile = new File(filepath);
XHTMLOptions options = XHTMLOptions.create().URIResolver(new FileURIResolver(imageFolderFile));
options.setExtractor(new FileImageExtractor(imageFolderFile));
options.URIResolver(new IURIResolver() {
public String resolve(String uri) {
//http://192.168.30.222:8010//uploadFile/....
return "http://"+IP+":"+PORT+"http://uploadFile/"+strRanString +"/"+ uri;
}
});
options.setIgnoreStylesIfUnused(false);
options.setFragment(true);
// 3) 將 XWPFDocument轉換成XHTML
OutputStream out = new FileOutputStream(new File(filepath + htmlName));
IXWPFConverter<XHTMLOptions> converter = XHTMLConverter.getInstance();
converter.convert(document,out, options);
//XHTMLConverter.getInstance().convert(document, out, options);
System.out.println("html路徑:"+"http://"+IP+":"+PORT+"http://uploadFile/"+strRanString+htmlName);
} catch (Exception e) {
e.printStackTrace();
}
} else {
System.out.println("Enter only MS Office 2007+ files");
}
}
}
/**
*功能說明:生成時間戳
*創(chuàng)建人:zsq
*創(chuàng)建時間:2019年12月7日 下午2:37:09
*
*/
public static String getRandomNum(){
Date dt = new Date();
SimpleDateFormat sdf = new SimpleDateFormat("yyyyMMddHHmmss");
String str=sdf.format(dt);
return str;
}
}
二:Java實現將Pdf轉換為html
1: 引入依賴
<dependency> <groupId>net.sf.cssbox</groupId> <artifactId>pdf2dom</artifactId> <version>1.7</version> </dependency> <dependency> <groupId>org.apache.pdfbox</groupId> <artifactId>pdfbox</artifactId> <version>2.0.12</version> </dependency> <dependency> <groupId>org.apache.pdfbox</groupId> <artifactId>pdfbox-tools</artifactId> <version>2.0.12</version> </dependency>
2:代碼Demo
/**
 * Converts a PDF file to an HTML file using {@code PDFDomTree}.
 */
public class PdfToHtml {
    /**
     * Renders the PDF at {@code inPdfPath} as UTF-8 encoded HTML at {@code outputHtmlPath}.
     *
     * <p>Both the PDF document and the writer are opened in a try-with-resources statement,
     * so the buffered HTML is flushed and both are closed even when the conversion fails.
     * Any failure is printed and not rethrown.
     *
     * @param inPdfPath path of the PDF file to read
     * @param outputHtmlPath path of the HTML file to create or overwrite
     */
    public void pdfToHtmlTest(String inPdfPath, String outputHtmlPath) {
        // The PDF is loaded first so that an unreadable input does not leave an empty HTML file.
        // Resources close in reverse order: the writer is flushed before the document is closed.
        try (PDDocument document = PDDocument.load(new File(inPdfPath));
             BufferedWriter out = new BufferedWriter(
                     new OutputStreamWriter(new FileOutputStream(new File(outputHtmlPath)), "utf-8"))) {
            PDFDomTree pdfDomTree = new PDFDomTree();
            pdfDomTree.writeText(document, out);
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Demo entry point: converts one sample PDF.
     *
     * @param args unused
     */
    public static void main(String[] args) throws IOException {
        PdfToHtml ph = new PdfToHtml();
        String pdfPath = "C:\\works\\files\\武研中心行政考勤制度.pdf";
        String outputPath = "C:\\works\\files\\武研中心行政考勤制度.html";
        ph.pdfToHtmlTest(pdfPath, outputPath);
    }
}
三:Java實現將TXT轉換為html
/**
 * Converts a GBK-encoded plain-text file to a GBK-encoded HTML fragment.
 *
 * <p>Each input line is written as three leading spaces, the line text with
 * {@code &}, {@code <} and {@code >} escaped, and a {@code <br/>} line break.
 * If {@code filePath} is not an existing regular file a message is printed and nothing
 * is written. Any failure is printed and not rethrown.
 *
 * @param filePath path of the source text file
 * @param htmlPosition path of the HTML file to create or overwrite
 */
public static void txtToHtml(String filePath, String htmlPosition) {
    try {
        File file = new File(filePath);
        if (!(file.isFile() && file.exists())) {
            System.out.println("找不到指定的文件");
            return;
        }
        // try-with-resources closes (and flushes) both ends even if reading or writing fails.
        try (BufferedReader reader = new BufferedReader(
                     new InputStreamReader(new FileInputStream(file), "GBK"));
             BufferedWriter writer = new BufferedWriter(
                     new OutputStreamWriter(new FileOutputStream(new File(htmlPosition)), "GBK"))) {
            String lineTxt;
            while ((lineTxt = reader.readLine()) != null) {
                // Escape the text so that markup characters in the file are shown literally
                // instead of being interpreted by the browser.
                writer.write("   " + escapeHtml(lineTxt) + "<br/>");
            }
        }
    } catch (Exception e) {
        System.out.println("讀取文件內容出錯");
        e.printStackTrace();
    }
}

/** Replaces the HTML-significant characters {@code &}, {@code <} and {@code >} by entities. */
private static String escapeHtml(String text) {
    StringBuilder sb = new StringBuilder(text.length() + 16);
    for (int i = 0; i < text.length(); i++) {
        char c = text.charAt(i);
        switch (c) {
            case '&':
                sb.append("&amp;");
                break;
            case '<':
                sb.append("&lt;");
                break;
            case '>':
                sb.append("&gt;");
                break;
            default:
                sb.append(c);
        }
    }
    return sb.toString();
}
以上就是Java實現Word/Pdf/TXT轉html的示例的詳細內容,更多關于Java Word/Pdf/TXT轉html的資料請關注腳本之家其它相關文章!
相關文章
解決response.setHeader設置下載文件名無效的問題
這篇文章主要介紹了解決response.setHeader設置下載文件名無效的問題,具有很好的參考價值,希望對大家有所幫助。如有錯誤或未考慮完全的地方,望不吝賜教2022-01-01
IDEA創建SpringBoot項目整合mybatis時mysql-connector-java報錯異常的詳細分析
最近工作中發現了個錯誤,分享給同樣遇到這個問題的朋友,這篇文章主要給大家介紹了關于IDEA創建SpringBoot項目整合mybatis時mysql-connector-j報錯異常的詳細分析,需要的朋友可以參考下2023-02-02
java eclipse 出現 xxx cannot be resolved to a type 錯誤解決方法
這篇文章主要介紹了java eclipse 出現 xxx cannot be resolved to a type 錯誤解決方法的相關資料,需要的朋友可以參考下2017-03-03

