SpringBoot如何實現word文檔轉pdf
一.背景
項目中有個需求大體意思是,上傳一個word模板,根據word模板合成word文件,再將word文件轉為pdf。
二.方案選擇
1.Spire.Doc for Java方案
Spire.Doc for Java這個是商用收費的,不過API文檔豐富且集成簡單,免費版僅支持3頁轉換。類似的還有ITEXT,這個商用也是受限制的。
2.docx4j方案
開源可商用,僅支持docx格式的word。
3.jodconverter+LibreOffice 方案
開源可商用,調用本地office服務,進行pdf轉換,類似的還有jodconverter+openOffice。
4.其他
至于其他的由于不支持跨平臺不做考慮。
三.實操
1.docx4j
首先嘗試了docx4j,因為docx4j本身支持模板替換的操作,可一次性做替換及文檔類型轉換,而且僅支持docx類型,對于本次需求問題不大。
1.依賴僅需要一個即可
<dependency>
    <groupId>org.docx4j</groupId>
    <artifactId>docx4j-export-fo</artifactId>
    <version>6.1.0</version>
</dependency>
2.主要代碼
@Slf4j
public class PdfUtil {
public static <T> void exportByLocalPath(HttpServletResponse response, String fileName, String path, Map<String,String> params){
try (InputStream in = PdfUtil.class.getClassLoader().getResourceAsStream(path)) {
convertDocxToPdf(in, response,fileName,params);
} catch (Exception e) {
log.error("docx文檔轉(zhuǎn)換為PDF失敗", e.getMessage());
}
}
/**
* docx文檔轉(zhuǎn)換為PDF
* @param in
* @param response
* @return
*/
public static void convertDocxToPdf(InputStream in, HttpServletResponse response, String fileName, Map<String,String> params) throws Exception {
response.setContentType("application/pdf");
String fullFileName = new String(fileName.getBytes(), StandardCharsets.ISO_8859_1);
response.setHeader("Content-disposition", "attachment;filename=" + fullFileName + ".pdf");
WordprocessingMLPackage wmlPackage = WordprocessingMLPackage.load(in);
if (params!=null&&!params.isEmpty()) {
MainDocumentPart documentPart = wmlPackage.getMainDocumentPart();
cleanDocumentPart(documentPart);
documentPart.variableReplace(params);
}
setFontMapper(wmlPackage);
Docx4J.toPDF(wmlPackage,response.getOutputStream());
}
/**
* 清除文檔空白占位符
* @param documentPart
* @return {@link boolean}
*/
public static boolean cleanDocumentPart(MainDocumentPart documentPart) throws Exception {
if (documentPart == null) {
return false;
}
Document document = documentPart.getContents();
String wmlTemplate =
XmlUtils.marshaltoString(document, true, false, Context.jc);
document = (Document) XmlUtils.unwrap(DocxVariableClearUtil.doCleanDocumentPart(wmlTemplate, Context.jc));
documentPart.setContents(document);
return true;
}
/**
* 設(shè)置字體樣式
* @param mlPackage
*/
private static void setFontMapper(WordprocessingMLPackage mlPackage) throws Exception {
Mapper fontMapper = new IdentityPlusMapper();
fontMapper.put("隸書", PhysicalFonts.get("LiSu"));
fontMapper.put("宋體", PhysicalFonts.get("SimSun"));
fontMapper.put("微軟雅黑", PhysicalFonts.get("Microsoft Yahei"));
fontMapper.put("黑體", PhysicalFonts.get("SimHei"));
fontMapper.put("楷體", PhysicalFonts.get("KaiTi"));
fontMapper.put("新宋體", PhysicalFonts.get("NSimSun"));
fontMapper.put("華文行楷", PhysicalFonts.get("STXingkai"));
fontMapper.put("華文仿宋", PhysicalFonts.get("STFangsong"));
fontMapper.put("宋體擴(kuò)展", PhysicalFonts.get("simsun-extB"));
fontMapper.put("仿宋", PhysicalFonts.get("FangSong"));
fontMapper.put("仿宋_GB2312", PhysicalFonts.get("FangSong_GB2312"));
fontMapper.put("幼圓", PhysicalFonts.get("YouYuan"));
fontMapper.put("華文宋體", PhysicalFonts.get("STSong"));
fontMapper.put("華文中宋", PhysicalFonts.get("STZhongsong"));
mlPackage.setFontMapper(fontMapper);
}
}
清除工具類,用于處理占位符替換不生效的問題,這里參考文章
/**
 * Re-joins <code>${name}</code> placeholders that are interrupted by XML tags in marshalled
 * WordprocessingML, so that a later textual variable replacement can match them.
 *
 * <p>A placeholder is recognised by a small state machine: a {@code $}, later a <code>{</code>,
 * later a <code>}</code>. A new {@code $} always restarts the scan, so variable names cannot
 * contain {@code $}.
 */
public class DocxVariableClearUtil {
    /** Matches any XML tag. */
    private static final Pattern XML_PATTERN = Pattern.compile("<[^>]*>");
    /** First character of a placeholder. */
    private static final char PREFIX = '$';
    /** Opening brace of a placeholder. */
    private static final char LEFT_BRACE = '{';
    /** Closing brace of a placeholder. */
    private static final char RIGHT_BRACE = '}';
    /** State: not inside a placeholder. */
    private static final int NONE_START = -1;
    /** Index value meaning "no placeholder start recorded". */
    private static final int NONE_START_INDEX = -1;
    /** State: {@code $} seen. */
    private static final int PREFIX_STATUS = 1;
    /** State: {@code $} and then the opening brace seen. */
    private static final int LEFT_BRACE_STATUS = 2;

    private DocxVariableClearUtil() {
    }

    /**
     * Cleans every placeholder in the given markup and unmarshals the result.
     *
     * @param wmlTemplate marshalled document XML; must not be {@code null}
     * @param jc          JAXB context passed to {@code XmlUtils.unmarshalString}
     * @return the object produced by {@code XmlUtils.unmarshalString} for the cleaned markup
     * @throws JAXBException if the cleaned markup cannot be unmarshalled
     */
    public static Object doCleanDocumentPart(String wmlTemplate, JAXBContext jc) throws JAXBException {
        int curStatus = NONE_START;
        int keyStartIndex = NONE_START_INDEX;
        // Everything before this index has already been copied to the output.
        int lastWriteIndex = 0;
        int length = wmlTemplate.length();
        StringBuilder cleaned = new StringBuilder(length);

        for (int curIndex = 0; curIndex < length; curIndex++) {
            switch (wmlTemplate.charAt(curIndex)) {
                case PREFIX:
                    // Always restart here, whatever the current state is.
                    keyStartIndex = curIndex;
                    curStatus = PREFIX_STATUS;
                    break;
                case LEFT_BRACE:
                    if (curStatus == PREFIX_STATUS) {
                        curStatus = LEFT_BRACE_STATUS;
                    }
                    break;
                case RIGHT_BRACE:
                    if (curStatus == LEFT_BRACE_STATUS) {
                        int keyEndIndex = curIndex + 1;
                        // Copy the untouched text in front of the placeholder, then the cleaned placeholder.
                        cleaned.append(wmlTemplate, lastWriteIndex, keyStartIndex);
                        cleaned.append(normalizePlaceholder(wmlTemplate.substring(keyStartIndex, keyEndIndex)));
                        lastWriteIndex = keyEndIndex;
                        curStatus = NONE_START;
                        keyStartIndex = NONE_START_INDEX;
                    }
                    break;
                default:
                    break;
            }
        }
        // Remainder after the last placeholder.
        cleaned.append(wmlTemplate, lastWriteIndex, length);
        return XmlUtils.unmarshalString(cleaned.toString(), jc);
    }

    /**
     * Returns {@code rawKey} with its placeholder text made contiguous while keeping all tags.
     *
     * @param rawKey source text from the {@code $} up to and including the closing brace
     */
    private static String normalizePlaceholder(String rawKey) {
        String mappingKey = XML_PATTERN.matcher(rawKey).replaceAll("");
        // No tags inside, or a marker was swallowed by a tag match: leave the text untouched.
        if (mappingKey.equals(rawKey) || mappingKey.length() < 3) {
            return rawKey;
        }

        // Original markup without the three marker characters.
        StringBuilder markupBuilder = new StringBuilder(rawKey.length());
        for (int i = 0; i < rawKey.length(); i++) {
            char c = rawKey.charAt(i);
            if (c != PREFIX && c != LEFT_BRACE && c != RIGHT_BRACE) {
                markupBuilder.append(c);
            }
        }
        String markup = markupBuilder.toString();

        String variable = mappingKey.substring(2, mappingKey.length() - 1);
        int variableStart = indexOfOutsideTags(markup, variable);
        if (variableStart >= 0) {
            // Index 0 is a valid hit: the name directly follows the opening marker.
            return markup.substring(0, variableStart) + mappingKey
                    + markup.substring(variableStart + variable.length());
        }

        // The name itself is split by tags (or empty): put the whole placeholder where the
        // '$' was and keep the tags, in order, behind it so the markup stays well-formed.
        StringBuilder rebuilt = new StringBuilder(rawKey.length()).append(mappingKey);
        java.util.regex.Matcher tags = XML_PATTERN.matcher(rawKey);
        while (tags.find()) {
            rebuilt.append(tags.group());
        }
        return rebuilt.toString();
    }

    /**
     * Finds the first occurrence of {@code text} that starts outside any {@code <...>} tag.
     *
     * @return the index, or {@code -1} if {@code text} is empty or does not occur outside tags
     */
    private static int indexOfOutsideTags(String markup, String text) {
        if (text.isEmpty()) {
            return -1;
        }
        boolean insideTag = false;
        int lastCandidate = markup.length() - text.length();
        for (int i = 0; i <= lastCandidate; i++) {
            char c = markup.charAt(i);
            if (c == '<') {
                insideTag = true;
            } else if (c == '>') {
                insideTag = false;
            } else if (!insideTag && markup.startsWith(text, i)) {
                return i;
            }
        }
        return -1;
    }
}
2.poi-tl+jodconverter+LibreOffice 方案
poi-tl這個是專門用來進行word模板合成的開源庫,文檔很詳細。
LibreOffice 下載最新的穩定版本即可。
1.maven依賴
<!-- word合成 -->
<!-- 這里注意版本,1.5版本依賴的poi 3.x的版本 -->
<dependency>
    <groupId>com.deepoove</groupId>
    <artifactId>poi-tl</artifactId>
    <version>1.5.1</version>
</dependency>
<!-- jodconverter word轉pdf -->
<!-- jodconverter-core這個依賴,理論上不用加的,jodconverter-local已經依賴了,但測試的時候不添加依賴找不到 -->
<dependency>
    <groupId>org.jodconverter</groupId>
    <artifactId>jodconverter-core</artifactId>
    <version>4.2.0</version>
</dependency>
<dependency>
    <groupId>org.jodconverter</groupId>
    <artifactId>jodconverter-local</artifactId>
    <version>4.2.0</version>
</dependency>
<dependency>
    <groupId>org.jodconverter</groupId>
    <artifactId>jodconverter-spring-boot-starter</artifactId>
    <version>4.2.0</version>
</dependency>
<!-- 工具類,非必須 -->
<dependency>
    <groupId>cn.hutool</groupId>
    <artifactId>hutool-all</artifactId>
    <version>5.4.3</version>
</dependency>
2.主要代碼
JodConverterConfig配置類
/**
 * Spring configuration that publishes a JODConverter {@link DocumentConverter} bean.
 */
@Configuration
public class JodConverterConfig {

    /** Office manager handed to the converter; injected by Spring. */
    @Autowired
    private OfficeManager officeManager;

    /**
     * Builds the converter bean from a {@code LocalConverter} builder configured with the
     * injected office manager.
     *
     * @return the converter to use for document conversions
     */
    @Bean
    public DocumentConverter documentConverter() {
        final LocalConverter converter =
                LocalConverter.builder().officeManager(this.officeManager).build();
        return converter;
    }
}
yml配置文件
jodconverter:
  local:
    enabled: true
    office-home: "C:\\Program Files\\LibreOffice"
PdfService合成導(dǎo)出代碼
@Slf4j
@Component
public class PdfService {
@Autowired
private DocumentConverter documentConverter;
/**
 * Converts a DOCX stream to PDF and writes the PDF to the HTTP response.
 *
 * <p>Conversion and I/O failures are logged with their stack trace and not rethrown.
 *
 * @param inputStream DOCX content to convert
 * @param response    HTTP response that receives the headers and the PDF bytes
 * @param fileName    download file name without the {@code .pdf} extension
 */
public void docxToPDF(InputStream inputStream,HttpServletResponse response,String fileName) {
    response.setContentType("application/pdf");
    try {
        // Re-decode the UTF-8 bytes as ISO-8859-1 so the raw UTF-8 bytes reach the header;
        // the source charset is explicit so the result does not depend on the platform default.
        String fullFileName = new String(fileName.getBytes(StandardCharsets.UTF_8), StandardCharsets.ISO_8859_1);
        // The file name must be wrapped in double quotes (\"), not in literal backslashes.
        response.setHeader("Content-disposition", "attachment;filename=\"" + fullFileName + ".pdf\"");
        documentConverter
                .convert(inputStream)
                .as(DefaultDocumentFormatRegistry.DOCX)
                .to(response.getOutputStream())
                .as(DefaultDocumentFormatRegistry.PDF)
                .execute();
    } catch (OfficeException | IOException e) {
        // The trailing throwable makes the logger print the stack trace as well as the message.
        log.error("word轉(zhuǎn)pdf失敗:{}", e.getMessage(), e);
    }
}
/**
 * Fills a classpath docx template, converts the result to PDF and writes it to the response.
 *
 * <p>The filled document is staged in a uniquely named temporary file, which is deleted
 * afterwards whether or not the conversion succeeded. Failures are logged with their stack
 * trace and not rethrown.
 *
 * @param response HTTP response that receives the PDF
 * @param fileName download file name without the {@code .pdf} extension
 * @param path     classpath location of the docx template
 * @param params   data passed to {@code WordUtil.replaceWord} for the template
 * @throws Exception declared for compatibility with existing callers
 */
public void exportByLocalPath(HttpServletResponse response, String fileName, String path, Object params) throws Exception {
    File tempFile = null;
    try (InputStream in = PdfService.class.getClassLoader().getResourceAsStream(path)) {
        if (in == null) {
            // getResourceAsStream returns null instead of throwing when the resource is missing.
            log.error("docx template not found on classpath: {}", path);
            return;
        }
        // A unique name in the temp directory: no hand-built path, no clash between
        // concurrent exports that use the same download name.
        tempFile = File.createTempFile("word-export-", ".docx");

        // Close (and thereby flush) the writer before the file is read back.
        try (BufferedOutputStream outputStream = FileUtil.getOutputStream(tempFile)) {
            WordUtil.replaceWord(outputStream, in, params);
        }
        try (BufferedInputStream wordInputStream = FileUtil.getInputStream(tempFile)) {
            docxToPDF(wordInputStream, response, fileName);
        }
    } catch (Exception e) {
        // Pass the throwable itself: with no "{}" placeholder a String argument would be dropped.
        log.error("docx文檔轉(zhuǎn)換為PDF失敗", e);
    } finally {
        // All streams are closed by now, so the delete also works on Windows.
        if (tempFile != null && !tempFile.delete()) {
            log.warn("could not delete temporary file {}", tempFile.getAbsolutePath());
        }
    }
}
四.結論
1.docx4j方案
- 依賴少
- 同時支持word合成及格式轉換
- 轉化效率較差
- 對于含樣式及圖片轉換不友好,容易排版混亂
2.jodconverter+LibreOffice 方案
- 操作穩定
- 轉換效率快
- 集成依賴設置較多
- 依賴本地服務
- LibreOffice打開word可能排版樣式錯亂
- 最后考慮項目需求,最終選擇了jodconverter+LibreOffice方案。
以上為個人經驗,希望能給大家一個參考,也希望大家多多支持腳本之家。
相關(guān)文章
JAVA中list,set,數組之間的轉換詳解
以下是對JAVA中list,set,數組之間的轉換進行了詳細的分析介紹,需要的朋友可以過來參考下2013-09-09
SpringBoot集成JWT的工具類與攔截器實現方式
Java實現簡單樹結構
java實現多線程之定時器任務
Ubuntu安裝jenkins完成自動化構建詳細步驟
淺談Java中Map和Set之間的關系(及Map.Entry)

