package com.artfess.file.util; import java.awt.image.BufferedImage; import java.io.ByteArrayInputStream; import java.io.ByteArrayOutputStream; import java.io.File; import java.io.FileOutputStream; import java.io.IOException; import java.io.InputStream; import java.io.PrintStream; import java.math.BigDecimal; import java.util.ArrayList; import java.util.Arrays; import java.util.Base64; import java.util.Iterator; import java.util.List; import java.util.ListIterator; import java.util.Map; import java.util.UUID; import java.util.regex.Matcher; import java.util.regex.Pattern; import javax.imageio.ImageIO; import org.apache.commons.lang3.StringUtils; import org.apache.poi.xwpf.usermodel.VerticalAlign; import org.apache.poi.xwpf.usermodel.XWPFParagraph; import org.apache.poi.xwpf.usermodel.XWPFRun; import org.apache.xmlbeans.XmlCursor; import org.docx4j.convert.in.xhtml.XHTMLImporterImpl; import org.docx4j.openpackaging.packages.WordprocessingMLPackage; import org.docx4j.openpackaging.parts.WordprocessingML.NumberingDefinitionsPart; import org.jsoup.Jsoup; import org.jsoup.nodes.Document; import org.jsoup.nodes.Element; import org.jsoup.nodes.Node; import org.jsoup.select.Elements; import org.springframework.util.ObjectUtils; import com.deepoove.poi.XWPFTemplate; import com.deepoove.poi.data.PictureRenderData; import com.deepoove.poi.data.TextRenderData; import com.deepoove.poi.policy.AbstractRenderPolicy; import com.deepoove.poi.policy.PictureRenderPolicy; import com.deepoove.poi.policy.TextRenderPolicy; import com.deepoove.poi.render.RenderContext; import com.deepoove.poi.xwpf.NiceXWPFDocument; import com.fasterxml.jackson.databind.JsonNode; import com.fasterxml.jackson.databind.node.ArrayNode; import com.artfess.base.util.AppUtil; import com.artfess.base.util.BeanUtils; import com.artfess.base.util.FileUtil; import com.artfess.base.util.JsonUtil; import com.artfess.base.util.StringUtil; import com.artfess.base.util.time.TimeUtil; import com.artfess.file.model.DefaultFile; import com.artfess.file.persistence.manager.FileManager; import cn.hutool.core.util.ReUtil; /** * word套打富文本处理工具类 * @author zhangxw * */ public class HtmlUtil { private static List HTML_TAG = Arrays.asList("div","a","p","img","table","ul","font","lable","strong","b","span","h1","h2","h3","h4","title"); private static List PICTURE_TAG = Arrays.asList("emf","wmf","pict","jpeg","png","dib","gif","tiff","eps","bmp","wpg"); /** * 获取资源文件的文件流 * * @return */ public static InputStream getResourceInputStream(String filePath) { InputStream in = FileUtil.class.getResourceAsStream(filePath); if (in != null) { return in; } return null; } /** * 创建测试用例过程记录的渲染策略:解析对应的html并输出到word * @return */ public static AbstractRenderPolicy createHtmlRenderPolicy() { return new AbstractRenderPolicy() { @Override protected void afterRender(RenderContext context) { // 清空模板标签所在段落 clearPlaceholder(context, true); } @Override public void doRender(RenderContext context) throws Exception { XWPFRun run = context.getRun(); Object data = context.getData(); XWPFTemplate template = context.getTemplate(); if (data == null || StringUtils.isBlank(data.toString())) { return; } //获得Apache POI增强类NiceXWPFDocument NiceXWPFDocument doc = template.getXWPFDocument(); String html = data.toString(); html = html.replaceAll(">", ">") .replaceAll("<", "<") .replaceAll(" ", " ") .replaceAll("\\n", "") .replaceAll("
", "\n"); Document htmlDoc = Jsoup.parse(html); Elements nodes = htmlDoc.body().children(); XWPFParagraph xwpfParagraph = doc.insertNewParagraph(run.getParagraph().getCTP().newCursor()); if(BeanUtils.isEmpty(xwpfParagraph)){ xwpfParagraph = doc.insertNewParagraph((XmlCursor) run); } ListIterator itr = nodes.listIterator(); while (itr.hasNext()) { Element e = itr.next(); xwpfParagraph = parseHtmlToWord(e, doc, xwpfParagraph, true); } } }; } /** * 转换整个html内容为word内容 * @param ele * @param doc * @param xwpfParagraph * @param isParent * @return * @throws Exception */ private static XWPFParagraph parseHtmlToWord(Element ele, NiceXWPFDocument doc, XWPFParagraph xwpfParagraph , boolean isParent) throws Exception { //处理img图片 if ("img".equals(ele.tagName())) { parseImgToWord(ele.attr("src"), xwpfParagraph); return xwpfParagraph; } //处理table标签 if ("table".equals(ele.tagName())) { xwpfParagraph = doc.insertNewParagraph(getPrevXWPFParagraph(doc, xwpfParagraph).getCTP().newCursor()); parseTableToWord(doc, ele, xwpfParagraph.createRun()); //有表格的话新建段落 //xwpfParagraph = doc.createParagraph(); return xwpfParagraph; } //处理标签 上标 if ("sup".equalsIgnoreCase(ele.tagName())) { XWPFRun run = xwpfParagraph.createRun(); run.setText(ele.text()); // 设置字体加粗; run.setBold(true); // 设置字体大小; run.setFontSize(12); run.setFontFamily("Times New Roman", XWPFRun.FontCharRange.ascii); run.setFontFamily("宋体", XWPFRun.FontCharRange.eastAsia); run.setSubscript(VerticalAlign.SUPERSCRIPT); TextRenderPolicy.Helper.renderTextRun(run, new TextRenderData(ele.text())); return xwpfParagraph; } //处理其他文本标签 String text = ele.ownText(); boolean continueItr = true; //span标签默认全部为文字,不再继续迭代 if ("span".equalsIgnoreCase(ele.tagName())) { text = ele.ownText(); continueItr = false; } boolean enabledBreak = ReUtil.isMatch("(p|h[12345]|li|img)", ele.tagName()); if (enabledBreak) { XWPFRun run = xwpfParagraph.createRun(); run.addBreak(); } if (StringUtils.isNotBlank(text)) { XWPFRun run = xwpfParagraph.createRun(); TextRenderPolicy.Helper.renderTextRun(run, new TextRenderData(text)); } if (continueItr && ele.children().size() > 0) { ListIterator itr = ele.children().listIterator(); while (itr.hasNext()) { Element me = itr.next(); xwpfParagraph = parseHtmlToWord(me, doc, xwpfParagraph, false); } } return xwpfParagraph; } private static XWPFParagraph getPrevXWPFParagraph (NiceXWPFDocument doc, XWPFParagraph xwpfParagraph) { List xwpfParagraphs = doc.getXWPFDocument().getParagraphs(); for (int i = 0;i < xwpfParagraphs.size();i++) { if (xwpfParagraphs.get(i).equals(xwpfParagraph)) { return xwpfParagraphs.get(i + 1); } } return xwpfParagraph; } /** * 转换图片为word内容(目前只处理系统中附件上传的图片) * @param imgUrl * @param xwpfParagraph * @throws Exception */ private static void parseImgToWord(String imgUrl, XWPFParagraph xwpfParagraph) throws Exception { //只处理系统中图片 if(imgUrl.contains("file/v1/downloadFile?fileId=")){ String regEx = "(\\?|&+)(.+?)=([^&]*)";//匹配参数名和参数值的正则表达式 Pattern p = Pattern.compile(regEx); Matcher m = p.matcher(imgUrl); String fileId = ""; while(m.find()){ String paramName = m.group(2);//获取参数名 if("fileId".equals(paramName)){ fileId = m.group(3);//获取参数值 break; } } if(StringUtil.isNotEmpty(fileId)){ FileManager fileManager = AppUtil.getBean(FileManager.class); DefaultFile file = fileManager.get(fileId); if(BeanUtils.isNotEmpty(file)){ String format = file.getExtensionName(); if(!PICTURE_TAG.contains(format)){ format = "png"; } format = "."+format; ByteArrayOutputStream outStream = new ByteArrayOutputStream(); fileManager.downloadFile(fileId, outStream); ByteArrayInputStream inputStream = new ByteArrayInputStream(outStream.toByteArray()); BufferedImage image = ImageIO.read(inputStream); //获得图片的宽 int width = image.getWidth(); //获得图片的高 int height = image.getHeight(); if (width > 600) { //获取比例 int rate = (width / 600 ) + 1; width = width / rate - 20; height = height / rate; } PictureRenderData pictureRenderData = new PictureRenderData(width, height, format, image); XWPFRun run = xwpfParagraph.createRun(); PictureRenderPolicy.Helper.renderPicture(run, pictureRenderData); } return ; } }else{ return ; } } /** * 通过imgUrl获取本地图片路径 * @param imgUrl * @return */ private static String getImgRealPath (String imgUrl) { //TODO 获取real_path if (imgUrl.startsWith("data:;base64,")) { imgUrl = imgUrl.replace("data:;base64,", ""); // base64转本地图片 byte[] bytes = Base64.getDecoder().decode(imgUrl); String path = "D:\\temp\\" + UUID.randomUUID().toString() + ".png"; copyByte2File(bytes, new File(path)); return path; } else { // 其他方式获取本地图片 } return "D:\\img\\timg.jpg"; } private static boolean copyByte2File(byte [] bytes,File file){ FileOutputStream out = null; try { //转化为输入流 ByteArrayInputStream in = new ByteArrayInputStream(bytes); //写出文件 byte[] buffer = new byte[1024]; out = new FileOutputStream(file); //写文件 int len = 0; while ((len = in.read(buffer)) != -1) { out.write(buffer, 0, len); } return true; } catch (Exception e) { e.printStackTrace(); }finally{ try { if(out != null){ out.close(); } } catch (IOException e) { e.printStackTrace(); } } return false; } /** * 转换表格为word内容 * @param doc * @param ele * @param run * @throws Exception */ private static void parseTableToWord(NiceXWPFDocument doc, Element ele, XWPFRun run) throws Exception { //简化表格html Document tableDoc = Jsoup.parse(simplifyTable(ele.outerHtml())); Elements trList = tableDoc.getElementsByTag("tr"); Elements tdList = trList.get(0).getElementsByTag("td"); // //创建表格 // XWPFTable xwpfTable = doc.insertNewTable(run, trList.size(), tdList.size()); // // //设置样式 // TableTools.widthTable(xwpfTable, MiniTableRenderData.WIDTH_A4_FULL, tdList.size()); // TableTools.borderTable(xwpfTable, 4); // // //写入表格行和列内容 // Map[][] array = new Map[trList.size()][tdList.size()]; // for (int row = 0; row < trList.size(); row++) { // Element trElement = trList.get(row); // Elements tds = trElement.getElementsByTag("td"); // for (int col = 0; col < tds.size(); col++) { // Element colElement = tds.get(col); // String colspan = colElement.attr("colspan"); // String rowspan = colElement.attr("rowspan"); // String style = colElement.attr("style"); // StringBuilder styleSB = new StringBuilder(); // if (!StringUtils.isEmpty(colspan)) { // int colCount = Integer.parseInt(colspan); // for (int i = 0; i < colCount - 1; i++) { // array[row][col + i + 1] = new HashMap(); // array[row][col + i + 1].put("mergeCol", true); // } // } // if (!StringUtils.isEmpty(rowspan)) { // int rowCount = Integer.parseInt(rowspan); // for (int i = 0; i < rowCount - 1; i++) { // array[row + i + 1][col] = new HashMap(); // array[row + i + 1][col].put("mergeRow", true); // } // } // XWPFTableCell tableCell = xwpfTable.getRow(row).getCell(col); // if (StringUtils.isEmpty(colspan)) { // if (col == 0) { // if (tableCell.getCTTc().getTcPr() == null) { // tableCell.getCTTc().addNewTcPr().addNewHMerge().setVal(STMerge.RESTART); // } else { // if (tableCell.getCTTc().getTcPr().getHMerge() == null) { // tableCell.getCTTc().getTcPr().addNewHMerge().setVal(STMerge.RESTART); // } else { // tableCell.getCTTc().getTcPr().getHMerge().setVal(STMerge.RESTART); // } // } // } else { // if (array[row][col] != null && array[row][col].get("mergeCol") != null && array[row][col].get("mergeCol")) { // if (tableCell.getCTTc().getTcPr() == null) { // tableCell.getCTTc().addNewTcPr().addNewHMerge().setVal(STMerge.CONTINUE); // } else { // if (tableCell.getCTTc().getTcPr().getHMerge() == null) { // tableCell.getCTTc().getTcPr().addNewHMerge().setVal(STMerge.CONTINUE); // } else { // tableCell.getCTTc().getTcPr().getHMerge().setVal(STMerge.CONTINUE); // } // } // continue; // } else { // if (tableCell.getCTTc().getTcPr() == null) { // tableCell.getCTTc().addNewTcPr().addNewHMerge().setVal(STMerge.RESTART); // } else { // if (tableCell.getCTTc().getTcPr().getHMerge() == null) { // tableCell.getCTTc().getTcPr().addNewHMerge().setVal(STMerge.RESTART); // } else { // tableCell.getCTTc().getTcPr().getHMerge().setVal(STMerge.RESTART); // } // } // } // } // } else { // if (tableCell.getCTTc().getTcPr() == null) { // tableCell.getCTTc().addNewTcPr().addNewHMerge().setVal(STMerge.RESTART); // } else { // if (tableCell.getCTTc().getTcPr().getHMerge() == null) { // tableCell.getCTTc().getTcPr().addNewHMerge().setVal(STMerge.RESTART); // } else { // tableCell.getCTTc().getTcPr().getHMerge().setVal(STMerge.RESTART); // } // } // } // if (StringUtils.isEmpty(rowspan)) { // if (array[row][col] != null && array[row][col].get("mergeRow") != null && array[row][col].get("mergeRow")) { // if (tableCell.getCTTc().getTcPr() == null) { // tableCell.getCTTc().addNewTcPr().addNewVMerge().setVal(STMerge.CONTINUE); // } else { // if (tableCell.getCTTc().getTcPr().getVMerge() == null) { // tableCell.getCTTc().getTcPr().addNewVMerge().setVal(STMerge.CONTINUE); // } else { // tableCell.getCTTc().getTcPr().getVMerge().setVal(STMerge.CONTINUE); // } // } // continue; // } else { // if (tableCell.getCTTc().getTcPr() == null) { // tableCell.getCTTc().addNewTcPr().addNewVMerge().setVal(STMerge.RESTART); // } else { // if (tableCell.getCTTc().getTcPr().getVMerge() == null) { // tableCell.getCTTc().getTcPr().addNewVMerge().setVal(STMerge.RESTART); // } else { // tableCell.getCTTc().getTcPr().getVMerge().setVal(STMerge.RESTART); // } // } // } // } else { // if (tableCell.getCTTc().getTcPr() == null) { // tableCell.getCTTc().addNewTcPr().addNewVMerge().setVal(STMerge.RESTART); // } else { // if (tableCell.getCTTc().getTcPr().getVMerge() == null) { // tableCell.getCTTc().getTcPr().addNewVMerge().setVal(STMerge.RESTART); // } else { // tableCell.getCTTc().getTcPr().getVMerge().setVal(STMerge.RESTART); // } // } // } // tableCell.removeParagraph(0); // XWPFParagraph paragraph = tableCell.addParagraph(); // paragraph.setStyle(styleSB.toString()); // if (!StringUtils.isEmpty(style) && style.contains("text-align:center")) { // paragraph.setAlignment(ParagraphAlignment.CENTER); // } // // parseHtmlToWord(colElement, doc, paragraph, true); // } // } } /** * 简化html中的表格dom * @param tableContent * @return */ private static String simplifyTable(String tableContent) { if (StringUtils.isEmpty(tableContent)) { return null; } Document tableDoc = Jsoup.parse(tableContent); Elements trElements = tableDoc.getElementsByTag("tr"); if (trElements != null) { Iterator eleIterator = trElements.iterator(); Integer rowNum = 0; // 针对于colspan操作 while (eleIterator.hasNext()) { rowNum++; Element trElement = eleIterator.next(); //去除所有样式 trElement.removeAttr("class"); Elements tdElements = trElement.getElementsByTag("td"); List tdEleList = covertElements2List(tdElements); for (int i = 0; i < tdEleList.size(); i++) { Element curTdElement = tdEleList.get(i); //去除所有样式 curTdElement.removeAttr("class"); Element ele = curTdElement.clone(); String colspanValStr = curTdElement.attr("colspan"); if (!StringUtils.isEmpty(colspanValStr)) { ele.removeAttr("colspan"); Integer colspanVal = Integer.parseInt(colspanValStr); for (int k = 0; k < colspanVal - 1; k++) { curTdElement.after(ele.outerHtml()); } } } } // 针对于rowspan操作 List trEleList = covertElements2List(trElements); Element firstTrEle = trElements.first(); Elements tdElements = firstTrEle.getElementsByTag("td"); Integer tdCount = tdElements.size(); //获取该列下所有单元格 for (int i = 0; i < tdElements.size(); i++) { for (Element trElement : trEleList) { List tdElementList = covertElements2List(trElement.getElementsByTag("td")); try { tdElementList.get(i); } catch (Exception e) { continue; } Node curTdNode = tdElementList.get(i); Node cNode = curTdNode.clone(); String rowspanValStr = curTdNode.attr("rowspan"); if (!StringUtils.isEmpty(rowspanValStr)) { cNode.removeAttr("rowspan"); Element nextTrElement = trElement.nextElementSibling(); Integer rowspanVal = Integer.parseInt(rowspanValStr); for (int j = 0; j < rowspanVal - 1; j++) { Node tempNode = cNode.clone(); List nodeList = new ArrayList(); nodeList.add(tempNode); if (j > 0) { nextTrElement = nextTrElement.nextElementSibling(); } Integer indexNum = i + 1; if (i == 0) { indexNum = 0; } if (indexNum.equals(tdCount)) { nextTrElement.appendChild(tempNode); } else { nextTrElement.insertChildren(indexNum, nodeList); } } } } } } Element tableEle = tableDoc.getElementsByTag("table").first(); String tableHtml = tableEle.outerHtml(); return tableHtml; } /** * 转换Elements为list * @param curElements * @return */ private static List covertElements2List(Elements curElements){ List elementList = new ArrayList(); Iterator eleIterator = curElements.iterator(); while(eleIterator.hasNext()){ Element curlement = eleIterator.next(); elementList.add(curlement); } return elementList; } /** * 判断字符串是否富文本 * @param text * @return */ public static boolean isHtml(String text){ if(StringUtil.isEmpty(text)){ return false; } Document document = Jsoup.parseBodyFragment(text, "UTF-8"); try { if(ObjectUtils.isEmpty(document)){ return false; } Elements elements = document.getAllElements(); for(Element e:elements){ String attrName = e.tag().getName(); if(HTML_TAG.contains(attrName.toLowerCase())){ return true; } } } catch (Exception e) { return false; } return false; } /** * 处理富文本套打 * @param content * @return */ public static File getRichtextToDocx(String content){ PrintStream printStream = null; String basePath = (FileUtil.getIoTmpdir() + "/attachFiles/tempZip/printTrans/").replace("/", File.separator); String fileName = TimeUtil.getCurrentTimeMillis() + String.valueOf(new java.util.Random().nextInt(900)+100); try { //将富文本封装成完整html页面内容 StringBuffer html = new StringBuffer(); html.append("") .append("") .append("") .append("") .append(""); html.append(content); html.append(""); File folder=new File(basePath); if(!folder.exists()) { folder.mkdirs(); } //生成html文件 printStream = new PrintStream(new FileOutputStream(basePath+fileName+".html"),false,"utf-8"); printStream.println(html.toString()); //将html文件转换未.docx文件 WordprocessingMLPackage wordMLPackage = WordprocessingMLPackage.createPackage(); NumberingDefinitionsPart ndp = new NumberingDefinitionsPart(); wordMLPackage.getMainDocumentPart().addTargetPart(ndp); ndp.unmarshalDefaultNumbering(); XHTMLImporterImpl xHTMLImporter = new XHTMLImporterImpl(wordMLPackage); xHTMLImporter.setHyperlinkStyle("Hyperlink"); wordMLPackage.getMainDocumentPart().getContent().addAll( xHTMLImporter.convert(new File(basePath+fileName+".html"), null)); File output = new File(basePath+fileName+".docx"); wordMLPackage.save(output); return output; } catch (Exception e) { return null; }finally { if(printStream!=null) { printStream.close(); } //删除html文件 FileUtil.deleteFile(basePath+fileName+".html"); } } /** * word套打时处理图片控件和签章控件套打 * @param key * @param val * @param boMap * @param twiceMap */ public static void printPicture(String key,String val,Map boMap,Map twiceMap){ try { JsonNode varNode = JsonUtil.toJsonNode(val.toString()); if(varNode.isArray() && varNode.size()>0) { FileManager fileManager = AppUtil.getBean(FileManager.class); ArrayNode arrayNode = (ArrayNode) varNode; if(BeanUtils.isNotEmpty(arrayNode.get(0).get("signature"))) {//处理签章打印 StringBuilder signatureTemp = new StringBuilder(); for (int i = 0; i < arrayNode.size(); i++) { JsonNode inode = arrayNode.get(i); String itemKey = key + "_" + i; ByteArrayOutputStream outStream = new ByteArrayOutputStream(); fileManager.downloadFile(inode.get("signature").asText(), outStream); BufferedImage sourceImg =ImageIO.read(new ByteArrayInputStream(outStream.toByteArray())); twiceMap.put(itemKey, new PictureRenderData(90, 25, ".png", sourceImg)); //签章图片用#作为标示符(仅在二次套打中使用) signatureTemp.append("{{@"); signatureTemp.append(itemKey); signatureTemp.append("}}"); } boMap.put(key, signatureTemp.toString()); }else if(BeanUtils.isNotEmpty(arrayNode.get(0).get("url")) || BeanUtils.isNotEmpty(arrayNode.get(0).get("fileName"))) {//打印图片 StringBuilder pictureTemp = new StringBuilder(); for (int i = 0; i < arrayNode.size(); i++) { JsonNode inode = arrayNode.get(i); if(BeanUtils.isNotEmpty(inode.get("url"))) { String url = inode.get("url").asText(); if(url.indexOf("getFileById_")!=-1) { String[] urlItem = url.split("getFileById_"); String itemKey = key + "_" + i; ByteArrayOutputStream outStream = new ByteArrayOutputStream(); fileManager.downloadFile(urlItem[1], outStream); BufferedImage sourceImg =ImageIO.read(new ByteArrayInputStream(outStream.toByteArray())); //设置最大宽度为520像素,如果超过这个宽度,则按比例调整 int width = sourceImg.getWidth(); int height = sourceImg.getHeight(); if(width>520) { double mul = new BigDecimal((float)width/520).setScale(2, BigDecimal.ROUND_HALF_UP).doubleValue(); width = (int) (width/mul); height = (int) (height/mul); } twiceMap.put(itemKey, new PictureRenderData(width, height, ".png", sourceImg)); pictureTemp.append("\n{{@"); pictureTemp.append(itemKey); pictureTemp.append("}}"); } } } boMap.put(key, pictureTemp.toString()); } } } catch (Exception e) { } } }