当前位置: 首页 > ai >正文

libreoffice容器word转pdf

先说结论:市面上不花钱、简单且效果好的就是这种方式;实测下来,在线(Online)转换不如命令行转换的效果好。AsposeWords和SpireDoc效果都不错,但如果只用到word转pdf这一个功能,感觉花3-5w不划算。
下载容器路径 https://docker.aityp.com/i/search?search=libreoffice
部署LibreOffice容器
使用Docker运行LibreOffice的无头模式(headless),提供文档转换服务:

# Start the headless LibreOffice container.
# The input/output directories and the host font directory must be mounted.
docker run -d \
  --name libreoffice1 \
  -v /opt/libreoffice1/input:/app/input \
  -v /opt/libreoffice1/output:/app/output \
  -v /usr/share/fonts/:/usr/share/fonts/ \
  -p 3000:3000 \
  linuxserver/libreoffice:latest

# LibreOffice Online variant.
# Note: one setting in the container's configuration file has to be changed,
# otherwise the service is not reachable over plain HTTP.
docker run -t -d -p 9980:9980 -e "username=admin" -e "password=123456" --restart always --cap-add SYS_ADMIN libreofficeonline:telecom

此命令启动一个LibreOffice容器,将容器的3000端口映射到宿主机(Online版本的容器使用9980端口),并将宿主机目录挂载到容器内以便文件交换。

Java调用REST API转换文档
若容器提供REST API(如libreserver/office-api),可通过Java的HTTP客户端发送请求:

package cn.zjtele.pubinfo.demo.api.controller;import org.apache.http.HttpEntity;
import org.apache.http.client.config.RequestConfig;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpPost;
import org.apache.http.conn.ssl.NoopHostnameVerifier;
import org.apache.http.entity.ContentType;
import org.apache.http.entity.mime.MultipartEntityBuilder;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;
import org.apache.http.impl.conn.PoolingHttpClientConnectionManager;
import org.apache.http.ssl.SSLContexts;
import org.apache.http.util.EntityUtils;
import org.slf4j.MDC;import javax.net.ssl.SSLContext;
import java.io.File;
import java.io.FileOutputStream;
import java.nio.charset.StandardCharsets;
import java.security.KeyManagementException;
import java.security.KeyStoreException;
import java.security.NoSuchAlgorithmException;
import java.util.HashMap;
import java.util.Map;
import java.util.UUID;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.Future;
import java.util.concurrent.ThreadPoolExecutor;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.TimeoutException;import static com.sun.javafx.runtime.async.BackgroundExecutor.getExecutor;public class LibreOfficeOnlineMasterConverter {// 正确的API端点路径(根据您的服务器配置可能需要调整)private static final String LOOL_CONVERT_URL = "http://localhost:9980/lool/convert-to/pdf";// 如果需要忽略SSL证书验证static SSLContext sslContext;static {try {sslContext = SSLContexts.custom().loadTrustMaterial((chain, authType) -> true).build();} catch (NoSuchAlgorithmException e) {throw new RuntimeException(e);} catch (KeyManagementException e) {throw new RuntimeException(e);} catch (KeyStoreException e) {throw new RuntimeException(e);}}// 在类初始化时创建共享的HttpClientprivate static final CloseableHttpClient sharedHttpClient = HttpClients.custom().setSSLContext(sslContext).setSSLHostnameVerifier(NoopHostnameVerifier.INSTANCE).setMaxConnTotal(100)  // 最大连接数.setMaxConnPerRoute(20) // 每个路由最大连接数.build();public static void printPoolStatus() {ThreadPoolExecutor executor = (ThreadPoolExecutor) getExecutor();System.out.println("活跃线程: " + executor.getActiveCount() +" / 队列任务: " + executor.getQueue().size());}public static boolean convertToPdf(String inputFile, String outputFile) throws NoSuchAlgorithmException, KeyStoreException, KeyManagementException {MDC.put("traceId", UUID.randomUUID().toString().substring(0,8));System.out.println("开始处理文件: " + inputFile);// 如果需要忽略SSL证书验证
//        SSLContext sslContext = SSLContexts.custom()
//                .loadTrustMaterial((chain, authType) -> true)
//                .build();// 修改convertToPdf方法中的httpClient获取方式
//        CloseableHttpClient httpClient = sharedHttpClient;// 调整HttpClient配置,增加超时控制RequestConfig config = RequestConfig.custom().setConnectTimeout(5000)       // 连接超时5秒.setSocketTimeout(30000)       // 数据传输超时30秒.build();CloseableHttpClient httpClient = HttpClients.custom().setDefaultRequestConfig(config).setConnectionManager(new PoolingHttpClientConnectionManager()) // 使用连接池.build();try {// 1. 创建POST请求HttpPost httpPost = new HttpPost(LOOL_CONVERT_URL);// 2. 构建Multipart请求体(尝试不同字段名)MultipartEntityBuilder builder = MultipartEntityBuilder.create();builder.addBinaryBody("file",  // 先尝试"file",如果失败再尝试"data"new File(inputFile),getContentType(inputFile),new File(inputFile).getName());// 3. 设置必要的头信息(master分支特定头)httpPost.setHeader("X-WOPI-Override", "CONVERT_TO");httpPost.setHeader("X-WOPI-FileExtension", getFileExtension(inputFile));httpPost.setHeader("X-WOPI-SuggestedTarget", getOutputFilename(outputFile));httpPost.setHeader("X-LOOL-WOPI-ConvertTo", "pdf");  // master分支特有httpPost.setHeader("Accept", "application/pdf");// 4. 添加其他可能的必要头httpPost.setHeader("User-Agent", "Java LibreOffice Converter");httpPost.setHeader("Cache-Control", "no-cache");httpPost.setEntity(builder.build());System.out.println("发送请求到: " + LOOL_CONVERT_URL);System.out.println("使用头信息: " + httpPost.getAllHeaders());// 5. 执行请求try (CloseableHttpResponse response = httpClient.execute(httpPost)) {int statusCode = response.getStatusLine().getStatusCode();HttpEntity entity = response.getEntity();System.out.println("响应状态: " + response.getStatusLine());System.out.println("响应头: " + response.getAllHeaders());if (statusCode == 200 && entity != null) {try (FileOutputStream fos = new FileOutputStream(outputFile)) {entity.writeTo(fos);}return true;} else {String responseBody = entity != null ?EntityUtils.toString(entity, StandardCharsets.UTF_8) : "无响应体";System.err.println("转换失败. 
状态码: " + statusCode);System.err.println("响应体: " + responseBody);// 如果400错误,尝试使用"data"作为字段名if (statusCode == 400) {System.out.println("尝试使用'data'作为字段名重试...");return retryWithDataField(inputFile, outputFile);}}}} catch (Exception e) {System.err.println("转换过程中发生错误: " + e.getMessage());e.printStackTrace();} finally {try {httpClient.close();} catch (Exception e) {System.err.println("关闭HTTP客户端时出错: " + e.getMessage());}}return false;}/*** 使用"data"作为字段名重试*/private static boolean retryWithDataField(String inputFile, String outputFile) {CloseableHttpClient httpClient = HttpClients.createDefault();try {HttpPost httpPost = new HttpPost(LOOL_CONVERT_URL);MultipartEntityBuilder builder = MultipartEntityBuilder.create();builder.addBinaryBody("data",  // 使用"data"作为字段名new File(inputFile),getContentType(inputFile),new File(inputFile).getName());// 设置相同的头信息httpPost.setHeader("X-WOPI-Override", "CONVERT_TO");httpPost.setHeader("X-WOPI-FileExtension", getFileExtension(inputFile));httpPost.setHeader("X-WOPI-SuggestedTarget", getOutputFilename(outputFile));httpPost.setHeader("X-LOOL-WOPI-ConvertTo", "pdf");httpPost.setHeader("Accept", "application/pdf");httpPost.setEntity(builder.build());try (CloseableHttpResponse response = httpClient.execute(httpPost)) {if (response.getStatusLine().getStatusCode() == 200) {try (FileOutputStream fos = new FileOutputStream(outputFile)) {response.getEntity().writeTo(fos);}return true;}}} catch (Exception e) {System.err.println("重试失败: " + e.getMessage());}return false;}// 新增异步转换方法public static Future<Boolean> convertToPdfAsync(String inputFile, String outputFile) {return ConverterThreadPool.getExecutor().submit(() -> {try {return convertToPdf(inputFile, outputFile);} catch (Exception e) {System.err.println("异步任务执行异常: " + e.getMessage());return false;}});}// 新增批量处理方法public static Map<String, Future<Boolean>> batchConvert(Map<String, String> filePairs) {Map<String, Future<Boolean>> results = new ConcurrentHashMap<>();filePairs.forEach((input, output) 
->results.put(input, convertToPdfAsync(input, output)));return results;}/*** 获取正确的内容类型*/private static ContentType getContentType(String filePath) {String ext = getFileExtension(filePath).toLowerCase();switch (ext) {case "docx": return ContentType.create("application/vnd.openxmlformats-officedocument.wordprocessingml.document");case "doc": return ContentType.create("application/msword");case "odt": return ContentType.create("application/vnd.oasis.opendocument.text");default: return ContentType.APPLICATION_OCTET_STREAM;}}private static String getFileExtension(String filePath) {int lastDotIndex = filePath.lastIndexOf('.');return lastDotIndex > 0 ? filePath.substring(lastDotIndex + 1) : "";}private static String getOutputFilename(String filePath) {return new File(filePath).getName();}public static void main(String[] args) throws NoSuchAlgorithmException, KeyStoreException, KeyManagementException {String inputFile = "C:\\Users\\sheng\\Desktop\\chongqing.docx";String outputFile = "C:\\Users\\sheng\\Desktop\\chongqing.pdf";System.out.println("开始转换: " + inputFile + " → " + outputFile);boolean b = convertToPdf(inputFile, outputFile);System.out.println("转换结果: " + b);}
}

通过命令行调用容器内工具
若容器仅包含LibreOffice命令行工具,可通过Java执行Docker命令完成转换:

package cn.zjtele.pubinfo.demo.wordtopdf;import java.io.File;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;

/**
 * Converts a Word document to PDF by running LibreOffice inside an already-running Docker
 * container via {@code docker exec}.
 *
 * <p>The command reads the document from {@code /app/input} and writes the PDF to
 * {@code /app/output} inside the container. This class looks for the input in
 * {@link #INPUT_DIR} and reports the result under {@link #OUTPUT_DIR}.
 * NOTE(review): this presumes those two host directories are the ones mounted at
 * {@code /app/input} and {@code /app/output} — confirm against the container's run command.
 */
public class LibreOfficeConverter {

    private static final String INPUT_DIR = "D:/docker/input";   // local input directory
    private static final String OUTPUT_DIR = "D:/docker/output"; // local output directory

    // Name of the running container that hosts LibreOffice.
    private static final String CONTAINER_NAME = "libreoffice767";

    /** Demo entry point: converts the hard-coded file {@code 11.docx} and prints the elapsed time. */
    public static void main(String[] args) {
        long startMillis = System.currentTimeMillis();
        String fileName = "11.docx";
        Path inputFilePath = Paths.get(INPUT_DIR, fileName);
        File inputFile = inputFilePath.toFile();
        if (!inputFile.exists()) {
            System.out.println("文件不存在:" + inputFilePath);
            return;
        }

        try {
            // Make sure the output directory exists.
            Files.createDirectories(Paths.get(OUTPUT_DIR));

            Path outputFilePath = Paths.get(OUTPUT_DIR, pdfFileNameFor(fileName));

            convertFileUsingLibreOffice(inputFile.getAbsolutePath(), outputFilePath.toString());
            System.out.println("文件转换成功!PDF文件已保存到:" + outputFilePath);
            System.out.println("转换耗时:" + (System.currentTimeMillis() - startMillis) + "ms");
        } catch (InterruptedException e) {
            // Restore the interrupt flag so code further up the stack can observe it.
            Thread.currentThread().interrupt();
            e.printStackTrace();
            System.out.println("文件转换失败!");
        } catch (Exception e) {
            e.printStackTrace();
            System.out.println("文件转换失败!");
        }
    }

    /**
     * Derives the PDF file name for a document: the last extension (if any) is replaced by
     * {@code .pdf}; a name without an extension gets {@code .pdf} appended.
     * Package-private so it can be unit-tested.
     *
     * @param fileName document file name without directory components
     * @return the corresponding PDF file name
     */
    static String pdfFileNameFor(String fileName) {
        int lastDot = fileName.lastIndexOf('.');
        String baseName = lastDot > 0 ? fileName.substring(0, lastDot) : fileName;
        return baseName + ".pdf";
    }

    /**
     * Runs {@code libreoffice --headless --convert-to pdf} inside the container for the given file.
     *
     * @param inputFilePath  host path of the document; only its file name is passed to the
     *                       container, resolved against {@code /app/input}
     * @param outputFilePath host path where the PDF is expected; not passed to the command,
     *                       which always writes to {@code /app/output}
     * @throws IOException          if the {@code docker} process cannot be started
     * @throws InterruptedException if the thread is interrupted while waiting for the process
     * @throws RuntimeException     if the process exits with a non-zero code
     */
    private static void convertFileUsingLibreOffice(String inputFilePath, String outputFilePath)
            throws IOException, InterruptedException {
        String containerInputPath = "/app/input/" + new File(inputFilePath).getName();

        // Pass arguments as a list so file names containing spaces stay a single argument.
        ProcessBuilder builder = new ProcessBuilder(
                "docker", "exec", "-i", CONTAINER_NAME,
                "libreoffice", "--headless", "--convert-to", "pdf",
                "--outdir", "/app/output", containerInputPath);
        // Forward the child's output to this process so it cannot block on a full pipe.
        builder.redirectErrorStream(true);
        builder.redirectOutput(ProcessBuilder.Redirect.INHERIT);

        Process process = builder.start();
        int exitCode = process.waitFor();
        if (exitCode != 0) {
            throw new RuntimeException("LibreOffice 转换失败,退出码:" + exitCode);
        }
    }
}

文件路径处理注意事项
确保Java应用有权限访问宿主机和容器的挂载目录。
输入/输出路径需使用容器内的映射路径(如上文挂载的/app/input和/app/output)。
转换完成后从挂载目录提取PDF文件。

http://www.xdnf.cn/news/8673.html

相关文章:

  • Word转PDF--自动生成目录
  • 教师技术知识对人工智能赋能下教学效果的影响:以教学创新为中介的实证研究
  • java每日精进 5.25【Redis缓存】
  • 一文讲透golang channel 的特点、原理及使用场景
  • Linux相关概念和易错知识点(41)(UDP、TCP报头结构)
  • 识别速度快且精准的OCR工具
  • 【短距离通信】【WiFi】WiFi7起源和应用场景介绍
  • 中间件安全IISApacheTomcatNginx弱口令不安全配置CVE
  • 梯度下降 损失景观 视频截图
  • 【 java 基础问题 第一篇 】
  • 【MySQL】第9节|Innodb底层原理与Mysql日志机制深入剖析(二)
  • Audio Codec的I2S时序或PCM时序,代表什么意思
  • 使用Chrome waterfall 查看接口耗时
  • openssl-1.1.1w-win64
  • ISO 26262-5 评估硬件随机失效率
  • redis功能清单
  • 记录一次功能优化需求下的业务处理思路整理
  • 【LangChain大模型应用与多智能体开发 ② 接入智谱AI】
  • 特殊类设计
  • 【打卡】树状数组的操作
  • JDK21深度解密 Day 4:虚拟线程底层实现原理
  • 软件名称:系统日志监听工具 v1.0
  • 环境配置文档撰写指南
  • ​​支持多客户网站的SMPP网关完整实现方案​
  • vocabulary in program
  • C++ STL
  • Seismic source model - stochastic kinematic model (kappa-inverse-square)
  • 页面实现渲染大量 DOM 元素
  • 哈希表-有效的数字异位词
  • 基于大模型的短暂性脑缺血发作预测与干预全流程系统技术方案大纲