当前位置：首页 > news >正文

如何将 Word 文档转换为 PDF

news 2025/8/20 18:06:58

pip install pywin32

import os
import win32com.client
import pythoncom  # 新增：用于处理COM线程
import sys


# Word enumeration values used below (Word object model constants).
_WD_FORMAT_PDF = 17           # WdSaveFormat.wdFormatPDF
_WD_DO_NOT_SAVE_CHANGES = 0   # WdSaveOptions.wdDoNotSaveChanges
_WD_ALERTS_NONE = 0           # WdAlertLevel.wdAlertsNone


def docx_to_pdf(docx_path, pdf_path=None):
    """Convert a Word document to PDF by automating Word through COM.

    Args:
        docx_path: Path of the Word document to convert.
        pdf_path: Destination PDF path. When omitted, the PDF is written
            next to the source document with a ``.pdf`` extension.

    Returns:
        True when the PDF was saved, False when the conversion failed
        (the error is printed, not raised, as in the original behaviour).

    Raises:
        FileNotFoundError: If ``docx_path`` does not exist.
    """
    if not os.path.exists(docx_path):
        raise FileNotFoundError(f"文件不存在: {docx_path}")

    # Word runs as a separate process, so a relative path would be resolved
    # against Word's own current folder rather than this script's.
    docx_path = os.path.abspath(docx_path)
    if pdf_path is None:
        pdf_path = os.path.splitext(docx_path)[0] + ".pdf"
    else:
        pdf_path = os.path.abspath(pdf_path)

    # Initialise COM for this thread before touching any COM object.
    pythoncom.CoInitialize()
    word = None
    doc = None
    converted = False
    try:
        try:
            word = win32com.client.Dispatch("Word.Application")
            word.Visible = False
            # A modal dialog in a hidden Word instance would block forever.
            word.DisplayAlerts = _WD_ALERTS_NONE

            # Open read-only so the source file is not locked for editing.
            doc = word.Documents.Open(docx_path, ReadOnly=True)
            doc.SaveAs2(pdf_path, FileFormat=_WD_FORMAT_PDF)
            print(f"转换成功: {pdf_path}")
            converted = True
        except Exception as e:
            print(f"转换失败: {str(e)}")
        finally:
            # Close the document first, then quit Word. Each step is guarded
            # separately so a failure in one never skips the next; otherwise
            # a hidden WINWORD process could be left running.
            if doc is not None:
                try:
                    doc.Close(SaveChanges=_WD_DO_NOT_SAVE_CHANGES)
                except Exception as e:
                    print(f"关闭文档时警告: {str(e)}")
                doc = None
            if word is not None:
                try:
                    word.Quit()
                except Exception as e:
                    print(f"关闭Word时警告: {str(e)}")
                word = None
    finally:
        # Always balance the CoInitialize() call above.
        pythoncom.CoUninitialize()
    return converted


def _main(argv):
    """Run the command-line interface and return the process exit code."""
    if len(argv) < 2:
        print("单文件Word转PDF转换器")
        print("用法: python docx_to_pdf.py <Word文件路径> [输出PDF路径]")
        print("示例1: python docx_to_pdf.py D:\\d\\a.docx")
        print("示例2: python docx_to_pdf.py D:\\d\\a.docx D:\\d\\output.pdf")
        return 1

    input_file = argv[1]
    output_file = argv[2] if len(argv) > 2 else None

    if not os.path.exists(input_file):
        print(f"错误: 输入文件不存在: {input_file}")
        return 1

    if not input_file.lower().endswith(('.doc', '.docx')):
        print(f"错误: 输入文件不是Word文档: {input_file}")
        return 1

    try:
        converted = docx_to_pdf(input_file, output_file)
    except Exception as e:
        print(f"转换过程中发生错误: {str(e)}")
        return 1

    # Callers (e.g. a Java ProcessBuilder) rely on the exit code, so a failed
    # conversion must not be reported as success.
    if not converted:
        return 1
    print("转换完成!")
    return 0


if __name__ == "__main__":
    sys.exit(_main(sys.argv))

然后在 Java 中调用这个 Python 脚本：

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;

/**
 * Runs the single-file Word-to-PDF Python script as a child process and
 * returns everything the script printed, followed by a status line.
 */
public class PythonScriptCaller {

    /**
     * Charset used to decode the script's output. The original author chose
     * GBK so that Chinese messages decode correctly; this assumes the Python
     * process writes in GBK (confirm for the target machine).
     */
    private static final String OUTPUT_CHARSET = "GBK";

    /**
     * Calls the Python script that converts one Word document to PDF.
     *
     * @param scriptPath absolute path of the Python script
     * @param docxPath   path of the Word document to convert
     * @return the script's combined stdout/stderr output followed by a
     *         success/failure line containing the exit code, or an error
     *         description when the process could not be run
     */
    public static String callDocxToPdfScript(String scriptPath, String docxPath) {
        // Arguments are passed as an array, so paths with spaces need no quoting.
        String[] command = {"python", scriptPath, docxPath};
        ProcessBuilder processBuilder = new ProcessBuilder(command);
        // Merge stderr into stdout so a single reader drains both streams.
        processBuilder.redirectErrorStream(true);

        Process process = null;
        StringBuilder result = new StringBuilder();
        try {
            process = processBuilder.start();

            // try-with-resources closes the reader (and the underlying pipe)
            // even when reading fails part-way through.
            try (BufferedReader reader = new BufferedReader(
                    new InputStreamReader(process.getInputStream(), OUTPUT_CHARSET))) {
                String line;
                while ((line = reader.readLine()) != null) {
                    result.append(line).append("\n");
                }
            }

            // Output is fully drained at this point; wait for the exit code.
            int exitCode = process.waitFor();
            if (exitCode == 0) {
                result.append("脚本执行成功，退出码：").append(exitCode);
            } else {
                result.append("脚本执行失败，退出码：").append(exitCode);
            }
        } catch (IOException e) {
            result.append("执行脚本时发生IO错误：").append(e.getMessage());
        } catch (InterruptedException e) {
            result.append("脚本执行被中断：").append(e.getMessage());
            Thread.currentThread().interrupt(); // restore the interrupt flag for callers
        } finally {
            if (process != null) {
                process.destroy(); // make sure the child process does not outlive this call
            }
        }
        return result.toString();
    }

    /** Manual test entry point; replace the paths with real ones before running. */
    public static void main(String[] args) {
        String scriptPath = "D:\\git\\docx_to_pdf\\docx_to_pdf.py";
        String docxPath = "D:\\d\\a.docx";
        String result = callDocxToPdfScript(scriptPath, docxPath);
        System.out.println("转换结果：\n" + result);
    }
}

批量递归转换（遍历目录下的所有 Word 文档）：

import os
import win32com.client
import pythoncom
import shutil
import sys


# File extensions treated as Word documents (compared case-insensitively).
_WORD_EXTENSIONS = ('.doc', '.docx')
# Prefix of the temporary owner/lock files Word creates next to open documents.
_WORD_LOCK_PREFIX = '~$'

# Word enumeration values used below (Word object model constants).
_WD_FORMAT_PDF = 17           # WdSaveFormat.wdFormatPDF
_WD_DO_NOT_SAVE_CHANGES = 0   # WdSaveOptions.wdDoNotSaveChanges
_WD_ALERTS_NONE = 0           # WdAlertLevel.wdAlertsNone


def convert_all_docs_to_pdf(source_dir, target_dir):
    """Recursively convert every Word document under a directory to PDF.

    The directory layout of ``source_dir`` is mirrored under ``target_dir``
    and each document is written there with a ``.pdf`` extension.

    Args:
        source_dir: Root directory to scan for ``.doc`` / ``.docx`` files.
        target_dir: Root directory that receives the generated PDFs.

    Returns:
        The number of documents that failed to convert (0 when all
        conversions succeeded or nothing had to be converted).
    """
    os.makedirs(target_dir, exist_ok=True)
    target_abs = os.path.normcase(os.path.abspath(target_dir))
    failed = 0

    for root, dirs, files in os.walk(source_dir):
        # If the target lives inside the source tree, do not descend into it;
        # otherwise the output directory would be mirrored into itself.
        dirs[:] = [
            d for d in dirs
            if os.path.normcase(os.path.abspath(os.path.join(root, d))) != target_abs
        ]

        relative_path = os.path.relpath(root, source_dir)
        if relative_path == '.':
            relative_path = ''

        target_subdir = os.path.join(target_dir, relative_path)
        os.makedirs(target_subdir, exist_ok=True)

        for file in files:
            # Skip Word's own lock files and anything that is not a Word
            # document; the extension check ignores case (e.g. ".DOCX").
            if file.startswith(_WORD_LOCK_PREFIX):
                continue
            if not file.lower().endswith(_WORD_EXTENSIONS):
                continue

            source_file_path = os.path.join(root, file)
            # NOTE: "a.doc" and "a.docx" in the same folder map to the same
            # PDF name; the later conversion overwrites the earlier one.
            pdf_filename = os.path.splitext(file)[0] + '.pdf'
            target_file_path = os.path.join(target_subdir, pdf_filename)

            print(f"正在转换: {source_file_path}")
            try:
                succeeded = docx_to_pdf(source_file_path, target_file_path)
            except FileNotFoundError as e:
                # The file vanished between the directory scan and conversion;
                # record it and keep going with the rest of the batch.
                print(f"转换失败: {str(e)}")
                succeeded = False
            if not succeeded:
                failed += 1

    print("所有文件转换完成!")
    return failed


def docx_to_pdf(docx_path, pdf_path=None):
    """Convert a Word document to PDF by automating Word through COM.

    Args:
        docx_path: Path of the Word document to convert.
        pdf_path: Destination PDF path. When omitted, the PDF is written
            next to the source document with a ``.pdf`` extension.

    Returns:
        True when the PDF was saved, False when the conversion failed
        (the error is printed, not raised, as in the original behaviour).

    Raises:
        FileNotFoundError: If ``docx_path`` does not exist.
    """
    if not os.path.exists(docx_path):
        raise FileNotFoundError(f"文件不存在: {docx_path}")

    # Word runs as a separate process, so a relative path would be resolved
    # against Word's own current folder rather than this script's.
    docx_path = os.path.abspath(docx_path)
    if pdf_path is None:
        pdf_path = os.path.splitext(docx_path)[0] + ".pdf"
    else:
        pdf_path = os.path.abspath(pdf_path)

    # Initialise COM for this thread before touching any COM object.
    pythoncom.CoInitialize()
    word = None
    doc = None
    converted = False
    try:
        try:
            word = win32com.client.Dispatch("Word.Application")
            word.Visible = False
            # A modal dialog in a hidden Word instance would block forever.
            word.DisplayAlerts = _WD_ALERTS_NONE

            # Open read-only so the source file is not locked for editing.
            doc = word.Documents.Open(docx_path, ReadOnly=True)
            doc.SaveAs2(pdf_path, FileFormat=_WD_FORMAT_PDF)
            print(f"转换成功: {pdf_path}")
            converted = True
        except Exception as e:
            print(f"转换失败: {str(e)}")
        finally:
            # Close the document first, then quit Word. Each step is guarded
            # separately so a failure in one never skips the next; otherwise
            # a hidden WINWORD process could be left running.
            if doc is not None:
                try:
                    doc.Close(SaveChanges=_WD_DO_NOT_SAVE_CHANGES)
                except Exception as e:
                    print(f"关闭文档时警告: {str(e)}")
                doc = None
            if word is not None:
                try:
                    word.Quit()
                except Exception as e:
                    print(f"关闭Word时警告: {str(e)}")
                word = None
    finally:
        # Always balance the CoInitialize() call above.
        pythoncom.CoUninitialize()
    return converted


def _main(argv):
    """Run the batch command-line interface and return the exit code."""
    if len(argv) < 3:
        print("批量转换Word文档到PDF")
        print("用法: python batch_doc_to_pdf.py <源目录> <目标目录>")
        print("示例: python batch_doc_to_pdf.py D:\\d1 D:\\d2")
        return 1

    source_directory = argv[1]
    target_directory = argv[2]

    if not os.path.exists(source_directory):
        print(f"错误: 源目录不存在: {source_directory}")
        return 1

    # Refuse identical source and target to guard against accidental misuse.
    if os.path.abspath(source_directory) == os.path.abspath(target_directory):
        print("错误: 源目录和目标目录不能相同")
        return 1

    print(f"开始转换: {source_directory} -> {target_directory}")
    failed = convert_all_docs_to_pdf(source_directory, target_directory)

    # Callers rely on the exit code, so report partial failure as non-zero.
    if failed:
        print(f"转换失败的文件数: {failed}")
        return 1
    return 0


if __name__ == "__main__":
    sys.exit(_main(sys.argv))

Java 调用 Python 批量转换脚本：

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;

/**
 * Runs the Word-to-PDF Python scripts (batch and single-file) as child
 * processes and returns what they printed, followed by a status line.
 */
public class PythonBatchConverter {

    /**
     * Charset used to decode the scripts' output. The original author chose
     * GBK so that Chinese messages decode correctly; this assumes the Python
     * process writes in GBK (confirm for the target machine).
     */
    private static final String OUTPUT_CHARSET = "GBK";

    /**
     * Calls the Python batch script that converts every Word document under
     * a directory to PDF.
     *
     * @param scriptPath absolute path of the Python script
     * @param sourceDir  source directory path
     * @param targetDir  target directory path
     * @return the script's combined output followed by a success/failure
     *         line containing the exit code, or an error description
     */
    public static String callBatchConversionScript(String scriptPath, String sourceDir, String targetDir) {
        String[] command = {"python", scriptPath, sourceDir, targetDir};
        return runScript(command, "批量转换");
    }

    /**
     * Calls the Python script that converts a single Word document to PDF.
     *
     * @param scriptPath absolute path of the Python script
     * @param docxPath   path of the Word document to convert
     * @return the script's combined output followed by a success/failure
     *         line containing the exit code, or an error description
     */
    public static String callSingleFileScript(String scriptPath, String docxPath) {
        String[] command = {"python", scriptPath, docxPath};
        return runScript(command, "单文件转换");
    }

    /**
     * Runs a command, collects its merged stdout/stderr and appends a status
     * line of the form "{label}执行成功/失败，退出码：{code}".
     */
    private static String runScript(String[] command, String label) {
        ProcessBuilder processBuilder = new ProcessBuilder(command);
        // Merge stderr into stdout so a single reader drains both streams.
        processBuilder.redirectErrorStream(true);

        Process process = null;
        StringBuilder result = new StringBuilder();
        try {
            process = processBuilder.start();

            // try-with-resources closes the reader (and the underlying pipe)
            // even when reading fails part-way through.
            try (BufferedReader reader = new BufferedReader(
                    new InputStreamReader(process.getInputStream(), OUTPUT_CHARSET))) {
                String line;
                while ((line = reader.readLine()) != null) {
                    result.append(line).append("\n");
                }
            }

            // Output is fully drained at this point; wait for the exit code.
            int exitCode = process.waitFor();
            if (exitCode == 0) {
                result.append(label).append("执行成功，退出码：").append(exitCode);
            } else {
                result.append(label).append("执行失败，退出码：").append(exitCode);
            }
        } catch (IOException e) {
            result.append("执行脚本时发生IO错误：").append(e.getMessage());
        } catch (InterruptedException e) {
            result.append("脚本执行被中断：").append(e.getMessage());
            Thread.currentThread().interrupt(); // restore the interrupt flag for callers
        } finally {
            if (process != null) {
                process.destroy(); // make sure the child process does not outlive this call
            }
        }
        return result.toString();
    }

    /** Manual test entry point; replace the paths with real ones before running. */
    public static void main(String[] args) {
        // Batch conversion.
        String scriptPath = "D:\\git\\docx_to_pdf\\batch_doc_to_pdf.py";
        String sourceDir = "D:\\d1";
        String targetDir = "D:\\d2";
        String result = callBatchConversionScript(scriptPath, sourceDir, targetDir);
        System.out.println("批量转换结果：\n" + result);

        // Single-file conversion.
        String singleFileScriptPath = "D:\\git\\docx_to_pdf\\docx_to_pdf.py";
        String docxPath = "D:\\d1\\a.docx";
        String singleResult = callSingleFileScript(singleFileScriptPath, docxPath);
        System.out.println("单文件转换结果：\n" + singleResult);
    }
}