当前位置: 首页 > news >正文

C# OpenXML 打开加密的 Word 文档并读取内容

using System;
using System.IO;
using System.Linq;
using System.Text;
using DocumentFormat.OpenXml;
using DocumentFormat.OpenXml.Packaging;
using DocumentFormat.OpenXml.Wordprocessing;/// <summary>
/// Reads the main body text of a Word document through the Open XML SDK
/// (written as a replacement for an earlier Aspose-based reader).
/// </summary>
/// <remarks>
/// Only the main document body is walked; header and footer parts are never opened.
/// The extracted text is normalised by <c>CleanContent</c> and then cut off at the last
/// occurrence of the marker "限公司第" when that marker is found after position 0.
/// NOTE(review): published Open XML SDK versions do not appear to expose a
/// <c>Password</c> property on <c>OpenSettings</c>, and the SDK is not known to decrypt
/// password-protected packages - confirm this compiles and works against the SDK in use.
/// </remarks>
/// <param name="path">Path of the document to open.</param>
/// <param name="password">Password handed to the open settings.</param>
/// <returns>
/// The cleaned (and possibly truncated) body text, or <c>null</c> when the document has
/// no body or any exception is thrown while reading it.
/// </returns>
public static string GetWordContentByOpenXml(string path, string password)
{
    try
    {
        var settings = new OpenSettings { Password = password };
        using (var document = WordprocessingDocument.Open(path, false, settings))
        {
            var body = document.MainDocumentPart?.Document?.Body;
            if (body == null)
            {
                return null;
            }

            var contentBuilder = new StringBuilder();
            ExtractBodyContent(body, contentBuilder);
            string content = CleanContent(contentBuilder.ToString());

            // The marker is a fixed literal, so search ordinally instead of using
            // the current culture's linguistic comparison rules.
            int index = content.LastIndexOf("限公司第", StringComparison.Ordinal);

            // A marker at position 0 is deliberately ignored (same as "not found").
            return index > 0 ? content.Substring(0, index).Trim() : content;
        }
    }
    catch (Exception ex)
    {
        // Log the full exception (type, message, inner exceptions and stack trace);
        // the stack trace alone does not say what actually went wrong.
        LogManager.WriteError("GetWordContentByOpenXml()", ex.ToString());
        return null;
    }
}
/// <summary>
/// Appends the text of every direct child of the document body to the builder.
/// Section properties are skipped by the per-element extractor this method delegates to.
/// </summary>
/// <param name="body">Document body whose children are visited in order.</param>
/// <param name="contentBuilder">Builder that receives the extracted text.</param>
private static void ExtractBodyContent(Body body, StringBuilder contentBuilder)
{
    // Each top-level child is handed to the recursive per-element extractor.
    foreach (OpenXmlElement child in body.Elements())
    {
        ExtractElementContent(child, contentBuilder);
    }
}
/// <summary>
/// Appends the text of a single element, descending into container elements.
/// </summary>
/// <remarks>
/// Paragraphs are followed by a line break, tables are delegated to the table extractor,
/// section properties are ignored, and any other element is treated as a container whose
/// children are processed recursively.
/// </remarks>
/// <param name="element">Open XML element to extract text from.</param>
/// <param name="contentBuilder">Builder that receives the extracted text.</param>
private static void ExtractElementContent(OpenXmlElement element, StringBuilder contentBuilder)
{
    if (element is Paragraph paragraph)
    {
        ExtractParagraphContent(paragraph, contentBuilder);
        contentBuilder.AppendLine(); // one line per paragraph
        return;
    }

    if (element is Table table)
    {
        ExtractTableContent(table, contentBuilder);
        return;
    }

    if (element is SectionProperties)
    {
        // Section properties carry layout data (including header/footer references), not text.
        return;
    }

    // Any other element is treated as a container: recurse into its children.
    foreach (OpenXmlElement nested in element.Elements())
    {
        ExtractElementContent(nested, contentBuilder);
    }
}
/// <summary>
/// Appends the text of a paragraph's direct runs to the builder.
/// </summary>
/// <remarks>
/// Text, tab and break children of each run are emitted in document order, so a run
/// containing "A", a tab and "B" yields "A\tB". (Emitting all text first, then all tabs,
/// then all breaks would reorder such a run to "AB\t".)
/// Only runs that are direct children of the paragraph are read; runs nested inside
/// other elements such as hyperlinks are not visited.
/// </remarks>
/// <param name="paragraph">Paragraph whose runs are read.</param>
/// <param name="contentBuilder">Builder that receives the extracted text.</param>
private static void ExtractParagraphContent(Paragraph paragraph, StringBuilder contentBuilder)
{
    foreach (var run in paragraph.Elements<Run>())
    {
        // Walk the run's children once, in order, so text and whitespace stay interleaved.
        foreach (var child in run.Elements())
        {
            switch (child)
            {
                case Text text:
                    contentBuilder.Append(text.Text);
                    break;
                case TabChar _:
                    contentBuilder.Append("\t");
                    break;
                case Break _:
                    contentBuilder.AppendLine();
                    break;
            }
        }
    }
}
/// <summary>
/// Appends the text of a table: cells are separated by tabs, rows end with a line break.
/// </summary>
/// <remarks>
/// Only paragraphs that are direct children of a cell are read, and consecutive paragraphs
/// of one cell are appended without a separator. Tables nested inside a cell are not visited.
/// </remarks>
/// <param name="table">Table to read.</param>
/// <param name="contentBuilder">Builder that receives the extracted text.</param>
private static void ExtractTableContent(Table table, StringBuilder contentBuilder)
{
    foreach (TableRow tableRow in table.Elements<TableRow>())
    {
        foreach (TableCell tableCell in tableRow.Elements<TableCell>())
        {
            foreach (Paragraph cellParagraph in tableCell.Elements<Paragraph>())
            {
                ExtractParagraphContent(cellParagraph, contentBuilder);
            }

            // A tab closes every cell, including the last one of the row.
            contentBuilder.Append("\t");
        }

        // A line break closes every row.
        contentBuilder.AppendLine();
    }
}
/// <summary>
/// Normalises extracted text: strips control characters and a known Aspose evaluation
/// watermark, collapses every whitespace run to a single space, and trims the result.
/// </summary>
/// <remarks>
/// Control characters and the watermark are removed before whitespace is collapsed, so
/// removing them can never leave two adjacent spaces behind ("a \u0001 b" becomes "a b").
/// No separate blank-line merge step is needed: once all whitespace runs are collapsed to
/// single spaces, no line breaks remain in the text.
/// NOTE(review): the watermark literal contains no spaces, which suggests the helper this
/// method imitates removed all whitespace rather than collapsing it - confirm the intent.
/// </remarks>
/// <param name="content">Raw extracted text; may be <c>null</c> or empty.</param>
/// <returns>The cleaned text, or an empty string for <c>null</c> or empty input.</returns>
private static string CleanContent(string content)
{
    if (string.IsNullOrEmpty(content))
    {
        return string.Empty;
    }

    // Strip non-whitespace control characters first. Vertical tab (0x0B) and form feed (0x0C)
    // are left for the whitespace collapse below so that they still turn into a space.
    content = System.Text.RegularExpressions.Regex.Replace(content, @"[\x00-\x08\x0E-\x1F\x7F]", "");

    // Aspose evaluation watermark; the Open XML path should never produce it, kept for parity.
    content = content.Replace("EvaluationOnly.CreatedwithAspose.Words.Copyright2003-2024AsposePtyLtd.", "");

    // Collapse every whitespace run (spaces, tabs, line breaks) into one space.
    content = System.Text.RegularExpressions.Regex.Replace(content, @"\s+", " ");

    return content.Trim();
}
/// <summary>
/// Heuristically checks whether a document needs a password by trying to open it
/// read-only without one.
/// </summary>
/// <remarks>
/// A path that does not point to an existing file returns <c>false</c> instead of being
/// reported as password-protected. For an existing file, a package exception counts as
/// "password required" only when its message mentions "password", "encrypted" or
/// "protected" (case-insensitive); any other failure to open is treated as
/// "password required". This method never throws.
/// NOTE(review): a corrupt or non-Word file therefore also yields <c>true</c> - confirm
/// that callers can live with that.
/// </remarks>
/// <param name="path">Path of the document to probe.</param>
/// <returns><c>true</c> when the document appears to need a password; otherwise <c>false</c>.</returns>
public static bool IsPasswordRequired(string path)
{
    // File.Exists is false for null, empty and unreadable paths as well as missing files.
    if (!File.Exists(path))
    {
        return false;
    }

    try
    {
        using (WordprocessingDocument.Open(path, false))
        {
            // Opened without a password, so none is required.
            return false;
        }
    }
    catch (OpenXmlPackageException ex)
    {
        string message = ex.Message ?? string.Empty;
        return message.IndexOf("password", StringComparison.OrdinalIgnoreCase) >= 0
            || message.IndexOf("encrypted", StringComparison.OrdinalIgnoreCase) >= 0
            || message.IndexOf("protected", StringComparison.OrdinalIgnoreCase) >= 0;
    }
    catch
    {
        // Best effort: any other failure to open an existing file is assumed to be
        // caused by protection.
        return true;
    }
}
/// <summary>
/// Extended reader: extracts the main body text and can optionally include hyperlink
/// text and footnotes.
/// </summary>
/// <remarks>
/// The extracted text is normalised by <c>CleanContent</c> and then cut off at the last
/// occurrence of the marker "公司第" when that marker is found after position 0.
/// Footnotes are appended before that truncation, so a marker in the body removes the
/// footnote section and a marker inside a footnote moves the cut point.
/// NOTE(review): confirm that truncating the footnotes is intended, and that the marker
/// here ("公司第") is meant to differ from the basic reader's ("限公司第").
/// NOTE(review): published Open XML SDK versions do not appear to expose a
/// <c>Password</c> property on <c>OpenSettings</c> - confirm against the SDK in use.
/// </remarks>
/// <param name="path">Path of the document to open.</param>
/// <param name="password">Password handed to the open settings.</param>
/// <param name="includeHyperlinks">Whether text inside hyperlinks is extracted.</param>
/// <param name="includeFootnotes">Whether footnote text is appended after the body text.</param>
/// <returns>
/// The cleaned (and possibly truncated) text, or <c>null</c> when the document has no body
/// or any exception is thrown while reading it.
/// </returns>
public static string GetWordContentByOpenXmlAdvanced(string path, string password, bool includeHyperlinks = false, bool includeFootnotes = false)
{
    try
    {
        var settings = new OpenSettings { Password = password };
        using (var document = WordprocessingDocument.Open(path, false, settings))
        {
            var mainPart = document.MainDocumentPart;
            var body = mainPart?.Document?.Body;
            if (body == null)
            {
                return null;
            }

            var contentBuilder = new StringBuilder();
            ExtractBodyContentAdvanced(body, contentBuilder, includeHyperlinks);

            if (includeFootnotes && mainPart.FootnotesPart != null)
            {
                ExtractFootnotesContent(mainPart.FootnotesPart, contentBuilder);
            }

            string content = CleanContent(contentBuilder.ToString());

            // The marker is a fixed literal, so search ordinally instead of using
            // the current culture's linguistic comparison rules.
            int index = content.LastIndexOf("公司第", StringComparison.Ordinal);

            // A marker at position 0 is deliberately ignored (same as "not found").
            return index > 0 ? content.Substring(0, index).Trim() : content;
        }
    }
    catch (Exception ex)
    {
        // Log the full exception (type, message, inner exceptions and stack trace);
        // the stack trace alone does not say what actually went wrong.
        LogManager.WriteError("GetWordContentByOpenXmlAdvanced()", ex.ToString());
        return null;
    }
}
/// <summary>
/// Appends the text of every direct child of the document body, optionally including
/// hyperlink text.
/// </summary>
/// <param name="body">Document body whose children are visited in order.</param>
/// <param name="contentBuilder">Builder that receives the extracted text.</param>
/// <param name="includeHyperlinks">Whether text inside hyperlinks is extracted.</param>
private static void ExtractBodyContentAdvanced(Body body, StringBuilder contentBuilder, bool includeHyperlinks)
{
    foreach (var element in body.Elements())
    {
        ExtractElementContentAdvanced(element, contentBuilder, includeHyperlinks);
    }
}

/// <summary>
/// Appends the text of a single element, descending into container elements.
/// </summary>
/// <remarks>
/// Recursion works on the existing element tree. Wrapping a child in a new <c>Body</c>
/// to recurse is not possible: the SDK refuses to attach an element that already has a
/// parent and throws <c>InvalidOperationException</c>.
/// </remarks>
/// <param name="element">Open XML element to extract text from.</param>
/// <param name="contentBuilder">Builder that receives the extracted text.</param>
/// <param name="includeHyperlinks">Whether text inside hyperlinks is extracted.</param>
private static void ExtractElementContentAdvanced(OpenXmlElement element, StringBuilder contentBuilder, bool includeHyperlinks)
{
    switch (element)
    {
        case Paragraph paragraph:
            ExtractParagraphContentAdvanced(paragraph, contentBuilder, includeHyperlinks);
            contentBuilder.AppendLine();
            break;
        case Table table:
            ExtractTableContentAdvanced(table, contentBuilder, includeHyperlinks);
            break;
        case SectionProperties _:
            // Section properties carry layout data, not text.
            break;
        default:
            // Any other element is treated as a container: recurse into its children.
            foreach (var childElement in element.Elements())
            {
                ExtractElementContentAdvanced(childElement, contentBuilder, includeHyperlinks);
            }
            break;
    }
}
/// <summary>
/// Appends the text of a paragraph's direct runs and, when requested, of the runs
/// inside its hyperlinks.
/// </summary>
/// <remarks>
/// Text, tab and break children of each run are emitted in document order, matching what
/// the basic paragraph extractor keeps; dropping tabs and breaks would glue the
/// surrounding words together.
/// </remarks>
/// <param name="paragraph">Paragraph to read.</param>
/// <param name="contentBuilder">Builder that receives the extracted text.</param>
/// <param name="includeHyperlinks">Whether text inside hyperlinks is extracted.</param>
private static void ExtractParagraphContentAdvanced(Paragraph paragraph, StringBuilder contentBuilder, bool includeHyperlinks)
{
    foreach (var element in paragraph.Elements())
    {
        if (element is Run run)
        {
            AppendRun(run);
        }
        else if (includeHyperlinks && element is Hyperlink hyperlink)
        {
            foreach (var linkRun in hyperlink.Elements<Run>())
            {
                AppendRun(linkRun);
            }
        }
    }

    // Emits one run's text, tabs and breaks in the order they occur in the run.
    void AppendRun(Run source)
    {
        foreach (var child in source.Elements())
        {
            switch (child)
            {
                case Text text:
                    contentBuilder.Append(text.Text);
                    break;
                case TabChar _:
                    contentBuilder.Append("\t");
                    break;
                case Break _:
                    contentBuilder.AppendLine();
                    break;
            }
        }
    }
}
/// <summary>
/// Appends the text of a table using the advanced paragraph extractor: cells are
/// separated by tabs, rows end with a line break.
/// </summary>
/// <remarks>
/// Only paragraphs that are direct children of a cell are read; tables nested inside a
/// cell are not visited.
/// </remarks>
/// <param name="table">Table to read.</param>
/// <param name="contentBuilder">Builder that receives the extracted text.</param>
/// <param name="includeHyperlinks">Whether text inside hyperlinks is extracted.</param>
private static void ExtractTableContentAdvanced(Table table, StringBuilder contentBuilder, bool includeHyperlinks)
{
    foreach (TableRow tableRow in table.Elements<TableRow>())
    {
        foreach (TableCell tableCell in tableRow.Elements<TableCell>())
        {
            foreach (Paragraph cellParagraph in tableCell.Elements<Paragraph>())
            {
                ExtractParagraphContentAdvanced(cellParagraph, contentBuilder, includeHyperlinks);
            }

            // A tab closes every cell, including the last one of the row.
            contentBuilder.Append("\t");
        }

        // A line break closes every row.
        contentBuilder.AppendLine();
    }
}
/// <summary>
/// Appends a footnote section to the builder: a header line followed by one line per
/// footnote paragraph.
/// </summary>
/// <remarks>
/// Nothing is appended when the part has no footnotes root. Paragraphs are read with the
/// basic paragraph extractor, and only paragraphs that are direct children of a footnote
/// are visited.
/// </remarks>
/// <param name="footnotesPart">Footnotes part of the main document.</param>
/// <param name="contentBuilder">Builder that receives the extracted text.</param>
private static void ExtractFootnotesContent(FootnotesPart footnotesPart, StringBuilder contentBuilder)
{
    var footnotes = footnotesPart.Footnotes;
    if (footnotes == null)
    {
        return;
    }

    // Header that separates the footnote section from the body text.
    contentBuilder.AppendLine("\n--- 脚注 ---");

    // Flatten footnote -> paragraph; order is the same as with nested loops.
    var footnoteParagraphs = footnotes.Elements<Footnote>()
                                      .SelectMany(note => note.Elements<Paragraph>());
    foreach (var footnoteParagraph in footnoteParagraphs)
    {
        ExtractParagraphContent(footnoteParagraph, contentBuilder);
        contentBuilder.AppendLine();
    }
}
http://www.xdnf.cn/news/1182763.html

相关文章:

  • SQL性能优化
  • 基于开源链动2+1模式AI智能名片S2B2C商城小程序的私域流量池用户运营研究
  • 如何实现缓存音频功能(App端详解)
  • vscode 字体的更换
  • OpenCV 图像变换全解析:从镜像翻转到仿射变换的实践指南
  • VSCode——python选择解释器消失的解决办法
  • 【通识】算法案例
  • 安卓上的迷之K_1171477665
  • 顺应AI浪潮,电科金仓数据库再创辉煌
  • 2025真实面试试题分析-安卓客户端开发
  • 去除视频字幕 2, 使用 PaddleOCR 选取图片中的字幕区域, 根据像素大小 + 形状轮廓
  • AI浪潮涌,数据库“融合智能”奏响产业新乐章
  • I/O多路复用机制中触发机制详细解析
  • 【数据结构】长幼有序:树、二叉树、堆与TOP-K问题的层次解析(含源码)
  • 【SpringAI实战】实现仿DeepSeek页面对话机器人(支持多模态上传)
  • 【深度学习优化算法】09:Adadelta算法
  • JavaScript -Socket5代理使用
  • 攻防世界-Crypto-Morse
  • react+threejs实现自适应分屏查看/3D场景对比功能/双场景对比查看器
  • C 语言 | 结构体详解:自定义数据类型的艺术
  • 筑牢网站运营根基:售后工作的核心维度与实践方法
  • 篇五 网络通信硬件之PHY,MAC, RJ45
  • 车身域控制器MCU市场报告:解析行业现状与未来趋势
  • 【机器学习之推荐算法】基于矩阵分解和损失函数梯度下降的协同过滤算法实现
  • 解决angular与jetty websocket 每30s自动断连的问题
  • AR眼镜重塑外科手术导航:精准“透视”新突破
  • 从零开始的云计算生活——番外6,使用zabbix对中间件监控
  • 医疗数据挖掘Python机器学习案例
  • 告别静态文档!Oracle交互式技术架构图让数据库学习“活“起来
  • 详谈OSI七层模型和TCP/IP四层模型以及tcp与udp为什么是4层,http与https为什么是7层