识别PDF中的二维码
废话不多说,直接上料:
1、MainApp.java - 主程序
import java.util.List;

/**
 * Command-line entry point: extracts the text of every QR code found in a PDF
 * and prints the results to standard output.
 */
public class MainApp {

    /** Path used when no command-line argument is supplied. */
    private static final String DEFAULT_PDF_PATH = "path/to/your/document.pdf";

    /**
     * Runs the extraction and prints a numbered list of decoded QR code contents.
     *
     * @param args optional; when present, {@code args[0]} is the path of the PDF to
     *             process. Without arguments the built-in placeholder path is used.
     */
    public static void main(String[] args) {
        String pdfPath = (args != null && args.length > 0) ? args[0] : DEFAULT_PDF_PATH;

        try {
            System.out.println("开始处理PDF文件: " + pdfPath);
            List<String> qrContents = PDFProcessor.extractQRFromPDF(pdfPath);

            System.out.println("\n===== 二维码解析结果 =====");
            if (qrContents.isEmpty()) {
                System.out.println("未找到二维码");
            } else {
                int index = 1;
                for (String content : qrContents) {
                    System.out.println(index + ". " + content);
                    index++;
                }
            }
        } catch (Exception e) {
            // Top-level boundary: report the failure instead of letting it escape main.
            System.err.println("处理失败: " + e.getMessage());
            e.printStackTrace();
        }
    }
}
2、PDFProcessor.java - PDF处理
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.rendering.PDFRenderer;
import java.awt.image.BufferedImage;
import java.io.File;
import java.util.ArrayList;
import java.util.List;public class PDFProcessor {// 从PDF文件中提取二维码public static List<String> extractQRFromPDF(String filePath) throws Exception {List<String> allResults = new ArrayList<>();try (PDDocument document = PDDocument.load(new File(filePath))) {PDFRenderer renderer = new PDFRenderer(document);int pageCount = document.getNumberOfPages();System.out.println("处理PDF文档,共 " + pageCount + " 页");// 处理每一页for (int page = 0; page < pageCount; page++) {System.out.println("解析第 " + (page + 1) + " 页...");// 设置高DPI确保清晰度(重要!)BufferedImage image = renderer.renderImageWithDPI(page, 300); // 300 DPI// 使用增强版识别List<String> pageResults = QRCodeDetector.enhancedDetect(image);allResults.addAll(pageResults);System.out.println("发现二维码: " + pageResults.size());}}return allResults;}
}
3、QRCodeDetector.java - 二维码定位与解码
import com.google.zxing.*;
import com.google.zxing.client.j2se.BufferedImageLuminanceSource;
import com.google.zxing.common.HybridBinarizer;
import com.google.zxing.multi.GenericMultipleBarcodeReader;
import com.google.zxing.multi.MultipleBarcodeReader;import java.awt.image.BufferedImage;
import java.util.*;
import java.util.stream.Collectors;public class QRCodeDetector {// 查找二维码核心方法public static List<String> detectQRCode(BufferedImage image) {List<String> results = new ArrayList<>();try {// 1. 准备ZXing解码器LuminanceSource source = new BufferedImageLuminanceSource(image);BinaryBitmap bitmap = new BinaryBitmap(new HybridBinarizer(source));// 2. 设置解码参数Map<DecodeHintType, Object> hints = new EnumMap<>(DecodeHintType.class);hints.put(DecodeHintType.TRY_HARDER, Boolean.TRUE);hints.put(DecodeHintType.POSSIBLE_FORMATS, Collections.singletonList(BarcodeFormat.QR_CODE));// 3. 使用多二维码识别器MultipleBarcodeReader reader = new GenericMultipleBarcodeReader(new MultiFormatReader());Result[] zxResults = reader.decodeMultiple(bitmap, hints);// 4. 收集结果results = Arrays.stream(zxResults).map(Result::getText).collect(Collectors.toList());} catch (NotFoundException e) {// 未找到二维码是正常情况} catch (Exception e) {System.err.println("二维码识别错误: " + e.getMessage());}return results;}// 增强版二维码定位(处理小尺寸/低质量二维码)public static List<String> enhancedDetect(BufferedImage image) {final int MIN_QR_SIZE = 100; // 最小二维码尺寸(像素)// 尝试原始图像识别List<String> results = detectQRCode(image);if (!results.isEmpty()) return results;// 图像增强处理BufferedImage processedImage = ImagePreprocessor.enhanceImage(image);// 尝试处理后的图像results = detectQRCode(processedImage);if (!results.isEmpty()) return results;// 分区域识别(应对小尺寸二维码)return splitAndDetect(image, MIN_QR_SIZE);}// 分区域识别算法private static List<String> splitAndDetect(BufferedImage image, int minSize) {List<String> finalResults = new ArrayList<>();int width = image.getWidth();int height = image.getHeight();// 计算分割区域int cols = (int) Math.ceil((double) width / minSize);int rows = (int) Math.ceil((double) height / minSize);for (int i = 0; i < rows; i++) {for (int j = 0; j < cols; j++) {int x = j * minSize;int y = i * minSize;int w = Math.min(minSize, width - x);int h = Math.min(minSize, height - y);// 截取子区域BufferedImage subImage = image.getSubimage(x, y, w, h);// 检测子区域List<String> subResults = 
detectQRCode(subImage);finalResults.addAll(subResults);}}return finalResults;}
}
4、ImagePreprocessor.java - 图像预处理
import java.awt.image.BufferedImage;
import java.awt.image.ConvolveOp;
import java.awt.image.Kernel;

/**
 * Prepares an image for QR code detection: grayscale conversion, sharpening,
 * then black/white binarization with an automatically chosen threshold.
 */
public class ImagePreprocessor {

    /**
     * Runs the full enhancement pipeline on a copy of the image.
     *
     * @param original the image to enhance; it is not modified
     * @return a new {@code TYPE_BYTE_BINARY} image of the same dimensions
     * @throws NullPointerException if {@code original} is {@code null}
     */
    public static BufferedImage enhanceImage(BufferedImage original) {
        java.util.Objects.requireNonNull(original, "original");
        BufferedImage gray = toGrayScale(original);
        BufferedImage sharpened = sharpenImage(gray);
        return binarizeImage(sharpened);
    }

    /** Draws the image onto a new {@code TYPE_BYTE_GRAY} image of the same size. */
    private static BufferedImage toGrayScale(BufferedImage image) {
        BufferedImage grayImage = new BufferedImage(
                image.getWidth(), image.getHeight(), BufferedImage.TYPE_BYTE_GRAY);
        java.awt.Graphics2D g = grayImage.createGraphics();
        try {
            g.drawImage(image, 0, 0, null);
        } finally {
            g.dispose(); // release the graphics context instead of leaving it to GC
        }
        return grayImage;
    }

    /** Applies a 3x3 sharpening kernel to emphasize edges. */
    private static BufferedImage sharpenImage(BufferedImage image) {
        float[] sharpenMatrix = {
             0, -1,  0,
            -1,  5, -1,
             0, -1,  0
        };
        Kernel kernel = new Kernel(3, 3, sharpenMatrix);
        // The one-argument constructor zero-fills edge pixels, which would paint a
        // black one-pixel frame around the result; EDGE_NO_OP copies them unchanged.
        ConvolveOp op = new ConvolveOp(kernel, ConvolveOp.EDGE_NO_OP, null);
        return op.filter(image, null);
    }

    /**
     * Converts the image to pure black and white: pixels whose gray level is
     * strictly above the Otsu threshold become white, all others black.
     */
    private static BufferedImage binarizeImage(BufferedImage image) {
        int threshold = calculateOtsuThreshold(image);
        int width = image.getWidth();
        int height = image.getHeight();
        BufferedImage binary = new BufferedImage(width, height, BufferedImage.TYPE_BYTE_BINARY);

        // Work one row at a time: same pixel values as per-pixel getRGB/setRGB,
        // without a method call per pixel or a full-image buffer.
        int[] row = new int[width];
        for (int y = 0; y < height; y++) {
            image.getRGB(0, y, width, 1, row, 0, width);
            for (int x = 0; x < width; x++) {
                row[x] = grayLevel(row[x]) > threshold ? 0xFFFFFF : 0x000000;
            }
            binary.setRGB(0, y, width, 1, row, 0, width);
        }
        return binary;
    }

    /**
     * Computes a global threshold with Otsu's method, i.e. the gray level that
     * maximizes the between-class variance of the two resulting pixel groups.
     *
     * @return a threshold in {@code [0, 255]}; {@code 0} when all pixels share one level
     */
    private static int calculateOtsuThreshold(BufferedImage image) {
        int width = image.getWidth();
        int height = image.getHeight();

        int[] histogram = new int[256];
        int[] row = new int[width];
        for (int y = 0; y < height; y++) {
            image.getRGB(0, y, width, 1, row, 0, width);
            for (int x = 0; x < width; x++) {
                histogram[grayLevel(row[x])]++;
            }
        }

        // long/double throughout: level * count exceeds the int range on a mostly
        // white high-resolution page, and float cannot hold these sums exactly.
        long total = (long) width * height;
        long weightedSum = 0;
        for (int level = 0; level < 256; level++) {
            weightedSum += (long) level * histogram[level];
        }

        long backgroundSum = 0;
        long backgroundWeight = 0;
        double bestVariance = 0;
        int threshold = 0;

        for (int level = 0; level < 256; level++) {
            backgroundWeight += histogram[level];
            if (backgroundWeight == 0) {
                continue; // no pixels at or below this level yet
            }
            long foregroundWeight = total - backgroundWeight;
            if (foregroundWeight == 0) {
                break; // every pixel is already in the background class
            }

            backgroundSum += (long) level * histogram[level];
            double backgroundMean = (double) backgroundSum / backgroundWeight;
            double foregroundMean = (double) (weightedSum - backgroundSum) / foregroundWeight;
            double meanGap = backgroundMean - foregroundMean;
            double betweenVariance =
                    (double) backgroundWeight * foregroundWeight * meanGap * meanGap;

            if (betweenVariance > bestVariance) {
                bestVariance = betweenVariance;
                threshold = level;
            }
        }
        return threshold;
    }

    /** Returns the integer mean of the red, green and blue channels of an ARGB pixel. */
    private static int grayLevel(int argb) {
        int r = (argb >> 16) & 0xFF;
        int g = (argb >> 8) & 0xFF;
        int b = argb & 0xFF;
        return (r + g + b) / 3;
    }
}
5、依赖项(pom.xml)
<dependencies><!-- PDF processing --><dependency><groupId>org.apache.pdfbox</groupId><artifactId>pdfbox</artifactId><version>2.0.27</version></dependency><!-- QR code processing --><dependency><groupId>com.google.zxing</groupId><artifactId>core</artifactId><version>3.4.1</version></dependency><dependency><groupId>com.google.zxing</groupId><artifactId>javase</artifactId><version>3.4.1</version></dependency><!-- Image processing. NOTE(review): none of the Java listings in this file import a com.twelvemonkeys class; confirm this dependency is actually needed. --><dependency><groupId>com.twelvemonkeys.imageio</groupId><artifactId>imageio-core</artifactId><version>3.9.4</version></dependency>
</dependencies>
解决方案思路
PDF转图像:将PDF每页转换为高分辨率图像
二维码定位:使用图像处理技术寻找二维码特征
二维码解码:使用ZXing解析二维码内容
多页处理:遍历PDF所有页面
技术要点说明
智能定位策略:
多级检测机制(原始图→增强图→分区域检测)
自动图像分割(应对小尺寸二维码)
大津算法自动计算二值化阈值
图像增强处理:
灰度转换减少干扰
锐化处理增强边缘
基于大津算法自动选取全局阈值进行二值化,提高对比度
容错机制:
多二维码识别(GenericMultipleBarcodeReader)
TRY_HARDER模式提升识别率
异常处理避免程序中断
性能优化:
按需分割图像(仅在整图识别失败后才进行分区域扫描)
高DPI渲染(平衡质量和性能)
分页处理降低内存占用
使用注意事项
分辨率要求:
最小二维码尺寸应大于100×100像素
模糊的PDF建议提高DPI(可调整至400-600)
特殊场景处理:
如遇扭曲的二维码,可增加图像旋转检测
复杂背景PDF需调整二值化参数
加密的PDF需先解密,再进行识别
性能建议:
大文件建议分页处理
批量处理使用线程池
启用ZXing的TRY_HARDER模式会降低速度