当前位置: 首页 > ds >正文

常用hook钩子函数

爬虫Hook技术常用字段和钩子函数

目录

  • Hook技术概述
  • 网络请求相关Hook
  • 浏览器环境Hook
  • JavaScript引擎Hook
  • 加密算法Hook
  • 反爬虫检测Hook
  • 实际应用示例
  • Hook工具和框架

Hook技术概述

Hook(钩子)技术是一种在程序运行时拦截和修改函数调用的技术。在爬虫中,Hook技术主要用于:

  • 绕过反爬虫检测
  • 获取加密参数
  • 模拟真实浏览器行为
  • 动态修改请求参数

网络请求相关Hook

1. XMLHttpRequest Hook

// Intercept XMLHttpRequest.prototype.open and XMLHttpRequest.prototype.send.
(function () {
  const originalOpen = XMLHttpRequest.prototype.open;

  /**
   * Logging wrapper around XMLHttpRequest#open that can rewrite the URL.
   *
   * @param {string} method - HTTP method passed by the caller.
   * @param {string|URL} url - Request URL passed by the caller.
   * @param {...*} rest - Optional `async`, `user`, `password` arguments.
   * @returns {*} Whatever the native `open` returns.
   */
  XMLHttpRequest.prototype.open = function (method, url, ...rest) {
    const [async, user, password] = rest;
    console.log('XHR Request:', {
      method: method,
      url: url,
      async: async,
      user: user,
      password: password,
    });

    // Request parameters can be modified here.
    // `url` may be a URL object, which has no `includes`, so compare its
    // string form.
    let targetUrl = url;
    const urlText = String(url);
    if (urlText.includes('api.example.com')) {
      targetUrl = urlText.replace('api.example.com', 'api.hooked.com');
    }

    // Forward only the arguments the caller supplied. Passing an explicit
    // `undefined` for `async` selects the five-argument overload and turns
    // the request into a synchronous one.
    return originalOpen.call(this, method, targetUrl, ...rest);
  };

  // Intercept send().
  const originalSend = XMLHttpRequest.prototype.send;

  /**
   * Logging wrapper around XMLHttpRequest#send that can rewrite string bodies.
   *
   * @param {*} data - Request body passed by the caller.
   * @returns {*} Whatever the native `send` returns.
   */
  XMLHttpRequest.prototype.send = function (data) {
    console.log('XHR Send Data:', data);

    // The outgoing payload can be modified here (string bodies only).
    let payload = data;
    if (payload && typeof payload === 'string') {
      payload = payload.replace('old_value', 'new_value');
    }
    return originalSend.call(this, payload);
  };
})();

2. Fetch API Hook

// Intercept window.fetch.
(function () {
  const originalFetch = window.fetch;

  /**
   * Logging wrapper around fetch that tags outgoing requests with an
   * `X-Hooked: true` header when the caller supplied a `headers` option.
   *
   * @param {*} url - First argument passed to fetch (URL string or Request).
   * @param {object} [options] - Fetch init options passed by the caller.
   * @returns {Promise<Response>} The promise returned by the native fetch.
   */
  window.fetch = function (url, options) {
    console.log('Fetch Request:', {
      url: url,
      options: options,
    });

    // Modify the request headers. `headers` may be a Headers instance, an
    // array of [name, value] pairs, or a plain object; bracket assignment
    // only works for the plain-object form.
    if (options && options.headers) {
      const headers = options.headers;
      if (typeof Headers !== 'undefined' && headers instanceof Headers) {
        headers.set('X-Hooked', 'true');
      } else if (Array.isArray(headers)) {
        headers.push(['X-Hooked', 'true']);
      } else {
        headers['X-Hooked'] = 'true';
      }
    }

    return originalFetch.call(this, url, options).then((response) => {
      console.log('Fetch Response:', response);
      return response;
    });
  };
})();

3. Axios Hook

// Intercept axios requests and responses (only when axios is on the page).
(function () {
  if (!window.axios) {
    return;
  }

  // Request interceptor: logs the config, adds a marker header, and flags
  // plain-object payloads.
  window.axios.interceptors.request.use(
    function (config) {
      console.log('Axios Request Config:', config);

      // Modify the request headers.
      config.headers['X-Hooked'] = 'true';

      // Modify the request data. Only plain objects are touched; strings,
      // FormData, Blobs and similar bodies cannot carry an extra field this way.
      if (
        config.data &&
        Object.prototype.toString.call(config.data) === '[object Object]'
      ) {
        config.data.hooked = true;
      }
      return config;
    },
    function (error) {
      return Promise.reject(error);
    }
  );

  // Response interceptor: logs responses and errors, then passes them on.
  window.axios.interceptors.response.use(
    function (response) {
      console.log('Axios Response:', response);
      return response;
    },
    function (error) {
      console.log('Axios Error:', error);
      return Promise.reject(error);
    }
  );
})();

浏览器环境Hook

1. Navigator对象Hook

// Hook navigator.userAgent: log the real value, report a fixed one.
(function () {
  const originalUserAgent = Object.getOwnPropertyDescriptor(
    Navigator.prototype,
    'userAgent'
  );
  Object.defineProperty(Navigator.prototype, 'userAgent', {
    get: function () {
      const userAgent = originalUserAgent.get.call(this);
      console.log('UserAgent accessed:', userAgent);
      // Return the spoofed userAgent.
      return 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36';
    },
  });
})();

// Hook navigator.platform: log the real value, report 'Win32'.
(function () {
  const originalPlatform = Object.getOwnPropertyDescriptor(
    Navigator.prototype,
    'platform'
  );
  Object.defineProperty(Navigator.prototype, 'platform', {
    get: function () {
      const platform = originalPlatform.get.call(this);
      console.log('Platform accessed:', platform);
      return 'Win32';
    },
  });
})();

// Hook navigator.language: log the real value, report 'zh-CN'.
(function () {
  const originalLanguage = Object.getOwnPropertyDescriptor(
    Navigator.prototype,
    'language'
  );
  Object.defineProperty(Navigator.prototype, 'language', {
    get: function () {
      const language = originalLanguage.get.call(this);
      console.log('Language accessed:', language);
      return 'zh-CN';
    },
  });
})();

2. Screen对象Hook

// Hook screen.width and screen.height: log the real value, report 1920x1080.
(() => {
  const screenProto = Screen.prototype;
  const nativeWidth = Object.getOwnPropertyDescriptor(screenProto, 'width');
  const nativeHeight = Object.getOwnPropertyDescriptor(screenProto, 'height');

  Object.defineProperty(screenProto, 'width', {
    get() {
      const realWidth = nativeWidth.get.call(this);
      console.log('Screen width accessed:', realWidth);
      return 1920;
    },
  });

  Object.defineProperty(screenProto, 'height', {
    get() {
      const realHeight = nativeHeight.get.call(this);
      console.log('Screen height accessed:', realHeight);
      return 1080;
    },
  });
})();

3. Window对象Hook

// Hook window.innerWidth and window.innerHeight: log the real value, report
// 1366x768.
(function () {
  /**
   * Finds the object that actually owns the accessor for `name`, starting at
   * `window` and walking up the prototype chain. Window is a global
   * interface, so its attributes are exposed on the window object itself
   * rather than on Window.prototype; looking only at the prototype finds
   * nothing and the hook never takes effect.
   *
   * @param {string} name - Property name to look up.
   * @returns {{holder: object, descriptor: PropertyDescriptor}|null}
   */
  function findAccessor(name) {
    let holder = window;
    while (holder) {
      const descriptor = Object.getOwnPropertyDescriptor(holder, name);
      if (descriptor) {
        return { holder: holder, descriptor: descriptor };
      }
      holder = Object.getPrototypeOf(holder);
    }
    return null;
  }

  /**
   * Replaces the getter of a window dimension with one that logs the real
   * value and returns `spoofed`. Does nothing if no getter is found.
   *
   * @param {string} name - Property to hook.
   * @param {string} label - Log prefix.
   * @param {number} spoofed - Value reported to callers.
   */
  function hookDimension(name, label, spoofed) {
    const found = findAccessor(name);
    if (!found || typeof found.descriptor.get !== 'function') {
      return;
    }
    const originalGet = found.descriptor.get;
    // Only `get` is specified, so the existing setter and attributes are kept.
    Object.defineProperty(found.holder, name, {
      get: function () {
        const real = originalGet.call(this);
        console.log(label, real);
        return spoofed;
      },
    });
  }

  hookDimension('innerWidth', 'Inner width accessed:', 1366);
  hookDimension('innerHeight', 'Inner height accessed:', 768);
})();

4. Document对象Hook

// Hook document.cookie: log every read and write, pass values through
// unchanged.
(() => {
  const documentProto = Document.prototype;
  const nativeCookie = Object.getOwnPropertyDescriptor(documentProto, 'cookie');

  Object.defineProperty(documentProto, 'cookie', {
    get() {
      const current = nativeCookie.get.call(this);
      console.log('Cookie accessed:', current);
      return current;
    },
    set(value) {
      console.log('Cookie set:', value);
      return nativeCookie.set.call(this, value);
    },
  });
})();

// Hook document.referrer: log the real value, report a fixed Google referrer.
(() => {
  const documentProto = Document.prototype;
  const nativeReferrer = Object.getOwnPropertyDescriptor(
    documentProto,
    'referrer'
  );

  Object.defineProperty(documentProto, 'referrer', {
    get() {
      const realReferrer = nativeReferrer.get.call(this);
      console.log('Referrer accessed:', realReferrer);
      return 'https://www.google.com/';
    },
  });
})();

JavaScript引擎Hook

1. Date对象Hook

// Hook the Date constructor: log arguments and pin zero-argument
// construction to a fixed instant.
(function () {
  const OriginalDate = Date;
  const FIXED_ISO = '2023-01-01T00:00:00.000Z';

  /**
   * Replacement for the global Date constructor.
   *
   * - `new Date()` returns the fixed instant 2023-01-01T00:00:00.000Z.
   * - `new Date(...args)` behaves like the native constructor.
   * - `Date()` without `new` returns a string, as the native function does.
   *
   * @param {...*} args - Arguments forwarded to the native constructor.
   * @returns {Date|string}
   */
  function HookedDate(...args) {
    console.log('Date constructor called with:', args);

    if (!new.target) {
      // Native Date() ignores its arguments and returns the "current" time
      // as a string; under this hook the current time is the fixed instant.
      return new OriginalDate(FIXED_ISO).toString();
    }

    // Return the fixed time when no arguments are given.
    const ctorArgs = args.length === 0 ? [FIXED_ISO] : args;
    // Reflect.construct keeps subclasses (`class X extends Date`) working by
    // honouring new.target's prototype.
    return Reflect.construct(OriginalDate, ctorArgs, new.target);
  }

  // Share the native prototype so `instanceof Date` and Date.prototype
  // methods keep working, and point `constructor` at the hook so
  // `new someDate.constructor()` does not bypass it.
  HookedDate.prototype = OriginalDate.prototype;
  OriginalDate.prototype.constructor = HookedDate;

  // Copy the static methods.
  HookedDate.now = OriginalDate.now;
  HookedDate.parse = OriginalDate.parse;
  HookedDate.UTC = OriginalDate.UTC;

  Date = HookedDate;
})();

// Hook Date.now(): log the real timestamp, return a fixed one.
(function () {
  const originalNow = Date.now;
  Date.now = function () {
    const now = originalNow();
    console.log('Date.now() called:', now);
    // Return a fixed timestamp: 2023-01-01 00:00:00 UTC.
    return 1672531200000;
  };
})();

2. Math对象Hook

// Hook Math.random(): log the real value, return a fixed one.
(function () {
  const originalRandom = Math.random;
  Math.random = function () {
    const random = originalRandom();
    console.log('Math.random() called:', random);
    // Return a fixed (or otherwise modified) value.
    return 0.5;
  };
})();

// Hook Math.floor(): log the argument and result, pass the result through.
(function () {
  const originalFloor = Math.floor;
  Math.floor = function (x) {
    const result = originalFloor(x);
    console.log('Math.floor() called with:', x, 'result:', result);
    return result;
  };
})();

3. JSON对象Hook

// Hook JSON.stringify(): log the input value and the serialized output.
(() => {
  const nativeStringify = JSON.stringify;

  JSON.stringify = function (value, replacer, space) {
    console.log('JSON.stringify() called with:', value);
    const serialized = nativeStringify(value, replacer, space);
    console.log('JSON.stringify() result:', serialized);
    return serialized;
  };
})();

// Hook JSON.parse(): log the input text and the parsed output.
(() => {
  const nativeParse = JSON.parse;

  JSON.parse = function (text, reviver) {
    console.log('JSON.parse() called with:', text);
    const parsed = nativeParse(text, reviver);
    console.log('JSON.parse() result:', parsed);
    return parsed;
  };
})();

加密算法Hook

1. Crypto API Hook

// Hook crypto.getRandomValues()
(function() {const originalGetRandomValues = crypto.getRandomValues;crypto.getRandomValues = function(array) {console.log('crypto.getRandomValues() called with:', array);const result = originalGetRandomValues.call(this, array);console.log('crypto.getRandomValues() result:', result);return result;};
})();// Hook crypto.subtle.digest()
(function() {const originalDigest = crypto.subtle.digest;crypto.subtle.digest = function(algorithm, data) {console.log('crypto.subtle.digest() called with:', {algorithm: algorithm,data: data});return originalDigest.call(this, algorithm, data).then(result => {console.log('crypto.subtle.digest() result:', result);return result;});};
})();

2. 常见加密库Hook

// Hook CryptoJS (only when the library is present on the page).
(function () {
  if (!window.CryptoJS) {
    return;
  }

  // Hook MD5: log the input message and the digest.
  const originalMD5 = CryptoJS.MD5;
  CryptoJS.MD5 = function (message, options) {
    console.log('CryptoJS.MD5() called with:', message);
    const result = originalMD5(message, options);
    console.log('CryptoJS.MD5() result:', result.toString());
    return result;
  };

  // Hook SHA256: log the input message and the digest.
  const originalSHA256 = CryptoJS.SHA256;
  CryptoJS.SHA256 = function (message, options) {
    console.log('CryptoJS.SHA256() called with:', message);
    const result = originalSHA256(message, options);
    console.log('CryptoJS.SHA256() result:', result.toString());
    return result;
  };

  // Hook AES. The methods are patched in place instead of replacing
  // CryptoJS.AES with a new object, so any other members of the original
  // object and references held to it stay valid.
  const aes = CryptoJS.AES;
  if (aes) {
    const originalEncrypt = aes.encrypt;
    const originalDecrypt = aes.decrypt;

    aes.encrypt = function (message, key, options) {
      console.log('CryptoJS.AES.encrypt() called with:', {
        message: message,
        key: key,
        options: options,
      });
      const result = originalEncrypt.call(this, message, key, options);
      console.log('CryptoJS.AES.encrypt() result:', result.toString());
      return result;
    };

    aes.decrypt = function (ciphertext, key, options) {
      console.log('CryptoJS.AES.decrypt() called with:', {
        ciphertext: ciphertext,
        key: key,
        options: options,
      });
      const result = originalDecrypt.call(this, ciphertext, key, options);
      console.log('CryptoJS.AES.decrypt() result:', result.toString());
      return result;
    };
  }
})();

反爬虫检测Hook

1. WebDriver检测Hook

// Hook the navigator.webdriver property.
(function () {
  Object.defineProperty(navigator, 'webdriver', {
    get: function () {
      console.log('webdriver property accessed');
      // false means "not driven by WebDriver".
      return false;
    },
    // Without this the new own property is non-configurable, and installing
    // a second webdriver hook on the same page throws
    // "Cannot redefine property".
    configurable: true,
  });
})();

// Hook the chrome object: provide a minimal stand-in when window.chrome is
// missing.
(function () {
  if (!window.chrome) {
    window.chrome = {
      runtime: {},
      loadTimes: function () {
        return {
          commitLoadTime: 0,
          connectionInfo: 'h2',
          finishDocumentLoadTime: 0,
          finishLoadTime: 0,
          firstPaintAfterLoadTime: 0,
          navigationType: 'Other',
          npnNegotiatedProtocol: 'h2',
          requestTime: 0,
          startLoadTime: 0,
          wasAlternateProtocolAvailable: false,
          wasFetchedViaSpdy: true,
          wasNpnNegotiated: true,
        };
      },
    };
  }
})();

2. 指纹检测Hook

// Hook the canvas fingerprint.
(function () {
  const originalToDataURL = HTMLCanvasElement.prototype.toDataURL;

  /**
   * Logging wrapper around canvas.toDataURL that returns a fixed value for
   * explicit 'image/png' requests and the real data URL otherwise.
   *
   * @param {string} [type] - Requested image MIME type.
   * @param {*} [quality] - Encoder quality forwarded to the native method.
   * @returns {string}
   */
  HTMLCanvasElement.prototype.toDataURL = function (type, quality) {
    console.log('Canvas toDataURL() called');
    const result = originalToDataURL.call(this, type, quality);

    // The canvas fingerprint can be modified here.
    if (type === 'image/png') {
      // Return the fixed canvas fingerprint.
      // NOTE(review): the fixed value is an empty string here; presumably a
      // data URL was intended. Confirm and substitute a real value.
      return '';
    }
    return result;
  };
})();

// Hook the WebGL fingerprint.
(function () {
  const originalGetParameter = WebGLRenderingContext.prototype.getParameter;

  /**
   * Logging wrapper around WebGLRenderingContext#getParameter that reports
   * fixed vendor and renderer strings.
   *
   * @param {number} parameter - GL parameter enum value.
   * @returns {*} Spoofed string for 37445/37446, native result otherwise.
   */
  WebGLRenderingContext.prototype.getParameter = function (parameter) {
    console.log('WebGL getParameter() called with:', parameter);
    const result = originalGetParameter.call(this, parameter);

    // Override selected WebGL parameters.
    if (parameter === 37445) {
      // UNMASKED_VENDOR_WEBGL
      return 'Intel Inc.';
    }
    if (parameter === 37446) {
      // UNMASKED_RENDERER_WEBGL
      return 'Intel Iris OpenGL Engine';
    }
    return result;
  };
})();

3. 行为检测Hook

// Hook mouse events registered through addEventListener.
(function () {
  const originalAddEventListener = EventTarget.prototype.addEventListener;
  const originalRemoveEventListener =
    EventTarget.prototype.removeEventListener;

  // Maps each original listener function to its logging wrapper, so the same
  // wrapper is reused on re-registration and can be found again on removal.
  const mouseWrappers = new WeakMap();

  /** Returns true for event types that start with "mouse". */
  function isMouseType(type) {
    return String(type).startsWith('mouse');
  }

  /** Returns the logging wrapper for `listener`, creating it on first use. */
  function wrapMouseListener(listener) {
    let wrapper = mouseWrappers.get(listener);
    if (!wrapper) {
      wrapper = function (event) {
        console.log('Mouse event triggered:', event.type, event);
        return listener.call(this, event);
      };
      mouseWrappers.set(listener, wrapper);
    }
    return wrapper;
  }

  /**
   * Logging wrapper around addEventListener. Function listeners for mouse
   * events are wrapped so each dispatch is logged; object listeners
   * (`handleEvent`) and null are forwarded untouched.
   */
  EventTarget.prototype.addEventListener = function (type, listener, options) {
    console.log('addEventListener() called with:', {
      type: type,
      listener: listener,
      options: options,
    });

    // For mouse events, a random delay could be added here.
    let effectiveListener = listener;
    if (typeof listener === 'function' && isMouseType(type)) {
      effectiveListener = wrapMouseListener(listener);
    }
    return originalAddEventListener.call(
      this,
      type,
      effectiveListener,
      options
    );
  };

  /**
   * Companion hook for removeEventListener. Without it, page code could never
   * remove a mouse listener, because the browser only knows the wrapper.
   */
  EventTarget.prototype.removeEventListener = function (
    type,
    listener,
    options
  ) {
    if (typeof listener === 'function' && isMouseType(type)) {
      const wrapper = mouseWrappers.get(listener);
      if (wrapper) {
        originalRemoveEventListener.call(this, type, wrapper, options);
      }
    }
    // Also try the original function: it may have been registered before
    // this hook was installed. Removing an unregistered listener is a no-op.
    return originalRemoveEventListener.call(this, type, listener, options);
  };
})();

// Hook keyboard events: log reads of KeyboardEvent#key and #keyCode.
(function () {
  const keyboardProto = KeyboardEvent.prototype;
  // Use property descriptors. Reading `KeyboardEvent.prototype.key` directly
  // invokes the getter on the prototype instead of yielding the accessor.
  const originalKey = Object.getOwnPropertyDescriptor(keyboardProto, 'key');
  const originalKeyCode = Object.getOwnPropertyDescriptor(
    keyboardProto,
    'keyCode'
  );

  if (originalKey && typeof originalKey.get === 'function') {
    Object.defineProperty(keyboardProto, 'key', {
      get: function () {
        const key = originalKey.get.call(this);
        console.log('Keyboard key accessed:', key);
        return key;
      },
    });
  }

  if (originalKeyCode && typeof originalKeyCode.get === 'function') {
    Object.defineProperty(keyboardProto, 'keyCode', {
      get: function () {
        const keyCode = originalKeyCode.get.call(this);
        console.log('Keyboard keyCode accessed:', keyCode);
        return keyCode;
      },
    });
  }
})();

实际应用示例

1. 完整的反检测Hook脚本

// Complete anti-detection hook script.
(function () {
  'use strict';

  console.log('Anti-detection hooks loaded');

  // 1. Hide webdriver. `configurable: true` lets the property be redefined
  //    if another hook script on the same page also overrides it.
  Object.defineProperty(navigator, 'webdriver', {
    get: () => false,
    configurable: true,
  });

  // 2. Override userAgent.
  Object.defineProperty(navigator, 'userAgent', {
    get: () =>
      'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
    configurable: true,
  });

  // 3. Add a chrome object when it is missing.
  if (!window.chrome) {
    window.chrome = {
      runtime: {},
      loadTimes: () => ({
        commitLoadTime: 0,
        connectionInfo: 'h2',
        finishDocumentLoadTime: 0,
        finishLoadTime: 0,
        firstPaintAfterLoadTime: 0,
        navigationType: 'Other',
        npnNegotiatedProtocol: 'h2',
        requestTime: 0,
        startLoadTime: 0,
        wasAlternateProtocolAvailable: false,
        wasFetchedViaSpdy: true,
        wasNpnNegotiated: true,
      }),
    };
  }

  // 4. Hook the canvas fingerprint: explicit PNG requests get a fixed value.
  const originalToDataURL = HTMLCanvasElement.prototype.toDataURL;
  HTMLCanvasElement.prototype.toDataURL = function (type, quality) {
    if (type === 'image/png') {
      return '';
    }
    return originalToDataURL.call(this, type, quality);
  };

  // 5. Hook the WebGL fingerprint (37445 = UNMASKED_VENDOR_WEBGL,
  //    37446 = UNMASKED_RENDERER_WEBGL).
  const originalGetParameter = WebGLRenderingContext.prototype.getParameter;
  WebGLRenderingContext.prototype.getParameter = function (parameter) {
    if (parameter === 37445) return 'Intel Inc.';
    if (parameter === 37446) return 'Intel Iris OpenGL Engine';
    return originalGetParameter.call(this, parameter);
  };

  // 6. Hook Math.random. Currently a pass-through; extra randomness can be
  //    added here.
  const originalRandom = Math.random;
  Math.random = function () {
    const random = originalRandom();
    return random;
  };

  // 7. Hook Date.now. Currently a pass-through; a time offset can be added
  //    here.
  const originalNow = Date.now;
  Date.now = function () {
    const now = originalNow();
    return now;
  };

  console.log('Anti-detection hooks completed');
})();

2. 加密参数Hook脚本

// Encryption-parameter hook script: logs the inputs and outputs of common
// hashing, randomness and serialization calls.
(function () {
  'use strict';

  console.log('Encryption parameter hooks loaded');

  // Hook CryptoJS (only when the library is present on the page).
  if (window.CryptoJS) {
    const originalMD5 = CryptoJS.MD5;
    CryptoJS.MD5 = function (message, options) {
      console.log('MD5 input:', message);
      const result = originalMD5(message, options);
      console.log('MD5 output:', result.toString());
      return result;
    };

    const originalSHA256 = CryptoJS.SHA256;
    CryptoJS.SHA256 = function (message, options) {
      console.log('SHA256 input:', message);
      const result = originalSHA256(message, options);
      console.log('SHA256 output:', result.toString());
      return result;
    };
  }

  // Hook the crypto API.
  const originalGetRandomValues = crypto.getRandomValues;
  crypto.getRandomValues = function (array) {
    console.log('getRandomValues input:', array);
    const result = originalGetRandomValues.call(this, array);
    console.log('getRandomValues output:', result);
    return result;
  };

  // Hook JSON.stringify.
  const originalStringify = JSON.stringify;
  JSON.stringify = function (value, replacer, space) {
    console.log('JSON.stringify input:', value);
    const result = originalStringify(value, replacer, space);
    console.log('JSON.stringify output:', result);
    return result;
  };

  console.log('Encryption parameter hooks completed');
})();

3. 网络请求Hook脚本

// Network request hook script: logs XMLHttpRequest, fetch and axios traffic
// without modifying it.
(function () {
  'use strict';

  console.log('Network request hooks loaded');

  // Hook XMLHttpRequest.
  const originalOpen = XMLHttpRequest.prototype.open;
  const originalSend = XMLHttpRequest.prototype.send;

  XMLHttpRequest.prototype.open = function (method, url, ...rest) {
    const [async, user, password] = rest;
    console.log('XHR Open:', { method, url, async, user, password });
    // Forward only the arguments the caller supplied. An explicit
    // `undefined` for `async` would turn the request into a synchronous one.
    return originalOpen.call(this, method, url, ...rest);
  };

  XMLHttpRequest.prototype.send = function (data) {
    console.log('XHR Send:', data);
    return originalSend.call(this, data);
  };

  // Hook fetch.
  const originalFetch = window.fetch;
  window.fetch = function (url, options) {
    console.log('Fetch Request:', { url, options });
    return originalFetch.call(this, url, options).then((response) => {
      console.log('Fetch Response:', response);
      return response;
    });
  };

  // Hook axios (only when the library is present on the page).
  if (window.axios) {
    window.axios.interceptors.request.use(function (config) {
      console.log('Axios Request:', config);
      return config;
    });

    window.axios.interceptors.response.use(function (response) {
      console.log('Axios Response:', response);
      return response;
    });
  }

  console.log('Network request hooks completed');
})();

Hook工具和框架

1. 浏览器扩展

  • Tampermonkey: 用户脚本管理器
  • Greasemonkey: Firefox用户脚本管理器
  • Violentmonkey: 现代化的用户脚本管理器

2. 代理工具

  • Fiddler: 网络调试代理
  • Charles: 网络代理工具
  • Burp Suite: Web应用安全测试工具

3. 浏览器自动化

  • Puppeteer: Node.js浏览器自动化
  • Selenium: 浏览器自动化框架
  • Playwright: 现代化的浏览器自动化

4. 移动端Hook

  • Frida: 动态插桩工具
  • Xposed: Android框架Hook
  • Substrate: iOS越狱Hook框架

本文档提供了爬虫中常用的Hook技术和钩子函数,建议在实际使用中根据具体需求进行调整和优化。请注意遵守相关法律法规和网站使用条款。

http://www.xdnf.cn/news/17689.html

相关文章:

  • 快速了解DBSCAN算法
  • Vue.js设计于实现 - 响应式(三)
  • 音视频学习(五十二):ADTS
  • Graham 算法求二维凸包
  • Python 2025:最新技术趋势与展望
  • 每日五个pyecharts可视化图表-line:从入门到精通 (2)
  • lesson34:深入理解Python线程:从基础到实战优化
  • jupyter notebook如何打开其他盘目录
  • MCP学习与实践
  • [激光原理与应用-222]:机械 - 3D设计与2D设计的异同比较
  • Linux 虚拟机磁盘空间占满-全面清理方案
  • Cesium1.95中如何高效管理 1500 个高频实体
  • 赋值运算符指南
  • 代码可读性与维护性的实践与原则
  • word中,添加新的参考文献后,其他参考文献的交叉引用不能及时更新的解决办法
  • 《Webpack与Vite热模块替换机制深度剖析与策略抉择》
  • 二维前缀和问题
  • 如何在 Ubuntu 24.04 LTS Linux 上安装 MySQL 服务器
  • 电脑本地摄像头做成rtsp流调用测试windows系统中
  • 【大智慧数据】心智开花的时候
  • AI测试助手如何让Bug无处可藏
  • Dify 从入门到精通(第 26/100 篇):Dify 的知识图谱集成
  • 2025最新免费的大模型和免费的大模型API有哪些?(202508更新)
  • 2025年6月电子学会全国青少年软件编程等级考试(Python二级)真题及答案
  • 【Linux指南】Vim的全面解析与深度应用
  • C语言第八章指针四
  • 【接口自动化】初识pytest,一文讲解pytest的安装,识别规则以及配置文件的使用
  • Jotai:React轻量级状态管理新选择
  • Code Exercising Day 10 of “Code Ideas Record“:StackQueue part02
  • SQL三剑客:DELETE、TRUNCATE、DROP全解析