Front-End Video Stream Processing from Zero to "It Runs": Copy-Paste Demos + WebGL/Worker Optimizations, Covering Meetings, Live Streaming, and Surveillance
Abstract (background and current state)
Video has become standard equipment for most applications: live streaming, meetings, short video, surveillance, and interactive classrooms all depend on it. On the front end, browsers provide <video>, <canvas>, WebGL, WebCodecs, WebRTC, Web Workers, OffscreenCanvas, and more. Used well, these let you do frame-level video processing directly on the user's device, with fewer dropped frames, lower CPU usage, and very low latency. This article starts from small "it runs" demos and connects the common optimization points with practical scenarios.
Introduction (current developments and application scenarios)
Video processing used to live mostly on the server; more and more of the logic is now moving to the client: background blur, beauty filters, real-time stickers, motion detection, bandwidth adaptation, on-device stabilization and denoising can all be done in the browser. The core recipe is: decode or play with <video> → draw each frame to a <canvas> or OffscreenCanvas → do pixel-level processing with the 2D context, GL shaders, or WebAssembly → hand the result to the rest of the chain (display, recording, streaming) via canvas.captureStream() or MediaStreamTrackProcessor.
Overall approach (from zero to working)
Basic capture and per-frame processing
The idea is straightforward: get the camera stream with getUserMedia, play it in a <video>, draw every frame to a <canvas>, and run the pixel processing right after each draw. Modern browsers also support requestVideoFrameCallback, which tracks the video frame cadence better than requestAnimationFrame.
Code example (basic demo, can be saved as an .html file and run directly)
<!doctype html>
<html lang="en">
<head>
  <meta charset="utf-8" />
  <title>Basic Front-End Video Stream Processing Demo</title>
  <style>
    body { font-family: ui-sans-serif, system-ui; margin: 24px; display: grid; gap: 16px; }
    .row { display: grid; grid-template-columns: 1fr 1fr; gap: 16px; align-items: start; }
    video, canvas { width: 100%; background: #111; border-radius: 12px; }
    .controls { display: flex; gap: 12px; align-items: center; flex-wrap: wrap; }
    label { display: flex; gap: 6px; align-items: center; }
    input[type="range"] { width: 200px; }
  </style>
</head>
<body>
  <h1>Front-end video stream processing: Canvas + per-frame pixel operations</h1>
  <div class="controls">
    <button id="start">Start capture</button>
    <label>Grayscale <input id="grayscale" type="range" min="0" max="1" step="0.01" value="0"></label>
    <label>Contrast <input id="contrast" type="range" min="-100" max="100" step="1" value="0"></label>
    <label>Threshold <input id="threshold" type="range" min="0" max="255" step="1" value="0"></label>
  </div>
  <div class="row">
    <div><h3>Original video</h3><video id="video" playsinline muted></video></div>
    <div><h3>Processed output (Canvas)</h3><canvas id="canvas"></canvas></div>
  </div>
  <script type="module">
    const video = document.getElementById('video');
    const canvas = document.getElementById('canvas');
    const ctx = canvas.getContext('2d', { willReadFrequently: true });
    const startBtn = document.getElementById('start');
    const grayscale = document.getElementById('grayscale');
    const contrast = document.getElementById('contrast');
    const threshold = document.getElementById('threshold');

    async function start() {
      const stream = await navigator.mediaDevices.getUserMedia({
        video: { width: 1280, height: 720 },
        audio: false
      });
      video.srcObject = stream;
      await video.play();
      resize();
      runLoop();
    }

    function resize() {
      canvas.width = video.videoWidth || 640;
      canvas.height = video.videoHeight || 480;
    }

    function applyPixelEffects(imageData) {
      const { data } = imageData;
      const g = parseFloat(grayscale.value);
      const c = parseInt(contrast.value, 10);
      const t = parseInt(threshold.value, 10);
      // Precompute the contrast factor once per frame
      const factor = (259 * (c + 255)) / (255 * (259 - c));
      for (let i = 0; i < data.length; i += 4) {
        let r = data[i], gCh = data[i + 1], b = data[i + 2];
        // Grayscale blend
        if (g > 0) {
          const gray = 0.2126 * r + 0.7152 * gCh + 0.0722 * b;
          r += (gray - r) * g;
          gCh += (gray - gCh) * g;
          b += (gray - b) * g;
        }
        // Contrast
        r = factor * (r - 128) + 128;
        gCh = factor * (gCh - 128) + 128;
        b = factor * (b - 128) + 128;
        // Simple thresholding
        if (t > 0) {
          const v = (r + gCh + b) / 3 >= t ? 255 : 0;
          r = gCh = b = v;
        }
        data[i] = r; data[i + 1] = gCh; data[i + 2] = b;
      }
      return imageData;
    }

    function drawOnce() {
      ctx.drawImage(video, 0, 0, canvas.width, canvas.height);
      const frame = ctx.getImageData(0, 0, canvas.width, canvas.height);
      ctx.putImageData(applyPixelEffects(frame), 0, 0);
    }

    function runLoop() {
      // Prefer requestVideoFrameCallback, fall back to requestAnimationFrame
      if ('requestVideoFrameCallback' in HTMLVideoElement.prototype) {
        const cb = () => { drawOnce(); video.requestVideoFrameCallback(cb); };
        video.requestVideoFrameCallback(cb);
      } else {
        const rafLoop = () => { drawOnce(); requestAnimationFrame(rafLoop); };
        requestAnimationFrame(rafLoop);
      }
    }

    startBtn.addEventListener('click', start);
    window.addEventListener('resize', resize);
  </script>
</body>
</html>
Key points used here:
1) willReadFrequently: true reduces the copy overhead of the 2D context when pixels are read back;
2) requestVideoFrameCallback aligns the loop with the decoded-frame cadence;
3) create as few objects as possible inside the pixel loop to avoid GC churn (see the sketch below).
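For point 3, a minimal sketch of the buffer-reuse idea (makeGrayProcessor and scratch are illustrative names, not part of the demo above): allocate the working buffer once, outside the frame loop, so each frame is a plain in-place pass with no new allocations.
// Illustrative sketch: one reusable scratch buffer instead of per-frame allocations
function makeGrayProcessor(width, height) {
  const scratch = new Uint8ClampedArray(width * height * 4); // allocated once
  return function process(imageData) {
    const src = imageData.data;
    for (let i = 0; i < src.length; i += 4) {
      const lum = 0.2126 * src[i] + 0.7152 * src[i + 1] + 0.0722 * src[i + 2];
      scratch[i] = scratch[i + 1] = scratch[i + 2] = lum;
      scratch[i + 3] = 255;
    }
    src.set(scratch); // one bulk copy back; nothing new is allocated per frame
    return imageData;
  };
}
const processFrame = makeGrayProcessor(canvas.width, canvas.height); // create once, reuse every frame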
Going further (WebGL / OffscreenCanvas / WebRTC)
Filters with WebGL (shader-based, GPU-accelerated)
A CPU pixel loop starts to struggle around 1080p. Moving the filter onto the GPU improves latency and power consumption considerably. Below is a minimal runnable example that uses a fragment shader for an approximate Gaussian blur plus a hue adjustment.
Code example (WebGL filter, runs as a standalone page)
<!doctype html>
<html lang="en">
<head>
  <meta charset="utf-8" />
  <title>WebGL Filter Demo</title>
  <style>
    body { font-family: ui-sans-serif, system-ui; margin: 24px; display: grid; gap: 16px; }
    canvas, video { width: 100%; border-radius: 12px; background: #111; }
    .grid { display: grid; grid-template-columns: 1fr 1fr; gap: 16px; }
    .controls { display: flex; gap: 12px; align-items: center; flex-wrap: wrap; }
  </style>
</head>
<body>
  <h1>Real-time WebGL filter: blur + hue</h1>
  <div class="controls">
    <button id="start">Start capture</button>
    <label>Blur strength <input id="blur" type="range" min="0" max="5" step="0.1" value="1.0"></label>
    <label>Hue shift <input id="hue" type="range" min="-3.14" max="3.14" step="0.01" value="0.0"></label>
  </div>
  <div class="grid">
    <video id="video" playsinline muted></video>
    <canvas id="glcanvas"></canvas>
  </div>
  <script type="module">
    const startBtn = document.getElementById('start');
    const blurInput = document.getElementById('blur');
    const hueInput = document.getElementById('hue');
    const video = document.getElementById('video');
    const canvas = document.getElementById('glcanvas');

    /** @type {WebGLRenderingContext} */
    let gl, program, tex, buffer, u_res, u_blur, u_hue;

    const vs = `
      attribute vec2 a_pos;
      varying vec2 v_uv;
      void main() {
        v_uv = (a_pos + 1.0) * 0.5;
        gl_Position = vec4(a_pos, 0.0, 1.0);
      }`;

    const fs = `
      precision mediump float;
      varying vec2 v_uv;
      uniform sampler2D u_tex;
      uniform vec2 u_res;
      uniform float u_blur;
      uniform float u_hue;

      vec3 rgb2hsv(vec3 c) {
        vec4 K = vec4(0.0, -1.0/3.0, 2.0/3.0, -1.0);
        vec4 p = mix(vec4(c.bg, K.wz), vec4(c.gb, K.xy), step(c.b, c.g));
        vec4 q = mix(vec4(p.xyw, c.r), vec4(c.r, p.yzx), step(p.x, c.r));
        float d = q.x - min(q.w, q.y);
        float e = 1.0e-10;
        return vec3(abs(q.z + (q.w - q.y) / (6.0 * d + e)), d / (q.x + e), q.x);
      }

      vec3 hsv2rgb(vec3 c) {
        vec3 rgb = clamp(abs(mod(c.x * 6.0 + vec3(0.0, 4.0, 2.0), 6.0) - 3.0) - 1.0, 0.0, 1.0);
        return c.z * mix(vec3(1.0), rgb, c.y);
      }

      void main() {
        // u_blur scales the sample offsets; the weights are a fixed 3x3 Gaussian
        // (1 2 1 / 2 4 2 / 1 2 1) / 16, so brightness is preserved for any blur value.
        vec2 px = u_blur / u_res;
        vec4 sum = vec4(0.0);
        sum += texture2D(u_tex, v_uv + vec2(-px.x, -px.y)) * 0.0625;
        sum += texture2D(u_tex, v_uv + vec2( 0.0 , -px.y)) * 0.125;
        sum += texture2D(u_tex, v_uv + vec2( px.x, -px.y)) * 0.0625;
        sum += texture2D(u_tex, v_uv + vec2(-px.x,  0.0 )) * 0.125;
        sum += texture2D(u_tex, v_uv)                      * 0.25;
        sum += texture2D(u_tex, v_uv + vec2( px.x,  0.0 )) * 0.125;
        sum += texture2D(u_tex, v_uv + vec2(-px.x,  px.y)) * 0.0625;
        sum += texture2D(u_tex, v_uv + vec2( 0.0 ,  px.y)) * 0.125;
        sum += texture2D(u_tex, v_uv + vec2( px.x,  px.y)) * 0.0625;

        vec3 hsv = rgb2hsv(sum.rgb);
        hsv.x = fract(hsv.x + u_hue / 6.2831853); // hue shift (u_hue in radians)
        gl_FragColor = vec4(hsv2rgb(hsv), 1.0);
      }`;

    function compile(type, source) {
      const shader = gl.createShader(type);
      gl.shaderSource(shader, source);
      gl.compileShader(shader);
      if (!gl.getShaderParameter(shader, gl.COMPILE_STATUS)) {
        console.error(gl.getShaderInfoLog(shader));
      }
      return shader;
    }

    async function start() {
      const stream = await navigator.mediaDevices.getUserMedia({
        video: { width: 1280, height: 720 },
        audio: false
      });
      video.srcObject = stream;
      await video.play();
      canvas.width = video.videoWidth;
      canvas.height = video.videoHeight;

      gl = canvas.getContext('webgl', { preserveDrawingBuffer: false });
      program = gl.createProgram();
      gl.attachShader(program, compile(gl.VERTEX_SHADER, vs));
      gl.attachShader(program, compile(gl.FRAGMENT_SHADER, fs));
      gl.linkProgram(program);
      gl.useProgram(program);

      // One full-screen triangle instead of a quad
      buffer = gl.createBuffer();
      gl.bindBuffer(gl.ARRAY_BUFFER, buffer);
      gl.bufferData(gl.ARRAY_BUFFER, new Float32Array([-1, -1, 3, -1, -1, 3]), gl.STATIC_DRAW);
      const a_pos = gl.getAttribLocation(program, 'a_pos');
      gl.enableVertexAttribArray(a_pos);
      gl.vertexAttribPointer(a_pos, 2, gl.FLOAT, false, 0, 0);

      // Texture that receives the video frames
      tex = gl.createTexture();
      gl.bindTexture(gl.TEXTURE_2D, tex);
      gl.texParameteri(gl.TEXTURE_2D, gl.TEXTURE_MIN_FILTER, gl.LINEAR);
      gl.texParameteri(gl.TEXTURE_2D, gl.TEXTURE_MAG_FILTER, gl.LINEAR);
      gl.texParameteri(gl.TEXTURE_2D, gl.TEXTURE_WRAP_S, gl.CLAMP_TO_EDGE);
      gl.texParameteri(gl.TEXTURE_2D, gl.TEXTURE_WRAP_T, gl.CLAMP_TO_EDGE);
      gl.pixelStorei(gl.UNPACK_FLIP_Y_WEBGL, true); // video rows are top-down; flip so the output isn't upside down

      u_res = gl.getUniformLocation(program, 'u_res');
      u_blur = gl.getUniformLocation(program, 'u_blur');
      u_hue = gl.getUniformLocation(program, 'u_hue');
      draw();
    }

    function draw() {
      gl.viewport(0, 0, canvas.width, canvas.height);
      gl.uniform2f(u_res, canvas.width, canvas.height);
      gl.uniform1f(u_blur, parseFloat(blurInput.value));
      gl.uniform1f(u_hue, parseFloat(hueInput.value));

      // Upload the current video frame as the texture source
      gl.bindTexture(gl.TEXTURE_2D, tex);
      gl.texImage2D(gl.TEXTURE_2D, 0, gl.RGBA, gl.RGBA, gl.UNSIGNED_BYTE, video);
      gl.drawArrays(gl.TRIANGLES, 0, 3);

      if ('requestVideoFrameCallback' in HTMLVideoElement.prototype) {
        video.requestVideoFrameCallback(draw);
      } else {
        requestAnimationFrame(draw);
      }
    }

    startBtn.addEventListener('click', start);
  </script>
</body>
</html>
Core optimizations: the video frame is uploaded to the GPU directly as a texture; a single full-screen triangle keeps vertex overhead minimal; parameters live in uniforms, so the shader never has to be recompiled.
Moving the processing into a Worker (OffscreenCanvas frees the main thread)
When you need heavy computation on the canvas but don't want to block main-thread interaction, use OffscreenCanvas + Worker. The main thread only grabs video frames and posts them to the Worker as ImageBitmap objects; the Worker draws onto an OffscreenCanvas and does the processing, then either posts the result back or, as in the demo below, renders straight into an output canvas that was bound to it up front via transferControlToOffscreen.
Code example (main thread + Worker split)
main.html
<!doctype html>
<meta charset="utf-8" />
<title>OffscreenCanvas + Worker</title>
<style> video, canvas { width: 48%; border-radius: 12px; background:#111; } body{font-family:ui-sans-serif;display:flex;gap:12px;padding:16px;flex-wrap:wrap;} </style>
<button id="start">开始</button>
<video id="video" playsinline muted></video>
<canvas id="out"></canvas>
<script type="module">const btn = document.getElementById('start');const video = document.getElementById('video');const out = document.getElementById('out');const worker = new Worker('./worker.js', { type: 'module' });btn.onclick = async () => {const stream = await navigator.mediaDevices.getUserMedia({ video: { width: 1280, height: 720 }, audio: false });video.srcObject = stream;await video.play();out.width = video.videoWidth; out.height = video.videoHeight;const off = out.transferControlToOffscreen();worker.postMessage({ type: 'init', canvas: off, w: out.width, h: out.height }, [off]);const pump = async () => {// 从视频抓一帧为 ImageBitmap,零拷贝传 Workerconst bmp = await createImageBitmap(video);worker.postMessage({ type: 'frame', bmp }, [bmp]);if ('requestVideoFrameCallback' in HTMLVideoElement.prototype) {video.requestVideoFrameCallback(pump);} else {requestAnimationFrame(pump);}};pump();};
</script>
worker.js
let ctx, w, h;
self.onmessage = (e) => {
  const { type } = e.data;
  if (type === 'init') {
    const { canvas, w: W, h: H } = e.data;
    w = W; h = H;
    ctx = canvas.getContext('2d', { willReadFrequently: true });
  } else if (type === 'frame') {
    const { bmp } = e.data;
    ctx.drawImage(bmp, 0, 0, w, h);
    const img = ctx.getImageData(0, 0, w, h);
    // Simple edge enhancement (Laplacian-based 3x3 sharpen kernel)
    const k = [0, -1, 0, -1, 5, -1, 0, -1, 0];
    const src = img.data;
    const out = new Uint8ClampedArray(src); // copy, so border pixels keep their original values
    for (let y = 1; y < h - 1; y++) {
      for (let x = 1; x < w - 1; x++) {
        let r = 0, g = 0, b = 0, i = 0;
        for (let ky = -1; ky <= 1; ky++) {
          for (let kx = -1; kx <= 1; kx++) {
            const p = ((y + ky) * w + (x + kx)) * 4;
            const kv = k[i++];
            r += src[p] * kv;
            g += src[p + 1] * kv;
            b += src[p + 2] * kv;
          }
        }
        const o = (y * w + x) * 4;
        out[o] = r;         // Uint8ClampedArray clamps to [0, 255] on write
        out[o + 1] = g;
        out[o + 2] = b;
        out[o + 3] = 255;
      }
    }
    img.data.set(out);
    ctx.putImageData(img, 0, 0);
    bmp.close();
  }
};
Key points:
createImageBitmap(video) combined with postMessage(..., [bitmap]) transfers each frame without copying; the Worker draws onto an OffscreenCanvas, so the UI thread is never blocked.
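As an alternative to pumping ImageBitmap objects by hand, the MediaStreamTrackProcessor / MediaStreamTrackGenerator pair mentioned in the introduction exposes the track as a stream of VideoFrame objects. A minimal sketch, assuming a Chromium-based browser (feature-detect before relying on it); here `stream` is the getUserMedia stream from the demo above, and the transform body is a pass-through placeholder:
const [track] = stream.getVideoTracks();
const processor = new MediaStreamTrackProcessor({ track });          // readable stream of VideoFrames
const generator = new MediaStreamTrackGenerator({ kind: 'video' });  // writable side that behaves like a track

const transform = new TransformStream({
  async transform(frame, controller) {
    // Process `frame` here (GPU, Worker, WASM...). If you build a new VideoFrame,
    // enqueue the new one and call frame.close() on the original.
    controller.enqueue(frame); // pass-through placeholder
  }
});

processor.readable.pipeThrough(transform).pipeTo(generator.writable);

// Downstream code can treat this like any camera stream
const processedStream = new MediaStream([generator]);
Both ends are transferable, so the whole transform can also run inside a Worker.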
Outputting the processed video stream (for recording / streaming / co-hosting)
To turn the processed picture back into a media stream, call canvas.captureStream() to get a MediaStream. That stream can be fed to MediaRecorder for recording, or attached to a WebRTC RTCPeerConnection as a video track (see the sketch after the recording example).
Code example (process → record to a WebM file)
<!doctype html>
<meta charset="utf-8" />
<title>Record the Processed Video</title>
<button id="start">Start camera</button>
<button id="rec">Start recording</button>
<button id="stop">Stop recording</button>
<video id="preview" playsinline muted></video>
<canvas id="canvas" style="width:480px;border-radius:8px;background:#111"></canvas>
<a id="dl" download="processed.webm" style="display:none">下载</a>
<script>
  const v = document.getElementById('preview');
  const c = document.getElementById('canvas');
  const ctx = c.getContext('2d', { willReadFrequently: true });
  const btnStart = document.getElementById('start');
  const btnRec = document.getElementById('rec');
  const btnStop = document.getElementById('stop');
  const dl = document.getElementById('dl');
  let mediaRecorder, chunks = [];

  btnStart.onclick = async () => {
    const s = await navigator.mediaDevices.getUserMedia({ video: { width: 640, height: 360 }, audio: false });
    v.srcObject = s;
    await v.play();
    c.width = v.videoWidth;
    c.height = v.videoHeight;
    const draw = () => {
      ctx.drawImage(v, 0, 0, c.width, c.height);
      const id = ctx.getImageData(0, 0, c.width, c.height);
      // Small demo effect: invert the colors
      for (let i = 0; i < id.data.length; i += 4) {
        id.data[i] ^= 255; id.data[i + 1] ^= 255; id.data[i + 2] ^= 255;
      }
      ctx.putImageData(id, 0, 0);
      if ('requestVideoFrameCallback' in HTMLVideoElement.prototype) v.requestVideoFrameCallback(draw);
      else requestAnimationFrame(draw);
    };
    draw();
  };

  btnRec.onclick = () => {
    const processedStream = c.captureStream(30); // 30 fps
    // Prefer VP9 but fall back if the browser doesn't support it
    const mimeType = MediaRecorder.isTypeSupported('video/webm;codecs=vp9')
      ? 'video/webm;codecs=vp9' : 'video/webm';
    mediaRecorder = new MediaRecorder(processedStream, { mimeType });
    chunks = [];
    mediaRecorder.ondataavailable = e => e.data.size && chunks.push(e.data);
    mediaRecorder.onstop = () => {
      const blob = new Blob(chunks, { type: 'video/webm' });
      dl.href = URL.createObjectURL(blob);
      dl.style.display = 'inline-block';
      dl.textContent = 'Download processed video';
    };
    mediaRecorder.start(100);
  };

  btnStop.onclick = () => mediaRecorder && mediaRecorder.stop();
</script>
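For the WebRTC path mentioned before the recording example, a minimal sketch: `pc` is assumed to be an RTCPeerConnection you create and negotiate elsewhere, and `canvas` is whatever canvas holds the processed frames; signaling and error handling are omitted.
const processedStream = canvas.captureStream(30);
const [processedTrack] = processedStream.getVideoTracks();

// Option 1: add the processed track before creating the offer
pc.addTrack(processedTrack, processedStream);

// Option 2: hot-swap it into an existing video sender without renegotiating
const sender = pc.getSenders().find(s => s.track && s.track.kind === 'video');
if (sender) sender.replaceTrack(processedTrack); // returns a promise; resolves once the swap takes effect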
Practical scenarios (2–3 common use cases)
Scenario 1: background blur / replacement for online meetings
Goal: run person segmentation locally in the browser and blur or replace the background, with no server dependency.
Implementation points: WebGL plus a lightweight person-segmentation model (e.g. MediaPipe Selfie Segmentation or TensorFlow.js), blending the segmentation mask in as a texture. Below is a "placeholder" version of the flow that needs no external model; get the pipeline running first, then swap in real model output:
Code example (pipeline placeholder; the mask is faked with a threshold)
// Assumes the existing video -> glcanvas pipeline (see the WebGL demo above)

/**
 * Fake person-segmentation mask: simulated here with a luminance threshold
 * (a real project would use model output). Returns an opacity value in [0, 1].
 */
function fakeSegMask(x, y, w, h, frameData) {
  const i = (y * w + x) * 4;
  const r = frameData[i], g = frameData[i + 1], b = frameData[i + 2];
  const lum = 0.2126 * r + 0.7152 * g + 0.0722 * b;
  return lum > 110 ? 1.0 : 0.0;
}
In a real project, turn the mask into a single-channel texture, upload it to the GPU, and blend in the fragment shader:
vec4 fg = texture2D(u_tex, v_uv);     // original person/foreground
vec4 bg = texture2D(u_bg, v_uv);      // blurred or replacement background
float m = texture2D(u_mask, v_uv).r;  // segmentation mask
gl_FragColor = mix(bg, fg, m);
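On the JavaScript side, the mask upload could look like the sketch below, layered on top of the WebGL demo above. maskData (a Uint8Array of 0–255 values, maskW × maskH), maskTex, and the choice of texture unit 1 are assumptions; u_bg would be uploaded the same way on another unit.
const maskTex = gl.createTexture();
gl.activeTexture(gl.TEXTURE1);                   // unit 1; unit 0 keeps the video frame
gl.bindTexture(gl.TEXTURE_2D, maskTex);
gl.texParameteri(gl.TEXTURE_2D, gl.TEXTURE_MIN_FILTER, gl.LINEAR); // bilinear upscaling of the low-res mask
gl.texParameteri(gl.TEXTURE_2D, gl.TEXTURE_MAG_FILTER, gl.LINEAR);
gl.texParameteri(gl.TEXTURE_2D, gl.TEXTURE_WRAP_S, gl.CLAMP_TO_EDGE);
gl.texParameteri(gl.TEXTURE_2D, gl.TEXTURE_WRAP_T, gl.CLAMP_TO_EDGE);
gl.pixelStorei(gl.UNPACK_ALIGNMENT, 1);          // rows of 1-byte texels are not 4-byte aligned
gl.texImage2D(gl.TEXTURE_2D, 0, gl.LUMINANCE, maskW, maskH, 0,
              gl.LUMINANCE, gl.UNSIGNED_BYTE, maskData);
gl.uniform1i(gl.getUniformLocation(program, 'u_mask'), 1); // sampler u_mask reads unit 1
gl.activeTexture(gl.TEXTURE0);                   // back to unit 0 for the per-frame video upload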
Implementation tips:
- Run model inference in a Worker (or on a WASM backend) so the UI never stalls;
- Compute the mask at a small resolution first (say around 256p), then upscale it bilinearly;
- Do the background blur on the GPU (multi-pass separable blur);
- Feed the output through canvas.captureStream() into a WebRTC track; the meeting SDK then treats it like an ordinary camera.
Scenario 2: adaptive preprocessing for live streaming on weak networks (lower resolution + lower frame rate + denoising)
Goal: when bitrate is constrained, preprocess the source video on the front end first to reduce the load on the encoder.
Implementation points: adjust the drawImage target size dynamically based on a bandwidth estimate (or RTCPeerConnection stats); drop frames at intervals; optionally add light denoising (mean or median filtering).
Code example (switching the canvas resolution based on bandwidth)
let targetW = 1280, targetH = 720;
const canvas = document.querySelector('canvas');
const ctx = canvas.getContext('2d');

function setResolutionByKbps(kbps) {
  if (kbps < 600) { targetW = 640; targetH = 360; }
  else if (kbps < 1200) { targetW = 960; targetH = 540; }
  else { targetW = 1280; targetH = 720; }
  canvas.width = targetW;
  canvas.height = targetH;
}

// Assume an estimated bandwidth, estimateKbps, is available (from WebRTC stats or your own measurement)
setResolutionByKbps(800); // example: start at 800 kbps

let frameCount = 0, dropEvery = 2; // frame-drop policy: drop 1 of every 2 frames

function loop() {
  frameCount++;
  if (frameCount % dropEvery !== 0) { // frames we keep
    ctx.drawImage(video, 0, 0, targetW, targetH); // `video` is the playing <video> element
    // light denoising (e.g. a 3x3 mean filter) could go here
  }
  requestAnimationFrame(loop);
}
loop();
Implementation tips:
- Use the packet-loss/retransmission figures from the outbound-rtp reports of RTCPeerConnection.getStats() to adjust dropEvery dynamically (see the sketch after this list);
- Don't switch resolution too often; enforce a minimum hold time to avoid visible judder;
- If the encoder is controllable (WebCodecs), drive the encoding parameters directly (bitrate, keyframe interval).
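Here is the sketch referenced in the first tip: derive the actually-sent bitrate from the outbound-rtp byte counters and tighten dropEvery when retransmissions pile up. The thresholds, the 2-second interval, and the reuse of pc, setResolutionByKbps, and dropEvery from the snippet above are illustrative assumptions.
let lastBytes = 0, lastTs = 0;

async function adjustByStats(pc) {
  const stats = await pc.getStats();
  stats.forEach(report => {
    if (report.type !== 'outbound-rtp' || report.kind !== 'video') return;
    if (lastTs) {
      // bytes delta * 8 bits over a millisecond delta gives kbps
      const kbps = ((report.bytesSent - lastBytes) * 8) / (report.timestamp - lastTs);
      setResolutionByKbps(kbps);
    }
    lastBytes = report.bytesSent;
    lastTs = report.timestamp;
    // Many NACKs suggest congestion: drop more aggressively (keep 1/2 instead of 2/3)
    dropEvery = (report.nackCount || 0) > 50 ? 2 : 3;
  });
}
setInterval(() => adjustByStats(pc), 2000); // re-evaluate every 2 seconds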
Scenario 3: security / motion detection (low power, edge computing)
Goal: don't ship the full video to the cloud; detect motion on the device and only report when something triggers.
Implementation points: downsample + frame differencing; compute only over the ROI (region of interest); on a trigger, upload a snapshot or push an event.
Code example (frame difference + threshold + ROI)
const w = 320, h = 180; // heavily downsampled analysis resolution
canvas.width = w; canvas.height = h;
const ctx = canvas.getContext('2d', { willReadFrequently: true });
let lastFrame; // previous grayscale frame as a Uint8ClampedArray

function toGray(data) {
  const out = new Uint8ClampedArray(data.length / 4);
  for (let i = 0, j = 0; i < data.length; i += 4, j++) {
    out[j] = (data[i] * 0.2126 + data[i + 1] * 0.7152 + data[i + 2] * 0.0722) | 0;
  }
  return out;
}

function diffCount(a, b, thresh, roi) {
  let cnt = 0;
  const [x0, y0, x1, y1] = roi; // [0, 0, w, h] means the whole frame
  for (let y = y0; y < y1; y++) {
    for (let x = x0; x < x1; x++) {
      const i = y * w + x;
      if (Math.abs(a[i] - b[i]) > thresh) cnt++;
    }
  }
  return cnt;
}

function loop() {
  ctx.drawImage(video, 0, 0, w, h);
  const id = ctx.getImageData(0, 0, w, h);
  const gray = toGray(id.data);
  if (lastFrame) {
    const changes = diffCount(gray, lastFrame, 25, [40, 30, w - 40, h - 30]);
    if (changes > 1200) {
      // Trigger an alert: capture a snapshot or send an event
      // (in practice, add a cooldown so this doesn't fire on every frame)
      canvas.toBlob(b => uploadAlert(b));
    }
  }
  lastFrame = gray;
  requestAnimationFrame(loop);
}
loop();

function uploadAlert(blob) {
  // fetch('/alert', { method: 'POST', body: blob }) ...
}
Implementation tips:
- Do the frame differencing in a Worker and let the main thread only receive results;
- Downsampling cuts power draw dramatically; 180p is usually enough;
- Adding morphological operations (open/close) reduces false alarms from noise (see the sketch after this list).
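For the last tip, a sketch of a morphological "open" over a binary change map. It assumes you first build a per-pixel 0/1 changeMask (the same w × h grid as the grayscale frames) instead of counting differences inline; the helper names are illustrative.
// 3x3 erosion followed by 3x3 dilation: isolated noisy pixels disappear,
// while genuine moving regions survive.
function morph(mask, w, h, keepIfAtLeast) {
  const out = new Uint8Array(mask.length);
  for (let y = 1; y < h - 1; y++) {
    for (let x = 1; x < w - 1; x++) {
      let n = 0;
      for (let ky = -1; ky <= 1; ky++)
        for (let kx = -1; kx <= 1; kx++)
          n += mask[(y + ky) * w + (x + kx)];
      out[y * w + x] = n >= keepIfAtLeast ? 1 : 0;
    }
  }
  return out;
}
const erode  = (m) => morph(m, w, h, 9);  // keep only pixels whose whole 3x3 neighborhood changed
const dilate = (m) => morph(m, w, h, 1);  // grow the surviving regions back
const opened = dilate(erode(changeMask)); // count changes on `opened` instead of the raw mask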
QA (common questions)
Q1: 1080p still stutters on low-end devices. What can I do?
A: Prefer a WebGL/GPU pipeline; use requestVideoFrameCallback; move heavy pixel work into a Worker; reduce resolution, frame rate, and noise; avoid frequent getImageData/putImageData, and if something can stay as a texture, don't read it back to the CPU.
Q2: Autoplay fails or shows a black screen on iOS?
A: You must add playsinline, and video.play() usually has to be triggered by a user gesture; for remote HLS streams, also watch CORS and your MSE/HLS compatibility strategy.
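A minimal sketch of that pattern (element IDs are arbitrary): keep the video muted and playsinline, and call play() only inside a user-gesture handler.
<video id="v" playsinline muted></video>
<button id="go">Tap to start</button>
<script>
  // play() runs inside a click handler, which satisfies iOS Safari's gesture requirement
  document.getElementById('go').addEventListener('click', async () => {
    const v = document.getElementById('v');
    v.srcObject = await navigator.mediaDevices.getUserMedia({ video: true });
    await v.play();
  });
</script>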
Q3: canvas.captureStream() recording throws, or the recording has no audio?
A: Most browsers only give you a video track; merge the audio in with addTrack from the original MediaStream, and make sure the MediaRecorder mimeType is actually supported; use feature detection (see the sketch below).
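A sketch of both fixes, assuming the original camera stream was requested with audio: true (called camStream here) and `canvas` is the processed canvas:
// Combine the processed canvas video track with the microphone audio track
const recordingStream = new MediaStream([
  ...canvas.captureStream(30).getVideoTracks(),
  ...camStream.getAudioTracks()
]);

// Pick a mimeType the browser actually supports instead of hard-coding one
const mime = ['video/webm;codecs=vp9,opus', 'video/webm;codecs=vp8,opus', 'video/webm']
  .find(t => MediaRecorder.isTypeSupported(t)) || '';
const recorder = new MediaRecorder(recordingStream, mime ? { mimeType: mime } : undefined);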
Q4: How much does WebCodecs improve things? Is it worth adopting?
A: If your target browsers support it, WebCodecs talks to the decoders/encoders directly, which gives better latency and much finer control, and is very attractive for low-latency streaming or co-hosting; but you need a polyfill/fallback path. A rough sketch follows.
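A rough, feature-detected sketch of the WebCodecs encode path; the codec string, bitrate, and the sendChunk transport hook are illustrative assumptions, not recommendations.
if ('VideoEncoder' in window) {
  const encoder = new VideoEncoder({
    output: (chunk, metadata) => sendChunk(chunk, metadata), // EncodedVideoChunk goes to your own transport
    error: (e) => console.error('encode error', e)
  });
  encoder.configure({
    codec: 'vp8',
    width: 1280,
    height: 720,
    bitrate: 1_200_000, // 1.2 Mbps; adjust from your bandwidth estimate
    framerate: 30
  });

  // Frames can come from MediaStreamTrackProcessor or new VideoFrame(canvas, { timestamp })
  function encodeFrame(videoFrame, forceKeyFrame = false) {
    encoder.encode(videoFrame, { keyFrame: forceKeyFrame });
    videoFrame.close(); // always release frames promptly
  }
}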
Q5: What if OffscreenCanvas isn't supported?
A: Fall back to a main-thread canvas, or to WebGL with lightweight shaders; you can also keep the computation in a Worker while the drawing itself stays on the main thread.
Q6: A cross-origin video won't draw onto the canvas?
A: The video resource needs the correct CORS header (Access-Control-Allow-Origin) and the element needs crossorigin="anonymous"; otherwise the canvas is "tainted" and reading pixels fails.
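A minimal sketch of the markup side (the URL is a placeholder); the server at that origin must also send a matching Access-Control-Allow-Origin header.
<!-- Opt the element into CORS so pixels read from it don't taint the canvas -->
<video id="remote" crossorigin="anonymous" playsinline
       src="https://cdn.example.com/clip.mp4"></video>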
Summary
The basic playbook for front-end video stream processing is: get the stream → align with the frame cadence → process with Canvas/WebGL → optionally optimize with Worker/WASM → output as needed (display, recording, streaming).
Once resolution climbs or the algorithm gets complex, hand the heavy work to the GPU (WebGL/WebGPU) and to background threads (Worker/OffscreenCanvas); to send the result out, use canvas.captureStream() or encode directly with WebCodecs. Add bandwidth adaptation, denoising, frame dropping, and ROI handling on top, and that covers most client-side needs for live streaming, meetings, and surveillance.