name: web-audio-api description: 用于JARVIS音频反馈和语音处理的Web Audio API model: sonnet risk_level: 低 version: 1.0.0
Web Audio API 技能
1. 概述
此技能提供Web Audio API专业知识,用于在JARVIS AI助手中创建音频反馈、语音处理和音效。
风险等级:低 - 音频处理,安全表面积最小
主要用例:
- HUD音频反馈(哔哔声、警报)
- 语音输入处理
- 3D HUD元素的空间音频
- 实时音频可视化
- 文本到语音集成
2. 核心职责
2.1 基本原则
- 测试驱动开发优先:为所有音频组件先写测试再实现
- 性能意识:优化以实现60fps和最小音频延迟
- 需要用户手势:音频上下文必须在用户交互后启动
- 资源清理:在卸载时关闭音频上下文并断开节点连接
- AudioWorklet用于处理:使用AudioWorklet进行重型DSP操作
- 可访问性:提供音频反馈的视觉替代方案
- 音量控制:尊重系统和用户音量偏好
- 错误处理:优雅处理音频权限拒绝
3. 技术栈与版本
3.1 浏览器支持
| 浏览器 | AudioContext | AudioWorklet |
|---|---|---|
| Chrome | 35+ | 66+ |
| Firefox | 25+ | 76+ |
| Safari | 14.1+ | 14.1+ |
3.2 TypeScript类型
// types/audio.ts
// Shared audio types used by the composables below.
/** Parameters for a synthesized beep played through the Web Audio graph. */
interface AudioFeedbackOptions {
frequency: number // oscillator frequency in Hz
duration: number // beep length in seconds
type: OscillatorType // 'sine' | 'square' | 'sawtooth' | 'triangle'
volume: number // linear gain; consumers clamp to the [0, 1] range
}
/** Cartesian, listener-relative coordinates for PannerNode spatial audio. */
interface SpatialAudioPosition {
x: number
y: number
z: number
}
4. 实现模式
4.1 音频上下文管理
// composables/useAudioContext.ts
/**
 * Owns the single shared AudioContext for the app.
 * Must be initialized from a user gesture (autoplay policy); closes the
 * context automatically when the owning component unmounts.
 */
export function useAudioContext() {
  const audioContext = ref<AudioContext | null>(null)
  const isInitialized = ref(false)
  // Cache the in-flight initialization so concurrent callers share one
  // promise. Previously a second caller could return while resume() was
  // still pending and observe isInitialized === false.
  let initPromise: Promise<void> | null = null

  /** Create (or await the in-progress creation of) the AudioContext. */
  async function initialize() {
    if (isInitialized.value) return
    if (!initPromise) {
      initPromise = (async () => {
        audioContext.value = new AudioContext()
        // Browsers start contexts suspended until a user gesture.
        if (audioContext.value.state === 'suspended') await audioContext.value.resume()
        isInitialized.value = true
      })()
    }
    return initPromise
  }

  onUnmounted(() => {
    audioContext.value?.close()
    audioContext.value = null
    isInitialized.value = false
    initPromise = null
  })

  return { audioContext: readonly(audioContext), isInitialized: readonly(isInitialized), initialize }
}
4.2 HUD哔哔声反馈
// composables/useHUDSounds.ts
/** Short synthesized HUD feedback sounds (confirm / alert / error / click). */
export function useHUDSounds() {
  const { audioContext, initialize } = useAudioContext()

  /**
   * Play a single beep.
   * @param options frequency (Hz), duration (s), oscillator type, volume [0,1]
   */
  async function playBeep(options: Partial<AudioFeedbackOptions> = {}) {
    await initialize()
    const ctx = audioContext.value
    if (!ctx) return
    const { frequency = 440, duration = 0.1, type = 'sine', volume = 0.3 } = options
    const safeVolume = Math.max(0, Math.min(1, volume))
    const oscillator = ctx.createOscillator()
    const gainNode = ctx.createGain()
    oscillator.type = type
    oscillator.frequency.value = frequency
    gainNode.gain.value = safeVolume
    // Exponential ramps cannot start at (or target) zero, and they need an
    // explicit automation event to ramp FROM — setting .value alone does not
    // anchor the timeline. Use a tiny positive floor as the start value.
    gainNode.gain.setValueAtTime(Math.max(safeVolume, 0.001), ctx.currentTime)
    gainNode.gain.exponentialRampToValueAtTime(0.001, ctx.currentTime + duration)
    oscillator.connect(gainNode).connect(ctx.destination)
    // Release the one-shot nodes once playback finishes (memory hygiene).
    oscillator.onended = () => {
      oscillator.disconnect()
      gainNode.disconnect()
    }
    oscillator.start()
    oscillator.stop(ctx.currentTime + duration)
  }

  // Named presets used by HUD components.
  const sounds = {
    confirm: () => playBeep({ frequency: 880, duration: 0.1, volume: 0.2 }),
    alert: () => playBeep({ frequency: 440, duration: 0.3, type: 'square', volume: 0.4 }),
    error: () => playBeep({ frequency: 220, duration: 0.5, type: 'sawtooth', volume: 0.3 }),
    click: () => playBeep({ frequency: 1000, duration: 0.05, volume: 0.1 })
  }

  return { playBeep, sounds }
}
4.3 音频可视化
// composables/useAudioVisualization.ts
/** Frequency-domain readout of an audio source for HUD visualizations. */
export function useAudioVisualization() {
  const { audioContext, initialize } = useAudioContext()
  let analyser: AnalyserNode | null = null
  let dataArray: Uint8Array | null = null

  /**
   * Attach an AnalyserNode (fftSize 256 → 128 frequency bins) to `source`.
   * Safe to call repeatedly: the previous analyser is disconnected first so
   * repeated setup does not leak graph connections.
   */
  async function setupAnalyser(source: AudioNode) {
    await initialize()
    const ctx = audioContext.value
    if (!ctx) return
    analyser?.disconnect()
    analyser = ctx.createAnalyser()
    analyser.fftSize = 256
    dataArray = new Uint8Array(analyser.frequencyBinCount)
    source.connect(analyser)
  }

  /**
   * Fill the shared buffer with the latest byte frequency data and return it.
   * Returns null before setupAnalyser() has run. The same Uint8Array is
   * reused across calls — copy it if you need a stable snapshot.
   */
  function getFrequencyData(): Uint8Array | null {
    if (!analyser || !dataArray) return null
    analyser.getByteFrequencyData(dataArray)
    return dataArray
  }

  return { setupAnalyser, getFrequencyData }
}
4.4 3D HUD的空间音频
// composables/useSpatialAudio.ts
/**
 * HRTF spatial audio for 3D HUD elements.
 * NOTE: only the most recently created panner is tracked; setPosition()
 * always moves that one. Use one composable instance per spatial source.
 */
export function useSpatialAudio() {
  const { audioContext, initialize } = useAudioContext()
  let panner: PannerNode | null = null

  /**
   * Create an HRTF/inverse-distance panner at `position` and make it the
   * active panner. Returns null when the audio context is unavailable.
   */
  async function createSpatialSource(position: SpatialAudioPosition) {
    await initialize()
    const ctx = audioContext.value
    if (!ctx) return null
    panner = ctx.createPanner()
    panner.panningModel = 'HRTF'
    panner.distanceModel = 'inverse'
    setPosition(position)
    return panner
  }

  /** Move the active panner to `pos`. No-op before createSpatialSource(). */
  function setPosition(pos: SpatialAudioPosition) {
    if (!panner) return
    // positionX/Y/Z AudioParams are absent in some older engines; fall back
    // to the legacy setPosition() there (same effect, not sample-accurate).
    if (panner.positionX) {
      panner.positionX.value = pos.x
      panner.positionY.value = pos.y
      panner.positionZ.value = pos.z
    } else {
      panner.setPosition(pos.x, pos.y, pos.z)
    }
  }

  return { createSpatialSource, setPosition }
}
4.5 麦克风输入
// composables/useMicrophone.ts
/** Microphone capture with echo cancellation, noise suppression and AGC. */
export function useMicrophone() {
  const { audioContext, initialize } = useAudioContext()
  const stream = ref<MediaStream | null>(null)
  const isListening = ref(false)
  const error = ref<string | null>(null)

  /**
   * Request microphone access and expose the captured MediaStream.
   * On failure, stores a message in `error` and returns null.
   */
  async function startListening() {
    try {
      await initialize()
      const constraints = {
        audio: { echoCancellation: true, noiseSuppression: true, autoGainControl: true }
      }
      const mediaStream = await navigator.mediaDevices.getUserMedia(constraints)
      stream.value = mediaStream
      isListening.value = true
      return mediaStream
    } catch (err) {
      error.value = err instanceof Error ? err.message : '麦克风访问被拒绝'
      return null
    }
  }

  /** Stop every capture track and reset listening state. */
  function stopListening() {
    if (stream.value) {
      for (const track of stream.value.getTracks()) track.stop()
    }
    stream.value = null
    isListening.value = false
  }

  // Release the hardware when the owning component unmounts.
  onUnmounted(() => stopListening())

  return { stream: readonly(stream), isListening: readonly(isListening), error: readonly(error), startListening, stopListening }
}
5. 实现工作流(测试驱动开发)
步骤1:先写失败测试
// tests/composables/useHUDSounds.test.ts
import { describe, it, expect, vi, beforeEach } from 'vitest'
import { useHUDSounds } from '~/composables/useHUDSounds'
// Mock AudioContext nodes. oscillator.connect must RETURN the gain node:
// playBeep uses the chained form `osc.connect(gain).connect(destination)`,
// and a bare vi.fn() returns undefined — the second .connect() would throw.
const mockGainNode = {
  connect: vi.fn(),
  disconnect: vi.fn(),
  gain: { value: 1, setValueAtTime: vi.fn(), exponentialRampToValueAtTime: vi.fn() }
}
const mockOscillator = {
  connect: vi.fn(() => mockGainNode),
  disconnect: vi.fn(),
  start: vi.fn(),
  stop: vi.fn(),
  frequency: { value: 440 }
}
const mockAudioContext = {
  state: 'running', currentTime: 0, destination: {},
  createOscillator: vi.fn(() => mockOscillator),
  createGain: vi.fn(() => mockGainNode),
  resume: vi.fn(), close: vi.fn()
}
vi.stubGlobal('AudioContext', vi.fn(() => mockAudioContext))
describe('useHUDSounds', () => {
  beforeEach(() => vi.clearAllMocks())
  it('创建振荡器时使用正确的频率', async () => {
    const { playBeep } = useHUDSounds()
    await playBeep({ frequency: 880 })
    expect(mockOscillator.frequency.value).toBe(880)
  })
  it('将音量限制在有效范围内', async () => {
    const { playBeep } = useHUDSounds()
    await playBeep({ volume: 2.5 })
    expect(mockGainNode.gain.value).toBeLessThanOrEqual(1)
  })
  it('以正确顺序连接节点', async () => {
    const { playBeep } = useHUDSounds()
    await playBeep()
    expect(mockOscillator.connect).toHaveBeenCalledWith(mockGainNode)
    expect(mockGainNode.connect).toHaveBeenCalledWith(mockAudioContext.destination)
  })
})
步骤2:实现最小通过代码
// composables/useHUDSounds.ts
export function useHUDSounds() {
// 从第4.2节实现
// 仅添加测试所需功能
}
步骤3:遵循模式重构
测试通过后,重构以:
- 提取共享音频上下文逻辑
- 添加适当的TypeScript类型
- 实现卸载时的清理
步骤4:运行完整验证
# 运行所有音频相关测试
npm test -- -t "audio|sound|HUD"
# 检查类型
npm run typecheck
# 在浏览器中验证无内存泄漏
npm run dev # 使用DevTools内存标签手动测试
6. 性能模式
6.1 AudioWorklet用于处理
// ✅ 好:使用AudioWorklet进行DSP(在音频线程上运行)
/**
 * Noise gate running on the audio rendering thread.
 * The threshold (linear amplitude) is configurable via
 * `new AudioWorkletNode(ctx, 'noise-gate', { processorOptions: { threshold } })`
 * and defaults to 0.01, matching the previous hard-coded value.
 */
class NoiseGateProcessor extends AudioWorkletProcessor {
  private readonly threshold: number
  constructor(options?: AudioWorkletNodeOptions) {
    super()
    this.threshold = options?.processorOptions?.threshold ?? 0.01
  }
  process(inputs: Float32Array[][], outputs: Float32Array[][]) {
    const input = inputs[0]
    const output = outputs[0]
    for (let ch = 0; ch < input.length; ch++) {
      for (let i = 0; i < input[ch].length; i++) {
        // Pass samples above the threshold through; mute the rest.
        output[ch][i] = Math.abs(input[ch][i]) > this.threshold ? input[ch][i] : 0
      }
    }
    return true // keep the processor alive
  }
}
registerProcessor('noise-gate', NoiseGateProcessor)
// ❌ 坏:ScriptProcessorNode(已弃用,阻塞主线程)
6.2 缓冲池
// ✅ 好:重用音频缓冲器
/**
 * Fixed-size pool of pre-allocated AudioBuffers, avoiding per-playback
 * allocation in hot paths.
 */
class AudioBufferPool {
  private pool: AudioBuffer[] = []
  /**
   * Pre-allocate `size` buffers of `length` frames at the context's rate.
   * @param channels channel count per buffer (defaults to 2 = stereo,
   *                 matching the previous hard-coded value)
   */
  constructor(ctx: AudioContext, size: number, length: number, channels = 2) {
    for (let i = 0; i < size; i++) {
      this.pool.push(ctx.createBuffer(channels, length, ctx.sampleRate))
    }
  }
  /** Take a buffer; undefined when exhausted — caller decides what to do. */
  acquire(): AudioBuffer | undefined { return this.pool.pop() }
  /** Zero the buffer before pooling it so stale samples never leak out. */
  release(buffer: AudioBuffer) {
    for (let ch = 0; ch < buffer.numberOfChannels; ch++) {
      buffer.getChannelData(ch).fill(0)
    }
    this.pool.push(buffer)
  }
}
// ❌ 坏:每次创建新缓冲器
const buffer = ctx.createBuffer(2, 44100, 44100) // 每次调用都分配内存
6.3 离线渲染
// ✅ 好:预渲染复杂声音
/**
 * Pre-render a sound offline so playback only copies a cached buffer.
 * @param duration   rendered length in seconds (default 1, as before)
 * @param sampleRate render rate in Hz (default 44100, as before)
 */
async function prerenderSound(duration = 1, sampleRate = 44100): Promise<AudioBuffer> {
  const offlineCtx = new OfflineAudioContext(2, Math.ceil(duration * sampleRate), sampleRate)
  const osc = offlineCtx.createOscillator()
  const gain = offlineCtx.createGain()
  osc.connect(gain).connect(offlineCtx.destination)
  // Short linear attack, then exponential decay across the full duration.
  gain.gain.setValueAtTime(0, 0)
  gain.gain.linearRampToValueAtTime(1, Math.min(0.01, duration))
  gain.gain.exponentialRampToValueAtTime(0.001, duration)
  osc.start()
  osc.stop(duration)
  return offlineCtx.startRendering()
}
// ❌ 坏:实时生成复杂声音(多个振荡器实时计算)
6.4 节点图优化
// ✅ Good: reuse one master gain node shared by every sound
const masterGain = ctx.createGain()
masterGain.connect(ctx.destination)
// Each playback only allocates a lightweight buffer source; the gain stage is shared.
function playSound(buffer: AudioBuffer) {
const source = ctx.createBufferSource()
source.buffer = buffer
source.connect(masterGain)
source.start()
}
// ❌ 坏:为每个声音创建完整链(每次播放都有增益 + 压缩器)
6.5 内存管理
// ✅ 好:断开连接并清理节点
/** Play `buffer` once through the shared master gain, then self-clean. */
function playOneShot(buffer: AudioBuffer) {
  const src = ctx.createBufferSource()
  src.buffer = buffer
  // Disconnect when playback ends so the node can be garbage-collected.
  src.onended = () => src.disconnect()
  src.connect(masterGain)
  src.start()
}
// ✅ 好:限制并发声音(最多8个)
/**
 * Caps the number of concurrently playing sources, evicting the oldest
 * when the cap is reached.
 */
class SoundManager {
  private activeSources = new Set<AudioBufferSourceNode>()
  /** @param maxConcurrent playback cap (default 8, matching previous behavior) */
  constructor(private readonly maxConcurrent = 8) {}
  play(buffer: AudioBuffer) {
    if (this.activeSources.size >= this.maxConcurrent) {
      // Evict the oldest source and remove it from the set IMMEDIATELY:
      // onended fires asynchronously, so relying on it alone lets the set
      // grow past the cap under rapid playback.
      const oldest = this.activeSources.values().next().value
      if (oldest) {
        this.activeSources.delete(oldest)
        oldest.stop()
      }
    }
    const source = ctx.createBufferSource()
    source.buffer = buffer
    source.connect(masterGain)
    source.onended = () => { source.disconnect(); this.activeSources.delete(source) }
    this.activeSources.add(source)
    source.start()
  }
}
// ❌ 坏:从不清理 - 播放后节点留在内存中
const source = ctx.createBufferSource()
source.connect(ctx.destination)
source.start()
7. 质量标准
// ✅ 总是需要用户手势
button.addEventListener('click', async () => {
await audioContext.resume()
playSound()
})
// ✅ 尊重用户偏好
if (usePreferencesStore().preferences.soundEnabled) playBeep()
// ✅ Handle permission denial gracefully
try {
  await navigator.mediaDevices.getUserMedia({ audio: true })
} catch (err) {
  // `err` is `unknown` under strict TS — narrow before reading `.name`.
  // getUserMedia rejects with a DOMException named 'NotAllowedError' on denial.
  if (err instanceof DOMException && err.name === 'NotAllowedError') {
    showVisualFeedback('需要麦克风访问权限')
  }
}
8. 测试与质量
describe('HUD声音', () => {
it('验证音量边界', async () => {
const { playBeep } = useHUDSounds()
await playBeep({ volume: 2 }) // 限制到1
await playBeep({ volume: -1 }) // 限制到0
})
})
9. 常见错误与反模式
9.1 关键反模式
// ❌ 无需用户手势自动播放 - 被阻止
onMounted(() => playSound())
// ✅ 用户交互后
const handleClick = async () => { await audioContext.resume(); playSound() }
// ❌ 内存泄漏 - 不清理
const audioContext = new AudioContext()
// ✅ 适当清理
onUnmounted(() => audioContext.close())
// ❌ 每个声音新上下文 - 性能杀手
function playSound() { const ctx = new AudioContext() }
// ✅ 重用上下文
const ctx = new AudioContext()
function playSound() { /* 重用ctx */ }
10. 预实现清单
阶段1:编写代码前
- [ ] 为音频节点创建和连接编写测试
- [ ] 为音量限制和验证编写测试
- [ ] 识别性能要求(延迟、并发声音)
- [ ] 需要AudioWorklet进行DSP?创建工作文件
- [ ] 计算预期使用的缓冲池大小
阶段2:实现过程中
- [ ] 需要用户手势来初始化AudioContext
- [ ] 音频上下文被重用(不为每个声音创建)
- [ ] 在onended回调中断开节点连接
- [ ] 验证音量边界(0-1范围)
- [ ] 优雅处理麦克风权限
- [ ] 错误状态提供视觉反馈
阶段3:提交前
- [ ] 所有音频测试通过:npm test -- -t "audio"
- [ ] 类型检查通过:npm run typecheck
- [ ] 无内存泄漏(在DevTools内存标签中测试)
- [ ] 组件卸载时关闭音频上下文
- [ ] 提供可访问性的视觉替代方案
- [ ] 可以通过用户偏好禁用声音
- [ ] 音量尊重系统偏好
11. 总结
JARVIS的Web Audio API:用户手势后初始化、卸载时清理、处理权限拒绝、提供视觉替代方案。参见references/advanced-patterns.md