// volcAsr.ts
import { getVolcAsrToken } from "@/api/volcengine";
/**
 * Callbacks through which the ASR service reports its lifecycle and results.
 * Only `onResult` and `onError` are mandatory.
 */
interface VolcAsrOptions {
  /** Invoked once a session has been set up and the socket has been created. */
  onStart?: () => void;
  /**
   * Invoked for every non-empty transcript received from the server.
   * `isFinal` is true when at least one utterance in the response is marked definite.
   */
  onResult: (text: string, isFinal: boolean) => void;
  /** Invoked with whatever value was thrown or produced when something fails. */
  onError: (error: any) => void;
  /** Invoked at the end of every `stop()` call. */
  onStop?: () => void;
}
/**
 * Browser-side streaming speech-recognition client for the Volcengine
 * "bigmodel" ASR WebSocket endpoint (V3 binary framing).
 *
 * Lifecycle:
 *  1. `start()` resolves the credentials, opens the WebSocket and, once it is
 *     open, sends the JSON configuration request.
 *  2. The first decodable server response triggers microphone capture; audio
 *     is streamed as 16 kHz / 16-bit / mono PCM frames.
 *  3. `stop()` sends the "last packet" marker (when possible) and releases the
 *     socket, the audio graph and the microphone.
 *
 * Every asynchronous continuation and socket event handler checks that it
 * still belongs to the current session, so a late event from a previous
 * socket can never tear down a newer session.
 */
export class VolcAsrService {
  private socket: WebSocket | null = null;
  private audioContext: AudioContext | null = null;
  private source: MediaStreamAudioSourceNode | null = null;
  private processor: ScriptProcessorNode | null = null;
  private stream: MediaStream | null = null;
  private readonly options: VolcAsrOptions;
  private isRunning = false;
  private token = "";
  private recordingStarted = false;
  /** True once an error was already surfaced for the current session (avoids a duplicate report on close). */
  private errorReported = false;
  /** Bumped by every start() and stop(); async continuations compare against it to detect staleness. */
  private sessionId = 0;

  constructor(options: VolcAsrOptions) {
    this.options = options;
  }

  /**
   * Starts a recognition session. Does nothing if one is already running.
   *
   * @param appId    Application id. When omitted it is read from the token's
   *                 JWT payload (`appid`, falling back to `aud`).
   * @param jwtToken Access token. When omitted it is requested from the
   *                 backend through `getVolcAsrToken()`.
   *
   * Failures are reported through `options.onError` followed by `stop()`;
   * the returned promise itself does not reject.
   */
  async start(appId?: string, jwtToken?: string): Promise<void> {
    if (this.isRunning) return;
    // Claim the session before the first await so overlapping start() calls cannot both proceed.
    this.isRunning = true;
    this.recordingStarted = false;
    this.errorReported = false;
    const session = ++this.sessionId;
    try {
      // 1. Obtain the token.
      let finalToken = jwtToken;
      if (!finalToken) {
        // SECURITY: credentials must come from the caller or the backend. A
        // token-endpoint failure is surfaced through onError instead of falling
        // back to credentials embedded in the client bundle.
        const response = await getVolcAsrToken();
        if (session !== this.sessionId) return; // stop() ran while the request was in flight
        finalToken = response.data;
      }
      this.token = typeof finalToken === "string" ? finalToken : "";

      // If no appId was supplied, try to read it from the JWT payload.
      let finalAppId = appId;
      if (!finalAppId && this.token) {
        finalAppId = VolcAsrService.appIdFromJwt(this.token);
      }
      if (!finalAppId || !this.token) {
        throw new Error("缺少必要的 AppID 或 Token");
      }
      const resolvedAppId: string = finalAppId;

      // 2. Build the V3 bidirectional-streaming URL. Credentials travel in the
      //    query string; every caller-controlled value is URL-encoded.
      const resourceId = "volc.seedasr.sauc.duration";
      const connectId = crypto.randomUUID();
      const url =
        `wss://openspeech.bytedance.com/api/v3/sauc/bigmodel_async?` +
        `api_resource_id=${resourceId}&` +
        `api_app_key=${encodeURIComponent(resolvedAppId)}&` +
        `api_access_key=${encodeURIComponent(this.authorizationValue())}&` +
        `api_connect_id=${connectId}`;
      console.log(`[ASR] Connecting to V3 async with SeedASR 2.0 resource...`);
      console.log(`[ASR] AppID: ${resolvedAppId}, Resource: ${resourceId}`);

      // 3. Open the socket. Each handler ignores events from a socket that is
      //    no longer the current one.
      const socket = new WebSocket(url);
      socket.binaryType = "arraybuffer";
      socket.onopen = () => {
        console.log("[ASR] WebSocket connected");
        if (this.socket === socket) this.sendFullClientRequest(resolvedAppId);
      };
      socket.onmessage = (event) => {
        if (this.socket === socket) this.handleMessage(event.data);
      };
      socket.onerror = (err) => console.error("[ASR] WebSocket error:", err);
      socket.onclose = (e) => {
        console.log(`[ASR] WebSocket closed: ${e.code}`);
        if (this.socket !== socket) return; // closed by stop() or superseded by a newer session
        // Capture the state BEFORE stop() resets it.
        const failedBeforeAudio =
          this.isRunning && !this.recordingStarted && !this.errorReported;
        this.stop();
        if (failedBeforeAudio) {
          this.options.onError(new Error(`ASR 连接失败 (${e.code})。请确认后端已正确授权大模型 ASR 资源。`));
        }
      };
      this.socket = socket;
      this.options.onStart?.();
    } catch (err) {
      if (session !== this.sessionId) return; // session already ended; nothing left to report
      this.errorReported = true;
      this.options.onError(err);
      this.stop();
    }
  }

  /**
   * Best-effort extraction of the application id from a JWT: decodes the
   * base64url payload segment and returns `appid`, or else `aud` (its first
   * entry when it is an array). Returns `undefined` when the token has no
   * payload segment or the payload cannot be decoded.
   */
  private static appIdFromJwt(token: string): string | undefined {
    const segment = token.split(".")[1];
    if (!segment) return undefined;
    try {
      const base64 = segment.replace(/-/g, "+").replace(/_/g, "/");
      const padded = base64 + "=".repeat((4 - (base64.length % 4)) % 4);
      const payload = JSON.parse(atob(padded));
      const candidate =
        payload?.appid || (Array.isArray(payload?.aud) ? payload.aud[0] : payload?.aud);
      const extracted = candidate ? String(candidate) : undefined;
      console.log(`[ASR] Extracted AppID from JWT: ${extracted}`);
      return extracted;
    } catch (e) {
      console.error("[ASR] Failed to parse AppID from JWT", e);
      return undefined;
    }
  }

  /** Token with its scheme prefix: "Jwt; " when the token contains a dot, otherwise "Bearer; ". */
  private authorizationValue(): string {
    const prefix = this.token.includes(".") ? "Jwt; " : "Bearer; ";
    return `${prefix}${this.token}`;
  }

  /** Sends the initial JSON configuration frame; a no-op unless the socket is open. */
  private sendFullClientRequest(appId: string): void {
    const socket = this.socket;
    if (!socket || socket.readyState !== WebSocket.OPEN) return;
    const config = {
      app: {
        appid: Number(appId),
        token: this.authorizationValue(),
      },
      user: { uid: "user_" + Math.random().toString(36).substring(7) },
      audio: {
        format: "pcm",
        sample_rate: 16000,
        bits: 16,
        channel: 1,
      },
      request: {
        model_name: "seedasr",
        enable_itn: true,
        enable_punctuation: true,
        show_utterance: true,
      },
    };
    const payload = new TextEncoder().encode(JSON.stringify(config));
    // type 0x1 = full client request, serialization 1 = JSON, no compression.
    socket.send(this.packMessage(0x1, 0, 1, 0, payload));
  }

  /**
   * Builds one frame: 4-byte header + 4-byte big-endian payload size + payload.
   *
   * @param type     Message type (high nibble of header byte 1).
   * @param flags    Message-type flags (low nibble of header byte 1).
   * @param serial   Serialization method (high nibble of header byte 2).
   * @param compress Compression method (low nibble of header byte 2).
   * @param payload  Raw payload bytes.
   */
  private packMessage(type: number, flags: number, serial: number, compress: number, payload: Uint8Array): ArrayBuffer {
    const buffer = new ArrayBuffer(4 + 4 + payload.length);
    const view = new DataView(buffer);
    view.setUint8(0, 0x11); // protocol version 1, header size 1 (x4 bytes)
    view.setUint8(1, (type << 4) | (flags & 0x0f));
    view.setUint8(2, (serial << 4) | (compress & 0x0f));
    view.setUint8(3, 0x00); // reserved
    view.setUint32(4, payload.length, false);
    new Uint8Array(buffer, 8).set(payload);
    return buffer;
  }

  /**
   * Opens the microphone and streams PCM frames to the socket. If the session
   * ends while the permission prompt is pending, the freshly acquired stream
   * is released immediately instead of being kept alive.
   */
  private async startRecording(): Promise<void> {
    const session = this.sessionId;
    try {
      const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
      if (!this.isRunning || session !== this.sessionId) {
        stream.getTracks().forEach((t) => t.stop());
        return;
      }
      this.stream = stream;
      const AudioContextCtor: typeof AudioContext =
        window.AudioContext || (window as any).webkitAudioContext;
      const audioContext = new AudioContextCtor({ sampleRate: 16000 });
      this.audioContext = audioContext;
      const source = audioContext.createMediaStreamSource(stream);
      const processor = audioContext.createScriptProcessor(4096, 1, 1);
      this.source = source;
      this.processor = processor;
      processor.onaudioprocess = (e) => {
        const socket = this.socket;
        if (!this.isRunning || !socket || socket.readyState !== WebSocket.OPEN) return;
        const pcmData = this.floatTo16BitPCM(e.inputBuffer.getChannelData(0));
        // type 0x2 = audio-only request, raw (unserialized) payload.
        socket.send(this.packMessage(0x2, 0, 0, 0, new Uint8Array(pcmData)));
      };
      source.connect(processor);
      processor.connect(audioContext.destination);
    } catch (err) {
      if (session !== this.sessionId) return; // session already ended
      this.errorReported = true;
      this.options.onError(err);
      this.stop();
    }
  }

  /** Converts float samples to little-endian signed 16-bit PCM, clamping to [-1, 1] first. */
  private floatTo16BitPCM(input: Float32Array): ArrayBuffer {
    const buffer = new ArrayBuffer(input.length * 2);
    const view = new DataView(buffer);
    for (let i = 0; i < input.length; i++) {
      const s = Math.max(-1, Math.min(1, input[i]));
      view.setInt16(i * 2, s < 0 ? s * 0x8000 : s * 0x7fff, true);
    }
    return buffer;
  }

  /**
   * Decodes one server frame. Non-binary and truncated frames are ignored.
   * Types 0x09 / 0x03 carry a JSON response: the first decodable one starts
   * microphone capture, and any non-empty transcript goes to `onResult`.
   * Type 0x0f is a server error frame.
   */
  private handleMessage(data: unknown): void {
    if (!(data instanceof ArrayBuffer) || data.byteLength < 4) return;
    const view = new DataView(data);
    // Low nibble of byte 0 is the header length in 4-byte units.
    const headerSize = (view.getUint8(0) & 0x0f) * 4;
    if (headerSize < 4) return;
    const type = view.getUint8(1) >> 4;
    const flags = view.getUint8(1) & 0x0f;
    let offset = headerSize;
    if (flags & 0x01) offset += 4; // a 4-byte sequence number precedes the payload size

    if (type === 0x0f) {
      this.reportServerError(view, offset);
      return;
    }
    if (type !== 0x09 && type !== 0x03) return;
    if (data.byteLength < offset + 4) return;

    const payloadSize = view.getUint32(offset, false);
    offset += 4;
    const payload = data.slice(offset, offset + payloadSize);
    let resp: any;
    try {
      resp = JSON.parse(new TextDecoder().decode(payload));
    } catch (err) {
      console.warn("[ASR] Ignoring server frame with undecodable JSON payload", err);
      return;
    }
    if (!this.recordingStarted) {
      this.recordingStarted = true;
      void this.startRecording();
    }
    const result = resp?.result;
    if (result) {
      const text = result.text || "";
      const utterances: any[] = Array.isArray(result.utterances) ? result.utterances : [];
      const isFinal = utterances.some((u) => u?.definite === true);
      if (text) this.options.onResult(text, isFinal);
    }
  }

  /**
   * Reports a server error frame through `onError`. The body is read as a
   * uint32 error code followed by a uint32 length and a UTF-8 message (layout
   * per the vendor protocol documentation — NOTE(review): confirm against the
   * current spec). Whatever part cannot be read is simply left out.
   */
  private reportServerError(view: DataView, offset: number): void {
    let detail = "";
    if (view.byteLength >= offset + 4) {
      const code = view.getUint32(offset, false);
      let text = "";
      const uncompressed = (view.getUint8(2) & 0x0f) === 0;
      if (uncompressed && view.byteLength >= offset + 8) {
        const size = view.getUint32(offset + 4, false);
        const begin = offset + 8;
        const end = Math.min(view.byteLength, begin + size);
        text = new TextDecoder().decode(
          new Uint8Array(view.buffer, view.byteOffset + begin, end - begin)
        );
      }
      detail = ` (code ${code}${text ? `: ${text}` : ""})`;
    }
    this.errorReported = true;
    this.options.onError(new Error(`ASR 鉴权成功但业务报错,请检查资源关联状态${detail}`));
  }

  /**
   * Ends the current session and releases every resource. Safe to call at any
   * time; `onStop` fires once per call. If the socket is open, an empty audio
   * frame flagged as the last packet is sent before closing.
   */
  stop(): void {
    const socket = this.socket;
    if (this.isRunning && socket && socket.readyState === WebSocket.OPEN) {
      socket.send(this.packMessage(0x2, 0x02, 0, 0, new Uint8Array(0)));
    }
    // Invalidate pending async continuations and detach from the socket so
    // its late events are ignored by the handlers' identity checks.
    this.sessionId++;
    this.isRunning = false;
    this.recordingStarted = false;
    this.socket = null;

    if (this.processor) {
      this.processor.onaudioprocess = null;
      this.processor.disconnect();
      this.processor = null;
    }
    if (this.source) {
      this.source.disconnect();
      this.source = null;
    }
    if (this.audioContext) {
      // close() returns a promise that may reject (e.g. already closed); nothing to recover.
      void this.audioContext.close().catch(() => undefined);
      this.audioContext = null;
    }
    if (this.stream) {
      this.stream.getTracks().forEach((t) => t.stop());
      this.stream = null;
    }
    if (socket) socket.close();
    this.options.onStop?.();
  }
}