// volcAsr.ts — browser client for the Volcengine V3 streaming speech-recognition (ASR) WebSocket API.
import { getVolcAsrToken } from "@/api/volcengine";

/**
 * Callbacks through which {@link VolcAsrService} reports its progress.
 */
interface VolcAsrOptions {
  /**
   * Invoked for every server response that carries non-empty text.
   * `isFinal` is true when at least one utterance in the response is marked `definite`.
   */
  onResult: (text: string, isFinal: boolean) => void;
  /** Invoked with whatever was thrown or detected (token, socket, microphone or server errors). */
  onError: (error: any) => void;
  /** Invoked once the WebSocket has been created (the connection may still be opening). */
  onStart?: () => void;
  /** Invoked at the end of every `stop()` call. */
  onStop?: () => void;
}

/**
 * Streams microphone audio to the Volcengine V3 "bigmodel" ASR WebSocket endpoint and
 * forwards recognised text through the callbacks given at construction time.
 *
 * Lifecycle: `start()` obtains a token, opens the socket and sends the JSON config frame;
 * the first server response triggers microphone capture; `stop()` tears everything down.
 * A monotonically increasing session id guards every asynchronous continuation so that
 * work belonging to a stopped (or superseded) session can never affect the current one.
 */
export class VolcAsrService {
  private socket: WebSocket | null = null;
  private audioContext: AudioContext | null = null;
  private processor: ScriptProcessorNode | null = null;
  private stream: MediaStream | null = null;
  private options: VolcAsrOptions;
  private isRunning = false;
  private token = "";
  private recordingStarted = false;
  /** Bumped by every start() and stop(); async continuations compare against it to detect cancellation. */
  private sessionId = 0;

  constructor(options: VolcAsrOptions) {
    this.options = options;
  }

  /**
   * Opens a recognition session. Does nothing if a session is already running or starting.
   *
   * @param appId    Volcengine AppID. When omitted it is read from the token's JWT payload
   *                 (`appid`, falling back to `aud`).
   * @param jwtToken Access token. When omitted it is requested from the backend via
   *                 `getVolcAsrToken()`.
   *
   * Failures are reported through `onError` followed by `stop()`; the returned promise
   * itself does not reject.
   */
  async start(appId?: string, jwtToken?: string): Promise<void> {
    if (this.isRunning) return;

    // Claim the session *before* the first await so that concurrent start() calls cannot
    // both pass the guard above and open two sockets.
    this.isRunning = true;
    this.recordingStarted = false;
    const session = ++this.sessionId;

    try {
      // 1. Obtain the token. No credentials are embedded in client code: if the backend
      //    cannot issue a token the error is surfaced to the caller instead.
      let token: unknown = jwtToken;
      if (!token) {
        const response = await getVolcAsrToken();
        if (session !== this.sessionId) return; // stop() was called while we were waiting
        token = response.data;
      }
      this.token = typeof token === "string" ? token : "";

      // 2. Resolve the AppID, falling back to the JWT payload when it was not supplied.
      const resolvedAppId = appId || this.extractAppIdFromJwt(this.token);
      if (!resolvedAppId || !this.token) {
        throw new Error("缺少必要的 AppID 或 Token");
      }

      // 3. Build the V3 URL. Browsers cannot set WebSocket headers, so the credentials
      //    travel as query parameters; every value is percent-encoded.
      const resourceId = "volc.seedasr.sauc.duration";
      const connectId = crypto.randomUUID();
      const url =
        `wss://openspeech.bytedance.com/api/v3/sauc/bigmodel_async?` +
        `api_resource_id=${encodeURIComponent(resourceId)}&` +
        `api_app_key=${encodeURIComponent(resolvedAppId)}&` +
        `api_access_key=${encodeURIComponent(this.authValue())}&` +
        `api_connect_id=${encodeURIComponent(connectId)}`;

      console.log(`[ASR] Connecting to V3 async with SeedASR 2.0 resource...`);
      console.log(`[ASR] AppID: ${resolvedAppId}, Resource: ${resourceId}`);

      const socket = new WebSocket(url);
      socket.binaryType = "arraybuffer";
      this.socket = socket;

      // Every handler first checks that `socket` is still the active one, so events from a
      // socket that has been replaced or stopped are ignored.
      socket.onopen = () => {
        if (this.socket !== socket) return;
        console.log("[ASR] WebSocket connected");
        this.sendFullClientRequest(resolvedAppId);
      };

      socket.onmessage = (event) => {
        if (this.socket !== socket) return;
        this.handleMessage(event.data);
      };

      socket.onerror = (err) => console.error("[ASR] WebSocket error:", err);

      socket.onclose = (e) => {
        console.log(`[ASR] WebSocket closed: ${e.code}`);
        if (this.socket !== socket) return;

        // Capture both flags BEFORE stop(), which resets them.
        const wasRunning = this.isRunning;
        const hadStartedRecording = this.recordingStarted;
        this.stop();

        // A close before the server ever answered means the connection/auth failed.
        if (wasRunning && !hadStartedRecording) {
          this.options.onError(new Error(`ASR 连接失败 (${e.code})。请确认后端已正确授权大模型 ASR 资源。`));
        }
      };

      this.options.onStart?.();
    } catch (err) {
      // If the session was cancelled or superseded meanwhile, do not tear down the newer one.
      if (session !== this.sessionId) return;
      this.options.onError(err);
      this.stop();
    }
  }

  /** Returns the credential string with the prefix matching the token type ("Jwt; " when the token contains a dot, else "Bearer; "). */
  private authValue(): string {
    const prefix = this.token.includes(".") ? "Jwt; " : "Bearer; ";
    return `${prefix}${this.token}`;
  }

  /**
   * Reads the AppID from a JWT's payload segment (`appid`, else first/only `aud`).
   * Returns `undefined` when the token is not a JWT, cannot be decoded, or has neither claim.
   */
  private extractAppIdFromJwt(token: string): string | undefined {
    const parts = token.split(".");
    if (parts.length <= 1) return undefined;

    try {
      // base64url -> base64, re-adding the padding that JWTs strip.
      const b64 = parts[1].replace(/-/g, "+").replace(/_/g, "/");
      const padded = b64.padEnd(Math.ceil(b64.length / 4) * 4, "=");
      const payload = JSON.parse(atob(padded));
      const candidate = payload?.appid || (Array.isArray(payload?.aud) ? payload.aud[0] : payload?.aud);
      if (candidate === undefined || candidate === null || candidate === "") return undefined;
      console.log(`[ASR] Extracted AppID from JWT: ${candidate}`);
      return String(candidate);
    } catch (e) {
      console.error("[ASR] Failed to parse AppID from JWT", e);
      return undefined;
    }
  }

  /** Sends the initial JSON configuration frame (message type 0x1, JSON serialization, no compression). */
  private sendFullClientRequest(appId: string) {
    if (!this.socket || this.socket.readyState !== WebSocket.OPEN) return;

    const config = {
      app: {
        appid: Number(appId),
        token: this.authValue()
      },
      user: { uid: "user_" + Math.random().toString(36).substring(7) },
      audio: {
        format: "pcm",
        sample_rate: 16000,
        bits: 16,
        channel: 1
      },
      request: {
        model_name: "seedasr",
        enable_itn: true,
        enable_punctuation: true,
        show_utterance: true
      },
    };

    const payload = new TextEncoder().encode(JSON.stringify(config));
    this.socket.send(this.packMessage(0x1, 0, 1, 0, payload));
  }

  /**
   * Builds one protocol frame: 4-byte header, 4-byte big-endian payload size, payload.
   *
   * @param type     Message type (high nibble of header byte 1).
   * @param flags    Message-type-specific flags (low nibble of header byte 1).
   * @param serial   Serialization method (high nibble of header byte 2).
   * @param compress Compression method (low nibble of header byte 2).
   * @param payload  Raw payload bytes.
   */
  private packMessage(type: number, flags: number, serial: number, compress: number, payload: Uint8Array): ArrayBuffer {
    const buffer = new ArrayBuffer(4 + 4 + payload.length);
    const view = new DataView(buffer);

    view.setUint8(0, 0x11); // protocol version 1, header size 1 (x4 bytes)
    view.setUint8(1, (type << 4) | (flags & 0x0f));
    view.setUint8(2, (serial << 4) | (compress & 0x0f));
    view.setUint8(3, 0x00); // reserved

    view.setUint32(4, payload.length, false);
    new Uint8Array(buffer, 8).set(payload);
    return buffer;
  }

  /**
   * Opens the microphone and streams 16 kHz mono 16-bit PCM frames (message type 0x2)
   * to the socket. Errors are reported through `onError` followed by `stop()`.
   */
  private async startRecording() {
    const session = this.sessionId;
    try {
      const stream = await navigator.mediaDevices.getUserMedia({ audio: true });

      // stop() may have run while the permission prompt was open; in that case nobody
      // owns this stream any more, so release the microphone right away.
      if (session !== this.sessionId || !this.isRunning) {
        stream.getTracks().forEach((t) => t.stop());
        return;
      }

      this.stream = stream;
      this.audioContext = new (window.AudioContext || (window as any).webkitAudioContext)({ sampleRate: 16000 });
      const source = this.audioContext.createMediaStreamSource(stream);
      this.processor = this.audioContext.createScriptProcessor(4096, 1, 1);
      source.connect(this.processor);
      // The processor is wired to the destination as in the original graph setup.
      this.processor.connect(this.audioContext.destination);

      this.processor.onaudioprocess = (e) => {
        if (!this.isRunning || !this.socket || this.socket.readyState !== WebSocket.OPEN) return;
        const pcm = this.floatTo16BitPCM(e.inputBuffer.getChannelData(0));
        this.socket.send(this.packMessage(0x2, 0, 0, 0, new Uint8Array(pcm)));
      };
    } catch (err) {
      if (session !== this.sessionId) return;
      this.options.onError(err);
      this.stop();
    }
  }

  /** Converts float samples (clamped to [-1, 1]) to little-endian signed 16-bit PCM. */
  private floatTo16BitPCM(input: Float32Array): ArrayBuffer {
    const buffer = new ArrayBuffer(input.length * 2);
    const view = new DataView(buffer);
    for (let i = 0; i < input.length; i++) {
      const s = Math.max(-1, Math.min(1, input[i]));
      view.setInt16(i * 2, s < 0 ? s * 0x8000 : s * 0x7fff, true);
    }
    return buffer;
  }

  /**
   * Parses one binary frame from the server.
   *
   * - Types 0x09 / 0x03: JSON response. The first one starts microphone capture; any
   *   non-empty `result.text` is forwarded to `onResult`.
   * - Type 0x0f: server error, forwarded to `onError`.
   *
   * Non-binary data and frames too short to contain a header are ignored.
   */
  private handleMessage(data: ArrayBuffer) {
    if (!(data instanceof ArrayBuffer)) return;
    if (data.byteLength < 4) return; // not even a full header

    const view = new DataView(data);
    // Low nibble of byte 0 is the header size in 4-byte units; never go below 4 bytes.
    const headerSize = Math.max(4, (view.getUint8(0) & 0x0f) * 4);
    const type = view.getUint8(1) >> 4;
    const flags = view.getUint8(1) & 0x0f;

    if (type === 0x0f) {
      const detail = this.describeErrorFrame(view, headerSize);
      this.options.onError(new Error(`ASR 鉴权成功但业务报错,请检查资源关联状态${detail}`));
      return;
    }

    let offset = headerSize;
    if (flags & 0x01) offset += 4; // a 4-byte sequence number precedes the payload size
    if (data.byteLength < offset + 4) return;
    const payloadSize = view.getUint32(offset, false);
    offset += 4;
    const payload = data.slice(offset, offset + payloadSize);

    if (type !== 0x09 && type !== 0x03) return;

    try {
      const resp = JSON.parse(new TextDecoder().decode(payload));

      // The first parsable server response is the signal to start capturing audio.
      if (!this.recordingStarted) {
        this.recordingStarted = true;
        void this.startRecording();
      }

      const result = resp.result;
      if (result) {
        const text = result.text || "";
        const isFinal = (result.utterances || []).some((u: any) => u.definite === true);
        if (text) this.options.onResult(text, isFinal);
      }
    } catch (err) {
      // Best effort: a malformed frame must not kill the session, but leave a trace.
      console.warn("[ASR] Failed to handle server response", err);
    }
  }

  /**
   * Extracts " (code N: message)" from an error frame, or "" when the frame is too short.
   * NOTE(review): assumes the documented layout header | code(u32 BE) | size(u32 BE) | UTF-8 message
   * — confirm against the Volcengine protocol reference.
   */
  private describeErrorFrame(view: DataView, headerSize: number): string {
    if (view.byteLength < headerSize + 8) return "";
    const code = view.getUint32(headerSize, false);
    const size = view.getUint32(headerSize + 4, false);
    const begin = headerSize + 8;
    const end = Math.min(view.byteLength, begin + size);
    const message = new TextDecoder().decode(new Uint8Array(view.buffer, view.byteOffset + begin, end - begin));
    return message ? ` (code ${code}: ${message})` : ` (code ${code})`;
  }

  /**
   * Ends the current session: releases the microphone and audio graph, sends the
   * last-packet marker if the socket is open, closes the socket, and calls `onStop`.
   * Safe to call at any time, including while `start()` is still awaiting the token.
   *
   * NOTE(review): the socket is closed right after the last-packet marker is sent, so a
   * final transcript the server produces after that marker is not awaited.
   */
  stop() {
    this.sessionId++; // invalidates pending continuations of start()/startRecording()

    const socket = this.socket;
    const wasRunning = this.isRunning;
    this.socket = null;
    this.isRunning = false;
    this.recordingStarted = false;

    if (this.processor) {
      this.processor.onaudioprocess = null;
      this.processor.disconnect();
      this.processor = null;
    }
    if (this.audioContext) {
      // close() rejects if the context is already closed; that is harmless here.
      void Promise.resolve(this.audioContext.close()).catch(() => undefined);
      this.audioContext = null;
    }
    if (this.stream) {
      this.stream.getTracks().forEach((t) => t.stop());
      this.stream = null;
    }

    if (socket) {
      // Detach first: a late close/message event from this socket must not re-enter
      // stop() (double onStop), restart recording, or tear down a newer session.
      socket.onopen = null;
      socket.onmessage = null;
      socket.onerror = null;
      socket.onclose = null;

      if (wasRunning && socket.readyState === WebSocket.OPEN) {
        try {
          socket.send(this.packMessage(0x2, 0x02, 0, 0, new Uint8Array(0)));
        } catch (err) {
          console.warn("[ASR] Failed to send last-packet marker", err);
        }
      }
      socket.close();
    }

    this.options.onStop?.();
  }
}