renderMarkdown.ts 7.78 KB
// utils/renderMarkdown.ts
import MarkdownIt from "markdown-it";
// @ts-ignore - 部分插件没有类型定义
import anchor from "markdown-it-anchor";
// @ts-ignore
import taskLists from "markdown-it-task-lists";
// @ts-ignore
import attrs from "markdown-it-attrs";
// @ts-ignore
import mark from "markdown-it-mark";
// @ts-ignore
import sub from "markdown-it-sub";
// @ts-ignore
import sup from "markdown-it-sup";
// @ts-ignore
import footnote from "markdown-it-footnote";
// @ts-ignore
import texmath from "markdown-it-texmath";
import DOMPurify from "dompurify";

// ===== Lazy KaTeX loading: keep the large bundle out of the entry / auth first paint =====
let katexInstance: any | null = null;
let katexLoading: Promise<any | null> | null = null;

/**
 * Loads KaTeX on demand through a dynamic import (no CDN involved).
 *
 * The resolved module is stored in `katexInstance`; concurrent callers share
 * the single in-flight promise held in `katexLoading`.
 *
 * @returns The KaTeX module, or `null` when the import failed. After a
 *          failure the in-flight promise is cleared so a later call retries.
 */
export const ensureKatexReady = async (): Promise<any | null> => {
  if (katexInstance) return katexInstance;
  if (katexLoading) return katexLoading;

  const loadKatex = async (): Promise<any | null> => {
    try {
      const mod = await import("katex");
      const instance = (mod as any).default || mod;
      katexInstance = instance;
      // Also expose it globally for third-party components that read
      // window.katex (e.g. TDesign ChatMarkdown).
      if (typeof window !== "undefined") {
        (window as any).katex = instance;
      }
      return instance;
    } catch (err) {
      console.error("加载 KaTeX 失败:", err);
      katexLoading = null;
      return null;
    }
  };

  katexLoading = loadKatex();
  return katexLoading;
};

// Lazily created MarkdownIt instance: it is only built when first needed, and
// whether formula rendering is enabled depends on whether KaTeX is available.
let md: MarkdownIt | null = null;

// --- Math preprocessing: tolerant normalisation of LaTeX embedded in Markdown ---

/** Opening line of a fenced code block; group 1 (backticks) or 2 (tildes) holds the fence run. */
const MATH_FENCE_OPEN_RE = /^\s*(?:(`{3,})[^`]*|(~{3,}).*)$/;

/** An unescaped `|`, optionally preceded by a delimiter-sizing command (captured in group 1). */
const MATH_PIPE_RE = /(\\(?:left|right|middle|[bB]igg?[lrm]?)\s*)?(?<!\\)\|/g;

/**
 * Lines that are Markdown block syntax and therefore never bare math:
 * ATX headings, blockquotes, `- ` / `* ` bullets, ordered list items, and
 * runs of `-` / `=` (thematic breaks, setext heading underlines).
 */
const MARKDOWN_BLOCK_LINE_RE = /^(?:#{1,6}\s|>|[-*] |\d+\.\s|[-=]+$)/;

/** Counts non-overlapping occurrences of `needle` in `haystack`. */
function countOccurrences(haystack: string, needle: string): number {
  return haystack.split(needle).length - 1;
}

/** True when `line` consists only of the fence character and is at least as long as the opening run. */
function closesCodeFence(line: string, fence: string): boolean {
  const run = line.trim();
  return run.length >= fence.length && run === fence.charAt(0).repeat(run.length);
}

/**
 * Replaces unescaped `|` inside a formula body.
 * After `\left`, `\right`, `\big` and friends a delimiter is required, so
 * `\vert` is used there; everywhere else the pipe becomes `\mid`.
 */
function escapeMathPipes(body: string): string {
  return body.replace(MATH_PIPE_RE, (_match: string, sizing: string | undefined) =>
    sizing ? `${sizing}\\vert ` : "\\mid ",
  );
}

/** Undoes over-escaping and rewrites `\[..\]` / `\(..\)` into `$$..$$` / `$..$`. */
function normalizeMathDelimiters(text: string): string {
  // Consume `\\` pairs first so the LaTeX row separator in `\\&` or `\\_`
  // is not mistaken for an escaped `&` / `_`.
  let out = text.replace(
    /\\\\|\\([$_#&])/g,
    (match: string, ch: string | undefined) => ch ?? match,
  );

  // Display math gets blank lines around it so it stands as its own block.
  out = out.replace(
    /\\\[([\s\S]*?)\\\]/g,
    (_m: string, body: string) => `\n\n$$${escapeMathPipes(body)}$$\n\n`,
  );
  out = out.replace(
    /\\\(([\s\S]*?)\\\)/g,
    (_m: string, body: string) => `$${escapeMathPipes(body)}$`,
  );

  // Blocks that already use $$...$$ only need their pipes escaped.
  return out.replace(
    /\$\$([\s\S]*?)\$\$/g,
    (_m: string, body: string) => `$$${escapeMathPipes(body)}$$`,
  );
}

/**
 * Heuristic: does this trimmed line look like a formula written without
 * delimiters, as opposed to an ordinary sentence or Markdown syntax?
 */
function looksLikeBareMath(trimmed: string): boolean {
  if (trimmed.includes("$")) return false;
  if (MARKDOWN_BLOCK_LINE_RE.test(trimmed)) return false;

  // Commands need at least two letters so a lone backslash does not count.
  const hasMathCommand = /\\[a-zA-Z]{2,}/.test(trimmed);
  const hasMathStruct = /[\^_{}]/.test(trimmed) && /[a-zA-Z0-9]/.test(trimmed);
  const startsWithMath = /^[\\+\-=]/.test(trimmed);
  if (!hasMathCommand && !hasMathStruct && !startsWithMath) return false;

  // Several longer English words plus several spaces reads as a sentence.
  const spaceCount = (trimmed.match(/\s/g) ?? []).length;
  const longWordCount = (trimmed.match(/[a-zA-Z]{3,}/g) ?? []).length;
  const isLikelySentence =
    (longWordCount >= 3 && spaceCount > 2) ||
    trimmed.includes("**") ||
    trimmed.includes("__") ||
    (trimmed.includes("[") && trimmed.includes("]")) ||
    /^[A-Z][a-z]+/.test(trimmed);
  if (isLikelySentence) return false;

  // A fully bracketed line without any command is ordinary punctuation.
  if (/^[([{].*[)}\]]$/.test(trimmed) && !hasMathCommand) return false;

  // Lines containing Chinese characters are treated as prose.
  return !/[\u4e00-\u9fa5]/.test(trimmed);
}

/** Line-by-line pass that wraps bare formulas in `$$...$$`. */
function wrapBareMathLines(text: string): string {
  let insideDollarBlock = false;
  let envDepth = 0;

  const wrapped = text.split("\n").map((line) => {
    const trimmed = line.trim();
    if (!trimmed) return line;

    // An odd number of `$$` on a line opens or closes a multi-line block.
    const dollarRuns = countOccurrences(trimmed, "$$");
    if (dollarRuns > 0) {
      if (dollarRuns % 2 !== 0) insideDollarBlock = !insideDollarBlock;
      return line;
    }

    // Track \begin{..}/\end{..} nesting. A line belongs to a multi-line
    // environment if the depth was non-zero before OR after it, so the
    // closing \end{..} line is never wrapped on its own.
    const depthBefore = envDepth;
    envDepth = Math.max(
      0,
      envDepth + countOccurrences(trimmed, "\\begin{") - countOccurrences(trimmed, "\\end{"),
    );
    if (insideDollarBlock || depthBefore > 0 || envDepth > 0) return line;

    return looksLikeBareMath(trimmed) ? `\n\n$$${trimmed}$$\n\n` : line;
  });

  return wrapped.join("\n");
}

/**
 * Makes loosely written LaTeX in Markdown renderable with `$` / `$$` delimiters.
 *
 * Steps, applied to everything outside fenced code blocks:
 *  1. normalise CRLF to LF and undo over-escaped `\$`, `\_`, `\#`, `\&`;
 *  2. convert `\[..\]` to `$$..$$` and `\(..\)` to `$..$`, escaping `|` inside
 *     every formula;
 *  3. wrap lines that look like bare formulas in `$$..$$`.
 *
 * Fenced code blocks (``` or ~~~) are copied through verbatim.
 *
 * @param content Raw Markdown text.
 * @returns The preprocessed Markdown; falsy input is returned unchanged.
 */
export function preprocessMathContent(content: string): string {
  if (!content) return content;

  const output: string[] = [];
  let prose: string[] = [];
  let openFence: string | null = null;

  // Each run of non-code lines is processed as one unit so multi-line
  // constructs such as \[ ... \] are still seen whole.
  const flushProse = (): void => {
    if (prose.length === 0) return;
    output.push(wrapBareMathLines(normalizeMathDelimiters(prose.join("\n"))));
    prose = [];
  };

  for (const line of content.replace(/\r\n/g, "\n").split("\n")) {
    if (openFence !== null) {
      output.push(line);
      if (closesCodeFence(line, openFence)) openFence = null;
      continue;
    }

    const opener = MATH_FENCE_OPEN_RE.exec(line);
    if (opener) {
      flushProse();
      openFence = opener[1] ?? opener[2] ?? null;
      output.push(line);
    } else {
      prose.push(line);
    }
  }
  flushProse();

  return output.join("\n");
}

// Whether the cached `md` instance was built with the texmath/KaTeX plugin.
let mdHasMathSupport = false;

/**
 * Returns the shared MarkdownIt instance, building it on first use.
 *
 * The texmath plugin is only installed when KaTeX has already been loaded
 * (`katexInstance` is set). If the cache was built before KaTeX arrived, it is
 * rebuilt once KaTeX is available, so formulas start rendering without a page
 * reload.
 */
function getMarkdownIt(): MarkdownIt {
  const katex = katexInstance;

  // Reuse the cache unless it lacks math support that could now be provided.
  if (md && (mdHasMathSupport || !katex)) return md;

  const instance = new MarkdownIt({
    html: true, // allow inline HTML (turn off if not needed)
    linkify: true, // turn bare URLs into links
    typographer: true, // nicer punctuation
    breaks: false, // a single newline does not become <br>
  })
    .use(anchor, { permalink: false })
    .use(taskLists, { enabled: true, label: true })
    .use(attrs)
    .use(mark)
    .use(sub)
    .use(sup)
    .use(footnote);

  // Enable texmath only when KaTeX is already present, so this module never
  // forces the large KaTeX bundle to load.
  if (katex) {
    instance.use(texmath, {
      engine: katex,
      delimiters: "dollars", // supports $ and $$ delimiters
      katexOptions: {
        throwOnError: false, // render an error message instead of throwing
        errorColor: "#cc0000",
        macros: { "\\RR": "\\mathbb{R}" },
      },
    });
  }

  md = instance;
  mdHasMathSupport = Boolean(katex);
  return instance;
}

// 可选:语法高亮(prism 或 highlight.js 选其一)
// 例如:import 'prismjs/themes/prism.css'; import 'prismjs/components/prism-javascript';
// 然后在 md.set({ highlight: (str, lang) => ... })

/**
 * Alias of `markdownToHtml`, kept as a separate export.
 *
 * @param markdown Markdown source; defaults to an empty string.
 * @returns Whatever `markdownToHtml` returns for the same input.
 */
export function simpleMarkdownToHtml(markdown: string = ""): string {
  const html = markdownToHtml(markdown);
  return html;
}

/**
 * Renders Markdown to sanitized HTML.
 *
 * The text is first run through `preprocessMathContent`, then rendered with
 * the shared MarkdownIt instance, and finally sanitized with DOMPurify.
 *
 * @param markdown Markdown source; defaults to an empty string.
 * @returns Sanitized HTML, or an empty string for empty input.
 */
export function markdownToHtml(markdown: string = ""): string {
  if (!markdown) return "";

  // Normalise math delimiters before the Markdown parser sees the text.
  const processedContent = preprocessMathContent(markdown);

  const raw = getMarkdownIt().render(processedContent || "");

  return DOMPurify.sanitize(raw, {
    // USE_PROFILES resets the allow-lists to exactly the listed profiles.
    // KaTeX output needs more than plain HTML: stretchy glyphs (radicals,
    // braces, wide accents) are drawn with inline SVG, and the accessibility
    // layer is MathML with MathML-specific attributes.
    USE_PROFILES: { html: true, mathMl: true, svg: true },
    // MathML tags KaTeX may emit; some of them (semantics, annotation) are
    // not part of the mathMl profile, so the explicit list is kept.
    ADD_TAGS: [
      "math",
      "annotation",
      "semantics",
      "mrow",
      "mi",
      "mo",
      "mn",
      "mfrac",
      "msup",
      "msub",
      "munderover",
      "mover",
      "munder",
      "mtable",
      "mtr",
      "mtd",
      "mtext",
      "mspace",
      "menclose",
      "mstyle",
      "mpadded",
      "mphantom",
      "mfenced",
      "merror",
      "maction",
    ],
    // class and style are required by KaTeX. ADD_ATTR takes literal names,
    // not wildcards: data-* and aria-* are governed by the two flags below.
    ADD_ATTR: ["class", "style", "encoding"],
    // Allow all data-* attributes.
    ALLOW_DATA_ATTR: true,
    // Allow all aria-* attributes.
    ALLOW_ARIA_ATTR: true,
  });
}