refactor(Markdown): remove rehype-sanitize and implement custom element filtering

- Removed rehype-sanitize dependency and its related configuration.
- Introduced ALLOWED_ELEMENTS and DISALLOWED_ELEMENTS for custom HTML element filtering.
- Updated rehypePlugins logic to conditionally apply plugins based on message content.
- Added encodeHTML utility function for HTML entity encoding.
This commit is contained in:
kangfenmao 2025-04-22 11:00:15 +08:00
parent c576aa5cb4
commit 55a9447a7b
5 changed files with 30 additions and 120 deletions

View File

@ -190,7 +190,6 @@
"rehype-katex": "^7.0.1",
"rehype-mathjax": "^7.0.0",
"rehype-raw": "^7.0.0",
"rehype-sanitize": "^6.0.0",
"remark-cjk-friendly": "^1.1.0",
"remark-gfm": "^4.0.0",
"remark-math": "^6.0.0",

View File

@ -8,16 +8,14 @@ import type { Message } from '@renderer/types'
import { parseJSON } from '@renderer/utils'
import { escapeBrackets, removeSvgEmptyLines, withGeminiGrounding } from '@renderer/utils/formats'
import { findCitationInChildren } from '@renderer/utils/markdown'
import { sanitizeSchema } from '@renderer/utils/markdown'
import { isEmpty } from 'lodash'
import { type FC, useMemo } from 'react'
import { useTranslation } from 'react-i18next'
import ReactMarkdown, { type Components } from 'react-markdown'
import rehypeKatex from 'rehype-katex'
// @ts-ignore next-line
// @ts-ignore rehype-mathjax is not typed
import rehypeMathjax from 'rehype-mathjax'
import rehypeRaw from 'rehype-raw'
import rehypeSanitize from 'rehype-sanitize'
import remarkCjkFriendly from 'remark-cjk-friendly'
import remarkGfm from 'remark-gfm'
import remarkMath from 'remark-math'
@ -26,6 +24,10 @@ import CodeBlock from './CodeBlock'
import ImagePreview from './ImagePreview'
import Link from './Link'
// Detects whether the message text contains the start of a raw HTML/SVG tag.
// The component tests this against the message content and only adds
// `rehypeRaw` to the rehype plugin list when it matches.
// NOTE(review): the pattern is case-insensitive and has no boundary after the
// tag name, so short alternatives act as prefixes (`<b` also matches `<button`,
// `<i` also matches `<img` and `<iframe`). It gates raw-HTML parsing; it does
// not itself remove any element.
const ALLOWED_ELEMENTS =
/<(style|p|div|span|b|i|strong|em|ul|ol|li|table|tr|td|th|thead|tbody|h[1-6]|blockquote|pre|code|br|hr|svg|path|circle|rect|line|polyline|polygon|text|g|defs|title|desc|tspan|sub|sup)/i
// Tag names handed to the markdown renderer through its `disallowedElements` prop.
const DISALLOWED_ELEMENTS = ['iframe']
interface Props {
message: Message
}
@ -43,9 +45,12 @@ const Markdown: FC<Props> = ({ message }) => {
return removeSvgEmptyLines(escapeBrackets(content))
}, [message, t])
const rehypeMath = useMemo(() => (mathEngine === 'KaTeX' ? rehypeKatex : rehypeMathjax), [mathEngine])
const rehypePlugins = useMemo(() => {
return [rehypeRaw, [rehypeSanitize, sanitizeSchema], mathEngine === 'KaTeX' ? rehypeKatex : rehypeMathjax]
}, [mathEngine])
const hasElements = ALLOWED_ELEMENTS.test(messageContent)
return hasElements ? [rehypeRaw, rehypeMath] : [rehypeMath]
}, [messageContent, rehypeMath])
const components = useMemo(() => {
const baseComponents = {
@ -71,6 +76,7 @@ const Markdown: FC<Props> = ({ message }) => {
remarkPlugins={remarkPlugins}
className="markdown"
components={components}
disallowedElements={DISALLOWED_ELEMENTS}
remarkRehypeOptions={{
footnoteLabel: t('common.footnotes'),
footnoteLabelTagName: 'h4',

View File

@ -1,12 +1,6 @@
import { describe, expect, it } from 'vitest'
import {
convertMathFormula,
findCitationInChildren,
MARKDOWN_ALLOWED_TAGS,
removeTrailingDoubleSpaces,
sanitizeSchema
} from '../markdown'
import { convertMathFormula, findCitationInChildren, removeTrailingDoubleSpaces } from '../markdown'
describe('markdown', () => {
describe('findCitationInChildren', () => {
@ -72,27 +66,6 @@ describe('markdown', () => {
})
})
describe('markdown configuration constants', () => {
it('MARKDOWN_ALLOWED_TAGS contains expected tags', () => {
expect(MARKDOWN_ALLOWED_TAGS).toContain('p')
expect(MARKDOWN_ALLOWED_TAGS).toContain('div')
expect(MARKDOWN_ALLOWED_TAGS).toContain('code')
expect(MARKDOWN_ALLOWED_TAGS).toContain('svg')
expect(MARKDOWN_ALLOWED_TAGS.length).toBeGreaterThan(10)
})
it('sanitizeSchema contains proper configuration', () => {
expect(sanitizeSchema.tagNames).toBe(MARKDOWN_ALLOWED_TAGS)
expect(sanitizeSchema.attributes).toHaveProperty('*')
expect(sanitizeSchema.attributes).toHaveProperty('svg')
expect(sanitizeSchema.attributes).toHaveProperty('a')
})
it('sanitizeSchema matches snapshot', () => {
expect(sanitizeSchema).toMatchSnapshot()
})
})
describe('convertMathFormula', () => {
it('should convert LaTeX block delimiters to $$$$', () => {
// 验证将 LaTeX 块分隔符转换为 $$$$

View File

@ -1,67 +1,3 @@
/**
 * Tag names permitted in rendered markdown; referenced by `sanitizeSchema.tagNames`.
 * The list covers text/structure HTML tags first, then SVG drawing tags, then sub/sup.
 */
export const MARKDOWN_ALLOWED_TAGS = [
'style',
'p',
'div',
'span',
'b',
'i',
'strong',
'em',
'ul',
'ol',
'li',
'table',
'tr',
'td',
'th',
'thead',
'tbody',
'h1',
'h2',
'h3',
'h4',
'h5',
'h6',
'blockquote',
'pre',
'code',
'br',
'hr',
'svg',
'path',
'circle',
'rect',
'line',
'polyline',
'polygon',
'text',
'g',
'defs',
'title',
'desc',
'tspan',
'sub',
'sup'
]
// rehype-sanitize configuration.
// `tagNames` reuses MARKDOWN_ALLOWED_TAGS; `attributes` lists the attribute
// names kept for each tag. The '*' key presumably applies to every element,
// following the hast-util-sanitize schema convention — confirm against its docs.
export const sanitizeSchema = {
tagNames: MARKDOWN_ALLOWED_TAGS,
attributes: {
'*': ['className', 'style', 'id', 'title'],
svg: ['viewBox', 'width', 'height', 'xmlns', 'fill', 'stroke'],
path: ['d', 'fill', 'stroke', 'strokeWidth', 'strokeLinecap', 'strokeLinejoin'],
circle: ['cx', 'cy', 'r', 'fill', 'stroke'],
rect: ['x', 'y', 'width', 'height', 'fill', 'stroke'],
line: ['x1', 'y1', 'x2', 'y2', 'stroke'],
polyline: ['points', 'fill', 'stroke'],
polygon: ['points', 'fill', 'stroke'],
text: ['x', 'y', 'fill', 'textAnchor', 'dominantBaseline'],
g: ['transform', 'fill', 'stroke'],
a: ['href', 'target', 'rel']
}
}
// A more thorough lookup: recursively searches all child elements
export const findCitationInChildren = (children) => {
if (!children) return null
@ -107,3 +43,21 @@ export function removeTrailingDoubleSpaces(markdown: string): string {
// 使用正则表达式匹配末尾的两个空格,并替换为空字符串
return markdown.replace(/ {2}$/gm, '')
}
/**
 * Replacement table for the five characters that are significant in HTML
 * markup. Kept at module scope so it is built once rather than on every match.
 */
const HTML_ENTITY_MAP: Readonly<Record<string, string>> = {
  '&': '&amp;',
  '<': '&lt;',
  '>': '&gt;',
  '"': '&quot;',
  "'": '&apos;'
}

/**
 * HTML entity encoding helper.
 *
 * Replaces every `&`, `<`, `>`, `"` and `'` in the input with its named HTML
 * entity. Each character is encoded exactly once in a single pass, so an input
 * that already contains entities is encoded again (`&amp;` becomes `&amp;amp;`).
 *
 * @param str - Text to encode.
 * @returns The text with the five special characters replaced by entities.
 */
export const encodeHTML = (str: string): string => {
  // The character class and the table cover the same five characters; the
  // fallback only exists to keep the lookup total for the type checker.
  return str.replace(/[&<>"']/g, (match) => HTML_ENTITY_MAP[match] ?? match)
}

View File

@ -4338,7 +4338,6 @@ __metadata:
rehype-katex: "npm:^7.0.1"
rehype-mathjax: "npm:^7.0.0"
rehype-raw: "npm:^7.0.0"
rehype-sanitize: "npm:^6.0.0"
remark-cjk-friendly: "npm:^1.1.0"
remark-gfm: "npm:^4.0.0"
remark-math: "npm:^6.0.0"
@ -9104,17 +9103,6 @@ __metadata:
languageName: node
linkType: hard
"hast-util-sanitize@npm:^5.0.0":
version: 5.0.2
resolution: "hast-util-sanitize@npm:5.0.2"
dependencies:
"@types/hast": "npm:^3.0.0"
"@ungap/structured-clone": "npm:^1.0.0"
unist-util-position: "npm:^5.0.0"
checksum: 10c0/20951652078a8c21341c1c9a84f90015b2ba01cc41fa16772f122c65cda26a7adb0501fdeba5c8e37e40e2632447e8fe455d0dd2dc27d39663baacca76f2ecb6
languageName: node
linkType: hard
"hast-util-to-html@npm:^9.0.5":
version: 9.0.5
resolution: "hast-util-to-html@npm:9.0.5"
@ -14735,16 +14723,6 @@ __metadata:
languageName: node
linkType: hard
"rehype-sanitize@npm:^6.0.0":
version: 6.0.0
resolution: "rehype-sanitize@npm:6.0.0"
dependencies:
"@types/hast": "npm:^3.0.0"
hast-util-sanitize: "npm:^5.0.0"
checksum: 10c0/43d6c056e63c994cf56e5ee0e157052d2030dc5ac160845ee494af9a26e5906bf5ec5af56c7d90c99f9c4dc0091e45a48a168618135fb6c64a76481ad3c449e9
languageName: node
linkType: hard
"remark-cjk-friendly@npm:^1.1.0":
version: 1.1.0
resolution: "remark-cjk-friendly@npm:1.1.0"