From f2ca56a088b3ca1d3efc8cc41013bbcfcdd30099 Mon Sep 17 00:00:00 2001 From: SuYao Date: Sun, 6 Apr 2025 09:11:59 +0800 Subject: [PATCH] feat(UI, OpenAI): support OpenAI-4o-web-search add support for web search citations (#3524) * feat(UI, OpenAI): support OpenAI 4o web search add support for web search citations - refactor: Introduced a new CitationsList component to display citations in MessageContent. - feat: Enhanced message handling to support web search results and annotations from OpenAI. - refactor: Removed the deprecated MessageSearchResults component for cleaner code structure. - refactor: Added utility functions for link conversion and URL extraction from Markdown. * chore: remove debug logging from ProxyManager * revert(OpenAIProvider): streamline reasoning check for stream output handling * chore(OpenAIProvider): correct placement of webSearch in response object * fix(patches): update OpenAI package version and remove patch references - Integrated dayjs for dynamic date formatting in prompts.ts. * feat(Citation, Favicon): enhance OpenAI web search support and citation handling - Improved FallbackFavicon component to cache failed favicon URLs. - Support all web search citation preview - Added support for Hunyuan search model in OpenAIProvider and ApiService. * refactor(provider/AI): move additional search parameters to AI Provider --- ...tch => openai-npm-4.87.3-2b30a7685f.patch} | 8 +- package.json | 4 +- .../src/components/Icons/FallbackFavicon.tsx | 56 ++- src/renderer/src/config/models.ts | 34 +- src/renderer/src/config/prompts.ts | 19 + src/renderer/src/config/tools.ts | 10 +- .../src/pages/home/Markdown/Markdown.tsx | 42 +- .../src/pages/home/Messages/CitationsList.tsx | 81 ++++ .../pages/home/Messages/MessageContent.tsx | 252 +++++++----- .../home/Messages/MessageSearchResults.tsx | 95 ----- .../src/providers/AiProvider/BaseProvider.ts | 6 +- .../providers/AiProvider/OpenAIProvider.ts | 21 +- .../src/providers/AiProvider/index.ts | 8 + src/renderer/src/services/ApiService.ts | 92 ++++- src/renderer/src/types/index.ts | 6 +- src/renderer/src/utils/linkConverter.ts | 389 ++++++++++++++++++ yarn.lock | 24 +- 17 files changed, 894 insertions(+), 253 deletions(-) rename .yarn/patches/{openai-npm-4.77.3-59c6d42e7a.patch => openai-npm-4.87.3-2b30a7685f.patch} (82%) create mode 100644 src/renderer/src/pages/home/Messages/CitationsList.tsx delete mode 100644 src/renderer/src/pages/home/Messages/MessageSearchResults.tsx create mode 100644 src/renderer/src/utils/linkConverter.ts diff --git a/.yarn/patches/openai-npm-4.77.3-59c6d42e7a.patch b/.yarn/patches/openai-npm-4.87.3-2b30a7685f.patch similarity index 82% rename from .yarn/patches/openai-npm-4.77.3-59c6d42e7a.patch rename to .yarn/patches/openai-npm-4.87.3-2b30a7685f.patch index fbb26d7c..99709725 100644 --- a/.yarn/patches/openai-npm-4.77.3-59c6d42e7a.patch +++ b/.yarn/patches/openai-npm-4.87.3-2b30a7685f.patch @@ -1,8 +1,8 @@ diff --git a/core.js b/core.js -index e75a18281ce8f051990c5a50bc1076afdddf91a3..e62f796791a155f23d054e74a429516c14d6e11b 100644 +index ebb071d31cd5a14792b62814df072c5971e83300..31e1062d4a7f2422ffec79cf96a35dbb69fe89cb 100644 --- a/core.js +++ b/core.js -@@ -156,7 +156,7 @@ class APIClient { +@@ -157,7 +157,7 @@ class APIClient { Accept: 'application/json', 'Content-Type': 'application/json', 'User-Agent': this.getUserAgent(), @@ -12,10 +12,10 @@ index e75a18281ce8f051990c5a50bc1076afdddf91a3..e62f796791a155f23d054e74a429516c }; } diff --git a/core.mjs b/core.mjs -index fcef58eb502664c41a77483a00db8adaf29b2817..18c5d6ed4be86b3640931277bdc27700006764d7 100644 +index 9c1a0264dcd73a85de1cf81df4efab9ce9ee2ab7..33f9f1f237f2eb2667a05dae1a7e3dc916f6bfff 100644 --- a/core.mjs +++ b/core.mjs -@@ -149,7 +149,7 @@ export class APIClient { +@@ -150,7 +150,7 @@ export class APIClient { Accept: 'application/json', 'Content-Type': 'application/json', 'User-Agent': this.getUserAgent(), diff --git a/package.json b/package.json index 7334e9b0..7cdc30fe 100644 --- a/package.json +++ b/package.json @@ -156,7 +156,7 @@ "lodash": "^4.17.21", "mime": "^4.0.4", "npx-scope-finder": "^1.2.0", - "openai": "patch:openai@npm%3A4.77.3#~/.yarn/patches/openai-npm-4.77.3-59c6d42e7a.patch", + "openai": "patch:openai@npm%3A4.87.3#~/.yarn/patches/openai-npm-4.87.3-2b30a7685f.patch", "p-queue": "^8.1.0", "prettier": "^3.5.3", "rc-virtual-list": "^3.18.5", @@ -193,7 +193,7 @@ "pdf-parse@npm:1.1.1": "patch:pdf-parse@npm%3A1.1.1#~/.yarn/patches/pdf-parse-npm-1.1.1-04a6109b2a.patch", "@langchain/openai@npm:^0.3.16": "patch:@langchain/openai@npm%3A0.3.16#~/.yarn/patches/@langchain-openai-npm-0.3.16-e525b59526.patch", "@langchain/openai@npm:>=0.1.0 <0.4.0": "patch:@langchain/openai@npm%3A0.3.16#~/.yarn/patches/@langchain-openai-npm-0.3.16-e525b59526.patch", - "openai@npm:^4.77.0": "patch:openai@npm%3A4.77.3#~/.yarn/patches/openai-npm-4.77.3-59c6d42e7a.patch", + "openai@npm:^4.77.0": "patch:openai@npm%3A4.87.3#~/.yarn/patches/openai-npm-4.87.3-2b30a7685f.patch", "pkce-challenge@npm:^4.1.0": "patch:pkce-challenge@npm%3A4.1.0#~/.yarn/patches/pkce-challenge-npm-4.1.0-fbc51695a3.patch" }, "packageManager": "yarn@4.6.0", diff --git a/src/renderer/src/components/Icons/FallbackFavicon.tsx b/src/renderer/src/components/Icons/FallbackFavicon.tsx index 9b01fe7a..f6c8369f 100644 --- a/src/renderer/src/components/Icons/FallbackFavicon.tsx +++ b/src/renderer/src/components/Icons/FallbackFavicon.tsx @@ -1,6 +1,37 @@ import { useEffect, useState } from 'react' import styled from 'styled-components' +// 记录失败的URL的缓存键前缀 +const FAILED_FAVICON_CACHE_PREFIX = 'failed_favicon_' +// 失败URL的缓存时间 (24小时) +const FAILED_FAVICON_CACHE_DURATION = 24 * 60 * 60 * 1000 + +// 检查URL是否在失败缓存中 +const isUrlFailedRecently = (url: string): boolean => { + const cacheKey = `${FAILED_FAVICON_CACHE_PREFIX}${url}` + const cachedTimestamp = localStorage.getItem(cacheKey) + + if (!cachedTimestamp) return false + + const timestamp = parseInt(cachedTimestamp, 10) + const now = Date.now() + + // 如果时间戳在缓存期内,则认为URL仍处于失败状态 + if (now - timestamp < FAILED_FAVICON_CACHE_DURATION) { + return true + } + + // 清除过期的缓存 + localStorage.removeItem(cacheKey) + return false +} + +// 记录失败的URL到缓存 +const markUrlAsFailed = (url: string): void => { + const cacheKey = `${FAILED_FAVICON_CACHE_PREFIX}${url}` + localStorage.setItem(cacheKey, Date.now().toString()) +} + // FallbackFavicon component that tries multiple favicon sources interface FallbackFaviconProps { hostname: string @@ -22,20 +53,27 @@ const FallbackFavicon: React.FC = ({ hostname, alt }) => { // Generate all possible favicon URLs const faviconUrls = [ - `https://favicon.splitbee.io/?url=${hostname}`, - `https://${hostname}/favicon.ico`, `https://icon.horse/icon/${hostname}`, - `https://favicon.cccyun.cc/${hostname}`, + `https://favicon.splitbee.io/?url=${hostname}`, `https://favicon.im/${hostname}`, - `https://www.google.com/s2/favicons?domain=${hostname}` + `https://${hostname}/favicon.ico` ] + // 过滤掉最近已失败的URL + const validFaviconUrls = faviconUrls.filter((url) => !isUrlFailedRecently(url)) + + // 如果所有URL都被缓存为失败,使用第一个URL + if (validFaviconUrls.length === 0) { + setFaviconState({ status: 'loaded', src: faviconUrls[0] }) + return + } + // Main controller to abort all requests when needed const controller = new AbortController() const { signal } = controller // Create a promise for each favicon URL - const faviconPromises = faviconUrls.map((url) => + const faviconPromises = validFaviconUrls.map((url) => fetch(url, { method: 'HEAD', signal, @@ -45,6 +83,10 @@ const FallbackFavicon: React.FC = ({ hostname, alt }) => { if (response.ok) { return url } + // 记录4xx或5xx失败 + if (response.status >= 400) { + markUrlAsFailed(url) + } throw new Error(`Failed to fetch ${url}`) }) .catch((error) => { @@ -89,6 +131,10 @@ const FallbackFavicon: React.FC = ({ hostname, alt }) => { }, [hostname]) // Only depend on hostname const handleError = () => { + if (faviconState.status === 'loaded') { + // 记录图片加载失败的URL + markUrlAsFailed(faviconState.src) + } setFaviconState({ status: 'failed' }) } diff --git a/src/renderer/src/config/models.ts b/src/renderer/src/config/models.ts index 1d405cc7..690c791c 100644 --- a/src/renderer/src/config/models.ts +++ b/src/renderer/src/config/models.ts @@ -133,6 +133,7 @@ import { getProviderByModel } from '@renderer/services/AssistantService' import { Assistant, Model } from '@renderer/types' import OpenAI from 'openai' +import { WEB_SEARCH_PROMPT_FOR_OPENROUTER } from './prompts' import { getWebSearchTools } from './tools' // Vision models @@ -2148,6 +2149,9 @@ export function isVisionModel(model: Model): boolean { export function isOpenAIoSeries(model: Model): boolean { return ['o1', 'o1-2024-12-17'].includes(model.id) || model.id.includes('o3') } +export function isOpenAIWebSearch(model: Model): boolean { + return model.id.includes('gpt-4o-search-preview') || model.id.includes('gpt-4o-mini-search-preview') +} export function isSupportedResoningEffortModel(model?: Model): boolean { if (!model) { @@ -2212,7 +2216,7 @@ export function isWebSearchModel(model: Model): boolean { } if (provider?.type === 'openai') { - if (GEMINI_SEARCH_MODELS.includes(model?.id)) { + if (GEMINI_SEARCH_MODELS.includes(model?.id) || isOpenAIWebSearch(model)) { return true } } @@ -2270,7 +2274,7 @@ export function getOpenAIWebSearchParams(assistant: Assistant, model: Model): Re const webSearchTools = getWebSearchTools(model) if (model.provider === 'hunyuan') { - return { enable_enhancement: true } + return { enable_enhancement: true, citation: true, search_info: true } } if (model.provider === 'dashscope') { @@ -2284,10 +2288,14 @@ export function getOpenAIWebSearchParams(assistant: Assistant, model: Model): Re if (model.provider === 'openrouter') { return { - plugins: [{ id: 'web' }] + plugins: [{ id: 'web', search_prompts: WEB_SEARCH_PROMPT_FOR_OPENROUTER }] } } + if (isOpenAIWebSearch(model)) { + return {} + } + return { tools: webSearchTools } @@ -2308,3 +2316,23 @@ export function isGemmaModel(model?: Model): boolean { return model.id.includes('gemma-') || model.group === 'Gemma' } + +export function isZhipuModel(model?: Model): boolean { + if (!model) { + return false + } + + return model.provider === 'zhipu' +} + +export function isHunyuanSearchModel(model?: Model): boolean { + if (!model) { + return false + } + + if (model.provider === 'hunyuan') { + return model.id !== 'hunyuan-lite' + } + + return false +} diff --git a/src/renderer/src/config/prompts.ts b/src/renderer/src/config/prompts.ts index 41405ccd..fc24387a 100644 --- a/src/renderer/src/config/prompts.ts +++ b/src/renderer/src/config/prompts.ts @@ -1,3 +1,5 @@ +import dayjs from 'dayjs' + export const AGENT_PROMPT = ` You are a Prompt Generator. You will integrate user input information into a structured Prompt using Markdown syntax. Please do not use code blocks for output, display directly! @@ -109,3 +111,20 @@ export const FOOTNOTE_PROMPT = `Please answer the question based on the referenc {references} ` + +export const WEB_SEARCH_PROMPT_FOR_ZHIPU = ` +# 以下是来自互联网的信息: +{search_result} + +# 当前日期: ${dayjs().format('YYYY-MM-DD')} +# 要求: +根据最新发布的信息回答用户问题,当回答引用了参考信息时,必须在句末使用对应的[ref_序号](url)的markdown链接形式来标明参考信息来源。 +` +export const WEB_SEARCH_PROMPT_FOR_OPENROUTER = ` +A web search was conducted on \`${dayjs().format('YYYY-MM-DD')}\`. Incorporate the following web search results into your response. + +IMPORTANT: Cite them using markdown links named using the domain of the source. +Example: [nytimes.com](https://nytimes.com/some-page). +If have multiple citations, please directly list them like this: +[www.nytimes.com](https://nytimes.com/some-page)[www.bbc.com](https://bbc.com/some-page) +` diff --git a/src/renderer/src/config/tools.ts b/src/renderer/src/config/tools.ts index b7769d9c..12ff6676 100644 --- a/src/renderer/src/config/tools.ts +++ b/src/renderer/src/config/tools.ts @@ -1,12 +1,17 @@ import { Model } from '@renderer/types' import { ChatCompletionTool } from 'openai/resources' +import { WEB_SEARCH_PROMPT_FOR_ZHIPU } from './prompts' + export function getWebSearchTools(model: Model): ChatCompletionTool[] { if (model?.provider === 'zhipu') { if (model.id === 'glm-4-alltools') { return [ { - type: 'web_browser' + type: 'web_browser', + web_browser: { + browser: 'auto' + } } as unknown as ChatCompletionTool ] } @@ -15,7 +20,8 @@ export function getWebSearchTools(model: Model): ChatCompletionTool[] { type: 'web_search', web_search: { enable: true, - search_result: true + search_result: true, + search_prompt: WEB_SEARCH_PROMPT_FOR_ZHIPU } } as unknown as ChatCompletionTool ] diff --git a/src/renderer/src/pages/home/Markdown/Markdown.tsx b/src/renderer/src/pages/home/Markdown/Markdown.tsx index ecf70dc0..9f33760b 100644 --- a/src/renderer/src/pages/home/Markdown/Markdown.tsx +++ b/src/renderer/src/pages/home/Markdown/Markdown.tsx @@ -27,19 +27,11 @@ const ALLOWED_ELEMENTS = interface Props { message: Message - citationsData?: Map< - string, - { - url: string - title?: string - content?: string - } - > } const remarkPlugins = [remarkMath, remarkGfm, remarkCjkFriendly] const disallowedElements = ['iframe'] -const Markdown: FC = ({ message, citationsData }) => { +const Markdown: FC = ({ message }) => { const { t } = useTranslation() const { renderInputMessageAsMarkdown, mathEngine } = useSettings() @@ -60,8 +52,34 @@ const Markdown: FC = ({ message, citationsData }) => { const components = useMemo(() => { const baseComponents = { a: (props: any) => { - if (props.href && citationsData?.has(props.href)) { - return + // 更彻底的查找方法,递归搜索所有子元素 + const findCitationInChildren = (children) => { + if (!children) return null + + // 直接搜索子元素 + for (const child of Array.isArray(children) ? children : [children]) { + if (typeof child === 'object' && child?.props?.['data-citation']) { + return child.props['data-citation'] + } + + // 递归查找更深层次 + if (typeof child === 'object' && child?.props?.children) { + const found = findCitationInChildren(child.props.children) + if (found) return found + } + } + + return null + } + + // 然后在组件中使用 + const citationData = findCitationInChildren(props.children) + if (citationData) { + try { + return + } catch (e) { + console.error('Failed to parse citation data', e) + } } return }, @@ -70,7 +88,7 @@ const Markdown: FC = ({ message, citationsData }) => { pre: (props: any) =>
     } as Partial
     return baseComponents
-  }, [citationsData])
+  }, [messageContent])
 
   if (message.role === 'user' && !renderInputMessageAsMarkdown) {
     return 

{messageContent}

diff --git a/src/renderer/src/pages/home/Messages/CitationsList.tsx b/src/renderer/src/pages/home/Messages/CitationsList.tsx new file mode 100644 index 00000000..90926220 --- /dev/null +++ b/src/renderer/src/pages/home/Messages/CitationsList.tsx @@ -0,0 +1,81 @@ +import { InfoCircleOutlined } from '@ant-design/icons' +import Favicon from '@renderer/components/Icons/FallbackFavicon' +import { HStack } from '@renderer/components/Layout' +import React from 'react' +import { useTranslation } from 'react-i18next' +import styled from 'styled-components' + +interface Citation { + number: number + url: string + title?: string + hostname?: string + showFavicon?: boolean +} + +interface CitationsListProps { + citations: Citation[] +} + +const CitationsList: React.FC = ({ citations }) => { + const { t } = useTranslation() + + if (!citations || citations.length === 0) return null + + return ( + + + {t('message.citations')} + + + {citations.map((citation) => ( + + {citation.number}. + {citation.showFavicon && citation.url && ( + + )} + + {citation.title ? citation.title : {citation.hostname}} + + + ))} + + ) +} + +const CitationsContainer = styled.div` + background-color: rgb(242, 247, 253); + border-radius: 4px; + padding: 8px 12px; + margin: 12px 0; + display: flex; + flex-direction: column; + gap: 4px; + + body[theme-mode='dark'] & { + background-color: rgba(255, 255, 255, 0.05); + } +` + +const CitationsTitle = styled.div` + font-weight: 500; + margin-bottom: 4px; + color: var(--color-text-1); +` + +const CitationLink = styled.a` + font-size: 14px; + line-height: 1.6; + text-decoration: none; + color: var(--color-text-1); + + .hostname { + color: var(--color-link); + } + + &:hover { + text-decoration: underline; + } +` + +export default CitationsList diff --git a/src/renderer/src/pages/home/Messages/MessageContent.tsx b/src/renderer/src/pages/home/Messages/MessageContent.tsx index eea78503..a0fadbb6 100644 --- a/src/renderer/src/pages/home/Messages/MessageContent.tsx +++ b/src/renderer/src/pages/home/Messages/MessageContent.tsx @@ -1,6 +1,5 @@ -import { InfoCircleOutlined, SearchOutlined, SyncOutlined, TranslationOutlined } from '@ant-design/icons' -import Favicon from '@renderer/components/Icons/FallbackFavicon' -import { HStack } from '@renderer/components/Layout' +import { SearchOutlined, SyncOutlined, TranslationOutlined } from '@ant-design/icons' +import { isOpenAIWebSearch } from '@renderer/config/models' import { getModelUniqId } from '@renderer/services/ModelService' import { Message, Model } from '@renderer/types' import { getBriefInfo } from '@renderer/utils' @@ -14,10 +13,10 @@ import BeatLoader from 'react-spinners/BeatLoader' import styled from 'styled-components' import Markdown from '../Markdown/Markdown' +import CitationsList from './CitationsList' import MessageAttachments from './MessageAttachments' import MessageError from './MessageError' import MessageImage from './MessageImage' -import MessageSearchResults from './MessageSearchResults' import MessageThought from './MessageThought' import MessageTools from './MessageTools' @@ -29,6 +28,7 @@ interface Props { const MessageContent: React.FC = ({ message: _message, model }) => { const { t } = useTranslation() const message = withMessageThought(clone(_message)) + const isWebCitation = model && (isOpenAIWebSearch(model) || model.provider === 'openrouter') // HTML实体编码辅助函数 const encodeHTML = (str: string) => { @@ -44,39 +44,95 @@ const MessageContent: React.FC = ({ message: _message, model }) => { }) } + // Format citations for display + const formattedCitations = useMemo(() => { + if (!message.metadata?.citations?.length && !message.metadata?.annotations?.length) return null + + let citations: any[] = [] + + if (model && isOpenAIWebSearch(model)) { + citations = + message.metadata.annotations?.map((url, index) => { + return { number: index + 1, url: url.url_citation?.url, hostname: url.url_citation.title } + }) || [] + } else { + citations = + message.metadata?.citations?.map((url, index) => { + try { + const hostname = new URL(url).hostname + return { number: index + 1, url, hostname } + } catch { + return { number: index + 1, url, hostname: url } + } + }) || [] + } + + // Deduplicate by URL + const urlSet = new Set() + return citations + .filter((citation) => { + if (!citation.url || urlSet.has(citation.url)) return false + urlSet.add(citation.url) + return true + }) + .map((citation, index) => ({ + ...citation, + number: index + 1 // Renumber citations sequentially after deduplication + })) + }, [message.metadata?.citations, message.metadata?.annotations, model]) + // 获取引用数据 const citationsData = useMemo(() => { - const searchResults = message?.metadata?.webSearch?.results || [] - const citationsUrls = message?.metadata?.citations || [] + const searchResults = + message?.metadata?.webSearch?.results || + message?.metadata?.webSearchInfo || + message?.metadata?.groundingMetadata?.groundingChunks.map((chunk) => chunk.web) || + message?.metadata?.annotations?.map((annotation) => annotation.url_citation) || + [] + const citationsUrls = formattedCitations || [] // 合并引用数据 const data = new Map() // 添加webSearch结果 searchResults.forEach((result) => { - data.set(result.url, { - url: result.url, - title: result.title, + data.set(result.url || result.uri || result.link, { + url: result.url || result.uri || result.link, + title: result.title || result.hostname, content: result.content }) }) // 添加citations - citationsUrls.forEach((url) => { - if (!data.has(url)) { - data.set(url, { - url: url - // 如果没有title和content,将在CitationTooltip中显示hostname + citationsUrls.forEach((result) => { + if (!data.has(result.url)) { + data.set(result.url, { + url: result.url, + title: result.title || result.hostname || undefined, + content: result.content || undefined }) } }) return data - }, [message.metadata?.citations, message.metadata?.webSearch?.results]) + }, [ + formattedCitations, + message?.metadata?.annotations, + message?.metadata?.groundingMetadata?.groundingChunks, + message?.metadata?.webSearch?.results, + message?.metadata?.webSearchInfo + ]) // Process content to make citation numbers clickable const processedContent = useMemo(() => { - if (!(message.metadata?.citations || message.metadata?.webSearch)) { + if ( + !( + message.metadata?.citations || + message.metadata?.webSearch || + message.metadata?.webSearchInfo || + message.metadata?.annotations + ) + ) { return message.content } @@ -88,33 +144,32 @@ const MessageContent: React.FC = ({ message: _message, model }) => { // Convert [n] format to superscript numbers and make them clickable // Use tag for superscript and make it a link with citation data - content = content.replace(/\[\[(\d+)\]\]|\[(\d+)\]/g, (match, num1, num2) => { - const num = num1 || num2 - const index = parseInt(num) - 1 - if (index >= 0 && index < citations.length) { - const link = citations[index] - const citationData = link ? encodeHTML(JSON.stringify(citationsData.get(link) || { url: link })) : null - return link ? `[${num}](${link})` : `${num}` - } - return match - }) - + if (message.metadata?.webSearch) { + content = content.replace(/\[\[(\d+)\]\]|\[(\d+)\]/g, (match, num1, num2) => { + const num = num1 || num2 + const index = parseInt(num) - 1 + if (index >= 0 && index < citations.length) { + const link = citations[index] + const citationData = link ? encodeHTML(JSON.stringify(citationsData.get(link) || { url: link })) : null + return link ? `[${num}](${link})` : `${num}` + } + return match + }) + } else { + content = content.replace(/\[(\d+)<\/sup>\]\(([^)]+)\)/g, (_, num, url) => { + const citationData = url ? encodeHTML(JSON.stringify(citationsData.get(url) || { url })) : null + return `[${num}](${url})` + }) + } return content - }, [message.content, message.metadata, citationsData]) - - // Format citations for display - const formattedCitations = useMemo(() => { - if (!message.metadata?.citations?.length) return null - - return message.metadata.citations.map((url, index) => { - try { - const hostname = new URL(url).hostname - return { number: index + 1, url, hostname } - } catch { - return { number: index + 1, url, hostname: url } - } - }) - }, [message.metadata?.citations]) + }, [ + message.metadata?.citations, + message.metadata?.webSearch, + message.metadata?.webSearchInfo, + message.metadata?.annotations, + message.content, + citationsData + ]) if (message.status === 'sending') { return ( @@ -150,7 +205,7 @@ const MessageContent: React.FC = ({ message: _message, model }) => { - + {message.metadata?.generateImage && } {message.translatedContent && ( @@ -164,36 +219,54 @@ const MessageContent: React.FC = ({ message: _message, model }) => { )} )} - + {message?.metadata?.groundingMetadata && message.status == 'success' && ( + <> + ({ + number: index + 1, + url: chunk.web?.uri, + title: chunk.web?.title, + showFavicon: false + }))} + /> + + + )} {formattedCitations && ( - - - {t('message.citations')} - - - {formattedCitations.map(({ number, url, hostname }) => ( - - {number}. {hostname} - - ))} - + ({ + number: citation.number, + url: citation.url, + hostname: citation.hostname, + showFavicon: isWebCitation + }))} + /> )} {message?.metadata?.webSearch && message.status === 'success' && ( - - - {t('message.citations')} - - - {message.metadata.webSearch.results.map((result, index) => ( - - {index + 1}. - - - {result.title} - - - ))} - + ({ + number: index + 1, + url: result.url, + title: result.title, + showFavicon: true + }))} + /> + )} + {message?.metadata?.webSearchInfo && message.status === 'success' && ( + ({ + number: index + 1, + url: result.link || result.url, + title: result.title, + showFavicon: true + }))} + /> )} @@ -224,41 +297,6 @@ const MentionTag = styled.span` color: var(--color-link); ` -const CitationsContainer = styled.div` - background-color: rgb(242, 247, 253); - border-radius: 4px; - padding: 8px 12px; - margin: 12px 0; - display: flex; - flex-direction: column; - gap: 4px; - - body[theme-mode='dark'] & { - background-color: rgba(255, 255, 255, 0.05); - } -` - -const CitationsTitle = styled.div` - font-weight: 500; - margin-bottom: 4px; - color: var(--color-text-1); -` - -const CitationLink = styled.a` - font-size: 14px; - line-height: 1.6; - text-decoration: none; - color: var(--color-text-1); - - .hostname { - color: var(--color-link); - } - - &:hover { - text-decoration: underline; - } -` - const SearchingText = styled.div` font-size: 14px; line-height: 1.6; @@ -266,4 +304,8 @@ const SearchingText = styled.div` color: var(--color-text-1); ` +const SearchEntryPoint = styled.div` + margin: 10px 2px; +` + export default React.memo(MessageContent) diff --git a/src/renderer/src/pages/home/Messages/MessageSearchResults.tsx b/src/renderer/src/pages/home/Messages/MessageSearchResults.tsx deleted file mode 100644 index a16caca7..00000000 --- a/src/renderer/src/pages/home/Messages/MessageSearchResults.tsx +++ /dev/null @@ -1,95 +0,0 @@ -import { InfoCircleOutlined } from '@ant-design/icons' -import { Message } from '@renderer/types' -import { FC } from 'react' -import { useTranslation } from 'react-i18next' -import styled from 'styled-components' - -interface Props { - message: Message -} - -const MessageSearchResults: FC = ({ message }) => { - const { t } = useTranslation() - - if (!message.metadata?.groundingMetadata) { - return null - } - - const { groundingChunks, searchEntryPoint } = message.metadata.groundingMetadata - - if (!groundingChunks) { - return null - } - - let searchEntryContent = searchEntryPoint?.renderedContent - - searchEntryContent = searchEntryContent?.replace( - /@media \(prefers-color-scheme: light\)/g, - 'body[theme-mode="light"]' - ) - - searchEntryContent = searchEntryContent?.replace(/@media \(prefers-color-scheme: dark\)/g, 'body[theme-mode="dark"]') - - return ( - <> - - - {t('common.footnotes')} - - - - {groundingChunks.map((chunk, index) => ( - - - {chunk.web?.title} - - - ))} - - - - - ) -} - -const Container = styled.div` - padding: 16px; - border-radius: 8px; - margin-bottom: 0; -` - -const TitleRow = styled.div` - display: flex; - flex-direction: row; - align-items: center; - gap: 5px; - margin-bottom: 10px; -` - -const Title = styled.h4` - margin: 0 !important; -` - -const Sources = styled.ol` - margin-top: 10px; -` - -const SourceItem = styled.li` - margin-bottom: 5px; -` - -const Link = styled.a` - margin-left: 5px; - color: var(--color-primary); - text-decoration: none; - - &:hover { - text-decoration: underline; - } -` - -const SearchEntryPoint = styled.div` - margin: 10px 2px; -` - -export default MessageSearchResults diff --git a/src/renderer/src/providers/AiProvider/BaseProvider.ts b/src/renderer/src/providers/AiProvider/BaseProvider.ts index e9004789..6f7dc9f0 100644 --- a/src/renderer/src/providers/AiProvider/BaseProvider.ts +++ b/src/renderer/src/providers/AiProvider/BaseProvider.ts @@ -9,12 +9,12 @@ import type { Message, Model, Provider, - Suggestion + Suggestion, + WebSearchResponse } from '@renderer/types' import { delay, isJSON, parseJSON } from '@renderer/utils' import { addAbortController, removeAbortController } from '@renderer/utils/abortController' import { formatApiHost } from '@renderer/utils/api' -import { TavilySearchResponse } from '@tavily/core' import { t } from 'i18next' import { isEmpty } from 'lodash' import type OpenAI from 'openai' @@ -123,7 +123,7 @@ export default abstract class BaseProvider { if (isEmpty(message.content)) { return [] } - const webSearch: TavilySearchResponse = window.keyv.get(`web-search-${message.id}`) + const webSearch: WebSearchResponse = window.keyv.get(`web-search-${message.id}`) if (webSearch) { return webSearch.results.map( diff --git a/src/renderer/src/providers/AiProvider/OpenAIProvider.ts b/src/renderer/src/providers/AiProvider/OpenAIProvider.ts index 787f26c4..97f639b1 100644 --- a/src/renderer/src/providers/AiProvider/OpenAIProvider.ts +++ b/src/renderer/src/providers/AiProvider/OpenAIProvider.ts @@ -1,10 +1,13 @@ import { DEFAULT_MAX_TOKENS } from '@renderer/config/constant' import { getOpenAIWebSearchParams, + isHunyuanSearchModel, isOpenAIoSeries, + isOpenAIWebSearch, isReasoningModel, isSupportedModel, - isVisionModel + isVisionModel, + isZhipuModel } from '@renderer/config/models' import { getStoreSetting } from '@renderer/hooks/useSettings' import i18n from '@renderer/i18n' @@ -185,7 +188,7 @@ export default class OpenAIProvider extends BaseProvider { * @returns The temperature */ private getTemperature(assistant: Assistant, model: Model) { - return isReasoningModel(model) ? undefined : assistant?.settings?.temperature + return isReasoningModel(model) || isOpenAIWebSearch(model) ? undefined : assistant?.settings?.temperature } /** @@ -222,7 +225,7 @@ export default class OpenAIProvider extends BaseProvider { * @returns The top P */ private getTopP(assistant: Assistant, model: Model) { - if (isReasoningModel(model)) return undefined + if (isReasoningModel(model) || isOpenAIWebSearch(model)) return undefined return assistant?.settings?.topP } @@ -433,6 +436,7 @@ export default class OpenAIProvider extends BaseProvider { ) as ChatCompletionMessageParam[] const toolResponses: MCPToolResponse[] = [] + let firstChunk = true const processStream = async (stream: any, idx: number) => { if (!isSupportStreamOutput()) { const time_completion_millsec = new Date().getTime() - start_time_millsec @@ -498,6 +502,15 @@ export default class OpenAIProvider extends BaseProvider { } } + let webSearch: any[] | undefined = undefined + if (assistant.enableWebSearch && isZhipuModel(model) && finishReason === 'stop') { + webSearch = chunk?.web_search + } + if (firstChunk && assistant.enableWebSearch && isHunyuanSearchModel(model)) { + webSearch = chunk?.search_info?.search_results + firstChunk = true + } + if (finishReason === 'tool_calls' || (finishReason === 'stop' && Object.keys(final_tool_calls).length > 0)) { const toolCalls = Object.values(final_tool_calls).map(this.cleanToolCallArgs) console.log('start invoke tools', toolCalls) @@ -603,6 +616,8 @@ export default class OpenAIProvider extends BaseProvider { time_first_token_millsec, time_thinking_millsec }, + webSearch, + annotations: delta?.annotations, citations, mcpToolResponse: toolResponses }) diff --git a/src/renderer/src/providers/AiProvider/index.ts b/src/renderer/src/providers/AiProvider/index.ts index e736fca7..5a377e30 100644 --- a/src/renderer/src/providers/AiProvider/index.ts +++ b/src/renderer/src/providers/AiProvider/index.ts @@ -20,7 +20,13 @@ export interface ChunkCallbackData { reasoning_content?: string usage?: OpenAI.Completions.CompletionUsage metrics?: Metrics + // Zhipu web search + webSearch?: any[] + // Gemini web search search?: GroundingMetadata + // Openai web search + annotations?: OpenAI.Chat.Completions.ChatCompletionMessage.Annotation[] + // Openrouter web search or Knowledge base citations?: string[] mcpToolResponse?: MCPToolResponse[] generateImage?: GenerateImageResponse @@ -34,7 +40,9 @@ export interface CompletionsParams { reasoning_content, usage, metrics, + webSearch, search, + annotations, citations, mcpToolResponse, generateImage diff --git a/src/renderer/src/services/ApiService.ts b/src/renderer/src/services/ApiService.ts index 1fbe61c0..d2bceb79 100644 --- a/src/renderer/src/services/ApiService.ts +++ b/src/renderer/src/services/ApiService.ts @@ -1,4 +1,9 @@ -import { getOpenAIWebSearchParams } from '@renderer/config/models' +import { + getOpenAIWebSearchParams, + isHunyuanSearchModel, + isOpenAIWebSearch, + isZhipuModel +} from '@renderer/config/models' import { SEARCH_SUMMARY_PROMPT } from '@renderer/config/prompts' import i18n from '@renderer/i18n' import store from '@renderer/store' @@ -6,6 +11,15 @@ import { setGenerating } from '@renderer/store/runtime' import { Assistant, MCPTool, Message, Model, Provider, Suggestion } from '@renderer/types' import { formatMessageError, isAbortError } from '@renderer/utils/error' import { withGenerateImage } from '@renderer/utils/formats' +import { + cleanLinkCommas, + completeLinks, + convertLinks, + convertLinksToHunyuan, + convertLinksToOpenRouter, + convertLinksToZhipu, + extractUrlsFromMarkdown +} from '@renderer/utils/linkConverter' import { cloneDeep, findLast, isEmpty } from 'lodash' import AiProvider from '../providers/AiProvider' @@ -46,7 +60,7 @@ export async function fetchChatCompletion({ if (WebSearchService.isWebSearchEnabled() && assistant.enableWebSearch && assistant.model) { const webSearchParams = getOpenAIWebSearchParams(assistant, assistant.model) - if (isEmpty(webSearchParams)) { + if (isEmpty(webSearchParams) && !isOpenAIWebSearch(assistant.model)) { const lastMessage = findLast(messages, (m) => m.role === 'user') const lastAnswer = findLast(messages, (m) => m.role === 'assistant') const hasKnowledgeBase = !isEmpty(lastMessage?.knowledgeBaseIds) @@ -115,7 +129,34 @@ export async function fetchChatCompletion({ messages: filterUsefulMessages(filterContextMessages(messages)), assistant, onFilterMessages: (messages) => (_messages = messages), - onChunk: ({ text, reasoning_content, usage, metrics, search, citations, mcpToolResponse, generateImage }) => { + onChunk: ({ + text, + reasoning_content, + usage, + metrics, + webSearch, + search, + annotations, + citations, + mcpToolResponse, + generateImage + }) => { + if (assistant.model) { + if (isOpenAIWebSearch(assistant.model)) { + text = convertLinks(text || '', isFirstChunk) + } else if (assistant.model.provider === 'openrouter' && assistant.enableWebSearch) { + text = convertLinksToOpenRouter(text || '', isFirstChunk) + } else if (assistant.enableWebSearch) { + if (isZhipuModel(assistant.model)) { + text = convertLinksToZhipu(text || '', isFirstChunk) + } else if (isHunyuanSearchModel(assistant.model)) { + text = convertLinksToHunyuan(text || '', webSearch || [], isFirstChunk) + } + } + } + if (isFirstChunk) { + isFirstChunk = false + } message.content = message.content + text || '' message.usage = usage message.metrics = metrics @@ -124,10 +165,6 @@ export async function fetchChatCompletion({ message.reasoning_content = (message.reasoning_content || '') + reasoning_content } - if (search) { - message.metadata = { ...message.metadata, groundingMetadata: search } - } - if (mcpToolResponse) { message.metadata = { ...message.metadata, mcpTools: cloneDeep(mcpToolResponse) } } @@ -143,12 +180,49 @@ export async function fetchChatCompletion({ } // Handle citations from Perplexity API - if (isFirstChunk && citations) { + if (citations) { message.metadata = { ...message.metadata, citations } - isFirstChunk = false + } + + // Handle web search from Gemini + if (search) { + message.metadata = { ...message.metadata, groundingMetadata: search } + } + + // Handle annotations from OpenAI + if (annotations) { + message.metadata = { + ...message.metadata, + annotations: annotations + } + } + + // Handle web search from Zhipu or Hunyuan + if (webSearch) { + message.metadata = { + ...message.metadata, + webSearchInfo: webSearch + } + } + + // Handle citations from Openrouter + if (assistant.model?.provider === 'openrouter' && assistant.enableWebSearch) { + const extractedUrls = extractUrlsFromMarkdown(message.content) + if (extractedUrls.length > 0) { + message.metadata = { + ...message.metadata, + citations: extractedUrls + } + } + } + if (assistant.enableWebSearch) { + message.content = cleanLinkCommas(message.content) + if (webSearch && isZhipuModel(assistant.model)) { + message.content = completeLinks(message.content, webSearch) + } } onResponse({ ...message, status: 'pending' }) diff --git a/src/renderer/src/types/index.ts b/src/renderer/src/types/index.ts index 8ce32963..ca5fbafc 100644 --- a/src/renderer/src/types/index.ts +++ b/src/renderer/src/types/index.ts @@ -73,8 +73,12 @@ export type Message = { metadata?: { // Gemini groundingMetadata?: any - // Perplexity + // Perplexity Or Openrouter citations?: string[] + // OpenAI + annotations?: OpenAI.Chat.Completions.ChatCompletionMessage.Annotation[] + // Zhipu or Hunyuan + webSearchInfo?: any[] // Web search webSearch?: WebSearchResponse // MCP Tools diff --git a/src/renderer/src/utils/linkConverter.ts b/src/renderer/src/utils/linkConverter.ts new file mode 100644 index 00000000..50b3d5d0 --- /dev/null +++ b/src/renderer/src/utils/linkConverter.ts @@ -0,0 +1,389 @@ +// Counter for numbering links +let linkCounter = 1 +// Buffer to hold incomplete link fragments across chunks +let buffer = '' +// Map to track URLs that have already been assigned numbers +let urlToCounterMap: Map = new Map() + +/** + * Determines if a string looks like a host/URL + * @param text The text to check + * @returns Boolean indicating if the text is likely a host + */ +function isHost(text: string): boolean { + // Basic check for URL-like patterns + return /^(https?:\/\/)?[\w.-]+\.[a-z]{2,}(\/.*)?$/i.test(text) || /^[\w.-]+\.[a-z]{2,}(\/.*)?$/i.test(text) +} + +/** + * Converts Markdown links in the text to numbered links based on the rules:s + * [ref_N] -> [N] + * @param text The current chunk of text to process + * @param resetCounter Whether to reset the counter and buffer + * @returns Processed text with complete links converted + */ +export function convertLinksToZhipu(text: string, resetCounter = false): string { + if (resetCounter) { + linkCounter = 1 + buffer = '' + } + + // Append the new text to the buffer + buffer += text + let safePoint = buffer.length + + // Check from the end for potentially incomplete [ref_N] patterns + for (let i = buffer.length - 1; i >= 0; i--) { + if (buffer[i] === '[') { + const substring = buffer.substring(i) + // Check if it's a complete [ref_N] pattern + const match = /^\[ref_\d+\]/.exec(substring) + + if (!match) { + // Potentially incomplete [ref_N] pattern + safePoint = i + break + } + } + } + + // Process the safe part of the buffer + const safeBuffer = buffer.substring(0, safePoint) + buffer = buffer.substring(safePoint) + + // Replace all complete [ref_N] patterns + return safeBuffer.replace(/\[ref_(\d+)\]/g, (_, num) => { + return `[${num}]()` + }) +} + +export function convertLinksToHunyuan(text: string, webSearch: any[], resetCounter = false): string { + if (resetCounter) { + linkCounter = 1 + buffer = '' + } + + buffer += text + let safePoint = buffer.length + + // Check from the end for potentially incomplete patterns + for (let i = buffer.length - 1; i >= 0; i--) { + if (buffer[i] === '[') { + const substring = buffer.substring(i) + // Check if it's a complete pattern - handles both [N](@ref) and [N,M,...](@ref) + const match = /^\[[\d,\s]+\]\(@ref\)/.exec(substring) + + if (!match) { + // Potentially incomplete pattern + safePoint = i + break + } + } + } + + // Process the safe part of the buffer + const safeBuffer = buffer.substring(0, safePoint) + buffer = buffer.substring(safePoint) + + // Replace all complete patterns + return safeBuffer.replace(/\[([\d,\s]+)\]\(@ref\)/g, (_, numbers) => { + // Split the numbers string into individual numbers + const numArray = numbers + .split(',') + .map((num) => parseInt(num.trim())) + .filter((num) => !isNaN(num)) + + // Generate separate superscript links for each number + const links = numArray.map((num) => { + const index = num - 1 + // Check if the index is valid in webSearch array + if (index >= 0 && index < webSearch.length && webSearch[index]?.url) { + return `[${num}](${webSearch[index].url})` + } + // If no matching URL found, keep the original reference format for this number + return `[${num}](@ref)` + }) + + // Join the separate links with spaces + return links.join('') + }) +} + +/** + * Converts Markdown links in the text to numbered links based on the rules: + * 1. ([host](url)) -> [cnt](url) + * 2. [host](url) -> [cnt](url) + * 3. [anytext except host](url) -> anytext[cnt](url) + * + * @param text The current chunk of text to process + * @param resetCounter Whether to reset the counter and buffer + * @param isZhipu Whether to use Zhipu format + * @returns Processed text with complete links converted + */ +export function convertLinks(text: string, resetCounter = false, isZhipu = false): string { + if (resetCounter) { + linkCounter = 1 + buffer = '' + urlToCounterMap = new Map() + } + + // Append the new text to the buffer + buffer += text + + // Find the safe point - the position after which we might have incomplete patterns + let safePoint = buffer.length + if (isZhipu) { + // Handle Zhipu mode - find safe point for [ref_N] patterns + let safePoint = buffer.length + + // Check from the end for potentially incomplete [ref_N] patterns + for (let i = buffer.length - 1; i >= 0; i--) { + if (buffer[i] === '[') { + const substring = buffer.substring(i) + // Check if it's a complete [ref_N] pattern + const match = /^\[ref_\d+\]/.exec(substring) + + if (!match) { + // Potentially incomplete [ref_N] pattern + safePoint = i + break + } + } + } + + // Process the safe part of the buffer + const safeBuffer = buffer.substring(0, safePoint) + buffer = buffer.substring(safePoint) + + // Replace all complete [ref_N] patterns + return safeBuffer.replace(/\[ref_(\d+)\]/g, (_, num) => { + return `[${num}]()` + }) + } + + // Check for potentially incomplete patterns from the end + for (let i = buffer.length - 1; i >= 0; i--) { + if (buffer[i] === '(') { + // Check if this could be the start of a parenthesized link + if (i + 1 < buffer.length && buffer[i + 1] === '[') { + // Verify if we have a complete parenthesized link + const substring = buffer.substring(i) + const match = /^\(\[([^\]]+)\]\(([^)]+)\)\)/.exec(substring) + + if (!match) { + safePoint = i + break + } + } + } else if (buffer[i] === '[') { + // Check if this could be the start of a regular link + const substring = buffer.substring(i) + const match = /^\[([^\]]+)\]\(([^)]+)\)/.exec(substring) + + if (!match) { + safePoint = i + break + } + } + } + + // Extract the part of the buffer that we can safely process + const safeBuffer = buffer.substring(0, safePoint) + buffer = buffer.substring(safePoint) + + // Process the safe buffer to handle complete links + let result = '' + let position = 0 + + while (position < safeBuffer.length) { + // Check for parenthesized link pattern: ([text](url)) + if (position + 1 < safeBuffer.length && safeBuffer[position] === '(' && safeBuffer[position + 1] === '[') { + const substring = safeBuffer.substring(position) + const match = /^\(\[([^\]]+)\]\(([^)]+)\)\)/.exec(substring) + + if (match) { + // Found complete parenthesized link + const url = match[2] + + // Check if this URL has been seen before + let counter: number + if (urlToCounterMap.has(url)) { + counter = urlToCounterMap.get(url)! + } else { + counter = linkCounter++ + urlToCounterMap.set(url, counter) + } + + result += `[${counter}](${url})` + position += match[0].length + continue + } + } + + // Check for regular link pattern: [text](url) + if (safeBuffer[position] === '[') { + const substring = safeBuffer.substring(position) + const match = /^\[([^\]]+)\]\(([^)]+)\)/.exec(substring) + + if (match) { + // Found complete regular link + const linkText = match[1] + const url = match[2] + + // Check if this URL has been seen before + let counter: number + if (urlToCounterMap.has(url)) { + counter = urlToCounterMap.get(url)! + } else { + counter = linkCounter++ + urlToCounterMap.set(url, counter) + } + + if (isHost(linkText)) { + result += `[${counter}](${url})` + } else { + result += `${linkText}[${counter}](${url})` + } + + position += match[0].length + continue + } + } + + // If no pattern matches at this position, add the character and move on + result += safeBuffer[position] + position++ + } + + return result +} + +/** + * Converts Markdown links in the text to numbered links based on the rules: + * 1. [host](url) -> [cnt](url) + * + * @param text The current chunk of text to process + * @param resetCounter Whether to reset the counter and buffer + * @returns Processed text with complete links converted + */ +export function convertLinksToOpenRouter(text: string, resetCounter = false): string { + if (resetCounter) { + linkCounter = 1 + buffer = '' + urlToCounterMap = new Map() + } + + // Append the new text to the buffer + buffer += text + + // Find a safe point to process + let safePoint = buffer.length + + // Check for potentially incomplete link patterns from the end + for (let i = buffer.length - 1; i >= 0; i--) { + if (buffer[i] === '[') { + const substring = buffer.substring(i) + const match = /^\[([^\]]+)\]\(([^)]+)\)/.exec(substring) + + if (!match) { + safePoint = i + break + } + } + } + + // Extract the part of the buffer that we can safely process + const safeBuffer = buffer.substring(0, safePoint) + buffer = buffer.substring(safePoint) + + // Process the safe buffer to handle complete links + const result = safeBuffer.replace(/\[([^\]]+)\]\(([^)]+)\)/g, (match, text, url) => { + // Only convert link if the text looks like a host/URL + if (isHost(text)) { + // Check if this URL has been seen before + let counter: number + if (urlToCounterMap.has(url)) { + counter = urlToCounterMap.get(url)! + } else { + counter = linkCounter++ + urlToCounterMap.set(url, counter) + } + return `[${counter}](${url})` + } + // Keep original link format if the text doesn't look like a host + return match + }) + + return result +} + +/** + * 根据webSearch结果补全链接,将[num]()转换为[num](webSearch[num-1].url) + * @param text 原始文本 + * @param webSearch webSearch结果 + * @returns 补全后的文本 + */ +export function completeLinks(text: string, webSearch: any[]): string { + // 使用正则表达式匹配形如 [num]() 的链接 + return text.replace(/\[(\d+)<\/sup>\]\(\)/g, (match, num) => { + const index = parseInt(num) - 1 + // 检查 webSearch 数组中是否存在对应的 URL + if (index >= 0 && index < webSearch.length && webSearch[index]?.link) { + return `[${num}](${webSearch[index].link})` + } + // 如果没有找到对应的 URL,保持原样 + return match + }) +} + +/** + * 从Markdown文本中提取所有URL + * 支持以下格式: + * 1. [text](url) + * 2. [num](url) + * 3. ([text](url)) + * + * @param text Markdown格式的文本 + * @returns 提取到的URL数组,去重后的结果 + */ +export function extractUrlsFromMarkdown(text: string): string[] { + const urlSet = new Set() + + // 匹配所有Markdown链接格式 + const linkPattern = /\[(?:[^[\]]*)\]\(([^()]+)\)/g + let match + + while ((match = linkPattern.exec(text)) !== null) { + const url = match[1].trim() + if (isValidUrl(url)) { + urlSet.add(url) + } + } + + return Array.from(urlSet) +} + +/** + * 验证字符串是否是有效的URL + * @param url 要验证的URL字符串 + * @returns 是否是有效的URL + */ +function isValidUrl(url: string): boolean { + try { + new URL(url) + return true + } catch { + return false + } +} + +/** + * 清理 Markdown 链接之间的逗号 + * 例如: [text](url),[text](url) -> [text](url) [text](url) + * @param text 包含 Markdown 链接的文本 + * @returns 清理后的文本 + */ +export function cleanLinkCommas(text: string): string { + // 匹配两个 Markdown 链接之间的逗号(可能包含空格) + return text.replace(/\]\([^)]+\)\s*,\s*\[/g, ']()[') +} diff --git a/yarn.lock b/yarn.lock index 26a4980a..cbe03dd8 100644 --- a/yarn.lock +++ b/yarn.lock @@ -4003,7 +4003,7 @@ __metadata: mime: "npm:^4.0.4" npx-scope-finder: "npm:^1.2.0" officeparser: "npm:^4.1.1" - openai: "patch:openai@npm%3A4.77.3#~/.yarn/patches/openai-npm-4.77.3-59c6d42e7a.patch" + openai: "patch:openai@npm%3A4.87.3#~/.yarn/patches/openai-npm-4.87.3-2b30a7685f.patch" p-queue: "npm:^8.1.0" prettier: "npm:^3.5.3" proxy-agent: "npm:^6.5.0" @@ -12180,9 +12180,9 @@ __metadata: languageName: node linkType: hard -"openai@npm:4.77.3": - version: 4.77.3 - resolution: "openai@npm:4.77.3" +"openai@npm:4.87.3": + version: 4.87.3 + resolution: "openai@npm:4.87.3" dependencies: "@types/node": "npm:^18.11.18" "@types/node-fetch": "npm:^2.6.4" @@ -12192,13 +12192,16 @@ __metadata: formdata-node: "npm:^4.3.2" node-fetch: "npm:^2.6.7" peerDependencies: + ws: ^8.18.0 zod: ^3.23.8 peerDependenciesMeta: + ws: + optional: true zod: optional: true bin: openai: bin/cli - checksum: 10c0/b90a4071cc1a8257339e3001377396226422519d168ae3c05b5abc662bbac2009c5ccd37f0112c431b0ce45d83e616305ee264846ddb2f2129f186faf9b5a8cc + checksum: 10c0/e647456030f44b0c90cf35367676a7a2d8ed8a3cfa4bdd8785553519e1092699915e9a6a0c714b1f3ee59f6c116203422dc1d8f60ec2d7ba416dac0e343d0f62 languageName: node linkType: hard @@ -12227,9 +12230,9 @@ __metadata: languageName: node linkType: hard -"openai@patch:openai@npm%3A4.77.3#~/.yarn/patches/openai-npm-4.77.3-59c6d42e7a.patch": - version: 4.77.3 - resolution: "openai@patch:openai@npm%3A4.77.3#~/.yarn/patches/openai-npm-4.77.3-59c6d42e7a.patch::version=4.77.3&hash=c5d42a" +"openai@patch:openai@npm%3A4.87.3#~/.yarn/patches/openai-npm-4.87.3-2b30a7685f.patch": + version: 4.87.3 + resolution: "openai@patch:openai@npm%3A4.87.3#~/.yarn/patches/openai-npm-4.87.3-2b30a7685f.patch::version=4.87.3&hash=7dcff7" dependencies: "@types/node": "npm:^18.11.18" "@types/node-fetch": "npm:^2.6.4" @@ -12239,13 +12242,16 @@ __metadata: formdata-node: "npm:^4.3.2" node-fetch: "npm:^2.6.7" peerDependencies: + ws: ^8.18.0 zod: ^3.23.8 peerDependenciesMeta: + ws: + optional: true zod: optional: true bin: openai: bin/cli - checksum: 10c0/c3449d3d9945675d7debc4e3a68f58093400985e5275b29e4eb5610300ad3fa4589e527fda526ce770f9a945d7a1d03ffb33e34a3566f996a6947125aa761b1e + checksum: 10c0/e23ddf28487ab0fdd72fb3c429500986651f1204cba5e778e1aa02ba5b382a2a68de8ca81d717d8d0fdbea985f07b0476b2e4a86d57bf71bf1d65aa141d7d7de languageName: node linkType: hard