From 9438c8e6fffec8a0402fb24ff4bd6764958df08c Mon Sep 17 00:00:00 2001 From: Chen Tao <70054568+eeee0717@users.noreply.github.com> Date: Thu, 17 Apr 2025 13:11:43 +0800 Subject: [PATCH] =?UTF-8?q?feat:=20LLM=E5=8F=AF=E4=BB=A5=E6=A0=B9=E6=8D=AE?= =?UTF-8?q?=E9=9C=80=E6=B1=82=E8=87=AA=E8=A1=8C=E9=80=89=E6=8B=A9=E4=BD=BF?= =?UTF-8?q?=E7=94=A8=E7=9F=A5=E8=AF=86=E5=BA=93=E6=88=96=E8=80=85=E7=BD=91?= =?UTF-8?q?=E7=BB=9C=E6=90=9C=E7=B4=A2=20(#4806)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- package.json | 2 +- src/renderer/src/config/prompts.ts | 148 +++++++++++---- src/renderer/src/i18n/locales/en-us.json | 6 +- src/renderer/src/i18n/locales/ja-jp.json | 6 +- src/renderer/src/i18n/locales/ru-ru.json | 12 +- src/renderer/src/i18n/locales/zh-cn.json | 6 +- src/renderer/src/i18n/locales/zh-tw.json | 6 +- .../src/pages/home/Messages/CitationsList.tsx | 55 +++++- .../pages/home/Messages/MessageContent.tsx | 40 +++- .../WebSearchSettings/BasicSettings.tsx | 31 ++- .../src/providers/AiProvider/BaseProvider.ts | 51 +++-- .../providers/AiProvider/OpenAIProvider.ts | 3 +- .../WebSearchProvider/ExaProvider.ts | 17 +- .../WebSearchProvider/LocalSearchProvider.ts | 8 +- .../WebSearchProvider/SearxngProvider.ts | 7 +- .../WebSearchProvider/TavilyProvider.ts | 17 +- src/renderer/src/services/ApiService.ts | 177 ++++++++++-------- src/renderer/src/services/KnowledgeService.ts | 139 ++++++++------ src/renderer/src/services/WebSearchService.ts | 68 ++++--- src/renderer/src/store/websearch.ts | 7 +- src/renderer/src/types/index.ts | 2 + src/renderer/src/utils/extract.ts | 33 ++++ yarn.lock | 4 +- 23 files changed, 589 insertions(+), 256 deletions(-) create mode 100644 src/renderer/src/utils/extract.ts diff --git a/package.json b/package.json index 779a34b8..f23b5b8a 100644 --- a/package.json +++ b/package.json @@ -82,7 +82,7 @@ "electron-updater": "^6.3.9", "electron-window-state": "^5.0.3", "epub": "patch:epub@npm%3A1.3.0#~/.yarn/patches/epub-npm-1.3.0-8325494ffe.patch", - "fast-xml-parser": "^5.0.9", + "fast-xml-parser": "^5.2.0", "fetch-socks": "^1.3.2", "fs-extra": "^11.2.0", "got-scraping": "^4.1.1", diff --git a/src/renderer/src/config/prompts.ts b/src/renderer/src/config/prompts.ts index 71d4dfd1..35f877de 100644 --- a/src/renderer/src/config/prompts.ts +++ b/src/renderer/src/config/prompts.ts @@ -51,65 +51,149 @@ export const SUMMARIZE_PROMPT = // https://github.com/ItzCrazyKns/Perplexica/blob/master/src/lib/prompts/webSearch.ts export const SEARCH_SUMMARY_PROMPT = ` - You are an AI question rephraser. You will be given a conversation and a follow-up question, you will have to rephrase the follow up question so it is a standalone question and can be used by another LLM to search the web for information to answer it. - If it is a simple writing task or a greeting (unless the greeting contains a question after it) like Hi, Hello, How are you, etc. than a question then you need to return \`not_needed\` as the response (This is because the LLM won't need to search the web for finding information on this topic). - If the user asks some question from some URL or wants you to summarize a PDF or a webpage (via URL) you need to return the links inside the \`links\` XML block and the question inside the \`question\` XML block. If the user wants to you to summarize the webpage or the PDF you need to return \`summarize\` inside the \`question\` XML block in place of a question and the link to summarize in the \`links\` XML block. 
- You must always return the rephrased question inside the \`question\` XML block, if there are no links in the follow-up question then don't insert a \`links\` XML block in your response. + You are an AI question rephraser. Your role is to rephrase follow-up queries from a conversation into standalone queries that can be used by another LLM to retrieve information, either through web search or from a knowledge base. + Follow these guidelines: + 1. If the question is a simple writing task, greeting (e.g., Hi, Hello, How are you), or does not require searching for information (unless the greeting contains a follow-up question), return 'not_needed' in the 'question' XML block. This indicates that no search is required. + 2. If the user asks a question related to a specific URL, PDF, or webpage, include the links in the 'links' XML block and the question in the 'question' XML block. If the request is to summarize content from a URL or PDF, return 'summarize' in the 'question' XML block and include the relevant links in the 'links' XML block. + 3. For websearch, You need extract keywords into 'question' XML block. For knowledge, You need rewrite user query into 'rewrite' XML block with one alternative version while preserving the original intent and meaning. + 4. Websearch: Always return the rephrased question inside the 'question' XML block. If there are no links in the follow-up question, do not insert a 'links' XML block in your response. + 5. Knowledge: Always return the rephrased question inside the 'question' XML block. + 6. Always wrap the rephrased question in the appropriate XML blocks to specify the tool(s) for retrieving information: use for queries requiring real-time or external information, for queries that can be answered from a pre-existing knowledge base, or both if the question could be applicable to either tool. Ensure that the rephrased question is always contained within a block inside these wrappers. + 7. If you are not sure to use knowledge or websearch, you need use both of them. - There are several examples attached for your reference inside the below \`examples\` XML block + There are several examples attached for your reference inside the below 'examples' XML block. 1. Follow up question: What is the capital of France Rephrased question:\` - - Capital of france - + + + Capital of France + + + + + What city serves as the capital of France? + + + What is the capital of France + + \` - 2. Hi, how are you? - Rephrased question\` - - not_needed - + 2. Follow up question: Hi, how are you? + Rephrased question:\` + + + not_needed + + + + + not_needed + + \` 3. Follow up question: What is Docker? Rephrased question: \` - - What is Docker - + + + What is Docker + + + + + Can you explain what Docker is and its main purpose? + + + What is Docker + + \` 4. Follow up question: Can you tell me what is X from https://example.com Rephrased question: \` - - Can you tell me what is X? - - - - https://example.com - + + + What is X + + + https://example.com + + + + + not_needed + + \` - 5. Follow up question: Summarize the content from https://example.com + 5. Follow up question: Summarize the content from https://example1.com and https://example2.com Rephrased question: \` - - summarize - + + + summarize + + + https://example1.com + + + https://example2.com + + + + + not_needed + + + \` - - https://example.com - + 6. Follow up question: Based on websearch, Which company had higher revenue in 2022, "Apple" or "Microsoft"? 
+ Rephrased question: \` + + + Apple's revenue in 2022 + + + Microsoft's revenue in 2022 + + + + + not_needed + + + \` + + 7. Follow up question: Based on knowledge, Fomula of Scaled Dot-Product Attention and Multi-Head Attention? + Rephrased question: \` + + + not_needed + + + + + What are the mathematical formulas for Scaled Dot-Product Attention and Multi-Head Attention + + + What is the formula for Scaled Dot-Product Attention? + + + What is the formula for Multi-Head Attention? + + \` - Anything below is the part of the actual conversation and you need to use conversation and the follow-up question to rephrase the follow-up question as a standalone question based on the guidelines shared above. + Anything below is part of the actual conversation. Use the conversation history and the follow-up question to rephrase the follow-up question as a standalone question based on the guidelines shared above. {chat_history} - Follow up question: {query} + Follow up question: {question} Rephrased question: ` diff --git a/src/renderer/src/i18n/locales/en-us.json b/src/renderer/src/i18n/locales/en-us.json index 980760bd..192ced81 100644 --- a/src/renderer/src/i18n/locales/en-us.json +++ b/src/renderer/src/i18n/locales/en-us.json @@ -548,7 +548,7 @@ "restore.failed": "Restore failed", "restore.success": "Restored successfully", "save.success.title": "Saved successfully", - "searching": "Searching the internet...", + "searching": "Searching...", "success.joplin.export": "Successfully exported to Joplin", "success.markdown.export.preconf": "Successfully exported the Markdown file to the preconfigured path", "success.markdown.export.specified": "Successfully exported the Markdown file", @@ -1361,7 +1361,9 @@ "overwrite": "Override search service", "overwrite_tooltip": "Force use search service instead of LLM", "apikey": "API key", - "free": "Free" + "free": "Free", + "content_limit": "Content length limit", + "content_limit_tooltip": "Limit the content length of the search results; content that exceeds the limit will be truncated." 
}, "quickPhrase": { "title": "Quick Phrases", diff --git a/src/renderer/src/i18n/locales/ja-jp.json b/src/renderer/src/i18n/locales/ja-jp.json index d7238402..17a0ebb1 100644 --- a/src/renderer/src/i18n/locales/ja-jp.json +++ b/src/renderer/src/i18n/locales/ja-jp.json @@ -547,7 +547,7 @@ "restore.failed": "復元に失敗しました", "restore.success": "復元に成功しました", "save.success.title": "保存に成功しました", - "searching": "インターネットで検索中...", + "searching": "検索中...", "success.joplin.export": "Joplin へのエクスポートに成功しました", "success.markdown.export.preconf": "Markdown ファイルを事前設定されたパスに正常にエクスポートしました", "success.markdown.export.specified": "Markdown ファイルを正常にエクスポートしました", @@ -1360,7 +1360,9 @@ "overwrite": "サービス検索を上書き", "overwrite_tooltip": "大規模言語モデルではなく、サービス検索を使用する", "apikey": "API キー", - "free": "無料" + "free": "無料", + "content_limit": "内容の長さ制限", + "content_limit_tooltip": "検索結果の内容長を制限し、制限を超える内容は切り捨てられます。" }, "general.auto_check_update.title": "自動更新チェックを有効にする", "quickPhrase": { diff --git a/src/renderer/src/i18n/locales/ru-ru.json b/src/renderer/src/i18n/locales/ru-ru.json index ad85e3ce..48cf2ef8 100644 --- a/src/renderer/src/i18n/locales/ru-ru.json +++ b/src/renderer/src/i18n/locales/ru-ru.json @@ -548,7 +548,7 @@ "restore.failed": "Восстановление не удалось", "restore.success": "Успешно восстановлено", "save.success.title": "Успешно сохранено", - "searching": "Поиск в Интернете...", + "searching": "Идет поиск...", "success.joplin.export": "Успешный экспорт в Joplin", "success.markdown.export.preconf": "Файл Markdown успешно экспортирован в предуказанный путь", "success.markdown.export.specified": "Файл Markdown успешно экспортирован", @@ -1360,10 +1360,12 @@ "overwrite": "Переопределить провайдера поиска", "overwrite_tooltip": "Использовать провайдера поиска вместо LLM", "apikey": "API ключ", - "free": "Бесплатно" - }, - "general.auto_check_update.title": "Включить автообновление", - "quickPhrase": { + "free": "Бесплатно", + "content_limit": "Ограничение длины текста", + "content_limit_tooltip": "Ограничьте длину содержимого результатов поиска, контент, превышающий ограничение, будет обрезан." 
+ }, + "general.auto_check_update.title": "Включить автообновление", + "quickPhrase": { "title": "Быстрые фразы", "add": "Добавить фразу", "edit": "Редактировать фразу", diff --git a/src/renderer/src/i18n/locales/zh-cn.json b/src/renderer/src/i18n/locales/zh-cn.json index 4002bbd1..bbfec77c 100644 --- a/src/renderer/src/i18n/locales/zh-cn.json +++ b/src/renderer/src/i18n/locales/zh-cn.json @@ -548,7 +548,7 @@ "restore.failed": "恢复失败", "restore.success": "恢复成功", "save.success.title": "保存成功", - "searching": "正在联网搜索...", + "searching": "正在搜索...", "success.joplin.export": "成功导出到 Joplin", "success.markdown.export.preconf": "成功导出 Markdown 文件到预先设定的路径", "success.markdown.export.specified": "成功导出 Markdown 文件", @@ -1361,7 +1361,9 @@ }, "title": "网络搜索", "apikey": "API 密钥", - "free": "免费" + "free": "免费", + "content_limit": "内容长度限制", + "content_limit_tooltip": "限制搜索结果的内容长度, 超过限制的内容将被截断" }, "quickPhrase": { "title": "快捷短语", diff --git a/src/renderer/src/i18n/locales/zh-tw.json b/src/renderer/src/i18n/locales/zh-tw.json index 4dc6febd..ccd114c2 100644 --- a/src/renderer/src/i18n/locales/zh-tw.json +++ b/src/renderer/src/i18n/locales/zh-tw.json @@ -548,7 +548,7 @@ "restore.failed": "恢復失敗", "restore.success": "恢復成功", "save.success.title": "儲存成功", - "searching": "正在網路上搜尋...", + "searching": "正在搜尋...", "success.joplin.export": "成功匯出到 Joplin", "success.markdown.export.preconf": "成功導出 Markdown 文件到預先設定的路徑", "success.markdown.export.specified": "成功導出 Markdown 文件", @@ -1360,7 +1360,9 @@ "overwrite": "覆蓋搜尋服務商", "overwrite_tooltip": "強制使用搜尋服務商而不是大語言模型進行搜尋", "apikey": "API 金鑰", - "free": "免費" + "free": "免費", + "content_limit": "內容長度限制", + "content_limit_tooltip": "限制搜尋結果的內容長度,超過限制的內容將被截斷。" }, "general.auto_check_update.title": "啟用自動更新檢查", "quickPhrase": { diff --git a/src/renderer/src/pages/home/Messages/CitationsList.tsx b/src/renderer/src/pages/home/Messages/CitationsList.tsx index 4d269f51..51a7ca36 100644 --- a/src/renderer/src/pages/home/Messages/CitationsList.tsx +++ b/src/renderer/src/pages/home/Messages/CitationsList.tsx @@ -1,5 +1,6 @@ import Favicon from '@renderer/components/Icons/FallbackFavicon' import { HStack } from '@renderer/components/Layout' +import { FileSearch } from 'lucide-react' import React from 'react' import styled from 'styled-components' @@ -9,6 +10,7 @@ interface Citation { title?: string hostname?: string showFavicon?: boolean + type?: string } interface CitationsListProps { @@ -17,6 +19,7 @@ interface CitationsListProps { } const CitationsList: React.FC = ({ citations }) => { + console.log('CitationsList', citations) if (!citations || citations.length === 0) return null return ( @@ -24,18 +27,60 @@ const CitationsList: React.FC = ({ citations }) => { {citations.map((citation) => ( {citation.number}. - {citation.showFavicon && citation.url && ( - + {citation.type === 'websearch' ? ( + + ) : ( + )} - - {citation.title ? citation.title : {citation.hostname}} - ))} ) } +const handleLinkClick = (url: string, event: React.MouseEvent) => { + if (!url) return + + event.preventDefault() + + // 检查是否是网络URL + if (url.startsWith('http://') || url.startsWith('https://')) { + window.open(url, '_blank', 'noopener,noreferrer') + } else { + try { + window.api.file.openPath(url) + } catch (error) { + console.error('打开本地文件失败:', error) + } + } +} + +// 网络搜索引用组件 +const WebSearchCitation: React.FC<{ citation: Citation }> = ({ citation }) => { + return ( + <> + {citation.showFavicon && citation.url && ( + + )} + handleLinkClick(citation.url, e)}> + {citation.title ? 
citation.title : {citation.hostname}} + + + ) +} + +// 知识库引用组件 +const KnowledgeCitation: React.FC<{ citation: Citation }> = ({ citation }) => { + return ( + <> + {citation.showFavicon && citation.url && } + handleLinkClick(citation.url, e)}> + {citation.title} + + + ) +} + const CitationsContainer = styled.div` background-color: rgb(242, 247, 253); border-radius: 4px; diff --git a/src/renderer/src/pages/home/Messages/MessageContent.tsx b/src/renderer/src/pages/home/Messages/MessageContent.tsx index d95ce529..997b463d 100644 --- a/src/renderer/src/pages/home/Messages/MessageContent.tsx +++ b/src/renderer/src/pages/home/Messages/MessageContent.tsx @@ -89,7 +89,8 @@ const MessageContent: React.FC = ({ message: _message, model }) => { (formattedCitations && formattedCitations.length > 0) || (message?.metadata?.webSearch && message.status === 'success') || (message?.metadata?.webSearchInfo && message.status === 'success') || - (message?.metadata?.groundingMetadata && message.status === 'success') + (message?.metadata?.groundingMetadata && message.status === 'success') || + (message?.metadata?.knowledge && message.status === 'success') ) }, [formattedCitations, message]) @@ -115,6 +116,16 @@ const MessageContent: React.FC = ({ message: _message, model }) => { }) }) + // 添加knowledge结果 + const knowledgeResults = message.metadata?.knowledge + knowledgeResults?.forEach((result) => { + data.set(result.sourceUrl, { + url: result.sourceUrl, + title: result.id, + content: result.content + }) + }) + // 添加citations citationsUrls.forEach((result) => { if (!data.has(result.url)) { @@ -156,7 +167,7 @@ const MessageContent: React.FC = ({ message: _message, model }) => { // Convert [n] format to superscript numbers and make them clickable // Use tag for superscript and make it a link with citation data - if (message.metadata?.webSearch) { + if (message.metadata?.webSearch || message.metadata.knowledge) { content = content.replace(/\[\[(\d+)\]\]|\[(\d+)\]/g, (match, num1, num2) => { const num = num1 || num2 const index = parseInt(num) - 1 @@ -276,14 +287,24 @@ const MessageContent: React.FC = ({ message: _message, model }) => { }))} /> )} - {message?.metadata?.webSearch && message.status === 'success' && ( + {(message?.metadata?.webSearch || message.metadata?.knowledge) && message.status === 'success' && ( ({ - number: index + 1, - url: result.url, - title: result.title, - showFavicon: true - }))} + citations={[ + ...(message.metadata.webSearch?.results.map((result, index) => ({ + number: index + 1, + url: result.url, + title: result.title, + showFavicon: true, + type: 'websearch' + })) || []), + ...(message.metadata.knowledge?.map((result, index) => ({ + number: (message.metadata?.webSearch?.results?.length || 0) + index + 1, + url: result.sourceUrl, + title: result.sourceUrl, + showFavicon: true, + type: 'knowledge' + })) || []) + ]} /> )} {message?.metadata?.webSearchInfo && message.status === 'success' && ( @@ -300,6 +321,7 @@ const MessageContent: React.FC = ({ message: _message, model }) => { )} )} + ) diff --git a/src/renderer/src/pages/settings/WebSearchSettings/BasicSettings.tsx b/src/renderer/src/pages/settings/WebSearchSettings/BasicSettings.tsx index 47a8ca3c..368d0e38 100644 --- a/src/renderer/src/pages/settings/WebSearchSettings/BasicSettings.tsx +++ b/src/renderer/src/pages/settings/WebSearchSettings/BasicSettings.tsx @@ -1,7 +1,13 @@ import { useTheme } from '@renderer/context/ThemeProvider' import { useAppDispatch, useAppSelector } from '@renderer/store' -import { setEnhanceMode, setMaxResult, 
setOverwrite, setSearchWithTime } from '@renderer/store/websearch' -import { Slider, Switch, Tooltip } from 'antd' +import { + setContentLimit, + setEnhanceMode, + setMaxResult, + setOverwrite, + setSearchWithTime +} from '@renderer/store/websearch' +import { Input, Slider, Switch, Tooltip } from 'antd' import { t } from 'i18next' import { Info } from 'lucide-react' import { FC } from 'react' @@ -14,6 +20,7 @@ const BasicSettings: FC = () => { const enhanceMode = useAppSelector((state) => state.websearch.enhanceMode) const overwrite = useAppSelector((state) => state.websearch.overwrite) const maxResults = useAppSelector((state) => state.websearch.maxResults) + const contentLimit = useAppSelector((state) => state.websearch.contentLimit) const dispatch = useAppDispatch() @@ -59,6 +66,26 @@ const BasicSettings: FC = () => { onChangeComplete={(value) => dispatch(setMaxResult(value))} /> + + + + {t('settings.websearch.content_limit')} + + + + + { + const value = e.target.value + if (!isNaN(Number(value)) && Number(value) > 0) { + dispatch(setContentLimit(Number(value))) + } + }} + /> + ) diff --git a/src/renderer/src/providers/AiProvider/BaseProvider.ts b/src/renderer/src/providers/AiProvider/BaseProvider.ts index 6f7dc9f0..142f5c06 100644 --- a/src/renderer/src/providers/AiProvider/BaseProvider.ts +++ b/src/renderer/src/providers/AiProvider/BaseProvider.ts @@ -1,7 +1,6 @@ -import { FOOTNOTE_PROMPT, REFERENCE_PROMPT } from '@renderer/config/prompts' +import { REFERENCE_PROMPT } from '@renderer/config/prompts' import { getLMStudioKeepAliveTime } from '@renderer/hooks/useLMStudio' import { getOllamaKeepAliveTime } from '@renderer/hooks/useOllama' -import { getKnowledgeBaseReferences } from '@renderer/services/KnowledgeService' import type { Assistant, GenerateImageParams, @@ -15,7 +14,6 @@ import type { import { delay, isJSON, parseJSON } from '@renderer/utils' import { addAbortController, removeAbortController } from '@renderer/utils/abortController' import { formatApiHost } from '@renderer/utils/api' -import { t } from 'i18next' import { isEmpty } from 'lodash' import type OpenAI from 'openai' @@ -98,28 +96,28 @@ export default abstract class BaseProvider { return message.content } - const webSearchReferences = await this.getWebSearchReferences(message) + const webSearchReferences = await this.getWebSearchReferencesFromCache(message) + const knowledgeReferences = await this.getKnowledgeBaseReferencesFromCache(message) - if (!isEmpty(webSearchReferences)) { - const referenceContent = `\`\`\`json\n${JSON.stringify(webSearchReferences, null, 2)}\n\`\`\`` + // 添加偏移量以避免ID冲突 + const reindexedKnowledgeReferences = knowledgeReferences.map((ref) => ({ + ...ref, + id: ref.id + webSearchReferences.length // 为知识库引用的ID添加网络搜索引用的数量作为偏移量 + })) + + const allReferences = [...webSearchReferences, ...reindexedKnowledgeReferences] + + console.log(`Found ${allReferences.length} references for ID: ${message.id}`, allReferences) + + if (!isEmpty(allReferences)) { + const referenceContent = `\`\`\`json\n${JSON.stringify(allReferences, null, 2)}\n\`\`\`` return REFERENCE_PROMPT.replace('{question}', message.content).replace('{references}', referenceContent) } - const knowledgeReferences = await getKnowledgeBaseReferences(message) - - if (!isEmpty(message.knowledgeBaseIds) && isEmpty(knowledgeReferences)) { - window.message.info({ content: t('knowledge.no_match'), key: 'knowledge-base-no-match-info' }) - } - - if (!isEmpty(knowledgeReferences)) { - const referenceContent = 
`\`\`\`json\n${JSON.stringify(knowledgeReferences, null, 2)}\n\`\`\`` - return FOOTNOTE_PROMPT.replace('{question}', message.content).replace('{references}', referenceContent) - } - return message.content } - private async getWebSearchReferences(message: Message) { + private async getWebSearchReferencesFromCache(message: Message) { if (isEmpty(message.content)) { return [] } @@ -140,6 +138,23 @@ export default abstract class BaseProvider { return [] } + /** + * 从缓存中获取知识库引用 + */ + private async getKnowledgeBaseReferencesFromCache(message: Message): Promise { + if (isEmpty(message.content)) { + return [] + } + const knowledgeReferences: KnowledgeReference[] = window.keyv.get(`knowledge-search-${message.id}`) + + if (!isEmpty(knowledgeReferences)) { + console.log(`Found ${knowledgeReferences.length} knowledge base references in cache for ID: ${message.id}`) + return knowledgeReferences + } + console.log(`No knowledge base references found in cache for ID: ${message.id}`) + return [] + } + protected getCustomParameters(assistant: Assistant) { return ( assistant?.settings?.customParameters?.reduce((acc, param) => { diff --git a/src/renderer/src/providers/AiProvider/OpenAIProvider.ts b/src/renderer/src/providers/AiProvider/OpenAIProvider.ts index bc0f0df4..dfdbdf0b 100644 --- a/src/renderer/src/providers/AiProvider/OpenAIProvider.ts +++ b/src/renderer/src/providers/AiProvider/OpenAIProvider.ts @@ -373,6 +373,7 @@ export default class OpenAIProvider extends BaseProvider { let time_first_content_millsec = 0 const start_time_millsec = new Date().getTime() const lastUserMessage = _messages.findLast((m) => m.role === 'user') + const { abortController, cleanup, signalPromise } = this.createAbortController(lastUserMessage?.id, true) const { signal } = abortController await this.checkIsCopilot() @@ -503,7 +504,7 @@ export default class OpenAIProvider extends BaseProvider { await processToolUses(content, idx) } - + // console.log('reqMessages', reqMessages) const stream = await this.sdk.chat.completions // @ts-ignore key is not typed .create( diff --git a/src/renderer/src/providers/WebSearchProvider/ExaProvider.ts b/src/renderer/src/providers/WebSearchProvider/ExaProvider.ts index 79140a02..a91f6542 100644 --- a/src/renderer/src/providers/WebSearchProvider/ExaProvider.ts +++ b/src/renderer/src/providers/WebSearchProvider/ExaProvider.ts @@ -31,11 +31,18 @@ export default class ExaProvider extends BaseWebSearchProvider { return { query: response.autopromptString, - results: response.results.map((result) => ({ - title: result.title || 'No title', - content: result.text || '', - url: result.url || '' - })) + results: response.results.slice(0, websearch.maxResults).map((result) => { + let content = result.text || '' + if (websearch.contentLimit && content.length > websearch.contentLimit) { + content = content.slice(0, websearch.contentLimit) + '...' 
+ } + + return { + title: result.title || 'No title', + content: content, + url: result.url || '' + } + }) } } catch (error) { console.error('Exa search failed:', error) diff --git a/src/renderer/src/providers/WebSearchProvider/LocalSearchProvider.ts b/src/renderer/src/providers/WebSearchProvider/LocalSearchProvider.ts index b65f9648..c8235bef 100644 --- a/src/renderer/src/providers/WebSearchProvider/LocalSearchProvider.ts +++ b/src/renderer/src/providers/WebSearchProvider/LocalSearchProvider.ts @@ -44,12 +44,8 @@ export default class LocalSearchProvider extends BaseWebSearchProvider { const fetchPromises = validItems.map(async (item) => { // console.log(`Fetching content for ${item.url}...`) const result = await fetchWebContent(item.url, 'markdown', this.provider.usingBrowser) - if ( - this.provider.contentLimit && - this.provider.contentLimit != -1 && - result.content.length > this.provider.contentLimit - ) { - result.content = result.content.slice(0, this.provider.contentLimit) + '...' + if (websearch.contentLimit && result.content.length > websearch.contentLimit) { + result.content = result.content.slice(0, websearch.contentLimit) + '...' } return result }) diff --git a/src/renderer/src/providers/WebSearchProvider/SearxngProvider.ts b/src/renderer/src/providers/WebSearchProvider/SearxngProvider.ts index 3fd7993b..ad877ba1 100644 --- a/src/renderer/src/providers/WebSearchProvider/SearxngProvider.ts +++ b/src/renderer/src/providers/WebSearchProvider/SearxngProvider.ts @@ -92,9 +92,14 @@ export default class SearxngProvider extends BaseWebSearchProvider { return { query: result.query, results: result.results.slice(0, websearch.maxResults).map((result) => { + let content = result.content || '' + if (websearch.contentLimit && content.length > websearch.contentLimit) { + content = content.slice(0, websearch.contentLimit) + '...' + } + return { title: result.title || 'No title', - content: result.content || '', + content: content, url: result.url || '' } }) diff --git a/src/renderer/src/providers/WebSearchProvider/TavilyProvider.ts b/src/renderer/src/providers/WebSearchProvider/TavilyProvider.ts index c1d7a528..07de56c0 100644 --- a/src/renderer/src/providers/WebSearchProvider/TavilyProvider.ts +++ b/src/renderer/src/providers/WebSearchProvider/TavilyProvider.ts @@ -27,11 +27,18 @@ export default class TavilyProvider extends BaseWebSearchProvider { }) return { query: result.query, - results: result.results.map((result) => ({ - title: result.title || 'No title', - content: result.content || '', - url: result.url || '' - })) + results: result.results.slice(0, websearch.maxResults).map((result) => { + let content = result.content || '' + if (websearch.contentLimit && content.length > websearch.contentLimit) { + content = content.slice(0, websearch.contentLimit) + '...' 
+ } + + return { + title: result.title || 'No title', + content: content, + url: result.url || '' + } + }) } } catch (error) { console.error('Tavily search failed:', error) diff --git a/src/renderer/src/services/ApiService.ts b/src/renderer/src/services/ApiService.ts index f27ee6ba..ea910342 100644 --- a/src/renderer/src/services/ApiService.ts +++ b/src/renderer/src/services/ApiService.ts @@ -8,9 +8,18 @@ import { SEARCH_SUMMARY_PROMPT } from '@renderer/config/prompts' import i18n from '@renderer/i18n' import store from '@renderer/store' import { setGenerating } from '@renderer/store/runtime' -import { Assistant, MCPTool, Message, Model, Provider, Suggestion, WebSearchResponse } from '@renderer/types' +import { + Assistant, + KnowledgeReference, + MCPTool, + Message, + Model, + Provider, + Suggestion, + WebSearchResponse +} from '@renderer/types' import { formatMessageError, isAbortError } from '@renderer/utils/error' -import { fetchWebContents } from '@renderer/utils/fetch' +import { extractInfoFromXML, ExtractResults } from '@renderer/utils/extract' import { withGenerateImage } from '@renderer/utils/formats' import { cleanLinkCommas, @@ -26,13 +35,13 @@ import { cloneDeep, findLast, isEmpty } from 'lodash' import AiProvider from '../providers/AiProvider' import { getAssistantProvider, - getDefaultAssistant, getDefaultModel, getProviderByModel, getTopNamingModel, getTranslateModel } from './AssistantService' import { EVENT_NAMES, EventEmitter } from './EventService' +import { processKnowledgeSearch } from './KnowledgeService' import { filterContextMessages, filterMessages, filterUsefulMessages } from './MessagesService' import { estimateMessagesUsage } from './TokenService' import WebSearchService from './WebSearchService' @@ -52,77 +61,99 @@ export async function fetchChatCompletion({ const webSearchProvider = WebSearchService.getWebSearchProvider() const AI = new AiProvider(provider) - const searchTheWeb = async () => { - if (WebSearchService.isWebSearchEnabled() && assistant.enableWebSearch && assistant.model) { - let query = '' - let webSearchResponse: WebSearchResponse = { - results: [] - } - const webSearchParams = getOpenAIWebSearchParams(assistant, assistant.model) - if (isEmpty(webSearchParams) && !isOpenAIWebSearch(assistant.model)) { - const lastMessage = findLast(messages, (m) => m.role === 'user') - const lastAnswer = findLast(messages, (m) => m.role === 'assistant') - const hasKnowledgeBase = !isEmpty(lastMessage?.knowledgeBaseIds) + const lastUserMessage = findLast(messages, (m) => m.role === 'user') + const lastAnswer = findLast(messages, (m) => m.role === 'assistant') + const hasKnowledgeBase = !isEmpty(lastUserMessage?.knowledgeBaseIds) + if (!lastUserMessage) { + return + } - if (lastMessage) { - if (hasKnowledgeBase) { - window.message.info({ - content: i18n.t('message.ignore.knowledge.base'), - key: 'knowledge-base-no-match-info' - }) - } - - // 更新消息状态为搜索中 - onResponse({ ...message, status: 'searching' }) - - try { - // 等待关键词生成完成 - const searchSummaryAssistant = getDefaultAssistant() - searchSummaryAssistant.model = assistant.model || getDefaultModel() - searchSummaryAssistant.prompt = SEARCH_SUMMARY_PROMPT - - // 如果启用搜索增强模式,则使用搜索增强模式 - if (WebSearchService.isEnhanceModeEnabled()) { - const keywords = await fetchSearchSummary({ - messages: lastAnswer ? 
[lastAnswer, lastMessage] : [lastMessage], - assistant: searchSummaryAssistant - }) - - try { - const result = WebSearchService.extractInfoFromXML(keywords || '') - if (result.question === 'not_needed') { - // 如果不需要搜索,则直接返回 - console.log('No need to search') - return - } else if (result.question === 'summarize' && result.links && result.links.length > 0) { - const contents = await fetchWebContents(result.links) - webSearchResponse = { - query: 'summaries', - results: contents - } - } else { - query = result.question - webSearchResponse = await WebSearchService.search(webSearchProvider, query) - } - } catch (error) { - console.error('Failed to extract info from XML:', error) - } - } else { - query = lastMessage.content - } - - // 处理搜索结果 - message.metadata = { - ...message.metadata, - webSearch: webSearchResponse - } - - window.keyv.set(`web-search-${lastMessage?.id}`, webSearchResponse) - } catch (error) { - console.error('Web search failed:', error) - } + // 网络搜索/知识库 关键词提取 + const extract = async () => { + const summaryAssistant = { + ...assistant, + prompt: SEARCH_SUMMARY_PROMPT + } + const keywords = await fetchSearchSummary({ + messages: lastAnswer ? [lastAnswer, lastUserMessage] : [lastUserMessage], + assistant: summaryAssistant + }) + try { + return extractInfoFromXML(keywords || '') + } catch (e: any) { + console.error('extract error', e) + return { + websearch: { + question: [lastUserMessage.content] + }, + knowledge: { + question: [lastUserMessage.content] } + } as ExtractResults + } + } + let extractResults: ExtractResults + if (assistant.enableWebSearch || hasKnowledgeBase) { + extractResults = await extract() + } + + const searchTheWeb = async () => { + // 检查是否需要进行网络搜索 + const shouldSearch = + extractResults?.websearch && + WebSearchService.isWebSearchEnabled() && + assistant.enableWebSearch && + assistant.model && + extractResults.websearch.question[0] !== 'not_needed' + + if (!shouldSearch) return + + onResponse({ ...message, status: 'searching' }) + // 检查是否使用OpenAI的网络搜索 + const webSearchParams = getOpenAIWebSearchParams(assistant, assistant.model!) 
+ if (!isEmpty(webSearchParams) || isOpenAIWebSearch(assistant.model!)) return + + try { + const webSearchResponse: WebSearchResponse = await WebSearchService.processWebsearch( + webSearchProvider, + extractResults + ) + // console.log('webSearchResponse', webSearchResponse) + // 处理搜索结果 + message.metadata = { + ...message.metadata, + webSearch: webSearchResponse } + + window.keyv.set(`web-search-${lastUserMessage?.id}`, webSearchResponse) + } catch (error) { + console.error('Web search failed:', error) + } + } + + // --- 知识库搜索 --- + const searchKnowledgeBase = async () => { + const shouldSearch = + hasKnowledgeBase && extractResults.knowledge && extractResults.knowledge.question[0] !== 'not_needed' + + if (!shouldSearch) return + + onResponse({ ...message, status: 'searching' }) + try { + const knowledgeReferences: KnowledgeReference[] = await processKnowledgeSearch( + extractResults, + lastUserMessage.knowledgeBaseIds + ) + console.log('knowledgeReferences', knowledgeReferences) + // 处理搜索结果 + message.metadata = { + ...message.metadata, + knowledge: knowledgeReferences + } + window.keyv.set(`knowledge-search-${lastUserMessage?.id}`, knowledgeReferences) + } catch (error) { + console.error('Knowledge base search failed:', error) + window.keyv.set(`knowledge-search-${lastUserMessage?.id}`, []) } } @@ -130,10 +161,8 @@ export async function fetchChatCompletion({ let _messages: Message[] = [] let isFirstChunk = true - // Search web - await searchTheWeb() + await Promise.all([searchTheWeb(), searchKnowledgeBase()]) - const lastUserMessage = findLast(messages, (m) => m.role === 'user') // Get MCP tools const mcpTools: MCPTool[] = [] const enabledMCPs = lastUserMessage?.enabledMCPs diff --git a/src/renderer/src/services/KnowledgeService.ts b/src/renderer/src/services/KnowledgeService.ts index 0deb8730..5a0a1e53 100644 --- a/src/renderer/src/services/KnowledgeService.ts +++ b/src/renderer/src/services/KnowledgeService.ts @@ -3,8 +3,9 @@ import { DEFAULT_KNOWLEDGE_DOCUMENT_COUNT, DEFAULT_KNOWLEDGE_THRESHOLD } from '@ import { getEmbeddingMaxContext } from '@renderer/config/embedings' import AiProvider from '@renderer/providers/AiProvider' import store from '@renderer/store' -import { FileType, KnowledgeBase, KnowledgeBaseParams, KnowledgeReference, Message } from '@renderer/types' -import { isEmpty, take } from 'lodash' +import { FileType, KnowledgeBase, KnowledgeBaseParams, KnowledgeReference } from '@renderer/types' +import { ExtractResults } from '@renderer/utils/extract' +import { isEmpty } from 'lodash' import { getProviderByModel } from './AssistantService' import FileManager from './FileManager' @@ -86,66 +87,96 @@ export const getKnowledgeSourceUrl = async (item: ExtractChunkData & { file: Fil return item.metadata.source } -export const getKnowledgeBaseReference = async (base: KnowledgeBase, message: Message) => { - const searchResults = await window.api.knowledgeBase - .search({ - search: message.content, - base: getKnowledgeBaseParams(base) - }) - .then((results) => - results.filter((item) => { - const threshold = base.threshold || DEFAULT_KNOWLEDGE_THRESHOLD - return item.score >= threshold - }) - ) - - let rerankResults = searchResults - if (base.rerankModel) { - rerankResults = await window.api.knowledgeBase.rerank({ - search: message.content, - base: getKnowledgeBaseParams(base), - results: searchResults - }) - } - - const processdResults = await Promise.all( - rerankResults.map(async (item) => { - const file = await getFileFromUrl(item.metadata.source) - return { ...item, file } - }) 
- ) - - const documentCount = base.documentCount || DEFAULT_KNOWLEDGE_DOCUMENT_COUNT - - const references = await Promise.all( - take(processdResults, documentCount).map(async (item, index) => { - const baseItem = base.items.find((i) => i.uniqueId === item.metadata.uniqueLoaderId) - return { - id: index + 1, - content: item.pageContent, - sourceUrl: await getKnowledgeSourceUrl(item), - type: baseItem?.type - } as KnowledgeReference - }) - ) - - return references -} - -export const getKnowledgeBaseReferences = async (message: Message) => { - if (isEmpty(message.knowledgeBaseIds) || isEmpty(message.content)) { +export const processKnowledgeSearch = async ( + extractResults: ExtractResults, + knowledgeBaseIds: string[] | undefined +): Promise => { + if ( + !extractResults.knowledge?.question || + extractResults.knowledge.question.length === 0 || + isEmpty(knowledgeBaseIds) + ) { + console.log('No valid question found in extractResults.knowledge') return [] } + const questions = extractResults.knowledge.question + const rewrite = extractResults.knowledge.rewrite - const bases = store.getState().knowledge.bases.filter((kb) => message.knowledgeBaseIds?.includes(kb.id)) - + const bases = store.getState().knowledge.bases.filter((kb) => knowledgeBaseIds?.includes(kb.id)) if (!bases || bases.length === 0) { + console.log('Skipping knowledge search: No matching knowledge bases found.') return [] } - const referencesPromises = bases.map(async (base) => await getKnowledgeBaseReference(base, message)) + const referencesPromises = bases.map(async (base) => { + try { + const baseParams = getKnowledgeBaseParams(base) + const documentCount = base.documentCount || DEFAULT_KNOWLEDGE_DOCUMENT_COUNT - const references = (await Promise.all(referencesPromises)).filter((result) => !isEmpty(result)).flat() + const allSearchResultsPromises = questions.map((question) => + window.api.knowledgeBase + .search({ + search: question, + base: baseParams + }) + .then((results) => + results.filter((item) => { + const threshold = base.threshold || DEFAULT_KNOWLEDGE_THRESHOLD + return item.score >= threshold + }) + ) + ) + const allSearchResults = await Promise.all(allSearchResultsPromises) + + const searchResults = Array.from( + new Map(allSearchResults.flat().map((item) => [item.metadata.uniqueId || item.pageContent, item])).values() + ) + .sort((a, b) => b.score - a.score) + .slice(0, documentCount) + + console.log(`Knowledge base ${base.name} search results:`, searchResults) + let rerankResults = searchResults + if (base.rerankModel && searchResults.length > 0) { + rerankResults = await window.api.knowledgeBase.rerank({ + search: rewrite, + base: baseParams, + results: searchResults + }) + } + + const processdResults = await Promise.all( + rerankResults.map(async (item) => { + const file = await getFileFromUrl(item.metadata.source) + return { ...item, file } + }) + ) + + const references = await Promise.all( + processdResults.map(async (item, index) => { + // const baseItem = base.items.find((i) => i.uniqueId === item.metadata.uniqueLoaderId) + return { + id: index + 1, // 搜索多个库会导致ID重复 + content: item.pageContent, + sourceUrl: await getKnowledgeSourceUrl(item), + type: 'file' // 需要映射 baseItem.type是'localPathLoader' -> 'file' + } as KnowledgeReference + }) + ) + return references + } catch (error) { + console.error(`Error searching knowledge base ${base.name}:`, error) + return [] + } + }) + + const resultsPerBase = await Promise.all(referencesPromises) + + const allReferencesRaw = resultsPerBase.flat().filter((ref): ref is 
KnowledgeReference => !!ref) + // 重新为引用分配ID + const references = allReferencesRaw.map((ref, index) => ({ + ...ref, + id: index + 1 + })) return references } diff --git a/src/renderer/src/services/WebSearchService.ts b/src/renderer/src/services/WebSearchService.ts index 95267438..01d12aa8 100644 --- a/src/renderer/src/services/WebSearchService.ts +++ b/src/renderer/src/services/WebSearchService.ts @@ -1,8 +1,10 @@ import WebSearchEngineProvider from '@renderer/providers/WebSearchProvider' import store from '@renderer/store' import { setDefaultProvider, WebSearchState } from '@renderer/store/websearch' -import { WebSearchProvider, WebSearchResponse } from '@renderer/types' +import { WebSearchProvider, WebSearchResponse, WebSearchResult } from '@renderer/types' import { hasObjectKey } from '@renderer/utils' +import { ExtractResults } from '@renderer/utils/extract' +import { fetchWebContents } from '@renderer/utils/fetch' import dayjs from 'dayjs' /** @@ -131,34 +133,46 @@ class WebSearchService { } } - /** - * 从带有XML标签的文本中提取信息 - * @public - * @param text 包含XML标签的文本 - * @returns 提取的信息对象 - * @throws 如果文本中没有question标签则抛出错误 - */ - public extractInfoFromXML(text: string): { question: string; links?: string[] } { - // 提取question标签内容 - const questionMatch = text.match(/([\s\S]*?)<\/question>/) - if (!questionMatch) { - throw new Error('Missing required tag') - } - const question = questionMatch[1].trim() + public async processWebsearch( + webSearchProvider: WebSearchProvider, + extractResults: ExtractResults + ): Promise { + try { + // 检查 websearch 和 question 是否有效 + if (!extractResults.websearch?.question || extractResults.websearch.question.length === 0) { + console.log('No valid question found in extractResults.websearch') + return { results: [] } + } - // 提取links标签内容(可选) - const linksMatch = text.match(/([\s\S]*?)<\/links>/) - const links = linksMatch - ? 
linksMatch[1] - .trim() - .split('\n') - .map((link) => link.trim()) - .filter((link) => link !== '') - : undefined + const questions = extractResults.websearch.question + const links = extractResults.websearch.links + const firstQuestion = questions[0] - return { - question, - links + if (firstQuestion === 'summarize' && links && links.length > 0) { + const contents = await fetchWebContents(links) + return { + query: 'summaries', + results: contents + } + } + const searchPromises = questions.map((q) => this.search(webSearchProvider, q)) + const searchResults = await Promise.allSettled(searchPromises) + const aggregatedResults: WebSearchResult[] = [] + + searchResults.forEach((result) => { + if (result.status === 'fulfilled') { + if (result.value.results) { + aggregatedResults.push(...result.value.results) + } + } + }) + return { + query: questions.join(' | '), + results: aggregatedResults + } + } catch (error) { + console.error('Failed to process enhanced search:', error) + return { results: [] } } } } diff --git a/src/renderer/src/store/websearch.ts b/src/renderer/src/store/websearch.ts index cd092023..31f0224c 100644 --- a/src/renderer/src/store/websearch.ts +++ b/src/renderer/src/store/websearch.ts @@ -24,6 +24,7 @@ export interface WebSearchState { enhanceMode: boolean // 是否覆盖服务商搜索 overwrite: boolean + contentLimit?: number } const initialState: WebSearchState = { @@ -139,6 +140,9 @@ const websearchSlice = createSlice({ // Add the new provider to the array state.providers.push(action.payload) } + }, + setContentLimit: (state, action: PayloadAction) => { + state.contentLimit = action.payload } } }) @@ -157,7 +161,8 @@ export const { setSubscribeSources, setEnhanceMode, setOverwrite, - addWebSearchProvider + addWebSearchProvider, + setContentLimit } = websearchSlice.actions export default websearchSlice.reducer diff --git a/src/renderer/src/types/index.ts b/src/renderer/src/types/index.ts index 803790c2..ec1f5f96 100644 --- a/src/renderer/src/types/index.ts +++ b/src/renderer/src/types/index.ts @@ -88,6 +88,8 @@ export type Message = { mcpTools?: MCPToolResponse[] // Generate Image generateImage?: GenerateImageResponse + // knowledge + knowledge?: KnowledgeReference[] } // 多模型消息样式 multiModelMessageStyle?: 'horizontal' | 'vertical' | 'fold' | 'grid' diff --git a/src/renderer/src/utils/extract.ts b/src/renderer/src/utils/extract.ts new file mode 100644 index 00000000..1c764f62 --- /dev/null +++ b/src/renderer/src/utils/extract.ts @@ -0,0 +1,33 @@ +import { XMLParser } from 'fast-xml-parser' +export interface ExtractResults { + websearch?: WebsearchExtractResults + knowledge?: KnowledgeExtractResults +} + +export interface WebsearchExtractResults { + question: string[] + links?: string[] +} + +export interface KnowledgeExtractResults { + rewrite: string + question: string[] +} +/** + * 从带有XML标签的文本中提取信息 + * @public + * @param text 包含XML标签的文本 + * @returns 提取的信息对象 + * @throws + */ +export const extractInfoFromXML = (text: string): ExtractResults => { + // console.log('extract text', text) + const parser = new XMLParser({ + isArray: (name) => { + return name === 'question' || name === 'links' + } + }) + const extractResults: ExtractResults = parser.parse(text) + // console.log('Extracted results:', extractResults) + return extractResults +} diff --git a/yarn.lock b/yarn.lock index 905a5b5b..770932cf 100644 --- a/yarn.lock +++ b/yarn.lock @@ -4229,7 +4229,7 @@ __metadata: eslint-plugin-react-hooks: "npm:^5.2.0" eslint-plugin-simple-import-sort: "npm:^12.1.1" eslint-plugin-unused-imports: 
"npm:^4.1.4" - fast-xml-parser: "npm:^5.0.9" + fast-xml-parser: "npm:^5.2.0" fetch-socks: "npm:^1.3.2" fs-extra: "npm:^11.2.0" got-scraping: "npm:^4.1.1" @@ -7854,7 +7854,7 @@ __metadata: languageName: node linkType: hard -"fast-xml-parser@npm:^5.0.9": +"fast-xml-parser@npm:^5.2.0": version: 5.2.0 resolution: "fast-xml-parser@npm:5.2.0" dependencies: