fix(knowledge): fix citation bug and optimize extract logic (#5195)
* fix(knowledge): change search ui and fix search bug
* fix: knowledge citation
* feat: optimize extract logic
This commit is contained in:
parent
0fa10627bc
commit
a1ffabae41
@ -60,7 +60,7 @@ export const SEARCH_SUMMARY_PROMPT = `
|
|||||||
4. Websearch: Always return the rephrased question inside the 'question' XML block. If there are no links in the follow-up question, do not insert a 'links' XML block in your response.
|
4. Websearch: Always return the rephrased question inside the 'question' XML block. If there are no links in the follow-up question, do not insert a 'links' XML block in your response.
|
||||||
5. Knowledge: Always return the rephrased question inside the 'question' XML block.
|
5. Knowledge: Always return the rephrased question inside the 'question' XML block.
|
||||||
6. Always wrap the rephrased question in the appropriate XML blocks to specify the tool(s) for retrieving information: use <websearch></websearch> for queries requiring real-time or external information, <knowledge></knowledge> for queries that can be answered from a pre-existing knowledge base, or both if the question could be applicable to either tool. Ensure that the rephrased question is always contained within a <question></question> block inside these wrappers.
|
6. Always wrap the rephrased question in the appropriate XML blocks to specify the tool(s) for retrieving information: use <websearch></websearch> for queries requiring real-time or external information, <knowledge></knowledge> for queries that can be answered from a pre-existing knowledge base, or both if the question could be applicable to either tool. Ensure that the rephrased question is always contained within a <question></question> block inside these wrappers.
|
||||||
7. If you are not sure to use knowledge or websearch, you need use both of them.
|
7. *use {tools} to rephrase the question*
|
||||||
|
|
||||||
There are several examples attached for your reference inside the below 'examples' XML block.
|
There are several examples attached for your reference inside the below 'examples' XML block.
|
||||||
|
|
||||||
|
|||||||
@ -30,7 +30,7 @@ const toolUseRegex = /<tool_use>([\s\S]*?)<\/tool_use>/g
|
|||||||
|
|
||||||
const MessageContent: React.FC<Props> = ({ message: _message, model }) => {
|
const MessageContent: React.FC<Props> = ({ message: _message, model }) => {
|
||||||
const { t } = useTranslation()
|
const { t } = useTranslation()
|
||||||
const message = withMessageThought(clone(_message))
|
let message = withMessageThought(clone(_message))
|
||||||
|
|
||||||
// Memoize message status checks
|
// Memoize message status checks
|
||||||
const messageStatus = useMemo(
|
const messageStatus = useMemo(
|
||||||
@ -119,6 +119,9 @@ const MessageContent: React.FC<Props> = ({ message: _message, model }) => {
|
|||||||
message.metadata?.webSearchInfo
|
message.metadata?.webSearchInfo
|
||||||
])
|
])
|
||||||
|
|
||||||
|
/**
|
||||||
|
* 知识库索引部分:解决LLM回复中未使用的知识库引用索引问题
|
||||||
|
*/
|
||||||
// Process content to make citation numbers clickable
|
// Process content to make citation numbers clickable
|
||||||
const processedContent = useMemo(() => {
|
const processedContent = useMemo(() => {
|
||||||
const metadataFields = ['citations', 'webSearch', 'webSearchInfo', 'annotations', 'knowledge']
|
const metadataFields = ['citations', 'webSearch', 'webSearchInfo', 'annotations', 'knowledge']
|
||||||
@ -129,42 +132,90 @@ const MessageContent: React.FC<Props> = ({ message: _message, model }) => {
|
|||||||
return content
|
return content
|
||||||
}
|
}
|
||||||
|
|
||||||
// 预先计算citations数组,避免重复计算
|
// 预先计算citations数组
|
||||||
const websearchResults = message?.metadata?.webSearch?.results?.map((result) => result.url) || []
|
const websearchResults = message?.metadata?.webSearch?.results?.map((result) => result.url) || []
|
||||||
const knowledgeResults = message?.metadata?.knowledge?.map((result) => result.sourceUrl) || []
|
const knowledgeResults = message?.metadata?.knowledge?.map((result) => result.sourceUrl) || []
|
||||||
const citations = message?.metadata?.citations || [...websearchResults, ...knowledgeResults]
|
const citations = message?.metadata?.citations || [...websearchResults, ...knowledgeResults]
|
||||||
|
const webSearchLength = websearchResults.length // 计算 web search 结果的数量
|
||||||
|
|
||||||
// 优化正则表达式匹配
|
|
||||||
if (message.metadata?.webSearch || message.metadata?.knowledge) {
|
if (message.metadata?.webSearch || message.metadata?.knowledge) {
|
||||||
// 合并两个正则为一个,减少遍历次数
|
const usedOriginalIndexes: number[] = []
|
||||||
content = content.replace(/\[\[(\d+)\]\]|\[(\d+)\]/g, (match, num1, num2) => {
|
const citationRegex = /\[\[(\d+)\]\]|\[(\d+)\]/g
|
||||||
const num = num1 || num2
|
|
||||||
const index = parseInt(num) - 1
|
|
||||||
|
|
||||||
if (index < 0 || index >= citations.length) {
|
// 第一步: 识别有效的原始索引
|
||||||
return match
|
for (const match of content.matchAll(citationRegex)) {
|
||||||
|
const numStr = match[1] || match[2]
|
||||||
|
const index = parseInt(numStr) - 1
|
||||||
|
if (index >= webSearchLength && index < citations.length && citations[index]) {
|
||||||
|
if (!usedOriginalIndexes.includes(index)) {
|
||||||
|
usedOriginalIndexes.push(index)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// 对使用的原始索引进行排序,以便后续查找新索引
|
||||||
|
usedOriginalIndexes.sort((a, b) => a - b)
|
||||||
|
|
||||||
|
// 创建原始索引到新索引的映射
|
||||||
|
const originalIndexToNewIndexMap = new Map<number, number>()
|
||||||
|
usedOriginalIndexes.forEach((originalIndex, newIndex) => {
|
||||||
|
originalIndexToNewIndexMap.set(originalIndex, newIndex)
|
||||||
|
})
|
||||||
|
|
||||||
|
// 第二步: 替换并使用新的索引编号
|
||||||
|
content = content.replace(citationRegex, (match, num1, num2) => {
|
||||||
|
const numStr = num1 || num2
|
||||||
|
const originalIndex = parseInt(numStr) - 1
|
||||||
|
|
||||||
|
// 检查索引是否有效
|
||||||
|
if (originalIndex < 0 || originalIndex >= citations.length || !citations[originalIndex]) {
|
||||||
|
return match // 无效索引,返回原文
|
||||||
}
|
}
|
||||||
|
|
||||||
const link = citations[index]
|
const link = citations[originalIndex]
|
||||||
|
const citation = { ...(citationsData[link] || { url: link }) }
|
||||||
if (!link) {
|
|
||||||
return match
|
|
||||||
}
|
|
||||||
|
|
||||||
const isWebLink = link.startsWith('http://') || link.startsWith('https://')
|
|
||||||
if (!isWebLink) {
|
|
||||||
return `<sup>${num}</sup>`
|
|
||||||
}
|
|
||||||
|
|
||||||
const citation = citationsData[link] || { url: link }
|
|
||||||
if (citation.content) {
|
if (citation.content) {
|
||||||
citation.content = citation.content.substring(0, 200)
|
citation.content = citation.content.substring(0, 200)
|
||||||
}
|
}
|
||||||
|
const citationDataHtml = encodeHTML(JSON.stringify(citation))
|
||||||
|
|
||||||
return `[<sup data-citation='${encodeHTML(JSON.stringify(citation))}'>${num}</sup>](${link})`
|
// 检查是否是 *被使用的知识库* 引用
|
||||||
|
if (originalIndexToNewIndexMap.has(originalIndex)) {
|
||||||
|
const newIndex = originalIndexToNewIndexMap.get(originalIndex)!
|
||||||
|
const newCitationNum = webSearchLength + newIndex + 1 // 重新编号的知识库引用 (从websearch index+1开始)
|
||||||
|
|
||||||
|
const isWebLink = link.startsWith('http://') || link.startsWith('https://')
|
||||||
|
if (!isWebLink) {
|
||||||
|
// 知识库引用通常不是网页链接,只显示上标数字
|
||||||
|
return `<sup>${newCitationNum}</sup>`
|
||||||
|
} else {
|
||||||
|
// 如果知识库源是网页链接 (特殊情况)
|
||||||
|
return `[<sup data-citation='${citationDataHtml}'>${newCitationNum}</sup>](${link})`
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// 检查是否是 *Web搜索* 引用
|
||||||
|
else if (originalIndex < webSearchLength) {
|
||||||
|
const citationNum = originalIndex + 1 // Web搜索引用保持原编号 (从1开始)
|
||||||
|
return `[<sup data-citation='${citationDataHtml}'>${citationNum}</sup>](${link})`
|
||||||
|
}
|
||||||
|
// 其他情况 (如未使用的知识库引用),返回原文
|
||||||
|
else {
|
||||||
|
return match
|
||||||
|
}
|
||||||
})
|
})
|
||||||
|
|
||||||
|
// 过滤掉未使用的知识索引
|
||||||
|
message = {
|
||||||
|
...message,
|
||||||
|
metadata: {
|
||||||
|
...message.metadata,
|
||||||
|
// 根据其对应的全局索引是否存在于 usedOriginalIndexes 来过滤
|
||||||
|
knowledge: message.metadata.knowledge?.filter((_, knowledgeIndex) =>
|
||||||
|
usedOriginalIndexes.includes(knowledgeIndex + webSearchLength)
|
||||||
|
)
|
||||||
|
}
|
||||||
|
}
|
||||||
} else {
|
} else {
|
||||||
// 使用预编译的正则表达式
|
// 处理非 webSearch/knowledge 的情况 (这部分逻辑保持不变)
|
||||||
const citationRegex = /\[<sup>(\d+)<\/sup>\]\(([^)]+)\)/g
|
const citationRegex = /\[<sup>(\d+)<\/sup>\]\(([^)]+)\)/g
|
||||||
content = content.replace(citationRegex, (_, num, url) => {
|
content = content.replace(citationRegex, (_, num, url) => {
|
||||||
const citation = citationsData[url] || { url }
|
const citation = citationsData[url] || { url }
|
||||||
|
|||||||
@ -70,10 +70,16 @@ export async function fetchChatCompletion({
|
|||||||
|
|
||||||
// 网络搜索/知识库 关键词提取
|
// 网络搜索/知识库 关键词提取
|
||||||
const extract = async () => {
|
const extract = async () => {
|
||||||
|
const tools: string[] = []
|
||||||
|
|
||||||
|
if (assistant.enableWebSearch) tools.push('websearch')
|
||||||
|
if (hasKnowledgeBase) tools.push('knowledge')
|
||||||
|
|
||||||
const summaryAssistant = {
|
const summaryAssistant = {
|
||||||
...assistant,
|
...assistant,
|
||||||
prompt: SEARCH_SUMMARY_PROMPT
|
prompt: SEARCH_SUMMARY_PROMPT.replace('{tools}', tools.join(', '))
|
||||||
}
|
}
|
||||||
|
|
||||||
const keywords = await fetchSearchSummary({
|
const keywords = await fetchSearchSummary({
|
||||||
messages: lastAnswer ? [lastAnswer, lastUserMessage] : [lastUserMessage],
|
messages: lastAnswer ? [lastAnswer, lastUserMessage] : [lastUserMessage],
|
||||||
assistant: summaryAssistant
|
assistant: summaryAssistant
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user