feat: 添加知识库匹配度阈值 (#1634)

* feat: 添加知识库匹配度阈值

* fix: 增加问答时知识库阈值

* feat: 当知识库未检索到数据时使用通用对话逻辑

* fix: add toast
This commit is contained in:
Chen Tao 2025-02-16 11:38:00 +08:00 committed by GitHub
parent b6ad7eeb9a
commit c0e0e924f7
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
11 changed files with 91 additions and 19 deletions

View File

@ -2,6 +2,7 @@ export const DEFAULT_TEMPERATURE = 1.0
export const DEFAULT_CONTEXTCOUNT = 5
export const DEFAULT_MAX_TOKENS = 4096
// Fallback document count for a knowledge base when the settings form leaves `documentCount` empty.
export const DEFAULT_KNOWLEDGE_DOCUMENT_COUNT = 6
// Fallback minimum match score applied to knowledge base search results when a base has no
// `threshold` configured. Results whose score is below the threshold are filtered out.
export const DEFAULT_KNOWLEDGE_THRESHOLD = 0.0
export const FONT_FAMILY =
"Ubuntu, -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, Oxygen, Cantarell, 'Open Sans', 'Helvetica Neue', sans-serif"

View File

@ -296,7 +296,12 @@
"title": "Knowledge Base",
"url_added": "URL added",
"url_placeholder": "Enter URL, multiple URLs separated by Enter",
"urls": "URLs"
"urls": "URLs",
"threshold_tooltip": "Match threshold",
"threshold_placeholder": "Default value (0.0)",
"threshold_too_large_or_small": "Threshold cannot be greater than 1 or less than 0",
"no_match": "No matching content found in the knowledge base.",
"threshold": "Matching threshold"
},
"languages": {
"arabic": "Arabic",

View File

@ -296,7 +296,12 @@
"title": "ナレッジベース",
"url_added": "URLが追加されました",
"url_placeholder": "URLを入力, 複数のURLはEnterで区切る",
"urls": "URL"
"urls": "URL",
"threshold_tooltip": "マッチングしきい値",
"threshold_placeholder": "デフォルト値(0.0)",
"threshold_too_large_or_small": "しきい値は0より大きく1より小さい必要があります",
"no_match": "知識ベースの内容が見つかりませんでした。",
"threshold": "マッチング度閾値"
},
"languages": {
"arabic": "アラビア語",

View File

@ -296,7 +296,12 @@
"title": "База знаний",
"url_added": "URL добавлен",
"url_placeholder": "Введите URL, несколько URL через Enter",
"urls": "URL-адреса"
"urls": "URL-адреса",
"threshold_tooltip": "Порог совпадения",
"threshold_placeholder": "По умолчанию (0.0)",
"threshold_too_large_or_small": "Порог не может быть больше 1 или меньше 0",
"no_match": "Не найдено содержимого в базе знаний.",
"threshold": "Порог соответствия"
},
"languages": {
"arabic": "Арабский",

View File

@ -275,6 +275,7 @@
"invalid_url": "无效的网址",
"model_info": "模型信息",
"no_bases": "暂无知识库",
"no_match": "未匹配到知识库内容",
"no_provider": "知识库模型服务商丢失,该知识库将不再支持,请重新创建知识库",
"not_set": "未设置",
"not_support": "知识库数据库引擎已更新,该知识库将不再支持,请重新创建知识库",
@ -293,6 +294,10 @@
"status_new": "已添加",
"status_pending": "等待中",
"status_processing": "处理中",
"threshold": "匹配度阈值",
"threshold_tooltip": "用于衡量用户问题与知识库内容之间的相关性",
"threshold_placeholder": "默认值(0.0)",
"threshold_too_large_or_small": "阈值不能大于1或小于0",
"title": "知识库",
"url_added": "网址已添加",
"url_placeholder": "请输入网址, 多个网址用回车分隔",
@ -768,4 +773,4 @@
"title": "帮助文档"
}
}
}
}

View File

@ -296,7 +296,12 @@
"title": "知識庫",
"url_added": "網址已添加",
"url_placeholder": "請輸入網址, 多個網址用回車分隔",
"urls": "網址"
"urls": "網址",
"threshold_tooltip": "匹配度閾值",
"threshold_placeholder": "預設值(0.0)",
"threshold_too_large_or_small": "閾值不能大於1或小於0",
"no_match": "未匹配到知識庫內容",
"threshold": "匹配度閾值"
},
"languages": {
"arabic": "阿拉伯文",
@ -449,9 +454,6 @@
"title": "LM Studio"
},
"paintings": {
"infini": "無問芯穹",
"perplexity": "Perplexity",
"dmxapi": "DMXAPI",
"button.delete.image": "刪除繪圖",
"button.delete.image.confirm": "確定要刪除此繪圖嗎?",
"button.new.image": "新繪圖",
@ -513,7 +515,10 @@
"together": "Together",
"yi": "零一萬物",
"zhinao": "360智腦",
"zhipu": "智譜AI"
"zhipu": "智譜AI",
"infini": "無問芯穹",
"perplexity": "Perplexity",
"dmxapi": "DMXAPI"
},
"settings": {
"about": "關於與回饋",

View File

@ -1,5 +1,6 @@
import type { ExtractChunkData } from '@llm-tools/embedjs-interfaces'
import { TopView } from '@renderer/components/TopView'
import { DEFAULT_KNOWLEDGE_THRESHOLD } from '@renderer/config/constant'
import { getFileFromUrl, getKnowledgeBaseParams } from '@renderer/services/KnowledgeService'
import { FileType, KnowledgeBase } from '@renderer/types'
import { Input, List, Modal, Spin, Typography } from 'antd'
@ -45,7 +46,11 @@ const PopupContainer: React.FC<Props> = ({ base, resolve }) => {
return { ...item, file }
})
)
setResults(results)
const filteredResults = results.filter((item) => {
const threshold = base.threshold || DEFAULT_KNOWLEDGE_THRESHOLD
return item.score >= threshold
})
setResults(filteredResults)
} catch (error) {
console.error('Search failed:', error)
} finally {

View File

@ -22,6 +22,7 @@ interface FormData {
documentCount?: number
chunkSize?: number
chunkOverlap?: number
threshold?: number
}
interface Props extends ShowParams {
@ -66,7 +67,8 @@ const PopupContainer: React.FC<Props> = ({ base: _base, resolve }) => {
name: values.name,
documentCount: values.documentCount || DEFAULT_KNOWLEDGE_DOCUMENT_COUNT,
chunkSize: values.chunkSize,
chunkOverlap: values.chunkOverlap
chunkOverlap: values.chunkOverlap,
threshold: values.threshold
}
updateKnowledgeBase(newBase)
setOpen(false)
@ -174,6 +176,23 @@ const PopupContainer: React.FC<Props> = ({ base: _base, resolve }) => {
placeholder={t('knowledge.chunk_overlap_placeholder')}
/>
</Form.Item>
<Form.Item
name="threshold"
label={t('knowledge.threshold')}
tooltip={{ title: t('knowledge.threshold_tooltip') }}
initialValue={base.threshold}
rules={[
{
/**
 * Validates the optional knowledge base match threshold.
 *
 * The field is rendered with a plain text `Input`, so `value` may arrive as a
 * string (typed by the user) or as a number (the stored initial value).
 *
 * @param _ - The rule descriptor passed by the form library (unused).
 * @param value - The current field value; empty means "use the default".
 * @returns A resolved promise when the value is empty or a number in [0, 1];
 *          otherwise a promise rejected with the localized range error.
 */
validator(_, value) {
  // The threshold is optional: an empty field is valid and falls back to the default.
  if (value === undefined || value === null || value === '') {
    return Promise.resolve()
  }
  // Coerce explicitly so that non-numeric text (e.g. "abc") becomes NaN and is
  // rejected. Previously both comparisons were false for NaN, so such input
  // slipped through and was saved as the threshold.
  const threshold = Number(value)
  if (Number.isNaN(threshold) || threshold > 1 || threshold < 0) {
    return Promise.reject(new Error(t('knowledge.threshold_too_large_or_small')))
  }
  return Promise.resolve()
}
}
]}>
<Input placeholder={t('knowledge.threshold_placeholder')} />
</Form.Item>
</Form>
<Alert message={t('knowledge.chunk_size_change_warning')} type="warning" showIcon icon={<WarningOutlined />} />
</Modal>

View File

@ -90,9 +90,14 @@ export default abstract class BaseProvider {
return message.content
}
const references = await getKnowledgeReferences(base, message)
const { referencesContent, referencesCount } = await getKnowledgeReferences(base, message)
return REFERENCE_PROMPT.replace('{question}', message.content).replace('{references}', references)
// 如果知识库中未检索到内容则使用通用逻辑
if (referencesCount === 0) {
return message.content
}
return REFERENCE_PROMPT.replace('{question}', message.content).replace('{references}', referencesContent)
}
protected getCustomParameters(assistant: Assistant) {

View File

@ -1,8 +1,9 @@
import type { ExtractChunkData } from '@llm-tools/embedjs-interfaces'
import { DEFAULT_KNOWLEDGE_DOCUMENT_COUNT } from '@renderer/config/constant'
import { DEFAULT_KNOWLEDGE_DOCUMENT_COUNT, DEFAULT_KNOWLEDGE_THRESHOLD } from '@renderer/config/constant'
import { getEmbeddingMaxContext } from '@renderer/config/embedings'
import AiProvider from '@renderer/providers/AiProvider'
import { FileType, KnowledgeBase, KnowledgeBaseParams, Message } from '@renderer/types'
import { t } from 'i18next'
import { take } from 'lodash'
import { getProviderByModel } from './AssistantService'
@ -79,10 +80,25 @@ export const getKnowledgeSourceUrl = async (item: ExtractChunkData & { file: Fil
}
export const getKnowledgeReferences = async (base: KnowledgeBase, message: Message) => {
const searchResults = await window.api.knowledgeBase.search({
search: message.content,
base: getKnowledgeBaseParams(base)
})
const searchResults = await window.api.knowledgeBase
.search({
search: message.content,
base: getKnowledgeBaseParams(base)
})
.then((results) =>
results.filter((item) => {
const threshold = base.threshold || DEFAULT_KNOWLEDGE_THRESHOLD
return item.score >= threshold
})
)
if (searchResults.length === 0) {
window.message.info({
content: t('knowledge.no_match'),
duration: 4,
key: 'knowledge-base-no-match-info'
})
return { referencesContent: '', referencesCount: 0 }
}
const _searchResults = await Promise.all(
searchResults.map(async (item) => {
@ -107,5 +123,5 @@ export const getKnowledgeReferences = async (base: KnowledgeBase, message: Messa
const referencesContent = `\`\`\`json\n${JSON.stringify(references, null, 2)}\n\`\`\``
return referencesContent
return { referencesContent, referencesCount: references.length }
}

View File

@ -244,6 +244,7 @@ export interface KnowledgeBase {
documentCount?: number
chunkSize?: number
chunkOverlap?: number
threshold?: number
}
export type KnowledgeBaseParams = {