feat: add knowledge base settings

This commit is contained in:
kangfenmao 2025-02-08 19:19:59 +08:00
parent ee46d2055a
commit 3d2e209550
12 changed files with 186 additions and 15 deletions

View File

@ -35,6 +35,7 @@ class KnowledgeService {
baseURL,
dimensions
}: KnowledgeBaseParams): Promise<RAGApplication> => {
const batchSize = 10
return new RAGApplicationBuilder()
.setModel('NO_MODEL')
.setEmbeddingModel(
@ -45,14 +46,14 @@ class KnowledgeService {
azureOpenAIApiDeploymentName: model,
azureOpenAIApiInstanceName: getInstanceName(baseURL),
dimensions,
batchSize: 5
batchSize
})
: new OpenAiEmbeddings({
model,
apiKey,
configuration: { baseURL },
dimensions,
batchSize: 5
batchSize
})
)
.setVectorDatabase(new LibSqlDb({ path: path.join(this.storageDir, id) }))
@ -110,6 +111,7 @@ class KnowledgeService {
if (item.type === 'note') {
const content = item.content as string
console.debug('chunkSize', base.chunkSize)
return await ragApplication.addLoader(
new TextLoader({ text: content, chunkSize: base.chunkSize, chunkOverlap: base.chunkOverlap }),
forceReload

View File

@ -0,0 +1,56 @@
/**
 * Embedding models with a known maximum context size.
 *
 * `id` is the model identifier as reported by a provider and `max_context`
 * is the limit that a knowledge base chunk size is checked against.
 * NOTE(review): the unit of `max_context` (presumably tokens) is not
 * established in this file — confirm against the providers' documentation.
 */
export const EMBEDDING_MODELS = [
  { id: 'BAAI/bge-m3', max_context: 8000 },
  { id: 'Pro/BAAI/bge-m3', max_context: 8000 },
  { id: 'BAAI/bge-large-zh-v1.5', max_context: 512 },
  { id: 'BAAI/bge-large-en-v1.5', max_context: 512 },
  { id: 'netease-youdao/bce-embedding-base_v1', max_context: 512 },
  { id: 'tao-8k', max_context: 8192 },
  { id: 'embedding-v1', max_context: 384 },
  { id: 'bge-large-zh', max_context: 512 },
  { id: 'bge-large-en', max_context: 512 }
]

/**
 * Returns the maximum context size for an embedding model.
 *
 * Resolution order:
 * 1. a match against `EMBEDDING_MODELS` by id,
 * 2. a family fallback for ids containing `bge-large` (512) or `bge-m3` (8000),
 * 3. `undefined` when the model is unknown.
 *
 * Matching ignores letter case, so ids that differ from the table only in
 * casing (e.g. `TAO-8K`, `BGE-M3`) resolve to the same limit instead of
 * falling through to `undefined`.
 *
 * @param id - Model identifier to look up.
 * @returns The model's maximum context size, or `undefined` if it is not known.
 */
export function getEmbeddingMaxContext(id: string): number | undefined {
  const normalizedId = id.toLowerCase()

  const known = EMBEDDING_MODELS.find((m) => m.id.toLowerCase() === normalizedId)
  if (known) {
    return known.max_context
  }

  // Family fallbacks cover provider-specific prefixes/suffixes around the base name.
  if (normalizedId.includes('bge-large')) {
    return 512
  }
  if (normalizedId.includes('bge-m3')) {
    return 8000
  }

  return undefined
}

View File

@ -644,7 +644,16 @@
"chunk_size": "Chunk Size",
"chunk_overlap": "Chunk Overlap",
"not_set": "Not Set",
"settings": "Knowledge Base Settings"
"settings": "Knowledge Base Settings",
"document_count": "Requested Document Count",
"document_count_help": "The more documents requested, the more information is included, but the more tokens are consumed",
"document_count_default": "Default",
"chunk_size_placeholder": "Default (not recommended to change)",
"chunk_overlap_placeholder": "Default (not recommended to change)",
"chunk_size_tooltip": "Split documents into chunks, each chunk size, not exceeding model context limit",
"chunk_overlap_tooltip": "The amount of duplicate content between adjacent chunks, ensuring that the chunks are still contextually related, improving the overall effect of processing long text",
"chunk_size_change_warning": "Chunk size and overlap size changes only apply to new content",
"chunk_size_too_large": "Chunk size cannot exceed model context limit ({{max_context}})"
},
"models": {
"pinned": "Pinned",

View File

@ -624,7 +624,16 @@
"chunk_size": "チャンクサイズ",
"chunk_overlap": "チャンクの重なり",
"not_set": "未設定",
"settings": "ナレッジベース設定"
"settings": "ナレッジベース設定",
"document_count": "要求されたドキュメント数",
"document_count_help": "要求されたドキュメント数が多いほど、付随する情報が多くなりますが、トークンの消費量も増加します",
"document_count_default": "デフォルト",
"chunk_size_placeholder": "デフォルト(変更しないでください)",
"chunk_overlap_placeholder": "デフォルト(変更しないでください)",
"chunk_size_tooltip": "ドキュメントを分割し、各チャンクのサイズ。モデルのコンテキスト制限を超えないようにしてください。",
"chunk_overlap_tooltip": "隣接するチャンク間の重複内容量。チャンク間のコンテキスト関連性を確保し、長文テキストの処理効果を向上させます。",
"chunk_size_change_warning": "チャンクサイズと重複サイズの変更は、新しく追加された内容にのみ適用されます",
"chunk_size_too_large": "チャンクサイズはモデルのコンテキスト制限を超えることはできません({{max_context}}"
},
"models": {
"pinned": "固定済み",

View File

@ -636,7 +636,16 @@
"chunk_size": "Размер фрагмента",
"chunk_overlap": "Перекрытие фрагмента",
"not_set": "Не установлено",
"settings": "Настройки базы знаний"
"settings": "Настройки базы знаний",
"document_count": "Количество запрошенных документов",
"document_count_help": "Количество запрошенных документов, вместе с ними передается больше информации, но и требуется больше токенов",
"document_count_default": "По умолчанию",
"chunk_size_placeholder": "По умолчанию (не рекомендуется изменять)",
"chunk_overlap_placeholder": "По умолчанию (не рекомендуется изменять)",
"chunk_size_tooltip": "Размер фрагмента, не превышающий модель контекста",
"chunk_overlap_tooltip": "Перекрытие фрагмента, не превышающее модель контекста",
"chunk_size_change_warning": "Размер фрагмента и перекрытие фрагмента могут быть изменены только для новых содержимого",
"chunk_size_too_large": "Размер фрагмента не может превышать модель контекста ({{max_context}})"
},
"models": {
"pinned": "Закреплено",

View File

@ -631,7 +631,16 @@
"chunk_size": "分段大小",
"chunk_overlap": "重叠大小",
"not_set": "未设置",
"settings": "知识库设置"
"settings": "知识库设置",
"document_count": "请求文档数量",
"document_count_help": "请求文档数量越多,附带的信息越多,但需要消耗的 Token 也越多",
"document_count_default": "默认",
"chunk_size_placeholder": "默认值(不建议修改)",
"chunk_overlap_placeholder": "默认值(不建议修改)",
"chunk_size_tooltip": "将文档切割分段,每段的大小,不能超过模型上下文限制",
"chunk_overlap_tooltip": "相邻文本块之间重复的内容量,确保分段后的文本块之间仍然有上下文联系,提升模型处理长文本的整体效果",
"chunk_size_change_warning": "分段大小和重叠大小修改只针对新添加的内容有效",
"chunk_size_too_large": "分段大小不能超过模型上下文限制({{max_context}}"
},
"models": {
"pinned": "已固定",

View File

@ -630,7 +630,16 @@
"chunk_size": "分段大小",
"chunk_overlap": "重疊大小",
"not_set": "未設置",
"settings": "知識庫設定"
"settings": "知識庫設定",
"document_count": "請求文件數量",
"document_count_help": "請求文件數量越多,附帶的資訊越多,但需要消耗的 Token 也越多",
"document_count_default": "預設",
"chunk_size_placeholder": "預設值(不建議修改)",
"chunk_overlap_placeholder": "預設值(不建議修改)",
"chunk_size_tooltip": "將文件切割分段,每段的大小,不能超過模型上下文限制",
"chunk_overlap_tooltip": "相鄰文本塊之間重複的內容量,確保分段後的文本塊之間仍然有上下文聯繫,提升模型處理長文本的整體效果",
"chunk_size_change_warning": "分段大小和重疊大小修改只針對新添加的內容有效",
"chunk_size_too_large": "分段大小不能超過模型上下文限制({{max_context}}"
},
"models": {
"pinned": "已固定",

View File

@ -7,7 +7,8 @@ import {
LinkOutlined,
PlusOutlined,
RedoOutlined,
SearchOutlined
SearchOutlined,
SettingOutlined
} from '@ant-design/icons'
import PromptPopup from '@renderer/components/Popups/PromptPopup'
import TextEditPopup from '@renderer/components/Popups/TextEditPopup'
@ -22,6 +23,7 @@ import { useTranslation } from 'react-i18next'
import styled from 'styled-components'
import KnowledgeSearchPopup from './components/KnowledgeSearchPopup'
import KnowledgeSettingsPopup from './components/KnowledgeSettingsPopup'
import StatusIcon from './components/StatusIcon'
const { Dragger } = Upload
@ -359,7 +361,7 @@ const KnowledgeContent: FC<KnowledgeContentProps> = ({ selectedBase }) => {
<Tag color="blue">{base.model.name}</Tag>
<Tag color="cyan">{t('models.dimensions', { dimensions: base.dimensions || 0 })}</Tag>
{providerName && <Tag color="purple">{providerName}</Tag>}
{/* <Button icon={<SettingOutlined />} onClick={() => KnowledgeSettingsPopup.show({ base })} size="small" /> */}
<Button icon={<SettingOutlined />} onClick={() => KnowledgeSettingsPopup.show({ base })} size="small" />
</ModelInfo>
<IndexSection>

View File

@ -1,4 +1,4 @@
import { DeleteOutlined, EditOutlined, FileTextOutlined, PlusOutlined } from '@ant-design/icons'
import { DeleteOutlined, EditOutlined, FileTextOutlined, PlusOutlined, SettingOutlined } from '@ant-design/icons'
import { Navbar, NavbarCenter } from '@renderer/components/app/Navbar'
import DragableList from '@renderer/components/DragableList'
import ListItem from '@renderer/components/ListItem'
@ -12,6 +12,7 @@ import { useTranslation } from 'react-i18next'
import styled from 'styled-components'
import AddKnowledgePopup from './components/AddKnowledgePopup'
import KnowledgeSettingsPopup from './components/KnowledgeSettingsPopup'
import KnowledgeContent from './KnowledgeContent'
const KnowledgePage: FC = () => {
@ -47,6 +48,12 @@ const KnowledgePage: FC = () => {
}
}
},
{
label: t('knowledge.settings'),
key: 'settings',
icon: <SettingOutlined />,
onClick: () => KnowledgeSettingsPopup.show({ base })
},
{ type: 'divider' },
{
label: t('common.delete'),

View File

@ -1,10 +1,12 @@
import { WarningOutlined } from '@ant-design/icons'
import { TopView } from '@renderer/components/TopView'
import { getEmbeddingMaxContext } from '@renderer/config/embedings'
import { isEmbeddingModel } from '@renderer/config/models'
import { useKnowledge } from '@renderer/hooks/useKnowledge'
import { useProviders } from '@renderer/hooks/useProvider'
import { getModelUniqId } from '@renderer/services/ModelService'
import { KnowledgeBase } from '@renderer/types'
import { Form, Input, InputNumber, Modal, Select } from 'antd'
import { Alert, Form, Input, InputNumber, Modal, Select, Slider } from 'antd'
import { sortBy } from 'lodash'
import { useState } from 'react'
import { useTranslation } from 'react-i18next'
@ -16,6 +18,7 @@ interface ShowParams {
/**
 * Values held by the knowledge base settings form.
 *
 * Only `name` and `model` are required; the remaining fields are optional
 * and stay `undefined` when the user leaves them unset.
 */
interface FormData {
  /** Display name; copied onto the knowledge base when the form is submitted. */
  name: string
  /** Value of the model field of the form. */
  model: string
  /** Requested document count chosen with the slider. */
  documentCount?: number
  /** Chunk size; validated against the embedding model's maximum context. */
  chunkSize?: number
  /** Chunk overlap; its validation depends on `chunkSize`. */
  chunkOverlap?: number
}
@ -56,6 +59,7 @@ const PopupContainer: React.FC<Props> = ({ base: _base, resolve }) => {
const newBase = {
...base,
name: values.name,
documentCount: values.documentCount,
chunkSize: values.chunkSize,
chunkOverlap: values.chunkOverlap
}
@ -104,14 +108,49 @@ const PopupContainer: React.FC<Props> = ({ base: _base, resolve }) => {
<Select style={{ width: '100%' }} options={selectOptions} placeholder={t('settings.models.empty')} disabled />
</Form.Item>
<Form.Item name="chunkSize" label={t('knowledge.chunk_size')}>
<InputNumber style={{ width: '100%' }} min={1} defaultValue={base.chunkSize} />
<Form.Item
name="documentCount"
label={t('knowledge.document_count')}
tooltip={{ title: t('knowledge.document_count_help') }}>
<Slider
style={{ width: '100%' }}
min={1}
max={15}
defaultValue={base.documentCount || 6}
step={1}
marks={{ 1: '1', 6: t('knowledge.document_count_default'), 15: '15' }}
/>
</Form.Item>
<Form.Item
name="chunkSize"
label={t('knowledge.chunk_size')}
tooltip={{ title: t('knowledge.chunk_size_tooltip') }}
initialValue={base.chunkSize}
rules={[
{
validator(_, value) {
const maxContext = getEmbeddingMaxContext(base.model.id)
if (value && maxContext && value > maxContext) {
return Promise.reject(new Error(t('knowledge.chunk_size_too_large', { max_context: maxContext })))
}
return Promise.resolve()
}
}
]}>
<InputNumber
style={{ width: '100%' }}
min={100}
defaultValue={base.chunkSize}
placeholder={t('knowledge.chunk_size_placeholder')}
/>
</Form.Item>
<Form.Item
name="chunkOverlap"
label={t('knowledge.chunk_overlap')}
initialValue={base.chunkOverlap}
tooltip={{ title: t('knowledge.chunk_overlap_tooltip') }}
rules={[
({ getFieldValue }) => ({
validator(_, value) {
@ -123,9 +162,15 @@ const PopupContainer: React.FC<Props> = ({ base: _base, resolve }) => {
})
]}
dependencies={['chunkSize']}>
<InputNumber style={{ width: '100%' }} min={0} />
<InputNumber
style={{ width: '100%' }}
min={0}
defaultValue={base.chunkOverlap}
placeholder={t('knowledge.chunk_overlap_placeholder')}
/>
</Form.Item>
</Form>
<Alert message={t('knowledge.chunk_size_change_warning')} type="warning" showIcon icon={<WarningOutlined />} />
</Modal>
)
}

View File

@ -1,4 +1,5 @@
import type { ExtractChunkData } from '@llm-tools/embedjs-interfaces'
import { getEmbeddingMaxContext } from '@renderer/config/embedings'
import AiProvider from '@renderer/providers/AiProvider'
import { FileType, KnowledgeBase, KnowledgeBaseParams, Message } from '@renderer/types'
import { take } from 'lodash'
@ -16,6 +17,18 @@ export const getKnowledgeBaseParams = (base: KnowledgeBase): KnowledgeBaseParams
host = host + '/v1beta/openai/'
}
let chunkSize = base.chunkSize
const maxChunkSize = getEmbeddingMaxContext(base.model.id)
if (maxChunkSize) {
if (chunkSize && chunkSize > maxChunkSize) {
chunkSize = maxChunkSize
}
if (!chunkSize) {
chunkSize = maxChunkSize
}
}
return {
id: base.id,
model: base.model.id,
@ -23,7 +36,7 @@ export const getKnowledgeBaseParams = (base: KnowledgeBase): KnowledgeBaseParams
apiKey: aiProvider.getApiKey() || 'secret',
apiVersion: provider.apiVersion,
baseURL: host,
chunkSize: base.chunkSize,
chunkSize,
chunkOverlap: base.chunkOverlap
}
}

View File

@ -234,6 +234,7 @@ export interface KnowledgeBase {
created_at: number
updated_at: number
version: number
documentCount?: number
chunkSize?: number
chunkOverlap?: number
}