fix: knowledge base bugs

kangfenmao 2024-12-25 21:54:46 +08:00
parent 34ebab0af8
commit c50ac440c8
14 changed files with 142 additions and 47 deletions

View File

@@ -35,7 +35,8 @@ export default defineConfig({
build: {
rollupOptions: {
external: ['@lancedb/lancedb']
}
},
minify: true
}
},
preload: {
@@ -51,6 +52,9 @@ export default defineConfig({
},
optimizeDeps: {
exclude: []
},
build: {
minify: true
}
}
})

View File

@@ -2,7 +2,7 @@ import * as fs from 'node:fs'
import path from 'node:path'
import { LocalPathLoader, RAGApplication, RAGApplicationBuilder, TextLoader } from '@llm-tools/embedjs'
import { AddLoaderReturn, ExtractChunkData } from '@llm-tools/embedjs-interfaces'
import type { AddLoaderReturn, ExtractChunkData } from '@llm-tools/embedjs-interfaces'
import { LanceDb } from '@llm-tools/embedjs-lancedb'
import { MarkdownLoader } from '@llm-tools/embedjs-loader-markdown'
import { DocxLoader, ExcelLoader, PptLoader } from '@llm-tools/embedjs-loader-msoffice'
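The hunk above switches the embedjs interfaces import to a type-only import. A minimal sketch of what that changes, reusing the same module and type names (illustrative only, not the project's actual code): a type-only import is erased from the emitted JavaScript, so the interfaces package is never loaded at runtime.

```ts
// Erased at compile time: no require('@llm-tools/embedjs-interfaces') in the output.
import type { ExtractChunkData } from '@llm-tools/embedjs-interfaces'

// The type is still available for annotations.
const logChunk = (chunk: ExtractChunkData): void => {
  console.log(chunk.pageContent, chunk.score)
}
```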

View File

@@ -3,7 +3,7 @@ import { isLinux, isWin } from '@main/constant'
import { app, BrowserWindow, Menu, MenuItem, shell } from 'electron'
import Logger from 'electron-log'
import windowStateKeeper from 'electron-window-state'
import { join } from 'path'
import path, { join } from 'path'
import icon from '../../../build/icon.png?asset'
import { titleBarOverlayDark, titleBarOverlayLight } from '../config'
@@ -137,8 +137,9 @@ export class WindowService {
const { url } = details
if (url.includes('http://file/')) {
const fileUrl = url.replace('http://file/', '')
const filePath = decodeURIComponent(fileUrl)
const fileName = url.replace('http://file/', '')
const storageDir = path.join(app.getPath('userData'), 'Data', 'Files')
const filePath = storageDir + '/' + fileName
shell.openPath(filePath).catch((err) => Logger.error('Failed to open file:', err))
} else {
shell.openExternal(details.url)
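For context, the rewritten handler resolves an in-app file link against the application's storage directory instead of decoding the URL as an absolute path. A minimal sketch of the mapping (resolveFileLink is a hypothetical helper; the real logic is inline in WindowService and uses string concatenation rather than path.join):

```ts
import path from 'node:path'
import { app } from 'electron'

// http://file/<name>  ->  <userData>/Data/Files/<name>
const resolveFileLink = (url: string): string => {
  const fileName = url.replace('http://file/', '')
  const storageDir = path.join(app.getPath('userData'), 'Data', 'Files')
  return path.join(storageDir, fileName)
}

// resolveFileLink('http://file/abc123.pdf')
// -> e.g. ~/.config/CherryStudio/Data/Files/abc123.pdf (path is illustrative)
```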

View File

@@ -56,6 +56,7 @@ export const REFERENCE_PROMPT = `请根据参考资料回答问题,并使用
1. **脚注标记**:在正文中使用 [^数字] 的形式标记脚注,例如 [^1]
2. **脚注内容**:在文末使用 [^数字]: 的形式定义脚注内容
3. **脚注要求**:脚注内容应尽量简洁
## 我的问题是:

View File

@@ -358,7 +358,7 @@ export const PROVIDER_CONFIG = {
url: 'https://aihubmix.com?aff=SJyh'
},
websites: {
official: 'https://aihubmix.com/',
official: 'https://aihubmix.com?aff=SJyh',
apiKey: 'https://aihubmix.com?aff=SJyh',
docs: 'https://doc.aihubmix.com/',
models: 'https://aihubmix.com/models'

View File

@@ -229,6 +229,7 @@
"error.enter.api.host": "Please enter your API host first",
"error.enter.api.key": "Please enter your API key first",
"error.enter.model": "Please select a model first",
"error.enter.name": "Please enter the name of the knowledge base",
"error.invalid.proxy.url": "Invalid proxy URL",
"error.invalid.webdav": "Invalid WebDAV settings",
"message.code_style": "Code style",
@@ -249,7 +250,7 @@
"upgrade.success.title": "Upgrade successfully",
"regenerate.confirm": "Regenerating will replace current message",
"copy.success": "Copied!",
"get_embedding_dimensions": "Failed to get embedding dimensions"
"error.get_embedding_dimensions": "Failed to get embedding dimensions"
},
"minapp": {
"title": "MinApp"
@@ -562,7 +563,8 @@
"add_directory": "Add Directory",
"directory_placeholder": "Enter Directory Path",
"model_info": "Model Info",
"not_support": "Knowledge base database engine updated, the knowledge base will no longer be supported, please create a new knowledge base"
"not_support": "Knowledge base database engine updated, the knowledge base will no longer be supported, please create a new knowledge base",
"source": "Source"
},
"models": {
"pinned": "Pinned",

View File

@@ -229,6 +229,7 @@
"error.enter.api.host": "Пожалуйста, введите ваш API хост",
"error.enter.api.key": "Пожалуйста, введите ваш API ключ",
"error.enter.model": "Пожалуйста, выберите модель",
"error.enter.name": "Пожалуйста, введите название базы знаний",
"error.invalid.proxy.url": "Неверный URL прокси",
"error.invalid.webdav": "Неверные настройки WebDAV",
"message.code_style": "Стиль кода",
@@ -249,7 +250,7 @@
"upgrade.success.title": "Обновление успешно",
"regenerate.confirm": "Перегенерация заменит текущее сообщение",
"copy.success": "Скопировано!",
"get_embedding_dimensions": "Не удалось получить размерность встраивания"
"error.get_embedding_dimensions": "Не удалось получить размерность встраивания"
},
"minapp": {
"title": "Встроенные приложения"
@@ -562,7 +563,8 @@
"add_directory": "Добавить директорию",
"directory_placeholder": "Введите путь к директории",
"model_info": "Модель информации",
"not_support": "База знаний базы данных движок обновлен, база знаний больше не поддерживается, пожалуйста, создайте новую базу знаний"
"not_support": "База знаний базы данных движок обновлен, база знаний больше не поддерживается, пожалуйста, создайте новую базу знаний",
"source": "Источник"
},
"models": {
"pinned": "Закреплено",

View File

@@ -230,6 +230,7 @@
"error.enter.api.host": "请输入您的 API 地址",
"error.enter.api.key": "请输入您的 API 密钥",
"error.enter.model": "请选择一个模型",
"error.enter.name": "请输入知识库名称",
"error.invalid.proxy.url": "无效的代理地址",
"error.invalid.webdav": "无效的 WebDAV 设置",
"message.code_style": "代码风格",
@@ -250,7 +251,7 @@
"upgrade.success.title": "升级成功",
"regenerate.confirm": "重新生成会覆盖当前消息",
"copy.success": "复制成功",
"get_embedding_dimensions": "获取嵌入维度失败"
"error.get_embedding_dimensions": "获取嵌入维度失败"
},
"minapp": {
"title": "小程序"
@@ -551,7 +552,8 @@
"add_directory": "添加目录",
"directory_placeholder": "请输入目录路径",
"model_info": "模型信息",
"not_support": "知识库数据库引擎已更新,该知识库将不再支持,请重新创建知识库"
"not_support": "知识库数据库引擎已更新,该知识库将不再支持,请重新创建知识库",
"source": "来源"
},
"models": {
"pinned": "已固定",

View File

@@ -229,6 +229,7 @@
"error.enter.api.host": "請先輸入您的 API 主機地址",
"error.enter.api.key": "請先輸入您的 API 密鑰",
"error.enter.model": "請先選擇一個模型",
"error.enter.name": "請先輸入知識庫名稱",
"error.invalid.proxy.url": "無效的代理 URL",
"error.invalid.webdav": "無效的 WebDAV 設定",
"message.code_style": "程式碼風格",
@@ -249,7 +250,7 @@
"upgrade.success.title": "升級成功",
"regenerate.confirm": "重新生成會覆蓋當前訊息",
"copy.success": "複製成功",
"get_embedding_dimensions": "獲取嵌入維度失敗"
"error.get_embedding_dimensions": "獲取嵌入維度失敗"
},
"minapp": {
"title": "小程序"
@@ -550,7 +551,8 @@
"add_directory": "添加目錄",
"directory_placeholder": "請輸入目錄路徑",
"model_info": "模型信息",
"not_support": "知識庫數據庫引擎已更新,該知識庫將不再支持,請重新創建知識庫"
"not_support": "知識庫數據庫引擎已更新,該知識庫將不再支持,請重新創建知識庫",
"source": "來源"
},
"models": {
"pinned": "已固定",

View File

@@ -1,7 +1,7 @@
import type { ExtractChunkData } from '@llm-tools/embedjs-interfaces'
import { TopView } from '@renderer/components/TopView'
import { getKnowledgeBaseParams } from '@renderer/services/KnowledgeService'
import { KnowledgeBase } from '@renderer/types'
import { getFileFromUrl, getKnowledgeBaseParams } from '@renderer/services/KnowledgeService'
import { FileType, KnowledgeBase } from '@renderer/types'
import { Input, List, Modal, Spin, Typography } from 'antd'
import { useState } from 'react'
import { useTranslation } from 'react-i18next'
@@ -21,7 +21,7 @@ interface Props extends ShowParams {
const PopupContainer: React.FC<Props> = ({ base, resolve }) => {
const [open, setOpen] = useState(true)
const [loading, setLoading] = useState(false)
const [results, setResults] = useState<ExtractChunkData[]>([])
const [results, setResults] = useState<Array<ExtractChunkData & { file: FileType | null }>>([])
const [searchKeyword, setSearchKeyword] = useState('')
const { t } = useTranslation()
@@ -39,7 +39,13 @@ const PopupContainer: React.FC<Props> = ({ base, resolve }) => {
search: value,
base: getKnowledgeBaseParams(base)
})
setResults(searchResults)
const results = await Promise.all(
searchResults.map(async (item) => {
const file = await getFileFromUrl(item.metadata.source)
return { ...item, file }
})
)
setResults(results)
} catch (error) {
console.error('Search failed:', error)
} finally {
@@ -102,7 +108,16 @@ const PopupContainer: React.FC<Props> = ({ base, resolve }) => {
<ScoreTag>Score: {(item.score * 100).toFixed(1)}%</ScoreTag>
<Paragraph>{highlightText(item.pageContent)}</Paragraph>
<MetadataContainer>
<Text type="secondary">Source: {item.metadata.source}</Text>
<Text type="secondary">
{t('knowledge_base.source')}:{' '}
{item.file ? (
<a href={`http://file/${item.file.name}`} target="_blank" rel="noreferrer">
{item.file.origin_name}
</a>
) : (
item.metadata.source
)}
</Text>
</MetadataContainer>
</ResultItem>
</List.Item>

View File

@@ -1,10 +1,9 @@
import { REFERENCE_PROMPT } from '@renderer/config/prompts'
import { getOllamaKeepAliveTime } from '@renderer/hooks/useOllama'
import { getKnowledgeBaseParams } from '@renderer/services/KnowledgeService'
import { getKnowledgeReferences } from '@renderer/services/KnowledgeService'
import store from '@renderer/store'
import { Assistant, Message, Model, Provider, Suggestion } from '@renderer/types'
import { delay } from '@renderer/utils'
import { take } from 'lodash'
import OpenAI from 'openai'
import { CompletionsParams } from '.'
@@ -95,25 +94,8 @@ export default abstract class BaseProvider {
return message.content
}
const searchResults = await window.api.knowledgeBase.search({
search: message.content,
base: getKnowledgeBaseParams(base)
})
const references = await getKnowledgeReferences(base, message)
const references = take(searchResults, 6).map((item, index) => {
const sourceUrl = item.metadata.source
const baseItem = base.items.find((i) => i.uniqueId === item.metadata.uniqueLoaderId)
return {
id: index,
content: item.pageContent,
sourceUrl: sourceUrl.startsWith('http') ? sourceUrl : encodeURIComponent(sourceUrl),
type: baseItem?.type
}
})
const referencesContent = `\`\`\`json\n${JSON.stringify(references, null, 2)}\n\`\`\``
return REFERENCE_PROMPT.replace('{question}', message.content).replace('{references}', referencesContent)
return REFERENCE_PROMPT.replace('{question}', message.content).replace('{references}', references)
}
}

View File

@@ -51,7 +51,7 @@ class FileManager {
if (file) {
const filesPath = store.getState().runtime.filesPath
file.path = filesPath + file.id
file.path = filesPath + '/' + file.id + file.ext
}
return file
@@ -91,7 +91,7 @@
static getFileUrl(file: FileType) {
const filesPath = store.getState().runtime.filesPath
return 'file://' + filesPath + '/' + file.id + file.ext
return 'file://' + filesPath + '/' + file.name
}
}
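The getFile fix adds the missing path separator and file extension when rebuilding a stored file's absolute path, and getFileUrl now derives the URL from file.name. A worked example of the corrected path construction (values are hypothetical):

```ts
const filesPath = '/home/user/.config/CherryStudio/Data/Files'
const file = { id: 'abc123', ext: '.pdf' }

// before: filesPath + file.id                  -> '...Filesabc123' (no separator, no extension)
// after:  filesPath + '/' + file.id + file.ext -> '.../Files/abc123.pdf'
const fixedPath = filesPath + '/' + file.id + file.ext
console.log(fixedPath)
```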

View File

@@ -1,8 +1,10 @@
import type { ExtractChunkData } from '@llm-tools/embedjs-interfaces'
import AiProvider from '@renderer/providers/AiProvider'
import { KnowledgeBase, KnowledgeBaseParams } from '@renderer/types'
import { isEmpty } from 'lodash'
import { FileType, KnowledgeBase, KnowledgeBaseParams, Message } from '@renderer/types'
import { isEmpty, take } from 'lodash'
import { getProviderByModel } from './AssistantService'
import FileManager from './FileManager'
export const getKnowledgeBaseParams = (base: KnowledgeBase): KnowledgeBaseParams => {
const provider = getProviderByModel(base.model)
@@ -14,7 +16,7 @@ export const getKnowledgeBaseParams = (base: KnowledgeBase): KnowledgeBaseParams
let host = aiProvider.getBaseURL()
if (host.includes('generativelanguage.googleapis.com')) {
if (provider.type === 'gemini') {
host = host + '/v1beta/openai/'
}
@@ -26,3 +28,69 @@ export const getKnowledgeBaseParams = (base: KnowledgeBase): KnowledgeBaseParams
baseURL: host
}
}
export const getFileFromUrl = async (url: string): Promise<FileType | null> => {
let fileName = ''
if (url && url.includes('CherryStudio')) {
if (url.includes('/Data/Files')) {
fileName = url.split('/Data/Files/')[1]
}
if (url.includes('\\Data\\Files')) {
fileName = url.split('\\Data\\Files\\')[1]
}
}
if (fileName) {
const fileId = fileName.split('.')[0]
const file = await FileManager.getFile(fileId)
if (file) {
return file
}
}
return null
}
export const getKnowledgeSourceUrl = async (item: ExtractChunkData & { file: FileType | null }) => {
if (item.metadata.source.startsWith('http')) {
return item.metadata.source
}
if (item.file) {
return `[${item.file.origin_name}](http://file/${item.file.name})`
}
return item.metadata.source
}
export const getKnowledgeReferences = async (base: KnowledgeBase, message: Message) => {
const searchResults = await window.api.knowledgeBase.search({
search: message.content,
base: getKnowledgeBaseParams(base)
})
const _searchResults = await Promise.all(
searchResults.map(async (item) => {
const file = await getFileFromUrl(item.metadata.source)
return { ...item, file }
})
)
const references = await Promise.all(
take(_searchResults, 6).map(async (item, index) => {
const baseItem = base.items.find((i) => i.uniqueId === item.metadata.uniqueLoaderId)
return {
id: index,
content: item.pageContent,
sourceUrl: await getKnowledgeSourceUrl(item),
type: baseItem?.type
}
})
)
const referencesContent = `\`\`\`json\n${JSON.stringify(references, null, 2)}\n\`\`\``
return referencesContent
}
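getKnowledgeReferences packages the retrieved chunks as a fenced JSON block for REFERENCE_PROMPT; local file sources become markdown links on the http://file/ scheme that WindowService now intercepts, while http(s) sources keep their original URL. The rough shape of one entry (field values are illustrative):

```ts
const exampleReference = {
  id: 0,
  content: '...retrieved chunk text...',
  sourceUrl: '[report.pdf](http://file/abc123.pdf)', // from getKnowledgeSourceUrl for a local file
  type: 'file' // baseItem?.type of the matching knowledge base item
}
```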

View File

@@ -1640,7 +1640,7 @@ __metadata:
languageName: node
linkType: hard
"@llm-tools/embedjs@npm:^0.1.25":
"@llm-tools/embedjs@npm:0.1.25":
version: 0.1.25
resolution: "@llm-tools/embedjs@npm:0.1.25"
dependencies:
@@ -1656,6 +1656,22 @@
languageName: node
linkType: hard
"@llm-tools/embedjs@patch:@llm-tools/embedjs@npm%3A0.1.25#~/.yarn/patches/@llm-tools-embedjs-npm-0.1.25-ec5645cf36.patch":
version: 0.1.25
resolution: "@llm-tools/embedjs@patch:@llm-tools/embedjs@npm%3A0.1.25#~/.yarn/patches/@llm-tools-embedjs-npm-0.1.25-ec5645cf36.patch::version=0.1.25&hash=7b05b5"
dependencies:
"@langchain/textsplitters": "npm:^0.1.0"
"@llm-tools/embedjs-interfaces": "npm:0.1.25"
"@llm-tools/embedjs-utils": "npm:0.1.25"
debug: "npm:^4.4.0"
langchain: "npm:^0.3.7"
md5: "npm:^2.3.0"
mime: "npm:^4.0.6"
stream-mime-type: "npm:^2.0.0"
checksum: 10c0/d0a37a5c7232571a71eff7e90ff4ba612bf33022a6eccd933c3a778844320f427a936d0851aae00092e34407c8c2f3555fe4444c6f2139f978ecfdd42fd89375
languageName: node
linkType: hard
"@malept/cross-spawn-promise@npm:^1.1.0":
version: 1.1.1
resolution: "@malept/cross-spawn-promise@npm:1.1.1"
@@ -2747,7 +2763,7 @@ __metadata:
"@google/generative-ai": "npm:^0.21.0"
"@hello-pangea/dnd": "npm:^16.6.0"
"@kangfenmao/keyv-storage": "npm:^0.1.0"
"@llm-tools/embedjs": "npm:^0.1.25"
"@llm-tools/embedjs": "patch:@llm-tools/embedjs@npm%3A0.1.25#~/.yarn/patches/@llm-tools-embedjs-npm-0.1.25-ec5645cf36.patch"
"@llm-tools/embedjs-lancedb": "npm:^0.1.25"
"@llm-tools/embedjs-loader-csv": "npm:^0.1.25"
"@llm-tools/embedjs-loader-markdown": "npm:^0.1.25"