refactor: optimize file loader with switch-case structure

* Enhance update error logging and fix duplicate type import

- Improve error logging in AppUpdater with more detailed error information and timestamps
- Remove duplicate MCPServer type import in Inputbar component

* refactor: optimize file loader with switch-case structure
This commit is contained in:
Hao He 2025-03-14 13:30:21 +08:00 committed by GitHub
parent 18d143f56e
commit ca085a807e
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@ -11,8 +11,30 @@ import { DraftsExportLoader } from './draftsExportLoader'
import { EpubLoader } from './epubLoader'
import { OdLoader, OdType } from './odLoader'
// embedjs内置loader类型
const commonExts = ['.pdf', '.csv', '.docx', '.pptx', '.xlsx', '.md']
// 文件扩展名到加载器类型的映射
const FILE_LOADER_MAP: Record<string, string> = {
// 内置类型
'.pdf': 'common',
'.csv': 'common',
'.docx': 'common',
'.pptx': 'common',
'.xlsx': 'common',
'.md': 'common',
// OD类型
'.odt': 'od',
'.ods': 'od',
'.odp': 'od',
// epub类型
'.epub': 'epub',
// Drafts类型
'.draftsexport': 'drafts',
// HTML类型
'.html': 'html',
'.htm': 'html',
// JSON类型
'.json': 'json'
// 其他类型默认为文本类型
}
export async function addOdLoader(
ragApplication: RAGApplication,
@ -46,35 +68,34 @@ export async function addFileLoader(
base: KnowledgeBaseParams,
forceReload: boolean
): Promise<LoaderReturn> {
// 内置类型
if (commonExts.includes(file.ext)) {
const loaderReturn = await ragApplication.addLoader(
// @ts-ignore LocalPathLoader
new LocalPathLoader({ path: file.path, chunkSize: base.chunkSize, chunkOverlap: base.chunkOverlap }) as any,
// 获取文件类型,如果没有匹配则默认为文本类型
const loaderType = FILE_LOADER_MAP[file.ext.toLowerCase()] || 'text'
let loaderReturn: AddLoaderReturn
// JSON类型处理
let jsonObject = {}
let jsonParsed = true
Logger.info(`[KnowledgeBase] processing file ${file.path} as ${loaderType} type`)
switch (loaderType) {
case 'common':
// 内置类型处理
loaderReturn = await ragApplication.addLoader(
new LocalPathLoader({
path: file.path,
chunkSize: base.chunkSize,
chunkOverlap: base.chunkOverlap
}) as any,
forceReload
)
return {
entriesAdded: loaderReturn.entriesAdded,
uniqueId: loaderReturn.uniqueId,
uniqueIds: [loaderReturn.uniqueId],
loaderType: loaderReturn.loaderType
} as LoaderReturn
}
break
// 自定义类型
if (['.odt', '.ods', '.odp'].includes(file.ext)) {
const loaderReturn = await addOdLoader(ragApplication, file, base, forceReload)
return {
entriesAdded: loaderReturn.entriesAdded,
uniqueId: loaderReturn.uniqueId,
uniqueIds: [loaderReturn.uniqueId],
loaderType: loaderReturn.loaderType
} as LoaderReturn
}
// epub 文件处理
if (file.ext === '.epub') {
const loaderReturn = await ragApplication.addLoader(
case 'od':
// OD类型处理
loaderReturn = await addOdLoader(ragApplication, file, base, forceReload)
break
case 'epub':
// epub类型处理
loaderReturn = await ragApplication.addLoader(
new EpubLoader({
filePath: file.path,
chunkSize: base.chunkSize ?? 1000,
@ -82,73 +103,51 @@ export async function addFileLoader(
}) as any,
forceReload
)
return {
entriesAdded: loaderReturn.entriesAdded,
uniqueId: loaderReturn.uniqueId,
uniqueIds: [loaderReturn.uniqueId],
loaderType: loaderReturn.loaderType
} as LoaderReturn
}
break
// DraftsExport类型 (file.ext会自动转换成小写)
if (['.draftsexport'].includes(file.ext)) {
const loaderReturn = await ragApplication.addLoader(new DraftsExportLoader(file.path) as any, forceReload)
return {
entriesAdded: loaderReturn.entriesAdded,
uniqueId: loaderReturn.uniqueId,
uniqueIds: [loaderReturn.uniqueId],
loaderType: loaderReturn.loaderType
}
}
case 'drafts':
// Drafts类型处理
loaderReturn = await ragApplication.addLoader(new DraftsExportLoader(file.path) as any, forceReload)
break
const fileContent = fs.readFileSync(file.path, 'utf-8')
// HTML类型
if (['.html', '.htm'].includes(file.ext)) {
const loaderReturn = await ragApplication.addLoader(
case 'html':
// HTML类型处理
loaderReturn = await ragApplication.addLoader(
new WebLoader({
urlOrContent: fileContent,
urlOrContent: fs.readFileSync(file.path, 'utf-8'),
chunkSize: base.chunkSize,
chunkOverlap: base.chunkOverlap
}) as any,
forceReload
)
return {
entriesAdded: loaderReturn.entriesAdded,
uniqueId: loaderReturn.uniqueId,
uniqueIds: [loaderReturn.uniqueId],
loaderType: loaderReturn.loaderType
}
}
break
// JSON类型
if (['.json'].includes(file.ext)) {
let jsonObject = {}
let jsonParsed = true
case 'json':
try {
jsonObject = JSON.parse(fileContent)
jsonObject = JSON.parse(fs.readFileSync(file.path, 'utf-8'))
} catch (error) {
jsonParsed = false
Logger.warn('[KnowledgeBase] failed parsing json file, failling back to text processing:', file.path, error)
}
if (jsonParsed) {
const loaderReturn = await ragApplication.addLoader(new JsonLoader({ object: jsonObject }))
return {
entriesAdded: loaderReturn.entriesAdded,
uniqueId: loaderReturn.uniqueId,
uniqueIds: [loaderReturn.uniqueId],
loaderType: loaderReturn.loaderType
}
}
Logger.warn('[KnowledgeBase] failed parsing json file, falling back to text processing:', file.path, error)
}
// 文本类型
const loaderReturn = await ragApplication.addLoader(
new TextLoader({ text: fileContent, chunkSize: base.chunkSize, chunkOverlap: base.chunkOverlap }) as any,
if (jsonParsed) {
loaderReturn = await ragApplication.addLoader(new JsonLoader({ object: jsonObject }), forceReload)
break
}
// fallthrough - JSON 解析失败时作为文本处理
default:
// 文本类型处理(默认)
// 如果是其他文本类型且尚未读取文件,则读取文件
loaderReturn = await ragApplication.addLoader(
new TextLoader({
text: fs.readFileSync(file.path, 'utf-8'),
chunkSize: base.chunkSize,
chunkOverlap: base.chunkOverlap
}) as any,
forceReload
)
Logger.info('[KnowledgeBase] processing file', file.path)
break
}
return {
entriesAdded: loaderReturn.entriesAdded,