refactor: optimize file loader with switch-case structure
* Enhance update error logging and fix duplicate type import
  - Improve error logging in AppUpdater with more detailed error information and timestamps
  - Remove duplicate MCPServer type import in Inputbar component
* refactor: optimize file loader with switch-case structure
This commit is contained in:
parent
18d143f56e
commit
ca085a807e
@ -11,8 +11,30 @@ import { DraftsExportLoader } from './draftsExportLoader'
|
|||||||
import { EpubLoader } from './epubLoader'
|
import { EpubLoader } from './epubLoader'
|
||||||
import { OdLoader, OdType } from './odLoader'
|
import { OdLoader, OdType } from './odLoader'
|
||||||
|
|
||||||
// embedjs内置loader类型
|
// 文件扩展名到加载器类型的映射
|
||||||
const commonExts = ['.pdf', '.csv', '.docx', '.pptx', '.xlsx', '.md']
|
const FILE_LOADER_MAP: Record<string, string> = {
|
||||||
|
// 内置类型
|
||||||
|
'.pdf': 'common',
|
||||||
|
'.csv': 'common',
|
||||||
|
'.docx': 'common',
|
||||||
|
'.pptx': 'common',
|
||||||
|
'.xlsx': 'common',
|
||||||
|
'.md': 'common',
|
||||||
|
// OD类型
|
||||||
|
'.odt': 'od',
|
||||||
|
'.ods': 'od',
|
||||||
|
'.odp': 'od',
|
||||||
|
// epub类型
|
||||||
|
'.epub': 'epub',
|
||||||
|
// Drafts类型
|
||||||
|
'.draftsexport': 'drafts',
|
||||||
|
// HTML类型
|
||||||
|
'.html': 'html',
|
||||||
|
'.htm': 'html',
|
||||||
|
// JSON类型
|
||||||
|
'.json': 'json'
|
||||||
|
// 其他类型默认为文本类型
|
||||||
|
}
|
||||||
|
|
||||||
export async function addOdLoader(
|
export async function addOdLoader(
|
||||||
ragApplication: RAGApplication,
|
ragApplication: RAGApplication,
|
||||||
@ -46,110 +68,87 @@ export async function addFileLoader(
|
|||||||
base: KnowledgeBaseParams,
|
base: KnowledgeBaseParams,
|
||||||
forceReload: boolean
|
forceReload: boolean
|
||||||
): Promise<LoaderReturn> {
|
): Promise<LoaderReturn> {
|
||||||
// 内置类型
|
// 获取文件类型,如果没有匹配则默认为文本类型
|
||||||
if (commonExts.includes(file.ext)) {
|
const loaderType = FILE_LOADER_MAP[file.ext.toLowerCase()] || 'text'
|
||||||
const loaderReturn = await ragApplication.addLoader(
|
let loaderReturn: AddLoaderReturn
|
||||||
// @ts-ignore LocalPathLoader
|
|
||||||
new LocalPathLoader({ path: file.path, chunkSize: base.chunkSize, chunkOverlap: base.chunkOverlap }) as any,
|
|
||||||
forceReload
|
|
||||||
)
|
|
||||||
return {
|
|
||||||
entriesAdded: loaderReturn.entriesAdded,
|
|
||||||
uniqueId: loaderReturn.uniqueId,
|
|
||||||
uniqueIds: [loaderReturn.uniqueId],
|
|
||||||
loaderType: loaderReturn.loaderType
|
|
||||||
} as LoaderReturn
|
|
||||||
}
|
|
||||||
|
|
||||||
// 自定义类型
|
// JSON类型处理
|
||||||
if (['.odt', '.ods', '.odp'].includes(file.ext)) {
|
let jsonObject = {}
|
||||||
const loaderReturn = await addOdLoader(ragApplication, file, base, forceReload)
|
let jsonParsed = true
|
||||||
return {
|
Logger.info(`[KnowledgeBase] processing file ${file.path} as ${loaderType} type`)
|
||||||
entriesAdded: loaderReturn.entriesAdded,
|
switch (loaderType) {
|
||||||
uniqueId: loaderReturn.uniqueId,
|
case 'common':
|
||||||
uniqueIds: [loaderReturn.uniqueId],
|
// 内置类型处理
|
||||||
loaderType: loaderReturn.loaderType
|
loaderReturn = await ragApplication.addLoader(
|
||||||
} as LoaderReturn
|
new LocalPathLoader({
|
||||||
}
|
path: file.path,
|
||||||
|
chunkSize: base.chunkSize,
|
||||||
|
chunkOverlap: base.chunkOverlap
|
||||||
|
}) as any,
|
||||||
|
forceReload
|
||||||
|
)
|
||||||
|
break
|
||||||
|
|
||||||
// epub 文件处理
|
case 'od':
|
||||||
if (file.ext === '.epub') {
|
// OD类型处理
|
||||||
const loaderReturn = await ragApplication.addLoader(
|
loaderReturn = await addOdLoader(ragApplication, file, base, forceReload)
|
||||||
new EpubLoader({
|
break
|
||||||
filePath: file.path,
|
case 'epub':
|
||||||
chunkSize: base.chunkSize ?? 1000,
|
// epub类型处理
|
||||||
chunkOverlap: base.chunkOverlap ?? 200
|
loaderReturn = await ragApplication.addLoader(
|
||||||
}) as any,
|
new EpubLoader({
|
||||||
forceReload
|
filePath: file.path,
|
||||||
)
|
chunkSize: base.chunkSize ?? 1000,
|
||||||
return {
|
chunkOverlap: base.chunkOverlap ?? 200
|
||||||
entriesAdded: loaderReturn.entriesAdded,
|
}) as any,
|
||||||
uniqueId: loaderReturn.uniqueId,
|
forceReload
|
||||||
uniqueIds: [loaderReturn.uniqueId],
|
)
|
||||||
loaderType: loaderReturn.loaderType
|
break
|
||||||
} as LoaderReturn
|
|
||||||
}
|
|
||||||
|
|
||||||
// DraftsExport类型 (file.ext会自动转换成小写)
|
case 'drafts':
|
||||||
if (['.draftsexport'].includes(file.ext)) {
|
// Drafts类型处理
|
||||||
const loaderReturn = await ragApplication.addLoader(new DraftsExportLoader(file.path) as any, forceReload)
|
loaderReturn = await ragApplication.addLoader(new DraftsExportLoader(file.path) as any, forceReload)
|
||||||
return {
|
break
|
||||||
entriesAdded: loaderReturn.entriesAdded,
|
|
||||||
uniqueId: loaderReturn.uniqueId,
|
|
||||||
uniqueIds: [loaderReturn.uniqueId],
|
|
||||||
loaderType: loaderReturn.loaderType
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
const fileContent = fs.readFileSync(file.path, 'utf-8')
|
case 'html':
|
||||||
|
// HTML类型处理
|
||||||
|
loaderReturn = await ragApplication.addLoader(
|
||||||
|
new WebLoader({
|
||||||
|
urlOrContent: fs.readFileSync(file.path, 'utf-8'),
|
||||||
|
chunkSize: base.chunkSize,
|
||||||
|
chunkOverlap: base.chunkOverlap
|
||||||
|
}) as any,
|
||||||
|
forceReload
|
||||||
|
)
|
||||||
|
break
|
||||||
|
|
||||||
// HTML类型
|
case 'json':
|
||||||
if (['.html', '.htm'].includes(file.ext)) {
|
try {
|
||||||
const loaderReturn = await ragApplication.addLoader(
|
jsonObject = JSON.parse(fs.readFileSync(file.path, 'utf-8'))
|
||||||
new WebLoader({
|
} catch (error) {
|
||||||
urlOrContent: fileContent,
|
jsonParsed = false
|
||||||
chunkSize: base.chunkSize,
|
Logger.warn('[KnowledgeBase] failed parsing json file, falling back to text processing:', file.path, error)
|
||||||
chunkOverlap: base.chunkOverlap
|
|
||||||
}) as any,
|
|
||||||
forceReload
|
|
||||||
)
|
|
||||||
return {
|
|
||||||
entriesAdded: loaderReturn.entriesAdded,
|
|
||||||
uniqueId: loaderReturn.uniqueId,
|
|
||||||
uniqueIds: [loaderReturn.uniqueId],
|
|
||||||
loaderType: loaderReturn.loaderType
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// JSON类型
|
|
||||||
if (['.json'].includes(file.ext)) {
|
|
||||||
let jsonObject = {}
|
|
||||||
let jsonParsed = true
|
|
||||||
try {
|
|
||||||
jsonObject = JSON.parse(fileContent)
|
|
||||||
} catch (error) {
|
|
||||||
jsonParsed = false
|
|
||||||
Logger.warn('[KnowledgeBase] failed parsing json file, failling back to text processing:', file.path, error)
|
|
||||||
}
|
|
||||||
if (jsonParsed) {
|
|
||||||
const loaderReturn = await ragApplication.addLoader(new JsonLoader({ object: jsonObject }))
|
|
||||||
return {
|
|
||||||
entriesAdded: loaderReturn.entriesAdded,
|
|
||||||
uniqueId: loaderReturn.uniqueId,
|
|
||||||
uniqueIds: [loaderReturn.uniqueId],
|
|
||||||
loaderType: loaderReturn.loaderType
|
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
if (jsonParsed) {
|
||||||
|
loaderReturn = await ragApplication.addLoader(new JsonLoader({ object: jsonObject }), forceReload)
|
||||||
|
break
|
||||||
|
}
|
||||||
|
// fallthrough - JSON 解析失败时作为文本处理
|
||||||
|
default:
|
||||||
|
// 文本类型处理(默认)
|
||||||
|
// 如果是其他文本类型且尚未读取文件,则读取文件
|
||||||
|
loaderReturn = await ragApplication.addLoader(
|
||||||
|
new TextLoader({
|
||||||
|
text: fs.readFileSync(file.path, 'utf-8'),
|
||||||
|
chunkSize: base.chunkSize,
|
||||||
|
chunkOverlap: base.chunkOverlap
|
||||||
|
}) as any,
|
||||||
|
forceReload
|
||||||
|
)
|
||||||
|
break
|
||||||
}
|
}
|
||||||
|
|
||||||
// 文本类型
|
|
||||||
const loaderReturn = await ragApplication.addLoader(
|
|
||||||
new TextLoader({ text: fileContent, chunkSize: base.chunkSize, chunkOverlap: base.chunkOverlap }) as any,
|
|
||||||
forceReload
|
|
||||||
)
|
|
||||||
|
|
||||||
Logger.info('[KnowledgeBase] processing file', file.path)
|
|
||||||
|
|
||||||
return {
|
return {
|
||||||
entriesAdded: loaderReturn.entriesAdded,
|
entriesAdded: loaderReturn.entriesAdded,
|
||||||
uniqueId: loaderReturn.uniqueId,
|
uniqueId: loaderReturn.uniqueId,
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user