fix: support html file in knowledge base (#1703)

This commit is contained in:
Wenwei Lin 2025-02-15 22:50:05 +08:00 committed by GitHub
parent f20bccfd7d
commit d574a09529
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@ -2,6 +2,7 @@ import * as fs from 'node:fs'
import { LocalPathLoader, RAGApplication, TextLoader } from '@llm-tools/embedjs'
import type { AddLoaderReturn } from '@llm-tools/embedjs-interfaces'
import { WebLoader } from '@llm-tools/embedjs-loader-web'
import { LoaderReturn } from '@shared/config/types'
import { FileType, KnowledgeBaseParams } from '@types'
import Logger from 'electron-log'
@ -69,8 +70,26 @@ export async function addFileLoader(
} as LoaderReturn
}
// 文本类型
const fileContent = fs.readFileSync(file.path, 'utf-8')
// HTML类型
if (['.html', '.htm'].includes(file.ext)) {
const loaderReturn = await ragApplication.addLoader(
new WebLoader({
urlOrContent: fileContent,
chunkSize: base.chunkSize,
chunkOverlap: base.chunkOverlap
}) as any,
forceReload
)
return {
entriesAdded: loaderReturn.entriesAdded,
uniqueId: loaderReturn.uniqueId,
uniqueIds: [loaderReturn.uniqueId],
loaderType: loaderReturn.loaderType
}
}
// 文本类型
const loaderReturn = await ragApplication.addLoader(
new TextLoader({ text: fileContent, chunkSize: base.chunkSize, chunkOverlap: base.chunkOverlap }) as any,
forceReload