feat: Enhance file processing and loader configuration

- Move image loader (@llm-tools/embedjs-loader-image) out of the runtime dependencies into a later package.json dependency section
- Update file loading to skip image, video, and audio files
- Add logging for knowledge base file processing
- Remove '.jpeg' from the common file extensions list (commonExts)
- Add a @ts-ignore comment on the LocalPathLoader construction to resolve TypeScript issues
This commit is contained in:
kangfenmao 2025-02-13 17:49:00 +08:00
parent 1c33c90884
commit 15d50761e7
4 changed files with 12 additions and 3 deletions

View File

@ -55,7 +55,6 @@
"@llm-tools/embedjs": "patch:@llm-tools/embedjs@npm%3A0.1.28#~/.yarn/patches/@llm-tools-embedjs-npm-0.1.28-8e4393fa2d.patch", "@llm-tools/embedjs": "patch:@llm-tools/embedjs@npm%3A0.1.28#~/.yarn/patches/@llm-tools-embedjs-npm-0.1.28-8e4393fa2d.patch",
"@llm-tools/embedjs-libsql": "^0.1.28", "@llm-tools/embedjs-libsql": "^0.1.28",
"@llm-tools/embedjs-loader-csv": "^0.1.28", "@llm-tools/embedjs-loader-csv": "^0.1.28",
"@llm-tools/embedjs-loader-image": "^0.1.28",
"@llm-tools/embedjs-loader-markdown": "patch:@llm-tools/embedjs-loader-markdown@npm%3A0.1.28#~/.yarn/patches/@llm-tools-embedjs-loader-markdown-npm-0.1.28-81647ffac6.patch", "@llm-tools/embedjs-loader-markdown": "patch:@llm-tools/embedjs-loader-markdown@npm%3A0.1.28#~/.yarn/patches/@llm-tools-embedjs-loader-markdown-npm-0.1.28-81647ffac6.patch",
"@llm-tools/embedjs-loader-msoffice": "^0.1.28", "@llm-tools/embedjs-loader-msoffice": "^0.1.28",
"@llm-tools/embedjs-loader-pdf": "^0.1.28", "@llm-tools/embedjs-loader-pdf": "^0.1.28",
@ -86,6 +85,7 @@
"@electron-toolkit/tsconfig": "^1.0.1", "@electron-toolkit/tsconfig": "^1.0.1",
"@hello-pangea/dnd": "^16.6.0", "@hello-pangea/dnd": "^16.6.0",
"@kangfenmao/keyv-storage": "^0.1.0", "@kangfenmao/keyv-storage": "^0.1.0",
"@llm-tools/embedjs-loader-image": "^0.1.28",
"@reduxjs/toolkit": "^2.2.5", "@reduxjs/toolkit": "^2.2.5",
"@types/adm-zip": "^0", "@types/adm-zip": "^0",
"@types/fs-extra": "^11", "@types/fs-extra": "^11",

View File

@ -4,11 +4,12 @@ import { LocalPathLoader, RAGApplication, TextLoader } from '@llm-tools/embedjs'
import type { AddLoaderReturn } from '@llm-tools/embedjs-interfaces' import type { AddLoaderReturn } from '@llm-tools/embedjs-interfaces'
import { LoaderReturn } from '@shared/config/types' import { LoaderReturn } from '@shared/config/types'
import { FileType, KnowledgeBaseParams } from '@types' import { FileType, KnowledgeBaseParams } from '@types'
import Logger from 'electron-log'
import { OdLoader, OdType } from './odLoader' import { OdLoader, OdType } from './odLoader'
// embedjs内置loader类型 // embedjs内置loader类型
const commonExts = ['.pdf', '.csv', '.json', '.docx', '.pptx', '.xlsx', '.md', '.jpeg'] const commonExts = ['.pdf', '.csv', '.json', '.docx', '.pptx', '.xlsx', '.md']
export async function addOdLoader( export async function addOdLoader(
ragApplication: RAGApplication, ragApplication: RAGApplication,
@ -45,6 +46,7 @@ export async function addFileLoader(
// 内置类型 // 内置类型
if (commonExts.includes(file.ext)) { if (commonExts.includes(file.ext)) {
const loaderReturn = await ragApplication.addLoader( const loaderReturn = await ragApplication.addLoader(
// @ts-ignore LocalPathLoader
new LocalPathLoader({ path: file.path, chunkSize: base.chunkSize, chunkOverlap: base.chunkOverlap }) as any, new LocalPathLoader({ path: file.path, chunkSize: base.chunkSize, chunkOverlap: base.chunkOverlap }) as any,
forceReload forceReload
) )
@ -73,6 +75,9 @@ export async function addFileLoader(
new TextLoader({ text: fileContent, chunkSize: base.chunkSize, chunkOverlap: base.chunkOverlap }) as any, new TextLoader({ text: fileContent, chunkSize: base.chunkSize, chunkOverlap: base.chunkOverlap }) as any,
forceReload forceReload
) )
Logger.info('[KnowledgeBase] processing file', file.path)
return { return {
entriesAdded: loaderReturn.entriesAdded, entriesAdded: loaderReturn.entriesAdded,
uniqueId: loaderReturn.uniqueId, uniqueId: loaderReturn.uniqueId,

View File

@ -25,7 +25,9 @@ export function getAllFiles(dirPath: string, arrayOfFiles: FileType[] = []): Fil
const ext = path.extname(file) const ext = path.extname(file)
const fileType = getFileType(ext) const fileType = getFileType(ext)
if (fileType === FileTypes.OTHER) return if ([FileTypes.OTHER, FileTypes.IMAGE, FileTypes.VIDEO, FileTypes.AUDIO].includes(fileType)) {
return
}
const name = path.basename(file) const name = path.basename(file)
const size = fs.statSync(fullPath).size const size = fs.statSync(fullPath).size
@ -41,6 +43,7 @@ export function getAllFiles(dirPath: string, arrayOfFiles: FileType[] = []): Fil
type: fileType, type: fileType,
created_at: new Date() created_at: new Date()
} }
arrayOfFiles.push(fileItem) arrayOfFiles.push(fileItem)
} }
}) })

View File

@ -37,6 +37,7 @@ interface KnowledgeContentProps {
const fileTypes = [...documentExts, ...textExts] const fileTypes = [...documentExts, ...textExts]
const KnowledgeContent: FC<KnowledgeContentProps> = ({ selectedBase }) => { const KnowledgeContent: FC<KnowledgeContentProps> = ({ selectedBase }) => {
const { t } = useTranslation() const { t } = useTranslation()
const { const {
base, base,
noteItems, noteItems,