feat: Enhance file processing and loader configuration
- Remove image loader from dependencies
- Update file loading to skip image, video, and audio files
- Add logging for knowledge base file processing
- Modify common file extensions list
- Add type ignore for LocalPathLoader to resolve TypeScript issues
This commit is contained in:
parent
1c33c90884
commit
15d50761e7
@ -55,7 +55,6 @@
|
|||||||
"@llm-tools/embedjs": "patch:@llm-tools/embedjs@npm%3A0.1.28#~/.yarn/patches/@llm-tools-embedjs-npm-0.1.28-8e4393fa2d.patch",
|
"@llm-tools/embedjs": "patch:@llm-tools/embedjs@npm%3A0.1.28#~/.yarn/patches/@llm-tools-embedjs-npm-0.1.28-8e4393fa2d.patch",
|
||||||
"@llm-tools/embedjs-libsql": "^0.1.28",
|
"@llm-tools/embedjs-libsql": "^0.1.28",
|
||||||
"@llm-tools/embedjs-loader-csv": "^0.1.28",
|
"@llm-tools/embedjs-loader-csv": "^0.1.28",
|
||||||
"@llm-tools/embedjs-loader-image": "^0.1.28",
|
|
||||||
"@llm-tools/embedjs-loader-markdown": "patch:@llm-tools/embedjs-loader-markdown@npm%3A0.1.28#~/.yarn/patches/@llm-tools-embedjs-loader-markdown-npm-0.1.28-81647ffac6.patch",
|
"@llm-tools/embedjs-loader-markdown": "patch:@llm-tools/embedjs-loader-markdown@npm%3A0.1.28#~/.yarn/patches/@llm-tools-embedjs-loader-markdown-npm-0.1.28-81647ffac6.patch",
|
||||||
"@llm-tools/embedjs-loader-msoffice": "^0.1.28",
|
"@llm-tools/embedjs-loader-msoffice": "^0.1.28",
|
||||||
"@llm-tools/embedjs-loader-pdf": "^0.1.28",
|
"@llm-tools/embedjs-loader-pdf": "^0.1.28",
|
||||||
@ -86,6 +85,7 @@
|
|||||||
"@electron-toolkit/tsconfig": "^1.0.1",
|
"@electron-toolkit/tsconfig": "^1.0.1",
|
||||||
"@hello-pangea/dnd": "^16.6.0",
|
"@hello-pangea/dnd": "^16.6.0",
|
||||||
"@kangfenmao/keyv-storage": "^0.1.0",
|
"@kangfenmao/keyv-storage": "^0.1.0",
|
||||||
|
"@llm-tools/embedjs-loader-image": "^0.1.28",
|
||||||
"@reduxjs/toolkit": "^2.2.5",
|
"@reduxjs/toolkit": "^2.2.5",
|
||||||
"@types/adm-zip": "^0",
|
"@types/adm-zip": "^0",
|
||||||
"@types/fs-extra": "^11",
|
"@types/fs-extra": "^11",
|
||||||
|
|||||||
@ -4,11 +4,12 @@ import { LocalPathLoader, RAGApplication, TextLoader } from '@llm-tools/embedjs'
|
|||||||
import type { AddLoaderReturn } from '@llm-tools/embedjs-interfaces'
|
import type { AddLoaderReturn } from '@llm-tools/embedjs-interfaces'
|
||||||
import { LoaderReturn } from '@shared/config/types'
|
import { LoaderReturn } from '@shared/config/types'
|
||||||
import { FileType, KnowledgeBaseParams } from '@types'
|
import { FileType, KnowledgeBaseParams } from '@types'
|
||||||
|
import Logger from 'electron-log'
|
||||||
|
|
||||||
import { OdLoader, OdType } from './odLoader'
|
import { OdLoader, OdType } from './odLoader'
|
||||||
|
|
||||||
// embedjs内置loader类型
|
// embedjs内置loader类型
|
||||||
const commonExts = ['.pdf', '.csv', '.json', '.docx', '.pptx', '.xlsx', '.md', '.jpeg']
|
const commonExts = ['.pdf', '.csv', '.json', '.docx', '.pptx', '.xlsx', '.md']
|
||||||
|
|
||||||
export async function addOdLoader(
|
export async function addOdLoader(
|
||||||
ragApplication: RAGApplication,
|
ragApplication: RAGApplication,
|
||||||
@ -45,6 +46,7 @@ export async function addFileLoader(
|
|||||||
// 内置类型
|
// 内置类型
|
||||||
if (commonExts.includes(file.ext)) {
|
if (commonExts.includes(file.ext)) {
|
||||||
const loaderReturn = await ragApplication.addLoader(
|
const loaderReturn = await ragApplication.addLoader(
|
||||||
|
// @ts-ignore LocalPathLoader
|
||||||
new LocalPathLoader({ path: file.path, chunkSize: base.chunkSize, chunkOverlap: base.chunkOverlap }) as any,
|
new LocalPathLoader({ path: file.path, chunkSize: base.chunkSize, chunkOverlap: base.chunkOverlap }) as any,
|
||||||
forceReload
|
forceReload
|
||||||
)
|
)
|
||||||
@ -73,6 +75,9 @@ export async function addFileLoader(
|
|||||||
new TextLoader({ text: fileContent, chunkSize: base.chunkSize, chunkOverlap: base.chunkOverlap }) as any,
|
new TextLoader({ text: fileContent, chunkSize: base.chunkSize, chunkOverlap: base.chunkOverlap }) as any,
|
||||||
forceReload
|
forceReload
|
||||||
)
|
)
|
||||||
|
|
||||||
|
Logger.info('[KnowledgeBase] processing file', file.path)
|
||||||
|
|
||||||
return {
|
return {
|
||||||
entriesAdded: loaderReturn.entriesAdded,
|
entriesAdded: loaderReturn.entriesAdded,
|
||||||
uniqueId: loaderReturn.uniqueId,
|
uniqueId: loaderReturn.uniqueId,
|
||||||
|
|||||||
@ -25,7 +25,9 @@ export function getAllFiles(dirPath: string, arrayOfFiles: FileType[] = []): Fil
|
|||||||
const ext = path.extname(file)
|
const ext = path.extname(file)
|
||||||
const fileType = getFileType(ext)
|
const fileType = getFileType(ext)
|
||||||
|
|
||||||
if (fileType === FileTypes.OTHER) return
|
if ([FileTypes.OTHER, FileTypes.IMAGE, FileTypes.VIDEO, FileTypes.AUDIO].includes(fileType)) {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
const name = path.basename(file)
|
const name = path.basename(file)
|
||||||
const size = fs.statSync(fullPath).size
|
const size = fs.statSync(fullPath).size
|
||||||
@ -41,6 +43,7 @@ export function getAllFiles(dirPath: string, arrayOfFiles: FileType[] = []): Fil
|
|||||||
type: fileType,
|
type: fileType,
|
||||||
created_at: new Date()
|
created_at: new Date()
|
||||||
}
|
}
|
||||||
|
|
||||||
arrayOfFiles.push(fileItem)
|
arrayOfFiles.push(fileItem)
|
||||||
}
|
}
|
||||||
})
|
})
|
||||||
|
|||||||
@ -37,6 +37,7 @@ interface KnowledgeContentProps {
|
|||||||
const fileTypes = [...documentExts, ...textExts]
|
const fileTypes = [...documentExts, ...textExts]
|
||||||
const KnowledgeContent: FC<KnowledgeContentProps> = ({ selectedBase }) => {
|
const KnowledgeContent: FC<KnowledgeContentProps> = ({ selectedBase }) => {
|
||||||
const { t } = useTranslation()
|
const { t } = useTranslation()
|
||||||
|
|
||||||
const {
|
const {
|
||||||
base,
|
base,
|
||||||
noteItems,
|
noteItems,
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user