feat: support json and draftsExport file in knowledge base (#1717)

This commit is contained in:
Wenwei Lin 2025-02-17 08:25:07 +08:00 committed by GitHub
parent 7764507d74
commit 574d02a8c9
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 52 additions and 4 deletions

View File

@ -2,6 +2,7 @@ export const imageExts = ['.jpg', '.jpeg', '.png', '.gif', '.bmp', '.webp']
export const videoExts = ['.mp4', '.avi', '.mov', '.wmv', '.flv', '.mkv'] export const videoExts = ['.mp4', '.avi', '.mov', '.wmv', '.flv', '.mkv']
export const audioExts = ['.mp3', '.wav', '.ogg', '.flac', '.aac'] export const audioExts = ['.mp3', '.wav', '.ogg', '.flac', '.aac']
export const documentExts = ['.pdf', '.docx', '.pptx', '.xlsx', '.odt', '.odp', '.ods'] export const documentExts = ['.pdf', '.docx', '.pptx', '.xlsx', '.odt', '.odp', '.ods']
export const thirdPartyApplicationExts = ['.draftsExport']
export const bookExts = ['.epub'] export const bookExts = ['.epub']
export const textExts = [ export const textExts = [
'.txt', // 普通文本文件 '.txt', // 普通文本文件

View File

@ -0,0 +1,22 @@
import * as fs from 'node:fs'
import { JsonLoader } from '@llm-tools/embedjs'
/**
 * Loader for note archives exported from the Drafts app (`.draftsExport` files).
 *
 * The file is expected to contain a JSON array of notes. Each note is reduced to
 * three fields (`content`, `tags` and `modified_at`) before the resulting array
 * is handed to `JsonLoader`.
 */
export class DraftsExportLoader extends JsonLoader {
  /**
   * @param filePath Path of the export file; it is read synchronously as UTF-8.
   * @throws SyntaxError if the file content is not valid JSON.
   * @throws TypeError if the top-level JSON value is not an array.
   */
  constructor(filePath: string) {
    const fileContent = fs.readFileSync(filePath, 'utf-8')
    const rawJson: unknown = JSON.parse(fileContent)
    if (!Array.isArray(rawJson)) {
      // Fail with an explicit message instead of "rawJson.map is not a function".
      throw new TypeError(`Drafts export must be a JSON array: ${filePath}`)
    }
    const json = rawJson.map((item) => {
      const content = item?.content
      return {
        // Newlines are replaced with <br>; non-string content is passed through untouched.
        content: typeof content === 'string' ? content.replace(/\n/g, '<br>') : content,
        tags: item?.tags,
        // Prefer the real modification time; fall back to the creation time when it is absent.
        modified_at: item?.modified_at ?? item?.created_at
      }
    })
    super({ object: json })
  }
}

View File

@ -1,17 +1,18 @@
import * as fs from 'node:fs' import * as fs from 'node:fs'
import { LocalPathLoader, RAGApplication, TextLoader } from '@llm-tools/embedjs' import { JsonLoader, LocalPathLoader, RAGApplication, TextLoader } from '@llm-tools/embedjs'
import type { AddLoaderReturn } from '@llm-tools/embedjs-interfaces' import type { AddLoaderReturn } from '@llm-tools/embedjs-interfaces'
import { WebLoader } from '@llm-tools/embedjs-loader-web' import { WebLoader } from '@llm-tools/embedjs-loader-web'
import { LoaderReturn } from '@shared/config/types' import { LoaderReturn } from '@shared/config/types'
import { FileType, KnowledgeBaseParams } from '@types' import { FileType, KnowledgeBaseParams } from '@types'
import Logger from 'electron-log' import Logger from 'electron-log'
import { DraftsExportLoader } from './draftsExportLoader'
import { EpubLoader } from './epubLoader' import { EpubLoader } from './epubLoader'
import { OdLoader, OdType } from './odLoader' import { OdLoader, OdType } from './odLoader'
// embedjs内置loader类型 // embedjs内置loader类型
const commonExts = ['.pdf', '.csv', '.json', '.docx', '.pptx', '.xlsx', '.md'] const commonExts = ['.pdf', '.csv', '.docx', '.pptx', '.xlsx', '.md']
export async function addOdLoader( export async function addOdLoader(
ragApplication: RAGApplication, ragApplication: RAGApplication,
@ -89,7 +90,19 @@ export async function addFileLoader(
} as LoaderReturn } as LoaderReturn
} }
// DraftsExport类型 (file.ext会自动转换成小写)
if (['.draftsexport'].includes(file.ext)) {
const loaderReturn = await ragApplication.addLoader(new DraftsExportLoader(file.path) as any, forceReload)
return {
entriesAdded: loaderReturn.entriesAdded,
uniqueId: loaderReturn.uniqueId,
uniqueIds: [loaderReturn.uniqueId],
loaderType: loaderReturn.loaderType
}
}
const fileContent = fs.readFileSync(file.path, 'utf-8') const fileContent = fs.readFileSync(file.path, 'utf-8')
// HTML类型 // HTML类型
if (['.html', '.htm'].includes(file.ext)) { if (['.html', '.htm'].includes(file.ext)) {
const loaderReturn = await ragApplication.addLoader( const loaderReturn = await ragApplication.addLoader(
@ -108,6 +121,18 @@ export async function addFileLoader(
} }
} }
// JSON类型
if (['.json'].includes(file.ext)) {
const jsonObject = JSON.parse(fileContent)
const loaderReturn = await ragApplication.addLoader(new JsonLoader({ object: jsonObject }))
return {
entriesAdded: loaderReturn.entriesAdded,
uniqueId: loaderReturn.uniqueId,
uniqueIds: [loaderReturn.uniqueId],
loaderType: loaderReturn.loaderType
}
}
// 文本类型 // 文本类型
const loaderReturn = await ragApplication.addLoader( const loaderReturn = await ragApplication.addLoader(
new TextLoader({ text: fileContent, chunkSize: base.chunkSize, chunkOverlap: base.chunkOverlap }) as any, new TextLoader({ text: fileContent, chunkSize: base.chunkSize, chunkOverlap: base.chunkOverlap }) as any,

View File

@ -18,7 +18,7 @@ import { useKnowledge } from '@renderer/hooks/useKnowledge'
import FileManager from '@renderer/services/FileManager' import FileManager from '@renderer/services/FileManager'
import { getProviderName } from '@renderer/services/ProviderService' import { getProviderName } from '@renderer/services/ProviderService'
import { FileType, FileTypes, KnowledgeBase } from '@renderer/types' import { FileType, FileTypes, KnowledgeBase } from '@renderer/types'
import { bookExts, documentExts, textExts } from '@shared/config/constant' import { bookExts, documentExts, textExts, thirdPartyApplicationExts } from '@shared/config/constant'
import { Alert, Button, Card, Divider, message, Tag, Tooltip, Typography, Upload } from 'antd' import { Alert, Button, Card, Divider, message, Tag, Tooltip, Typography, Upload } from 'antd'
import { FC } from 'react' import { FC } from 'react'
import { useTranslation } from 'react-i18next' import { useTranslation } from 'react-i18next'
@ -35,7 +35,7 @@ interface KnowledgeContentProps {
selectedBase: KnowledgeBase selectedBase: KnowledgeBase
} }
const fileTypes = [...bookExts, ...documentExts, ...textExts] const fileTypes = [...bookExts, ...thirdPartyApplicationExts, ...documentExts, ...textExts]
const KnowledgeContent: FC<KnowledgeContentProps> = ({ selectedBase }) => { const KnowledgeContent: FC<KnowledgeContentProps> = ({ selectedBase }) => {
const { t } = useTranslation() const { t } = useTranslation()