From 574d02a8c9bb5b9e500949dc4ba9495432c5ec88 Mon Sep 17 00:00:00 2001
From: Wenwei Lin <75592450+wenwei-lin@users.noreply.github.com>
Date: Mon, 17 Feb 2025 08:25:07 +0800
Subject: [PATCH] feat: support json and draftsExport file in knowledge base
(#1717)
---
packages/shared/config/constant.ts | 1 +
src/main/loader/draftsExportLoader.ts | 22 ++++++++++++++
src/main/loader/index.ts | 29 +++++++++++++++++--
.../src/pages/knowledge/KnowledgeContent.tsx | 4 +--
4 files changed, 52 insertions(+), 4 deletions(-)
create mode 100644 src/main/loader/draftsExportLoader.ts
diff --git a/packages/shared/config/constant.ts b/packages/shared/config/constant.ts
index c39b1370..1c7b5758 100644
--- a/packages/shared/config/constant.ts
+++ b/packages/shared/config/constant.ts
@@ -2,6 +2,7 @@ export const imageExts = ['.jpg', '.jpeg', '.png', '.gif', '.bmp', '.webp']
export const videoExts = ['.mp4', '.avi', '.mov', '.wmv', '.flv', '.mkv']
export const audioExts = ['.mp3', '.wav', '.ogg', '.flac', '.aac']
export const documentExts = ['.pdf', '.docx', '.pptx', '.xlsx', '.odt', '.odp', '.ods']
+export const thirdPartyApplicationExts = ['.draftsExport']
export const bookExts = ['.epub']
export const textExts = [
'.txt', // 普通文本文件
diff --git a/src/main/loader/draftsExportLoader.ts b/src/main/loader/draftsExportLoader.ts
new file mode 100644
index 00000000..4ad1806b
--- /dev/null
+++ b/src/main/loader/draftsExportLoader.ts
@@ -0,0 +1,22 @@
+import * as fs from 'node:fs'
+
+import { JsonLoader } from '@llm-tools/embedjs'
+
+/**
+ * Loader for note exports from the Drafts app (a JSON array of note objects).
+ * Each note is reduced to its `content`, `tags` and `modified_at` fields.
+ */
+export class DraftsExportLoader extends JsonLoader {
+  /** @param filePath path of the exported file; read synchronously as UTF-8 */
+  constructor(filePath: string) {
+    const fileContent = fs.readFileSync(filePath, 'utf-8')
+    const rawJson = JSON.parse(fileContent) as any[]
+    const json = rawJson.map((item) => ({
+      content: item.content?.replace(/\n/g, '<br>'),
+      tags: item.tags,
+      // Prefer the real modification time; fall back to created_at if it is absent.
+      modified_at: item.modified_at ?? item.created_at
+    }))
+    super({ object: json })
+  }
+}
diff --git a/src/main/loader/index.ts b/src/main/loader/index.ts
index 3cc1d1af..f23c58b1 100644
--- a/src/main/loader/index.ts
+++ b/src/main/loader/index.ts
@@ -1,17 +1,18 @@
import * as fs from 'node:fs'
-import { LocalPathLoader, RAGApplication, TextLoader } from '@llm-tools/embedjs'
+import { JsonLoader, LocalPathLoader, RAGApplication, TextLoader } from '@llm-tools/embedjs'
import type { AddLoaderReturn } from '@llm-tools/embedjs-interfaces'
import { WebLoader } from '@llm-tools/embedjs-loader-web'
import { LoaderReturn } from '@shared/config/types'
import { FileType, KnowledgeBaseParams } from '@types'
import Logger from 'electron-log'
+import { DraftsExportLoader } from './draftsExportLoader'
import { EpubLoader } from './epubLoader'
import { OdLoader, OdType } from './odLoader'
// embedjs内置loader类型
-const commonExts = ['.pdf', '.csv', '.json', '.docx', '.pptx', '.xlsx', '.md']
+const commonExts = ['.pdf', '.csv', '.docx', '.pptx', '.xlsx', '.md']
export async function addOdLoader(
ragApplication: RAGApplication,
@@ -89,7 +90,19 @@ export async function addFileLoader(
} as LoaderReturn
}
+ // DraftsExport类型 (file.ext会自动转换成小写)
+ if (['.draftsexport'].includes(file.ext)) {
+ const loaderReturn = await ragApplication.addLoader(new DraftsExportLoader(file.path) as any, forceReload)
+ return {
+ entriesAdded: loaderReturn.entriesAdded,
+ uniqueId: loaderReturn.uniqueId,
+ uniqueIds: [loaderReturn.uniqueId],
+ loaderType: loaderReturn.loaderType
+ }
+ }
+
const fileContent = fs.readFileSync(file.path, 'utf-8')
+
// HTML类型
if (['.html', '.htm'].includes(file.ext)) {
const loaderReturn = await ragApplication.addLoader(
@@ -108,6 +121,18 @@ export async function addFileLoader(
}
}
+ // JSON类型
+ if (['.json'].includes(file.ext)) {
+ const jsonObject = JSON.parse(fileContent)
+ const loaderReturn = await ragApplication.addLoader(new JsonLoader({ object: jsonObject }))
+ return {
+ entriesAdded: loaderReturn.entriesAdded,
+ uniqueId: loaderReturn.uniqueId,
+ uniqueIds: [loaderReturn.uniqueId],
+ loaderType: loaderReturn.loaderType
+ }
+ }
+
// 文本类型
const loaderReturn = await ragApplication.addLoader(
new TextLoader({ text: fileContent, chunkSize: base.chunkSize, chunkOverlap: base.chunkOverlap }) as any,
diff --git a/src/renderer/src/pages/knowledge/KnowledgeContent.tsx b/src/renderer/src/pages/knowledge/KnowledgeContent.tsx
index 41bda2a9..f248f736 100644
--- a/src/renderer/src/pages/knowledge/KnowledgeContent.tsx
+++ b/src/renderer/src/pages/knowledge/KnowledgeContent.tsx
@@ -18,7 +18,7 @@ import { useKnowledge } from '@renderer/hooks/useKnowledge'
import FileManager from '@renderer/services/FileManager'
import { getProviderName } from '@renderer/services/ProviderService'
import { FileType, FileTypes, KnowledgeBase } from '@renderer/types'
-import { bookExts, documentExts, textExts } from '@shared/config/constant'
+import { bookExts, documentExts, textExts, thirdPartyApplicationExts } from '@shared/config/constant'
import { Alert, Button, Card, Divider, message, Tag, Tooltip, Typography, Upload } from 'antd'
import { FC } from 'react'
import { useTranslation } from 'react-i18next'
@@ -35,7 +35,7 @@ interface KnowledgeContentProps {
selectedBase: KnowledgeBase
}
-const fileTypes = [...bookExts, ...documentExts, ...textExts]
+const fileTypes = [...bookExts, ...thirdPartyApplicationExts, ...documentExts, ...textExts]
const KnowledgeContent: FC = ({ selectedBase }) => {
const { t } = useTranslation()