feat: Support configurable chunk size and overlap for knowledge base
This commit is contained in:
parent
4464992873
commit
7f7300e6dc
@ -15,3 +15,203 @@ index 50c3c4064af17bc4c7c46554d8f2419b3afceb0e..632c9b2e04d2e0e3bb09ef1cd8f29d25
|
|||||||
}
|
}
|
||||||
static getInstance() {
|
static getInstance() {
|
||||||
return RAGEmbedding.singleton;
|
return RAGEmbedding.singleton;
|
||||||
|
diff --git a/src/loaders/local-path-loader.d.ts b/src/loaders/local-path-loader.d.ts
|
||||||
|
index 48c20e68c469cd309be2dc8f28e44c1bd04a26e9..87002be39e7305a02e2a607b0c0d95cbbc359f9d 100644
|
||||||
|
--- a/src/loaders/local-path-loader.d.ts
|
||||||
|
+++ b/src/loaders/local-path-loader.d.ts
|
||||||
|
@@ -1,19 +1,29 @@
|
||||||
|
-import { BaseLoader } from '@llm-tools/embedjs-interfaces';
|
||||||
|
+import { BaseLoader } from "@llm-tools/embedjs-interfaces";
|
||||||
|
export declare class LocalPathLoader extends BaseLoader<{
|
||||||
|
- type: 'LocalPathLoader';
|
||||||
|
+ type: "LocalPathLoader";
|
||||||
|
}> {
|
||||||
|
- private readonly debug;
|
||||||
|
- private readonly path;
|
||||||
|
- constructor({ path }: {
|
||||||
|
- path: string;
|
||||||
|
- });
|
||||||
|
- getUnfilteredChunks(): AsyncGenerator<{
|
||||||
|
- metadata: {
|
||||||
|
- type: "LocalPathLoader";
|
||||||
|
- originalPath: string;
|
||||||
|
- source: string;
|
||||||
|
- };
|
||||||
|
- pageContent: string;
|
||||||
|
- }, void, unknown>;
|
||||||
|
- private recursivelyAddPath;
|
||||||
|
+ private readonly debug;
|
||||||
|
+ private readonly path;
|
||||||
|
+ constructor({
|
||||||
|
+ path,
|
||||||
|
+ chunkSize,
|
||||||
|
+ chunkOverlap,
|
||||||
|
+ }: {
|
||||||
|
+ path: string;
|
||||||
|
+ chunkSize?: number;
|
||||||
|
+ chunkOverlap?: number;
|
||||||
|
+ });
|
||||||
|
+ getUnfilteredChunks(): AsyncGenerator<
|
||||||
|
+ {
|
||||||
|
+ metadata: {
|
||||||
|
+ type: "LocalPathLoader";
|
||||||
|
+ originalPath: string;
|
||||||
|
+ source: string;
|
||||||
|
+ };
|
||||||
|
+ pageContent: string;
|
||||||
|
+ },
|
||||||
|
+ void,
|
||||||
|
+ unknown
|
||||||
|
+ >;
|
||||||
|
+ private recursivelyAddPath;
|
||||||
|
}
|
||||||
|
diff --git a/src/loaders/local-path-loader.js b/src/loaders/local-path-loader.js
|
||||||
|
index 4cf8a6bd1d890244c8ec49d4a05ee3bd58861c79..fd0fe1951c73da315b0c9bf4a8f33effbadb9f8f 100644
|
||||||
|
--- a/src/loaders/local-path-loader.js
|
||||||
|
+++ b/src/loaders/local-path-loader.js
|
||||||
|
@@ -8,8 +8,8 @@ import { BaseLoader } from '@llm-tools/embedjs-interfaces';
|
||||||
|
export class LocalPathLoader extends BaseLoader {
|
||||||
|
debug = createDebugMessages('embedjs:loader:LocalPathLoader');
|
||||||
|
path;
|
||||||
|
- constructor({ path }) {
|
||||||
|
- super(`LocalPathLoader_${md5(path)}`, { path });
|
||||||
|
+ constructor({ path, chunkSize, chunkOverlap}) {
|
||||||
|
+ super(`LocalPathLoader_${md5(path)}`, { path }, chunkSize ?? 1000, chunkOverlap ?? 0);
|
||||||
|
this.path = path;
|
||||||
|
}
|
||||||
|
async *getUnfilteredChunks() {
|
||||||
|
@@ -36,10 +36,12 @@ export class LocalPathLoader extends BaseLoader {
|
||||||
|
const extension = currentPath.split('.').pop().toLowerCase();
|
||||||
|
if (extension === 'md' || extension === 'mdx')
|
||||||
|
mime = 'text/markdown';
|
||||||
|
+ if (extension === 'txt')
|
||||||
|
+ mime = 'text/plain';
|
||||||
|
this.debug(`File '${this.path}' mime type updated to 'text/markdown'`);
|
||||||
|
}
|
||||||
|
try {
|
||||||
|
- const loader = await createLoaderFromMimeType(currentPath, mime);
|
||||||
|
+ const loader = await createLoaderFromMimeType(currentPath, mime, this.chunkSize, this.chunkOverlap);
|
||||||
|
for await (const result of await loader.getUnfilteredChunks()) {
|
||||||
|
yield {
|
||||||
|
pageContent: result.pageContent,
|
||||||
|
diff --git a/src/util/mime.d.ts b/src/util/mime.d.ts
|
||||||
|
index 57f56a1b8edc98366af9f84d671676c41c2f01ca..f53856fa9c78afbeee9e085c7ed0b3a131f8ee5a 100644
|
||||||
|
--- a/src/util/mime.d.ts
|
||||||
|
+++ b/src/util/mime.d.ts
|
||||||
|
@@ -1,2 +1,7 @@
|
||||||
|
-import { BaseLoader } from '@llm-tools/embedjs-interfaces';
|
||||||
|
-export declare function createLoaderFromMimeType(loaderData: string, mimeType: string): Promise<BaseLoader>;
|
||||||
|
+import { BaseLoader } from "@llm-tools/embedjs-interfaces";
|
||||||
|
+export declare function createLoaderFromMimeType(
|
||||||
|
+ loaderData: string,
|
||||||
|
+ mimeType: string,
|
||||||
|
+ chunkSize?: number,
|
||||||
|
+ chunkOverlap?: number
|
||||||
|
+): Promise<BaseLoader>;
|
||||||
|
diff --git a/src/util/mime.js b/src/util/mime.js
|
||||||
|
index 9af30bd5b8cf42985f547073a4c19756292c33a3..54ae20343131a533ab70236d3060b6accc8f6126 100644
|
||||||
|
--- a/src/util/mime.js
|
||||||
|
+++ b/src/util/mime.js
|
||||||
|
@@ -1,7 +1,9 @@
|
||||||
|
import mime from 'mime';
|
||||||
|
import createDebugMessages from 'debug';
|
||||||
|
import { TextLoader } from '../loaders/text-loader.js';
|
||||||
|
-export async function createLoaderFromMimeType(loaderData, mimeType) {
|
||||||
|
+import fs from 'node:fs';
|
||||||
|
+
|
||||||
|
+export async function createLoaderFromMimeType(loaderData, mimeType, chunkSize, chunkOverlap) {
|
||||||
|
createDebugMessages('embedjs:util:createLoaderFromMimeType')(`Incoming mime type '${mimeType}'`);
|
||||||
|
switch (mimeType) {
|
||||||
|
case 'application/msword':
|
||||||
|
@@ -10,7 +12,7 @@ export async function createLoaderFromMimeType(loaderData, mimeType) {
|
||||||
|
throw new Error('Package `@llm-tools/embedjs-loader-msoffice` needs to be installed to load docx files');
|
||||||
|
});
|
||||||
|
createDebugMessages('embedjs:util:createLoaderFromMimeType')('Dynamically imported DocxLoader');
|
||||||
|
- return new DocxLoader({ filePathOrUrl: loaderData });
|
||||||
|
+ return new DocxLoader({ filePathOrUrl: loaderData, chunkSize, chunkOverlap });
|
||||||
|
}
|
||||||
|
case 'application/vnd.ms-excel':
|
||||||
|
case 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet': {
|
||||||
|
@@ -18,21 +20,21 @@ export async function createLoaderFromMimeType(loaderData, mimeType) {
|
||||||
|
throw new Error('Package `@llm-tools/embedjs-loader-msoffice` needs to be installed to load excel files');
|
||||||
|
});
|
||||||
|
createDebugMessages('embedjs:util:createLoaderFromMimeType')('Dynamically imported ExcelLoader');
|
||||||
|
- return new ExcelLoader({ filePathOrUrl: loaderData });
|
||||||
|
+ return new ExcelLoader({ filePathOrUrl: loaderData, chunkSize, chunkOverlap });
|
||||||
|
}
|
||||||
|
case 'application/pdf': {
|
||||||
|
const { PdfLoader } = await import('@llm-tools/embedjs-loader-pdf').catch(() => {
|
||||||
|
throw new Error('Package `@llm-tools/embedjs-loader-pdf` needs to be installed to load PDF files');
|
||||||
|
});
|
||||||
|
createDebugMessages('embedjs:util:createLoaderFromMimeType')('Dynamically imported PdfLoader');
|
||||||
|
- return new PdfLoader({ filePathOrUrl: loaderData });
|
||||||
|
+ return new PdfLoader({ filePathOrUrl: loaderData, chunkSize, chunkOverlap });
|
||||||
|
}
|
||||||
|
case 'application/vnd.openxmlformats-officedocument.presentationml.presentation': {
|
||||||
|
const { PptLoader } = await import('@llm-tools/embedjs-loader-msoffice').catch(() => {
|
||||||
|
throw new Error('Package `@llm-tools/embedjs-loader-msoffice` needs to be installed to load pptx files');
|
||||||
|
});
|
||||||
|
createDebugMessages('embedjs:util:createLoaderFromMimeType')('Dynamically imported PptLoader');
|
||||||
|
- return new PptLoader({ filePathOrUrl: loaderData });
|
||||||
|
+ return new PptLoader({ filePathOrUrl: loaderData, chunkSize, chunkOverlap });
|
||||||
|
}
|
||||||
|
case 'text/plain': {
|
||||||
|
const fineType = mime.getType(loaderData);
|
||||||
|
@@ -42,24 +44,26 @@ export async function createLoaderFromMimeType(loaderData, mimeType) {
|
||||||
|
throw new Error('Package `@llm-tools/embedjs-loader-csv` needs to be installed to load CSV files');
|
||||||
|
});
|
||||||
|
createDebugMessages('embedjs:util:createLoaderFromMimeType')('Dynamically imported CsvLoader');
|
||||||
|
- return new CsvLoader({ filePathOrUrl: loaderData });
|
||||||
|
+ return new CsvLoader({ filePathOrUrl: loaderData, chunkSize, chunkOverlap });
|
||||||
|
+ }
|
||||||
|
+ else{
|
||||||
|
+ const content = fs.readFileSync(loaderData, 'utf-8');
|
||||||
|
+ return new TextLoader({ text: content, chunkSize, chunkOverlap });
|
||||||
|
}
|
||||||
|
- else
|
||||||
|
- return new TextLoader({ text: loaderData });
|
||||||
|
}
|
||||||
|
case 'application/csv': {
|
||||||
|
const { CsvLoader } = await import('@llm-tools/embedjs-loader-csv').catch(() => {
|
||||||
|
throw new Error('Package `@llm-tools/embedjs-loader-csv` needs to be installed to load CSV files');
|
||||||
|
});
|
||||||
|
createDebugMessages('embedjs:util:createLoaderFromMimeType')('Dynamically imported CsvLoader');
|
||||||
|
- return new CsvLoader({ filePathOrUrl: loaderData });
|
||||||
|
+ return new CsvLoader({ filePathOrUrl: loaderData, chunkSize, chunkOverlap });
|
||||||
|
}
|
||||||
|
case 'text/html': {
|
||||||
|
const { WebLoader } = await import('@llm-tools/embedjs-loader-web').catch(() => {
|
||||||
|
throw new Error('Package `@llm-tools/embedjs-loader-web` needs to be installed to load web documents');
|
||||||
|
});
|
||||||
|
createDebugMessages('embedjs:util:createLoaderFromMimeType')('Dynamically imported WebLoader');
|
||||||
|
- return new WebLoader({ urlOrContent: loaderData });
|
||||||
|
+ return new WebLoader({ urlOrContent: loaderData, chunkSize, chunkOverlap });
|
||||||
|
}
|
||||||
|
case 'text/xml': {
|
||||||
|
const { SitemapLoader } = await import('@llm-tools/embedjs-loader-sitemap').catch(() => {
|
||||||
|
@@ -67,14 +71,14 @@ export async function createLoaderFromMimeType(loaderData, mimeType) {
|
||||||
|
});
|
||||||
|
createDebugMessages('embedjs:util:createLoaderFromMimeType')('Dynamically imported SitemapLoader');
|
||||||
|
if (await SitemapLoader.test(loaderData)) {
|
||||||
|
- return new SitemapLoader({ url: loaderData });
|
||||||
|
+ return new SitemapLoader({ url: loaderData, chunkSize, chunkOverlap });
|
||||||
|
}
|
||||||
|
//This is not a Sitemap but is still XML
|
||||||
|
const { XmlLoader } = await import('@llm-tools/embedjs-loader-xml').catch(() => {
|
||||||
|
throw new Error('Package `@llm-tools/embedjs-loader-xml` needs to be installed to load XML documents');
|
||||||
|
});
|
||||||
|
createDebugMessages('embedjs:util:createLoaderFromMimeType')('Dynamically imported XmlLoader');
|
||||||
|
- return new XmlLoader({ filePathOrUrl: loaderData });
|
||||||
|
+ return new XmlLoader({ filePathOrUrl: loaderData, chunkSize, chunkOverlap });
|
||||||
|
}
|
||||||
|
case 'text/x-markdown':
|
||||||
|
case 'text/markdown': {
|
||||||
|
@@ -82,7 +86,7 @@ export async function createLoaderFromMimeType(loaderData, mimeType) {
|
||||||
|
throw new Error('Package `@llm-tools/embedjs-loader-markdown` needs to be installed to load markdown files');
|
||||||
|
});
|
||||||
|
createDebugMessages('embedjs:util:createLoaderFromMimeType')('Dynamically imported MarkdownLoader');
|
||||||
|
- return new MarkdownLoader({ filePathOrUrl: loaderData });
|
||||||
|
+ return new MarkdownLoader({ filePathOrUrl: loaderData, chunkSize, chunkOverlap });
|
||||||
|
}
|
||||||
|
case undefined:
|
||||||
|
throw new Error(`MIME type could not be detected. Please file an issue if you think this is a bug.`);
|
||||||
|
|||||||
@ -83,54 +83,103 @@ class KnowledgeService {
|
|||||||
|
|
||||||
if (item.type === 'directory') {
|
if (item.type === 'directory') {
|
||||||
const directory = item.content as string
|
const directory = item.content as string
|
||||||
return await ragApplication.addLoader(new LocalPathLoader({ path: directory }), forceReload)
|
return await ragApplication.addLoader(
|
||||||
|
new LocalPathLoader({ path: directory, chunkSize: base.chunkSize, chunkOverlap: base.chunkOverlap }) as any,
|
||||||
|
forceReload
|
||||||
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
if (item.type === 'url') {
|
if (item.type === 'url') {
|
||||||
const content = item.content as string
|
const content = item.content as string
|
||||||
if (content.startsWith('http')) {
|
if (content.startsWith('http')) {
|
||||||
// @ts-ignore loader type
|
return await ragApplication.addLoader(
|
||||||
return await ragApplication.addLoader(new WebLoader({ urlOrContent: content }), forceReload)
|
new WebLoader({ urlOrContent: content, chunkSize: base.chunkSize, chunkOverlap: base.chunkOverlap }) as any,
|
||||||
|
forceReload
|
||||||
|
)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (item.type === 'sitemap') {
|
if (item.type === 'sitemap') {
|
||||||
const content = item.content as string
|
const content = item.content as string
|
||||||
// @ts-ignore loader type
|
// @ts-ignore loader type
|
||||||
return await ragApplication.addLoader(new SitemapLoader({ url: content }), forceReload)
|
return await ragApplication.addLoader(
|
||||||
|
new SitemapLoader({ url: content, chunkSize: base.chunkSize, chunkOverlap: base.chunkOverlap }) as any,
|
||||||
|
forceReload
|
||||||
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
if (item.type === 'note') {
|
if (item.type === 'note') {
|
||||||
const content = item.content as string
|
const content = item.content as string
|
||||||
return await ragApplication.addLoader(new TextLoader({ text: content }), forceReload)
|
return await ragApplication.addLoader(
|
||||||
|
new TextLoader({ text: content, chunkSize: base.chunkSize, chunkOverlap: base.chunkOverlap }),
|
||||||
|
forceReload
|
||||||
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
if (item.type === 'file') {
|
if (item.type === 'file') {
|
||||||
const file = item.content as FileType
|
const file = item.content as FileType
|
||||||
|
|
||||||
if (file.ext === '.pdf') {
|
if (file.ext === '.pdf') {
|
||||||
return await ragApplication.addLoader(new PdfLoader({ filePathOrUrl: file.path }) as any, forceReload)
|
return await ragApplication.addLoader(
|
||||||
|
new PdfLoader({
|
||||||
|
filePathOrUrl: file.path,
|
||||||
|
chunkSize: base.chunkSize,
|
||||||
|
chunkOverlap: base.chunkOverlap
|
||||||
|
}) as any,
|
||||||
|
forceReload
|
||||||
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
if (file.ext === '.docx') {
|
if (file.ext === '.docx') {
|
||||||
return await ragApplication.addLoader(new DocxLoader({ filePathOrUrl: file.path }) as any, forceReload)
|
return await ragApplication.addLoader(
|
||||||
|
new DocxLoader({
|
||||||
|
filePathOrUrl: file.path,
|
||||||
|
chunkSize: base.chunkSize,
|
||||||
|
chunkOverlap: base.chunkOverlap
|
||||||
|
}) as any,
|
||||||
|
forceReload
|
||||||
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
if (file.ext === '.pptx') {
|
if (file.ext === '.pptx') {
|
||||||
return await ragApplication.addLoader(new PptLoader({ filePathOrUrl: file.path }) as any, forceReload)
|
return await ragApplication.addLoader(
|
||||||
|
new PptLoader({
|
||||||
|
filePathOrUrl: file.path,
|
||||||
|
chunkSize: base.chunkSize,
|
||||||
|
chunkOverlap: base.chunkOverlap
|
||||||
|
}) as any,
|
||||||
|
forceReload
|
||||||
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
if (file.ext === '.xlsx') {
|
if (file.ext === '.xlsx') {
|
||||||
return await ragApplication.addLoader(new ExcelLoader({ filePathOrUrl: file.path }) as any, forceReload)
|
return await ragApplication.addLoader(
|
||||||
|
new ExcelLoader({
|
||||||
|
filePathOrUrl: file.path,
|
||||||
|
chunkSize: base.chunkSize,
|
||||||
|
chunkOverlap: base.chunkOverlap
|
||||||
|
}) as any,
|
||||||
|
forceReload
|
||||||
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
if (['.md'].includes(file.ext)) {
|
if (['.md'].includes(file.ext)) {
|
||||||
return await ragApplication.addLoader(new MarkdownLoader({ filePathOrUrl: file.path }) as any, forceReload)
|
return await ragApplication.addLoader(
|
||||||
|
new MarkdownLoader({
|
||||||
|
filePathOrUrl: file.path,
|
||||||
|
chunkSize: base.chunkSize,
|
||||||
|
chunkOverlap: base.chunkOverlap
|
||||||
|
}) as any,
|
||||||
|
forceReload
|
||||||
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
const fileContent = fs.readFileSync(file.path, 'utf-8')
|
const fileContent = fs.readFileSync(file.path, 'utf-8')
|
||||||
|
|
||||||
return await ragApplication.addLoader(new TextLoader({ text: fileContent }), forceReload)
|
return await ragApplication.addLoader(
|
||||||
|
new TextLoader({ text: fileContent, chunkSize: base.chunkSize, chunkOverlap: base.chunkOverlap }),
|
||||||
|
forceReload
|
||||||
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
return { entriesAdded: 0, uniqueId: '', loaderType: '' }
|
return { entriesAdded: 0, uniqueId: '', loaderType: '' }
|
||||||
|
|||||||
@ -245,6 +245,7 @@
|
|||||||
"error.enter.api.key": "Please enter your API key first",
|
"error.enter.api.key": "Please enter your API key first",
|
||||||
"error.enter.model": "Please select a model first",
|
"error.enter.model": "Please select a model first",
|
||||||
"error.enter.name": "Please enter the name of the knowledge base",
|
"error.enter.name": "Please enter the name of the knowledge base",
|
||||||
|
"error.chunk_overlap_too_large": "Chunk overlap cannot be greater than chunk size",
|
||||||
"error.invalid.proxy.url": "Invalid proxy URL",
|
"error.invalid.proxy.url": "Invalid proxy URL",
|
||||||
"error.invalid.webdav": "Invalid WebDAV settings",
|
"error.invalid.webdav": "Invalid WebDAV settings",
|
||||||
"message.code_style": "Code style",
|
"message.code_style": "Code style",
|
||||||
@ -625,7 +626,10 @@
|
|||||||
"model_info": "Model Info",
|
"model_info": "Model Info",
|
||||||
"not_support": "Knowledge base database engine updated, the knowledge base will no longer be supported, please create a new knowledge base",
|
"not_support": "Knowledge base database engine updated, the knowledge base will no longer be supported, please create a new knowledge base",
|
||||||
"no_provider": "Knowledge base model provider is not set, the knowledge base will no longer be supported, please create a new knowledge base",
|
"no_provider": "Knowledge base model provider is not set, the knowledge base will no longer be supported, please create a new knowledge base",
|
||||||
"source": "Source"
|
"source": "Source",
|
||||||
|
"chunk_size": "Chunk Size",
|
||||||
|
"chunk_overlap": "Chunk Overlap",
|
||||||
|
"not_set": "Not Set"
|
||||||
},
|
},
|
||||||
"models": {
|
"models": {
|
||||||
"pinned": "Pinned",
|
"pinned": "Pinned",
|
||||||
|
|||||||
@ -244,6 +244,7 @@
|
|||||||
"error.enter.api.host": "APIホストを入力してください",
|
"error.enter.api.host": "APIホストを入力してください",
|
||||||
"error.enter.api.key": "APIキーを入力してください",
|
"error.enter.api.key": "APIキーを入力してください",
|
||||||
"error.enter.model": "モデルを選択してください",
|
"error.enter.model": "モデルを選択してください",
|
||||||
|
"error.chunk_overlap_too_large": "チャンクの重なりは、チャンクサイズを超えることはできません",
|
||||||
"error.invalid.proxy.url": "無効なプロキシURL",
|
"error.invalid.proxy.url": "無効なプロキシURL",
|
||||||
"error.invalid.webdav": "無効なWebDAV設定",
|
"error.invalid.webdav": "無効なWebDAV設定",
|
||||||
"message.code_style": "コードスタイル",
|
"message.code_style": "コードスタイル",
|
||||||
@ -609,7 +610,10 @@
|
|||||||
"model_info": "モデル情報",
|
"model_info": "モデル情報",
|
||||||
"not_support": "ナレッジベースデータベースエンジンが更新されました。このナレッジベースはもうサポートされていません。新しいナレッジベースを作成してください",
|
"not_support": "ナレッジベースデータベースエンジンが更新されました。このナレッジベースはもうサポートされていません。新しいナレッジベースを作成してください",
|
||||||
"no_provider": "ナレッジベースモデルプロバイダーが設定されていません。ナレッジベースはもうサポートされていません。新しいナレッジベースを作成してください",
|
"no_provider": "ナレッジベースモデルプロバイダーが設定されていません。ナレッジベースはもうサポートされていません。新しいナレッジベースを作成してください",
|
||||||
"source": "ソース"
|
"source": "ソース",
|
||||||
|
"chunk_size": "チャンクサイズ",
|
||||||
|
"chunk_overlap": "チャンクの重なり",
|
||||||
|
"not_set": "未設定"
|
||||||
},
|
},
|
||||||
"models": {
|
"models": {
|
||||||
"pinned": "固定済み",
|
"pinned": "固定済み",
|
||||||
|
|||||||
@ -245,6 +245,7 @@
|
|||||||
"error.enter.api.key": "Пожалуйста, введите ваш API ключ",
|
"error.enter.api.key": "Пожалуйста, введите ваш API ключ",
|
||||||
"error.enter.model": "Пожалуйста, выберите модель",
|
"error.enter.model": "Пожалуйста, выберите модель",
|
||||||
"error.enter.name": "Пожалуйста, введите название базы знаний",
|
"error.enter.name": "Пожалуйста, введите название базы знаний",
|
||||||
|
"error.chunk_overlap_too_large": "Перекрытие фрагментов не может быть больше размера фрагмента.",
|
||||||
"error.invalid.proxy.url": "Неверный URL прокси",
|
"error.invalid.proxy.url": "Неверный URL прокси",
|
||||||
"error.invalid.webdav": "Неверные настройки WebDAV",
|
"error.invalid.webdav": "Неверные настройки WebDAV",
|
||||||
"message.code_style": "Стиль кода",
|
"message.code_style": "Стиль кода",
|
||||||
@ -622,7 +623,10 @@
|
|||||||
"model_info": "Модель информации",
|
"model_info": "Модель информации",
|
||||||
"not_support": "База знаний базы данных движок обновлен, база знаний больше не поддерживается, пожалуйста, создайте новую базу знаний",
|
"not_support": "База знаний базы данных движок обновлен, база знаний больше не поддерживается, пожалуйста, создайте новую базу знаний",
|
||||||
"no_provider": "База знаний модель поставщика не настроена, база знаний больше не поддерживается, пожалуйста, создайте новую базу знаний",
|
"no_provider": "База знаний модель поставщика не настроена, база знаний больше не поддерживается, пожалуйста, создайте новую базу знаний",
|
||||||
"source": "Источник"
|
"source": "Источник",
|
||||||
|
"chunk_size": "Размер фрагмента",
|
||||||
|
"chunk_overlap": "Перекрытие фрагмента",
|
||||||
|
"not_set": "Не установлено"
|
||||||
},
|
},
|
||||||
"models": {
|
"models": {
|
||||||
"pinned": "Закреплено",
|
"pinned": "Закреплено",
|
||||||
|
|||||||
@ -246,6 +246,7 @@
|
|||||||
"error.enter.api.key": "请输入您的 API 密钥",
|
"error.enter.api.key": "请输入您的 API 密钥",
|
||||||
"error.enter.model": "请选择一个模型",
|
"error.enter.model": "请选择一个模型",
|
||||||
"error.enter.name": "请输入知识库名称",
|
"error.enter.name": "请输入知识库名称",
|
||||||
|
"error.chunk_overlap_too_large": "分段重叠不能大于分段大小",
|
||||||
"error.invalid.proxy.url": "无效的代理地址",
|
"error.invalid.proxy.url": "无效的代理地址",
|
||||||
"error.invalid.webdav": "无效的 WebDAV 设置",
|
"error.invalid.webdav": "无效的 WebDAV 设置",
|
||||||
"message.code_style": "代码风格",
|
"message.code_style": "代码风格",
|
||||||
@ -611,7 +612,10 @@
|
|||||||
"model_info": "模型信息",
|
"model_info": "模型信息",
|
||||||
"not_support": "知识库数据库引擎已更新,该知识库将不再支持,请重新创建知识库",
|
"not_support": "知识库数据库引擎已更新,该知识库将不再支持,请重新创建知识库",
|
||||||
"no_provider": "知识库模型服务商丢失,该知识库将不再支持,请重新创建知识库",
|
"no_provider": "知识库模型服务商丢失,该知识库将不再支持,请重新创建知识库",
|
||||||
"source": "来源"
|
"source": "来源",
|
||||||
|
"chunk_size": "分段大小",
|
||||||
|
"chunk_overlap": "重叠大小",
|
||||||
|
"not_set": "未设置"
|
||||||
},
|
},
|
||||||
"models": {
|
"models": {
|
||||||
"pinned": "已固定",
|
"pinned": "已固定",
|
||||||
|
|||||||
@ -245,6 +245,7 @@
|
|||||||
"error.enter.api.key": "請先輸入您的 API 密鑰",
|
"error.enter.api.key": "請先輸入您的 API 密鑰",
|
||||||
"error.enter.model": "請先選擇一個模型",
|
"error.enter.model": "請先選擇一個模型",
|
||||||
"error.enter.name": "請先輸入知識庫名稱",
|
"error.enter.name": "請先輸入知識庫名稱",
|
||||||
|
"error.chunk_overlap_too_large": "分段重疊不能大於分段大小",
|
||||||
"error.invalid.proxy.url": "無效的代理 URL",
|
"error.invalid.proxy.url": "無效的代理 URL",
|
||||||
"error.invalid.webdav": "無效的 WebDAV 設定",
|
"error.invalid.webdav": "無效的 WebDAV 設定",
|
||||||
"message.code_style": "程式碼風格",
|
"message.code_style": "程式碼風格",
|
||||||
@ -610,7 +611,10 @@
|
|||||||
"model_info": "模型信息",
|
"model_info": "模型信息",
|
||||||
"not_support": "知識庫數據庫引擎已更新,該知識庫將不再支持,請重新創建知識庫",
|
"not_support": "知識庫數據庫引擎已更新,該知識庫將不再支持,請重新創建知識庫",
|
||||||
"no_provider": "知識庫模型提供商遺失,該知識庫將不再支持,請重新創建知識庫",
|
"no_provider": "知識庫模型提供商遺失,該知識庫將不再支持,請重新創建知識庫",
|
||||||
"source": "來源"
|
"source": "來源",
|
||||||
|
"chunk_size": "分段大小",
|
||||||
|
"chunk_overlap": "重疊大小",
|
||||||
|
"not_set": "未設置"
|
||||||
},
|
},
|
||||||
"models": {
|
"models": {
|
||||||
"pinned": "已固定",
|
"pinned": "已固定",
|
||||||
|
|||||||
@ -361,6 +361,13 @@ const KnowledgeContent: FC<KnowledgeContentProps> = ({ selectedBase }) => {
|
|||||||
{providerName && <Tag color="purple">{providerName}</Tag>}
|
{providerName && <Tag color="purple">{providerName}</Tag>}
|
||||||
</ModelInfo>
|
</ModelInfo>
|
||||||
|
|
||||||
|
<ModelInfo>
|
||||||
|
<label htmlFor="model-info">{t('knowledge.chunk_size')}</label>
|
||||||
|
<Tag color="green">{base.chunkSize || t('knowledge.not_set')}</Tag>
|
||||||
|
<label htmlFor="model-info">{t('knowledge.chunk_overlap')}</label>
|
||||||
|
<Tag color="orange">{base.chunkOverlap || t('knowledge.not_set')}</Tag>
|
||||||
|
</ModelInfo>
|
||||||
|
|
||||||
<IndexSection>
|
<IndexSection>
|
||||||
<Button
|
<Button
|
||||||
type="primary"
|
type="primary"
|
||||||
|
|||||||
@ -6,7 +6,7 @@ import AiProvider from '@renderer/providers/AiProvider'
|
|||||||
import { getKnowledgeBaseParams } from '@renderer/services/KnowledgeService'
|
import { getKnowledgeBaseParams } from '@renderer/services/KnowledgeService'
|
||||||
import { getModelUniqId } from '@renderer/services/ModelService'
|
import { getModelUniqId } from '@renderer/services/ModelService'
|
||||||
import { Model } from '@renderer/types'
|
import { Model } from '@renderer/types'
|
||||||
import { Form, Input, Modal, Select } from 'antd'
|
import { Form, Input, InputNumber, Modal, Select } from 'antd'
|
||||||
import { find, sortBy } from 'lodash'
|
import { find, sortBy } from 'lodash'
|
||||||
import { nanoid } from 'nanoid'
|
import { nanoid } from 'nanoid'
|
||||||
import { useState } from 'react'
|
import { useState } from 'react'
|
||||||
@ -19,6 +19,8 @@ interface ShowParams {
|
|||||||
interface FormData {
|
interface FormData {
|
||||||
name: string
|
name: string
|
||||||
model: string
|
model: string
|
||||||
|
chunkSize?: number
|
||||||
|
chunkOverlap?: number
|
||||||
}
|
}
|
||||||
|
|
||||||
interface Props extends ShowParams {
|
interface Props extends ShowParams {
|
||||||
@ -81,6 +83,8 @@ const PopupContainer: React.FC<Props> = ({ title, resolve }) => {
|
|||||||
name: values.name,
|
name: values.name,
|
||||||
model: selectedModel,
|
model: selectedModel,
|
||||||
dimensions,
|
dimensions,
|
||||||
|
chunkSize: values.chunkSize,
|
||||||
|
chunkOverlap: values.chunkOverlap,
|
||||||
items: [],
|
items: [],
|
||||||
created_at: Date.now(),
|
created_at: Date.now(),
|
||||||
updated_at: Date.now(),
|
updated_at: Date.now(),
|
||||||
@ -131,6 +135,28 @@ const PopupContainer: React.FC<Props> = ({ title, resolve }) => {
|
|||||||
rules={[{ required: true, message: t('message.error.enter.model') }]}>
|
rules={[{ required: true, message: t('message.error.enter.model') }]}>
|
||||||
<Select style={{ width: '100%' }} options={selectOptions} placeholder={t('settings.models.empty')} />
|
<Select style={{ width: '100%' }} options={selectOptions} placeholder={t('settings.models.empty')} />
|
||||||
</Form.Item>
|
</Form.Item>
|
||||||
|
|
||||||
|
<Form.Item name="chunkSize" label={t('knowledge.chunk_size')} initialValue={1000}>
|
||||||
|
<InputNumber style={{ width: '100%' }} min={1} />
|
||||||
|
</Form.Item>
|
||||||
|
|
||||||
|
<Form.Item
|
||||||
|
name="chunkOverlap"
|
||||||
|
label={t('knowledge.chunk_overlap')}
|
||||||
|
initialValue={0}
|
||||||
|
rules={[
|
||||||
|
({ getFieldValue }) => ({
|
||||||
|
validator(_, value) {
|
||||||
|
if (!value || getFieldValue('chunkSize') > value) {
|
||||||
|
return Promise.resolve()
|
||||||
|
}
|
||||||
|
return Promise.reject(new Error(t('message.error.chunk_overlap_too_large')))
|
||||||
|
}
|
||||||
|
})
|
||||||
|
]}
|
||||||
|
dependencies={['chunkSize']}>
|
||||||
|
<InputNumber style={{ width: '100%' }} min={0} />
|
||||||
|
</Form.Item>
|
||||||
</Form>
|
</Form>
|
||||||
</Modal>
|
</Modal>
|
||||||
)
|
)
|
||||||
|
|||||||
@ -22,7 +22,9 @@ export const getKnowledgeBaseParams = (base: KnowledgeBase): KnowledgeBaseParams
|
|||||||
dimensions: base.dimensions,
|
dimensions: base.dimensions,
|
||||||
apiKey: aiProvider.getApiKey() || 'secret',
|
apiKey: aiProvider.getApiKey() || 'secret',
|
||||||
apiVersion: provider.apiVersion,
|
apiVersion: provider.apiVersion,
|
||||||
baseURL: host
|
baseURL: host,
|
||||||
|
chunkSize: base.chunkSize,
|
||||||
|
chunkOverlap: base.chunkOverlap
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@ -232,6 +232,8 @@ export interface KnowledgeBase {
|
|||||||
created_at: number
|
created_at: number
|
||||||
updated_at: number
|
updated_at: number
|
||||||
version: number
|
version: number
|
||||||
|
chunkSize?: number
|
||||||
|
chunkOverlap?: number
|
||||||
}
|
}
|
||||||
|
|
||||||
export type KnowledgeBaseParams = {
|
export type KnowledgeBaseParams = {
|
||||||
@ -241,6 +243,8 @@ export type KnowledgeBaseParams = {
|
|||||||
apiKey: string
|
apiKey: string
|
||||||
apiVersion?: string
|
apiVersion?: string
|
||||||
baseURL: string
|
baseURL: string
|
||||||
|
chunkSize?: number
|
||||||
|
chunkOverlap?: number
|
||||||
}
|
}
|
||||||
|
|
||||||
export type GenerateImageParams = {
|
export type GenerateImageParams = {
|
||||||
|
|||||||
@ -1693,7 +1693,7 @@ __metadata:
|
|||||||
|
|
||||||
"@llm-tools/embedjs@patch:@llm-tools/embedjs@npm%3A0.1.25#~/.yarn/patches/@llm-tools-embedjs-npm-0.1.25-ec5645cf36.patch":
|
"@llm-tools/embedjs@patch:@llm-tools/embedjs@npm%3A0.1.25#~/.yarn/patches/@llm-tools-embedjs-npm-0.1.25-ec5645cf36.patch":
|
||||||
version: 0.1.25
|
version: 0.1.25
|
||||||
resolution: "@llm-tools/embedjs@patch:@llm-tools/embedjs@npm%3A0.1.25#~/.yarn/patches/@llm-tools-embedjs-npm-0.1.25-ec5645cf36.patch::version=0.1.25&hash=7b05b5"
|
resolution: "@llm-tools/embedjs@patch:@llm-tools/embedjs@npm%3A0.1.25#~/.yarn/patches/@llm-tools-embedjs-npm-0.1.25-ec5645cf36.patch::version=0.1.25&hash=3b8a9c"
|
||||||
dependencies:
|
dependencies:
|
||||||
"@langchain/textsplitters": "npm:^0.1.0"
|
"@langchain/textsplitters": "npm:^0.1.0"
|
||||||
"@llm-tools/embedjs-interfaces": "npm:0.1.25"
|
"@llm-tools/embedjs-interfaces": "npm:0.1.25"
|
||||||
@ -1703,7 +1703,7 @@ __metadata:
|
|||||||
md5: "npm:^2.3.0"
|
md5: "npm:^2.3.0"
|
||||||
mime: "npm:^4.0.6"
|
mime: "npm:^4.0.6"
|
||||||
stream-mime-type: "npm:^2.0.0"
|
stream-mime-type: "npm:^2.0.0"
|
||||||
checksum: 10c0/d0a37a5c7232571a71eff7e90ff4ba612bf33022a6eccd933c3a778844320f427a936d0851aae00092e34407c8c2f3555fe4444c6f2139f978ecfdd42fd89375
|
checksum: 10c0/3ef5fb0068e662d9fc3ff794c0c200fca91fba548d1989a628ad2c3576e3f97838f3abca683adc77b1774d57e09c6d155c1c4b9d69eb20aac26bd274148f72a1
|
||||||
languageName: node
|
languageName: node
|
||||||
linkType: hard
|
linkType: hard
|
||||||
|
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user