cherry-studio/.yarn/patches/@llm-tools-embedjs-npm-0.1.25-ec5645cf36.patch

218 lines
11 KiB
Diff

diff --git a/src/core/rag-embedding.js b/src/core/rag-embedding.js
index 50c3c4064af17bc4c7c46554d8f2419b3afceb0e..632c9b2e04d2e0e3bb09ef1cd8f29d2560e6afc1 100644
--- a/src/core/rag-embedding.js
+++ b/src/core/rag-embedding.js
@@ -1,10 +1,8 @@
export class RAGEmbedding {
static singleton;
static async init(embeddingModel) {
- if (!this.singleton) {
- await embeddingModel.init();
- this.singleton = new RAGEmbedding(embeddingModel);
- }
+ await embeddingModel.init();
+ this.singleton = new RAGEmbedding(embeddingModel);
}
static getInstance() {
return RAGEmbedding.singleton;
diff --git a/src/loaders/local-path-loader.d.ts b/src/loaders/local-path-loader.d.ts
index 48c20e68c469cd309be2dc8f28e44c1bd04a26e9..87002be39e7305a02e2a607b0c0d95cbbc359f9d 100644
--- a/src/loaders/local-path-loader.d.ts
+++ b/src/loaders/local-path-loader.d.ts
@@ -1,19 +1,29 @@
-import { BaseLoader } from '@llm-tools/embedjs-interfaces';
+import { BaseLoader } from "@llm-tools/embedjs-interfaces";
export declare class LocalPathLoader extends BaseLoader<{
- type: 'LocalPathLoader';
+ type: "LocalPathLoader";
}> {
- private readonly debug;
- private readonly path;
- constructor({ path }: {
- path: string;
- });
- getUnfilteredChunks(): AsyncGenerator<{
- metadata: {
- type: "LocalPathLoader";
- originalPath: string;
- source: string;
- };
- pageContent: string;
- }, void, unknown>;
- private recursivelyAddPath;
+ private readonly debug;
+ private readonly path;
+ constructor({
+ path,
+ chunkSize,
+ chunkOverlap,
+ }: {
+ path: string;
+ chunkSize?: number;
+ chunkOverlap?: number;
+ });
+ getUnfilteredChunks(): AsyncGenerator<
+ {
+ metadata: {
+ type: "LocalPathLoader";
+ originalPath: string;
+ source: string;
+ };
+ pageContent: string;
+ },
+ void,
+ unknown
+ >;
+ private recursivelyAddPath;
}
diff --git a/src/loaders/local-path-loader.js b/src/loaders/local-path-loader.js
index 4cf8a6bd1d890244c8ec49d4a05ee3bd58861c79..fd0fe1951c73da315b0c9bf4a8f33effbadb9f8f 100644
--- a/src/loaders/local-path-loader.js
+++ b/src/loaders/local-path-loader.js
@@ -8,8 +8,8 @@ import { BaseLoader } from '@llm-tools/embedjs-interfaces';
export class LocalPathLoader extends BaseLoader {
debug = createDebugMessages('embedjs:loader:LocalPathLoader');
path;
- constructor({ path }) {
- super(`LocalPathLoader_${md5(path)}`, { path });
+ constructor({ path, chunkSize, chunkOverlap}) {
+ super(`LocalPathLoader_${md5(path)}`, { path }, chunkSize ?? 1000, chunkOverlap ?? 0);
this.path = path;
}
async *getUnfilteredChunks() {
@@ -36,10 +36,12 @@ export class LocalPathLoader extends BaseLoader {
const extension = currentPath.split('.').pop().toLowerCase();
if (extension === 'md' || extension === 'mdx')
mime = 'text/markdown';
+ if (extension === 'txt')
+ mime = 'text/plain';
this.debug(`File '${this.path}' mime type updated to 'text/markdown'`);
}
try {
- const loader = await createLoaderFromMimeType(currentPath, mime);
+ const loader = await createLoaderFromMimeType(currentPath, mime, this.chunkSize, this.chunkOverlap);
for await (const result of await loader.getUnfilteredChunks()) {
yield {
pageContent: result.pageContent,
diff --git a/src/util/mime.d.ts b/src/util/mime.d.ts
index 57f56a1b8edc98366af9f84d671676c41c2f01ca..f53856fa9c78afbeee9e085c7ed0b3a131f8ee5a 100644
--- a/src/util/mime.d.ts
+++ b/src/util/mime.d.ts
@@ -1,2 +1,7 @@
-import { BaseLoader } from '@llm-tools/embedjs-interfaces';
-export declare function createLoaderFromMimeType(loaderData: string, mimeType: string): Promise<BaseLoader>;
+import { BaseLoader } from "@llm-tools/embedjs-interfaces";
+export declare function createLoaderFromMimeType(
+ loaderData: string,
+ mimeType: string,
+ chunkSize?: number,
+ chunkOverlap?: number
+): Promise<BaseLoader>;
diff --git a/src/util/mime.js b/src/util/mime.js
index 9af30bd5b8cf42985f547073a4c19756292c33a3..54ae20343131a533ab70236d3060b6accc8f6126 100644
--- a/src/util/mime.js
+++ b/src/util/mime.js
@@ -1,7 +1,9 @@
import mime from 'mime';
import createDebugMessages from 'debug';
import { TextLoader } from '../loaders/text-loader.js';
-export async function createLoaderFromMimeType(loaderData, mimeType) {
+import fs from 'node:fs';
+
+export async function createLoaderFromMimeType(loaderData, mimeType, chunkSize, chunkOverlap) {
createDebugMessages('embedjs:util:createLoaderFromMimeType')(`Incoming mime type '${mimeType}'`);
switch (mimeType) {
case 'application/msword':
@@ -10,7 +12,7 @@ export async function createLoaderFromMimeType(loaderData, mimeType) {
throw new Error('Package `@llm-tools/embedjs-loader-msoffice` needs to be installed to load docx files');
});
createDebugMessages('embedjs:util:createLoaderFromMimeType')('Dynamically imported DocxLoader');
- return new DocxLoader({ filePathOrUrl: loaderData });
+ return new DocxLoader({ filePathOrUrl: loaderData, chunkSize, chunkOverlap });
}
case 'application/vnd.ms-excel':
case 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet': {
@@ -18,21 +20,21 @@ export async function createLoaderFromMimeType(loaderData, mimeType) {
throw new Error('Package `@llm-tools/embedjs-loader-msoffice` needs to be installed to load excel files');
});
createDebugMessages('embedjs:util:createLoaderFromMimeType')('Dynamically imported ExcelLoader');
- return new ExcelLoader({ filePathOrUrl: loaderData });
+ return new ExcelLoader({ filePathOrUrl: loaderData, chunkSize, chunkOverlap });
}
case 'application/pdf': {
const { PdfLoader } = await import('@llm-tools/embedjs-loader-pdf').catch(() => {
throw new Error('Package `@llm-tools/embedjs-loader-pdf` needs to be installed to load PDF files');
});
createDebugMessages('embedjs:util:createLoaderFromMimeType')('Dynamically imported PdfLoader');
- return new PdfLoader({ filePathOrUrl: loaderData });
+ return new PdfLoader({ filePathOrUrl: loaderData, chunkSize, chunkOverlap });
}
case 'application/vnd.openxmlformats-officedocument.presentationml.presentation': {
const { PptLoader } = await import('@llm-tools/embedjs-loader-msoffice').catch(() => {
throw new Error('Package `@llm-tools/embedjs-loader-msoffice` needs to be installed to load pptx files');
});
createDebugMessages('embedjs:util:createLoaderFromMimeType')('Dynamically imported PptLoader');
- return new PptLoader({ filePathOrUrl: loaderData });
+ return new PptLoader({ filePathOrUrl: loaderData, chunkSize, chunkOverlap });
}
case 'text/plain': {
const fineType = mime.getType(loaderData);
@@ -42,24 +44,26 @@ export async function createLoaderFromMimeType(loaderData, mimeType) {
throw new Error('Package `@llm-tools/embedjs-loader-csv` needs to be installed to load CSV files');
});
createDebugMessages('embedjs:util:createLoaderFromMimeType')('Dynamically imported CsvLoader');
- return new CsvLoader({ filePathOrUrl: loaderData });
+ return new CsvLoader({ filePathOrUrl: loaderData, chunkSize, chunkOverlap });
+ }
+ else{
+ const content = fs.readFileSync(loaderData, 'utf-8');
+ return new TextLoader({ text: content, chunkSize, chunkOverlap });
}
- else
- return new TextLoader({ text: loaderData });
}
case 'application/csv': {
const { CsvLoader } = await import('@llm-tools/embedjs-loader-csv').catch(() => {
throw new Error('Package `@llm-tools/embedjs-loader-csv` needs to be installed to load CSV files');
});
createDebugMessages('embedjs:util:createLoaderFromMimeType')('Dynamically imported CsvLoader');
- return new CsvLoader({ filePathOrUrl: loaderData });
+ return new CsvLoader({ filePathOrUrl: loaderData, chunkSize, chunkOverlap });
}
case 'text/html': {
const { WebLoader } = await import('@llm-tools/embedjs-loader-web').catch(() => {
throw new Error('Package `@llm-tools/embedjs-loader-web` needs to be installed to load web documents');
});
createDebugMessages('embedjs:util:createLoaderFromMimeType')('Dynamically imported WebLoader');
- return new WebLoader({ urlOrContent: loaderData });
+ return new WebLoader({ urlOrContent: loaderData, chunkSize, chunkOverlap });
}
case 'text/xml': {
const { SitemapLoader } = await import('@llm-tools/embedjs-loader-sitemap').catch(() => {
@@ -67,14 +71,14 @@ export async function createLoaderFromMimeType(loaderData, mimeType) {
});
createDebugMessages('embedjs:util:createLoaderFromMimeType')('Dynamically imported SitemapLoader');
if (await SitemapLoader.test(loaderData)) {
- return new SitemapLoader({ url: loaderData });
+ return new SitemapLoader({ url: loaderData, chunkSize, chunkOverlap });
}
//This is not a Sitemap but is still XML
const { XmlLoader } = await import('@llm-tools/embedjs-loader-xml').catch(() => {
throw new Error('Package `@llm-tools/embedjs-loader-xml` needs to be installed to load XML documents');
});
createDebugMessages('embedjs:util:createLoaderFromMimeType')('Dynamically imported XmlLoader');
- return new XmlLoader({ filePathOrUrl: loaderData });
+ return new XmlLoader({ filePathOrUrl: loaderData, chunkSize, chunkOverlap });
}
case 'text/x-markdown':
case 'text/markdown': {
@@ -82,7 +86,7 @@ export async function createLoaderFromMimeType(loaderData, mimeType) {
throw new Error('Package `@llm-tools/embedjs-loader-markdown` needs to be installed to load markdown files');
});
createDebugMessages('embedjs:util:createLoaderFromMimeType')('Dynamically imported MarkdownLoader');
- return new MarkdownLoader({ filePathOrUrl: loaderData });
+ return new MarkdownLoader({ filePathOrUrl: loaderData, chunkSize, chunkOverlap });
}
case undefined:
throw new Error(`MIME type could not be detected. Please file an issue if you think this is a bug.`);