From a4c0224ab5c3e0156257dfba284058ceb6d6da3b Mon Sep 17 00:00:00 2001
From: Hao He <57698783+Harris-H@users.noreply.github.com>
Date: Sun, 9 Mar 2025 17:36:19 +0800
Subject: [PATCH] feat(loader): optimize EpubLoader memory usage with file streams (#3074)

* Enhance update error logging and fix duplicate type import

- Improve error logging in AppUpdater with more detailed error information and timestamps
- Remove duplicate MCPServer type import in Inputbar component

* feat(loader): optimize EpubLoader memory usage with file streams

Replace in-memory arrays with file streams for EPUB processing to reduce memory
consumption when handling large e-books. Use temporary files for chapter content,
add completion logs, and ensure proper cleanup. This prevents memory overflow
issues with large EPUB files (>5MB).
---
 src/main/loader/epubLoader.ts | 28 ++++++++++++++++++++++++----
 1 file changed, 24 insertions(+), 4 deletions(-)

diff --git a/src/main/loader/epubLoader.ts b/src/main/loader/epubLoader.ts
index 8364d12f..ba9a1c6b 100644
--- a/src/main/loader/epubLoader.ts
+++ b/src/main/loader/epubLoader.ts
@@ -1,9 +1,11 @@
 import { RecursiveCharacterTextSplitter } from '@langchain/textsplitters'
 import { BaseLoader } from '@llm-tools/embedjs-interfaces'
 import { cleanString } from '@llm-tools/embedjs-utils'
+import { app } from 'electron'
 import Logger from 'electron-log'
 import EPub from 'epub'
 import * as fs from 'fs'
+import path from 'path'
 
 /**
  * epub 加载器的配置选项
@@ -157,7 +159,9 @@ export class EpubLoader extends BaseLoader((resolve, reject) => {
+        writeStream.on('finish', resolve)
+        writeStream.on('error', reject)
+      })
+
+      // 从临时文件读取内容
+      this.extractedText = fs.readFileSync(tempFilePath, 'utf-8')
+
+      // 删除临时文件
+      fs.unlinkSync(tempFilePath)
+
+      // 只添加一条完成日志
+      Logger.info(`[EpubLoader] 电子书 ${this.metadata?.title || path.basename(this.filePath)} 处理完成`)
     } catch (error) {
       Logger.error('[EpubLoader] Error in extractTextFromEpub:', error)
       throw error