feat(loader): optimize EpubLoader memory usage with file streams (#3074)

* Enhance update error logging and fix duplicate type import - Improve error logging in AppUpdater with more detailed error information and timestamps - Remove duplicate MCPServer type import in Inputbar component * feat(loader): optimize EpubLoader memory usage with file streams Replace in-memory arrays with file streams for EPUB processing to reduce memory consumption when handling large e-books. Use temporary files for chapter content, add completion logs, and ensure proper cleanup. This prevents memory overflow issues with large EPUB files (>5MB).
2025-03-09 17:36:19 +08:00 · 2025-03-09 17:36:19 +08:00 · a4c0224ab5
commit a4c0224ab5
parent 9e9c954560
1 changed files with 24 additions and 4 deletions
--- a/src/main/loader/epubLoader.ts
+++ b/src/main/loader/epubLoader.ts
@ -1,9 +1,11 @@
 import { RecursiveCharacterTextSplitter } from '@langchain/textsplitters'
 import { BaseLoader } from '@llm-tools/embedjs-interfaces'
 import { cleanString } from '@llm-tools/embedjs-utils'
 import { app } from 'electron'
 import Logger from 'electron-log'
 import EPub from 'epub'
 import * as fs from 'fs'
 import path from 'path'
 /**
 * epub 加载器的配置选项
@ -157,7 +159,9 @@ export class EpubLoader extends BaseLoader<Record<string, string | number | bool
        throw new Error('No content found in epub file')
      }
-      const chapterTexts: string[] = []
+      // 使用临时文件而不是内存数组
      const tempFilePath = path.join(app.getPath('temp'), `epub-${Date.now()}.txt`)
      const writeStream = fs.createWriteStream(tempFilePath)
      // 遍历所有章节
      for (const chapter of chapters) {
@ -175,15 +179,31 @@ export class EpubLoader extends BaseLoader<Record<string, string | number | bool
            .trim() // 移除首尾空白
          if (text) {
-            chapterTexts.push(text)
+            // 直接写入文件
            writeStream.write(text + '\n\n')
          }
        } catch (error) {
          Logger.error(`[EpubLoader] Error processing chapter ${chapter.id}:`, error)
        }
      }
-      // 使用双换行符连接所有章节文本
+      // 关闭写入流
-      this.extractedText = chapterTexts.join('\n\n')
+      writeStream.end()
      // 等待写入完成
      await new Promise<void>((resolve, reject) => {
        writeStream.on('finish', resolve)
        writeStream.on('error', reject)
      })
      // 从临时文件读取内容
      this.extractedText = fs.readFileSync(tempFilePath, 'utf-8')
      // 删除临时文件
      fs.unlinkSync(tempFilePath)
      // 只添加一条完成日志
      Logger.info(`[EpubLoader] 电子书 ${this.metadata?.title || path.basename(this.filePath)} 处理完成`)
    } catch (error) {
      Logger.error('[EpubLoader] Error in extractTextFromEpub:', error)
      throw error