feat(loader): optimize EpubLoader memory usage with file streams (#3074)

* Enhance update error logging and fix duplicate type import

- Improve error logging in AppUpdater with more detailed error information and timestamps
- Remove duplicate MCPServer type import in Inputbar component

* feat(loader): optimize EpubLoader memory usage with file streams

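Replace the in-memory array of chapter texts with a write stream to a
temporary file to reduce memory consumption when handling large e-books.
Each chapter's text is written to the temporary file as it is extracted;
once the stream finishes, the file is read back and then deleted, and a
single completion message is logged.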

This helps prevent out-of-memory failures when processing large EPUB files (>5 MB).
This commit is contained in:
Hao He 2025-03-09 17:36:19 +08:00 committed by GitHub
parent 9e9c954560
commit a4c0224ab5
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@ -1,9 +1,11 @@
import { RecursiveCharacterTextSplitter } from '@langchain/textsplitters' import { RecursiveCharacterTextSplitter } from '@langchain/textsplitters'
import { BaseLoader } from '@llm-tools/embedjs-interfaces' import { BaseLoader } from '@llm-tools/embedjs-interfaces'
import { cleanString } from '@llm-tools/embedjs-utils' import { cleanString } from '@llm-tools/embedjs-utils'
import { app } from 'electron'
import Logger from 'electron-log' import Logger from 'electron-log'
import EPub from 'epub' import EPub from 'epub'
import * as fs from 'fs' import * as fs from 'fs'
import path from 'path'
/** /**
* epub * epub
@ -157,7 +159,9 @@ export class EpubLoader extends BaseLoader<Record<string, string | number | bool
throw new Error('No content found in epub file') throw new Error('No content found in epub file')
} }
const chapterTexts: string[] = [] // 使用临时文件而不是内存数组
const tempFilePath = path.join(app.getPath('temp'), `epub-${Date.now()}.txt`)
const writeStream = fs.createWriteStream(tempFilePath)
// 遍历所有章节 // 遍历所有章节
for (const chapter of chapters) { for (const chapter of chapters) {
@ -175,15 +179,31 @@ export class EpubLoader extends BaseLoader<Record<string, string | number | bool
.trim() // 移除首尾空白 .trim() // 移除首尾空白
if (text) { if (text) {
chapterTexts.push(text) // 直接写入文件
writeStream.write(text + '\n\n')
} }
} catch (error) { } catch (error) {
Logger.error(`[EpubLoader] Error processing chapter ${chapter.id}:`, error) Logger.error(`[EpubLoader] Error processing chapter ${chapter.id}:`, error)
} }
} }
// 使用双换行符连接所有章节文本 // 关闭写入流
this.extractedText = chapterTexts.join('\n\n') writeStream.end()
// 等待写入完成
await new Promise<void>((resolve, reject) => {
writeStream.on('finish', resolve)
writeStream.on('error', reject)
})
// 从临时文件读取内容
this.extractedText = fs.readFileSync(tempFilePath, 'utf-8')
// 删除临时文件
fs.unlinkSync(tempFilePath)
// 只添加一条完成日志
Logger.info(`[EpubLoader] 电子书 ${this.metadata?.title || path.basename(this.filePath)} 处理完成`)
} catch (error) { } catch (error) {
Logger.error('[EpubLoader] Error in extractTextFromEpub:', error) Logger.error('[EpubLoader] Error in extractTextFromEpub:', error)
throw error throw error