feat(loader): optimize EpubLoader memory usage with file streams (#3074)
* Enhance update error logging and fix duplicate type import
  - Improve error logging in AppUpdater with more detailed error information and timestamps
  - Remove duplicate MCPServer type import in Inputbar component
* feat(loader): optimize EpubLoader memory usage with file streams
  Replace in-memory arrays with file streams for EPUB processing to reduce memory consumption when handling large e-books. Use temporary files for chapter content, add completion logs, and ensure proper cleanup. This prevents memory overflow issues with large EPUB files (>5MB).
This commit is contained in:
parent
9e9c954560
commit
a4c0224ab5
@ -1,9 +1,11 @@
|
|||||||
import { RecursiveCharacterTextSplitter } from '@langchain/textsplitters'
|
import { RecursiveCharacterTextSplitter } from '@langchain/textsplitters'
|
||||||
import { BaseLoader } from '@llm-tools/embedjs-interfaces'
|
import { BaseLoader } from '@llm-tools/embedjs-interfaces'
|
||||||
import { cleanString } from '@llm-tools/embedjs-utils'
|
import { cleanString } from '@llm-tools/embedjs-utils'
|
||||||
|
import { app } from 'electron'
|
||||||
import Logger from 'electron-log'
|
import Logger from 'electron-log'
|
||||||
import EPub from 'epub'
|
import EPub from 'epub'
|
||||||
import * as fs from 'fs'
|
import * as fs from 'fs'
|
||||||
|
import path from 'path'
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* epub 加载器的配置选项
|
* epub 加载器的配置选项
|
||||||
@ -157,7 +159,9 @@ export class EpubLoader extends BaseLoader<Record<string, string | number | bool
|
|||||||
throw new Error('No content found in epub file')
|
throw new Error('No content found in epub file')
|
||||||
}
|
}
|
||||||
|
|
||||||
const chapterTexts: string[] = []
|
// 使用临时文件而不是内存数组
|
||||||
|
const tempFilePath = path.join(app.getPath('temp'), `epub-${Date.now()}.txt`)
|
||||||
|
const writeStream = fs.createWriteStream(tempFilePath)
|
||||||
|
|
||||||
// 遍历所有章节
|
// 遍历所有章节
|
||||||
for (const chapter of chapters) {
|
for (const chapter of chapters) {
|
||||||
@ -175,15 +179,31 @@ export class EpubLoader extends BaseLoader<Record<string, string | number | bool
|
|||||||
.trim() // 移除首尾空白
|
.trim() // 移除首尾空白
|
||||||
|
|
||||||
if (text) {
|
if (text) {
|
||||||
chapterTexts.push(text)
|
// 直接写入文件
|
||||||
|
writeStream.write(text + '\n\n')
|
||||||
}
|
}
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
Logger.error(`[EpubLoader] Error processing chapter ${chapter.id}:`, error)
|
Logger.error(`[EpubLoader] Error processing chapter ${chapter.id}:`, error)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// 使用双换行符连接所有章节文本
|
// 关闭写入流
|
||||||
this.extractedText = chapterTexts.join('\n\n')
|
writeStream.end()
|
||||||
|
|
||||||
|
// 等待写入完成
|
||||||
|
await new Promise<void>((resolve, reject) => {
|
||||||
|
writeStream.on('finish', resolve)
|
||||||
|
writeStream.on('error', reject)
|
||||||
|
})
|
||||||
|
|
||||||
|
// 从临时文件读取内容
|
||||||
|
this.extractedText = fs.readFileSync(tempFilePath, 'utf-8')
|
||||||
|
|
||||||
|
// 删除临时文件
|
||||||
|
fs.unlinkSync(tempFilePath)
|
||||||
|
|
||||||
|
// 只添加一条完成日志
|
||||||
|
Logger.info(`[EpubLoader] 电子书 ${this.metadata?.title || path.basename(this.filePath)} 处理完成`)
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
Logger.error('[EpubLoader] Error in extractTextFromEpub:', error)
|
Logger.error('[EpubLoader] Error in extractTextFromEpub:', error)
|
||||||
throw error
|
throw error
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user