feat: add metrics field to message to track token generation speed and time to first token (#479)

adfnekc 2024-12-16 17:10:36 +08:00 committed by GitHub
parent 6ced973b35
commit b2554333a9
7 changed files with 48 additions and 6 deletions

View File

@@ -389,6 +389,7 @@
   "messages.input.paste_long_text_as_file": "Paste long text as file",
   "messages.input.send_shortcuts": "Send shortcuts",
   "messages.input.show_estimated_tokens": "Show estimated tokens",
+  "messages.metrics": "{{time_first_token_millsec}}ms to first token • {{token_speed}} tok/sec • ",
   "messages.input.title": "Input Settings",
   "messages.markdown_rendering_input_message": "Markdown render input msg",
   "messages.math_engine": "Math render engine",

View File

@@ -389,6 +389,7 @@
   "messages.input.paste_long_text_as_file": "长文本粘贴为文件",
   "messages.input.send_shortcuts": "发送快捷键",
   "messages.input.show_estimated_tokens": "显示预估 Token 数",
+  "messages.metrics": "首字时延 {{time_first_token_millsec}}ms • 每秒 {{token_speed}} token • ",
   "messages.input.title": "输入设置",
   "messages.markdown_rendering_input_message": "Markdown 渲染输入消息",
   "messages.math_engine": "数学公式引擎",

View File

@@ -1,6 +1,7 @@
 import { useRuntime } from '@renderer/hooks/useRuntime'
 import { EVENT_NAMES, EventEmitter } from '@renderer/services/EventService'
 import { Message } from '@renderer/types'
+import { t } from 'i18next'
 import styled from 'styled-components'

 const MessgeTokens: React.FC<{ message: Message; isLastMessage: boolean }> = ({ message, isLastMessage }) => {
@@ -27,9 +28,19 @@ const MessgeTokens: React.FC<{ message: Message; isLastMessage: boolean }> = ({
   }

   if (message.role === 'assistant') {
+    let metrixs = ''
+    if (message?.metrics?.completion_tokens && message?.metrics?.time_completion_millsec) {
+      metrixs = t('settings.messages.metrics', {
+        time_first_token_millsec: message?.metrics?.time_first_token_millsec,
+        token_speed: (message?.metrics?.completion_tokens / (message?.metrics?.time_completion_millsec / 1000)).toFixed(
+          2
+        )
+      })
+    }
     return (
       <MessageMetadata className="message-tokens" onClick={locateMessage}>
-        Tokens: {message?.usage?.total_tokens} | {message?.usage?.prompt_tokens} | {message?.usage?.completion_tokens}
+        {metrixs !== '' ? metrixs : ''}
+        Tokens: {message?.usage?.total_tokens} | {message?.usage?.prompt_tokens} | {message?.usage?.completion_tokens}
       </MessageMetadata>
     )
   }
@@ -38,7 +49,7 @@ const MessgeTokens: React.FC<{ message: Message; isLastMessage: boolean }> = ({
 }

 const MessageMetadata = styled.div`
-  font-size: 12px;
+  font-size: 11px;
   color: var(--color-text-2);
   user-select: text;
   margin: 2px 0;
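The token_speed passed to t() above is completion tokens divided by completion time in seconds. A hypothetical helper (tokenSpeed is not part of this commit) mirroring the component's inline expression, with a worked example:

// Mirrors (completion_tokens / (time_completion_millsec / 1000)).toFixed(2)
const tokenSpeed = (completionTokens: number, timeCompletionMillsec: number): string =>
  (completionTokens / (timeCompletionMillsec / 1000)).toFixed(2)

console.log(tokenSpeed(128, 3200)) // "40.00" — 128 tokens over 3.2 s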

View File

@@ -132,6 +132,9 @@ export default class OpenAIProvider extends BaseProvider {
     const isOpenAIo1 = model.id.includes('o1-')
     const isSupportStreamOutput = streamOutput

+    let time_first_token_millsec = 0
+    const start_time_millsec = new Date().getTime()
+
     // @ts-ignore key is not typed
     const stream = await this.sdk.chat.completions.create({
       model: model.id,
@@ -146,20 +149,35 @@
     })

     if (!isSupportStreamOutput) {
+      let time_completion_millsec = new Date().getTime() - start_time_millsec
       return onChunk({
         text: stream.choices[0].message?.content || '',
-        usage: stream.usage
+        usage: stream.usage,
+        metrics: {
+          completion_tokens: stream.usage?.completion_tokens,
+          time_completion_millsec: time_completion_millsec,
+          time_first_token_millsec: 0
+        }
       })
     }

     for await (const chunk of stream) {
       if (window.keyv.get(EVENT_NAMES.CHAT_COMPLETION_PAUSED)) {
         break
       }

+      if (time_first_token_millsec == 0) {
+        time_first_token_millsec = new Date().getTime() - start_time_millsec
+      }
+      let time_completion_millsec = new Date().getTime() - start_time_millsec
       onChunk({
         text: chunk.choices[0]?.delta?.content || '',
-        usage: chunk.usage
+        usage: chunk.usage,
+        metrics: {
+          completion_tokens: chunk.usage?.completion_tokens,
+          time_completion_millsec: time_completion_millsec,
+          time_first_token_millsec: time_first_token_millsec
+        }
       })
     }
   }
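The pattern above latches time_first_token_millsec exactly once, on the first streamed chunk, while time_completion_millsec is recomputed on every chunk so the last value covers the whole response. A minimal sketch of the same pattern detached from the OpenAI SDK — fakeStream and measure are stand-ins invented for this sketch, not code from the commit:

async function* fakeStream(): AsyncGenerator<string> {
  yield 'Hello'
  yield ' world'
}

async function measure() {
  const start_time_millsec = new Date().getTime()
  let time_first_token_millsec = 0
  let time_completion_millsec = 0

  for await (const _chunk of fakeStream()) {
    if (time_first_token_millsec === 0) {
      // Set only on the first chunk: this is the time-to-first-token number
      time_first_token_millsec = new Date().getTime() - start_time_millsec
    }
    // Recomputed every chunk; the final value spans the whole generation
    time_completion_millsec = new Date().getTime() - start_time_millsec
  }

  console.log({ time_first_token_millsec, time_completion_millsec })
}

measure()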

View File

@@ -1,6 +1,9 @@
+import { Metrics } from '@renderer/types'
+
 interface ChunkCallbackData {
   text?: string
   usage?: OpenAI.Completions.CompletionUsage
+  metrics?: Metrics
 }

 interface CompletionsParams {
interface CompletionsParams { interface CompletionsParams {

View File

@@ -57,9 +57,10 @@ export async function fetchChatCompletion({
     messages,
     assistant,
     onFilterMessages: (messages) => (_messages = messages),
-    onChunk: ({ text, usage }) => {
+    onChunk: ({ text, usage, metrics }) => {
       message.content = message.content + text || ''
       message.usage = usage
+      message.metrics = metrics
       onResponse({ ...message, status: 'pending' })
     }
   })
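Because onChunk assigns metrics on every chunk, the message ends up holding the metrics carried by the last chunk received — which, per the provider change above, is the end-to-end figure. A toy illustration of that accumulation; SketchMessage and the sample chunks are invented for the example:

type SketchMessage = { content: string; metrics?: { completion_tokens?: number; time_completion_millsec?: number } }

const message: SketchMessage = { content: '' }
const chunks = [
  { text: 'Hel', metrics: { completion_tokens: 1, time_completion_millsec: 230 } },
  { text: 'lo', metrics: { completion_tokens: 2, time_completion_millsec: 410 } }
]

for (const { text, metrics } of chunks) {
  message.content = message.content + text
  message.metrics = metrics // each chunk overwrites; the last chunk's metrics win
}
// message is now { content: 'Hello', metrics: { completion_tokens: 2, time_completion_millsec: 410 } }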

View File

@@ -47,10 +47,17 @@ export type Message = {
   files?: FileType[]
   images?: string[]
   usage?: OpenAI.Completions.CompletionUsage
+  metrics?: Metrics
   type: 'text' | '@' | 'clear'
   isPreset?: boolean
 }

+export type Metrics = {
+  completion_tokens?: number
+  time_completion_millsec?: number
+  time_first_token_millsec?: number
+}
+
 export type Topic = {
   id: string
   assistantId: string
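All three Metrics fields are optional, which is why MessgeTokens guards on completion_tokens and time_completion_millsec before dividing. A hypothetical fully populated value and the display it would produce:

import { Metrics } from '@renderer/types'

// 128 tokens over 3.2 s with the first token after 230 ms — the UI would show
// "230ms to first token • 40.00 tok/sec • "
const example: Metrics = {
  completion_tokens: 128,
  time_completion_millsec: 3200,
  time_first_token_millsec: 230
}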