feat: add a metrics field to Message to track token generation speed and time to first token (#479)
commit b2554333a9
parent 6ced973b35
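
Both of the new statistics fall out of wall-clock timestamps taken around the completion call: time to first token is the gap between sending the request and the first streamed chunk, and token speed is completion_tokens divided by the total completion time in seconds. A minimal TypeScript sketch of that arithmetic, with timestamps invented purely for illustration:

// Hypothetical timestamps (ms since epoch), invented only to show the arithmetic.
const start_time_millsec = 1_700_000_000_000   // request sent
const first_chunk_millsec = 1_700_000_000_450  // first streamed chunk arrives
const last_chunk_millsec = 1_700_000_005_000   // stream finishes
const completion_tokens = 150                  // reported by the provider's usage object

// Time to first token: delay before the first chunk.
const time_first_token_millsec = first_chunk_millsec - start_time_millsec // 450 ms

// Total completion time, then tokens per second.
const time_completion_millsec = last_chunk_millsec - start_time_millsec   // 5000 ms
const token_speed = completion_tokens / (time_completion_millsec / 1000)  // 30 tok/sec

console.log(`${time_first_token_millsec}ms to first token • ${token_speed.toFixed(2)} tok/sec`)
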
@@ -389,6 +389,7 @@
 "messages.input.paste_long_text_as_file": "Paste long text as file",
 "messages.input.send_shortcuts": "Send shortcuts",
 "messages.input.show_estimated_tokens": "Show estimated tokens",
+"messages.metrics": "{{time_first_token_millsec}}ms to first token • {{token_speed}} tok/sec • ",
 "messages.input.title": "Input Settings",
 "messages.markdown_rendering_input_message": "Markdown render input msg",
 "messages.math_engine": "Math render engine",
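
The new entry is an i18next template; the component changed below reads it as t('settings.messages.metrics', ...), so the key presumably sits under a settings block in the real locale files. A standalone sketch of how i18next interpolates it, with a flat resource layout assumed only for this example:

import i18next from 'i18next'

async function main() {
  await i18next.init({
    lng: 'en',
    // A flat key inside a single "translation" namespace is an assumption for this sketch only.
    resources: {
      en: {
        translation: {
          'settings.messages.metrics':
            '{{time_first_token_millsec}}ms to first token • {{token_speed}} tok/sec • '
        }
      }
    },
    // The key itself contains dots, so disable dot-based key nesting here.
    keySeparator: false
  })

  // Values are interpolated into the {{...}} placeholders.
  console.log(i18next.t('settings.messages.metrics', { time_first_token_millsec: 450, token_speed: '30.00' }))
  // -> "450ms to first token • 30.00 tok/sec • "
}

main()
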
@@ -389,6 +389,7 @@
 "messages.input.paste_long_text_as_file": "长文本粘贴为文件",
 "messages.input.send_shortcuts": "发送快捷键",
 "messages.input.show_estimated_tokens": "显示预估 Token 数",
+"messages.metrics": "首字时延 {{time_first_token_millsec}}ms • 每秒 {{token_speed}} token • ",
 "messages.input.title": "输入设置",
 "messages.markdown_rendering_input_message": "Markdown 渲染输入消息",
 "messages.math_engine": "数学公式引擎",
@@ -1,6 +1,7 @@
 import { useRuntime } from '@renderer/hooks/useRuntime'
 import { EVENT_NAMES, EventEmitter } from '@renderer/services/EventService'
 import { Message } from '@renderer/types'
+import { t } from 'i18next'
 import styled from 'styled-components'

 const MessgeTokens: React.FC<{ message: Message; isLastMessage: boolean }> = ({ message, isLastMessage }) => {
@@ -27,9 +28,19 @@ const MessgeTokens: React.FC<{ message: Message; isLastMessage: boolean }> = ({
   }

   if (message.role === 'assistant') {
+    let metrixs = ''
+    if (message?.metrics?.completion_tokens && message?.metrics?.time_completion_millsec) {
+      metrixs = t('settings.messages.metrics', {
+        time_first_token_millsec: message?.metrics?.time_first_token_millsec,
+        token_speed: (message?.metrics?.completion_tokens / (message?.metrics?.time_completion_millsec / 1000)).toFixed(
+          2
+        )
+      })
+    }
     return (
       <MessageMetadata className="message-tokens" onClick={locateMessage}>
-        Tokens: {message?.usage?.total_tokens} | ↑{message?.usage?.prompt_tokens} | ↓{message?.usage?.completion_tokens}
+        {metrixs !== '' ? metrixs : ''}
+        Tokens: {message?.usage?.total_tokens} ↑ {message?.usage?.prompt_tokens} ↓ {message?.usage?.completion_tokens}
       </MessageMetadata>
     )
   }
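
The guard above only builds the metrics string when both completion_tokens and time_completion_millsec are truthy, which also keeps the division from running against a zero or missing denominator. A hypothetical helper (formatTokenSpeed is not part of the commit) showing the same guard and rounding in isolation:

// Hypothetical helper, not part of the commit: mirrors the component's guard and rounding.
interface MetricsLike {
  completion_tokens?: number
  time_completion_millsec?: number
  time_first_token_millsec?: number
}

function formatTokenSpeed(metrics?: MetricsLike): string {
  // The truthy checks double as a zero/undefined guard for the division below.
  if (!metrics?.completion_tokens || !metrics?.time_completion_millsec) {
    return ''
  }
  return (metrics.completion_tokens / (metrics.time_completion_millsec / 1000)).toFixed(2)
}

console.log(formatTokenSpeed({ completion_tokens: 150, time_completion_millsec: 5000 })) // "30.00"
console.log(formatTokenSpeed({ completion_tokens: 150 }))                                // ""
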
@@ -38,7 +49,7 @@ const MessgeTokens: React.FC<{ message: Message; isLastMessage: boolean }> = ({
 }

 const MessageMetadata = styled.div`
-  font-size: 12px;
+  font-size: 11px;
   color: var(--color-text-2);
   user-select: text;
   margin: 2px 0;
@@ -132,6 +132,9 @@ export default class OpenAIProvider extends BaseProvider {
     const isOpenAIo1 = model.id.includes('o1-')
     const isSupportStreamOutput = streamOutput

+    let time_first_token_millsec = 0
+    const start_time_millsec = new Date().getTime()
+
     // @ts-ignore key is not typed
     const stream = await this.sdk.chat.completions.create({
       model: model.id,
@@ -146,20 +149,35 @@
     })

     if (!isSupportStreamOutput) {
+      let time_completion_millsec = new Date().getTime() - start_time_millsec
       return onChunk({
         text: stream.choices[0].message?.content || '',
-        usage: stream.usage
+        usage: stream.usage,
+        metrics: {
+          completion_tokens: stream.usage?.completion_tokens,
+          time_completion_millsec: time_completion_millsec,
+          time_first_token_sec: 0,
+        }
       })
     }


     for await (const chunk of stream) {
       if (window.keyv.get(EVENT_NAMES.CHAT_COMPLETION_PAUSED)) {
         break
       }
+      if (time_first_token_millsec == 0) {
+        time_first_token_millsec = new Date().getTime() - start_time_millsec
+      }
+      let time_completion_millsec = new Date().getTime() - start_time_millsec
       onChunk({
         text: chunk.choices[0]?.delta?.content || '',
-        usage: chunk.usage
+        usage: chunk.usage,
+        metrics: {
+          completion_tokens: chunk.usage?.completion_tokens,
+          time_completion_millsec: time_completion_millsec,
+          time_first_token_millsec: time_first_token_millsec,
+        }
       })
     }
   }
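
The timing pattern here is: capture start_time_millsec before the SDK call, stamp time_first_token_millsec only once (the == 0 check fires on the first chunk), and refresh time_completion_millsec on every chunk so the last one carries the full duration. A self-contained sketch of the same pattern over a mock async stream; the fake generator and its delays stand in for the OpenAI SDK stream:

// Mock stream standing in for the SDK's async iterable; the delays are invented.
async function* fakeStream(): AsyncGenerator<string> {
  for (const piece of ['Hello', ', ', 'world']) {
    await new Promise((resolve) => setTimeout(resolve, 100))
    yield piece
  }
}

async function consumeWithTiming() {
  let time_first_token_millsec = 0
  const start_time_millsec = new Date().getTime()

  for await (const chunk of fakeStream()) {
    // Only the first chunk sets the first-token latency.
    if (time_first_token_millsec === 0) {
      time_first_token_millsec = new Date().getTime() - start_time_millsec
    }
    // Completion time keeps growing until the last chunk.
    const time_completion_millsec = new Date().getTime() - start_time_millsec
    console.log({ chunk, time_first_token_millsec, time_completion_millsec })
  }
}

consumeWithTiming()
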
src/renderer/src/providers/index.d.ts (vendored, 3 lines changed)
@@ -1,6 +1,9 @@
+import { Metrics } from "@renderer/types"
+
 interface ChunkCallbackData {
   text?: string
   usage?: OpenAI.Completions.CompletionUsage
+  metrics?: Metrics
 }

 interface CompletionsParams {
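
Because metrics is optional on ChunkCallbackData, providers that never report timing keep satisfying the same onChunk contract. A small sketch with the interfaces re-declared locally (the real ones live in this .d.ts and in @renderer/types):

// Local re-declarations for this sketch only.
interface MetricsLike {
  completion_tokens?: number
  time_completion_millsec?: number
  time_first_token_millsec?: number
}

interface ChunkCallbackDataLike {
  text?: string
  metrics?: MetricsLike
}

type OnChunk = (data: ChunkCallbackDataLike) => void

const onChunk: OnChunk = ({ text, metrics }) => {
  console.log(text, metrics?.completion_tokens)
}

// A provider that reports metrics and one that doesn't both compile against the same callback.
onChunk({ text: 'hi', metrics: { completion_tokens: 1, time_completion_millsec: 120 } })
onChunk({ text: 'hi' })
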
@@ -57,9 +57,10 @@ export async function fetchChatCompletion({
     messages,
     assistant,
     onFilterMessages: (messages) => (_messages = messages),
-    onChunk: ({ text, usage }) => {
+    onChunk: ({ text, usage, metrics }) => {
       message.content = message.content + text || ''
       message.usage = usage
+      message.metrics = metrics
       onResponse({ ...message, status: 'pending' })
     }
   })
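
On the receiving side, each chunk's text is appended to message.content while usage and metrics are simply overwritten, so the stored values end up being whatever the final chunk reported. A reduced sketch of that accumulation, tracking only metrics; the MessageLike and ChunkLike shapes are trimmed down for illustration:

// Trimmed-down shapes for illustration; the real Message type carries many more fields.
interface MetricsLike {
  completion_tokens?: number
  time_completion_millsec?: number
}

interface ChunkLike {
  text?: string
  metrics?: MetricsLike
}

interface MessageLike {
  content: string
  metrics?: MetricsLike
}

function applyChunk(message: MessageLike, { text, metrics }: ChunkLike): void {
  // Text accumulates; metrics are replaced, so the last chunk's values win.
  message.content = message.content + (text || '')
  message.metrics = metrics
}

const message: MessageLike = { content: '' }
applyChunk(message, { text: 'Hel', metrics: { completion_tokens: 1, time_completion_millsec: 450 } })
applyChunk(message, { text: 'lo', metrics: { completion_tokens: 2, time_completion_millsec: 900 } })
console.log(message) // { content: 'Hello', metrics: { completion_tokens: 2, time_completion_millsec: 900 } }
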
@@ -47,10 +47,17 @@ export type Message = {
   files?: FileType[]
   images?: string[]
   usage?: OpenAI.Completions.CompletionUsage
+  metrics?: Metrics
   type: 'text' | '@' | 'clear'
   isPreset?: boolean
 }

+export type Metrics = {
+  completion_tokens?: number
+  time_completion_millsec?: number
+  time_first_token_millsec?: number
+}
+
 export type Topic = {
   id: string
   assistantId: string