mirror of
https://github.com/alibaba/higress.git
synced 2026-06-09 04:37:31 +08:00
fix: ai statistics doc (#2040)
This commit is contained in:
@@ -61,46 +61,6 @@ Attribute 配置说明:
|
|||||||
|
|
||||||
### 空配置
|
### 空配置
|
||||||
#### 监控
|
#### 监控
|
||||||
```
|
|
||||||
route_upstream_model_metric_input_token{ai_route="llm",ai_cluster="outbound|443||qwen.dns",ai_model="qwen-turbo"} 10
|
|
||||||
route_upstream_model_metric_llm_duration_count{ai_route="llm",ai_cluster="outbound|443||qwen.dns",ai_model="qwen-turbo"} 1
|
|
||||||
route_upstream_model_metric_llm_first_token_duration{ai_route="llm",ai_cluster="outbound|443||qwen.dns",ai_model="qwen-turbo"} 309
|
|
||||||
route_upstream_model_metric_llm_service_duration{ai_route="llm",ai_cluster="outbound|443||qwen.dns",ai_model="qwen-turbo"} 1955
|
|
||||||
route_upstream_model_metric_output_token{ai_route="llm",ai_cluster="outbound|443||qwen.dns",ai_model="qwen-turbo"} 69
|
|
||||||
```
|
|
||||||
|
|
||||||
#### 日志
|
|
||||||
```json
|
|
||||||
{
|
|
||||||
"ai_log":"{\"model\":\"qwen-turbo\",\"input_token\":\"10\",\"output_token\":\"69\",\"llm_first_token_duration\":\"309\",\"llm_service_duration\":\"1955\"}"
|
|
||||||
}
|
|
||||||
```
|
|
||||||
|
|
||||||
#### 链路追踪
|
|
||||||
配置为空时,不会在span中添加额外的attribute
|
|
||||||
|
|
||||||
### 从非openai协议提取token使用信息
|
|
||||||
在ai-proxy中设置协议为original时,以百炼为例,可作如下配置指定如何提取model, input_token, output_token
|
|
||||||
|
|
||||||
```yaml
|
|
||||||
attributes:
|
|
||||||
- key: model
|
|
||||||
value_source: response_body
|
|
||||||
value: usage.models.0.model_id
|
|
||||||
apply_to_log: true
|
|
||||||
apply_to_span: false
|
|
||||||
- key: input_token
|
|
||||||
value_source: response_body
|
|
||||||
value: usage.models.0.input_tokens
|
|
||||||
apply_to_log: true
|
|
||||||
apply_to_span: false
|
|
||||||
- key: output_token
|
|
||||||
value_source: response_body
|
|
||||||
value: usage.models.0.output_tokens
|
|
||||||
apply_to_log: true
|
|
||||||
apply_to_span: false
|
|
||||||
```
|
|
||||||
#### 监控
|
|
||||||
|
|
||||||
```
|
```
|
||||||
# counter 类型,输入 token 数量的累加值
|
# counter 类型,输入 token 数量的累加值
|
||||||
@@ -140,11 +100,51 @@ irate(route_upstream_model_consumer_metric_llm_service_duration[2m])
|
|||||||
irate(route_upstream_model_consumer_metric_llm_duration_count[2m])
|
irate(route_upstream_model_consumer_metric_llm_duration_count[2m])
|
||||||
```
|
```
|
||||||
|
|
||||||
|
#### 日志
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"ai_log":"{\"model\":\"qwen-turbo\",\"input_token\":\"10\",\"output_token\":\"69\",\"llm_first_token_duration\":\"309\",\"llm_service_duration\":\"1955\"}"
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
#### 链路追踪
|
||||||
|
配置为空时,不会在span中添加额外的attribute
|
||||||
|
|
||||||
|
### 从非openai协议提取token使用信息
|
||||||
|
在ai-proxy中设置协议为original时,以百炼为例,可作如下配置指定如何提取model, input_token, output_token
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
attributes:
|
||||||
|
- key: model
|
||||||
|
value_source: response_body
|
||||||
|
value: usage.models.0.model_id
|
||||||
|
apply_to_log: true
|
||||||
|
apply_to_span: false
|
||||||
|
- key: input_token
|
||||||
|
value_source: response_body
|
||||||
|
value: usage.models.0.input_tokens
|
||||||
|
apply_to_log: true
|
||||||
|
apply_to_span: false
|
||||||
|
- key: output_token
|
||||||
|
value_source: response_body
|
||||||
|
value: usage.models.0.output_tokens
|
||||||
|
apply_to_log: true
|
||||||
|
apply_to_span: false
|
||||||
|
```
|
||||||
|
#### 监控
|
||||||
|
|
||||||
|
```
|
||||||
|
route_upstream_model_consumer_metric_input_token{ai_route="bailian",ai_cluster="qwen",ai_model="qwen-max"} 343
|
||||||
|
route_upstream_model_consumer_metric_output_token{ai_route="bailian",ai_cluster="qwen",ai_model="qwen-max"} 153
|
||||||
|
route_upstream_model_consumer_metric_llm_service_duration{ai_route="bailian",ai_cluster="qwen",ai_model="qwen-max"} 3725
|
||||||
|
route_upstream_model_consumer_metric_llm_duration_count{ai_route="bailian",ai_cluster="qwen",ai_model="qwen-max"} 1
|
||||||
|
```
|
||||||
|
|
||||||
#### 日志
|
#### 日志
|
||||||
此配置下日志效果如下:
|
此配置下日志效果如下:
|
||||||
```json
|
```json
|
||||||
{
|
{
|
||||||
"ai_log": "{\"model\":\"qwen-max\",\"input_token\":\"343\",\"output_token\":\"153\",\"llm_service_duration\":\"19110\"}"
|
"ai_log": "{\"model\":\"qwen-max\",\"input_token\":\"343\",\"output_token\":\"153\",\"llm_service_duration\":\"19110\"}"
|
||||||
}
|
}
|
||||||
```
|
```
|
||||||
|
|
||||||
@@ -152,7 +152,7 @@ irate(route_upstream_model_consumer_metric_llm_duration_count[2m])
|
|||||||
链路追踪的 span 中可以看到 model, input_token, output_token 三个额外的 attribute
|
链路追踪的 span 中可以看到 model, input_token, output_token 三个额外的 attribute
|
||||||
|
|
||||||
### 配合认证鉴权记录consumer
|
### 配合认证鉴权记录consumer
|
||||||
举例如下:
|
举例如下:
|
||||||
```yaml
|
```yaml
|
||||||
attributes:
|
attributes:
|
||||||
- key: consumer # 配合认证鉴权记录consumer
|
- key: consumer # 配合认证鉴权记录consumer
|
||||||
|
|||||||
@@ -48,12 +48,12 @@ The meanings of various values for `value_source` are as follows:
|
|||||||
|
|
||||||
When `value_source` is `response_streaming_body`, `rule` should be configured to specify how to obtain the value from the streaming body. The supported values of `rule` are:
|
When `value_source` is `response_streaming_body`, `rule` should be configured to specify how to obtain the value from the streaming body. The supported values of `rule` are:
|
||||||
|
|
||||||
- `first`: extract value from the first valid chunk
|
- `first`: extract value from the first valid chunk
|
||||||
- `replace`: extract value from the last valid chunk
|
- `replace`: extract value from the last valid chunk
|
||||||
- `append`: join value pieces from all valid chunks
|
- `append`: join value pieces from all valid chunks
|
||||||
|
|
||||||
## Configuration example
|
## Configuration example
|
||||||
To record the statistics produced by ai-statistic in the gateway access log, modify log_format by adding a new field to the original log_format. For example:
|
To record the statistics produced by ai-statistic in the gateway access log, modify log_format by adding a new field to the original log_format. For example:
|
||||||
|
|
||||||
```yaml
|
```yaml
|
||||||
'{"ai_log":"%FILTER_STATE(wasm.ai_log:PLAIN)%"}'
|
'{"ai_log":"%FILTER_STATE(wasm.ai_log:PLAIN)%"}'
|
||||||
@@ -61,48 +61,6 @@ If you want to record ai-statistic related statistical values in the
|
|||||||
|
|
||||||
### Empty
|
### Empty
|
||||||
#### Metric
|
#### Metric
|
||||||
```
|
|
||||||
route_upstream_model_metric_input_token{ai_route="llm",ai_cluster="outbound|443||qwen.dns",ai_model="qwen-turbo"} 10
|
|
||||||
route_upstream_model_metric_llm_duration_count{ai_route="llm",ai_cluster="outbound|443||qwen.dns",ai_model="qwen-turbo"} 1
|
|
||||||
route_upstream_model_metric_llm_first_token_duration{ai_route="llm",ai_cluster="outbound|443||qwen.dns",ai_model="qwen-turbo"} 309
|
|
||||||
route_upstream_model_metric_llm_service_duration{ai_route="llm",ai_cluster="outbound|443||qwen.dns",ai_model="qwen-turbo"} 1955
|
|
||||||
route_upstream_model_metric_output_token{ai_route="llm",ai_cluster="outbound|443||qwen.dns",ai_model="qwen-turbo"} 69
|
|
||||||
```
|
|
||||||
|
|
||||||
#### Log
|
|
||||||
```json
|
|
||||||
{
|
|
||||||
"ai_log":"{\"model\":\"qwen-turbo\",\"input_token\":\"10\",\"output_token\":\"69\",\"llm_first_token_duration\":\"309\",\"llm_service_duration\":\"1955\"}"
|
|
||||||
}
|
|
||||||
```
|
|
||||||
|
|
||||||
#### Trace
|
|
||||||
When the configuration is empty, no additional attributes will be added to the span.
|
|
||||||
|
|
||||||
### Extract token usage information from non-openai protocols
|
|
||||||
When setting the protocol to original in ai-proxy, taking Alibaba Cloud Bailian as an example, you can make the following configuration to specify how to extract `model`, `input_token`, `output_token`
|
|
||||||
|
|
||||||
```yaml
|
|
||||||
attributes:
|
|
||||||
- key: model
|
|
||||||
value_source: response_body
|
|
||||||
value: usage.models.0.model_id
|
|
||||||
apply_to_log: true
|
|
||||||
apply_to_span: false
|
|
||||||
- key: input_token
|
|
||||||
value_source: response_body
|
|
||||||
value: usage.models.0.input_tokens
|
|
||||||
apply_to_log: true
|
|
||||||
apply_to_span: false
|
|
||||||
- key: output_token
|
|
||||||
value_source: response_body
|
|
||||||
value: usage.models.0.output_tokens
|
|
||||||
apply_to_log: true
|
|
||||||
apply_to_span: false
|
|
||||||
```
|
|
||||||
#### Metric
|
|
||||||
|
|
||||||
Here is the English translation:
|
|
||||||
|
|
||||||
```
|
```
|
||||||
# counter, cumulative count of input tokens
|
# counter, cumulative count of input tokens
|
||||||
@@ -145,7 +103,47 @@ irate(route_upstream_model_consumer_metric_llm_duration_count[2m])
|
|||||||
#### Log
|
#### Log
|
||||||
```json
|
```json
|
||||||
{
|
{
|
||||||
"ai_log": "{\"model\":\"qwen-max\",\"input_token\":\"343\",\"output_token\":\"153\",\"llm_service_duration\":\"19110\"}"
|
"ai_log":"{\"model\":\"qwen-turbo\",\"input_token\":\"10\",\"output_token\":\"69\",\"llm_first_token_duration\":\"309\",\"llm_service_duration\":\"1955\"}"
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
#### Trace
|
||||||
|
When the configuration is empty, no additional attributes will be added to the span.
|
||||||
|
|
||||||
|
### Extract token usage information from non-openai protocols
|
||||||
|
When the protocol is set to `original` in ai-proxy, you can use a configuration like the following (Alibaba Cloud Bailian is used as the example) to specify how to extract `model`, `input_token`, and `output_token`:
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
attributes:
|
||||||
|
- key: model
|
||||||
|
value_source: response_body
|
||||||
|
value: usage.models.0.model_id
|
||||||
|
apply_to_log: true
|
||||||
|
apply_to_span: false
|
||||||
|
- key: input_token
|
||||||
|
value_source: response_body
|
||||||
|
value: usage.models.0.input_tokens
|
||||||
|
apply_to_log: true
|
||||||
|
apply_to_span: false
|
||||||
|
- key: output_token
|
||||||
|
value_source: response_body
|
||||||
|
value: usage.models.0.output_tokens
|
||||||
|
apply_to_log: true
|
||||||
|
apply_to_span: false
|
||||||
|
```
|
||||||
|
#### Metric
|
||||||
|
|
||||||
|
```
|
||||||
|
route_upstream_model_consumer_metric_input_token{ai_route="bailian",ai_cluster="qwen",ai_model="qwen-max"} 343
|
||||||
|
route_upstream_model_consumer_metric_output_token{ai_route="bailian",ai_cluster="qwen",ai_model="qwen-max"} 153
|
||||||
|
route_upstream_model_consumer_metric_llm_service_duration{ai_route="bailian",ai_cluster="qwen",ai_model="qwen-max"} 3725
|
||||||
|
route_upstream_model_consumer_metric_llm_duration_count{ai_route="bailian",ai_cluster="qwen",ai_model="qwen-max"} 1
|
||||||
|
```
|
||||||
|
|
||||||
|
#### Log
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"ai_log": "{\"model\":\"qwen-max\",\"input_token\":\"343\",\"output_token\":\"153\",\"llm_service_duration\":\"19110\"}"
|
||||||
}
|
}
|
||||||
```
|
```
|
||||||
|
|
||||||
@@ -164,7 +162,7 @@ attributes:
|
|||||||
### Record questions and answers
|
### Record questions and answers
|
||||||
```yaml
|
```yaml
|
||||||
attributes:
|
attributes:
|
||||||
- key: question
|
- key: question
|
||||||
value_source: request_body
|
value_source: request_body
|
||||||
value: messages.@reverse.0.content
|
value: messages.@reverse.0.content
|
||||||
apply_to_log: true
|
apply_to_log: true
|
||||||
|
|||||||
Reference in New Issue
Block a user