From b1307ba97e73b0fbd59cf155b899efe098675266 Mon Sep 17 00:00:00 2001 From: Se7en Date: Thu, 10 Apr 2025 15:38:19 +0800 Subject: [PATCH] fix: ai statistics doc (#2040) --- .../extensions/ai-statistics/README.md | 84 ++++++++--------- .../extensions/ai-statistics/README_EN.md | 92 +++++++++---------- 2 files changed, 87 insertions(+), 89 deletions(-) diff --git a/plugins/wasm-go/extensions/ai-statistics/README.md b/plugins/wasm-go/extensions/ai-statistics/README.md index bb08cc125..7d8c07969 100644 --- a/plugins/wasm-go/extensions/ai-statistics/README.md +++ b/plugins/wasm-go/extensions/ai-statistics/README.md @@ -61,46 +61,6 @@ Attribute 配置说明: ### 空配置 #### 监控 -``` -route_upstream_model_metric_input_token{ai_route="llm",ai_cluster="outbound|443||qwen.dns",ai_model="qwen-turbo"} 10 -route_upstream_model_metric_llm_duration_count{ai_route="llm",ai_cluster="outbound|443||qwen.dns",ai_model="qwen-turbo"} 1 -route_upstream_model_metric_llm_first_token_duration{ai_route="llm",ai_cluster="outbound|443||qwen.dns",ai_model="qwen-turbo"} 309 -route_upstream_model_metric_llm_service_duration{ai_route="llm",ai_cluster="outbound|443||qwen.dns",ai_model="qwen-turbo"} 1955 -route_upstream_model_metric_output_token{ai_route="llm",ai_cluster="outbound|443||qwen.dns",ai_model="qwen-turbo"} 69 -``` - -#### 日志 -```json -{ - "ai_log":"{\"model\":\"qwen-turbo\",\"input_token\":\"10\",\"output_token\":\"69\",\"llm_first_token_duration\":\"309\",\"llm_service_duration\":\"1955\"}" -} -``` - -#### 链路追踪 -配置为空时,不会在span中添加额外的attribute - -### 从非openai协议提取token使用信息 -在ai-proxy中设置协议为original时,以百炼为例,可作如下配置指定如何提取model, input_token, output_token - -```yaml -attributes: - - key: model - value_source: response_body - value: usage.models.0.model_id - apply_to_log: true - apply_to_span: false - - key: input_token - value_source: response_body - value: usage.models.0.input_tokens - apply_to_log: true - apply_to_span: false - - key: output_token - value_source: response_body - value: 
usage.models.0.output_tokens - apply_to_log: true - apply_to_span: false -``` -#### 监控 ``` # counter 类型,输入 token 数量的累加值 @@ -140,11 +100,51 @@ irate(route_upstream_model_consumer_metric_llm_service_duration[2m]) irate(route_upstream_model_consumer_metric_llm_duration_count[2m]) ``` +#### 日志 +```json +{ + "ai_log":"{\"model\":\"qwen-turbo\",\"input_token\":\"10\",\"output_token\":\"69\",\"llm_first_token_duration\":\"309\",\"llm_service_duration\":\"1955\"}" +} +``` + +#### 链路追踪 +配置为空时,不会在span中添加额外的attribute + +### 从非openai协议提取token使用信息 +在ai-proxy中设置协议为original时,以百炼为例,可作如下配置指定如何提取model, input_token, output_token + +```yaml +attributes: + - key: model + value_source: response_body + value: usage.models.0.model_id + apply_to_log: true + apply_to_span: false + - key: input_token + value_source: response_body + value: usage.models.0.input_tokens + apply_to_log: true + apply_to_span: false + - key: output_token + value_source: response_body + value: usage.models.0.output_tokens + apply_to_log: true + apply_to_span: false +``` +#### 监控 + +``` +route_upstream_model_consumer_metric_input_token{ai_route="bailian",ai_cluster="qwen",ai_model="qwen-max"} 343 +route_upstream_model_consumer_metric_output_token{ai_route="bailian",ai_cluster="qwen",ai_model="qwen-max"} 153 +route_upstream_model_consumer_metric_llm_service_duration{ai_route="bailian",ai_cluster="qwen",ai_model="qwen-max"} 3725 +route_upstream_model_consumer_metric_llm_duration_count{ai_route="bailian",ai_cluster="qwen",ai_model="qwen-max"} 1 +``` + #### 日志 此配置下日志效果如下: ```json { - "ai_log": "{\"model\":\"qwen-max\",\"input_token\":\"343\",\"output_token\":\"153\",\"llm_service_duration\":\"19110\"}" + "ai_log": "{\"model\":\"qwen-max\",\"input_token\":\"343\",\"output_token\":\"153\",\"llm_service_duration\":\"19110\"}" } ``` @@ -152,7 +152,7 @@ irate(route_upstream_model_consumer_metric_llm_duration_count[2m]) 链路追踪的 span 中可以看到 model, input_token, output_token 三个额外的 attribute ### 配合认证鉴权记录consumer -举例如下: +举例如下: ```yaml 
attributes: - key: consumer # 配合认证鉴权记录consumer diff --git a/plugins/wasm-go/extensions/ai-statistics/README_EN.md b/plugins/wasm-go/extensions/ai-statistics/README_EN.md index 6afe3f8f5..63c5cfc2b 100644 --- a/plugins/wasm-go/extensions/ai-statistics/README_EN.md +++ b/plugins/wasm-go/extensions/ai-statistics/README_EN.md @@ -48,12 +48,12 @@ The meanings of various values for `value_source` ​​are as follows: When `value_source` is `response_streaming_body`, `rule` should be configured to specify how to obtain the specified value from the streaming body. The meaning of the value is as follows: -- `first`: extract value from the first valid chunk -- `replace`: extract value from the last valid chunk +- `first`: extract value from the first valid chunk +- `replace`: extract value from the last valid chunk - `append`: join value pieces from all valid chunks ## Configuration example -If you want to record ai-statistic related statistical values ​​​​in the gateway access log, you need to modify log_format and add a new field based on the original log_format. The example is as follows: +If you want to record the statistics produced by the ai-statistics plugin in the gateway access log, you need to modify log_format and add a new field based on the original log_format. 
The example is as follows: ```yaml '{"ai_log":"%FILTER_STATE(wasm.ai_log:PLAIN)%"}' @@ -61,48 +61,6 @@ If you want to record ai-statistic related statistical values ​​​​in the ### Empty #### Metric -``` -route_upstream_model_metric_input_token{ai_route="llm",ai_cluster="outbound|443||qwen.dns",ai_model="qwen-turbo"} 10 -route_upstream_model_metric_llm_duration_count{ai_route="llm",ai_cluster="outbound|443||qwen.dns",ai_model="qwen-turbo"} 1 -route_upstream_model_metric_llm_first_token_duration{ai_route="llm",ai_cluster="outbound|443||qwen.dns",ai_model="qwen-turbo"} 309 -route_upstream_model_metric_llm_service_duration{ai_route="llm",ai_cluster="outbound|443||qwen.dns",ai_model="qwen-turbo"} 1955 -route_upstream_model_metric_output_token{ai_route="llm",ai_cluster="outbound|443||qwen.dns",ai_model="qwen-turbo"} 69 -``` - -#### Log -```json -{ - "ai_log":"{\"model\":\"qwen-turbo\",\"input_token\":\"10\",\"output_token\":\"69\",\"llm_first_token_duration\":\"309\",\"llm_service_duration\":\"1955\"}" -} -``` - -#### Trace -When the configuration is empty, no additional attributes will be added to the span. 
- -### Extract token usage information from non-openai protocols -When setting the protocol to original in ai-proxy, taking Alibaba Cloud Bailian as an example, you can make the following configuration to specify how to extract `model`, `input_token`, `output_token` - -```yaml -attributes: - - key: model - value_source: response_body - value: usage.models.0.model_id - apply_to_log: true - apply_to_span: false - - key: input_token - value_source: response_body - value: usage.models.0.input_tokens - apply_to_log: true - apply_to_span: false - - key: output_token - value_source: response_body - value: usage.models.0.output_tokens - apply_to_log: true - apply_to_span: false -``` -#### Metric - -Here is the English translation: ``` # counter, cumulative count of input tokens @@ -145,7 +103,47 @@ irate(route_upstream_model_consumer_metric_llm_duration_count[2m]) #### Log ```json { - "ai_log": "{\"model\":\"qwen-max\",\"input_token\":\"343\",\"output_token\":\"153\",\"llm_service_duration\":\"19110\"}" + "ai_log":"{\"model\":\"qwen-turbo\",\"input_token\":\"10\",\"output_token\":\"69\",\"llm_first_token_duration\":\"309\",\"llm_service_duration\":\"1955\"}" +} +``` + +#### Trace +When the configuration is empty, no additional attributes will be added to the span. 
+ +### Extract token usage information from non-openai protocols +When setting the protocol to original in ai-proxy, taking Alibaba Cloud Bailian as an example, you can make the following configuration to specify how to extract `model`, `input_token`, `output_token` + +```yaml +attributes: + - key: model + value_source: response_body + value: usage.models.0.model_id + apply_to_log: true + apply_to_span: false + - key: input_token + value_source: response_body + value: usage.models.0.input_tokens + apply_to_log: true + apply_to_span: false + - key: output_token + value_source: response_body + value: usage.models.0.output_tokens + apply_to_log: true + apply_to_span: false +``` +#### Metric + +``` +route_upstream_model_consumer_metric_input_token{ai_route="bailian",ai_cluster="qwen",ai_model="qwen-max"} 343 +route_upstream_model_consumer_metric_output_token{ai_route="bailian",ai_cluster="qwen",ai_model="qwen-max"} 153 +route_upstream_model_consumer_metric_llm_service_duration{ai_route="bailian",ai_cluster="qwen",ai_model="qwen-max"} 3725 +route_upstream_model_consumer_metric_llm_duration_count{ai_route="bailian",ai_cluster="qwen",ai_model="qwen-max"} 1 +``` + +#### Log +```json +{ + "ai_log": "{\"model\":\"qwen-max\",\"input_token\":\"343\",\"output_token\":\"153\",\"llm_service_duration\":\"19110\"}" } ``` @@ -164,7 +162,7 @@ attributes: ### Record questions and answers ```yaml attributes: - - key: question + - key: question value_source: request_body value: messages.@reverse.0.content apply_to_log: true