mirror of
https://github.com/alibaba/higress.git
synced 2026-05-28 14:47:29 +08:00
feat: Support statistics images/audio/responses API Token usage (#2542)
Signed-off-by: Xijun Dai <daixijun1990@gmail.com>
This commit is contained in:
@@ -1,7 +1,6 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"fmt"
|
||||
@@ -10,13 +9,15 @@ import (
|
||||
"strconv"
|
||||
"strings"
|
||||
|
||||
"github.com/alibaba/higress/plugins/wasm-go/extensions/ai-quota/util"
|
||||
"github.com/higress-group/proxy-wasm-go-sdk/proxywasm"
|
||||
"github.com/higress-group/proxy-wasm-go-sdk/proxywasm/types"
|
||||
"github.com/higress-group/wasm-go/pkg/log"
|
||||
"github.com/higress-group/wasm-go/pkg/tokenusage"
|
||||
"github.com/higress-group/wasm-go/pkg/wrapper"
|
||||
"github.com/tidwall/gjson"
|
||||
"github.com/tidwall/resp"
|
||||
|
||||
"github.com/alibaba/higress/plugins/wasm-go/extensions/ai-quota/util"
|
||||
)
|
||||
|
||||
const (
|
||||
@@ -45,10 +46,10 @@ func main() {}
|
||||
func init() {
|
||||
wrapper.SetCtx(
|
||||
pluginName,
|
||||
wrapper.ParseConfigBy(parseConfig),
|
||||
wrapper.ProcessRequestHeadersBy(onHttpRequestHeaders),
|
||||
wrapper.ProcessRequestBodyBy(onHttpRequestBody),
|
||||
wrapper.ProcessStreamingResponseBodyBy(onHttpStreamingResponseBody),
|
||||
wrapper.ParseConfig(parseConfig),
|
||||
wrapper.ProcessRequestHeaders(onHttpRequestHeaders),
|
||||
wrapper.ProcessRequestBody(onHttpRequestBody),
|
||||
wrapper.ProcessStreamingResponseBody(onHttpStreamingResponseBody),
|
||||
)
|
||||
}
|
||||
|
||||
@@ -75,7 +76,7 @@ type RedisInfo struct {
|
||||
Database int `required:"false" yaml:"database" json:"database"`
|
||||
}
|
||||
|
||||
func parseConfig(json gjson.Result, config *QuotaConfig, log log.Log) error {
|
||||
func parseConfig(json gjson.Result, config *QuotaConfig) error {
|
||||
log.Debugf("parse config()")
|
||||
// admin
|
||||
config.AdminPath = json.Get("admin_path").String()
|
||||
@@ -129,7 +130,7 @@ func parseConfig(json gjson.Result, config *QuotaConfig, log log.Log) error {
|
||||
return config.redisClient.Init(username, password, int64(timeout), wrapper.WithDataBase(database))
|
||||
}
|
||||
|
||||
func onHttpRequestHeaders(context wrapper.HttpContext, config QuotaConfig, log log.Log) types.Action {
|
||||
func onHttpRequestHeaders(context wrapper.HttpContext, config QuotaConfig) types.Action {
|
||||
log.Debugf("onHttpRequestHeaders()")
|
||||
// get tokens
|
||||
consumer, err := proxywasm.GetHttpRequestHeader("x-mse-consumer")
|
||||
@@ -142,7 +143,7 @@ func onHttpRequestHeaders(context wrapper.HttpContext, config QuotaConfig, log l
|
||||
|
||||
rawPath := context.Path()
|
||||
path, _ := url.Parse(rawPath)
|
||||
chatMode, adminMode := getOperationMode(path.Path, config.AdminPath, log)
|
||||
chatMode, adminMode := getOperationMode(path.Path, config.AdminPath)
|
||||
context.SetContext("chatMode", chatMode)
|
||||
context.SetContext("adminMode", adminMode)
|
||||
context.SetContext("consumer", consumer)
|
||||
@@ -153,7 +154,7 @@ func onHttpRequestHeaders(context wrapper.HttpContext, config QuotaConfig, log l
|
||||
if chatMode == ChatModeAdmin {
|
||||
// query quota
|
||||
if adminMode == AdminModeQuery {
|
||||
return queryQuota(context, config, consumer, path, log)
|
||||
return queryQuota(context, config, consumer, path)
|
||||
}
|
||||
if adminMode == AdminModeRefresh || adminMode == AdminModeDelta {
|
||||
context.BufferRequestBody()
|
||||
@@ -186,7 +187,7 @@ func onHttpRequestHeaders(context wrapper.HttpContext, config QuotaConfig, log l
|
||||
return types.HeaderStopAllIterationAndWatermark
|
||||
}
|
||||
|
||||
func onHttpRequestBody(ctx wrapper.HttpContext, config QuotaConfig, body []byte, log log.Log) types.Action {
|
||||
func onHttpRequestBody(ctx wrapper.HttpContext, config QuotaConfig, body []byte) types.Action {
|
||||
log.Debugf("onHttpRequestBody()")
|
||||
chatMode, ok := ctx.GetContext("chatMode").(ChatMode)
|
||||
if !ok {
|
||||
@@ -205,16 +206,16 @@ func onHttpRequestBody(ctx wrapper.HttpContext, config QuotaConfig, body []byte,
|
||||
}
|
||||
|
||||
if adminMode == AdminModeRefresh {
|
||||
return refreshQuota(ctx, config, adminConsumer, string(body), log)
|
||||
return refreshQuota(ctx, config, adminConsumer, string(body))
|
||||
}
|
||||
if adminMode == AdminModeDelta {
|
||||
return deltaQuota(ctx, config, adminConsumer, string(body), log)
|
||||
return deltaQuota(ctx, config, adminConsumer, string(body))
|
||||
}
|
||||
|
||||
return types.ActionContinue
|
||||
}
|
||||
|
||||
func onHttpStreamingResponseBody(ctx wrapper.HttpContext, config QuotaConfig, data []byte, endOfStream bool, log log.Log) []byte {
|
||||
func onHttpStreamingResponseBody(ctx wrapper.HttpContext, config QuotaConfig, data []byte, endOfStream bool) []byte {
|
||||
chatMode, ok := ctx.GetContext("chatMode").(ChatMode)
|
||||
if !ok {
|
||||
return data
|
||||
@@ -222,11 +223,9 @@ func onHttpStreamingResponseBody(ctx wrapper.HttpContext, config QuotaConfig, da
|
||||
if chatMode == ChatModeNone || chatMode == ChatModeAdmin {
|
||||
return data
|
||||
}
|
||||
var inputToken, outputToken int64
|
||||
var consumer string
|
||||
if inputToken, outputToken, ok := getUsage(data); ok {
|
||||
ctx.SetContext("input_token", inputToken)
|
||||
ctx.SetContext("output_token", outputToken)
|
||||
if usage := tokenusage.GetTokenUsage(ctx, data); usage.TotalToken > 0 {
|
||||
ctx.SetContext(tokenusage.CtxKeyInputToken, usage.InputToken)
|
||||
ctx.SetContext(tokenusage.CtxKeyOutputToken, usage.OutputToken)
|
||||
}
|
||||
|
||||
// chat completion mode
|
||||
@@ -234,39 +233,19 @@ func onHttpStreamingResponseBody(ctx wrapper.HttpContext, config QuotaConfig, da
|
||||
return data
|
||||
}
|
||||
|
||||
if ctx.GetContext("input_token") == nil || ctx.GetContext("output_token") == nil || ctx.GetContext("consumer") == nil {
|
||||
if ctx.GetContext(tokenusage.CtxKeyInputToken) == nil || ctx.GetContext(tokenusage.CtxKeyOutputToken) == nil || ctx.GetContext("consumer") == nil {
|
||||
return data
|
||||
}
|
||||
|
||||
inputToken = ctx.GetContext("input_token").(int64)
|
||||
outputToken = ctx.GetContext("output_token").(int64)
|
||||
consumer = ctx.GetContext("consumer").(string)
|
||||
inputToken := ctx.GetContext(tokenusage.CtxKeyInputToken).(int64)
|
||||
outputToken := ctx.GetContext(tokenusage.CtxKeyOutputToken).(int64)
|
||||
consumer := ctx.GetContext("consumer").(string)
|
||||
totalToken := int(inputToken + outputToken)
|
||||
log.Debugf("update consumer:%s, totalToken:%d", consumer, totalToken)
|
||||
config.redisClient.DecrBy(config.RedisKeyPrefix+consumer, totalToken, nil)
|
||||
return data
|
||||
}
|
||||
|
||||
func getUsage(data []byte) (inputTokenUsage int64, outputTokenUsage int64, ok bool) {
|
||||
chunks := bytes.Split(bytes.TrimSpace(data), []byte("\n\n"))
|
||||
for _, chunk := range chunks {
|
||||
// the feature strings are used to identify the usage data, like:
|
||||
// {"model":"gpt2","usage":{"prompt_tokens":1,"completion_tokens":1}}
|
||||
if !bytes.Contains(chunk, []byte("prompt_tokens")) || !bytes.Contains(chunk, []byte("completion_tokens")) {
|
||||
continue
|
||||
}
|
||||
inputTokenObj := gjson.GetBytes(chunk, "usage.prompt_tokens")
|
||||
outputTokenObj := gjson.GetBytes(chunk, "usage.completion_tokens")
|
||||
if inputTokenObj.Exists() && outputTokenObj.Exists() {
|
||||
inputTokenUsage = inputTokenObj.Int()
|
||||
outputTokenUsage = outputTokenObj.Int()
|
||||
ok = true
|
||||
return
|
||||
}
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
func deniedNoKeyAuthData() types.Action {
|
||||
util.SendResponse(http.StatusUnauthorized, "ai-quota.no_key", "text/plain", "Request denied by ai quota check. No Key Authentication information found.")
|
||||
return types.ActionContinue
|
||||
@@ -277,7 +256,7 @@ func deniedUnauthorizedConsumer() types.Action {
|
||||
return types.ActionContinue
|
||||
}
|
||||
|
||||
func getOperationMode(path string, adminPath string, log log.Log) (ChatMode, AdminMode) {
|
||||
func getOperationMode(path string, adminPath string) (ChatMode, AdminMode) {
|
||||
fullAdminPath := "/v1/chat/completions" + adminPath
|
||||
if strings.HasSuffix(path, fullAdminPath+"/refresh") {
|
||||
return ChatModeAdmin, AdminModeRefresh
|
||||
@@ -294,7 +273,7 @@ func getOperationMode(path string, adminPath string, log log.Log) (ChatMode, Adm
|
||||
return ChatModeNone, AdminModeNone
|
||||
}
|
||||
|
||||
func refreshQuota(ctx wrapper.HttpContext, config QuotaConfig, adminConsumer string, body string, log log.Log) types.Action {
|
||||
func refreshQuota(ctx wrapper.HttpContext, config QuotaConfig, adminConsumer string, body string) types.Action {
|
||||
// check consumer
|
||||
if adminConsumer != config.AdminConsumer {
|
||||
util.SendResponse(http.StatusForbidden, "ai-quota.unauthorized", "text/plain", "Request denied by ai quota check. Unauthorized admin consumer.")
|
||||
@@ -328,7 +307,8 @@ func refreshQuota(ctx wrapper.HttpContext, config QuotaConfig, adminConsumer str
|
||||
|
||||
return types.ActionPause
|
||||
}
|
||||
func queryQuota(ctx wrapper.HttpContext, config QuotaConfig, adminConsumer string, url *url.URL, log log.Log) types.Action {
|
||||
|
||||
func queryQuota(ctx wrapper.HttpContext, config QuotaConfig, adminConsumer string, url *url.URL) types.Action {
|
||||
// check consumer
|
||||
if adminConsumer != config.AdminConsumer {
|
||||
util.SendResponse(http.StatusForbidden, "ai-quota.unauthorized", "text/plain", "Request denied by ai quota check. Unauthorized admin consumer.")
|
||||
@@ -371,7 +351,8 @@ func queryQuota(ctx wrapper.HttpContext, config QuotaConfig, adminConsumer strin
|
||||
}
|
||||
return types.ActionPause
|
||||
}
|
||||
func deltaQuota(ctx wrapper.HttpContext, config QuotaConfig, adminConsumer string, body string, log log.Log) types.Action {
|
||||
|
||||
func deltaQuota(ctx wrapper.HttpContext, config QuotaConfig, adminConsumer string, body string) types.Action {
|
||||
// check consumer
|
||||
if adminConsumer != config.AdminConsumer {
|
||||
util.SendResponse(http.StatusForbidden, "ai-quota.unauthorized", "text/plain", "Request denied by ai quota check. Unauthorized admin consumer.")
|
||||
|
||||
Reference in New Issue
Block a user