// Source file: higress/plugins/wasm-go/extensions/ai-token-ratelimit/main.go
// (listed as 324 lines, 11 KiB, Go)

// Copyright (c) 2024 Alibaba Group Holding Ltd.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package main
import (
"fmt"
"net"
"net/url"
"strconv"
"strings"
"ai-token-ratelimit/config"
"ai-token-ratelimit/util"
"github.com/higress-group/proxy-wasm-go-sdk/proxywasm"
"github.com/higress-group/proxy-wasm-go-sdk/proxywasm/types"
"github.com/higress-group/wasm-go/pkg/log"
"github.com/higress-group/wasm-go/pkg/tokenusage"
"github.com/higress-group/wasm-go/pkg/wrapper"
"github.com/tidwall/gjson"
"github.com/tidwall/resp"
)
// main is intentionally empty; the plugin is wired up in init through
// wrapper.SetCtx.
func main() {}
// init registers the "ai-token-ratelimit" plugin with the wrapper, wiring
// parseConfig as the configuration parser, onHttpRequestHeaders as the
// request-header handler and onHttpStreamingBody as the streaming
// response-body handler.
func init() {
wrapper.SetCtx(
"ai-token-ratelimit",
wrapper.ParseConfig(parseConfig),
wrapper.ProcessRequestHeaders(onHttpRequestHeaders),
wrapper.ProcessStreamingResponseBody(onHttpStreamingBody),
)
}
// Redis key layouts, Lua scripts, context keys, header names and metric names
// used by this file.
const (
// RedisKeyPrefix is the prefix shared by both Redis key formats below.
RedisKeyPrefix string = "higress-token-ratelimit"
// AiTokenGlobalRateLimitFormat is the Redis key layout for the global
// rate-limit mode:
// RedisKeyPrefix:<rule name>:global_threshold:<time window>:<limit count within the window>
AiTokenGlobalRateLimitFormat = RedisKeyPrefix + ":%s:global_threshold:%d:%d"
// AiTokenRateLimitFormat is the Redis key layout for the per-rule
// rate-limit mode:
// RedisKeyPrefix:<rule name>:<limit type>:<time window>:<limit count within the window>:<limit key name>:<actual value of the limit key>
AiTokenRateLimitFormat = RedisKeyPrefix + ":%s:%s:%d:%d:%s:%s"
// RequestPhaseFixedWindowScript is the Lua script evaluated by
// onHttpRequestHeaders with KEYS[1] = limit key, ARGV[1] = limit count and
// ARGV[2] = time window.
// When the key's TTL is negative (per the Redis TTL command: the key is
// missing or has no expiry) it stores ARGV[1] under the key with an EX expiry
// of ARGV[2] and returns {ARGV[1], ARGV[1], ARGV[2]}. Otherwise it returns
// {ARGV[1], current value of the key, remaining ttl}. The script only reads or
// initializes the counter; it does not decrement it.
RequestPhaseFixedWindowScript = `
local ttl = redis.call('ttl', KEYS[1])
if ttl < 0 then
redis.call('set', KEYS[1], ARGV[1], 'EX', ARGV[2])
return {ARGV[1], ARGV[1], ARGV[2]}
end
return {ARGV[1], redis.call('get', KEYS[1]), ttl}
`
// ResponsePhaseFixedWindowScript is the Lua script evaluated by
// onHttpStreamingBody with the same KEYS[1], ARGV[1] and ARGV[2] as the request
// phase plus ARGV[3] = number of tokens to deduct (input + output tokens).
// When the key's TTL is negative it stores ARGV[1]-ARGV[3] under the key with
// an EX expiry of ARGV[2]; otherwise it decrements the existing value by
// ARGV[3] with DECRBY. In both cases it returns {limit, new value, ttl}.
ResponsePhaseFixedWindowScript = `
local ttl = redis.call('ttl', KEYS[1])
if ttl < 0 then
redis.call('set', KEYS[1], ARGV[1]-ARGV[3], 'EX', ARGV[2])
return {ARGV[1], ARGV[1]-ARGV[3], ARGV[2]}
end
return {ARGV[1], redis.call('decrby', KEYS[1], ARGV[3]), ttl}
`
// LimitRedisContextKey is the HTTP-context key under which
// onHttpRequestHeaders stores the LimitRedisContext for the response phase.
LimitRedisContextKey = "LimitRedisContext"
// CookieHeader is the request header read when a rule limits by cookie.
CookieHeader = "cookie"
RateLimitResetHeader = "X-TokenRateLimit-Reset" // rate-limit reset time (returned when the limit is triggered)
TokenRateLimitCount = "token_ratelimit_count" // metric name
)
// LimitContext holds the three values returned by the request-phase Redis
// script, in reply order.
type LimitContext struct {
	// count is the first reply element: the configured limit (ARGV[1]).
	count int
	// remaining is the second reply element: the value currently stored under
	// the limit key. A negative value makes onHttpRequestHeaders reject the
	// request.
	remaining int
	// reset is the third reply element: the TTL reported for the limit key.
	// It is sent back in the X-TokenRateLimit-Reset header on rejection.
	reset int
}
// LimitRedisContext carries the rate-limit parameters chosen in the request
// phase over to the response phase (it is stored in the HTTP context under
// LimitRedisContextKey).
type LimitRedisContext struct {
	// key is the Redis key the token counter lives under.
	key string
	// count is the limit passed to the Redis scripts as ARGV[1].
	count int64
	// window is the time window passed to the Redis scripts as ARGV[2].
	window int64
}
// parseConfig populates cfg from the plugin's JSON configuration.
//
// It first initializes the Redis cluster client, then parses the rate-limit
// rules, and finally creates the (empty) counter-metric map. The first error
// encountered is returned unchanged; on error the metric map is not touched.
func parseConfig(json gjson.Result, cfg *config.AiTokenRateLimitConfig) error {
	if err := config.InitRedisClusterClient(json, cfg); err != nil {
		return err
	}
	if err := config.ParseAiTokenRateLimitConfig(json, cfg); err != nil {
		return err
	}

	// Metric settings: counters are stored in this map, keyed by metric name.
	cfg.CounterMetrics = make(map[string]proxywasm.MetricCounter)
	return nil
}
// onHttpRequestHeaders is the request-header handler of the plugin.
//
// It picks the limit that applies to the request (the global threshold when
// configured, otherwise the first matching rule item), records the chosen
// Redis key / count / window in the HTTP context for the response phase, and
// asks Redis for the current state of the counter.
//
// Return values:
//   - types.ActionContinue when no rule matches or the Redis call cannot be
//     issued (the request is let through);
//   - types.HeaderStopAllIterationAndWatermark when the Redis call was issued;
//     the callback then either resumes the request or rejects it.
func onHttpRequestHeaders(ctx wrapper.HttpContext, cfg config.AiTokenRateLimitConfig) types.Action {
	ctx.DisableReroute()

	var (
		redisKey string
		quota    int64
		window   int64
	)
	if global := cfg.GlobalThreshold; global != nil {
		// Global rate-limit mode.
		redisKey = fmt.Sprintf(AiTokenGlobalRateLimitFormat, cfg.RuleName, global.TimeWindow, global.Count)
		quota, window = global.Count, global.TimeWindow
	} else {
		// Per-rule rate-limit mode.
		matchedVal, rule, item := checkRequestAgainstLimitRule(ctx, cfg.RuleItems)
		if rule == nil || item == nil {
			// No rate-limit rule matched: let the request through.
			return types.ActionContinue
		}
		redisKey = fmt.Sprintf(AiTokenRateLimitFormat, cfg.RuleName, rule.LimitType, item.TimeWindow, item.Count, rule.Key, matchedVal)
		quota, window = item.Count, item.TimeWindow
	}

	// Remember the selected limit so the response phase can deduct tokens from
	// the same key.
	ctx.SetContext(LimitRedisContextKey, LimitRedisContext{
		key:    redisKey,
		count:  quota,
		window: window,
	})

	// onReply handles the script result: {limit, remaining, ttl}.
	onReply := func(response resp.Value) {
		fields := response.Array()
		if len(fields) != 3 {
			log.Errorf("redis response parse error, response: %v", response)
			proxywasm.ResumeHttpRequest()
			return
		}
		limit := LimitContext{
			count:     fields[0].Integer(),
			remaining: fields[1].Integer(),
			reset:     fields[2].Integer(),
		}
		if limit.remaining >= 0 {
			proxywasm.ResumeHttpRequest()
			return
		}
		// The counter is negative: the rate limit is triggered.
		ctx.SetUserAttribute("token_ratelimit_status", "limited")
		ctx.WriteUserAttributeToLogWithKey(wrapper.AILogKey)
		rejected(cfg, limit)
	}

	// Run the rate-limit check.
	scriptKeys := []interface{}{redisKey}
	scriptArgs := []interface{}{quota, window}
	if err := cfg.RedisClient.Eval(RequestPhaseFixedWindowScript, 1, scriptKeys, scriptArgs, onReply); err != nil {
		log.Errorf("redis call failed: %v", err)
		return types.ActionContinue
	}
	return types.HeaderStopAllIterationAndWatermark
}
// onHttpStreamingBody is the streaming response-body handler of the plugin.
//
// For every chunk it extracts token usage and, when the reported total is
// positive, stores the input and output token counts in the HTTP context.
// On the final chunk it deducts input+output tokens from the Redis counter
// selected during the request phase. The chunk itself is always returned
// unmodified.
//
// Nothing is deducted when no usage was recorded, when a stored token count is
// not an int64, or when no LimitRedisContext was stored for the request (for
// example because no rule matched).
func onHttpStreamingBody(ctx wrapper.HttpContext, cfg config.AiTokenRateLimitConfig, data []byte, endOfStream bool) []byte {
	if usage := tokenusage.GetTokenUsage(ctx, data); usage.TotalToken > 0 {
		ctx.SetContext(tokenusage.CtxKeyInputToken, usage.InputToken)
		ctx.SetContext(tokenusage.CtxKeyOutputToken, usage.OutputToken)
	}
	if !endOfStream {
		return data
	}

	// Comma-ok assertions cover both "value missing" (nil) and "unexpected
	// type" without panicking, and need only one lookup per value.
	inputToken, ok := ctx.GetContext(tokenusage.CtxKeyInputToken).(int64)
	if !ok {
		return data
	}
	outputToken, ok := ctx.GetContext(tokenusage.CtxKeyOutputToken).(int64)
	if !ok {
		return data
	}
	limitRedisContext, ok := ctx.GetContext(LimitRedisContextKey).(LimitRedisContext)
	if !ok {
		return data
	}

	keys := []interface{}{limitRedisContext.key}
	args := []interface{}{limitRedisContext.count, limitRedisContext.window, inputToken + outputToken}
	// No callback is passed (nil): the script's reply is not inspected here.
	if err := cfg.RedisClient.Eval(ResponsePhaseFixedWindowScript, 1, keys, args, nil); err != nil {
		log.Errorf("redis call failed: %v", err)
	}
	return data
}
// checkRequestAgainstLimitRule evaluates the rule items in order and returns
// the result of the first one for which hitRateRuleItem yields both a rule and
// a matching config item: the extracted value, the rule and the config item.
// It returns ("", nil, nil) when ruleItems is empty or nothing matches.
func checkRequestAgainstLimitRule(ctx wrapper.HttpContext, ruleItems []config.LimitRuleItem) (string, *config.LimitRuleItem, *config.LimitConfigItem) {
	for i := range ruleItems {
		if val, matchedRule, matchedItem := hitRateRuleItem(ctx, ruleItems[i]); matchedRule != nil && matchedItem != nil {
			return val, matchedRule, matchedItem
		}
	}
	return "", nil, nil
}
// hitRateRuleItem extracts the value the rule limits on (header, query
// parameter, consumer, cookie or client IP) from the current request and looks
// up the config item that applies to it.
//
// It returns the extracted value, a pointer to (a copy of) the rule, and the
// matching config item. Callers treat the rule as "not hit" unless both
// pointers are non-nil. When the value cannot be extracted the reason is
// logged and nil pointers are returned.
func hitRateRuleItem(ctx wrapper.HttpContext, rule config.LimitRuleItem) (string, *config.LimitRuleItem, *config.LimitConfigItem) {
	// matchByHeader resolves the value from the named request header; it is
	// shared by the header-based and consumer-based limit types.
	matchByHeader := func(name string) (string, *config.LimitRuleItem, *config.LimitConfigItem) {
		headerVal, err := proxywasm.GetHttpRequestHeader(name)
		if err != nil {
			return logDebugAndReturnEmpty("failed to get request header %s: %v", name, err)
		}
		return headerVal, &rule, findMatchingItem(rule.LimitType, rule.ConfigItems, headerVal)
	}

	switch rule.LimitType {
	// Limit by HTTP request header.
	case config.LimitByHeaderType, config.LimitByPerHeaderType:
		return matchByHeader(rule.Key)

	// Limit by HTTP request (query) parameter.
	case config.LimitByParamType, config.LimitByPerParamType:
		parsedPath, err := url.Parse(ctx.Path())
		if err != nil {
			return logDebugAndReturnEmpty("failed to parse request path: %v", err)
		}
		params, err := url.ParseQuery(parsedPath.RawQuery)
		if err != nil {
			return logDebugAndReturnEmpty("failed to parse query params: %v", err)
		}
		values, present := params[rule.Key]
		if !present {
			return logDebugAndReturnEmpty("request param %s is empty", rule.Key)
		}
		// Only the first occurrence of the parameter is considered.
		first := values[0]
		return first, &rule, findMatchingItem(rule.LimitType, rule.ConfigItems, first)

	// Limit by consumer.
	case config.LimitByConsumerType, config.LimitByPerConsumerType:
		return matchByHeader(util.ConsumerHeader)

	// Limit by the value of a key inside the cookie header.
	case config.LimitByCookieType, config.LimitByPerCookieType:
		rawCookie, err := proxywasm.GetHttpRequestHeader(CookieHeader)
		if err != nil {
			return logDebugAndReturnEmpty("failed to get request cookie : %v", err)
		}
		cookieVal := util.ExtractCookieValueByKey(rawCookie, rule.Key)
		if cookieVal == "" {
			return logDebugAndReturnEmpty("cookie key '%s' extracted from cookie '%s' is empty.", rule.Key, rawCookie)
		}
		return cookieVal, &rule, findMatchingItem(rule.LimitType, rule.ConfigItems, cookieVal)

	// Limit by client IP.
	case config.LimitByPerIpType:
		clientIP, err := getDownStreamIp(rule)
		if err != nil {
			log.Warnf("failed to get down stream ip: %v", err)
			return "", &rule, nil
		}
		// The first config item whose IP set contains the client IP wins.
		for _, item := range rule.ConfigItems {
			if _, found, _ := item.IpNet.Get(clientIP); found {
				return clientIP.String(), &rule, &item
			}
		}
	}
	return "", nil, nil
}
// logDebugAndReturnEmpty logs the formatted message at debug level and returns
// the "no match" triple ("", nil, nil), so callers in hitRateRuleItem can log
// and bail out in a single return statement.
func logDebugAndReturnEmpty(errMsg string, args ...interface{}) (string, *config.LimitRuleItem, *config.LimitConfigItem) {
log.Debugf(errMsg, args...)
return "", nil, nil
}
// findMatchingItem returns a pointer to a copy of the first config item that
// applies to key, or nil when none does.
//
// An item applies when its Key equals key. For the "per" limit types (per
// header, per param, per consumer, per cookie) an item additionally applies
// when its ConfigType is AllType, or when it is RegexpType and its Regexp
// matches key.
func findMatchingItem(limitType config.LimitRuleItemType, items []config.LimitConfigItem, key string) *config.LimitConfigItem {
	// The limit type does not change while scanning, so classify it once.
	perKind := false
	switch limitType {
	case config.LimitByPerHeaderType,
		config.LimitByPerParamType,
		config.LimitByPerConsumerType,
		config.LimitByPerCookieType:
		perKind = true
	}

	for _, candidate := range items {
		// "per" types: accept catch-all and regexp items.
		wildcardHit := perKind &&
			(candidate.ConfigType == config.AllType ||
				(candidate.ConfigType == config.RegexpType && candidate.Regexp.MatchString(key)))
		// Every type: accept an exact key match.
		if wildcardHit || candidate.Key == key {
			return &candidate
		}
	}
	return nil
}
// getDownStreamIp determines the client IP for a per-IP rule.
//
// When the rule's source type is HeaderSourceType the IP is taken from the
// first comma-separated element of the configured request header; otherwise it
// is taken from the "source.address" property. The raw value is passed through
// util.ParseIP and then net.ParseIP.
//
// It returns an error when the header/property cannot be read or when the
// resulting string is not a valid IP address.
func getDownStreamIp(rule config.LimitRuleItem) (net.IP, error) {
	var rawAddr string
	if rule.LimitByPerIp.SourceType == config.HeaderSourceType {
		headerVal, err := proxywasm.GetHttpRequestHeader(rule.LimitByPerIp.HeaderName)
		if err != nil {
			return nil, err
		}
		// Split first and trim afterwards: list elements may be surrounded by
		// optional whitespace ("1.2.3.4 , 5.6.7.8"), and trimming only the
		// whole header would leave a trailing space on the first element,
		// which net.ParseIP rejects.
		rawAddr = strings.TrimSpace(strings.Split(headerVal, ",")[0])
	} else {
		bs, err := proxywasm.GetProperty([]string{"source", "address"})
		if err != nil {
			return nil, err
		}
		rawAddr = string(bs)
	}

	ip := util.ParseIP(rawAddr)
	realIP := net.ParseIP(ip)
	if realIP == nil {
		return nil, fmt.Errorf("invalid ip[%s]", ip)
	}
	return realIP, nil
}
// generateMetricName builds the dotted metric name
// "route.<route>.upstream.<cluster>.model.<model>.consumer.<consumer>.metric.<metricName>".
// The arguments are inserted verbatim; no escaping is applied.
func generateMetricName(route, cluster, model, consumer, metricName string) string {
	const layout = "route.%s.upstream.%s.model.%s.consumer.%s.metric.%s"
	labels := []interface{}{route, cluster, model, consumer, metricName}
	return fmt.Sprintf(layout, labels...)
}
// rejected answers the current request with the configured rejection status
// code and message, adds the X-TokenRateLimit-Reset header carrying
// context.reset, and increments the token_ratelimit_count counter for the
// current route / cluster / consumer (the model label is always "none").
func rejected(cfg config.AiTokenRateLimitConfig, context LimitContext) {
	headers := map[string][]string{
		RateLimitResetHeader: {strconv.Itoa(context.reset)},
	}
	// A failure to send the local response was previously discarded; log it so
	// it can be diagnosed. The metric is still recorded below.
	if err := proxywasm.SendHttpResponseWithDetail(
		cfg.RejectedCode, "ai-token-ratelimit.rejected", util.ReconvertHeaders(headers), []byte(cfg.RejectedMsg), -1); err != nil {
		log.Errorf("failed to send rejected response: %v", err)
	}

	// Lookup errors are deliberately ignored: the metric is best-effort and is
	// emitted with whatever names the helpers returned.
	route, _ := util.GetRouteName()
	cluster, _ := util.GetClusterName()
	consumer, _ := util.GetConsumer()
	cfg.IncrementCounter(generateMetricName(route, cluster, "none", consumer, TokenRateLimitCount), 1)
}