feat: ai-token-ratelimit support setting global rate limit thresholds for routes (#2667)

2026-06-04 01:57:26 +08:00 · 2025-07-28 08:14:35 +08:00
parent e6e4193679
commit 6a1557f6ac
14 changed files with 981 additions and 564 deletions
--- a/plugins/wasm-go/extensions/ai-token-ratelimit/main.go
+++ b/plugins/wasm-go/extensions/ai-token-ratelimit/main.go
@@ -21,6 +21,8 @@ import (
 	"strconv"
 	"strings"

+	"ai-token-ratelimit/config"
+	"ai-token-ratelimit/util"
 	"github.com/higress-group/proxy-wasm-go-sdk/proxywasm"
 	"github.com/higress-group/proxy-wasm-go-sdk/proxywasm/types"
 	"github.com/higress-group/wasm-go/pkg/log"
@@ -42,8 +44,12 @@ func init() {
 }

 const (
-	ClusterRateLimitFormat        string = "higress-token-ratelimit:%s:%s:%d:%d:%s:%s" // ruleName, limitType, timewindow, windowsize, key, val
-	RequestPhaseFixedWindowScript string = `
+	RedisKeyPrefix string = "higress-token-ratelimit"
+	// AiTokenGlobalRateLimitFormat  全局限流模式 redis key 为 RedisKeyPrefix:限流规则名称:global_threshold:时间窗口:窗口内限流数
+	AiTokenGlobalRateLimitFormat = RedisKeyPrefix + ":%s:global_threshold:%d:%d"
+	// AiTokenRateLimitFormat 规则限流模式 redis key 为 RedisKeyPrefix:限流规则名称:限流类型:时间窗口:窗口内限流数:限流key名称:限流key对应的实际值
+	AiTokenRateLimitFormat        = RedisKeyPrefix + ":%s:%s:%d:%d:%s:%s"
+	RequestPhaseFixedWindowScript = `
 	local ttl = redis.call('ttl', KEYS[1])
 	if ttl < 0 then
 	redis.call('set', KEYS[1], ARGV[1], 'EX', ARGV[2])
@@ -51,7 +57,7 @@ const (
 	end
 	return {ARGV[1], redis.call('get', KEYS[1]), ttl}
 	`
-	ResponsePhaseFixedWindowScript string = `
+	ResponsePhaseFixedWindowScript = `
 	local ttl = redis.call('ttl', KEYS[1])
 	if ttl < 0 then
 	redis.call('set', KEYS[1], ARGV[1]-ARGV[3], 'EX', ARGV[2])
@@ -60,14 +66,11 @@ const (
 	return {ARGV[1], redis.call('decrby', KEYS[1], ARGV[3]), ttl}
 	`

-	LimitRedisContextKey string = "LimitRedisContext"
+	LimitRedisContextKey = "LimitRedisContext"

-	ConsumerHeader string = "x-mse-consumer" // LimitByConsumer从该request header获取consumer的名字
-	CookieHeader   string = "cookie"
+	CookieHeader = "cookie"

-	RateLimitLimitHeader     string = "X-TokenRateLimit-Limit"     // 限制的总请求数
-	RateLimitRemainingHeader string = "X-TokenRateLimit-Remaining" // 剩余还可以发送的请求数
-	RateLimitResetHeader     string = "X-TokenRateLimit-Reset"     // 限流重置时间（触发限流时返回）
+	RateLimitResetHeader = "X-TokenRateLimit-Reset" // 限流重置时间（触发限流时返回）

 	TokenRateLimitCount = "token_ratelimit_count" // metric name
 )
@@ -84,42 +87,52 @@ type LimitRedisContext struct {
 	window int64
 }

-func parseConfig(json gjson.Result, config *ClusterKeyRateLimitConfig) error {
-	err := initRedisClusterClient(json, config)
+func parseConfig(json gjson.Result, cfg *config.AiTokenRateLimitConfig) error {
+	err := config.InitRedisClusterClient(json, cfg)
 	if err != nil {
 		return err
 	}
-	err = parseClusterKeyRateLimitConfig(json, config)
+	err = config.ParseAiTokenRateLimitConfig(json, cfg)
 	if err != nil {
 		return err
 	}
 	// Metric settings
-	config.counterMetrics = make(map[string]proxywasm.MetricCounter)
+	cfg.CounterMetrics = make(map[string]proxywasm.MetricCounter)
 	return nil
 }

-func onHttpRequestHeaders(ctx wrapper.HttpContext, config ClusterKeyRateLimitConfig) types.Action {
+func onHttpRequestHeaders(ctx wrapper.HttpContext, cfg config.AiTokenRateLimitConfig) types.Action {
 	ctx.DisableReroute()
-	// 判断是否命中限流规则
-	val, ruleItem, configItem := checkRequestAgainstLimitRule(ctx, config.ruleItems)
-	if ruleItem == nil || configItem == nil {
-		return types.ActionContinue
+	limitKey, count, timeWindow := "", int64(0), int64(0)
+
+	if cfg.GlobalThreshold != nil {
+		// 全局限流模式
+		limitKey = fmt.Sprintf(AiTokenGlobalRateLimitFormat, cfg.RuleName, cfg.GlobalThreshold.TimeWindow, cfg.GlobalThreshold.Count)
+		count = cfg.GlobalThreshold.Count
+		timeWindow = cfg.GlobalThreshold.TimeWindow
+	} else {
+		// 规则限流模式
+		val, ruleItem, configItem := checkRequestAgainstLimitRule(ctx, cfg.RuleItems)
+		if ruleItem == nil || configItem == nil {
+			// 没有匹配到限流规则直接返回
+			return types.ActionContinue
+		}
+
+		limitKey = fmt.Sprintf(AiTokenRateLimitFormat, cfg.RuleName, ruleItem.LimitType, configItem.TimeWindow, configItem.Count, ruleItem.Key, val)
+		count = configItem.Count
+		timeWindow = configItem.TimeWindow
 	}

-	// 构建redis限流key和参数
-	limitKey := fmt.Sprintf(ClusterRateLimitFormat, config.ruleName, ruleItem.limitType, configItem.timeWindow, configItem.count, ruleItem.key, val)
-	keys := []interface{}{limitKey}
-	args := []interface{}{configItem.count, configItem.timeWindow}
-
-	limitRedisContext := LimitRedisContext{
+	ctx.SetContext(LimitRedisContextKey, LimitRedisContext{
 		key:    limitKey,
-		count:  configItem.count,
-		window: configItem.timeWindow,
-	}
-	ctx.SetContext(LimitRedisContextKey, limitRedisContext)
+		count:  count,
+		window: timeWindow,
+	})

 	// 执行限流逻辑
-	err := config.redisClient.Eval(RequestPhaseFixedWindowScript, 1, keys, args, func(response resp.Value) {
+	keys := []interface{}{limitKey}
+	args := []interface{}{count, timeWindow}
+	err := cfg.RedisClient.Eval(RequestPhaseFixedWindowScript, 1, keys, args, func(response resp.Value) {
 		resultArray := response.Array()
 		if len(resultArray) != 3 {
 			log.Errorf("redis response parse error, response: %v", response)
@@ -135,7 +148,7 @@ func onHttpRequestHeaders(ctx wrapper.HttpContext, config ClusterKeyRateLimitCon
 			// 触发限流
 			ctx.SetUserAttribute("token_ratelimit_status", "limited")
 			ctx.WriteUserAttributeToLogWithKey(wrapper.AILogKey)
-			rejected(config, context)
+			rejected(cfg, context)
 		} else {
 			proxywasm.ResumeHttpRequest()
 		}
@@ -147,7 +160,7 @@ func onHttpRequestHeaders(ctx wrapper.HttpContext, config ClusterKeyRateLimitCon
 	return types.HeaderStopAllIterationAndWatermark
 }

-func onHttpStreamingBody(ctx wrapper.HttpContext, config ClusterKeyRateLimitConfig, data []byte, endOfStream bool) []byte {
+func onHttpStreamingBody(ctx wrapper.HttpContext, cfg config.AiTokenRateLimitConfig, data []byte, endOfStream bool) []byte {
 	if usage := tokenusage.GetTokenUsage(ctx, data); usage.TotalToken > 0 {
 		ctx.SetContext(tokenusage.CtxKeyInputToken, usage.InputToken)
 		ctx.SetContext(tokenusage.CtxKeyOutputToken, usage.OutputToken)
@@ -164,7 +177,7 @@ func onHttpStreamingBody(ctx wrapper.HttpContext, config ClusterKeyRateLimitConf
 		}
 		keys := []interface{}{limitRedisContext.key}
 		args := []interface{}{limitRedisContext.count, limitRedisContext.window, inputToken + outputToken}
-		err := config.redisClient.Eval(ResponsePhaseFixedWindowScript, 1, keys, args, nil)
+		err := cfg.RedisClient.Eval(ResponsePhaseFixedWindowScript, 1, keys, args, nil)
 		if err != nil {
 			log.Errorf("redis call failed: %v", err)
 		}
@@ -172,27 +185,29 @@ func onHttpStreamingBody(ctx wrapper.HttpContext, config ClusterKeyRateLimitConf
 	return data
 }

-func checkRequestAgainstLimitRule(ctx wrapper.HttpContext, ruleItems []LimitRuleItem) (string, *LimitRuleItem, *LimitConfigItem) {
-	for _, rule := range ruleItems {
-		val, ruleItem, configItem := hitRateRuleItem(ctx, rule)
-		if ruleItem != nil && configItem != nil {
-			return val, ruleItem, configItem
+func checkRequestAgainstLimitRule(ctx wrapper.HttpContext, ruleItems []config.LimitRuleItem) (string, *config.LimitRuleItem, *config.LimitConfigItem) {
+	if len(ruleItems) > 0 {
+		for _, rule := range ruleItems {
+			val, ruleItem, configItem := hitRateRuleItem(ctx, rule)
+			if ruleItem != nil && configItem != nil {
+				return val, ruleItem, configItem
+			}
 		}
 	}
 	return "", nil, nil
 }

-func hitRateRuleItem(ctx wrapper.HttpContext, rule LimitRuleItem) (string, *LimitRuleItem, *LimitConfigItem) {
-	switch rule.limitType {
+func hitRateRuleItem(ctx wrapper.HttpContext, rule config.LimitRuleItem) (string, *config.LimitRuleItem, *config.LimitConfigItem) {
+	switch rule.LimitType {
 	// 根据HTTP请求头限流
-	case limitByHeaderType, limitByPerHeaderType:
-		val, err := proxywasm.GetHttpRequestHeader(rule.key)
+	case config.LimitByHeaderType, config.LimitByPerHeaderType:
+		val, err := proxywasm.GetHttpRequestHeader(rule.Key)
 		if err != nil {
-			return logDebugAndReturnEmpty("failed to get request header %s: %v", rule.key, err)
+			return logDebugAndReturnEmpty("failed to get request header %s: %v", rule.Key, err)
 		}
-		return val, &rule, findMatchingItem(rule.limitType, rule.configItems, val)
+		return val, &rule, findMatchingItem(rule.LimitType, rule.ConfigItems, val)
 	// 根据HTTP请求参数限流
-	case limitByParamType, limitByPerParamType:
+	case config.LimitByParamType, config.LimitByPerParamType:
 		parse, err := url.Parse(ctx.Path())
 		if err != nil {
 			return logDebugAndReturnEmpty("failed to parse request path: %v", err)
@@ -201,38 +216,38 @@ func hitRateRuleItem(ctx wrapper.HttpContext, rule LimitRuleItem) (string, *Limi
 		if err != nil {
 			return logDebugAndReturnEmpty("failed to parse query params: %v", err)
 		}
-		val, ok := query[rule.key]
+		val, ok := query[rule.Key]
 		if !ok {
-			return logDebugAndReturnEmpty("request param %s is empty", rule.key)
+			return logDebugAndReturnEmpty("request param %s is empty", rule.Key)
 		}
-		return val[0], &rule, findMatchingItem(rule.limitType, rule.configItems, val[0])
+		return val[0], &rule, findMatchingItem(rule.LimitType, rule.ConfigItems, val[0])
 	// 根据consumer限流
-	case limitByConsumerType, limitByPerConsumerType:
-		val, err := proxywasm.GetHttpRequestHeader(ConsumerHeader)
+	case config.LimitByConsumerType, config.LimitByPerConsumerType:
+		val, err := proxywasm.GetHttpRequestHeader(util.ConsumerHeader)
 		if err != nil {
-			return logDebugAndReturnEmpty("failed to get request header %s: %v", ConsumerHeader, err)
+			return logDebugAndReturnEmpty("failed to get request header %s: %v", util.ConsumerHeader, err)
 		}
-		return val, &rule, findMatchingItem(rule.limitType, rule.configItems, val)
+		return val, &rule, findMatchingItem(rule.LimitType, rule.ConfigItems, val)
 	// 根据cookie中key值限流
-	case limitByCookieType, limitByPerCookieType:
+	case config.LimitByCookieType, config.LimitByPerCookieType:
 		cookie, err := proxywasm.GetHttpRequestHeader(CookieHeader)
 		if err != nil {
 			return logDebugAndReturnEmpty("failed to get request cookie : %v", err)
 		}
-		val := extractCookieValueByKey(cookie, rule.key)
+		val := util.ExtractCookieValueByKey(cookie, rule.Key)
 		if val == "" {
-			return logDebugAndReturnEmpty("cookie key '%s' extracted from cookie '%s' is empty.", rule.key, cookie)
+			return logDebugAndReturnEmpty("cookie key '%s' extracted from cookie '%s' is empty.", rule.Key, cookie)
 		}
-		return val, &rule, findMatchingItem(rule.limitType, rule.configItems, val)
+		return val, &rule, findMatchingItem(rule.LimitType, rule.ConfigItems, val)
 	// 根据客户端IP限流
-	case limitByPerIpType:
+	case config.LimitByPerIpType:
 		realIp, err := getDownStreamIp(rule)
 		if err != nil {
 			log.Warnf("failed to get down stream ip: %v", err)
 			return "", &rule, nil
 		}
-		for _, item := range rule.configItems {
-			if _, found, _ := item.ipNet.Get(realIp); !found {
+		for _, item := range rule.ConfigItems {
+			if _, found, _ := item.IpNet.Get(realIp); !found {
 				continue
 			}
 			return realIp.String(), &rule, &item
@@ -241,37 +256,37 @@ func hitRateRuleItem(ctx wrapper.HttpContext, rule LimitRuleItem) (string, *Limi
 	return "", nil, nil
 }

-func logDebugAndReturnEmpty(errMsg string, args ...interface{}) (string, *LimitRuleItem, *LimitConfigItem) {
+func logDebugAndReturnEmpty(errMsg string, args ...interface{}) (string, *config.LimitRuleItem, *config.LimitConfigItem) {
 	log.Debugf(errMsg, args...)
 	return "", nil, nil
 }

-func findMatchingItem(limitType limitRuleItemType, items []LimitConfigItem, key string) *LimitConfigItem {
+func findMatchingItem(limitType config.LimitRuleItemType, items []config.LimitConfigItem, key string) *config.LimitConfigItem {
 	for _, item := range items {
 		// per类型,检查allType和regexpType
-		if limitType == limitByPerHeaderType ||
-			limitType == limitByPerParamType ||
-			limitType == limitByPerConsumerType ||
-			limitType == limitByPerCookieType {
-			if item.configType == allType || (item.configType == regexpType && item.regexp.MatchString(key)) {
+		if limitType == config.LimitByPerHeaderType ||
+			limitType == config.LimitByPerParamType ||
+			limitType == config.LimitByPerConsumerType ||
+			limitType == config.LimitByPerCookieType {
+			if item.ConfigType == config.AllType || (item.ConfigType == config.RegexpType && item.Regexp.MatchString(key)) {
 				return &item
 			}
 		}
 		// 其他类型,直接比较key
-		if item.key == key {
+		if item.Key == key {
 			return &item
 		}
 	}
 	return nil
 }

-func getDownStreamIp(rule LimitRuleItem) (net.IP, error) {
+func getDownStreamIp(rule config.LimitRuleItem) (net.IP, error) {
 	var (
 		realIpStr string
 		err       error
 	)
-	if rule.limitByPerIp.sourceType == HeaderSourceType {
-		realIpStr, err = proxywasm.GetHttpRequestHeader(rule.limitByPerIp.headerName)
+	if rule.LimitByPerIp.SourceType == config.HeaderSourceType {
+		realIpStr, err = proxywasm.GetHttpRequestHeader(rule.LimitByPerIp.HeaderName)
 		if err == nil {
 			realIpStr = strings.Split(strings.Trim(realIpStr, " "), ",")[0]
 		}
@@ -283,7 +298,7 @@ func getDownStreamIp(rule LimitRuleItem) (net.IP, error) {
 	if err != nil {
 		return nil, err
 	}
-	ip := parseIP(realIpStr)
+	ip := util.ParseIP(realIpStr)
 	realIP := net.ParseIP(ip)
 	if realIP == nil {
 		return nil, fmt.Errorf("invalid ip[%s]", ip)
@@ -291,54 +306,18 @@ func getDownStreamIp(rule LimitRuleItem) (net.IP, error) {
 	return realIP, nil
 }

-func (config *ClusterKeyRateLimitConfig) incrementCounter(metricName string, inc uint64) {
-	if inc == 0 {
-		return
-	}
-	counter, ok := config.counterMetrics[metricName]
-	if !ok {
-		counter = proxywasm.DefineCounterMetric(metricName)
-		config.counterMetrics[metricName] = counter
-	}
-	counter.Increment(inc)
-}
-
 func generateMetricName(route, cluster, model, consumer, metricName string) string {
 	return fmt.Sprintf("route.%s.upstream.%s.model.%s.consumer.%s.metric.%s", route, cluster, model, consumer, metricName)
 }

-func getRouteName() (string, error) {
-	if raw, err := proxywasm.GetProperty([]string{"route_name"}); err != nil {
-		return "-", err
-	} else {
-		return string(raw), nil
-	}
-}
-
-func getClusterName() (string, error) {
-	if raw, err := proxywasm.GetProperty([]string{"cluster_name"}); err != nil {
-		return "-", err
-	} else {
-		return string(raw), nil
-	}
-}
-
-func getConsumer() (string, error) {
-	if consumer, err := proxywasm.GetHttpRequestHeader(ConsumerHeader); err != nil {
-		return "none", err
-	} else {
-		return consumer, nil
-	}
-}
-
-func rejected(config ClusterKeyRateLimitConfig, context LimitContext) {
+func rejected(cfg config.AiTokenRateLimitConfig, context LimitContext) {
 	headers := make(map[string][]string)
 	headers[RateLimitResetHeader] = []string{strconv.Itoa(context.reset)}
 	_ = proxywasm.SendHttpResponseWithDetail(
-		config.rejectedCode, "ai-token-ratelimit.rejected", reconvertHeaders(headers), []byte(config.rejectedMsg), -1)
+		cfg.RejectedCode, "ai-token-ratelimit.rejected", util.ReconvertHeaders(headers), []byte(cfg.RejectedMsg), -1)

-	route, _ := getRouteName()
-	cluster, _ := getClusterName()
-	consumer, _ := getConsumer()
-	config.incrementCounter(generateMetricName(route, cluster, "none", consumer, TokenRateLimitCount), 1)
+	route, _ := util.GetRouteName()
+	cluster, _ := util.GetClusterName()
+	consumer, _ := util.GetConsumer()
+	cfg.IncrementCounter(generateMetricName(route, cluster, "none", consumer, TokenRateLimitCount), 1)
 }