feat: ai-token-ratelimit support setting global rate limit thresholds for routes​ (#2667)

This commit is contained in:
韩贤涛
2025-07-28 08:14:35 +08:00
committed by GitHub
parent e6e4193679
commit 6a1557f6ac
14 changed files with 981 additions and 564 deletions

View File

@@ -21,6 +21,8 @@ import (
"strconv"
"strings"
"ai-token-ratelimit/config"
"ai-token-ratelimit/util"
"github.com/higress-group/proxy-wasm-go-sdk/proxywasm"
"github.com/higress-group/proxy-wasm-go-sdk/proxywasm/types"
"github.com/higress-group/wasm-go/pkg/log"
@@ -42,8 +44,12 @@ func init() {
}
const (
ClusterRateLimitFormat string = "higress-token-ratelimit:%s:%s:%d:%d:%s:%s" // ruleName, limitType, timewindow, windowsize, key, val
RequestPhaseFixedWindowScript string = `
RedisKeyPrefix string = "higress-token-ratelimit"
// AiTokenGlobalRateLimitFormat 全局限流模式 redis key 为 RedisKeyPrefix:限流规则名称:global_threshold:时间窗口:窗口内限流数
AiTokenGlobalRateLimitFormat = RedisKeyPrefix + ":%s:global_threshold:%d:%d"
// AiTokenRateLimitFormat 规则限流模式 redis key 为 RedisKeyPrefix:限流规则名称:限流类型:时间窗口:窗口内限流数:限流key名称:限流key对应的实际值
AiTokenRateLimitFormat = RedisKeyPrefix + ":%s:%s:%d:%d:%s:%s"
RequestPhaseFixedWindowScript = `
local ttl = redis.call('ttl', KEYS[1])
if ttl < 0 then
redis.call('set', KEYS[1], ARGV[1], 'EX', ARGV[2])
@@ -51,7 +57,7 @@ const (
end
return {ARGV[1], redis.call('get', KEYS[1]), ttl}
`
ResponsePhaseFixedWindowScript string = `
ResponsePhaseFixedWindowScript = `
local ttl = redis.call('ttl', KEYS[1])
if ttl < 0 then
redis.call('set', KEYS[1], ARGV[1]-ARGV[3], 'EX', ARGV[2])
@@ -60,14 +66,11 @@ const (
return {ARGV[1], redis.call('decrby', KEYS[1], ARGV[3]), ttl}
`
LimitRedisContextKey string = "LimitRedisContext"
LimitRedisContextKey = "LimitRedisContext"
ConsumerHeader string = "x-mse-consumer" // LimitByConsumer从该request header获取consumer的名字
CookieHeader string = "cookie"
CookieHeader = "cookie"
RateLimitLimitHeader string = "X-TokenRateLimit-Limit" // 限制的总请求数
RateLimitRemainingHeader string = "X-TokenRateLimit-Remaining" // 剩余还可以发送的请求数
RateLimitResetHeader string = "X-TokenRateLimit-Reset" // 限流重置时间(触发限流时返回)
RateLimitResetHeader = "X-TokenRateLimit-Reset" // 限流重置时间(触发限流时返回)
TokenRateLimitCount = "token_ratelimit_count" // metric name
)
@@ -84,42 +87,52 @@ type LimitRedisContext struct {
window int64
}
func parseConfig(json gjson.Result, config *ClusterKeyRateLimitConfig) error {
err := initRedisClusterClient(json, config)
func parseConfig(json gjson.Result, cfg *config.AiTokenRateLimitConfig) error {
err := config.InitRedisClusterClient(json, cfg)
if err != nil {
return err
}
err = parseClusterKeyRateLimitConfig(json, config)
err = config.ParseAiTokenRateLimitConfig(json, cfg)
if err != nil {
return err
}
// Metric settings
config.counterMetrics = make(map[string]proxywasm.MetricCounter)
cfg.CounterMetrics = make(map[string]proxywasm.MetricCounter)
return nil
}
func onHttpRequestHeaders(ctx wrapper.HttpContext, config ClusterKeyRateLimitConfig) types.Action {
func onHttpRequestHeaders(ctx wrapper.HttpContext, cfg config.AiTokenRateLimitConfig) types.Action {
ctx.DisableReroute()
// 判断是否命中限流规则
val, ruleItem, configItem := checkRequestAgainstLimitRule(ctx, config.ruleItems)
if ruleItem == nil || configItem == nil {
return types.ActionContinue
limitKey, count, timeWindow := "", int64(0), int64(0)
if cfg.GlobalThreshold != nil {
// 全局限流模式
limitKey = fmt.Sprintf(AiTokenGlobalRateLimitFormat, cfg.RuleName, cfg.GlobalThreshold.TimeWindow, cfg.GlobalThreshold.Count)
count = cfg.GlobalThreshold.Count
timeWindow = cfg.GlobalThreshold.TimeWindow
} else {
// 规则限流模式
val, ruleItem, configItem := checkRequestAgainstLimitRule(ctx, cfg.RuleItems)
if ruleItem == nil || configItem == nil {
// 没有匹配到限流规则直接返回
return types.ActionContinue
}
limitKey = fmt.Sprintf(AiTokenRateLimitFormat, cfg.RuleName, ruleItem.LimitType, configItem.TimeWindow, configItem.Count, ruleItem.Key, val)
count = configItem.Count
timeWindow = configItem.TimeWindow
}
// 构建redis限流key和参数
limitKey := fmt.Sprintf(ClusterRateLimitFormat, config.ruleName, ruleItem.limitType, configItem.timeWindow, configItem.count, ruleItem.key, val)
keys := []interface{}{limitKey}
args := []interface{}{configItem.count, configItem.timeWindow}
limitRedisContext := LimitRedisContext{
ctx.SetContext(LimitRedisContextKey, LimitRedisContext{
key: limitKey,
count: configItem.count,
window: configItem.timeWindow,
}
ctx.SetContext(LimitRedisContextKey, limitRedisContext)
count: count,
window: timeWindow,
})
// 执行限流逻辑
err := config.redisClient.Eval(RequestPhaseFixedWindowScript, 1, keys, args, func(response resp.Value) {
keys := []interface{}{limitKey}
args := []interface{}{count, timeWindow}
err := cfg.RedisClient.Eval(RequestPhaseFixedWindowScript, 1, keys, args, func(response resp.Value) {
resultArray := response.Array()
if len(resultArray) != 3 {
log.Errorf("redis response parse error, response: %v", response)
@@ -135,7 +148,7 @@ func onHttpRequestHeaders(ctx wrapper.HttpContext, config ClusterKeyRateLimitCon
// 触发限流
ctx.SetUserAttribute("token_ratelimit_status", "limited")
ctx.WriteUserAttributeToLogWithKey(wrapper.AILogKey)
rejected(config, context)
rejected(cfg, context)
} else {
proxywasm.ResumeHttpRequest()
}
@@ -147,7 +160,7 @@ func onHttpRequestHeaders(ctx wrapper.HttpContext, config ClusterKeyRateLimitCon
return types.HeaderStopAllIterationAndWatermark
}
func onHttpStreamingBody(ctx wrapper.HttpContext, config ClusterKeyRateLimitConfig, data []byte, endOfStream bool) []byte {
func onHttpStreamingBody(ctx wrapper.HttpContext, cfg config.AiTokenRateLimitConfig, data []byte, endOfStream bool) []byte {
if usage := tokenusage.GetTokenUsage(ctx, data); usage.TotalToken > 0 {
ctx.SetContext(tokenusage.CtxKeyInputToken, usage.InputToken)
ctx.SetContext(tokenusage.CtxKeyOutputToken, usage.OutputToken)
@@ -164,7 +177,7 @@ func onHttpStreamingBody(ctx wrapper.HttpContext, config ClusterKeyRateLimitConf
}
keys := []interface{}{limitRedisContext.key}
args := []interface{}{limitRedisContext.count, limitRedisContext.window, inputToken + outputToken}
err := config.redisClient.Eval(ResponsePhaseFixedWindowScript, 1, keys, args, nil)
err := cfg.RedisClient.Eval(ResponsePhaseFixedWindowScript, 1, keys, args, nil)
if err != nil {
log.Errorf("redis call failed: %v", err)
}
@@ -172,27 +185,29 @@ func onHttpStreamingBody(ctx wrapper.HttpContext, config ClusterKeyRateLimitConf
return data
}
func checkRequestAgainstLimitRule(ctx wrapper.HttpContext, ruleItems []LimitRuleItem) (string, *LimitRuleItem, *LimitConfigItem) {
for _, rule := range ruleItems {
val, ruleItem, configItem := hitRateRuleItem(ctx, rule)
if ruleItem != nil && configItem != nil {
return val, ruleItem, configItem
func checkRequestAgainstLimitRule(ctx wrapper.HttpContext, ruleItems []config.LimitRuleItem) (string, *config.LimitRuleItem, *config.LimitConfigItem) {
if len(ruleItems) > 0 {
for _, rule := range ruleItems {
val, ruleItem, configItem := hitRateRuleItem(ctx, rule)
if ruleItem != nil && configItem != nil {
return val, ruleItem, configItem
}
}
}
return "", nil, nil
}
func hitRateRuleItem(ctx wrapper.HttpContext, rule LimitRuleItem) (string, *LimitRuleItem, *LimitConfigItem) {
switch rule.limitType {
func hitRateRuleItem(ctx wrapper.HttpContext, rule config.LimitRuleItem) (string, *config.LimitRuleItem, *config.LimitConfigItem) {
switch rule.LimitType {
// 根据HTTP请求头限流
case limitByHeaderType, limitByPerHeaderType:
val, err := proxywasm.GetHttpRequestHeader(rule.key)
case config.LimitByHeaderType, config.LimitByPerHeaderType:
val, err := proxywasm.GetHttpRequestHeader(rule.Key)
if err != nil {
return logDebugAndReturnEmpty("failed to get request header %s: %v", rule.key, err)
return logDebugAndReturnEmpty("failed to get request header %s: %v", rule.Key, err)
}
return val, &rule, findMatchingItem(rule.limitType, rule.configItems, val)
return val, &rule, findMatchingItem(rule.LimitType, rule.ConfigItems, val)
// 根据HTTP请求参数限流
case limitByParamType, limitByPerParamType:
case config.LimitByParamType, config.LimitByPerParamType:
parse, err := url.Parse(ctx.Path())
if err != nil {
return logDebugAndReturnEmpty("failed to parse request path: %v", err)
@@ -201,38 +216,38 @@ func hitRateRuleItem(ctx wrapper.HttpContext, rule LimitRuleItem) (string, *Limi
if err != nil {
return logDebugAndReturnEmpty("failed to parse query params: %v", err)
}
val, ok := query[rule.key]
val, ok := query[rule.Key]
if !ok {
return logDebugAndReturnEmpty("request param %s is empty", rule.key)
return logDebugAndReturnEmpty("request param %s is empty", rule.Key)
}
return val[0], &rule, findMatchingItem(rule.limitType, rule.configItems, val[0])
return val[0], &rule, findMatchingItem(rule.LimitType, rule.ConfigItems, val[0])
// 根据consumer限流
case limitByConsumerType, limitByPerConsumerType:
val, err := proxywasm.GetHttpRequestHeader(ConsumerHeader)
case config.LimitByConsumerType, config.LimitByPerConsumerType:
val, err := proxywasm.GetHttpRequestHeader(util.ConsumerHeader)
if err != nil {
return logDebugAndReturnEmpty("failed to get request header %s: %v", ConsumerHeader, err)
return logDebugAndReturnEmpty("failed to get request header %s: %v", util.ConsumerHeader, err)
}
return val, &rule, findMatchingItem(rule.limitType, rule.configItems, val)
return val, &rule, findMatchingItem(rule.LimitType, rule.ConfigItems, val)
// 根据cookie中key值限流
case limitByCookieType, limitByPerCookieType:
case config.LimitByCookieType, config.LimitByPerCookieType:
cookie, err := proxywasm.GetHttpRequestHeader(CookieHeader)
if err != nil {
return logDebugAndReturnEmpty("failed to get request cookie : %v", err)
}
val := extractCookieValueByKey(cookie, rule.key)
val := util.ExtractCookieValueByKey(cookie, rule.Key)
if val == "" {
return logDebugAndReturnEmpty("cookie key '%s' extracted from cookie '%s' is empty.", rule.key, cookie)
return logDebugAndReturnEmpty("cookie key '%s' extracted from cookie '%s' is empty.", rule.Key, cookie)
}
return val, &rule, findMatchingItem(rule.limitType, rule.configItems, val)
return val, &rule, findMatchingItem(rule.LimitType, rule.ConfigItems, val)
// 根据客户端IP限流
case limitByPerIpType:
case config.LimitByPerIpType:
realIp, err := getDownStreamIp(rule)
if err != nil {
log.Warnf("failed to get down stream ip: %v", err)
return "", &rule, nil
}
for _, item := range rule.configItems {
if _, found, _ := item.ipNet.Get(realIp); !found {
for _, item := range rule.ConfigItems {
if _, found, _ := item.IpNet.Get(realIp); !found {
continue
}
return realIp.String(), &rule, &item
@@ -241,37 +256,37 @@ func hitRateRuleItem(ctx wrapper.HttpContext, rule LimitRuleItem) (string, *Limi
return "", nil, nil
}
func logDebugAndReturnEmpty(errMsg string, args ...interface{}) (string, *LimitRuleItem, *LimitConfigItem) {
func logDebugAndReturnEmpty(errMsg string, args ...interface{}) (string, *config.LimitRuleItem, *config.LimitConfigItem) {
log.Debugf(errMsg, args...)
return "", nil, nil
}
func findMatchingItem(limitType limitRuleItemType, items []LimitConfigItem, key string) *LimitConfigItem {
func findMatchingItem(limitType config.LimitRuleItemType, items []config.LimitConfigItem, key string) *config.LimitConfigItem {
for _, item := range items {
// per类型,检查allType和regexpType
if limitType == limitByPerHeaderType ||
limitType == limitByPerParamType ||
limitType == limitByPerConsumerType ||
limitType == limitByPerCookieType {
if item.configType == allType || (item.configType == regexpType && item.regexp.MatchString(key)) {
if limitType == config.LimitByPerHeaderType ||
limitType == config.LimitByPerParamType ||
limitType == config.LimitByPerConsumerType ||
limitType == config.LimitByPerCookieType {
if item.ConfigType == config.AllType || (item.ConfigType == config.RegexpType && item.Regexp.MatchString(key)) {
return &item
}
}
// 其他类型,直接比较key
if item.key == key {
if item.Key == key {
return &item
}
}
return nil
}
func getDownStreamIp(rule LimitRuleItem) (net.IP, error) {
func getDownStreamIp(rule config.LimitRuleItem) (net.IP, error) {
var (
realIpStr string
err error
)
if rule.limitByPerIp.sourceType == HeaderSourceType {
realIpStr, err = proxywasm.GetHttpRequestHeader(rule.limitByPerIp.headerName)
if rule.LimitByPerIp.SourceType == config.HeaderSourceType {
realIpStr, err = proxywasm.GetHttpRequestHeader(rule.LimitByPerIp.HeaderName)
if err == nil {
realIpStr = strings.Split(strings.Trim(realIpStr, " "), ",")[0]
}
@@ -283,7 +298,7 @@ func getDownStreamIp(rule LimitRuleItem) (net.IP, error) {
if err != nil {
return nil, err
}
ip := parseIP(realIpStr)
ip := util.ParseIP(realIpStr)
realIP := net.ParseIP(ip)
if realIP == nil {
return nil, fmt.Errorf("invalid ip[%s]", ip)
@@ -291,54 +306,18 @@ func getDownStreamIp(rule LimitRuleItem) (net.IP, error) {
return realIP, nil
}
func (config *ClusterKeyRateLimitConfig) incrementCounter(metricName string, inc uint64) {
if inc == 0 {
return
}
counter, ok := config.counterMetrics[metricName]
if !ok {
counter = proxywasm.DefineCounterMetric(metricName)
config.counterMetrics[metricName] = counter
}
counter.Increment(inc)
}
func generateMetricName(route, cluster, model, consumer, metricName string) string {
return fmt.Sprintf("route.%s.upstream.%s.model.%s.consumer.%s.metric.%s", route, cluster, model, consumer, metricName)
}
func getRouteName() (string, error) {
if raw, err := proxywasm.GetProperty([]string{"route_name"}); err != nil {
return "-", err
} else {
return string(raw), nil
}
}
func getClusterName() (string, error) {
if raw, err := proxywasm.GetProperty([]string{"cluster_name"}); err != nil {
return "-", err
} else {
return string(raw), nil
}
}
func getConsumer() (string, error) {
if consumer, err := proxywasm.GetHttpRequestHeader(ConsumerHeader); err != nil {
return "none", err
} else {
return consumer, nil
}
}
func rejected(config ClusterKeyRateLimitConfig, context LimitContext) {
func rejected(cfg config.AiTokenRateLimitConfig, context LimitContext) {
headers := make(map[string][]string)
headers[RateLimitResetHeader] = []string{strconv.Itoa(context.reset)}
_ = proxywasm.SendHttpResponseWithDetail(
config.rejectedCode, "ai-token-ratelimit.rejected", reconvertHeaders(headers), []byte(config.rejectedMsg), -1)
cfg.RejectedCode, "ai-token-ratelimit.rejected", util.ReconvertHeaders(headers), []byte(cfg.RejectedMsg), -1)
route, _ := getRouteName()
cluster, _ := getClusterName()
consumer, _ := getConsumer()
config.incrementCounter(generateMetricName(route, cluster, "none", consumer, TokenRateLimitCount), 1)
route, _ := util.GetRouteName()
cluster, _ := util.GetClusterName()
consumer, _ := util.GetConsumer()
cfg.IncrementCounter(generateMetricName(route, cluster, "none", consumer, TokenRateLimitCount), 1)
}