mirror of
https://github.com/alibaba/higress.git
synced 2026-03-07 18:10:54 +08:00
362 lines
12 KiB
Go
362 lines
12 KiB
Go
// Copyright (c) 2024 Alibaba Group Holding Ltd.
|
|
//
|
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
|
// you may not use this file except in compliance with the License.
|
|
// You may obtain a copy of the License at
|
|
//
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
//
|
|
// Unless required by applicable law or agreed to in writing, software
|
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
// See the License for the specific language governing permissions and
|
|
// limitations under the License.
|
|
|
|
package main
|
|
|
|
import (
|
|
"bytes"
|
|
"fmt"
|
|
"net"
|
|
"net/url"
|
|
"strconv"
|
|
"strings"
|
|
|
|
"github.com/alibaba/higress/plugins/wasm-go/pkg/wrapper"
|
|
"github.com/higress-group/proxy-wasm-go-sdk/proxywasm"
|
|
"github.com/higress-group/proxy-wasm-go-sdk/proxywasm/types"
|
|
"github.com/tidwall/gjson"
|
|
"github.com/tidwall/resp"
|
|
)
|
|
|
|
// main registers the plugin with the Higress wasm wrapper: configuration
// parsing, request-phase token rate limiting, and response-phase token
// accounting on the streaming body.
func main() {
	wrapper.SetCtx(
		"ai-token-ratelimit",
		wrapper.ParseConfigBy(parseConfig),
		wrapper.ProcessRequestHeadersBy(onHttpRequestHeaders),
		wrapper.ProcessStreamingResponseBodyBy(onHttpStreamingBody),
	)
}
|
|
|
|
const (
	// ClusterRateLimitFormat is the Redis key template:
	// ruleName, limitType, timeWindow, windowSize, key, val.
	ClusterRateLimitFormat string = "higress-token-ratelimit:%s:%s:%d:%d:%s:%s" // ruleName, limitType, timewindow, windowsize, key, val

	// RequestPhaseFixedWindowScript initializes the fixed window on first use
	// (KEYS[1] = limit key, ARGV[1] = token budget, ARGV[2] = window seconds)
	// and returns {limit, remaining, ttl}.
	RequestPhaseFixedWindowScript string = `
local ttl = redis.call('ttl', KEYS[1])
if ttl < 0 then
redis.call('set', KEYS[1], ARGV[1], 'EX', ARGV[2])
return {ARGV[1], ARGV[1], ARGV[2]}
end
return {ARGV[1], redis.call('get', KEYS[1]), ttl}
`
	// ResponsePhaseFixedWindowScript deducts the consumed tokens (ARGV[3])
	// from the current window, creating the window if it expired meanwhile,
	// and returns {limit, remaining, ttl}.
	ResponsePhaseFixedWindowScript string = `
local ttl = redis.call('ttl', KEYS[1])
if ttl < 0 then
redis.call('set', KEYS[1], ARGV[1]-ARGV[3], 'EX', ARGV[2])
return {ARGV[1], ARGV[1]-ARGV[3], ARGV[2]}
end
return {ARGV[1], redis.call('decrby', KEYS[1], ARGV[3]), ttl}
`

	// LimitRedisContextKey is the HTTP-context key under which the request
	// phase stashes the LimitRedisContext for the response phase.
	LimitRedisContextKey string = "LimitRedisContext"

	ConsumerHeader string = "x-mse-consumer" // LimitByConsumer reads the consumer name from this request header
	CookieHeader   string = "cookie"

	RateLimitLimitHeader     string = "X-TokenRateLimit-Limit"     // total allowed tokens in the window
	RateLimitRemainingHeader string = "X-TokenRateLimit-Remaining" // tokens remaining in the window
	RateLimitResetHeader     string = "X-TokenRateLimit-Reset"     // seconds until the window resets (sent when limited)

	TokenRateLimitCount = "token_ratelimit_count" // metric name
)
|
|
|
|
// LimitContext carries the parsed result of the request-phase Redis script:
// {limit, remaining, ttl}.
type LimitContext struct {
	count     int // configured token budget for the window
	remaining int // tokens left in the current window (negative once exceeded)
	reset     int // seconds until the window resets (Redis TTL)
}
|
|
|
|
// LimitRedisContext is stored on the HTTP context during the request phase so
// the response phase can deduct consumed tokens against the same Redis key.
type LimitRedisContext struct {
	key    string // Redis key built from ClusterRateLimitFormat
	count  int64  // window token budget
	window int64  // window size in seconds
}
|
|
|
|
func parseConfig(json gjson.Result, config *ClusterKeyRateLimitConfig, log wrapper.Log) error {
|
|
err := initRedisClusterClient(json, config, log)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
err = parseClusterKeyRateLimitConfig(json, config)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
// Metric settings
|
|
config.counterMetrics = make(map[string]proxywasm.MetricCounter)
|
|
return nil
|
|
}
|
|
|
|
// onHttpRequestHeaders checks the request against the configured rate-limit
// rules; on a match it runs the request-phase fixed-window script in Redis and
// pauses the request until the async callback either resumes it or rejects it.
func onHttpRequestHeaders(ctx wrapper.HttpContext, config ClusterKeyRateLimitConfig, log wrapper.Log) types.Action {
	// Check whether the request hits any rate-limit rule.
	val, ruleItem, configItem := checkRequestAgainstLimitRule(ctx, config.ruleItems, log)
	if ruleItem == nil || configItem == nil {
		return types.ActionContinue
	}

	// Build the Redis rate-limit key and script arguments.
	limitKey := fmt.Sprintf(ClusterRateLimitFormat, config.ruleName, ruleItem.limitType, configItem.timeWindow, configItem.count, ruleItem.key, val)
	keys := []interface{}{limitKey}
	args := []interface{}{configItem.count, configItem.timeWindow}

	// Stash key/budget/window so the response phase can deduct consumed tokens.
	limitRedisContext := LimitRedisContext{
		key:    limitKey,
		count:  configItem.count,
		window: configItem.timeWindow,
	}
	ctx.SetContext(LimitRedisContextKey, limitRedisContext)

	// Execute the rate-limit script; the callback fires when Redis replies.
	err := config.redisClient.Eval(RequestPhaseFixedWindowScript, 1, keys, args, func(response resp.Value) {
		resultArray := response.Array()
		if len(resultArray) != 3 {
			// Unexpected reply shape: fail open and let the request through.
			log.Errorf("redis response parse error, response: %v", response)
			proxywasm.ResumeHttpRequest()
			return
		}
		context := LimitContext{
			count:     resultArray[0].Integer(),
			remaining: resultArray[1].Integer(),
			reset:     resultArray[2].Integer(),
		}
		if context.remaining < 0 {
			// Rate limit triggered: mark the AI log attribute and reject.
			ctx.SetUserAttribute("token_ratelimit_status", "limited")
			ctx.WriteUserAttributeToLogWithKey(wrapper.AILogKey)
			rejected(config, context)
		} else {
			proxywasm.ResumeHttpRequest()
		}
	})
	if err != nil {
		// Redis unreachable: fail open rather than blocking traffic.
		log.Errorf("redis call failed: %v", err)
		return types.ActionContinue
	}
	// Hold the request until the Redis callback decides its fate.
	return types.ActionPause
}
|
|
|
|
func onHttpStreamingBody(ctx wrapper.HttpContext, config ClusterKeyRateLimitConfig, data []byte, endOfStream bool, log wrapper.Log) []byte {
|
|
var inputToken, outputToken int64
|
|
if inputToken, outputToken, ok := getUsage(data); ok {
|
|
ctx.SetContext("input_token", inputToken)
|
|
ctx.SetContext("output_token", outputToken)
|
|
}
|
|
if endOfStream {
|
|
if ctx.GetContext("input_token") == nil || ctx.GetContext("output_token") == nil {
|
|
return data
|
|
}
|
|
inputToken = ctx.GetContext("input_token").(int64)
|
|
outputToken = ctx.GetContext("output_token").(int64)
|
|
limitRedisContext, ok := ctx.GetContext(LimitRedisContextKey).(LimitRedisContext)
|
|
if !ok {
|
|
return data
|
|
}
|
|
keys := []interface{}{limitRedisContext.key}
|
|
args := []interface{}{limitRedisContext.count, limitRedisContext.window, inputToken + outputToken}
|
|
err := config.redisClient.Eval(ResponsePhaseFixedWindowScript, 1, keys, args, nil)
|
|
if err != nil {
|
|
log.Errorf("redis call failed: %v", err)
|
|
}
|
|
}
|
|
return data
|
|
}
|
|
|
|
func getUsage(data []byte) (inputTokenUsage int64, outputTokenUsage int64, ok bool) {
|
|
chunks := bytes.Split(bytes.TrimSpace(data), []byte("\n\n"))
|
|
for _, chunk := range chunks {
|
|
// the feature strings are used to identify the usage data, like:
|
|
// {"model":"gpt2","usage":{"prompt_tokens":1,"completion_tokens":1}}
|
|
if !bytes.Contains(chunk, []byte("prompt_tokens")) || !bytes.Contains(chunk, []byte("completion_tokens")) {
|
|
continue
|
|
}
|
|
inputTokenObj := gjson.GetBytes(chunk, "usage.prompt_tokens")
|
|
outputTokenObj := gjson.GetBytes(chunk, "usage.completion_tokens")
|
|
if inputTokenObj.Exists() && outputTokenObj.Exists() {
|
|
inputTokenUsage = inputTokenObj.Int()
|
|
outputTokenUsage = outputTokenObj.Int()
|
|
ok = true
|
|
return
|
|
}
|
|
}
|
|
return
|
|
}
|
|
|
|
func checkRequestAgainstLimitRule(ctx wrapper.HttpContext, ruleItems []LimitRuleItem, log wrapper.Log) (string, *LimitRuleItem, *LimitConfigItem) {
|
|
for _, rule := range ruleItems {
|
|
val, ruleItem, configItem := hitRateRuleItem(ctx, rule, log)
|
|
if ruleItem != nil && configItem != nil {
|
|
return val, ruleItem, configItem
|
|
}
|
|
}
|
|
return "", nil, nil
|
|
}
|
|
|
|
// hitRateRuleItem evaluates a single rate-limit rule against the current
// request. It extracts the limit key value for the rule's source (header,
// query param, consumer, cookie, or client IP) and returns that value, the
// rule, and the matching config item; nil rule/item means the rule does not
// apply to this request.
func hitRateRuleItem(ctx wrapper.HttpContext, rule LimitRuleItem, log wrapper.Log) (string, *LimitRuleItem, *LimitConfigItem) {
	switch rule.limitType {
	// Limit by HTTP request header.
	case limitByHeaderType, limitByPerHeaderType:
		val, err := proxywasm.GetHttpRequestHeader(rule.key)
		if err != nil {
			return logDebugAndReturnEmpty(log, "failed to get request header %s: %v", rule.key, err)
		}
		return val, &rule, findMatchingItem(rule.limitType, rule.configItems, val)
	// Limit by HTTP query parameter.
	case limitByParamType, limitByPerParamType:
		parse, err := url.Parse(ctx.Path())
		if err != nil {
			return logDebugAndReturnEmpty(log, "failed to parse request path: %v", err)
		}
		query, err := url.ParseQuery(parse.RawQuery)
		if err != nil {
			return logDebugAndReturnEmpty(log, "failed to parse query params: %v", err)
		}
		val, ok := query[rule.key]
		if !ok {
			return logDebugAndReturnEmpty(log, "request param %s is empty", rule.key)
		}
		// Multiple values possible; only the first is used for limiting.
		return val[0], &rule, findMatchingItem(rule.limitType, rule.configItems, val[0])
	// Limit by consumer name (taken from the x-mse-consumer header).
	case limitByConsumerType, limitByPerConsumerType:
		val, err := proxywasm.GetHttpRequestHeader(ConsumerHeader)
		if err != nil {
			return logDebugAndReturnEmpty(log, "failed to get request header %s: %v", ConsumerHeader, err)
		}
		return val, &rule, findMatchingItem(rule.limitType, rule.configItems, val)
	// Limit by a key inside the Cookie header.
	case limitByCookieType, limitByPerCookieType:
		cookie, err := proxywasm.GetHttpRequestHeader(CookieHeader)
		if err != nil {
			return logDebugAndReturnEmpty(log, "failed to get request cookie : %v", err)
		}
		val := extractCookieValueByKey(cookie, rule.key)
		if val == "" {
			return logDebugAndReturnEmpty(log, "cookie key '%s' extracted from cookie '%s' is empty.", rule.key, cookie)
		}
		return val, &rule, findMatchingItem(rule.limitType, rule.configItems, val)
	// Limit by client IP.
	case limitByPerIpType:
		realIp, err := getDownStreamIp(rule)
		if err != nil {
			log.Warnf("failed to get down stream ip: %v", err)
			return "", &rule, nil
		}
		for _, item := range rule.configItems {
			// Skip entries whose CIDR set does not contain the client IP.
			if _, found, _ := item.ipNet.Get(realIp); !found {
				continue
			}
			return realIp.String(), &rule, &item
		}
	}
	return "", nil, nil
}
|
|
|
|
func logDebugAndReturnEmpty(log wrapper.Log, errMsg string, args ...interface{}) (string, *LimitRuleItem, *LimitConfigItem) {
|
|
log.Debugf(errMsg, args...)
|
|
return "", nil, nil
|
|
}
|
|
|
|
func findMatchingItem(limitType limitRuleItemType, items []LimitConfigItem, key string) *LimitConfigItem {
|
|
for _, item := range items {
|
|
// per类型,检查allType和regexpType
|
|
if limitType == limitByPerHeaderType ||
|
|
limitType == limitByPerParamType ||
|
|
limitType == limitByPerConsumerType ||
|
|
limitType == limitByPerCookieType {
|
|
if item.configType == allType || (item.configType == regexpType && item.regexp.MatchString(key)) {
|
|
return &item
|
|
}
|
|
}
|
|
// 其他类型,直接比较key
|
|
if item.key == key {
|
|
return &item
|
|
}
|
|
}
|
|
return nil
|
|
}
|
|
|
|
func getDownStreamIp(rule LimitRuleItem) (net.IP, error) {
|
|
var (
|
|
realIpStr string
|
|
err error
|
|
)
|
|
if rule.limitByPerIp.sourceType == HeaderSourceType {
|
|
realIpStr, err = proxywasm.GetHttpRequestHeader(rule.limitByPerIp.headerName)
|
|
if err == nil {
|
|
realIpStr = strings.Split(strings.Trim(realIpStr, " "), ",")[0]
|
|
}
|
|
} else {
|
|
var bs []byte
|
|
bs, err = proxywasm.GetProperty([]string{"source", "address"})
|
|
realIpStr = string(bs)
|
|
}
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
ip := parseIP(realIpStr)
|
|
realIP := net.ParseIP(ip)
|
|
if realIP == nil {
|
|
return nil, fmt.Errorf("invalid ip[%s]", ip)
|
|
}
|
|
return realIP, nil
|
|
}
|
|
|
|
func (config *ClusterKeyRateLimitConfig) incrementCounter(metricName string, inc uint64) {
|
|
if inc == 0 {
|
|
return
|
|
}
|
|
counter, ok := config.counterMetrics[metricName]
|
|
if !ok {
|
|
counter = proxywasm.DefineCounterMetric(metricName)
|
|
config.counterMetrics[metricName] = counter
|
|
}
|
|
counter.Increment(inc)
|
|
}
|
|
|
|
// generateMetricName builds the dotted metric identifier
// "route.<route>.upstream.<cluster>.model.<model>.consumer.<consumer>.metric.<name>".
func generateMetricName(route, cluster, model, consumer, metricName string) string {
	return "route." + route + ".upstream." + cluster +
		".model." + model + ".consumer." + consumer +
		".metric." + metricName
}
|
|
|
|
func getRouteName() (string, error) {
|
|
if raw, err := proxywasm.GetProperty([]string{"route_name"}); err != nil {
|
|
return "-", err
|
|
} else {
|
|
return string(raw), nil
|
|
}
|
|
}
|
|
|
|
func getClusterName() (string, error) {
|
|
if raw, err := proxywasm.GetProperty([]string{"cluster_name"}); err != nil {
|
|
return "-", err
|
|
} else {
|
|
return string(raw), nil
|
|
}
|
|
}
|
|
|
|
func getConsumer() (string, error) {
|
|
if consumer, err := proxywasm.GetHttpRequestHeader(ConsumerHeader); err != nil {
|
|
return "none", err
|
|
} else {
|
|
return consumer, nil
|
|
}
|
|
}
|
|
|
|
func rejected(config ClusterKeyRateLimitConfig, context LimitContext) {
|
|
headers := make(map[string][]string)
|
|
headers[RateLimitResetHeader] = []string{strconv.Itoa(context.reset)}
|
|
_ = proxywasm.SendHttpResponseWithDetail(
|
|
config.rejectedCode, "ai-token-ratelimit.rejected", reconvertHeaders(headers), []byte(config.rejectedMsg), -1)
|
|
|
|
route, _ := getRouteName()
|
|
cluster, _ := getClusterName()
|
|
consumer, _ := getConsumer()
|
|
config.incrementCounter(generateMetricName(route, cluster, "none", consumer, TokenRateLimitCount), 1)
|
|
}
|