Files
higress/plugins/wasm-go/extensions/ai-proxy/test/cooldown.go
2026-05-20 18:11:11 +08:00

674 lines
22 KiB
Go
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
package test
import (
"encoding/json"
"strings"
"testing"
"time"
"github.com/higress-group/proxy-wasm-go-sdk/proxywasm/types"
"github.com/higress-group/wasm-go/pkg/test"
"github.com/stretchr/testify/require"
)
// setUpstreamResponse marks the response as coming from upstream so onHttpResponseHeaders processes it
func setUpstreamResponse(host test.TestHost) {
_ = host.SetProperty([]string{"response", "code_details"}, []byte("via_upstream"))
}
// 测试配置cooldown only无 healthCheckModel
var cooldownOnlyConfig = func() json.RawMessage {
data, _ := json.Marshal(map[string]interface{}{
"provider": map[string]interface{}{
"type": "openai",
"apiTokens": []string{"sk-token-a", "sk-token-b"},
"modelMapping": map[string]string{
"*": "gpt-3.5-turbo",
},
"failover": map[string]interface{}{
"enabled": true,
"failureThreshold": 1,
"cooldownDuration": 100,
"failoverOnStatus": []string{"429"},
},
},
})
return data
}()
// 测试配置cooldown + healthCheck 同时配置
var cooldownWithHealthCheckConfig = func() json.RawMessage {
data, _ := json.Marshal(map[string]interface{}{
"provider": map[string]interface{}{
"type": "openai",
"apiTokens": []string{"sk-token-a", "sk-token-b"},
"modelMapping": map[string]string{
"*": "gpt-3.5-turbo",
},
"failover": map[string]interface{}{
"enabled": true,
"failureThreshold": 1,
"cooldownDuration": 100,
"healthCheckModel": "gpt-3.5-turbo",
"healthCheckTimeout": 5000,
"failoverOnStatus": []string{"429"},
},
},
})
return data
}()
// 测试配置failover 启用但既没有 cooldown 也没有 healthCheckModel
var failoverNoRecoveryConfig = func() json.RawMessage {
data, _ := json.Marshal(map[string]interface{}{
"provider": map[string]interface{}{
"type": "openai",
"apiTokens": []string{"sk-token-a"},
"modelMapping": map[string]string{
"*": "gpt-3.5-turbo",
},
"failover": map[string]interface{}{
"enabled": true,
"failureThreshold": 1,
"failoverOnStatus": []string{"429"},
},
},
})
return data
}()
// 测试配置cooldown 较长,用于测试冷却未到期的场景
var cooldownLongDurationConfig = func() json.RawMessage {
data, _ := json.Marshal(map[string]interface{}{
"provider": map[string]interface{}{
"type": "openai",
"apiTokens": []string{"sk-token-a", "sk-token-b"},
"modelMapping": map[string]string{
"*": "gpt-3.5-turbo",
},
"failover": map[string]interface{}{
"enabled": true,
"failureThreshold": 1,
"cooldownDuration": 600000,
"failoverOnStatus": []string{"429"},
},
},
})
return data
}()
// 测试配置:三个 tokenfailureThreshold=2
var cooldownThreeTokensConfig = func() json.RawMessage {
data, _ := json.Marshal(map[string]interface{}{
"provider": map[string]interface{}{
"type": "openai",
"apiTokens": []string{"sk-token-a", "sk-token-b", "sk-token-c"},
"modelMapping": map[string]string{
"*": "gpt-3.5-turbo",
},
"failover": map[string]interface{}{
"enabled": true,
"failureThreshold": 2,
"cooldownDuration": 100,
"failoverOnStatus": []string{"429"},
},
},
})
return data
}()
// 测试配置:单个 token + cooldown
var cooldownSingleTokenConfig = func() json.RawMessage {
data, _ := json.Marshal(map[string]interface{}{
"provider": map[string]interface{}{
"type": "openai",
"apiTokens": []string{"sk-only-token"},
"modelMapping": map[string]string{
"*": "gpt-3.5-turbo",
},
"failover": map[string]interface{}{
"enabled": true,
"failureThreshold": 1,
"cooldownDuration": 100,
"failoverOnStatus": []string{"429"},
},
},
})
return data
}()
// 测试配置cooldown + 多种 failoverOnStatus
var cooldownMultiStatusConfig = func() json.RawMessage {
data, _ := json.Marshal(map[string]interface{}{
"provider": map[string]interface{}{
"type": "openai",
"apiTokens": []string{"sk-token-a", "sk-token-b"},
"modelMapping": map[string]string{
"*": "gpt-3.5-turbo",
},
"failover": map[string]interface{}{
"enabled": true,
"failureThreshold": 1,
"cooldownDuration": 100,
"failoverOnStatus": []string{"429", "5.*"},
},
},
})
return data
}()
// ============ Parse Config Tests ============
func RunCooldownParseConfigTests(t *testing.T) {
test.RunGoTest(t, func(t *testing.T) {
// cooldown only 配置应正常启动
t.Run("cooldown only config starts ok", func(t *testing.T) {
host, status := test.NewTestHost(cooldownOnlyConfig)
defer host.Reset()
require.Equal(t, types.OnPluginStartStatusOK, status)
config, err := host.GetMatchConfig()
require.NoError(t, err)
require.NotNil(t, config)
})
// cooldown + healthCheck 同时配置应正常启动
t.Run("cooldown with healthCheck config starts ok", func(t *testing.T) {
host, status := test.NewTestHost(cooldownWithHealthCheckConfig)
defer host.Reset()
require.Equal(t, types.OnPluginStartStatusOK, status)
config, err := host.GetMatchConfig()
require.NoError(t, err)
require.NotNil(t, config)
})
// failover 启用但既没有 cooldown 也没有 healthCheckModel 应启动失败
t.Run("failover without recovery config fails", func(t *testing.T) {
host, status := test.NewTestHost(failoverNoRecoveryConfig)
defer host.Reset()
require.Equal(t, types.OnPluginStartStatusFailed, status)
})
// 单 token + cooldown 配置应正常启动
t.Run("single token with cooldown config starts ok", func(t *testing.T) {
host, status := test.NewTestHost(cooldownSingleTokenConfig)
defer host.Reset()
require.Equal(t, types.OnPluginStartStatusOK, status)
config, err := host.GetMatchConfig()
require.NoError(t, err)
require.NotNil(t, config)
})
})
}
// ============ Failover on 429 Tests ============
func RunCooldownOnHttpResponseHeadersTests(t *testing.T) {
test.RunGoTest(t, func(t *testing.T) {
// 429 响应应触发 failover 日志
t.Run("429 triggers failover", func(t *testing.T) {
host, status := test.NewTestHost(cooldownOnlyConfig)
defer host.Reset()
require.Equal(t, types.OnPluginStartStatusOK, status)
host.CallOnHttpRequestHeaders([][2]string{
{":authority", "example.com"},
{":path", "/v1/chat/completions"},
{":method", "POST"},
{"Content-Type", "application/json"},
})
host.CallOnHttpRequestBody([]byte(`{"model":"gpt-3.5-turbo","messages":[{"role":"user","content":"hi"}]}`))
setUpstreamResponse(host)
action := host.CallOnHttpResponseHeaders([][2]string{
{":status", "429"},
{"Content-Type", "application/json"},
})
require.Equal(t, types.ActionContinue, action)
// 验证 failover 日志
warnLogs := host.GetWarnLogs()
hasFailoverLog := false
for _, log := range warnLogs {
if strings.Contains(log, "need failover") && strings.Contains(log, "429") {
hasFailoverLog = true
break
}
}
require.True(t, hasFailoverLog, "Should have failover warning log on 429")
})
// 200 响应不应触发 failover
t.Run("200 does not trigger failover", func(t *testing.T) {
host, status := test.NewTestHost(cooldownOnlyConfig)
defer host.Reset()
require.Equal(t, types.OnPluginStartStatusOK, status)
host.CallOnHttpRequestHeaders([][2]string{
{":authority", "example.com"},
{":path", "/v1/chat/completions"},
{":method", "POST"},
{"Content-Type", "application/json"},
})
host.CallOnHttpRequestBody([]byte(`{"model":"gpt-3.5-turbo","messages":[{"role":"user","content":"hi"}]}`))
setUpstreamResponse(host)
action := host.CallOnHttpResponseHeaders([][2]string{
{":status", "200"},
{"Content-Type", "application/json"},
})
require.Equal(t, types.ActionContinue, action)
warnLogs := host.GetWarnLogs()
for _, log := range warnLogs {
require.False(t, strings.Contains(log, "need failover"), "Should not have failover log on 200")
}
})
// 非 failoverOnStatus 的错误码不应触发 failover
t.Run("500 does not trigger failover when only 429 configured", func(t *testing.T) {
host, status := test.NewTestHost(cooldownOnlyConfig)
defer host.Reset()
require.Equal(t, types.OnPluginStartStatusOK, status)
host.CallOnHttpRequestHeaders([][2]string{
{":authority", "example.com"},
{":path", "/v1/chat/completions"},
{":method", "POST"},
{"Content-Type", "application/json"},
})
host.CallOnHttpRequestBody([]byte(`{"model":"gpt-3.5-turbo","messages":[{"role":"user","content":"hi"}]}`))
setUpstreamResponse(host)
action := host.CallOnHttpResponseHeaders([][2]string{
{":status", "500"},
{"Content-Type", "application/json"},
})
require.Equal(t, types.ActionContinue, action)
warnLogs := host.GetWarnLogs()
for _, log := range warnLogs {
require.False(t, strings.Contains(log, "need failover"), "Should not have failover log on 500 when only 429 configured")
}
})
// 多种 failoverOnStatus 匹配测试500 应触发 failover
t.Run("500 triggers failover when 5xx configured", func(t *testing.T) {
host, status := test.NewTestHost(cooldownMultiStatusConfig)
defer host.Reset()
require.Equal(t, types.OnPluginStartStatusOK, status)
host.CallOnHttpRequestHeaders([][2]string{
{":authority", "example.com"},
{":path", "/v1/chat/completions"},
{":method", "POST"},
{"Content-Type", "application/json"},
})
host.CallOnHttpRequestBody([]byte(`{"model":"gpt-3.5-turbo","messages":[{"role":"user","content":"hi"}]}`))
setUpstreamResponse(host)
action := host.CallOnHttpResponseHeaders([][2]string{
{":status", "500"},
{"Content-Type", "application/json"},
})
require.Equal(t, types.ActionContinue, action)
warnLogs := host.GetWarnLogs()
hasFailoverLog := false
for _, log := range warnLogs {
if strings.Contains(log, "need failover") {
hasFailoverLog = true
break
}
}
require.True(t, hasFailoverLog, "Should have failover log on 500 when 5.* configured")
})
})
}
// ============ Cooldown Recovery Tests ============
func RunCooldownRecoveryTests(t *testing.T) {
test.RunGoTest(t, func(t *testing.T) {
// token 被摘除后,冷却到期后 tick 应恢复
t.Run("token recovered after cooldown expires", func(t *testing.T) {
host, status := test.NewTestHost(cooldownOnlyConfig)
defer host.Reset()
require.Equal(t, types.OnPluginStartStatusOK, status)
// 触发一次 429 使 token 被摘除failureThreshold=1
host.CallOnHttpRequestHeaders([][2]string{
{":authority", "example.com"},
{":path", "/v1/chat/completions"},
{":method", "POST"},
{"Content-Type", "application/json"},
})
host.CallOnHttpRequestBody([]byte(`{"model":"gpt-3.5-turbo","messages":[{"role":"user","content":"hi"}]}`))
setUpstreamResponse(host)
host.CallOnHttpResponseHeaders([][2]string{
{":status", "429"},
{"Content-Type", "application/json"},
})
// 验证 token 被标记为不可用
infoLogs := host.GetInfoLogs()
hasUnavailableLog := false
for _, log := range infoLogs {
if strings.Contains(log, "is unavailable now") {
hasUnavailableLog = true
break
}
}
require.True(t, hasUnavailableLog, "Token should be marked as unavailable after 429")
// 等待冷却到期cooldownDuration=100ms
time.Sleep(150 * time.Millisecond)
// 触发 tick 执行冷却恢复
host.Tick()
// 验证 token 被恢复
infoLogs = host.GetInfoLogs()
hasRecoveryLog := false
for _, log := range infoLogs {
if strings.Contains(log, "cooldown recovery") && strings.Contains(log, "restoring to available list") {
hasRecoveryLog = true
break
}
}
require.True(t, hasRecoveryLog, "Token should be recovered after cooldown expires and tick fires")
})
// 冷却未到期时 tick 不应恢复 token
t.Run("token not recovered before cooldown expires", func(t *testing.T) {
host, status := test.NewTestHost(cooldownLongDurationConfig)
defer host.Reset()
require.Equal(t, types.OnPluginStartStatusOK, status)
// 触发 429 使 token 被摘除
host.CallOnHttpRequestHeaders([][2]string{
{":authority", "example.com"},
{":path", "/v1/chat/completions"},
{":method", "POST"},
{"Content-Type", "application/json"},
})
host.CallOnHttpRequestBody([]byte(`{"model":"gpt-3.5-turbo","messages":[{"role":"user","content":"hi"}]}`))
setUpstreamResponse(host)
host.CallOnHttpResponseHeaders([][2]string{
{":status", "429"},
{"Content-Type", "application/json"},
})
// 立即 tick冷却未到期cooldownDuration=600000ms
host.Tick()
// 验证 token 未被恢复
infoLogs := host.GetInfoLogs()
for _, log := range infoLogs {
require.False(t, strings.Contains(log, "cooldown recovery"),
"Token should NOT be recovered before cooldown expires")
}
})
// failureThreshold > 1 时,单次失败不应摘除 token
t.Run("single failure does not remove token when threshold is 2", func(t *testing.T) {
host, status := test.NewTestHost(cooldownThreeTokensConfig)
defer host.Reset()
require.Equal(t, types.OnPluginStartStatusOK, status)
// 只触发一次 429failureThreshold=2
host.CallOnHttpRequestHeaders([][2]string{
{":authority", "example.com"},
{":path", "/v1/chat/completions"},
{":method", "POST"},
{"Content-Type", "application/json"},
})
host.CallOnHttpRequestBody([]byte(`{"model":"gpt-3.5-turbo","messages":[{"role":"user","content":"hi"}]}`))
setUpstreamResponse(host)
host.CallOnHttpResponseHeaders([][2]string{
{":status", "429"},
{"Content-Type", "application/json"},
})
// 验证 token 未被摘除(需要连续 2 次失败)
infoLogs := host.GetInfoLogs()
for _, log := range infoLogs {
require.False(t, strings.Contains(log, "is unavailable now"),
"Token should NOT be removed after single failure when threshold is 2")
}
// 验证有 debug 日志记录失败次数
debugLogs := host.GetDebugLogs()
hasThresholdLog := false
for _, log := range debugLogs {
if strings.Contains(log, "has not reached the failure threshold") {
hasThresholdLog = true
break
}
}
require.True(t, hasThresholdLog, "Should log that failure threshold not reached")
})
// 单个 token 被摘除后,应使用不可用 token 兜底
t.Run("single token fallback to unavailable when all removed", func(t *testing.T) {
host, status := test.NewTestHost(cooldownSingleTokenConfig)
defer host.Reset()
require.Equal(t, types.OnPluginStartStatusOK, status)
// 触发 429 使唯一的 token 被摘除
host.CallOnHttpRequestHeaders([][2]string{
{":authority", "example.com"},
{":path", "/v1/chat/completions"},
{":method", "POST"},
{"Content-Type", "application/json"},
})
host.CallOnHttpRequestBody([]byte(`{"model":"gpt-3.5-turbo","messages":[{"role":"user","content":"hi"}]}`))
setUpstreamResponse(host)
host.CallOnHttpResponseHeaders([][2]string{
{":status", "429"},
{"Content-Type", "application/json"},
})
host.CompleteHttp()
// 发起新请求,应使用不可用 token 兜底
host.CallOnHttpRequestHeaders([][2]string{
{":authority", "example.com"},
{":path", "/v1/chat/completions"},
{":method", "POST"},
{"Content-Type", "application/json"},
})
requestHeaders := host.GetRequestHeaders()
authValue, hasAuth := test.GetHeaderValue(requestHeaders, "Authorization")
require.True(t, hasAuth, "Authorization header should exist even when all tokens unavailable")
require.Contains(t, authValue, "sk-only-token", "Should fallback to the unavailable token")
// 验证有 warn 日志
warnLogs := host.GetWarnLogs()
hasAllUnavailableLog := false
for _, log := range warnLogs {
if strings.Contains(log, "all tokens are unavailable") {
hasAllUnavailableLog = true
break
}
}
require.True(t, hasAllUnavailableLog, "Should warn that all tokens are unavailable")
})
// 单个 token 被摘除后,冷却到期后恢复,新请求应正常使用
t.Run("single token recovered after cooldown and used in new request", func(t *testing.T) {
host, status := test.NewTestHost(cooldownSingleTokenConfig)
defer host.Reset()
require.Equal(t, types.OnPluginStartStatusOK, status)
// 触发 429 使 token 被摘除
host.CallOnHttpRequestHeaders([][2]string{
{":authority", "example.com"},
{":path", "/v1/chat/completions"},
{":method", "POST"},
{"Content-Type", "application/json"},
})
host.CallOnHttpRequestBody([]byte(`{"model":"gpt-3.5-turbo","messages":[{"role":"user","content":"hi"}]}`))
setUpstreamResponse(host)
host.CallOnHttpResponseHeaders([][2]string{
{":status", "429"},
{"Content-Type", "application/json"},
})
host.CompleteHttp()
// 等待冷却到期
time.Sleep(150 * time.Millisecond)
host.Tick()
// 验证恢复日志
infoLogs := host.GetInfoLogs()
hasRecoveryLog := false
for _, log := range infoLogs {
if strings.Contains(log, "cooldown recovery") {
hasRecoveryLog = true
break
}
}
require.True(t, hasRecoveryLog, "Token should be recovered after cooldown")
// 发起新请求,应正常使用恢复的 token不再有 all tokens unavailable 警告)
host.CallOnHttpRequestHeaders([][2]string{
{":authority", "example.com"},
{":path", "/v1/chat/completions"},
{":method", "POST"},
{"Content-Type", "application/json"},
})
requestHeaders := host.GetRequestHeaders()
authValue, hasAuth := test.GetHeaderValue(requestHeaders, "Authorization")
require.True(t, hasAuth, "Authorization header should exist")
require.Contains(t, authValue, "sk-only-token", "Should use the recovered token")
})
// 两个 token一个被摘除后另一个继续使用
t.Run("second token used after first is removed", func(t *testing.T) {
host, status := test.NewTestHost(cooldownOnlyConfig)
defer host.Reset()
require.Equal(t, types.OnPluginStartStatusOK, status)
// 触发 429 使一个 token 被摘除
host.CallOnHttpRequestHeaders([][2]string{
{":authority", "example.com"},
{":path", "/v1/chat/completions"},
{":method", "POST"},
{"Content-Type", "application/json"},
})
host.CallOnHttpRequestBody([]byte(`{"model":"gpt-3.5-turbo","messages":[{"role":"user","content":"hi"}]}`))
setUpstreamResponse(host)
host.CallOnHttpResponseHeaders([][2]string{
{":status", "429"},
{"Content-Type", "application/json"},
})
host.CompleteHttp()
// 验证 token 被摘除
infoLogs := host.GetInfoLogs()
hasUnavailableLog := false
for _, log := range infoLogs {
if strings.Contains(log, "is unavailable now") {
hasUnavailableLog = true
break
}
}
require.True(t, hasUnavailableLog, "One token should be marked unavailable")
// 发起新请求,应使用剩余的可用 token
host.CallOnHttpRequestHeaders([][2]string{
{":authority", "example.com"},
{":path", "/v1/chat/completions"},
{":method", "POST"},
{"Content-Type", "application/json"},
})
requestHeaders := host.GetRequestHeaders()
authValue, hasAuth := test.GetHeaderValue(requestHeaders, "Authorization")
require.True(t, hasAuth, "Authorization header should exist")
require.True(t, strings.Contains(authValue, "sk-token-"), "Should use one of the configured tokens")
})
// 成功请求应重置失败计数
t.Run("successful request resets failure count", func(t *testing.T) {
resetCountConfig := func() json.RawMessage {
data, _ := json.Marshal(map[string]interface{}{
"provider": map[string]interface{}{
"type": "openai",
"apiTokens": []string{"sk-reset-token"},
"modelMapping": map[string]string{
"*": "gpt-3.5-turbo",
},
"failover": map[string]interface{}{
"enabled": true,
"failureThreshold": 2,
"cooldownDuration": 100,
"failoverOnStatus": []string{"429"},
},
},
})
return data
}()
host, status := test.NewTestHost(resetCountConfig)
defer host.Reset()
require.Equal(t, types.OnPluginStartStatusOK, status)
// 第一次请求 429failureThreshold=2不会摘除
host.CallOnHttpRequestHeaders([][2]string{
{":authority", "example.com"},
{":path", "/v1/chat/completions"},
{":method", "POST"},
{"Content-Type", "application/json"},
})
host.CallOnHttpRequestBody([]byte(`{"model":"gpt-3.5-turbo","messages":[{"role":"user","content":"hi"}]}`))
setUpstreamResponse(host)
host.CallOnHttpResponseHeaders([][2]string{
{":status", "429"},
{"Content-Type", "application/json"},
})
host.CompleteHttp()
// 第二次请求成功200应重置失败计数
host.CallOnHttpRequestHeaders([][2]string{
{":authority", "example.com"},
{":path", "/v1/chat/completions"},
{":method", "POST"},
{"Content-Type", "application/json"},
})
host.CallOnHttpRequestBody([]byte(`{"model":"gpt-3.5-turbo","messages":[{"role":"user","content":"hi"}]}`))
setUpstreamResponse(host)
host.CallOnHttpResponseHeaders([][2]string{
{":status", "200"},
{"Content-Type", "application/json"},
})
host.CompleteHttp()
// 第三次请求 429因为计数已重置不应摘除
host.CallOnHttpRequestHeaders([][2]string{
{":authority", "example.com"},
{":path", "/v1/chat/completions"},
{":method", "POST"},
{"Content-Type", "application/json"},
})
host.CallOnHttpRequestBody([]byte(`{"model":"gpt-3.5-turbo","messages":[{"role":"user","content":"hi"}]}`))
setUpstreamResponse(host)
host.CallOnHttpResponseHeaders([][2]string{
{":status", "429"},
{"Content-Type", "application/json"},
})
// 验证 token 未被摘除
infoLogs := host.GetInfoLogs()
for _, log := range infoLogs {
require.False(t, strings.Contains(log, "is unavailable now"),
"Token should NOT be removed because failure count was reset by successful request")
}
})
})
}