fix: remove non-standard thinking/reasoning_max_tokens from Claude-to… (#3667)

This commit is contained in:
woody
2026-04-01 09:40:14 +08:00
committed by GitHub
parent cd8ed99db5
commit 89587c1c9b
8 changed files with 363 additions and 38 deletions

View File

@@ -228,3 +228,11 @@ func TestConsumerAffinity(t *testing.T) {
test.RunConsumerAffinityParseConfigTests(t)
test.RunConsumerAffinityOnHttpRequestHeadersTests(t)
}
// TestOpenRouter is the go-test entry point for the OpenRouter provider cases;
// it delegates to test.RunOpenRouterClaudeAutoConversionTests.
func TestOpenRouter(t *testing.T) {
test.RunOpenRouterClaudeAutoConversionTests(t)
}
// TestZhipuAI is the go-test entry point for the ZhipuAI provider cases;
// it delegates to test.RunZhipuAIClaudeAutoConversionTests.
func TestZhipuAI(t *testing.T) {
test.RunZhipuAIClaudeAutoConversionTests(t)
}

View File

@@ -187,14 +187,16 @@ func (c *ClaudeToOpenAIConverter) ConvertClaudeRequestToOpenAI(body []byte) ([]b
}
// Convert thinking configuration if present
// Only set standard OpenAI fields (reasoning_effort). Non-standard fields like
// "thinking" and "reasoning_max_tokens" are NOT set here because they are not
// recognized by OpenAI/Azure and will cause 400 errors. Providers that need
// these non-standard fields (e.g., ZhipuAI) should handle them in their own
// OnRequestBody implementation.
if claudeRequest.Thinking != nil {
log.Debugf("[Claude->OpenAI] Found thinking config: type=%s, budget_tokens=%d",
claudeRequest.Thinking.Type, claudeRequest.Thinking.BudgetTokens)
if claudeRequest.Thinking.Type == "enabled" {
openaiRequest.ReasoningMaxTokens = claudeRequest.Thinking.BudgetTokens
openaiRequest.Thinking = &thinkingParam{Type: "enabled", BudgetToken: claudeRequest.Thinking.BudgetTokens}
// Set ReasoningEffort based on budget_tokens
// low: <4096, medium: >=4096 and <16384, high: >=16384
if claudeRequest.Thinking.BudgetTokens < 4096 {
@@ -205,14 +207,9 @@ func (c *ClaudeToOpenAIConverter) ConvertClaudeRequestToOpenAI(body []byte) ([]b
openaiRequest.ReasoningEffort = "high"
}
log.Debugf("[Claude->OpenAI] Converted thinking config: budget_tokens=%d, reasoning_effort=%s, reasoning_max_tokens=%d",
claudeRequest.Thinking.BudgetTokens, openaiRequest.ReasoningEffort, openaiRequest.ReasoningMaxTokens)
log.Debugf("[Claude->OpenAI] Converted thinking config: budget_tokens=%d, reasoning_effort=%s",
claudeRequest.Thinking.BudgetTokens, openaiRequest.ReasoningEffort)
}
} else {
// Explicitly disable thinking when not configured in Claude request
// This prevents providers like ZhipuAI from enabling thinking by default
openaiRequest.Thinking = &thinkingParam{Type: "disabled"}
log.Debugf("[Claude->OpenAI] No thinking config found, explicitly disabled")
}
result, err := json.Marshal(openaiRequest)

View File

@@ -642,11 +642,9 @@ func TestClaudeToOpenAIConverter_ConvertThinkingConfig(t *testing.T) {
converter := &ClaudeToOpenAIConverter{}
tests := []struct {
name string
claudeRequest string
expectedMaxTokens int
expectedEffort string
expectThinkingConfig bool
name string
claudeRequest string
expectedEffort string
}{
{
name: "thinking_enabled_low",
@@ -656,9 +654,7 @@ func TestClaudeToOpenAIConverter_ConvertThinkingConfig(t *testing.T) {
"messages": [{"role": "user", "content": "Hello"}],
"thinking": {"type": "enabled", "budget_tokens": 2048}
}`,
expectedMaxTokens: 2048,
expectedEffort: "low",
expectThinkingConfig: true,
expectedEffort: "low",
},
{
name: "thinking_enabled_medium",
@@ -668,9 +664,7 @@ func TestClaudeToOpenAIConverter_ConvertThinkingConfig(t *testing.T) {
"messages": [{"role": "user", "content": "Hello"}],
"thinking": {"type": "enabled", "budget_tokens": 8192}
}`,
expectedMaxTokens: 8192,
expectedEffort: "medium",
expectThinkingConfig: true,
expectedEffort: "medium",
},
{
name: "thinking_enabled_high",
@@ -680,9 +674,7 @@ func TestClaudeToOpenAIConverter_ConvertThinkingConfig(t *testing.T) {
"messages": [{"role": "user", "content": "Hello"}],
"thinking": {"type": "enabled", "budget_tokens": 20480}
}`,
expectedMaxTokens: 20480,
expectedEffort: "high",
expectThinkingConfig: true,
expectedEffort: "high",
},
{
name: "thinking_disabled",
@@ -692,9 +684,7 @@ func TestClaudeToOpenAIConverter_ConvertThinkingConfig(t *testing.T) {
"messages": [{"role": "user", "content": "Hello"}],
"thinking": {"type": "disabled"}
}`,
expectedMaxTokens: 0,
expectedEffort: "",
expectThinkingConfig: false,
expectedEffort: "",
},
{
name: "no_thinking",
@@ -703,9 +693,7 @@ func TestClaudeToOpenAIConverter_ConvertThinkingConfig(t *testing.T) {
"max_tokens": 1000,
"messages": [{"role": "user", "content": "Hello"}]
}`,
expectedMaxTokens: 0,
expectedEffort: "",
expectThinkingConfig: false,
expectedEffort: "",
},
}
@@ -719,13 +707,23 @@ func TestClaudeToOpenAIConverter_ConvertThinkingConfig(t *testing.T) {
err = json.Unmarshal(result, &openaiRequest)
assert.NoError(t, err)
if tt.expectThinkingConfig {
assert.Equal(t, tt.expectedMaxTokens, openaiRequest.ReasoningMaxTokens)
assert.Equal(t, tt.expectedEffort, openaiRequest.ReasoningEffort)
} else {
assert.Equal(t, 0, openaiRequest.ReasoningMaxTokens)
assert.Equal(t, "", openaiRequest.ReasoningEffort)
}
assert.Equal(t, tt.expectedEffort, openaiRequest.ReasoningEffort)
// Verify non-standard fields are NEVER set in the converted request.
// These fields are not recognized by OpenAI/Azure and would cause 400 errors.
assert.Equal(t, 0, openaiRequest.ReasoningMaxTokens,
"reasoning_max_tokens must not be set - it is not a standard OpenAI parameter")
assert.Nil(t, openaiRequest.Thinking,
"thinking must not be set - it is not a standard OpenAI parameter")
// Also verify at the raw JSON level to catch any serialization issues
var rawJSON map[string]interface{}
err = json.Unmarshal(result, &rawJSON)
require.NoError(t, err)
assert.NotContains(t, rawJSON, "thinking",
"raw JSON must not contain 'thinking' field")
assert.NotContains(t, rawJSON, "reasoning_max_tokens",
"raw JSON must not contain 'reasoning_max_tokens' field")
})
}
}
@@ -930,7 +928,6 @@ func TestClaudeToOpenAIConverter_StripCchFromSystemMessage(t *testing.T) {
assert.Equal(t, "You are a helpful assistant.", systemMsg.Content)
})
}
func TestStripCchFromBillingHeader(t *testing.T) {
tests := []struct {
name string

View File

@@ -79,6 +79,22 @@ func (o *openrouterProvider) TransformRequestBody(ctx wrapper.HttpContext, apiNa
// Check if ReasoningMaxTokens exists in the request body
reasoningMaxTokens := gjson.GetBytes(body, "reasoning_max_tokens")
if !reasoningMaxTokens.Exists() || reasoningMaxTokens.Int() == 0 {
// Check if budget_tokens was stored in context (from Claude auto-conversion path)
// Only use it when thinking was explicitly enabled, to avoid dirty input
if thinkingType, _ := ctx.GetContext(ctxKeyClaudeThinkingType).(string); thinkingType == "enabled" {
if budgetTokens, ok := ctx.GetContext(ctxKeyClaudeBudgetTokens).(int); ok && budgetTokens > 0 {
// Use budget_tokens from Claude thinking config
modifiedBody, err := sjson.DeleteBytes(body, "reasoning_effort")
if err != nil {
modifiedBody = body
}
modifiedBody, err = sjson.SetBytes(modifiedBody, "reasoning.max_tokens", budgetTokens)
if err != nil {
return nil, err
}
return o.config.defaultTransformRequestBody(ctx, apiName, modifiedBody)
}
}
// No reasoning_max_tokens, use default transformation
return o.config.defaultTransformRequestBody(ctx, apiName, body)
}

View File

@@ -168,6 +168,8 @@ const (
finishReasonLength = "length"
finishReasonToolCall = "tool_calls"
ctxKeyClaudeBudgetTokens = "claudeBudgetTokens"
ctxKeyClaudeThinkingType = "claudeThinkingType"
ctxKeyIncrementalStreaming = "incrementalStreaming"
ctxKeyApiKey = "apiKey"
CtxKeyApiName = "apiName"
@@ -1155,6 +1157,21 @@ func (c *ProviderConfig) handleRequestBody(
// If main.go detected a Claude request that needs conversion, convert the body
needClaudeConversion, _ := ctx.GetContext("needClaudeResponseConversion").(bool)
if needClaudeConversion {
// Extract thinking config from original Claude body before conversion,
// so downstream providers (OpenRouter, ZhipuAI) can access it.
thinkingType := gjson.GetBytes(body, "thinking.type").String()
if thinkingType == "" {
// Claude request had no thinking field at all - treat as disabled
thinkingType = "disabled"
}
ctx.SetContext(ctxKeyClaudeThinkingType, thinkingType)
// Only extract budget_tokens when thinking is explicitly enabled
if thinkingType == "enabled" {
if budgetTokens := gjson.GetBytes(body, "thinking.budget_tokens").Int(); budgetTokens > 0 {
ctx.SetContext(ctxKeyClaudeBudgetTokens, int(budgetTokens))
}
}
// Convert Claude protocol to OpenAI protocol
converter := &ClaudeToOpenAIConverter{}
body, err = converter.ConvertClaudeRequestToOpenAI(body)

View File

@@ -95,6 +95,10 @@ func (m *zhipuAiProvider) TransformRequestBody(ctx wrapper.HttpContext, apiName
body, _ = sjson.SetBytes(body, "thinking", map[string]string{"type": "enabled"})
// Remove reasoning_effort field as ZhipuAI doesn't recognize it
body, _ = sjson.DeleteBytes(body, "reasoning_effort")
} else if thinkingType, ok := ctx.GetContext(ctxKeyClaudeThinkingType).(string); ok && thinkingType != "enabled" {
// Request came from Claude auto-conversion with thinking explicitly disabled or absent.
// Explicitly set thinking=disabled to prevent ZhipuAI from enabling it by default.
body, _ = sjson.SetBytes(body, "thinking", map[string]string{"type": "disabled"})
}
return m.config.defaultTransformRequestBody(ctx, apiName, body)

View File

@@ -0,0 +1,148 @@
package test
import (
"encoding/json"
"testing"
"github.com/higress-group/proxy-wasm-go-sdk/proxywasm/types"
"github.com/higress-group/wasm-go/pkg/test"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
// basicOpenRouterConfig is the minimal plugin configuration used by the
// OpenRouter tests: a single provider of type "openrouter" with one API token.
// It is serialized once, at package initialization.
var basicOpenRouterConfig = func() json.RawMessage {
	providerSettings := map[string]interface{}{
		"type":      "openrouter",
		"apiTokens": []string{"sk-openrouter-test"},
	}
	// The marshal error is deliberately discarded: the value is a literal
	// map of strings and string slices.
	raw, _ := json.Marshal(map[string]interface{}{
		"provider": providerSettings,
	})
	return raw
}()
func RunOpenRouterClaudeAutoConversionTests(t *testing.T) {
test.RunGoTest(t, func(t *testing.T) {
t.Run("claude thinking budget_tokens is converted to reasoning.max_tokens", func(t *testing.T) {
host, status := test.NewTestHost(basicOpenRouterConfig)
defer host.Reset()
require.Equal(t, types.OnPluginStartStatusOK, status)
// Send request with Claude /v1/messages path to trigger auto-conversion
action := host.CallOnHttpRequestHeaders([][2]string{
{":authority", "example.com"},
{":path", "/v1/messages"},
{":method", "POST"},
{"Content-Type", "application/json"},
})
require.Equal(t, types.HeaderStopIteration, action)
// Claude request body with thinking enabled
requestBody := `{
"model": "anthropic/claude-sonnet-4",
"max_tokens": 8000,
"messages": [{"role": "user", "content": "Hello"}],
"thinking": {"type": "enabled", "budget_tokens": 10000}
}`
action = host.CallOnHttpRequestBody([]byte(requestBody))
require.Equal(t, types.ActionContinue, action)
transformedBody := host.GetRequestBody()
require.NotNil(t, transformedBody)
var bodyMap map[string]interface{}
err := json.Unmarshal(transformedBody, &bodyMap)
require.NoError(t, err)
// reasoning.max_tokens should be set from budget_tokens
reasoning, ok := bodyMap["reasoning"].(map[string]interface{})
require.True(t, ok, "reasoning field should be present")
assert.Equal(t, float64(10000), reasoning["max_tokens"],
"reasoning.max_tokens should preserve the original budget_tokens value")
// reasoning_effort should be removed (OpenRouter uses reasoning.max_tokens instead)
assert.NotContains(t, bodyMap, "reasoning_effort",
"reasoning_effort should be removed")
// Non-standard fields should not be present
assert.NotContains(t, bodyMap, "thinking",
"thinking should not be in the final request")
assert.NotContains(t, bodyMap, "reasoning_max_tokens",
"reasoning_max_tokens should not be in the final request")
})
t.Run("claude without thinking uses default transformation", func(t *testing.T) {
host, status := test.NewTestHost(basicOpenRouterConfig)
defer host.Reset()
require.Equal(t, types.OnPluginStartStatusOK, status)
action := host.CallOnHttpRequestHeaders([][2]string{
{":authority", "example.com"},
{":path", "/v1/messages"},
{":method", "POST"},
{"Content-Type", "application/json"},
})
require.Equal(t, types.HeaderStopIteration, action)
requestBody := `{
"model": "anthropic/claude-sonnet-4",
"max_tokens": 1000,
"messages": [{"role": "user", "content": "Hello"}]
}`
action = host.CallOnHttpRequestBody([]byte(requestBody))
require.Equal(t, types.ActionContinue, action)
transformedBody := host.GetRequestBody()
require.NotNil(t, transformedBody)
var bodyMap map[string]interface{}
err := json.Unmarshal(transformedBody, &bodyMap)
require.NoError(t, err)
// No reasoning fields should be present
assert.NotContains(t, bodyMap, "reasoning")
assert.NotContains(t, bodyMap, "reasoning_effort")
assert.NotContains(t, bodyMap, "thinking")
assert.NotContains(t, bodyMap, "reasoning_max_tokens")
})
t.Run("claude thinking disabled does not set reasoning", func(t *testing.T) {
host, status := test.NewTestHost(basicOpenRouterConfig)
defer host.Reset()
require.Equal(t, types.OnPluginStartStatusOK, status)
action := host.CallOnHttpRequestHeaders([][2]string{
{":authority", "example.com"},
{":path", "/v1/messages"},
{":method", "POST"},
{"Content-Type", "application/json"},
})
require.Equal(t, types.HeaderStopIteration, action)
// thinking disabled with budget_tokens (dirty input)
requestBody := `{
"model": "anthropic/claude-sonnet-4",
"max_tokens": 1000,
"messages": [{"role": "user", "content": "Hello"}],
"thinking": {"type": "disabled", "budget_tokens": 5000}
}`
action = host.CallOnHttpRequestBody([]byte(requestBody))
require.Equal(t, types.ActionContinue, action)
transformedBody := host.GetRequestBody()
require.NotNil(t, transformedBody)
var bodyMap map[string]interface{}
err := json.Unmarshal(transformedBody, &bodyMap)
require.NoError(t, err)
// Should NOT have reasoning.max_tokens since thinking was disabled
assert.NotContains(t, bodyMap, "reasoning",
"reasoning should not be set when thinking is disabled")
assert.NotContains(t, bodyMap, "thinking")
assert.NotContains(t, bodyMap, "reasoning_max_tokens")
})
})
}

View File

@@ -0,0 +1,138 @@
package test
import (
"encoding/json"
"testing"
"github.com/higress-group/proxy-wasm-go-sdk/proxywasm/types"
"github.com/higress-group/wasm-go/pkg/test"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
// basicZhipuAIConfig is the minimal plugin configuration used by the ZhipuAI
// tests: a single provider of type "zhipuai" with one API token. It is
// serialized once, at package initialization.
var basicZhipuAIConfig = func() json.RawMessage {
	providerSettings := map[string]interface{}{
		"type":      "zhipuai",
		"apiTokens": []string{"sk-zhipuai-test"},
	}
	// The marshal error is deliberately discarded: the value is a literal
	// map of strings and string slices.
	raw, _ := json.Marshal(map[string]interface{}{
		"provider": providerSettings,
	})
	return raw
}()
func RunZhipuAIClaudeAutoConversionTests(t *testing.T) {
test.RunGoTest(t, func(t *testing.T) {
t.Run("claude thinking enabled sets thinking enabled for zhipuai", func(t *testing.T) {
host, status := test.NewTestHost(basicZhipuAIConfig)
defer host.Reset()
require.Equal(t, types.OnPluginStartStatusOK, status)
action := host.CallOnHttpRequestHeaders([][2]string{
{":authority", "example.com"},
{":path", "/v1/messages"},
{":method", "POST"},
{"Content-Type", "application/json"},
})
require.Equal(t, types.HeaderStopIteration, action)
requestBody := `{
"model": "glm-4",
"max_tokens": 1000,
"messages": [{"role": "user", "content": "Hello"}],
"thinking": {"type": "enabled", "budget_tokens": 8192}
}`
action = host.CallOnHttpRequestBody([]byte(requestBody))
require.Equal(t, types.ActionContinue, action)
transformedBody := host.GetRequestBody()
require.NotNil(t, transformedBody)
var bodyMap map[string]interface{}
err := json.Unmarshal(transformedBody, &bodyMap)
require.NoError(t, err)
// ZhipuAI should have thinking=enabled (converted from reasoning_effort)
thinking, ok := bodyMap["thinking"].(map[string]interface{})
require.True(t, ok, "thinking field should be present")
assert.Equal(t, "enabled", thinking["type"])
// reasoning_effort should be removed (ZhipuAI doesn't recognize it)
assert.NotContains(t, bodyMap, "reasoning_effort")
})
t.Run("claude without thinking sets thinking disabled for zhipuai", func(t *testing.T) {
host, status := test.NewTestHost(basicZhipuAIConfig)
defer host.Reset()
require.Equal(t, types.OnPluginStartStatusOK, status)
action := host.CallOnHttpRequestHeaders([][2]string{
{":authority", "example.com"},
{":path", "/v1/messages"},
{":method", "POST"},
{"Content-Type", "application/json"},
})
require.Equal(t, types.HeaderStopIteration, action)
requestBody := `{
"model": "glm-4",
"max_tokens": 1000,
"messages": [{"role": "user", "content": "Hello"}]
}`
action = host.CallOnHttpRequestBody([]byte(requestBody))
require.Equal(t, types.ActionContinue, action)
transformedBody := host.GetRequestBody()
require.NotNil(t, transformedBody)
var bodyMap map[string]interface{}
err := json.Unmarshal(transformedBody, &bodyMap)
require.NoError(t, err)
// ZhipuAI should explicitly set thinking=disabled
thinking, ok := bodyMap["thinking"].(map[string]interface{})
require.True(t, ok, "thinking field should be present for disabled state")
assert.Equal(t, "disabled", thinking["type"])
})
t.Run("claude thinking disabled sets thinking disabled for zhipuai", func(t *testing.T) {
host, status := test.NewTestHost(basicZhipuAIConfig)
defer host.Reset()
require.Equal(t, types.OnPluginStartStatusOK, status)
action := host.CallOnHttpRequestHeaders([][2]string{
{":authority", "example.com"},
{":path", "/v1/messages"},
{":method", "POST"},
{"Content-Type", "application/json"},
})
require.Equal(t, types.HeaderStopIteration, action)
requestBody := `{
"model": "glm-4",
"max_tokens": 1000,
"messages": [{"role": "user", "content": "Hello"}],
"thinking": {"type": "disabled"}
}`
action = host.CallOnHttpRequestBody([]byte(requestBody))
require.Equal(t, types.ActionContinue, action)
transformedBody := host.GetRequestBody()
require.NotNil(t, transformedBody)
var bodyMap map[string]interface{}
err := json.Unmarshal(transformedBody, &bodyMap)
require.NoError(t, err)
// ZhipuAI should explicitly set thinking=disabled
thinking, ok := bodyMap["thinking"].(map[string]interface{})
require.True(t, ok, "thinking field should be present for disabled state")
assert.Equal(t, "disabled", thinking["type"])
// No reasoning fields
assert.NotContains(t, bodyMap, "reasoning_effort")
})
})
}