From 385f8d8b4e2a8b2640112802778b0e39df335130 Mon Sep 17 00:00:00 2001
From: Jingze <52855280+Jing-ze@users.noreply.github.com>
Date: Thu, 28 May 2026 09:54:56 +0800
Subject: [PATCH] fix(ai-proxy): natively route Anthropic Messages to vertex :rawPredict (#3860)

Signed-off-by: jingze
Co-authored-by: Claude Opus 4.7
---
 .../extensions/ai-proxy/provider/vertex.go |  78 ++++-
 .../ai-proxy/provider/vertex_test.go       | 281 ++++++++++++++++++
 2 files changed, 352 insertions(+), 7 deletions(-)

diff --git a/plugins/wasm-go/extensions/ai-proxy/provider/vertex.go b/plugins/wasm-go/extensions/ai-proxy/provider/vertex.go
index b1c213784..09bb602b9 100644
--- a/plugins/wasm-go/extensions/ai-proxy/provider/vertex.go
+++ b/plugins/wasm-go/extensions/ai-proxy/provider/vertex.go
@@ -102,12 +102,13 @@ func (v *vertexProviderInitializer) ValidateConfig(config *ProviderConfig) error
 
 func (v *vertexProviderInitializer) DefaultCapabilities() map[string]string {
 	return map[string]string{
-		string(ApiNameChatCompletion):  vertexPathTemplate,
-		string(ApiNameEmbeddings):      vertexPathTemplate,
-		string(ApiNameImageGeneration): vertexPathTemplate,
-		string(ApiNameImageEdit):       vertexPathTemplate,
-		string(ApiNameImageVariation):  vertexPathTemplate,
-		string(ApiNameVertexRaw):       "", // 空字符串表示保持原路径,不做路径转换
+		string(ApiNameChatCompletion):    vertexPathTemplate,
+		string(ApiNameEmbeddings):        vertexPathTemplate,
+		string(ApiNameImageGeneration):   vertexPathTemplate,
+		string(ApiNameImageEdit):         vertexPathTemplate,
+		string(ApiNameImageVariation):    vertexPathTemplate,
+		string(ApiNameAnthropicMessages): vertexPathAnthropicTemplate, // native Anthropic Messages API support, passed through to :rawPredict
+		string(ApiNameVertexRaw):         "",                          // an empty string keeps the original path; no path rewriting
 	}
 }
 
@@ -195,6 +196,12 @@ func (v *vertexProvider) TransformRequestHeaders(ctx wrapper.HttpContext, apiNam
 	}
 
 	util.OverwriteRequestHostHeader(headers, finalVertexDomain)
+
+	// Strip credential headers that Anthropic clients may carry so they are not leaked to Google.
+	// Vertex always authenticates with an OAuth Bearer token (standard mode) or ?key= (Express mode);
+	// these headers mean nothing to Vertex, and keeping them would only forward secrets such as sk-ant-... into upstream logs.
+	headers.Del("x-api-key")
+	headers.Del("anthropic-api-key")
 }
 
 func (v *vertexProvider) getToken() (cached bool, err error) {
@@ -295,7 +302,10 @@ func (v *vertexProvider) OnRequestBody(ctx wrapper.HttpContext, apiName ApiName,
 	headers := util.GetRequestHeaders()
 
 	// OpenAI 兼容模式: 不转换请求体,只设置路径和进行模型映射
-	if v.isOpenAICompatibleMode() {
+	// Note: the Anthropic Messages API (/v1/messages) always takes the native passthrough,
+	// regardless of the vertexOpenAICompatible setting — Vertex's OpenAI-compatible endpoint is designed for Gemini only,
+	// and translating Claude requests through it is a pointless OpenAI detour that also drops Anthropic-specific fields.
+	if v.isOpenAICompatibleMode() && apiName != ApiNameAnthropicMessages {
 		ctx.SetContext(contextOpenAICompatibleMarker, true)
 		body, err := v.onOpenAICompatibleRequestBody(ctx, apiName, body, headers)
 		if err != nil {
@@ -346,6 +356,8 @@ func (v *vertexProvider) TransformRequestBodyHeaders(ctx wrapper.HttpContext, ap
 	switch apiName {
 	case ApiNameChatCompletion:
 		return v.onChatCompletionRequestBody(ctx, body, headers)
+	case ApiNameAnthropicMessages:
+		return v.onAnthropicMessagesRequestBody(ctx, body, headers)
 	case ApiNameEmbeddings:
 		return v.onEmbeddingsRequestBody(ctx, body, headers)
 	case ApiNameImageGeneration:
@@ -385,6 +397,48 @@ func (v *vertexProvider) onOpenAICompatibleRequestBody(ctx wrapper.HttpContext,
 	return body, nil
 }
 
+// onAnthropicMessagesRequestBody handles /v1/messages requests by passing the Anthropic body through to Vertex's
+// :rawPredict / :streamRawPredict endpoint. No protocol conversion is done, only the required Vertex-side adjustments:
+// 1. Model mapping (modelMapping) — Claude models on Vertex must use the fully qualified name (e.g. claude-sonnet-4@20250514)
+// 2. Build the :rawPredict / :streamRawPredict path
+// 3. Remove the "model" field from the body (the Vertex Anthropic endpoint does not accept a model in the body)
+// 4. Inject "anthropic_version": "vertex-2023-10-16"
+//
+// This path lets the `type` field of builtin tools (web_search_*, bash_*, computer_*, text_editor_*, code_execution_*)
+// and Anthropic-specific fields such as a custom tool's cache_control or thinking blocks
+// reach the upstream unchanged, without triggering validation errors like `tools.0.custom.name`.
+func (v *vertexProvider) onAnthropicMessagesRequestBody(ctx wrapper.HttpContext, body []byte, headers http.Header) ([]byte, error) {
+	stream := gjson.GetBytes(body, "stream").Bool()
+
+	model := gjson.GetBytes(body, "model").String()
+	if err := v.config.mapModel(ctx, &model); err != nil {
+		return nil, err
+	}
+
+	path := v.getAhthropicRequestPath(ctx, ApiNameAnthropicMessages, model, stream)
+	util.OverwriteRequestPathHeader(headers, path)
+
+	body, err := sjson.DeleteBytes(body, "model")
+	if err != nil {
+		return nil, fmt.Errorf("unable to strip model from anthropic body: %w", err)
+	}
+	body, err = sjson.SetBytes(body, "anthropic_version", vertexAnthropicVersion)
+	if err != nil {
+		return nil, fmt.Errorf("unable to inject anthropic_version: %w", err)
+	}
+
+	// The Vertex Anthropic endpoint requires max_tokens; a request that omits it is rejected with a 400.
+	// Consistent with the claude provider's buildClaudeTextGenRequest: fall back to claudeDefaultMaxTokens when absent.
+	if !gjson.GetBytes(body, "max_tokens").Exists() {
+		body, err = sjson.SetBytes(body, "max_tokens", claudeDefaultMaxTokens)
+		if err != nil {
+			return nil, fmt.Errorf("unable to inject default max_tokens: %w", err)
+		}
+	}
+
+	return body, nil
+}
+
 func (v *vertexProvider) onChatCompletionRequestBody(ctx wrapper.HttpContext, body []byte, headers http.Header) ([]byte, error) {
 	request := &chatCompletionRequest{}
 	err := v.config.parseRequestAndMapModel(ctx, request, body)
@@ -654,6 +708,11 @@ func (v *vertexProvider) parseImageSize(size string) (aspectRatio, imageSize str
 }
 
 func (v *vertexProvider) OnStreamingResponseBody(ctx wrapper.HttpContext, name ApiName, chunk []byte, isLastChunk bool) ([]byte, error) {
+	// Anthropic Messages API: Vertex's :streamRawPredict already returns standard Anthropic SSE, so pass it through unchanged.
+	if name == ApiNameAnthropicMessages {
+		return chunk, nil
+	}
+
 	// OpenAI 兼容模式: 透传响应,但需要解码 Unicode 转义序列
 	// Vertex AI OpenAI-compatible API 返回 ASCII-safe JSON,将非 ASCII 字符编码为 \uXXXX
 	if ctx.GetContext(contextOpenAICompatibleMarker) != nil && ctx.GetContext(contextOpenAICompatibleMarker).(bool) {
@@ -733,6 +792,11 @@ func (v *vertexProvider) OnStreamingResponseBody(ctx wrapper.HttpContext, name A
 }
 
 func (v *vertexProvider) TransformResponseBody(ctx wrapper.HttpContext, apiName ApiName, body []byte) ([]byte, error) {
+	// Anthropic Messages API: Vertex's :rawPredict already returns standard Anthropic JSON, so pass it through unchanged.
+	if apiName == ApiNameAnthropicMessages {
+		return body, nil
+	}
+
 	// OpenAI 兼容模式: 透传响应,但需要解码 Unicode 转义序列
 	// Vertex AI OpenAI-compatible API 返回 ASCII-safe JSON,将非 ASCII 字符编码为 \uXXXX
 	if ctx.GetContext(contextOpenAICompatibleMarker) != nil && ctx.GetContext(contextOpenAICompatibleMarker).(bool) {
diff --git a/plugins/wasm-go/extensions/ai-proxy/provider/vertex_test.go b/plugins/wasm-go/extensions/ai-proxy/provider/vertex_test.go
index c9113d40e..716f14844 100644
--- a/plugins/wasm-go/extensions/ai-proxy/provider/vertex_test.go
+++ b/plugins/wasm-go/extensions/ai-proxy/provider/vertex_test.go
@@ -1,11 +1,13 @@
 package provider
 
 import (
+	"net/http"
 	"testing"
 
 	"github.com/alibaba/higress/plugins/wasm-go/extensions/ai-proxy/util"
 	"github.com/stretchr/testify/assert"
 	"github.com/stretchr/testify/require"
+	"github.com/tidwall/gjson"
 )
 
 func TestAppendOrReplaceAPIKey(t *testing.T) {
@@ -292,3 +294,282 @@ func TestVertexProviderApplyResponseFormatNilSafety(t *testing.T) {
 	require.NoError(t, v.applyResponseFormatToGenerationConfig(nil, &vertexChatGenerationConfig{}, "gemini-2.5-flash"))
 	require.NoError(t, v.applyResponseFormatToGenerationConfig(map[string]interface{}{}, &vertexChatGenerationConfig{}, "gemini-2.5-flash"))
 }
+
+// newAnthropicVertexProvider builds a vertexProvider with project/region/modelMapping
+// suitable for exercising onAnthropicMessagesRequestBody without OAuth or wasm runtime.
+func newAnthropicVertexProvider(openAICompat bool) *vertexProvider {
+	cfg := ProviderConfig{
+		vertexProjectId:        "test-proj",
+		vertexRegion:           "us-east5",
+		vertexOpenAICompatible: openAICompat,
+		modelMapping: map[string]string{
+			"claude-sonnet-4":   "claude-sonnet-4@20250514",
+			"claude-sonnet-4-5": "claude-sonnet-4-5@20250929",
+		},
+	}
+	return &vertexProvider{config: cfg}
+}
+
+// TestVertexAnthropicPassthrough_BuiltinTool_TypePreserved is the core regression test
+// for the original bug: builtin Anthropic tools (e.g. web_search_20250305) carry only
+// a `type` discriminator and no `name`. The previous Anthropic→OpenAI→Anthropic round
+// trip lost the type field, producing `tools.0.custom.name: String should have at least
+// 1 character` from vertex. After the fix, the body is passthrough — `type` survives.
+func TestVertexAnthropicPassthrough_BuiltinTool_TypePreserved(t *testing.T) {
+	v := newAnthropicVertexProvider(false)
+	ctx := newMapCtx()
+	headers := http.Header{}
+	body := []byte(`{
+		"model": "claude-sonnet-4",
+		"max_tokens": 4096,
+		"messages": [{"role": "user", "content": "search the web"}],
+		"tools": [
+			{"type": "web_search_20250305"},
+			{"type": "bash_20250124"},
+			{"type": "text_editor_20250124"}
+		]
+	}`)
+
+	out, err := v.onAnthropicMessagesRequestBody(ctx, body, headers)
+	require.NoError(t, err)
+
+	// Path: non-stream → :rawPredict, model fully-qualified via modelMapping.
+	assert.Equal(t,
+		"/v1/projects/test-proj/locations/us-east5/publishers/anthropic/models/claude-sonnet-4@20250514:rawPredict",
+		headers.Get(":path"))
+
+	// Body: model stripped, anthropic_version injected.
+	assert.False(t, gjson.GetBytes(out, "model").Exists(), "model must be stripped (vertex :rawPredict rejects it)")
+	assert.Equal(t, vertexAnthropicVersion, gjson.GetBytes(out, "anthropic_version").String())
+
+	// The bug-defining assertion: builtin tool `type` survives verbatim, and we did
+	// NOT manufacture a `name` for it. If a future change re-introduces the lossy
+	// conversion, the type will disappear or a synthetic name will appear and this
+	// test will fail.
+	tools := gjson.GetBytes(out, "tools").Array()
+	require.Len(t, tools, 3)
+	assert.Equal(t, "web_search_20250305", tools[0].Get("type").String())
+	assert.False(t, tools[0].Get("name").Exists(), "builtin tool must not have a synthetic name")
+	assert.Equal(t, "bash_20250124", tools[1].Get("type").String())
+	assert.Equal(t, "text_editor_20250124", tools[2].Get("type").String())
+}
+
+// TestVertexAnthropicPassthrough_StreamPath verifies stream=true routes to
+// :streamRawPredict and stream=false routes to :rawPredict.
+func TestVertexAnthropicPassthrough_StreamPath(t *testing.T) {
+	t.Run("stream true → streamRawPredict", func(t *testing.T) {
+		v := newAnthropicVertexProvider(false)
+		ctx := newMapCtx()
+		headers := http.Header{}
+		body := []byte(`{"model":"claude-sonnet-4","max_tokens":16,"stream":true,"messages":[{"role":"user","content":"hi"}]}`)
+
+		_, err := v.onAnthropicMessagesRequestBody(ctx, body, headers)
+		require.NoError(t, err)
+
+		assert.Equal(t,
+			"/v1/projects/test-proj/locations/us-east5/publishers/anthropic/models/claude-sonnet-4@20250514:streamRawPredict",
+			headers.Get(":path"))
+	})
+
+	t.Run("stream false → rawPredict", func(t *testing.T) {
+		v := newAnthropicVertexProvider(false)
+		ctx := newMapCtx()
+		headers := http.Header{}
+		body := []byte(`{"model":"claude-sonnet-4","max_tokens":16,"messages":[{"role":"user","content":"hi"}]}`)
+
+		_, err := v.onAnthropicMessagesRequestBody(ctx, body, headers)
+		require.NoError(t, err)
+
+		assert.Equal(t,
+			"/v1/projects/test-proj/locations/us-east5/publishers/anthropic/models/claude-sonnet-4@20250514:rawPredict",
+			headers.Get(":path"))
+	})
+}
+
+// TestVertexAnthropicPassthrough_ModelMappingUnconfigured verifies that when no
+// mapping entry matches, the model name is left untouched (vertex will 404 — we
+// don't second-guess the user's config here).
+func TestVertexAnthropicPassthrough_ModelMappingUnconfigured(t *testing.T) {
+	v := &vertexProvider{config: ProviderConfig{
+		vertexProjectId: "test-proj",
+		vertexRegion:    "us-east5",
+		// no modelMapping configured
+	}}
+	ctx := newMapCtx()
+	headers := http.Header{}
+	body := []byte(`{"model":"claude-sonnet-4","max_tokens":16,"messages":[{"role":"user","content":"hi"}]}`)
+
+	_, err := v.onAnthropicMessagesRequestBody(ctx, body, headers)
+	require.NoError(t, err)
+
+	// model name passes through as-is (no @date suffix)
+	assert.Equal(t,
+		"/v1/projects/test-proj/locations/us-east5/publishers/anthropic/models/claude-sonnet-4:rawPredict",
+		headers.Get(":path"))
+}
+
+// TestVertexAnthropicPassthrough_CustomToolFieldsPreserved verifies that
+// custom tool fields not in the OpenAI schema (cache_control, thinking config,
+// arbitrary input_schema shapes) survive the passthrough — they were silently
+// dropped by the old double-conversion path.
+func TestVertexAnthropicPassthrough_CustomToolFieldsPreserved(t *testing.T) {
+	v := newAnthropicVertexProvider(false)
+	ctx := newMapCtx()
+	headers := http.Header{}
+	body := []byte(`{
+		"model": "claude-sonnet-4",
+		"max_tokens": 1024,
+		"messages": [{"role": "user", "content": "list files"}],
+		"tools": [{
+			"name": "Bash",
+			"description": "run a shell command",
+			"input_schema": {
+				"type": "object",
+				"properties": {"command": {"type": "string"}},
+				"required": ["command"]
+			},
+			"cache_control": {"type": "ephemeral"}
+		}],
+		"thinking": {"type": "enabled", "budget_tokens": 1024}
+	}`)
+
+	out, err := v.onAnthropicMessagesRequestBody(ctx, body, headers)
+	require.NoError(t, err)
+
+	tool := gjson.GetBytes(out, "tools.0")
+	assert.Equal(t, "Bash", tool.Get("name").String())
+	assert.Equal(t, "ephemeral", tool.Get("cache_control.type").String(), "cache_control must survive passthrough")
+	assert.Equal(t, "object", tool.Get("input_schema.type").String())
+	assert.Equal(t, "command", tool.Get("input_schema.required.0").String())
+
+	thinking := gjson.GetBytes(out, "thinking")
+	assert.Equal(t, "enabled", thinking.Get("type").String(), "thinking config must survive passthrough")
+	assert.Equal(t, int64(1024), thinking.Get("budget_tokens").Int())
+}
+
+// TestVertexAnthropicPassthrough_OpenAICompatibleConfigDoesNotInterfere verifies
+// the intended contract: vertexOpenAICompatible: true affects ONLY
+// chat/completions; /v1/messages still goes to the Anthropic native endpoint.
+// (We exercise the handler that TransformRequestBodyHeaders dispatches to;
+// see the `apiName != ApiNameAnthropicMessages` guard in OnRequestBody for the bypass itself.)
+func TestVertexAnthropicPassthrough_OpenAICompatibleConfigDoesNotInterfere(t *testing.T) {
+	v := newAnthropicVertexProvider(true) // vertexOpenAICompatible: true
+	ctx := newMapCtx()
+	headers := http.Header{}
+	body := []byte(`{"model":"claude-sonnet-4","max_tokens":16,"messages":[{"role":"user","content":"hi"}]}`)
+
+	out, err := v.onAnthropicMessagesRequestBody(ctx, body, headers)
+	require.NoError(t, err)
+
+	// Must be Anthropic native path, NOT /v1beta1/.../openai/chat/completions.
+	assert.Contains(t, headers.Get(":path"), "publishers/anthropic/models/")
+	assert.Contains(t, headers.Get(":path"), ":rawPredict")
+	assert.NotContains(t, headers.Get(":path"), "/openai/")
+	assert.Equal(t, vertexAnthropicVersion, gjson.GetBytes(out, "anthropic_version").String())
+}
+
+// TestVertexAnthropicPassthrough_DispatchedFromTransformRequestBodyHeaders covers
+// the wiring step: TransformRequestBodyHeaders sees ApiNameAnthropicMessages and
+// routes to the passthrough handler. Guards against accidental removal of the
+// case branch.
+func TestVertexAnthropicPassthrough_DispatchedFromTransformRequestBodyHeaders(t *testing.T) {
+	v := newAnthropicVertexProvider(false)
+	ctx := newMapCtx()
+	headers := http.Header{}
+	body := []byte(`{"model":"claude-sonnet-4","max_tokens":16,"messages":[{"role":"user","content":"hi"}],"tools":[{"type":"web_search_20250305"}]}`)
+
+	out, err := v.TransformRequestBodyHeaders(ctx, ApiNameAnthropicMessages, body, headers)
+	require.NoError(t, err)
+
+	assert.Contains(t, headers.Get(":path"), ":rawPredict")
+	assert.Equal(t, "web_search_20250305", gjson.GetBytes(out, "tools.0.type").String())
+	assert.False(t, gjson.GetBytes(out, "model").Exists())
+}
+
+// TestVertexAnthropicPassthrough_ResponseBodyUnchanged verifies the non-stream
+// response branch: TransformResponseBody returns the body verbatim for
+// ApiNameAnthropicMessages, so vertex's native Anthropic JSON reaches the client
+// without OpenAI→Anthropic re-translation.
+func TestVertexAnthropicPassthrough_ResponseBodyUnchanged(t *testing.T) {
+	v := newAnthropicVertexProvider(false)
+	ctx := newMapCtx()
+	body := []byte(`{"id":"msg_01","type":"message","role":"assistant","content":[{"type":"text","text":"hi"}],"model":"claude-sonnet-4@20250514","stop_reason":"end_turn","usage":{"input_tokens":3,"output_tokens":1}}`)
+
+	out, err := v.TransformResponseBody(ctx, ApiNameAnthropicMessages, body)
+	require.NoError(t, err)
+	assert.Equal(t, body, out, "vertex Anthropic response must be returned byte-for-byte")
+}
+
+// TestVertexAnthropicPassthrough_StreamingChunkUnchanged verifies the streaming
+// counterpart: each SSE chunk is forwarded verbatim because vertex's
+// :streamRawPredict already emits standard Anthropic SSE events.
+func TestVertexAnthropicPassthrough_StreamingChunkUnchanged(t *testing.T) {
+	v := newAnthropicVertexProvider(false)
+	ctx := newMapCtx()
+	chunk := []byte("event: message_start\ndata: {\"type\":\"message_start\",\"message\":{\"id\":\"msg_01\",\"type\":\"message\",\"role\":\"assistant\",\"content\":[],\"model\":\"claude-sonnet-4@20250514\",\"stop_reason\":null,\"stop_sequence\":null,\"usage\":{\"input_tokens\":3,\"output_tokens\":1}}}\n\n")
+
+	out, err := v.OnStreamingResponseBody(ctx, ApiNameAnthropicMessages, chunk, false)
+	require.NoError(t, err)
+	assert.Equal(t, chunk, out, "vertex Anthropic SSE chunk must be returned byte-for-byte")
+}
+
+// TestVertexTransformRequestHeaders_StripsAnthropicCredentialHeaders ensures
+// that Anthropic-style client auth headers (carried by SDKs like the Anthropic
+// Python/TypeScript SDKs and Claude Code) are NOT forwarded to vertex.
+// Vertex uses OAuth Bearer / API key in URL — these headers are meaningless to
+// vertex and forwarding them would leak the client's sk-ant-... credential to
+// Google logs.
+func TestVertexTransformRequestHeaders_StripsAnthropicCredentialHeaders(t *testing.T) {
+	v := newAnthropicVertexProvider(false)
+	ctx := newMapCtx()
+	headers := http.Header{}
+	headers.Set("x-api-key", "sk-ant-api03-secret")
+	headers.Set("anthropic-api-key", "sk-ant-api03-secret")
+	headers.Set("content-type", "application/json")
+
+	v.TransformRequestHeaders(ctx, ApiNameAnthropicMessages, headers)
+
+	assert.Empty(t, headers.Get("x-api-key"), "x-api-key must be stripped before forwarding to vertex")
+	assert.Empty(t, headers.Get("anthropic-api-key"), "anthropic-api-key must be stripped before forwarding to vertex")
+	// Sanity: unrelated headers untouched.
+	assert.Equal(t, "application/json", headers.Get("content-type"))
+}
+
+// TestVertexAnthropicPassthrough_MaxTokensDefault ensures that when the client
+// omits max_tokens, the passthrough handler injects claudeDefaultMaxTokens.
+// Vertex's Anthropic endpoint rejects requests without max_tokens with a 400 —
+// some SDKs (and lenient clients) leave it unset, expecting the upstream to
+// default. Matches buildClaudeTextGenRequest's behavior in claude.go.
+func TestVertexAnthropicPassthrough_MaxTokensDefault(t *testing.T) {
+	t.Run("missing max_tokens gets defaulted", func(t *testing.T) {
+		v := newAnthropicVertexProvider(false)
+		ctx := newMapCtx()
+		headers := http.Header{}
+		body := []byte(`{
+			"model": "claude-sonnet-4",
+			"messages": [{"role": "user", "content": "hi"}]
+		}`)
+
+		out, err := v.onAnthropicMessagesRequestBody(ctx, body, headers)
+		require.NoError(t, err)
+
+		assert.Equal(t, int64(claudeDefaultMaxTokens), gjson.GetBytes(out, "max_tokens").Int())
+	})
+
+	t.Run("client-supplied max_tokens preserved", func(t *testing.T) {
+		v := newAnthropicVertexProvider(false)
+		ctx := newMapCtx()
+		headers := http.Header{}
+		body := []byte(`{
+			"model": "claude-sonnet-4",
+			"max_tokens": 1024,
+			"messages": [{"role": "user", "content": "hi"}]
+		}`)
+
+		out, err := v.onAnthropicMessagesRequestBody(ctx, body, headers)
+		require.NoError(t, err)
+
+		assert.Equal(t, int64(1024), gjson.GetBytes(out, "max_tokens").Int(),
+			"client-supplied max_tokens must not be overwritten by the default")
+	})
+}