From cd8ed99db55f80dbdbab4faa5202e06671c42279 Mon Sep 17 00:00:00 2001
From: woody <yaodiwu618@gmail.com>
Date: Mon, 30 Mar 2026 17:26:55 +0800
Subject: [PATCH] Vertex structured outputs (#3649)

---
 .../wasm-go/extensions/ai-proxy/main_test.go  |   2 +
 .../extensions/ai-proxy/provider/vertex.go    | 167 +++++++++-
 .../ai-proxy/provider/vertex_test.go          | 258 ++++++++++++++
 .../extensions/ai-proxy/test/vertex.go        | 314 +++++++++++++++++-
 4 files changed, 720 insertions(+), 21 deletions(-)
 create mode 100644 plugins/wasm-go/extensions/ai-proxy/provider/vertex_test.go

diff --git a/plugins/wasm-go/extensions/ai-proxy/main_test.go b/plugins/wasm-go/extensions/ai-proxy/main_test.go
index 8801c1c24..0965da701 100644
--- a/plugins/wasm-go/extensions/ai-proxy/main_test.go
+++ b/plugins/wasm-go/extensions/ai-proxy/main_test.go
@@ -196,6 +196,8 @@ func TestVertex(t *testing.T) {
 	test.RunVertexExpressModeOnHttpRequestBodyTests(t)
 	test.RunVertexExpressModeOnHttpResponseBodyTests(t)
 	test.RunVertexExpressModeOnStreamingResponseBodyTests(t)
+	test.RunVertexOpenAICompatibleModeOnHttpRequestHeadersTests(t)
+	test.RunVertexOpenAICompatibleModeOnHttpRequestBodyTests(t)
 	test.RunVertexExpressModeImageGenerationRequestBodyTests(t)
 	test.RunVertexExpressModeImageGenerationResponseBodyTests(t)
 	test.RunVertexExpressModeImageEditVariationRequestBodyTests(t)
diff --git a/plugins/wasm-go/extensions/ai-proxy/provider/vertex.go b/plugins/wasm-go/extensions/ai-proxy/provider/vertex.go
index f868da178..2ee4272d2 100644
--- a/plugins/wasm-go/extensions/ai-proxy/provider/vertex.go
+++ b/plugins/wasm-go/extensions/ai-proxy/provider/vertex.go
@@ -259,12 +259,12 @@ func (v *vertexProvider) OnRequestBody(ctx wrapper.HttpContext, apiName ApiName,
 	if v.isOpenAICompatibleMode() {
 		ctx.SetContext(contextOpenAICompatibleMarker, true)
 		body, err := v.onOpenAICompatibleRequestBody(ctx, apiName, body, headers)
-		headers.Set("Content-Length", fmt.Sprint(len(body)))
-		util.ReplaceRequestHeaders(headers)
-		_ = proxywasm.ReplaceHttpRequestBody(body)
 		if err != nil {
 			return types.ActionContinue, err
 		}
+		headers.Set("Content-Length", fmt.Sprint(len(body)))
+		util.ReplaceRequestHeaders(headers)
+		_ = proxywasm.ReplaceHttpRequestBody(body)
 		// OpenAI 兼容模式需要 OAuth token
 		cached, err := v.getToken()
 		if cached {
@@ -277,6 +277,9 @@ func (v *vertexProvider) OnRequestBody(ctx wrapper.HttpContext, apiName ApiName,
 	}
 
 	body, err := v.TransformRequestBodyHeaders(ctx, apiName, body, headers)
+	if err != nil {
+		return types.ActionContinue, err
+	}
 	headers.Set("Content-Length", fmt.Sprint(len(body)))
 
 	if v.isExpressMode() {
@@ -284,15 +287,12 @@ func (v *vertexProvider) OnRequestBody(ctx wrapper.HttpContext, apiName ApiName,
 		headers.Del("Authorization")
 		util.ReplaceRequestHeaders(headers)
 		_ = proxywasm.ReplaceHttpRequestBody(body)
-		return types.ActionContinue, err
+		return types.ActionContinue, nil
 	}
 
 	// 标准模式: 需要获取 OAuth token
 	util.ReplaceRequestHeaders(headers)
 	_ = proxywasm.ReplaceHttpRequestBody(body)
-	if err != nil {
-		return types.ActionContinue, err
-	}
 	cached, err := v.getToken()
 	if cached {
 		return types.ActionContinue, nil
@@ -369,7 +369,10 @@ func (v *vertexProvider) onChatCompletionRequestBody(ctx wrapper.HttpContext, bo
 		path := v.getRequestPath(ApiNameChatCompletion, request.Model, request.Stream)
 		util.OverwriteRequestPathHeader(headers, path)
 
-		vertexRequest := v.buildVertexChatRequest(request)
+		vertexRequest, err := v.buildVertexChatRequest(request)
+		if err != nil {
+			return nil, err
+		}
 		return json.Marshal(vertexRequest)
 	}
 }
@@ -971,7 +974,7 @@ func (v *vertexProvider) getOpenAICompatibleRequestPath() string {
 	return fmt.Sprintf(vertexOpenAICompatiblePathTemplate, v.config.vertexProjectId, v.config.vertexRegion)
 }
 
-func (v *vertexProvider) buildVertexChatRequest(request *chatCompletionRequest) *vertexChatRequest {
+func (v *vertexProvider) buildVertexChatRequest(request *chatCompletionRequest) (*vertexChatRequest, error) {
 	safetySettings := make([]vertexChatSafetySetting, 0)
 	for category, threshold := range v.config.geminiSafetySetting {
 		safetySettings = append(safetySettings, vertexChatSafetySetting{
@@ -1006,6 +1009,9 @@ func (v *vertexProvider) buildVertexChatRequest(request *chatCompletionRequest)
 		}
 		vertexRequest.GenerationConfig.ThinkingConfig = thinkingConfig
 	}
+	if err := v.applyResponseFormatToGenerationConfig(request.ResponseFormat, &vertexRequest.GenerationConfig, request.Model); err != nil {
+		return nil, err
+	}
 	if request.Tools != nil {
 		functions := make([]function, 0, len(request.Tools))
 		for _, tool := range request.Tools {
@@ -1091,7 +1097,130 @@ func (v *vertexProvider) buildVertexChatRequest(request *chatCompletionRequest)
 		}
 	}
 
-	return &vertexRequest
+	return &vertexRequest, nil
+}
+
+// applyResponseFormatToGenerationConfig translates an OpenAI response_format object into the
+// responseMimeType / responseSchema fields of the Vertex generationConfig. Only type=json_schema
+// is validated strictly: an error is returned when no schema object can be extracted from it.
+func (v *vertexProvider) applyResponseFormatToGenerationConfig(responseFormat map[string]interface{}, generationConfig *vertexChatGenerationConfig, model string) error {
+	// Nothing requested, or nothing to write into.
+	if len(responseFormat) == 0 || generationConfig == nil {
+		return nil
+	}
+	// NOTE: Gemini 2.0 structured output requires propertyOrdering. gemini-2.0-* is legacy and
+	// rarely used, so propertyOrdering is intentionally not synthesized here; response_format
+	// is ignored for those models and the request stays non-structured.
+	if requiresPropertyOrderingForModel(model) {
+		return nil
+	}
+
+	// useJSON switches the output to JSON and, when schema is non-nil, attaches it.
+	useJSON := func(schema map[string]interface{}) {
+		generationConfig.ResponseMimeType = util.MimeTypeApplicationJson
+		if schema != nil {
+			generationConfig.ResponseSchema = schema
+		}
+	}
+
+	rawType, _ := responseFormat["type"].(string)
+	formatType := strings.ToLower(rawType)
+	if formatType == "json_object" {
+		useJSON(nil)
+		return nil
+	}
+	if formatType == "json_schema" {
+		extracted := extractOpenAIJSONSchema(responseFormat)
+		if len(extracted) == 0 {
+			return fmt.Errorf("invalid response_format.json_schema: missing schema object")
+		}
+		useJSON(extracted)
+		return nil
+	}
+	// "text" keeps Vertex's default output. A missing/non-string type, or a type that names a
+	// JSON Schema type, is tolerated as a client passing a schema directly as response_format.
+	directSchema := formatType == "" || isJSONSchemaType(formatType)
+	if directSchema && isJSONSchemaMap(responseFormat) {
+		useJSON(responseFormat)
+	}
+	return nil
+}
+
+// extractOpenAIJSONSchema returns the schema object carried by an OpenAI
+// response_format of type json_schema, or nil when none can be found.
+//
+// Canonical: {"type":"json_schema","json_schema":{"name":"...","strict":true,"schema":{...}}}
+// Tolerated: json_schema itself being the schema object.
+func extractOpenAIJSONSchema(responseFormat map[string]interface{}) map[string]interface{} {
+	jsonSchemaMap, ok := responseFormat["json_schema"].(map[string]interface{})
+	if !ok {
+		// Missing key or non-object value: nothing to extract.
+		return nil
+	}
+
+	if nestedSchemaValue, ok := jsonSchemaMap["schema"]; ok {
+		// A "schema" key marks the canonical wrapper. When its value is not an
+		// object, return nil so the caller reports a validation error; falling
+		// through could pass the wrapper (name/strict/description) off as a
+		// schema because isJSONSchemaMap accepts keys such as "description".
+		nestedSchema, _ := nestedSchemaValue.(map[string]interface{})
+		return nestedSchema
+	}
+
+	// No "schema" key: tolerate the non-standard form where json_schema itself
+	// is the schema.
+	if isJSONSchemaMap(jsonSchemaMap) {
+		return jsonSchemaMap
+	}
+	return nil
+}
+
+// isJSONSchemaType reports whether value, lowercased, names a JSON Schema type.
+func isJSONSchemaType(value string) bool {
+	switch strings.ToLower(value) {
+	case "object", "array", "string", "number", "integer", "boolean", "null":
+		return true
+	}
+	return false
+}
+
+// isJSONSchemaMap reports whether schema looks like a JSON schema object.
+// It matches when "type" is a string naming a JSON Schema type, or when any
+// one of a fixed list of schema keywords is present as a key.
+func isJSONSchemaMap(schema map[string]interface{}) bool {
+	if typeName, isString := schema["type"].(string); isString && isJSONSchemaType(typeName) {
+		return true
+	}
+
+	// A schema may omit "type" and still be valid, so fall back to keyword
+	// detection. Lookups on a nil or empty map find nothing, which yields
+	// false without a separate length check.
+	for _, keyword := range []string{
+		"anyOf",
+		"enum",
+		"format",
+		"items",
+		"maximum",
+		"maxItems",
+		"minimum",
+		"minItems",
+		"nullable",
+		"properties",
+		"description",
+		"propertyOrdering",
+		"required",
+	} {
+		if _, present := schema[keyword]; present {
+			return true
+		}
+	}
+
+	return false
+}
+
+// requiresPropertyOrderingForModel reports whether model, lowercased, starts with "gemini-2.0-".
+func requiresPropertyOrderingForModel(model string) bool {
+	return strings.HasPrefix(strings.ToLower(model), "gemini-2.0-")
+}
 
 func (v *vertexProvider) buildEmbeddingRequest(request *embeddingsRequest) *vertexEmbeddingRequest {
@@ -1170,14 +1299,16 @@ type vertexChatSafetySetting struct {
 }
 
 type vertexChatGenerationConfig struct {
-	Temperature        float64              `json:"temperature,omitempty"`
-	TopP               float64              `json:"topP,omitempty"`
-	TopK               int                  `json:"topK,omitempty"`
-	CandidateCount     int                  `json:"candidateCount,omitempty"`
-	MaxOutputTokens    int                  `json:"maxOutputTokens,omitempty"`
-	ThinkingConfig     vertexThinkingConfig `json:"thinkingConfig,omitempty"`
-	ResponseModalities []string             `json:"responseModalities,omitempty"`
-	ImageConfig        *vertexImageConfig   `json:"imageConfig,omitempty"`
+	Temperature        float64                `json:"temperature,omitempty"`
+	TopP               float64                `json:"topP,omitempty"`
+	TopK               int                    `json:"topK,omitempty"`
+	CandidateCount     int                    `json:"candidateCount,omitempty"`
+	MaxOutputTokens    int                    `json:"maxOutputTokens,omitempty"`
+	ThinkingConfig     vertexThinkingConfig   `json:"thinkingConfig,omitempty"`
+	ResponseMimeType   string                 `json:"responseMimeType,omitempty"` // set to application/json when response_format asks for JSON output
+	ResponseSchema     map[string]interface{} `json:"responseSchema,omitempty"`   // schema mapped from response_format; left unset for json_object
+	ResponseModalities []string               `json:"responseModalities,omitempty"`
+	ImageConfig        *vertexImageConfig     `json:"imageConfig,omitempty"`
 }
 
 type vertexImageConfig struct {
diff --git a/plugins/wasm-go/extensions/ai-proxy/provider/vertex_test.go b/plugins/wasm-go/extensions/ai-proxy/provider/vertex_test.go
new file mode 100644
index 000000000..5b81fcc06
--- /dev/null
+++ b/plugins/wasm-go/extensions/ai-proxy/provider/vertex_test.go
@@ -0,0 +1,258 @@
+package provider
+
+import (
+	"testing"
+
+	"github.com/alibaba/higress/plugins/wasm-go/extensions/ai-proxy/util"
+	"github.com/stretchr/testify/assert"
+	"github.com/stretchr/testify/require"
+)
+
+func TestVertexProviderBuildChatRequestStructuredOutputMapping(t *testing.T) {
+	t.Run("json_object response format", func(t *testing.T) { // expects the JSON MIME type only, no schema
+		v := &vertexProvider{}
+		req := &chatCompletionRequest{
+			Model: "gemini-2.5-flash",
+			Messages: []chatMessage{
+				{Role: roleUser, Content: "hello"},
+			},
+			ResponseFormat: map[string]interface{}{
+				"type": "json_object",
+			},
+		}
+
+		vertexReq, err := v.buildVertexChatRequest(req)
+		require.NoError(t, err)
+		require.NotNil(t, vertexReq)
+
+		assert.Equal(t, util.MimeTypeApplicationJson, vertexReq.GenerationConfig.ResponseMimeType)
+		assert.Nil(t, vertexReq.GenerationConfig.ResponseSchema)
+	})
+
+	t.Run("json_schema response format with nested schema", func(t *testing.T) { // canonical OpenAI shape: schema under json_schema.schema
+		v := &vertexProvider{}
+		schema := map[string]interface{}{
+			"type": "object",
+			"properties": map[string]interface{}{
+				"answer": map[string]interface{}{
+					"type": "string",
+				},
+			},
+			"required": []interface{}{"answer"},
+		}
+		req := &chatCompletionRequest{
+			Model: "gemini-2.5-flash",
+			Messages: []chatMessage{
+				{Role: roleUser, Content: "hello"},
+			},
+			ResponseFormat: map[string]interface{}{
+				"type": "json_schema",
+				"json_schema": map[string]interface{}{
+					"name":   "response",
+					"strict": true,
+					"schema": schema,
+				},
+			},
+		}
+
+		vertexReq, err := v.buildVertexChatRequest(req)
+		require.NoError(t, err)
+		require.NotNil(t, vertexReq)
+
+		assert.Equal(t, util.MimeTypeApplicationJson, vertexReq.GenerationConfig.ResponseMimeType)
+		assert.Equal(t, schema, vertexReq.GenerationConfig.ResponseSchema)
+	})
+
+	t.Run("json_schema response format with direct schema object", func(t *testing.T) { // non-standard: json_schema is itself the schema
+		v := &vertexProvider{}
+		schema := map[string]interface{}{
+			"type": "object",
+			"properties": map[string]interface{}{
+				"city": map[string]interface{}{
+					"type": "string",
+				},
+			},
+			"required": []interface{}{"city"},
+		}
+		req := &chatCompletionRequest{
+			Model: "gemini-2.5-flash",
+			Messages: []chatMessage{
+				{Role: roleUser, Content: "hello"},
+			},
+			ResponseFormat: map[string]interface{}{
+				"type":        "json_schema",
+				"json_schema": schema,
+			},
+		}
+
+		vertexReq, err := v.buildVertexChatRequest(req)
+		require.NoError(t, err)
+		require.NotNil(t, vertexReq)
+
+		assert.Equal(t, util.MimeTypeApplicationJson, vertexReq.GenerationConfig.ResponseMimeType)
+		assert.Equal(t, schema, vertexReq.GenerationConfig.ResponseSchema)
+	})
+
+	t.Run("json_schema response format without valid schema should return error", func(t *testing.T) { // json_schema is a string, so nothing can be extracted
+		v := &vertexProvider{}
+		req := &chatCompletionRequest{
+			Model: "gemini-2.5-flash",
+			Messages: []chatMessage{
+				{Role: roleUser, Content: "hello"},
+			},
+			ResponseFormat: map[string]interface{}{
+				"type":        "json_schema",
+				"json_schema": "invalid",
+			},
+		}
+
+		vertexReq, err := v.buildVertexChatRequest(req)
+		require.Error(t, err)
+		assert.Nil(t, vertexReq)
+		assert.Contains(t, err.Error(), "invalid response_format.json_schema")
+	})
+
+	t.Run("direct schema in response_format for compatibility", func(t *testing.T) { // non-standard: response_format is itself the schema
+		v := &vertexProvider{}
+		schema := map[string]interface{}{
+			"type": "object",
+			"properties": map[string]interface{}{
+				"result": map[string]interface{}{
+					"type": "string",
+				},
+			},
+		}
+		req := &chatCompletionRequest{
+			Model: "gemini-2.5-flash",
+			Messages: []chatMessage{
+				{Role: roleUser, Content: "hello"},
+			},
+			ResponseFormat: schema,
+		}
+
+		vertexReq, err := v.buildVertexChatRequest(req)
+		require.NoError(t, err)
+		require.NotNil(t, vertexReq)
+
+		assert.Equal(t, util.MimeTypeApplicationJson, vertexReq.GenerationConfig.ResponseMimeType)
+		assert.Equal(t, schema, vertexReq.GenerationConfig.ResponseSchema)
+	})
+
+	t.Run("text response format keeps default text output", func(t *testing.T) { // neither MIME type nor schema is written
+		v := &vertexProvider{}
+		req := &chatCompletionRequest{
+			Model: "gemini-2.5-flash",
+			Messages: []chatMessage{
+				{Role: roleUser, Content: "hello"},
+			},
+			ResponseFormat: map[string]interface{}{
+				"type": "text",
+			},
+		}
+
+		vertexReq, err := v.buildVertexChatRequest(req)
+		require.NoError(t, err)
+		require.NotNil(t, vertexReq)
+
+		assert.Empty(t, vertexReq.GenerationConfig.ResponseMimeType)
+		assert.Nil(t, vertexReq.GenerationConfig.ResponseSchema)
+	})
+
+	t.Run("unknown response format does not inject schema config", func(t *testing.T) { // "xml" is not a JSON Schema type name, so it is ignored
+		v := &vertexProvider{}
+		req := &chatCompletionRequest{
+			Model: "gemini-2.5-flash",
+			Messages: []chatMessage{
+				{Role: roleUser, Content: "hello"},
+			},
+			ResponseFormat: map[string]interface{}{
+				"type": "xml",
+			},
+		}
+
+		vertexReq, err := v.buildVertexChatRequest(req)
+		require.NoError(t, err)
+		require.NotNil(t, vertexReq)
+
+		assert.Empty(t, vertexReq.GenerationConfig.ResponseMimeType)
+		assert.Nil(t, vertexReq.GenerationConfig.ResponseSchema)
+	})
+
+	t.Run("gemini 2.0 json_schema is ignored for stability", func(t *testing.T) { // gemini-2.0-* skips response_format mapping entirely
+		v := &vertexProvider{}
+		schema := map[string]interface{}{
+			"type": "object",
+			"properties": map[string]interface{}{
+				"answer": map[string]interface{}{
+					"type": "string",
+				},
+			},
+		}
+		req := &chatCompletionRequest{
+			Model: "gemini-2.0-flash",
+			Messages: []chatMessage{
+				{Role: roleUser, Content: "hello"},
+			},
+			ResponseFormat: map[string]interface{}{
+				"type": "json_schema",
+				"json_schema": map[string]interface{}{
+					"name":   "response",
+					"strict": true,
+					"schema": schema,
+				},
+			},
+		}
+
+		vertexReq, err := v.buildVertexChatRequest(req)
+		require.NoError(t, err)
+		require.NotNil(t, vertexReq)
+		assert.Empty(t, vertexReq.GenerationConfig.ResponseMimeType)
+		assert.Nil(t, vertexReq.GenerationConfig.ResponseSchema)
+	})
+
+	t.Run("gemini 2.0 malformed json_schema is also ignored", func(t *testing.T) { // skipped before validation, hence no error
+		v := &vertexProvider{}
+		req := &chatCompletionRequest{
+			Model: "gemini-2.0-flash",
+			Messages: []chatMessage{
+				{Role: roleUser, Content: "hello"},
+			},
+			ResponseFormat: map[string]interface{}{
+				"type":        "json_schema",
+				"json_schema": "invalid",
+			},
+		}
+
+		vertexReq, err := v.buildVertexChatRequest(req)
+		require.NoError(t, err)
+		require.NotNil(t, vertexReq)
+		assert.Empty(t, vertexReq.GenerationConfig.ResponseMimeType)
+		assert.Nil(t, vertexReq.GenerationConfig.ResponseSchema)
+	})
+
+	t.Run("gemini 2.0 json_object is ignored", func(t *testing.T) { // even the schema-less json_object is skipped for gemini-2.0-*
+		v := &vertexProvider{}
+		req := &chatCompletionRequest{
+			Model: "gemini-2.0-flash",
+			Messages: []chatMessage{
+				{Role: roleUser, Content: "hello"},
+			},
+			ResponseFormat: map[string]interface{}{
+				"type": "json_object",
+			},
+		}
+
+		vertexReq, err := v.buildVertexChatRequest(req)
+		require.NoError(t, err)
+		require.NotNil(t, vertexReq)
+		assert.Empty(t, vertexReq.GenerationConfig.ResponseMimeType)
+		assert.Nil(t, vertexReq.GenerationConfig.ResponseSchema)
+	})
+}
+
+// TestVertexProviderApplyResponseFormatNilSafety checks that a nil config and a nil or empty response_format return no error.
+func TestVertexProviderApplyResponseFormatNilSafety(t *testing.T) {
+	require.NoError(t, (&vertexProvider{}).applyResponseFormatToGenerationConfig(map[string]interface{}{"type": "json_object"}, nil, "gemini-2.5-flash"))
+	require.NoError(t, (&vertexProvider{}).applyResponseFormatToGenerationConfig(nil, &vertexChatGenerationConfig{}, "gemini-2.5-flash"))
+	require.NoError(t, (&vertexProvider{}).applyResponseFormatToGenerationConfig(map[string]interface{}{}, &vertexChatGenerationConfig{}, "gemini-2.5-flash"))
+}
diff --git a/plugins/wasm-go/extensions/ai-proxy/test/vertex.go b/plugins/wasm-go/extensions/ai-proxy/test/vertex.go
index a534dacbf..591633a65 100644
--- a/plugins/wasm-go/extensions/ai-proxy/test/vertex.go
+++ b/plugins/wasm-go/extensions/ai-proxy/test/vertex.go
@@ -380,6 +380,273 @@ func RunVertexExpressModeOnHttpRequestBodyTests(t *testing.T) {
 			require.True(t, hasVertexLogs, "Should have vertex processing logs")
 		})
 
+		// 测试 Vertex Express Mode structured outputs: json_schema 映射
+		t.Run("vertex express mode structured outputs json_schema request body mapping", func(t *testing.T) {
+			host, status := test.NewTestHost(vertexExpressModeConfig)
+			defer host.Reset()
+			require.Equal(t, types.OnPluginStartStatusOK, status)
+
+			host.CallOnHttpRequestHeaders([][2]string{
+				{":authority", "example.com"},
+				{":path", "/v1/chat/completions"},
+				{":method", "POST"},
+				{"Content-Type", "application/json"},
+			})
+
+			requestBody := `{
+				"model":"gemini-2.5-flash",
+				"messages":[{"role":"user","content":"return structured output"}],
+				"response_format":{
+					"type":"json_schema",
+					"json_schema":{
+						"name":"demo_schema",
+						"strict":true,
+						"schema":{
+							"type":"object",
+							"properties":{
+								"answer":{"type":"string"}
+							},
+							"required":["answer"]
+						}
+					}
+				}
+			}`
+			action := host.CallOnHttpRequestBody([]byte(requestBody))
+			require.Equal(t, types.ActionContinue, action)
+
+			processedBody := host.GetRequestBody()
+			require.NotNil(t, processedBody)
+
+			var transformed map[string]interface{}
+			require.NoError(t, json.Unmarshal(processedBody, &transformed))
+
+			generationConfig, ok := transformed["generationConfig"].(map[string]interface{})
+			require.True(t, ok, "generationConfig should exist")
+			require.Equal(t, "application/json", generationConfig["responseMimeType"], "responseMimeType should be mapped for json_schema")
+
+			responseSchema, ok := generationConfig["responseSchema"].(map[string]interface{})
+			require.True(t, ok, "responseSchema should be mapped from response_format.json_schema.schema")
+			require.Equal(t, "object", responseSchema["type"])
+
+			properties, ok := responseSchema["properties"].(map[string]interface{})
+			require.True(t, ok, "responseSchema.properties should exist")
+			_, hasAnswer := properties["answer"]
+			require.True(t, hasAnswer, "responseSchema.properties.answer should exist")
+		})
+
+		// 测试 Gemini 2.0 structured outputs: 忽略 response_format，按非结构化输出处理
+		t.Run("vertex express mode structured outputs gemini 2.0 ignore response format", func(t *testing.T) {
+			host, status := test.NewTestHost(vertexExpressModeConfig)
+			defer host.Reset()
+			require.Equal(t, types.OnPluginStartStatusOK, status)
+
+			host.CallOnHttpRequestHeaders([][2]string{
+				{":authority", "example.com"},
+				{":path", "/v1/chat/completions"},
+				{":method", "POST"},
+				{"Content-Type", "application/json"},
+			})
+
+			requestBody := `{
+				"model":"gemini-2.0-flash",
+				"messages":[{"role":"user","content":"return structured output"}],
+				"response_format":{
+					"type":"json_schema",
+					"json_schema":{
+						"name":"demo_schema",
+						"strict":true,
+						"schema":{
+							"type":"object",
+							"properties":{
+								"beta":{"type":"string"},
+								"alpha":{
+									"type":"object",
+									"properties":{
+										"z":{"type":"string"},
+										"a":{"type":"string"}
+									}
+								}
+							}
+						}
+					}
+				}
+			}`
+			action := host.CallOnHttpRequestBody([]byte(requestBody))
+			require.Equal(t, types.ActionContinue, action)
+
+			processedBody := host.GetRequestBody()
+			require.NotNil(t, processedBody)
+
+			var transformed map[string]interface{}
+			require.NoError(t, json.Unmarshal(processedBody, &transformed))
+
+			generationConfig, ok := transformed["generationConfig"].(map[string]interface{})
+			require.True(t, ok, "generationConfig should exist")
+			_, hasMimeType := generationConfig["responseMimeType"]
+			_, hasSchema := generationConfig["responseSchema"]
+			require.False(t, hasMimeType, "gemini-2.0 should ignore response_format and not set responseMimeType")
+			require.False(t, hasSchema, "gemini-2.0 should ignore response_format and not set responseSchema")
+		})
+
+		// 测试 Vertex Express Mode structured outputs: json_object 映射
+		t.Run("vertex express mode structured outputs json_object request body mapping", func(t *testing.T) {
+			host, status := test.NewTestHost(vertexExpressModeConfig)
+			defer host.Reset()
+			require.Equal(t, types.OnPluginStartStatusOK, status)
+
+			host.CallOnHttpRequestHeaders([][2]string{
+				{":authority", "example.com"},
+				{":path", "/v1/chat/completions"},
+				{":method", "POST"},
+				{"Content-Type", "application/json"},
+			})
+
+			requestBody := `{
+				"model":"gemini-2.5-flash",
+				"messages":[{"role":"user","content":"return json"}],
+				"response_format":{"type":"json_object"}
+			}`
+			action := host.CallOnHttpRequestBody([]byte(requestBody))
+			require.Equal(t, types.ActionContinue, action)
+
+			processedBody := host.GetRequestBody()
+			require.NotNil(t, processedBody)
+
+			var transformed map[string]interface{}
+			require.NoError(t, json.Unmarshal(processedBody, &transformed))
+
+			generationConfig, ok := transformed["generationConfig"].(map[string]interface{})
+			require.True(t, ok, "generationConfig should exist")
+			require.Equal(t, "application/json", generationConfig["responseMimeType"], "responseMimeType should be mapped for json_object")
+
+			_, hasSchema := generationConfig["responseSchema"]
+			require.False(t, hasSchema, "json_object should not inject responseSchema")
+		})
+
+		// 测试 Vertex Express Mode structured outputs: 兼容 direct schema
+		t.Run("vertex express mode structured outputs direct schema response_format mapping", func(t *testing.T) {
+			host, status := test.NewTestHost(vertexExpressModeConfig)
+			defer host.Reset()
+			require.Equal(t, types.OnPluginStartStatusOK, status)
+
+			host.CallOnHttpRequestHeaders([][2]string{
+				{":authority", "example.com"},
+				{":path", "/v1/chat/completions"},
+				{":method", "POST"},
+				{"Content-Type", "application/json"},
+			})
+
+			requestBody := `{
+				"model":"gemini-2.5-flash",
+				"messages":[{"role":"user","content":"return structured output"}],
+				"response_format":{
+					"type":"object",
+					"properties":{"city":{"type":"string"}}
+				}
+			}`
+			action := host.CallOnHttpRequestBody([]byte(requestBody))
+			require.Equal(t, types.ActionContinue, action)
+
+			processedBody := host.GetRequestBody()
+			require.NotNil(t, processedBody)
+
+			var transformed map[string]interface{}
+			require.NoError(t, json.Unmarshal(processedBody, &transformed))
+
+			generationConfig, ok := transformed["generationConfig"].(map[string]interface{})
+			require.True(t, ok, "generationConfig should exist")
+			require.Equal(t, "application/json", generationConfig["responseMimeType"], "direct schema should be mapped to JSON mime type")
+
+			responseSchema, ok := generationConfig["responseSchema"].(map[string]interface{})
+			require.True(t, ok, "direct schema should be mapped to responseSchema")
+			require.Equal(t, "object", responseSchema["type"])
+		})
+
+		// 测试 Vertex Express Mode structured outputs: 异常 json_schema 应返回错误（不能静默降级）
+		t.Run("vertex express mode structured outputs malformed json_schema mapping", func(t *testing.T) {
+			host, status := test.NewTestHost(vertexExpressModeConfig)
+			defer host.Reset()
+			require.Equal(t, types.OnPluginStartStatusOK, status)
+
+			host.CallOnHttpRequestHeaders([][2]string{
+				{":authority", "example.com"},
+				{":path", "/v1/chat/completions"},
+				{":method", "POST"},
+				{"Content-Type", "application/json"},
+			})
+
+			requestBody := `{
+				"model":"gemini-2.5-flash",
+				"messages":[{"role":"user","content":"return structured output"}],
+				"response_format":{
+					"type":"json_schema",
+					"json_schema":"invalid"
+				}
+			}`
+			action := host.CallOnHttpRequestBody([]byte(requestBody))
+			require.Equal(t, types.ActionContinue, action)
+
+			errorLogs := host.GetErrorLogs()
+			hasInvalidSchemaError := false
+			for _, log := range errorLogs {
+				if strings.Contains(log, "invalid response_format.json_schema") {
+					hasInvalidSchemaError = true
+					break
+				}
+			}
+			require.True(t, hasInvalidSchemaError, "malformed json_schema should produce explicit validation error")
+
+			processedBody := host.GetRequestBody()
+			require.NotNil(t, processedBody)
+			require.Contains(t, string(processedBody), `"response_format"`, "failed request should keep original body")
+			require.NotContains(t, string(processedBody), `"generationConfig"`, "failed request should not be rewritten into Vertex format")
+
+			requestHeaders := host.GetRequestHeaders()
+			pathHeader := ""
+			for _, header := range requestHeaders {
+				if header[0] == ":path" {
+					pathHeader = header[1]
+					break
+				}
+			}
+			require.Equal(t, "/v1/chat/completions", pathHeader, "failed validation should not rewrite upstream path")
+		})
+
+		// 测试 Vertex Express Mode structured outputs: 未知类型不映射
+		t.Run("vertex express mode structured outputs unknown response format type", func(t *testing.T) {
+			host, status := test.NewTestHost(vertexExpressModeConfig)
+			defer host.Reset()
+			require.Equal(t, types.OnPluginStartStatusOK, status)
+
+			host.CallOnHttpRequestHeaders([][2]string{
+				{":authority", "example.com"},
+				{":path", "/v1/chat/completions"},
+				{":method", "POST"},
+				{"Content-Type", "application/json"},
+			})
+
+			requestBody := `{
+				"model":"gemini-2.5-flash",
+				"messages":[{"role":"user","content":"return xml"}],
+				"response_format":{"type":"xml"}
+			}`
+			action := host.CallOnHttpRequestBody([]byte(requestBody))
+			require.Equal(t, types.ActionContinue, action)
+
+			processedBody := host.GetRequestBody()
+			require.NotNil(t, processedBody)
+
+			var transformed map[string]interface{}
+			require.NoError(t, json.Unmarshal(processedBody, &transformed))
+
+			generationConfig, ok := transformed["generationConfig"].(map[string]interface{})
+			require.True(t, ok, "generationConfig should exist")
+			_, hasMime := generationConfig["responseMimeType"]
+			_, hasSchema := generationConfig["responseSchema"]
+			require.False(t, hasMime, "unknown response_format type should not inject responseMimeType")
+			require.False(t, hasSchema, "unknown response_format type should not inject responseSchema")
+		})
+
 		// 测试 Vertex Express Mode 请求体处理（嵌入接口）
 		t.Run("vertex express mode embeddings request body", func(t *testing.T) {
 			host, status := test.NewTestHost(vertexExpressModeConfig)
@@ -613,8 +880,8 @@ func RunVertexOpenAICompatibleModeOnHttpRequestBodyTests(t *testing.T) {
 			requestBody := `{"model":"gemini-2.0-flash","messages":[{"role":"user","content":"test"}]}`
 			action := host.CallOnHttpRequestBody([]byte(requestBody))
 
-			// OpenAI 兼容模式需要等待 OAuth token，所以返回 ActionPause
-			require.Equal(t, types.ActionPause, action)
+			// 测试环境使用伪造密钥，OAuth 获取会失败，期望 ActionContinue 并记录错误
+			require.Equal(t, types.ActionContinue, action)
 
 			// 验证请求体保持 OpenAI 格式（不转换为 Vertex 原生格式）
 			processedBody := host.GetRequestBody()
@@ -637,6 +904,47 @@ func RunVertexOpenAICompatibleModeOnHttpRequestBodyTests(t *testing.T) {
 			require.Contains(t, pathHeader, "/endpoints/openapi/chat/completions", "Path should contain openapi chat completions endpoint")
 		})
 
+		// 测试 Vertex OpenAI 兼容模式 structured outputs 请求体透传
+		t.Run("vertex openai compatible mode structured outputs passthrough", func(t *testing.T) {
+			host, status := test.NewTestHost(vertexOpenAICompatibleModeConfig)
+			defer host.Reset()
+			require.Equal(t, types.OnPluginStartStatusOK, status)
+
+			host.CallOnHttpRequestHeaders([][2]string{
+				{":authority", "example.com"},
+				{":path", "/v1/chat/completions"},
+				{":method", "POST"},
+				{"Content-Type", "application/json"},
+			})
+
+			requestBody := `{
+				"model":"gemini-2.0-flash",
+				"messages":[{"role":"user","content":"test"}],
+				"response_format":{
+					"type":"json_schema",
+					"json_schema":{
+						"name":"demo_schema",
+						"strict":true,
+						"schema":{
+							"type":"object",
+							"properties":{"answer":{"type":"string"}},
+							"required":["answer"]
+						}
+					}
+				}
+			}`
+			action := host.CallOnHttpRequestBody([]byte(requestBody))
+			require.Equal(t, types.ActionContinue, action)
+
+			processedBody := host.GetRequestBody()
+			require.NotNil(t, processedBody)
+			bodyStr := string(processedBody)
+
+			require.Contains(t, bodyStr, `"response_format"`, "OpenAI compatible mode should preserve response_format")
+			require.Contains(t, bodyStr, `"json_schema"`, "OpenAI compatible mode should preserve json_schema")
+			require.NotContains(t, bodyStr, `"generationConfig"`, "OpenAI compatible mode should not convert to Vertex native generationConfig")
+		})
+
 		// 测试 Vertex OpenAI 兼容模式请求体处理（含模型映射）
 		t.Run("vertex openai compatible mode with model mapping", func(t *testing.T) {
 			host, status := test.NewTestHost(vertexOpenAICompatibleModeWithModelMappingConfig)
@@ -655,7 +963,7 @@ func RunVertexOpenAICompatibleModeOnHttpRequestBodyTests(t *testing.T) {
 			requestBody := `{"model":"gpt-4","messages":[{"role":"user","content":"test"}]}`
 			action := host.CallOnHttpRequestBody([]byte(requestBody))
 
-			require.Equal(t, types.ActionPause, action)
+			require.Equal(t, types.ActionContinue, action)
 
 			// 验证请求体中的模型名被映射
 			processedBody := host.GetRequestBody()