diff --git a/plugins/wasm-go/extensions/ai-proxy/README.md b/plugins/wasm-go/extensions/ai-proxy/README.md index 8f2ae49a5..f0af57492 100644 --- a/plugins/wasm-go/extensions/ai-proxy/README.md +++ b/plugins/wasm-go/extensions/ai-proxy/README.md @@ -130,10 +130,11 @@ Azure OpenAI 所对应的 `type` 为 `azure`。它特有的配置字段如下: 通义千问所对应的 `type` 为 `qwen`。它特有的配置字段如下: -| 名称 | 数据类型 | 填写要求 | 默认值 | 描述 | -|--------------------|-----------------|------|-----|------------------------------------------------------------------| -| `qwenEnableSearch` | boolean | 非必填 | - | 是否启用通义千问内置的互联网搜索功能。 | -| `qwenFileIds` | array of string | 非必填 | - | 通过文件接口上传至Dashscope的文件 ID,其内容将被用做 AI 对话的上下文。不可与 `context` 字段同时配置。 | +| 名称 | 数据类型 | 填写要求 | 默认值 | 描述 | +| ---------------------- | --------------- | -------- | ------ | ------------------------------------------------------------ | +| `qwenEnableSearch` | boolean | 非必填 | - | 是否启用通义千问内置的互联网搜索功能。 | +| `qwenFileIds` | array of string | 非必填 | - | 通过文件接口上传至Dashscope的文件 ID,其内容将被用做 AI 对话的上下文。不可与 `context` 字段同时配置。 | +| `qwenEnableCompatible` | boolean | 非必填 | false | 开启通义千问兼容模式。启用通义千问兼容模式后,将调用千问的兼容模式接口,同时对请求/响应不做修改。 | #### 百川智能 (Baichuan AI) diff --git a/plugins/wasm-go/extensions/ai-proxy/README_EN.md b/plugins/wasm-go/extensions/ai-proxy/README_EN.md index 4400e248d..891503a4f 100644 --- a/plugins/wasm-go/extensions/ai-proxy/README_EN.md +++ b/plugins/wasm-go/extensions/ai-proxy/README_EN.md @@ -106,6 +106,7 @@ For Qwen (Tongyi Qwen), the corresponding `type` is `qwen`. Its unique configura |--------------------|-----------------|----------------------|---------------|------------------------------------------------------------------------------------------------------------------------| | `qwenEnableSearch` | boolean | Optional | - | Whether to enable the built-in Internet search function provided by Qwen. 
| | `qwenFileIds` | array of string | Optional | - | The file IDs uploaded via the Dashscope file interface, whose content will be used as context for AI conversations. Cannot be configured with the `context` field. | +| `qwenEnableCompatible` | boolean | Optional | false | Enable Qwen compatibility mode. When Qwen compatibility mode is enabled, the compatible mode interface of Qwen will be called, and the request/response will not be modified. | #### Baichuan AI diff --git a/test/e2e/conformance/base/llm-mock.yaml b/test/e2e/conformance/base/llm-mock.yaml new file mode 100644 index 000000000..22b89272b --- /dev/null +++ b/test/e2e/conformance/base/llm-mock.yaml @@ -0,0 +1,46 @@ +# Copyright (c) 2025 Alibaba Group Holding Ltd. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +apiVersion: v1 +kind: Namespace +metadata: + name: higress-conformance-ai-backend + labels: + higress-conformance: infra +--- +apiVersion: v1 +kind: Pod +metadata: + name: llm-mock + namespace: higress-conformance-ai-backend + labels: + name: llm-mock +spec: + containers: + - name: llm-mock + image: registry.cn-hangzhou.aliyuncs.com/hxt/llm-mock:latest + ports: + - containerPort: 3000 +--- +apiVersion: v1 +kind: Service +metadata: + name: llm-mock-service + namespace: higress-conformance-ai-backend +spec: + selector: + name: llm-mock + clusterIP: None + ports: + - port: 3000 \ No newline at end of file diff --git a/test/e2e/conformance/tests/go-wasm-ai-proxy.go b/test/e2e/conformance/tests/go-wasm-ai-proxy.go index fc341b0be..882b0b081 100644 --- a/test/e2e/conformance/tests/go-wasm-ai-proxy.go +++ b/test/e2e/conformance/tests/go-wasm-ai-proxy.go @@ -1,4 +1,4 @@ -// Copyright (c) 2022 Alibaba Group Holding Ltd. +// Copyright (c) 2025 Alibaba Group Holding Ltd. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -21,90 +21,346 @@ import ( "github.com/alibaba/higress/test/e2e/conformance/utils/suite" ) +// The llm-mock service response has a fixed id of `chatcmpl-llm-mock`. +// The created field is fixed to 10. +// The response content is echoed back as the request content. +// The usage field is fixed to `{"prompt_tokens":9,"completion_tokens":1,"total_tokens":10}` (specific values may vary based on the corresponding response fields). 
+ func init() { Register(WasmPluginsAiProxy) } var WasmPluginsAiProxy = suite.ConformanceTest{ ShortName: "WasmPluginAiProxy", - Description: "The Ingress in the higress-conformance-infra namespace test the ai-proxy WASM plugin.", + Description: "The Ingress in the higress-conformance-ai-backend namespace test the ai-proxy WASM plugin.", Features: []suite.SupportedFeature{suite.WASMGoConformanceFeature}, Manifests: []string{"tests/go-wasm-ai-proxy.yaml"}, Test: func(t *testing.T, suite *suite.ConformanceTestSuite) { testcases := []http.Assertion{ { Meta: http.AssertionMeta{ - TestCaseName: "case 1: openai", - TargetBackend: "infra-backend-v1", - TargetNamespace: "higress-conformance-infra", + TestCaseName: "baidu case 1: non-streaming request", + CompareTarget: http.CompareTargetResponse, }, Request: http.AssertionRequest{ ActualRequest: http.Request{ - Host: "openai.ai.com", - Path: "/v1/chat/completions", - Method:"POST", - ContentType: http.ContentTypeApplicationJson, - Body: []byte(`{ - "model": "gpt-3", - "messages": [{"role":"user","content":"hi"}]}`), - }, - ExpectedRequest: &http.ExpectedRequest{ - Request: http.Request{ - Host: "api.openai.com", - Path: "/v1/chat/completions", - Method: "POST", - ContentType: http.ContentTypeApplicationJson, - Body: []byte(`{ - "model": "gpt-3", - "messages": [{"role":"user","content":"hi"}], - "max_tokens": 123, - "temperature": 0.66}`), - }, + Host: "qianfan.baidubce.com", + Path: "/v1/chat/completions", + Method: "POST", + ContentType: http.ContentTypeApplicationJson, + Body: []byte(`{"model":"gpt-3","messages":[{"role":"user","content":"你好,你是谁?"}],"stream":false}`), }, }, Response: http.AssertionResponse{ ExpectedResponse: http.Response{ - StatusCode: 200, + StatusCode: 200, + ContentType: http.ContentTypeApplicationJson, + Body: 
[]byte(`{"id":"chatcmpl-llm-mock","choices":[{"index":0,"message":{"role":"assistant","content":"你好,你是谁?"},"finish_reason":"stop"}],"created":10,"model":"ernie-3.5-8k","object":"chat.completion","usage":{"prompt_tokens":9,"completion_tokens":1,"total_tokens":10}}`), }, }, }, { Meta: http.AssertionMeta{ - TestCaseName: "case 2: qwen", - TargetBackend: "infra-backend-v1", - TargetNamespace: "higress-conformance-infra", + TestCaseName: "baidu case 2: streaming request", + CompareTarget: http.CompareTargetResponse, }, Request: http.AssertionRequest{ ActualRequest: http.Request{ - Host: "qwen.ai.com", - Path: "/v1/chat/completions", - Method:"POST", - ContentType: http.ContentTypeApplicationJson, - Body: []byte(`{ - "model": "qwen-long", - "input": {"messages": [{"role":"user","content":"hi"}]}, - "parameters": {"max_tokens": 321, "temperature": 0.7}}`), - }, - ExpectedRequest: &http.ExpectedRequest{ - Request: http.Request{ - Host: "dashscope.aliyuncs.com", - Path: "/api/v1/services/aigc/text-generation/generation", - Method: "POST", - ContentType: http.ContentTypeApplicationJson, - Body: []byte(`{ - "model": "qwen-long", - "input": {"messages": [{"role":"user","content":"hi"}]}, - "parameters": {"max_tokens": 321, "temperature": 0.66}}`), - }, + Host: "qianfan.baidubce.com", + Path: "/v1/chat/completions", + Method: "POST", + ContentType: http.ContentTypeApplicationJson, + Body: []byte(`{"model":"gpt-3","messages":[{"role":"user","content":"你好,你是谁?"}],"stream":true}`), }, }, Response: http.AssertionResponse{ ExpectedResponse: http.Response{ - StatusCode: 500, + StatusCode: 200, + ContentType: http.ContentTypeTextEventStream, + Body: []byte(`data: {"id":"chatcmpl-llm-mock","choices":[{"index":0,"delta":{"content":"你"}}],"created":10,"model":"ernie-3.5-8k","object":"chat.completion.chunk","usage":{}} + +data: {"id":"chatcmpl-llm-mock","choices":[{"index":0,"delta":{"content":"好"}}],"created":10,"model":"ernie-3.5-8k","object":"chat.completion.chunk","usage":{}} + +data: 
{"id":"chatcmpl-llm-mock","choices":[{"index":0,"delta":{"content":","}}],"created":10,"model":"ernie-3.5-8k","object":"chat.completion.chunk","usage":{}} + +data: {"id":"chatcmpl-llm-mock","choices":[{"index":0,"delta":{"content":"你"}}],"created":10,"model":"ernie-3.5-8k","object":"chat.completion.chunk","usage":{}} + +data: {"id":"chatcmpl-llm-mock","choices":[{"index":0,"delta":{"content":"是"}}],"created":10,"model":"ernie-3.5-8k","object":"chat.completion.chunk","usage":{}} + +data: {"id":"chatcmpl-llm-mock","choices":[{"index":0,"delta":{"content":"谁"}}],"created":10,"model":"ernie-3.5-8k","object":"chat.completion.chunk","usage":{}} + +data: {"id":"chatcmpl-llm-mock","choices":[{"index":0,"delta":{"content":"?"}}],"created":10,"model":"ernie-3.5-8k","object":"chat.completion.chunk","usage":{}} + +data: [DONE] + +`), + }, + }, + }, + { + Meta: http.AssertionMeta{ + TestCaseName: "doubao case 1: non-streaming request", + CompareTarget: http.CompareTargetResponse, + }, + Request: http.AssertionRequest{ + ActualRequest: http.Request{ + Host: "ark.cn-beijing.volces.com", + Path: "/v1/chat/completions", + Method: "POST", + ContentType: http.ContentTypeApplicationJson, + Body: []byte(`{"model":"gpt-3","messages":[{"role":"user","content":"你好,你是谁?"}],"stream":false}`), + }, + }, + Response: http.AssertionResponse{ + ExpectedResponse: http.Response{ + StatusCode: 200, + ContentType: http.ContentTypeApplicationJson, + Body: []byte(`{"id":"chatcmpl-llm-mock","choices":[{"index":0,"message":{"role":"assistant","content":"你好,你是谁?"},"finish_reason":"stop"}],"created":10,"model":"fake_doubao_endpoint","object":"chat.completion","usage":{"prompt_tokens":9,"completion_tokens":1,"total_tokens":10}}`), + }, + }, + }, + { + Meta: http.AssertionMeta{ + TestCaseName: "doubao case 2: streaming request", + CompareTarget: http.CompareTargetResponse, + }, + Request: http.AssertionRequest{ + ActualRequest: http.Request{ + Host: "ark.cn-beijing.volces.com", + Path: 
"/v1/chat/completions", + Method: "POST", + ContentType: http.ContentTypeApplicationJson, + Body: []byte(`{"model":"gpt-3","messages":[{"role":"user","content":"你好,你是谁?"}],"stream":true}`), + }, + }, + Response: http.AssertionResponse{ + ExpectedResponse: http.Response{ + StatusCode: 200, + ContentType: http.ContentTypeTextEventStream, + Body: []byte(`data: {"id":"chatcmpl-llm-mock","choices":[{"index":0,"delta":{"content":"你"}}],"created":10,"model":"fake_doubao_endpoint","object":"chat.completion.chunk","usage":{}} + +data: {"id":"chatcmpl-llm-mock","choices":[{"index":0,"delta":{"content":"好"}}],"created":10,"model":"fake_doubao_endpoint","object":"chat.completion.chunk","usage":{}} + +data: {"id":"chatcmpl-llm-mock","choices":[{"index":0,"delta":{"content":","}}],"created":10,"model":"fake_doubao_endpoint","object":"chat.completion.chunk","usage":{}} + +data: {"id":"chatcmpl-llm-mock","choices":[{"index":0,"delta":{"content":"你"}}],"created":10,"model":"fake_doubao_endpoint","object":"chat.completion.chunk","usage":{}} + +data: {"id":"chatcmpl-llm-mock","choices":[{"index":0,"delta":{"content":"是"}}],"created":10,"model":"fake_doubao_endpoint","object":"chat.completion.chunk","usage":{}} + +data: {"id":"chatcmpl-llm-mock","choices":[{"index":0,"delta":{"content":"谁"}}],"created":10,"model":"fake_doubao_endpoint","object":"chat.completion.chunk","usage":{}} + +data: {"id":"chatcmpl-llm-mock","choices":[{"index":0,"delta":{"content":"?"}}],"created":10,"model":"fake_doubao_endpoint","object":"chat.completion.chunk","usage":{}} + +data: [DONE] + +`), + }, + }, + }, + { + Meta: http.AssertionMeta{ + TestCaseName: "minimax case 1: proxy completion V2 API, non-streaming request", + CompareTarget: http.CompareTargetResponse, + }, + Request: http.AssertionRequest{ + ActualRequest: http.Request{ + Host: "api.minimax.chat-v2-api", + Path: "/v1/chat/completions", + Method: "POST", + ContentType: http.ContentTypeApplicationJson, + Body: 
[]byte(`{"model":"gpt-3","messages":[{"role":"user","content":"你好,你是谁?"}],"stream":false}`), + }, + }, + Response: http.AssertionResponse{ + ExpectedResponse: http.Response{ + StatusCode: 200, + ContentType: http.ContentTypeApplicationJson, + Body: []byte(`{"id":"chatcmpl-llm-mock","choices":[{"index":0,"message":{"role":"assistant","content":"你好,你是谁?"},"finish_reason":"stop"}],"created":10,"model":"abab6.5s-chat","object":"chat.completion","usage":{"prompt_tokens":9,"completion_tokens":1,"total_tokens":10}}`), + }, + }, + }, + { + Meta: http.AssertionMeta{ + TestCaseName: "minimax case 2: proxy completion V2 API, streaming request", + CompareTarget: http.CompareTargetResponse, + }, + Request: http.AssertionRequest{ + ActualRequest: http.Request{ + Host: "api.minimax.chat-v2-api", + Path: "/v1/chat/completions", + Method: "POST", + ContentType: http.ContentTypeApplicationJson, + Body: []byte(`{"model":"gpt-3","messages":[{"role":"user","content":"你好,你是谁?"}],"stream":true}`), + }, + }, + Response: http.AssertionResponse{ + ExpectedResponse: http.Response{ + StatusCode: 200, + ContentType: http.ContentTypeTextEventStream, + Body: []byte(`data: {"id":"chatcmpl-llm-mock","choices":[{"index":0,"delta":{"content":"你"}}],"created":10,"model":"abab6.5s-chat","object":"chat.completion.chunk","usage":{}} + +data: {"id":"chatcmpl-llm-mock","choices":[{"index":0,"delta":{"content":"好"}}],"created":10,"model":"abab6.5s-chat","object":"chat.completion.chunk","usage":{}} + +data: {"id":"chatcmpl-llm-mock","choices":[{"index":0,"delta":{"content":","}}],"created":10,"model":"abab6.5s-chat","object":"chat.completion.chunk","usage":{}} + +data: {"id":"chatcmpl-llm-mock","choices":[{"index":0,"delta":{"content":"你"}}],"created":10,"model":"abab6.5s-chat","object":"chat.completion.chunk","usage":{}} + +data: {"id":"chatcmpl-llm-mock","choices":[{"index":0,"delta":{"content":"是"}}],"created":10,"model":"abab6.5s-chat","object":"chat.completion.chunk","usage":{}} + +data: 
{"id":"chatcmpl-llm-mock","choices":[{"index":0,"delta":{"content":"谁"}}],"created":10,"model":"abab6.5s-chat","object":"chat.completion.chunk","usage":{}} + +data: {"id":"chatcmpl-llm-mock","choices":[{"index":0,"delta":{"content":"?"}}],"created":10,"model":"abab6.5s-chat","object":"chat.completion.chunk","usage":{}} + +data: [DONE] + +`), + }, + }, + }, + { + Meta: http.AssertionMeta{ + TestCaseName: "minimax case 3: proxy completion Pro API, non-streaming request", + CompareTarget: http.CompareTargetResponse, + }, + Request: http.AssertionRequest{ + ActualRequest: http.Request{ + Host: "api.minimax.chat-pro-api", + Path: "/v1/chat/completions", + Method: "POST", + ContentType: http.ContentTypeApplicationJson, + Body: []byte(`{"model":"gpt-3","messages":[{"role":"user","content":"你好,你是谁?"}],"stream":false}`), + }, + }, + Response: http.AssertionResponse{ + ExpectedResponse: http.Response{ + StatusCode: 200, + ContentType: http.ContentTypeApplicationJson, + Body: []byte(`{"id":"chatcmpl-llm-mock","choices":[{"index":0,"message":{"name":"MM智能助理","role":"assistant","content":"你好,你是谁?"},"finish_reason":"stop"}],"created":10,"model":"abab6.5s-chat","object":"chat.completion","usage":{"prompt_tokens":9,"completion_tokens":1,"total_tokens":10}}`), + }, + }, + }, + { + Meta: http.AssertionMeta{ + TestCaseName: "minimax case 4: proxy completion Pro API, streaming request", + CompareTarget: http.CompareTargetResponse, + }, + Request: http.AssertionRequest{ + ActualRequest: http.Request{ + Host: "api.minimax.chat-pro-api", + Path: "/v1/chat/completions", + Method: "POST", + ContentType: http.ContentTypeApplicationJson, + Body: []byte(`{"model":"gpt-3","messages":[{"role":"user","content":"你好,你是谁?"}],"stream":true}`), + }, + }, + Response: http.AssertionResponse{ + ExpectedResponse: http.Response{ + StatusCode: 200, + ContentType: http.ContentTypeTextEventStream, + Body: []byte(`data: 
{"choices":[{"index":0,"message":{"name":"MM智能助理","role":"assistant","content":"你"}}],"created":10,"model":"abab6.5s-chat","object":"chat.completion","usage":{}} + +data: {"choices":[{"index":0,"message":{"name":"MM智能助理","role":"assistant","content":"好"}}],"created":10,"model":"abab6.5s-chat","object":"chat.completion","usage":{}} + +data: {"choices":[{"index":0,"message":{"name":"MM智能助理","role":"assistant","content":","}}],"created":10,"model":"abab6.5s-chat","object":"chat.completion","usage":{}} + +data: {"choices":[{"index":0,"message":{"name":"MM智能助理","role":"assistant","content":"你"}}],"created":10,"model":"abab6.5s-chat","object":"chat.completion","usage":{}} + +data: {"choices":[{"index":0,"message":{"name":"MM智能助理","role":"assistant","content":"是"}}],"created":10,"model":"abab6.5s-chat","object":"chat.completion","usage":{}} + +data: {"choices":[{"index":0,"message":{"name":"MM智能助理","role":"assistant","content":"谁"}}],"created":10,"model":"abab6.5s-chat","object":"chat.completion","usage":{}} + +data: {"choices":[{"index":0,"message":{"name":"MM智能助理","role":"assistant","content":"?"}}],"created":10,"model":"abab6.5s-chat","object":"chat.completion","usage":{}} + +data: {"id":"chatcmpl-llm-mock","choices":[{"index":0,"message":{"name":"MM智能助理","role":"assistant","content":"你好,你是谁?"},"finish_reason":"stop"}],"created":10,"model":"abab6.5s-chat","object":"chat.completion","usage":{"prompt_tokens":9,"completion_tokens":1,"total_tokens":10}} + +`), + }, + }, + }, + { + Meta: http.AssertionMeta{ + TestCaseName: "qwen case 1: compatible mode, non-streaming request", + CompareTarget: http.CompareTargetResponse, + }, + Request: http.AssertionRequest{ + ActualRequest: http.Request{ + Host: "dashscope.aliyuncs.com-compatible-mode", + Path: "/v1/chat/completions", + Method: "POST", + ContentType: http.ContentTypeApplicationJson, + Body: []byte(`{"model":"gpt-3","messages":[{"role":"user","content":"你好,你是谁?"}],"stream":false}`), + }, + }, + Response: 
http.AssertionResponse{ + ExpectedResponse: http.Response{ + StatusCode: 200, + ContentType: http.ContentTypeApplicationJson, + Body: []byte(`{"id":"chatcmpl-llm-mock","choices":[{"index":0,"message":{"role":"assistant","content":"你好,你是谁?"},"finish_reason":"stop"}],"created":10,"model":"qwen-turbo","object":"chat.completion","usage":{"prompt_tokens":9,"completion_tokens":1,"total_tokens":10}}`), + }, + }, + }, + { + Meta: http.AssertionMeta{ + TestCaseName: "qwen case 2: compatible mode, streaming request", + CompareTarget: http.CompareTargetResponse, + }, + Request: http.AssertionRequest{ + ActualRequest: http.Request{ + Host: "dashscope.aliyuncs.com-compatible-mode", + Path: "/v1/chat/completions", + Method: "POST", + ContentType: http.ContentTypeApplicationJson, + Body: []byte(`{"model":"gpt-3","messages":[{"role":"user","content":"你好,你是谁?"}],"stream":true}`), + }, + }, + Response: http.AssertionResponse{ + ExpectedResponse: http.Response{ + StatusCode: 200, + ContentType: http.ContentTypeTextEventStream, + Body: []byte(`data: {"id":"chatcmpl-llm-mock","choices":[{"index":0,"delta":{"content":"你"}}],"created":10,"model":"qwen-turbo","object":"chat.completion.chunk","usage":{}} + +data: {"id":"chatcmpl-llm-mock","choices":[{"index":0,"delta":{"content":"好"}}],"created":10,"model":"qwen-turbo","object":"chat.completion.chunk","usage":{}} + +data: {"id":"chatcmpl-llm-mock","choices":[{"index":0,"delta":{"content":","}}],"created":10,"model":"qwen-turbo","object":"chat.completion.chunk","usage":{}} + +data: {"id":"chatcmpl-llm-mock","choices":[{"index":0,"delta":{"content":"你"}}],"created":10,"model":"qwen-turbo","object":"chat.completion.chunk","usage":{}} + +data: {"id":"chatcmpl-llm-mock","choices":[{"index":0,"delta":{"content":"是"}}],"created":10,"model":"qwen-turbo","object":"chat.completion.chunk","usage":{}} + +data: 
{"id":"chatcmpl-llm-mock","choices":[{"index":0,"delta":{"content":"谁"}}],"created":10,"model":"qwen-turbo","object":"chat.completion.chunk","usage":{}} + +data: {"id":"chatcmpl-llm-mock","choices":[{"index":0,"delta":{"content":"?"}}],"created":10,"model":"qwen-turbo","object":"chat.completion.chunk","usage":{}} + +data: [DONE] + +`), + }, + }, + }, + { + Meta: http.AssertionMeta{ + TestCaseName: "qwen case 3: non-streaming request", + CompareTarget: http.CompareTargetResponse, + }, + Request: http.AssertionRequest{ + ActualRequest: http.Request{ + Host: "dashscope.aliyuncs.com", + Path: "/v1/chat/completions", + Method: "POST", + ContentType: http.ContentTypeApplicationJson, + Body: []byte(`{"model":"gpt-3","messages":[{"role":"user","content":"你好,你是谁?"}],"stream":false}`), + }, + }, + Response: http.AssertionResponse{ + ExpectedResponse: http.Response{ + StatusCode: 200, + ContentType: http.ContentTypeApplicationJson, + // Since the "created" field is generated by the ai-proxy plugin based on the current timestamp, it is ignored during comparison + JsonBodyIgnoreFields: []string{"created"}, + Body: []byte(`{"id":"chatcmpl-llm-mock","choices":[{"index":0,"message":{"role":"assistant","content":"你好,你是谁?"},"finish_reason":"stop"}],"created":1738218357,"model":"qwen-turbo","object":"chat.completion","usage":{"prompt_tokens":9,"completion_tokens":1,"total_tokens":10}}`), }, }, }, - } t.Run("WasmPlugins ai-proxy", func(t *testing.T) { for _, testcase := range testcases { diff --git a/test/e2e/conformance/tests/go-wasm-ai-proxy.yaml b/test/e2e/conformance/tests/go-wasm-ai-proxy.yaml index 71ef33979..79853c1a9 100644 --- a/test/e2e/conformance/tests/go-wasm-ai-proxy.yaml +++ b/test/e2e/conformance/tests/go-wasm-ai-proxy.yaml @@ -1,4 +1,4 @@ -# Copyright (c) 2022 Alibaba Group Holding Ltd. +# Copyright (c) 2025 Alibaba Group Holding Ltd. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
@@ -14,42 +14,116 @@ apiVersion: networking.k8s.io/v1 kind: Ingress metadata: - annotations: - name: wasmplugin-ai-proxy-openai - namespace: higress-conformance-infra + name: wasmplugin-ai-proxy-baidu + namespace: higress-conformance-ai-backend spec: ingressClassName: higress rules: - - host: "openai.ai.com" + - host: "qianfan.baidubce.com" http: paths: - pathType: Prefix path: "/" backend: service: - name: infra-backend-v1 + name: llm-mock-service port: - number: 8080 + number: 3000 --- apiVersion: networking.k8s.io/v1 kind: Ingress metadata: - annotations: - name: wasmplugin-ai-proxy-qwen - namespace: higress-conformance-infra + name: wasmplugin-ai-proxy-doubao + namespace: higress-conformance-ai-backend spec: ingressClassName: higress rules: - - host: "qwen.ai.com" + - host: "ark.cn-beijing.volces.com" http: paths: - pathType: Prefix path: "/" backend: service: - name: infra-backend-v1 + name: llm-mock-service port: - number: 8080 + number: 3000 +--- +apiVersion: networking.k8s.io/v1 +kind: Ingress +metadata: + name: wasmplugin-ai-proxy-minimax-v2-api + namespace: higress-conformance-ai-backend +spec: + ingressClassName: higress + rules: + - host: "api.minimax.chat-v2-api" + http: + paths: + - pathType: Prefix + path: "/" + backend: + service: + name: llm-mock-service + port: + number: 3000 +--- +apiVersion: networking.k8s.io/v1 +kind: Ingress +metadata: + name: wasmplugin-ai-proxy-minimax-pro-api + namespace: higress-conformance-ai-backend +spec: + ingressClassName: higress + rules: + - host: "api.minimax.chat-pro-api" + http: + paths: + - pathType: Prefix + path: "/" + backend: + service: + name: llm-mock-service + port: + number: 3000 +--- +apiVersion: networking.k8s.io/v1 +kind: Ingress +metadata: + name: wasmplugin-ai-proxy-qwen-compatible-mode + namespace: higress-conformance-ai-backend +spec: + ingressClassName: higress + rules: + - host: "dashscope.aliyuncs.com-compatible-mode" + http: + paths: + - pathType: Prefix + path: "/" + backend: + service: + 
name: llm-mock-service + port: + number: 3000 +--- +apiVersion: networking.k8s.io/v1 +kind: Ingress +metadata: + name: wasmplugin-ai-proxy-qwen + namespace: higress-conformance-ai-backend +spec: + ingressClassName: higress + rules: + - host: "dashscope.aliyuncs.com" + http: + paths: + - pathType: Prefix + path: "/" + backend: + service: + name: llm-mock-service + port: + number: 3000 --- apiVersion: extensions.higress.io/v1alpha1 kind: WasmPlugin @@ -57,31 +131,76 @@ metadata: name: ai-proxy namespace: higress-system spec: - priority: 200 + defaultConfigDisable: true + phase: UNSPECIFIED_PHASE + priority: 100 matchRules: - config: provider: - type: "openai" - customSettings: - - name: "max_tokens" - value: 123 - overwrite: false - - name: "temperature" - value: 0.66 - overwrite: true + apiTokens: + - fake_token + modelMapping: + 'gpt-3': ernie-3.5-8k + '*': ernie-3.5-8k + type: baidu ingress: - - higress-conformance-infra/wasmplugin-ai-proxy-openai + - higress-conformance-ai-backend/wasmplugin-ai-proxy-baidu - config: provider: - type: "qwen" - apiTokens: "fake-token" - customSettings: - - name: "max_tokens" - value: 123 - overwrite: false - - name: "temperature" - value: 0.66 - overwrite: true + apiTokens: + - fake_token + modelMapping: + '*': fake_doubao_endpoint + type: doubao ingress: - - higress-conformance-infra/wasmplugin-ai-proxy-qwen - url: oci://higress-registry.cn-hangzhou.cr.aliyuncs.com/plugins/ai-proxy:1.0.0 + - higress-conformance-ai-backend/wasmplugin-ai-proxy-doubao + - config: + provider: + apiTokens: + - fake_token + modelMapping: + 'gpt-3': abab6.5s-chat + 'gpt-4': abab6.5g-chat + '*': abab6.5t-chat + type: minimax + ingress: + - higress-conformance-ai-backend/wasmplugin-ai-proxy-minimax-v2-api + - config: + provider: + apiTokens: + - fake_token + modelMapping: + 'gpt-3': abab6.5s-chat + 'gpt-4': abab6.5g-chat + '*': abab6.5t-chat + type: minimax + minimaxApiType: pro + minimaxGroupId: 1 + ingress: + - 
higress-conformance-ai-backend/wasmplugin-ai-proxy-minimax-pro-api + - config: + provider: + apiTokens: + - fake_token + modelMapping: + 'gpt-3': qwen-turbo + 'gpt-35-turbo': qwen-plus + 'gpt-4-*': qwen-max + '*': qwen-turbo + type: qwen + qwenEnableCompatible: true + ingress: + - higress-conformance-ai-backend/wasmplugin-ai-proxy-qwen-compatible-mode + - config: + provider: + apiTokens: + - fake_token + modelMapping: + 'gpt-3': qwen-turbo + 'gpt-35-turbo': qwen-plus + 'gpt-4-*': qwen-max + '*': qwen-turbo + type: qwen + ingress: + - higress-conformance-ai-backend/wasmplugin-ai-proxy-qwen + url: oci://higress-registry.cn-hangzhou.cr.aliyuncs.com/plugins/ai-proxy:1.0.0 \ No newline at end of file diff --git a/test/e2e/conformance/utils/http/http.go b/test/e2e/conformance/utils/http/http.go index b2f33f0cd..a48cb1c76 100644 --- a/test/e2e/conformance/utils/http/http.go +++ b/test/e2e/conformance/utils/http/http.go @@ -76,6 +76,7 @@ const ( ContentTypeFormUrlencoded = "application/x-www-form-urlencoded" ContentTypeMultipartForm = "multipart/form-data" ContentTypeTextPlain = "text/plain" + ContentTypeTextEventStream = "text/event-stream" ) const ( @@ -140,11 +141,12 @@ type ExpectedRequest struct { // Response defines expected properties of a response from a backend. 
 type Response struct {
-	StatusCode    int
-	Headers       map[string]string
-	Body          []byte
-	ContentType   string
-	AbsentHeaders []string
+	StatusCode           int
+	Headers              map[string]string
+	Body                 []byte
+	JsonBodyIgnoreFields []string
+	ContentType          string
+	AbsentHeaders        []string
 }
 
 // requiredConsecutiveSuccesses is the number of requests that must succeed in a row
@@ -601,6 +603,6 @@ func CompareResponse(cRes *roundtripper.CapturedResponse, expected Assertion) er
 
 	switch cTyp {
-	case ContentTypeTextPlain:
+	case ContentTypeTextPlain, ContentTypeTextEventStream:
 		if !bytes.Equal(expected.Response.ExpectedResponse.Body, cRes.Body) {
 			return fmt.Errorf("expected %s body to be %s, got %s", cTyp, string(expected.Response.ExpectedResponse.Body), string(cRes.Body))
 		}
@@ -616,7 +618,7 @@
 			return fmt.Errorf("failed to unmarshall CapturedResponse body %s, %s", string(cRes.Body), err.Error())
 		}
 
-		if !reflect.DeepEqual(eResBody, cResBody) {
+		if err := CompareJSONWithIgnoreFields(eResBody, cResBody, expected.Response.ExpectedResponse.JsonBodyIgnoreFields); err != nil {
 			return fmt.Errorf("expected %s body to be %s, got %s", cTyp, string(expected.Response.ExpectedResponse.Body), string(cRes.Body))
 		}
 	case ContentTypeFormUrlencoded:
@@ -663,6 +665,47 @@
 	}
 	return nil
 }
+
+// CompareJSONWithIgnoreFields compares two JSON objects, ignoring specified fields
+func CompareJSONWithIgnoreFields(eResBody, cResBody map[string]interface{}, ignoreFields []string) error {
+	for key, eVal := range eResBody {
+		if contains(ignoreFields, key) {
+			continue
+		}
+
+		cVal, exists := cResBody[key]
+		if !exists {
+			return fmt.Errorf("field %s exists in expected response but not in captured response", key)
+		}
+
+		if !reflect.DeepEqual(eVal, cVal) {
+			return fmt.Errorf("field %s mismatch: expected %v, got %v", key, eVal, cVal)
+		}
+	}
+
+	// Check if captured response has extra fields (excluding ignored fields)
+	for key := range cResBody {
+		if contains(ignoreFields, key) {
+			continue
+		}
+
+		if _, exists := eResBody[key]; !exists {
+			return fmt.Errorf("field %s exists in captured response but not in expected response", key)
+		}
+	}
+
+	return nil
+}
+
+func contains(slice []string, str string) bool {
+	for _, s := range slice {
+		if s == str {
+			return true
+		}
+	}
+	return false
+}
+
 func ParseFormUrlencodedBody(body []byte) (map[string][]string, error) {
 	ret := make(map[string][]string)
 	kvs, err := url.ParseQuery(string(body))
diff --git a/test/e2e/conformance/utils/suite/suite.go b/test/e2e/conformance/utils/suite/suite.go
index daced5ae0..caaa6c3f3 100644
--- a/test/e2e/conformance/utils/suite/suite.go
+++ b/test/e2e/conformance/utils/suite/suite.go
@@ -136,6 +136,7 @@ func New(s Options) *ConformanceTestSuite {
 			"base/nacos.yaml",
 			"base/dubbo.yaml",
 			"base/opa.yaml",
+			"base/llm-mock.yaml",
 		}
 	}
 
@@ -173,6 +174,7 @@ func (suite *ConformanceTestSuite) Setup(t *testing.T) {
 		"higress-conformance-infra",
 		"higress-conformance-app-backend",
 		"higress-conformance-web-backend",
+		"higress-conformance-ai-backend",
 	}
 	kubernetes.NamespacesMustBeAccepted(t, suite.Client, suite.TimeoutConfig, namespaces)