From a1bf1ff009e43f06f3c4c012b3bc723b2bfcdd59 Mon Sep 17 00:00:00 2001 From: aias00 Date: Thu, 7 Aug 2025 17:22:47 +0800 Subject: [PATCH] feat(provider): add support for Grok provider in AI proxy (#2713) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: 韩贤涛 <601803023@qq.com> --- plugins/wasm-go/extensions/ai-proxy/README.md | 74 ++++++++++++ .../wasm-go/extensions/ai-proxy/README_EN.md | 74 ++++++++++++ .../extensions/ai-proxy/provider/grok.go | 75 ++++++++++++ .../extensions/ai-proxy/provider/provider.go | 2 + .../e2e/conformance/tests/go-wasm-ai-proxy.go | 60 ++++++++++ .../conformance/tests/go-wasm-ai-proxy.yaml | 108 +++++++++++------- 6 files changed, 354 insertions(+), 39 deletions(-) create mode 100644 plugins/wasm-go/extensions/ai-proxy/provider/grok.go diff --git a/plugins/wasm-go/extensions/ai-proxy/README.md b/plugins/wasm-go/extensions/ai-proxy/README.md index bb42b7b07..6128bce84 100644 --- a/plugins/wasm-go/extensions/ai-proxy/README.md +++ b/plugins/wasm-go/extensions/ai-proxy/README.md @@ -158,6 +158,10 @@ DeepSeek 所对应的 `type` 为 `deepseek`。它并无特有的配置字段。 Groq 所对应的 `type` 为 `groq`。它并无特有的配置字段。 +#### Grok + +Grok 所对应的 `type` 为 `grok`。它并无特有的配置字段。 + #### 文心一言(Baidu) 文心一言所对应的 `type` 为 `baidu`。它并无特有的配置字段。 @@ -863,6 +867,76 @@ provider: } ``` +### 使用 OpenAI 协议代理 Grok 服务 + +**配置信息** + +```yaml +provider: + type: grok + apiTokens: + - 'YOUR_GROK_API_TOKEN' +``` + +**请求示例** + +```json +{ + "messages": [ + { + "role": "system", + "content": "You are a helpful assistant that can answer questions and help with tasks." + }, + { + "role": "user", + "content": "What is 101*3?" + } + ], + "model": "grok-4" +} +``` + +**响应示例** + +```json +{ + "id": "a3d1008e-4544-40d4-d075-11527e794e4a", + "object": "chat.completion", + "created": 1752854522, + "model": "grok-4", + "choices": [ + { + "index": 0, + "message": { + "role": "assistant", + "content": "101 multiplied by 3 is 303.", + "refusal": null + }, + "finish_reason": "stop" + } + ], + "usage": { + "prompt_tokens": 32, + "completion_tokens": 9, + "total_tokens": 135, + "prompt_tokens_details": { + "text_tokens": 32, + "audio_tokens": 0, + "image_tokens": 0, + "cached_tokens": 6 + }, + "completion_tokens_details": { + "reasoning_tokens": 94, + "audio_tokens": 0, + "accepted_prediction_tokens": 0, + "rejected_prediction_tokens": 0 + }, + "num_sources_used": 0 + }, + "system_fingerprint": "fp_3a7881249c" +} +``` + ### 使用 OpenAI 协议代理 Claude 服务 **配置信息** diff --git a/plugins/wasm-go/extensions/ai-proxy/README_EN.md b/plugins/wasm-go/extensions/ai-proxy/README_EN.md index e337b1b13..1297ccd60 100644 --- a/plugins/wasm-go/extensions/ai-proxy/README_EN.md +++ b/plugins/wasm-go/extensions/ai-proxy/README_EN.md @@ -129,6 +129,10 @@ For DeepSeek, the corresponding `type` is `deepseek`. It has no unique configura For Groq, the corresponding `type` is `groq`. It has no unique configuration fields. +#### Grok + +For Grok, the corresponding `type` is `grok`. It has no unique configuration fields. + #### ERNIE Bot For ERNIE Bot, the corresponding `type` is `baidu`. It has no unique configuration fields. @@ -809,6 +813,76 @@ provider: } ``` +### Using OpenAI Protocol Proxy for Grok Service + +**Configuration Information** + +```yaml +provider: + type: grok + apiTokens: + - "YOUR_GROK_API_TOKEN" +``` + +**Example Request** + +```json +{ + "messages": [ + { + "role": "system", + "content": "You are a helpful assistant that can answer questions and help with tasks." + }, + { + "role": "user", + "content": "What is 101*3?" + } + ], + "model": "grok-4" +} +``` + +**Example Response** + +```json +{ + "id": "a3d1008e-4544-40d4-d075-11527e794e4a", + "object": "chat.completion", + "created": 1752854522, + "model": "grok-4", + "choices": [ + { + "index": 0, + "message": { + "role": "assistant", + "content": "101 multiplied by 3 is 303.", + "refusal": null + }, + "finish_reason": "stop" + } + ], + "usage": { + "prompt_tokens": 32, + "completion_tokens": 9, + "total_tokens": 135, + "prompt_tokens_details": { + "text_tokens": 32, + "audio_tokens": 0, + "image_tokens": 0, + "cached_tokens": 6 + }, + "completion_tokens_details": { + "reasoning_tokens": 94, + "audio_tokens": 0, + "accepted_prediction_tokens": 0, + "rejected_prediction_tokens": 0 + }, + "num_sources_used": 0 + }, + "system_fingerprint": "fp_3a7881249c" +} +``` + ### Using OpenAI Protocol Proxy for Claude Service **Configuration Information** diff --git a/plugins/wasm-go/extensions/ai-proxy/provider/grok.go b/plugins/wasm-go/extensions/ai-proxy/provider/grok.go new file mode 100644 index 000000000..56fe6bc77 --- /dev/null +++ b/plugins/wasm-go/extensions/ai-proxy/provider/grok.go @@ -0,0 +1,75 @@ +package provider + +import ( + "errors" + "net/http" + "strings" + + "github.com/alibaba/higress/plugins/wasm-go/extensions/ai-proxy/util" + "github.com/higress-group/wasm-go/pkg/wrapper" + "github.com/higress-group/proxy-wasm-go-sdk/proxywasm/types" +) + +// grokProvider is the provider for Grok service. +const ( + grokDomain = "api.x.ai" + grokChatCompletionPath = "/v1/chat/completions" +) + +type grokProviderInitializer struct{} + +func (g *grokProviderInitializer) ValidateConfig(config *ProviderConfig) error { + if config.apiTokens == nil || len(config.apiTokens) == 0 { + return errors.New("no apiToken found in provider config") + } + return nil +} + +func (g *grokProviderInitializer) DefaultCapabilities() map[string]string { + return map[string]string{ + string(ApiNameChatCompletion): grokChatCompletionPath, + } +} + +func (g *grokProviderInitializer) CreateProvider(config ProviderConfig) (Provider, error) { + config.setDefaultCapabilities(g.DefaultCapabilities()) + return &grokProvider{ + config: config, + contextCache: createContextCache(&config), + }, nil +} + +type grokProvider struct { + config ProviderConfig + contextCache *contextCache +} + +func (g *grokProvider) GetProviderType() string { + return providerTypeGrok +} + +func (g *grokProvider) OnRequestHeaders(ctx wrapper.HttpContext, apiName ApiName) error { + g.config.handleRequestHeaders(g, ctx, apiName) + return nil +} + +func (g *grokProvider) OnRequestBody(ctx wrapper.HttpContext, apiName ApiName, body []byte) (types.Action, error) { + if !g.config.isSupportedAPI(apiName) { + return types.ActionContinue, errUnsupportedApiName + } + return g.config.handleRequestBody(g, g.contextCache, ctx, apiName, body) +} + +func (g *grokProvider) TransformRequestHeaders(ctx wrapper.HttpContext, apiName ApiName, headers http.Header) { + util.OverwriteRequestPathHeaderByCapability(headers, string(apiName), g.config.capabilities) + util.OverwriteRequestHostHeader(headers, grokDomain) + util.OverwriteRequestAuthorizationHeader(headers, "Bearer "+g.config.GetApiTokenInUse(ctx)) + headers.Del("Content-Length") +} + +func (g *grokProvider) GetApiName(path string) ApiName { + if strings.Contains(path, grokChatCompletionPath) { + return ApiNameChatCompletion + } + return "" +} diff --git a/plugins/wasm-go/extensions/ai-proxy/provider/provider.go b/plugins/wasm-go/extensions/ai-proxy/provider/provider.go index 8050db6ae..2fd24f507 100644 --- a/plugins/wasm-go/extensions/ai-proxy/provider/provider.go +++ b/plugins/wasm-go/extensions/ai-proxy/provider/provider.go @@ -107,6 +107,7 @@ const ( providerTypeQwen = "qwen" providerTypeOpenAI = "openai" providerTypeGroq = "groq" + providerTypeGrok = "grok" providerTypeBaichuan = "baichuan" providerTypeYi = "yi" providerTypeDeepSeek = "deepseek" @@ -182,6 +183,7 @@ var ( providerTypeQwen: &qwenProviderInitializer{}, providerTypeOpenAI: &openaiProviderInitializer{}, providerTypeGroq: &groqProviderInitializer{}, + providerTypeGrok: &grokProviderInitializer{}, providerTypeBaichuan: &baichuanProviderInitializer{}, providerTypeYi: &yiProviderInitializer{}, providerTypeDeepSeek: &deepseekProviderInitializer{}, diff --git a/test/e2e/conformance/tests/go-wasm-ai-proxy.go b/test/e2e/conformance/tests/go-wasm-ai-proxy.go index 31aeb379e..98cb6aa02 100644 --- a/test/e2e/conformance/tests/go-wasm-ai-proxy.go +++ b/test/e2e/conformance/tests/go-wasm-ai-proxy.go @@ -1023,6 +1023,66 @@ data: {"id":"chatcmpl-llm-mock","choices":[{"index":0,"delta":{"role":"assistant data: {"id":"chatcmpl-llm-mock","choices":[{"index":0,"delta":{"role":"assistant","content":"USER: \n你好\n"},"finish_reason":"stop","logprobs":null}],"model":"dify","object":"chat.completion.chunk","usage":{"prompt_tokens":9,"completion_tokens":1,"total_tokens":10}} +`), + }, + }, + }, + { + Meta: http.AssertionMeta{ + TestCaseName: "grok case 1: non-streaming request", + CompareTarget: http.CompareTargetResponse, + }, + Request: http.AssertionRequest{ + ActualRequest: http.Request{ + Host: "api.x.ai", + Path: "/v1/chat/completions", + Method: "POST", + ContentType: http.ContentTypeApplicationJson, + Body: []byte(`{"model":"gpt-3","messages":[{"role":"user","content":"你好,你是谁?"}],"stream":false}`), + }, + }, + Response: http.AssertionResponse{ + ExpectedResponse: http.Response{ + StatusCode: 200, + ContentType: http.ContentTypeApplicationJson, + Body: []byte(`{"id":"chatcmpl-llm-mock","choices":[{"index":0,"message":{"role":"assistant","content":"你好,你是谁?"},"finish_reason":"stop","logprobs":null}],"created":10,"model":"grok-beta","object":"chat.completion","usage":{"prompt_tokens":9,"completion_tokens":1,"total_tokens":10}}`), + }, + }, + }, + { + Meta: http.AssertionMeta{ + TestCaseName: "grok case 2: streaming request", + CompareTarget: http.CompareTargetResponse, + }, + Request: http.AssertionRequest{ + ActualRequest: http.Request{ + Host: "api.x.ai", + Path: "/v1/chat/completions", + Method: "POST", + ContentType: http.ContentTypeApplicationJson, + Body: []byte(`{"model":"gpt-3","messages":[{"role":"user","content":"你好,你是谁?"}],"stream":true}`), + }, + }, + Response: http.AssertionResponse{ + ExpectedResponse: http.Response{ + StatusCode: 200, + ContentType: http.ContentTypeTextEventStream, + Body: []byte(`data: {"id":"chatcmpl-llm-mock","choices":[{"index":0,"delta":{"content":"你"},"finish_reason":null,"logprobs":null}],"created":10,"model":"grok-beta","object":"chat.completion.chunk","usage":null} + +data: {"id":"chatcmpl-llm-mock","choices":[{"index":0,"delta":{"content":"好"},"finish_reason":null,"logprobs":null}],"created":10,"model":"grok-beta","object":"chat.completion.chunk","usage":null} + +data: {"id":"chatcmpl-llm-mock","choices":[{"index":0,"delta":{"content":","},"finish_reason":null,"logprobs":null}],"created":10,"model":"grok-beta","object":"chat.completion.chunk","usage":null} + +data: {"id":"chatcmpl-llm-mock","choices":[{"index":0,"delta":{"content":"你"},"finish_reason":null,"logprobs":null}],"created":10,"model":"grok-beta","object":"chat.completion.chunk","usage":null} + +data: {"id":"chatcmpl-llm-mock","choices":[{"index":0,"delta":{"content":"是"},"finish_reason":null,"logprobs":null}],"created":10,"model":"grok-beta","object":"chat.completion.chunk","usage":null} + +data: {"id":"chatcmpl-llm-mock","choices":[{"index":0,"delta":{"content":"谁"},"finish_reason":null,"logprobs":null}],"created":10,"model":"grok-beta","object":"chat.completion.chunk","usage":null} + +data: {"id":"chatcmpl-llm-mock","choices":[{"index":0,"delta":{"content":"?"},"finish_reason":"stop","logprobs":null}],"created":10,"model":"grok-beta","object":"chat.completion.chunk","usage":null} + +data: [DONE] + `), }, }, diff --git a/test/e2e/conformance/tests/go-wasm-ai-proxy.yaml b/test/e2e/conformance/tests/go-wasm-ai-proxy.yaml index d4025e6f7..2c11cec22 100644 --- a/test/e2e/conformance/tests/go-wasm-ai-proxy.yaml +++ b/test/e2e/conformance/tests/go-wasm-ai-proxy.yaml @@ -334,6 +334,25 @@ spec: port: number: 3000 --- +apiVersion: networking.k8s.io/v1 +kind: Ingress +metadata: + name: wasmplugin-ai-proxy-grok + namespace: higress-conformance-ai-backend +spec: + ingressClassName: higress + rules: + - host: "api.x.ai" + http: + paths: + - pathType: Prefix + path: "/" + backend: + service: + name: llm-mock-service + port: + number: 3000 +--- apiVersion: extensions.higress.io/v1alpha1 kind: WasmPlugin metadata: @@ -349,8 +368,8 @@ spec: apiTokens: - fake_token modelMapping: - 'gpt-3': 360gpt-turbo - '*': 360gpt-pro + "gpt-3": 360gpt-turbo + "*": 360gpt-pro type: ai360 ingress: - higress-conformance-ai-backend/wasmplugin-ai-proxy-ai360 @@ -359,8 +378,8 @@ spec: apiTokens: - fake_token modelMapping: - 'gpt-3': baichuan2-13b-chat-v1 - '*': baichuan-7b-v1 + "gpt-3": baichuan2-13b-chat-v1 + "*": baichuan-7b-v1 type: baichuan ingress: - higress-conformance-ai-backend/wasmplugin-ai-proxy-baichuan @@ -369,8 +388,8 @@ spec: apiTokens: - fake_token modelMapping: - 'gpt-3': ernie-3.5-8k - '*': ernie-3.5-8k + "gpt-3": ernie-3.5-8k + "*": ernie-3.5-8k type: baidu ingress: - higress-conformance-ai-backend/wasmplugin-ai-proxy-baidu @@ -379,8 +398,8 @@ spec: apiTokens: - fake_token modelMapping: - 'gpt-3': deepseek-reasoner - '*': deepseek-chat + "gpt-3": deepseek-reasoner + "*": deepseek-chat type: deepseek ingress: - higress-conformance-ai-backend/wasmplugin-ai-proxy-deepseek @@ -389,7 +408,7 @@ spec: apiTokens: - fake_token modelMapping: - '*': fake_doubao_endpoint + "*": fake_doubao_endpoint type: doubao ingress: - higress-conformance-ai-backend/wasmplugin-ai-proxy-doubao @@ -398,8 +417,8 @@ spec: apiTokens: - fake_token modelMapping: - 'gpt-3': cohere-command-r-08-2024 - '*': Phi-3.5-MoE-instruct + "gpt-3": cohere-command-r-08-2024 + "*": Phi-3.5-MoE-instruct type: github ingress: - higress-conformance-ai-backend/wasmplugin-ai-proxy-github @@ -408,8 +427,8 @@ spec: apiTokens: - fake_token modelMapping: - 'gpt-3': llama3-8b-8192 - '*': llama-3.1-8b-instant + "gpt-3": llama3-8b-8192 + "*": llama-3.1-8b-instant type: groq ingress: - higress-conformance-ai-backend/wasmplugin-ai-proxy-groq @@ -418,9 +437,9 @@ spec: apiTokens: - fake_token modelMapping: - 'gpt-3': abab6.5s-chat - 'gpt-4': abab6.5g-chat - '*': abab6.5t-chat + "gpt-3": abab6.5s-chat + "gpt-4": abab6.5g-chat + "*": abab6.5t-chat type: minimax ingress: - higress-conformance-ai-backend/wasmplugin-ai-proxy-minimax-v2-api @@ -429,9 +448,9 @@ spec: apiTokens: - fake_token modelMapping: - 'gpt-3': abab6.5s-chat - 'gpt-4': abab6.5g-chat - '*': abab6.5t-chat + "gpt-3": abab6.5s-chat + "gpt-4": abab6.5g-chat + "*": abab6.5t-chat type: minimax minimaxApiType: pro minimaxGroupId: 1 @@ -442,8 +461,8 @@ spec: apiTokens: - fake_token modelMapping: - 'gpt-3': mistral-tiny - '*': mistral-large-latest + "gpt-3": mistral-tiny + "*": mistral-large-latest type: mistral ingress: - higress-conformance-ai-backend/wasmplugin-ai-proxy-mistral @@ -452,10 +471,10 @@ spec: apiTokens: - fake_token modelMapping: - 'gpt-3': qwen-turbo - 'gpt-35-turbo': qwen-plus - 'gpt-4-*': qwen-max - '*': qwen-turbo + "gpt-3": qwen-turbo + "gpt-35-turbo": qwen-plus + "gpt-4-*": qwen-max + "*": qwen-turbo type: qwen qwenEnableCompatible: true ingress: @@ -465,10 +484,10 @@ spec: apiTokens: - fake_token modelMapping: - 'gpt-3': qwen-turbo - 'gpt-35-turbo': qwen-plus - 'gpt-4-*': qwen-max - '*': qwen-turbo + "gpt-3": qwen-turbo + "gpt-35-turbo": qwen-plus + "gpt-4-*": qwen-max + "*": qwen-turbo type: qwen ingress: - higress-conformance-ai-backend/wasmplugin-ai-proxy-qwen @@ -477,8 +496,8 @@ spec: apiTokens: - fake_token modelMapping: - 'gpt-3': step-1-8k - '*': step-1-32k + "gpt-3": step-1-8k + "*": step-1-32k type: stepfun ingress: - higress-conformance-ai-backend/wasmplugin-ai-proxy-stepfun @@ -487,8 +506,8 @@ spec: apiTokens: - fake_token modelMapping: - 'gpt-3': meta-llama/Meta-Llama-3-8B-Instruct-Turbo - '*': meta-llama/Llama-3-8b-chat-hf + "gpt-3": meta-llama/Meta-Llama-3-8B-Instruct-Turbo + "*": meta-llama/Llama-3-8b-chat-hf type: together-ai ingress: - higress-conformance-ai-backend/wasmplugin-ai-proxy-together-ai @@ -497,8 +516,8 @@ spec: apiTokens: - fake_token modelMapping: - 'gpt-3': Yi-Medium - '*': Yi-Large + "gpt-3": Yi-Medium + "*": Yi-Large type: yi ingress: - higress-conformance-ai-backend/wasmplugin-ai-proxy-yi @@ -507,8 +526,8 @@ spec: apiTokens: - fake_token modelMapping: - 'gpt-3': glm-4-plus - '*': glm-4-long + "gpt-3": glm-4-plus + "*": glm-4-long type: zhipuai ingress: - higress-conformance-ai-backend/wasmplugin-ai-proxy-zhipuai @@ -517,9 +536,20 @@ spec: apiTokens: - fake_token modelMapping: - '*': dify + "*": dify type: dify botType: Completion ingress: - higress-conformance-ai-backend/wasmplugin-ai-proxy-dify - url: file:///opt/plugins/wasm-go/extensions/ai-proxy/plugin.wasm \ No newline at end of file + - config: + provider: + apiTokens: + - fake_token + modelMapping: + "gpt-3": grok-beta + "gpt-4": grok-beta + "*": grok-beta + type: grok + ingress: + - higress-conformance-ai-backend/wasmplugin-ai-proxy-grok + url: file:///opt/plugins/wasm-go/extensions/ai-proxy/plugin.wasm