feat(provider): add support for Grok provider in AI proxy (#2713)

Co-authored-by: 韩贤涛 <601803023@qq.com>
aias00
2025-08-07 17:22:47 +08:00
committed by GitHub
parent b69e3a8f30
commit a1bf1ff009
6 changed files with 354 additions and 39 deletions


@@ -158,6 +158,10 @@ For DeepSeek, the corresponding `type` is `deepseek`. It has no unique configuration fields.
For Groq, the corresponding `type` is `groq`. It has no unique configuration fields.
#### Grok
For Grok, the corresponding `type` is `grok`. It has no unique configuration fields.
#### ERNIE Bot (Baidu)
For ERNIE Bot, the corresponding `type` is `baidu`. It has no unique configuration fields.
@@ -863,6 +867,76 @@ provider:
}
```
### Using OpenAI Protocol Proxy for Grok Service
**Configuration Information**
```yaml
provider:
type: grok
apiTokens:
- 'YOUR_GROK_API_TOKEN'
```
**Example Request**
```json
{
"messages": [
{
"role": "system",
"content": "You are a helpful assistant that can answer questions and help with tasks."
},
{
"role": "user",
"content": "What is 101*3?"
}
],
"model": "grok-4"
}
```
**Example Response**
```json
{
"id": "a3d1008e-4544-40d4-d075-11527e794e4a",
"object": "chat.completion",
"created": 1752854522,
"model": "grok-4",
"choices": [
{
"index": 0,
"message": {
"role": "assistant",
"content": "101 multiplied by 3 is 303.",
"refusal": null
},
"finish_reason": "stop"
}
],
"usage": {
"prompt_tokens": 32,
"completion_tokens": 9,
"total_tokens": 135,
"prompt_tokens_details": {
"text_tokens": 32,
"audio_tokens": 0,
"image_tokens": 0,
"cached_tokens": 6
},
"completion_tokens_details": {
"reasoning_tokens": 94,
"audio_tokens": 0,
"accepted_prediction_tokens": 0,
"rejected_prediction_tokens": 0
},
"num_sources_used": 0
},
"system_fingerprint": "fp_3a7881249c"
}
```
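In this sample the `usage` numbers only add up once reasoning tokens are counted: `total_tokens` (135) = `prompt_tokens` (32) + `completion_tokens` (9) + `completion_tokens_details.reasoning_tokens` (94). Grok accounts for the tokens spent on hidden reasoning separately from the visible completion text.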
### Using OpenAI Protocol Proxy for Claude Service
**Configuration Information**


@@ -129,6 +129,10 @@ For DeepSeek, the corresponding `type` is `deepseek`. It has no unique configuration fields.
For Groq, the corresponding `type` is `groq`. It has no unique configuration fields.
#### Grok
For Grok, the corresponding `type` is `grok`. It has no unique configuration fields.
#### ERNIE Bot
For ERNIE Bot, the corresponding `type` is `baidu`. It has no unique configuration fields.
@@ -809,6 +813,76 @@ provider:
}
```
### Using OpenAI Protocol Proxy for Grok Service
**Configuration Information**
```yaml
provider:
type: grok
apiTokens:
- "YOUR_GROK_API_TOKEN"
```
**Example Request**
```json
{
"messages": [
{
"role": "system",
"content": "You are a helpful assistant that can answer questions and help with tasks."
},
{
"role": "user",
"content": "What is 101*3?"
}
],
"model": "grok-4"
}
```
**Example Response**
```json
{
"id": "a3d1008e-4544-40d4-d075-11527e794e4a",
"object": "chat.completion",
"created": 1752854522,
"model": "grok-4",
"choices": [
{
"index": 0,
"message": {
"role": "assistant",
"content": "101 multiplied by 3 is 303.",
"refusal": null
},
"finish_reason": "stop"
}
],
"usage": {
"prompt_tokens": 32,
"completion_tokens": 9,
"total_tokens": 135,
"prompt_tokens_details": {
"text_tokens": 32,
"audio_tokens": 0,
"image_tokens": 0,
"cached_tokens": 6
},
"completion_tokens_details": {
"reasoning_tokens": 94,
"audio_tokens": 0,
"accepted_prediction_tokens": 0,
"rejected_prediction_tokens": 0
},
"num_sources_used": 0
},
"system_fingerprint": "fp_3a7881249c"
}
```
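For reference, here is a minimal Go sketch of a client calling the gateway once the configuration above is applied. The listener address `http://localhost:8080` is an assumption for illustration; note that the client sends no `Authorization` header, because the plugin injects the configured API token itself.
```go
// Minimal client sketch. The gateway address below is an assumption;
// substitute the address your Higress gateway actually listens on.
package main

import (
	"bytes"
	"fmt"
	"io"
	"net/http"
)

func main() {
	body := []byte(`{"model":"grok-4","messages":[{"role":"user","content":"What is 101*3?"}]}`)

	req, err := http.NewRequest("POST", "http://localhost:8080/v1/chat/completions", bytes.NewReader(body))
	if err != nil {
		panic(err)
	}
	req.Header.Set("Content-Type", "application/json")
	// No Authorization header: the ai-proxy plugin attaches the Grok API token.

	resp, err := http.DefaultClient.Do(req)
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()

	out, _ := io.ReadAll(resp.Body)
	fmt.Println(string(out)) // an OpenAI-style chat.completion object, as shown above
}
```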
### Using OpenAI Protocol Proxy for Claude Service
**Configuration Information**


@@ -0,0 +1,75 @@
package provider

import (
	"errors"
	"net/http"
	"strings"

	"github.com/alibaba/higress/plugins/wasm-go/extensions/ai-proxy/util"
	"github.com/higress-group/proxy-wasm-go-sdk/proxywasm/types"
	"github.com/higress-group/wasm-go/pkg/wrapper"
)

// grokProvider is the provider for the Grok (xAI) service.

const (
	grokDomain             = "api.x.ai"
	grokChatCompletionPath = "/v1/chat/completions"
)

type grokProviderInitializer struct{}

func (g *grokProviderInitializer) ValidateConfig(config *ProviderConfig) error {
	if len(config.apiTokens) == 0 {
		return errors.New("no apiToken found in provider config")
	}
	return nil
}

// DefaultCapabilities maps the chat-completion capability to Grok's API path.
func (g *grokProviderInitializer) DefaultCapabilities() map[string]string {
	return map[string]string{
		string(ApiNameChatCompletion): grokChatCompletionPath,
	}
}

func (g *grokProviderInitializer) CreateProvider(config ProviderConfig) (Provider, error) {
	config.setDefaultCapabilities(g.DefaultCapabilities())
	return &grokProvider{
		config:       config,
		contextCache: createContextCache(&config),
	}, nil
}

type grokProvider struct {
	config       ProviderConfig
	contextCache *contextCache
}

func (g *grokProvider) GetProviderType() string {
	return providerTypeGrok
}

func (g *grokProvider) OnRequestHeaders(ctx wrapper.HttpContext, apiName ApiName) error {
	g.config.handleRequestHeaders(g, ctx, apiName)
	return nil
}

func (g *grokProvider) OnRequestBody(ctx wrapper.HttpContext, apiName ApiName, body []byte) (types.Action, error) {
	if !g.config.isSupportedAPI(apiName) {
		return types.ActionContinue, errUnsupportedApiName
	}
	return g.config.handleRequestBody(g, g.contextCache, ctx, apiName, body)
}

// TransformRequestHeaders points the request at api.x.ai, injects the API
// token, and drops Content-Length since the body may be rewritten.
func (g *grokProvider) TransformRequestHeaders(ctx wrapper.HttpContext, apiName ApiName, headers http.Header) {
	util.OverwriteRequestPathHeaderByCapability(headers, string(apiName), g.config.capabilities)
	util.OverwriteRequestHostHeader(headers, grokDomain)
	util.OverwriteRequestAuthorizationHeader(headers, "Bearer "+g.config.GetApiTokenInUse(ctx))
	headers.Del("Content-Length")
}

func (g *grokProvider) GetApiName(path string) ApiName {
	if strings.Contains(path, grokChatCompletionPath) {
		return ApiNameChatCompletion
	}
	return ""
}
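To make the effect of `TransformRequestHeaders` concrete, here is a standalone sketch of the same rewrite in plain `net/http` terms. It is an illustration under assumptions, not plugin code: the real `util.Overwrite*` helpers work on Envoy pseudo-headers, for which `:authority` and `:path` stand in here.
```go
// Standalone sketch of the header rewrite: point the request at api.x.ai,
// inject the token, and drop the stale Content-Length.
package main

import (
	"fmt"
	"net/http"
)

func transform(headers http.Header, apiToken string) {
	headers.Set(":path", "/v1/chat/completions") // capability-mapped API path
	headers.Set(":authority", "api.x.ai")        // upstream Grok domain
	headers.Set("Authorization", "Bearer "+apiToken)
	headers.Del("Content-Length") // the body may be rewritten, so the old length is stale
}

func main() {
	h := http.Header{":path": {"/v1/chat/completions"}, ":authority": {"my-gateway.local"}, "Content-Length": {"84"}}
	transform(h, "YOUR_GROK_API_TOKEN")
	fmt.Println(h)
}
```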


@@ -107,6 +107,7 @@ const (
providerTypeQwen = "qwen"
providerTypeOpenAI = "openai"
providerTypeGroq = "groq"
providerTypeGrok = "grok"
providerTypeBaichuan = "baichuan"
providerTypeYi = "yi"
providerTypeDeepSeek = "deepseek"
@@ -182,6 +183,7 @@ var (
providerTypeQwen: &qwenProviderInitializer{},
providerTypeOpenAI: &openaiProviderInitializer{},
providerTypeGroq: &groqProviderInitializer{},
providerTypeGrok: &grokProviderInitializer{},
providerTypeBaichuan: &baichuanProviderInitializer{},
providerTypeYi: &yiProviderInitializer{},
providerTypeDeepSeek: &deepseekProviderInitializer{},
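These two one-line hunks are all a new provider needs for registration: a `type` string constant and an entry in the initializer map, whose initializer validates the config and constructs the provider. A condensed, self-contained sketch of that dispatch pattern (interfaces simplified from the real ones):
```go
// Condensed sketch of the provider-registry dispatch used above.
// Names and interfaces are simplified stand-ins, not the plugin's own.
package main

import (
	"errors"
	"fmt"
)

type Provider interface{ GetProviderType() string }

type initializer func() (Provider, error)

type grok struct{}

func (grok) GetProviderType() string { return "grok" }

var registry = map[string]initializer{
	"grok": func() (Provider, error) { return grok{}, nil },
}

// create looks up the initializer for a configured provider type.
func create(providerType string) (Provider, error) {
	init, ok := registry[providerType]
	if !ok {
		return nil, errors.New("unknown provider type: " + providerType)
	}
	return init()
}

func main() {
	p, err := create("grok")
	if err != nil {
		panic(err)
	}
	fmt.Println(p.GetProviderType()) // grok
}
```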


@@ -1023,6 +1023,66 @@ data: {"id":"chatcmpl-llm-mock","choices":[{"index":0,"delta":{"role":"assistant
data: {"id":"chatcmpl-llm-mock","choices":[{"index":0,"delta":{"role":"assistant","content":"USER: \n你好\n"},"finish_reason":"stop","logprobs":null}],"model":"dify","object":"chat.completion.chunk","usage":{"prompt_tokens":9,"completion_tokens":1,"total_tokens":10}}
`),
},
},
},
{
Meta: http.AssertionMeta{
TestCaseName: "grok case 1: non-streaming request",
CompareTarget: http.CompareTargetResponse,
},
Request: http.AssertionRequest{
ActualRequest: http.Request{
Host: "api.x.ai",
Path: "/v1/chat/completions",
Method: "POST",
ContentType: http.ContentTypeApplicationJson,
Body: []byte(`{"model":"gpt-3","messages":[{"role":"user","content":"你好,你是谁?"}],"stream":false}`),
},
},
Response: http.AssertionResponse{
ExpectedResponse: http.Response{
StatusCode: 200,
ContentType: http.ContentTypeApplicationJson,
Body: []byte(`{"id":"chatcmpl-llm-mock","choices":[{"index":0,"message":{"role":"assistant","content":"你好,你是谁?"},"finish_reason":"stop","logprobs":null}],"created":10,"model":"grok-beta","object":"chat.completion","usage":{"prompt_tokens":9,"completion_tokens":1,"total_tokens":10}}`),
},
},
},
{
Meta: http.AssertionMeta{
TestCaseName: "grok case 2: streaming request",
CompareTarget: http.CompareTargetResponse,
},
Request: http.AssertionRequest{
ActualRequest: http.Request{
Host: "api.x.ai",
Path: "/v1/chat/completions",
Method: "POST",
ContentType: http.ContentTypeApplicationJson,
Body: []byte(`{"model":"gpt-3","messages":[{"role":"user","content":"你好,你是谁?"}],"stream":true}`),
},
},
Response: http.AssertionResponse{
ExpectedResponse: http.Response{
StatusCode: 200,
ContentType: http.ContentTypeTextEventStream,
Body: []byte(`data: {"id":"chatcmpl-llm-mock","choices":[{"index":0,"delta":{"content":"你"},"finish_reason":null,"logprobs":null}],"created":10,"model":"grok-beta","object":"chat.completion.chunk","usage":null}
data: {"id":"chatcmpl-llm-mock","choices":[{"index":0,"delta":{"content":"好"},"finish_reason":null,"logprobs":null}],"created":10,"model":"grok-beta","object":"chat.completion.chunk","usage":null}
data: {"id":"chatcmpl-llm-mock","choices":[{"index":0,"delta":{"content":""},"finish_reason":null,"logprobs":null}],"created":10,"model":"grok-beta","object":"chat.completion.chunk","usage":null}
data: {"id":"chatcmpl-llm-mock","choices":[{"index":0,"delta":{"content":"你"},"finish_reason":null,"logprobs":null}],"created":10,"model":"grok-beta","object":"chat.completion.chunk","usage":null}
data: {"id":"chatcmpl-llm-mock","choices":[{"index":0,"delta":{"content":"是"},"finish_reason":null,"logprobs":null}],"created":10,"model":"grok-beta","object":"chat.completion.chunk","usage":null}
data: {"id":"chatcmpl-llm-mock","choices":[{"index":0,"delta":{"content":"谁"},"finish_reason":null,"logprobs":null}],"created":10,"model":"grok-beta","object":"chat.completion.chunk","usage":null}
data: {"id":"chatcmpl-llm-mock","choices":[{"index":0,"delta":{"content":""},"finish_reason":"stop","logprobs":null}],"created":10,"model":"grok-beta","object":"chat.completion.chunk","usage":null}
data: [DONE]
`),
},
},
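The streaming case above asserts a `text/event-stream` body: each event is a `data:` line carrying one JSON chunk, and the stream terminates with `data: [DONE]`. A minimal sketch of consuming such a stream, using an inline string in place of a real response body:
```go
// Minimal sketch of consuming the SSE stream asserted above: read "data:"
// lines, stop at [DONE], and collect the delta content from each JSON chunk.
package main

import (
	"bufio"
	"encoding/json"
	"fmt"
	"strings"
)

type chunk struct {
	Choices []struct {
		Delta struct {
			Content string `json:"content"`
		} `json:"delta"`
	} `json:"choices"`
}

func main() {
	stream := `data: {"choices":[{"delta":{"content":"你"}}]}
data: {"choices":[{"delta":{"content":"好"}}]}
data: [DONE]
`
	var sb strings.Builder
	sc := bufio.NewScanner(strings.NewReader(stream))
	for sc.Scan() {
		line := strings.TrimSpace(sc.Text())
		if !strings.HasPrefix(line, "data:") {
			continue // skip blank separator lines between events
		}
		payload := strings.TrimSpace(strings.TrimPrefix(line, "data:"))
		if payload == "[DONE]" {
			break
		}
		var c chunk
		if err := json.Unmarshal([]byte(payload), &c); err != nil {
			continue // ignore malformed chunks in this sketch
		}
		for _, ch := range c.Choices {
			sb.WriteString(ch.Delta.Content)
		}
	}
	fmt.Println(sb.String()) // 你好
}
```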


@@ -334,6 +334,25 @@ spec:
port:
number: 3000
---
apiVersion: networking.k8s.io/v1
kind: Ingress
metadata:
name: wasmplugin-ai-proxy-grok
namespace: higress-conformance-ai-backend
spec:
ingressClassName: higress
rules:
- host: "api.x.ai"
http:
paths:
- pathType: Prefix
path: "/"
backend:
service:
name: llm-mock-service
port:
number: 3000
---
apiVersion: extensions.higress.io/v1alpha1
kind: WasmPlugin
metadata:
@@ -349,8 +368,8 @@ spec:
apiTokens:
- fake_token
modelMapping:
'gpt-3': 360gpt-turbo
'*': 360gpt-pro
"gpt-3": 360gpt-turbo
"*": 360gpt-pro
type: ai360
ingress:
- higress-conformance-ai-backend/wasmplugin-ai-proxy-ai360
@@ -359,8 +378,8 @@ spec:
apiTokens:
- fake_token
modelMapping:
'gpt-3': baichuan2-13b-chat-v1
'*': baichuan-7b-v1
"gpt-3": baichuan2-13b-chat-v1
"*": baichuan-7b-v1
type: baichuan
ingress:
- higress-conformance-ai-backend/wasmplugin-ai-proxy-baichuan
@@ -369,8 +388,8 @@ spec:
apiTokens:
- fake_token
modelMapping:
'gpt-3': ernie-3.5-8k
'*': ernie-3.5-8k
"gpt-3": ernie-3.5-8k
"*": ernie-3.5-8k
type: baidu
ingress:
- higress-conformance-ai-backend/wasmplugin-ai-proxy-baidu
@@ -379,8 +398,8 @@ spec:
apiTokens:
- fake_token
modelMapping:
'gpt-3': deepseek-reasoner
'*': deepseek-chat
"gpt-3": deepseek-reasoner
"*": deepseek-chat
type: deepseek
ingress:
- higress-conformance-ai-backend/wasmplugin-ai-proxy-deepseek
@@ -389,7 +408,7 @@ spec:
apiTokens:
- fake_token
modelMapping:
'*': fake_doubao_endpoint
"*": fake_doubao_endpoint
type: doubao
ingress:
- higress-conformance-ai-backend/wasmplugin-ai-proxy-doubao
@@ -398,8 +417,8 @@ spec:
apiTokens:
- fake_token
modelMapping:
'gpt-3': cohere-command-r-08-2024
'*': Phi-3.5-MoE-instruct
"gpt-3": cohere-command-r-08-2024
"*": Phi-3.5-MoE-instruct
type: github
ingress:
- higress-conformance-ai-backend/wasmplugin-ai-proxy-github
@@ -408,8 +427,8 @@ spec:
apiTokens:
- fake_token
modelMapping:
'gpt-3': llama3-8b-8192
'*': llama-3.1-8b-instant
"gpt-3": llama3-8b-8192
"*": llama-3.1-8b-instant
type: groq
ingress:
- higress-conformance-ai-backend/wasmplugin-ai-proxy-groq
@@ -418,9 +437,9 @@ spec:
apiTokens:
- fake_token
modelMapping:
'gpt-3': abab6.5s-chat
'gpt-4': abab6.5g-chat
'*': abab6.5t-chat
"gpt-3": abab6.5s-chat
"gpt-4": abab6.5g-chat
"*": abab6.5t-chat
type: minimax
ingress:
- higress-conformance-ai-backend/wasmplugin-ai-proxy-minimax-v2-api
@@ -429,9 +448,9 @@ spec:
apiTokens:
- fake_token
modelMapping:
'gpt-3': abab6.5s-chat
'gpt-4': abab6.5g-chat
'*': abab6.5t-chat
"gpt-3": abab6.5s-chat
"gpt-4": abab6.5g-chat
"*": abab6.5t-chat
type: minimax
minimaxApiType: pro
minimaxGroupId: 1
@@ -442,8 +461,8 @@ spec:
apiTokens:
- fake_token
modelMapping:
'gpt-3': mistral-tiny
'*': mistral-large-latest
"gpt-3": mistral-tiny
"*": mistral-large-latest
type: mistral
ingress:
- higress-conformance-ai-backend/wasmplugin-ai-proxy-mistral
@@ -452,10 +471,10 @@ spec:
apiTokens:
- fake_token
modelMapping:
'gpt-3': qwen-turbo
'gpt-35-turbo': qwen-plus
'gpt-4-*': qwen-max
'*': qwen-turbo
"gpt-3": qwen-turbo
"gpt-35-turbo": qwen-plus
"gpt-4-*": qwen-max
"*": qwen-turbo
type: qwen
qwenEnableCompatible: true
ingress:
@@ -465,10 +484,10 @@ spec:
apiTokens:
- fake_token
modelMapping:
'gpt-3': qwen-turbo
'gpt-35-turbo': qwen-plus
'gpt-4-*': qwen-max
'*': qwen-turbo
"gpt-3": qwen-turbo
"gpt-35-turbo": qwen-plus
"gpt-4-*": qwen-max
"*": qwen-turbo
type: qwen
ingress:
- higress-conformance-ai-backend/wasmplugin-ai-proxy-qwen
@@ -477,8 +496,8 @@ spec:
apiTokens:
- fake_token
modelMapping:
'gpt-3': step-1-8k
'*': step-1-32k
"gpt-3": step-1-8k
"*": step-1-32k
type: stepfun
ingress:
- higress-conformance-ai-backend/wasmplugin-ai-proxy-stepfun
@@ -487,8 +506,8 @@ spec:
apiTokens:
- fake_token
modelMapping:
'gpt-3': meta-llama/Meta-Llama-3-8B-Instruct-Turbo
'*': meta-llama/Llama-3-8b-chat-hf
"gpt-3": meta-llama/Meta-Llama-3-8B-Instruct-Turbo
"*": meta-llama/Llama-3-8b-chat-hf
type: together-ai
ingress:
- higress-conformance-ai-backend/wasmplugin-ai-proxy-together-ai
@@ -497,8 +516,8 @@ spec:
apiTokens:
- fake_token
modelMapping:
'gpt-3': Yi-Medium
'*': Yi-Large
"gpt-3": Yi-Medium
"*": Yi-Large
type: yi
ingress:
- higress-conformance-ai-backend/wasmplugin-ai-proxy-yi
@@ -507,8 +526,8 @@ spec:
apiTokens:
- fake_token
modelMapping:
'gpt-3': glm-4-plus
'*': glm-4-long
"gpt-3": glm-4-plus
"*": glm-4-long
type: zhipuai
ingress:
- higress-conformance-ai-backend/wasmplugin-ai-proxy-zhipuai
@@ -517,9 +536,20 @@ spec:
apiTokens:
- fake_token
modelMapping:
'*': dify
"*": dify
type: dify
botType: Completion
ingress:
- higress-conformance-ai-backend/wasmplugin-ai-proxy-dify
- config:
provider:
apiTokens:
- fake_token
modelMapping:
"gpt-3": grok-beta
"gpt-4": grok-beta
"*": grok-beta
type: grok
ingress:
- higress-conformance-ai-backend/wasmplugin-ai-proxy-grok
url: file:///opt/plugins/wasm-go/extensions/ai-proxy/plugin.wasm
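The `modelMapping` block rewrites the client-requested model before the request is forwarded upstream; here every model maps to `grok-beta`, which is why the `gpt-3` requests in the e2e cases above come back with `"model":"grok-beta"`. A sketch of the resolution order as I read it (exact key, then a `*`-suffixed prefix such as `gpt-4-*`, then the `*` fallback); this is an illustration of the semantics, not the plugin's code:
```go
// Sketch of modelMapping resolution: exact match, then the longest
// "*"-suffixed prefix pattern, then the "*" fallback.
package main

import (
	"fmt"
	"strings"
)

func mapModel(mapping map[string]string, model string) string {
	if target, ok := mapping[model]; ok {
		return target // exact match wins
	}
	best := ""
	for pattern := range mapping {
		if strings.HasSuffix(pattern, "*") && pattern != "*" {
			prefix := strings.TrimSuffix(pattern, "*")
			if strings.HasPrefix(model, prefix) && len(prefix) > len(best) {
				best = prefix // remember the longest matching prefix
			}
		}
	}
	if best != "" {
		return mapping[best+"*"]
	}
	if target, ok := mapping["*"]; ok {
		return target // default mapping
	}
	return model // no mapping configured; pass through unchanged
}

func main() {
	mapping := map[string]string{"gpt-3": "grok-beta", "gpt-4": "grok-beta", "*": "grok-beta"}
	fmt.Println(mapModel(mapping, "gpt-4o")) // falls through to "*": grok-beta
}
```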