support qwen compatible mode (#1205)

2026-06-09 12:47:28 +08:00 · 2024-08-15 18:52:49 +08:00
parent 8f7c10ee5f
commit 8fa1224cba
4 changed files with 35 additions and 2 deletions
--- a/plugins/wasm-go/extensions/ai-proxy/provider/provider.go
+++ b/plugins/wasm-go/extensions/ai-proxy/provider/provider.go
@@ -140,6 +140,9 @@ type ProviderConfig struct {
 	// @Title zh-CN 启用通义千问搜索服务
 	// @Description zh-CN 仅适用于通义千问服务，表示是否启用通义千问的互联网搜索功能。
 	qwenEnableSearch bool `required:"false" yaml:"qwenEnableSearch" json:"qwenEnableSearch"`
+	// @Title zh-CN 开启通义千问兼容模式
+	// @Description zh-CN 启用通义千问兼容模式后，将调用千问的兼容模式接口，同时对请求/响应不做修改。
+	qwenEnableCompatible bool `required:"false" yaml:"qwenEnableCompatible" json:"qwenEnableCompatible"`
 	// @Title zh-CN Ollama Server IP/Domain
 	// @Description zh-CN 仅适用于 Ollama 服务。Ollama 服务器的主机地址。
 	ollamaServerHost string `required:"false" yaml:"ollamaServerHost" json:"ollamaServerHost"`
@@ -193,6 +196,7 @@ func (c *ProviderConfig) FromJson(json gjson.Result) {
 		c.qwenFileIds = append(c.qwenFileIds, fileId.String())
 	}
 	c.qwenEnableSearch = json.Get("qwenEnableSearch").Bool()
+	c.qwenEnableCompatible = json.Get("qwenEnableCompatible").Bool()
 	c.ollamaServerHost = json.Get("ollamaServerHost").String()
 	c.ollamaServerPort = uint32(json.Get("ollamaServerPort").Uint())
 	c.modelMapping = make(map[string]string)
--- a/plugins/wasm-go/extensions/ai-proxy/provider/qwen.go
+++ b/plugins/wasm-go/extensions/ai-proxy/provider/qwen.go
@@ -13,6 +13,8 @@ import (
 	"github.com/alibaba/higress/plugins/wasm-go/pkg/wrapper"
 	"github.com/higress-group/proxy-wasm-go-sdk/proxywasm"
 	"github.com/higress-group/proxy-wasm-go-sdk/proxywasm/types"
+	"github.com/tidwall/gjson"
+	"github.com/tidwall/sjson"
 )

 // qwenProvider is the provider for Qwen service.
@@ -23,6 +25,7 @@ const (
 	qwenDomain             = "dashscope.aliyuncs.com"
 	qwenChatCompletionPath = "/api/v1/services/aigc/text-generation/generation"
 	qwenTextEmbeddingPath  = "/api/v1/services/embeddings/text-embedding/text-embedding"
+	qwenCompatiblePath     = "/compatible-mode/v1/chat/completions"

 	qwenTopPMin = 0.000001
 	qwenTopPMax = 0.999999
@@ -63,7 +66,9 @@ func (m *qwenProvider) GetProviderType() string {
 }

 func (m *qwenProvider) OnRequestHeaders(ctx wrapper.HttpContext, apiName ApiName, log wrapper.Log) (types.Action, error) {
-	if apiName == ApiNameChatCompletion {
+	if m.config.qwenEnableCompatible {
+		_ = util.OverwriteRequestPath(qwenCompatiblePath)
+	} else if apiName == ApiNameChatCompletion {
 		_ = util.OverwriteRequestPath(qwenChatCompletionPath)
 	} else if apiName == ApiNameEmbeddings {
 		_ = util.OverwriteRequestPath(qwenTextEmbeddingPath)
@@ -85,6 +90,23 @@ func (m *qwenProvider) OnRequestHeaders(ctx wrapper.HttpContext, apiName ApiName
 }

 func (m *qwenProvider) OnRequestBody(ctx wrapper.HttpContext, apiName ApiName, body []byte, log wrapper.Log) (types.Action, error) {
+	if m.config.qwenEnableCompatible {
+		if gjson.GetBytes(body, "model").Exists() {
+			rawModel := gjson.GetBytes(body, "model").String()
+			mappedModel := getMappedModel(rawModel, m.config.modelMapping, log)
+			newBody, err := sjson.SetBytes(body, "model", mappedModel)
+			if err != nil {
+				log.Errorf("Replace model error: %v", err)
+				return types.ActionContinue, err
+			}
+			err = proxywasm.ReplaceHttpRequestBody(newBody)
+			if err != nil {
+				log.Errorf("Replace request body error: %v", err)
+				return types.ActionContinue, err
+			}
+		}
+		return types.ActionContinue, nil
+	}
 	if apiName == ApiNameChatCompletion {
 		return m.onChatCompletionRequestBody(ctx, body, log)
 	}
@@ -220,7 +242,7 @@ func (m *qwenProvider) OnResponseHeaders(ctx wrapper.HttpContext, apiName ApiNam
 }

 func (m *qwenProvider) OnStreamingResponseBody(ctx wrapper.HttpContext, name ApiName, chunk []byte, isLastChunk bool, log wrapper.Log) ([]byte, error) {
-	if name != ApiNameChatCompletion {
+	if m.config.qwenEnableCompatible || name != ApiNameChatCompletion {
 		return chunk, nil
 	}

@@ -305,6 +327,9 @@ func (m *qwenProvider) OnStreamingResponseBody(ctx wrapper.HttpContext, name Api
 }

 func (m *qwenProvider) OnResponseBody(ctx wrapper.HttpContext, apiName ApiName, body []byte, log wrapper.Log) (types.Action, error) {
+	if m.config.qwenEnableCompatible {
+		return types.ActionContinue, nil
+	}
 	if apiName == ApiNameChatCompletion {
 		return m.onChatCompletionResponseBody(ctx, body, log)
 	}