mirror of
https://github.com/alibaba/higress.git
synced 2026-05-28 22:57:31 +08:00
feat(ai-proxy): support Qwen reranks and conversations paths (#3724)
This commit is contained in:
@@ -69,6 +69,9 @@ var (
|
|||||||
{provider.PathAnthropicComplete, provider.ApiNameAnthropicComplete},
|
{provider.PathAnthropicComplete, provider.ApiNameAnthropicComplete},
|
||||||
// Cohere style
|
// Cohere style
|
||||||
{provider.PathCohereV1Rerank, provider.ApiNameCohereV1Rerank},
|
{provider.PathCohereV1Rerank, provider.ApiNameCohereV1Rerank},
|
||||||
|
// Qwen style
|
||||||
|
{provider.PathQwenV1Reranks, provider.ApiNameQwenV1Rerank},
|
||||||
|
{provider.PathQwenV1Conversations, provider.ApiNameQwenV1Conversations},
|
||||||
}
|
}
|
||||||
pathPatternToApiName = []pair[*regexp.Regexp, provider.ApiName]{
|
pathPatternToApiName = []pair[*regexp.Regexp, provider.ApiName]{
|
||||||
// OpenAI style
|
// OpenAI style
|
||||||
|
|||||||
@@ -56,6 +56,9 @@ func Test_getApiName(t *testing.T) {
|
|||||||
{"gemini stream generate content", "/v1beta/models/gemini-1.0-pro:streamGenerateContent", provider.ApiNameGeminiStreamGenerateContent},
|
{"gemini stream generate content", "/v1beta/models/gemini-1.0-pro:streamGenerateContent", provider.ApiNameGeminiStreamGenerateContent},
|
||||||
// Cohere
|
// Cohere
|
||||||
{"cohere rerank", "/v1/rerank", provider.ApiNameCohereV1Rerank},
|
{"cohere rerank", "/v1/rerank", provider.ApiNameCohereV1Rerank},
|
||||||
|
// Qwen
|
||||||
|
{"qwen reranks", "/v1/reranks", provider.ApiNameQwenV1Rerank},
|
||||||
|
{"qwen conversations", "/v1/conversations", provider.ApiNameQwenV1Conversations},
|
||||||
// Unknown
|
// Unknown
|
||||||
{"unknown", "/v1/unknown", ""},
|
{"unknown", "/v1/unknown", ""},
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -71,6 +71,7 @@ const (
|
|||||||
ApiNameQwenAsyncAIGC ApiName = "qwen/v1/services/aigc"
|
ApiNameQwenAsyncAIGC ApiName = "qwen/v1/services/aigc"
|
||||||
ApiNameQwenAsyncTask ApiName = "qwen/v1/tasks"
|
ApiNameQwenAsyncTask ApiName = "qwen/v1/tasks"
|
||||||
ApiNameQwenV1Rerank ApiName = "qwen/v1/rerank"
|
ApiNameQwenV1Rerank ApiName = "qwen/v1/rerank"
|
||||||
|
ApiNameQwenV1Conversations ApiName = "qwen/v1/conversations"
|
||||||
ApiNameGeminiGenerateContent ApiName = "gemini/v1beta/generatecontent"
|
ApiNameGeminiGenerateContent ApiName = "gemini/v1beta/generatecontent"
|
||||||
ApiNameGeminiStreamGenerateContent ApiName = "gemini/v1beta/streamgeneratecontent"
|
ApiNameGeminiStreamGenerateContent ApiName = "gemini/v1beta/streamgeneratecontent"
|
||||||
ApiNameAnthropicMessages ApiName = "anthropic/v1/messages"
|
ApiNameAnthropicMessages ApiName = "anthropic/v1/messages"
|
||||||
@@ -118,6 +119,10 @@ const (
|
|||||||
// Cohere
|
// Cohere
|
||||||
PathCohereV1Rerank = "/v1/rerank"
|
PathCohereV1Rerank = "/v1/rerank"
|
||||||
|
|
||||||
|
// Qwen
|
||||||
|
PathQwenV1Reranks = "/v1/reranks"
|
||||||
|
PathQwenV1Conversations = "/v1/conversations"
|
||||||
|
|
||||||
providerTypeMoonshot = "moonshot"
|
providerTypeMoonshot = "moonshot"
|
||||||
providerTypeAzure = "azure"
|
providerTypeAzure = "azure"
|
||||||
providerTypeAi360 = "ai360"
|
providerTypeAi360 = "ai360"
|
||||||
|
|||||||
@@ -27,9 +27,11 @@ const (
|
|||||||
qwenChatCompletionPath = "/api/v1/services/aigc/text-generation/generation"
|
qwenChatCompletionPath = "/api/v1/services/aigc/text-generation/generation"
|
||||||
qwenTextEmbeddingPath = "/api/v1/services/embeddings/text-embedding/text-embedding"
|
qwenTextEmbeddingPath = "/api/v1/services/embeddings/text-embedding/text-embedding"
|
||||||
qwenTextRerankPath = "/api/v1/services/rerank/text-rerank/text-rerank"
|
qwenTextRerankPath = "/api/v1/services/rerank/text-rerank/text-rerank"
|
||||||
|
qwenCompatibleTextRerankPath = "/compatible-api/v1/reranks"
|
||||||
qwenCompatibleChatCompletionPath = "/compatible-mode/v1/chat/completions"
|
qwenCompatibleChatCompletionPath = "/compatible-mode/v1/chat/completions"
|
||||||
qwenCompatibleCompletionsPath = "/compatible-mode/v1/completions"
|
qwenCompatibleCompletionsPath = "/compatible-mode/v1/completions"
|
||||||
qwenCompatibleTextEmbeddingPath = "/compatible-mode/v1/embeddings"
|
qwenCompatibleTextEmbeddingPath = "/compatible-mode/v1/embeddings"
|
||||||
|
qwenCompatibleConversationsPath = "/compatible-mode/v1/conversations"
|
||||||
qwenCompatibleResponsesPath = "/compatible-mode/v1/responses"
|
qwenCompatibleResponsesPath = "/compatible-mode/v1/responses"
|
||||||
qwenCompatibleFilesPath = "/compatible-mode/v1/files"
|
qwenCompatibleFilesPath = "/compatible-mode/v1/files"
|
||||||
qwenCompatibleRetrieveFilePath = "/compatible-mode/v1/files/{file_id}"
|
qwenCompatibleRetrieveFilePath = "/compatible-mode/v1/files/{file_id}"
|
||||||
@@ -78,7 +80,8 @@ func (m *qwenProviderInitializer) DefaultCapabilities(qwenEnableCompatible bool)
|
|||||||
string(ApiNameRetrieveBatch): qwenCompatibleRetrieveBatchPath,
|
string(ApiNameRetrieveBatch): qwenCompatibleRetrieveBatchPath,
|
||||||
string(ApiNameQwenAsyncAIGC): qwenAsyncAIGCPath,
|
string(ApiNameQwenAsyncAIGC): qwenAsyncAIGCPath,
|
||||||
string(ApiNameQwenAsyncTask): qwenAsyncTaskPath,
|
string(ApiNameQwenAsyncTask): qwenAsyncTaskPath,
|
||||||
string(ApiNameQwenV1Rerank): qwenTextRerankPath,
|
string(ApiNameQwenV1Rerank): qwenCompatibleTextRerankPath,
|
||||||
|
string(ApiNameQwenV1Conversations): qwenCompatibleConversationsPath,
|
||||||
string(ApiNameAnthropicMessages): qwenAnthropicMessagesPath,
|
string(ApiNameAnthropicMessages): qwenAnthropicMessagesPath,
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
@@ -715,8 +718,11 @@ func (m *qwenProvider) GetApiName(path string) ApiName {
|
|||||||
return ApiNameQwenAsyncAIGC
|
return ApiNameQwenAsyncAIGC
|
||||||
case strings.Contains(path, qwenAsyncTaskPath):
|
case strings.Contains(path, qwenAsyncTaskPath):
|
||||||
return ApiNameQwenAsyncTask
|
return ApiNameQwenAsyncTask
|
||||||
case strings.Contains(path, qwenTextRerankPath):
|
case strings.Contains(path, qwenTextRerankPath),
|
||||||
|
strings.Contains(path, qwenCompatibleTextRerankPath):
|
||||||
return ApiNameQwenV1Rerank
|
return ApiNameQwenV1Rerank
|
||||||
|
case strings.Contains(path, qwenCompatibleConversationsPath):
|
||||||
|
return ApiNameQwenV1Conversations
|
||||||
default:
|
default:
|
||||||
return ""
|
return ""
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -366,6 +366,29 @@ func RunQwenOnHttpRequestHeadersTests(t *testing.T) {
|
|||||||
require.Contains(t, authValue, "sk-qwen-test123456789", "Authorization should contain qwen API token")
|
require.Contains(t, authValue, "sk-qwen-test123456789", "Authorization should contain qwen API token")
|
||||||
})
|
})
|
||||||
|
|
||||||
|
// 测试qwen请求头处理(reranks接口)
|
||||||
|
t.Run("qwen reranks request headers", func(t *testing.T) {
|
||||||
|
host, status := test.NewTestHost(basicQwenConfig)
|
||||||
|
defer host.Reset()
|
||||||
|
require.Equal(t, types.OnPluginStartStatusOK, status)
|
||||||
|
|
||||||
|
action := host.CallOnHttpRequestHeaders([][2]string{
|
||||||
|
{":authority", "example.com"},
|
||||||
|
{":path", "/v1/reranks"},
|
||||||
|
{":method", "POST"},
|
||||||
|
{"Content-Type", "application/json"},
|
||||||
|
})
|
||||||
|
|
||||||
|
require.Equal(t, types.HeaderStopIteration, action)
|
||||||
|
|
||||||
|
requestHeaders := host.GetRequestHeaders()
|
||||||
|
require.NotNil(t, requestHeaders)
|
||||||
|
|
||||||
|
pathValue, hasPath := test.GetHeaderValue(requestHeaders, ":path")
|
||||||
|
require.True(t, hasPath)
|
||||||
|
require.Contains(t, pathValue, "/api/v1/services/rerank/text-rerank/text-rerank", "Path should be converted to qwen rerank path")
|
||||||
|
})
|
||||||
|
|
||||||
// 测试qwen自定义域名请求头处理
|
// 测试qwen自定义域名请求头处理
|
||||||
t.Run("qwen custom domain request headers", func(t *testing.T) {
|
t.Run("qwen custom domain request headers", func(t *testing.T) {
|
||||||
host, status := test.NewTestHost(qwenCustomDomainConfig)
|
host, status := test.NewTestHost(qwenCustomDomainConfig)
|
||||||
@@ -451,6 +474,52 @@ func RunQwenOnHttpRequestHeadersTests(t *testing.T) {
|
|||||||
require.True(t, hasPath)
|
require.True(t, hasPath)
|
||||||
require.Contains(t, pathValue, "/compatible-mode/v1/responses", "Path should use compatible mode responses path")
|
require.Contains(t, pathValue, "/compatible-mode/v1/responses", "Path should use compatible mode responses path")
|
||||||
})
|
})
|
||||||
|
|
||||||
|
// 测试qwen兼容模式请求头处理(reranks接口)
|
||||||
|
t.Run("qwen compatible mode reranks request headers", func(t *testing.T) {
|
||||||
|
host, status := test.NewTestHost(qwenEnableCompatibleConfig)
|
||||||
|
defer host.Reset()
|
||||||
|
require.Equal(t, types.OnPluginStartStatusOK, status)
|
||||||
|
|
||||||
|
action := host.CallOnHttpRequestHeaders([][2]string{
|
||||||
|
{":authority", "example.com"},
|
||||||
|
{":path", "/v1/reranks"},
|
||||||
|
{":method", "POST"},
|
||||||
|
{"Content-Type", "application/json"},
|
||||||
|
})
|
||||||
|
|
||||||
|
require.Equal(t, types.HeaderStopIteration, action)
|
||||||
|
|
||||||
|
requestHeaders := host.GetRequestHeaders()
|
||||||
|
require.NotNil(t, requestHeaders)
|
||||||
|
|
||||||
|
pathValue, hasPath := test.GetHeaderValue(requestHeaders, ":path")
|
||||||
|
require.True(t, hasPath)
|
||||||
|
require.Contains(t, pathValue, "/compatible-api/v1/reranks", "Path should use compatible API reranks path")
|
||||||
|
})
|
||||||
|
|
||||||
|
// 测试qwen兼容模式请求头处理(conversations接口)
|
||||||
|
t.Run("qwen compatible mode conversations request headers", func(t *testing.T) {
|
||||||
|
host, status := test.NewTestHost(qwenEnableCompatibleConfig)
|
||||||
|
defer host.Reset()
|
||||||
|
require.Equal(t, types.OnPluginStartStatusOK, status)
|
||||||
|
|
||||||
|
action := host.CallOnHttpRequestHeaders([][2]string{
|
||||||
|
{":authority", "example.com"},
|
||||||
|
{":path", "/v1/conversations"},
|
||||||
|
{":method", "POST"},
|
||||||
|
{"Content-Type", "application/json"},
|
||||||
|
})
|
||||||
|
|
||||||
|
require.Equal(t, types.HeaderStopIteration, action)
|
||||||
|
|
||||||
|
requestHeaders := host.GetRequestHeaders()
|
||||||
|
require.NotNil(t, requestHeaders)
|
||||||
|
|
||||||
|
pathValue, hasPath := test.GetHeaderValue(requestHeaders, ":path")
|
||||||
|
require.True(t, hasPath)
|
||||||
|
require.Contains(t, pathValue, "/compatible-mode/v1/conversations", "Path should use compatible mode conversations path")
|
||||||
|
})
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -583,6 +652,35 @@ func RunQwenOnHttpRequestBodyTests(t *testing.T) {
|
|||||||
require.True(t, hasFileLogs, "Should have file processing logs")
|
require.True(t, hasFileLogs, "Should have file processing logs")
|
||||||
})
|
})
|
||||||
|
|
||||||
|
// 测试qwen请求体处理(reranks接口)
|
||||||
|
t.Run("qwen reranks request body", func(t *testing.T) {
|
||||||
|
host, status := test.NewTestHost(qwenMultiModelConfig)
|
||||||
|
defer host.Reset()
|
||||||
|
require.Equal(t, types.OnPluginStartStatusOK, status)
|
||||||
|
|
||||||
|
host.CallOnHttpRequestHeaders([][2]string{
|
||||||
|
{":authority", "example.com"},
|
||||||
|
{":path", "/v1/reranks"},
|
||||||
|
{":method", "POST"},
|
||||||
|
{"Content-Type", "application/json"},
|
||||||
|
})
|
||||||
|
|
||||||
|
requestBody := `{
|
||||||
|
"model":"qwen3-rerank",
|
||||||
|
"documents":["doc1","doc2"],
|
||||||
|
"query":"test query",
|
||||||
|
"top_n":1
|
||||||
|
}`
|
||||||
|
action := host.CallOnHttpRequestBody([]byte(requestBody))
|
||||||
|
|
||||||
|
require.Equal(t, types.ActionContinue, action)
|
||||||
|
|
||||||
|
processedBody := host.GetRequestBody()
|
||||||
|
require.NotNil(t, processedBody)
|
||||||
|
require.Contains(t, string(processedBody), "qwen3-rerank", "Reranks request model should be preserved")
|
||||||
|
require.Contains(t, string(processedBody), "documents", "Reranks request documents should be preserved")
|
||||||
|
})
|
||||||
|
|
||||||
// 测试qwen请求体处理(qwen-vl模型,多模态)
|
// 测试qwen请求体处理(qwen-vl模型,多模态)
|
||||||
t.Run("qwen qwen-vl model multimodal request body", func(t *testing.T) {
|
t.Run("qwen qwen-vl model multimodal request body", func(t *testing.T) {
|
||||||
host, status := test.NewTestHost(qwenMultiModelConfig)
|
host, status := test.NewTestHost(qwenMultiModelConfig)
|
||||||
@@ -723,6 +821,63 @@ func RunQwenOnHttpRequestBodyTests(t *testing.T) {
|
|||||||
require.Contains(t, string(processedBody), "qwen-turbo", "Model name should be preserved in responses request")
|
require.Contains(t, string(processedBody), "qwen-turbo", "Model name should be preserved in responses request")
|
||||||
})
|
})
|
||||||
|
|
||||||
|
// 测试qwen请求体处理(兼容模式 reranks接口)
|
||||||
|
t.Run("qwen compatible mode reranks request body", func(t *testing.T) {
|
||||||
|
host, status := test.NewTestHost(qwenEnableCompatibleConfig)
|
||||||
|
defer host.Reset()
|
||||||
|
require.Equal(t, types.OnPluginStartStatusOK, status)
|
||||||
|
|
||||||
|
host.CallOnHttpRequestHeaders([][2]string{
|
||||||
|
{":authority", "example.com"},
|
||||||
|
{":path", "/v1/reranks"},
|
||||||
|
{":method", "POST"},
|
||||||
|
{"Content-Type", "application/json"},
|
||||||
|
})
|
||||||
|
|
||||||
|
requestBody := `{
|
||||||
|
"model":"qwen3-rerank",
|
||||||
|
"documents":["doc1","doc2"],
|
||||||
|
"query":"test query",
|
||||||
|
"top_n":1
|
||||||
|
}`
|
||||||
|
action := host.CallOnHttpRequestBody([]byte(requestBody))
|
||||||
|
|
||||||
|
require.Equal(t, types.ActionContinue, action)
|
||||||
|
|
||||||
|
processedBody := host.GetRequestBody()
|
||||||
|
require.NotNil(t, processedBody)
|
||||||
|
require.Contains(t, string(processedBody), "qwen-turbo", "Reranks request model should be mapped by wildcard")
|
||||||
|
require.Contains(t, string(processedBody), "documents", "Reranks request documents should be preserved")
|
||||||
|
})
|
||||||
|
|
||||||
|
// 测试qwen请求体处理(兼容模式 conversations接口)
|
||||||
|
t.Run("qwen compatible mode conversations request body", func(t *testing.T) {
|
||||||
|
host, status := test.NewTestHost(qwenEnableCompatibleConfig)
|
||||||
|
defer host.Reset()
|
||||||
|
require.Equal(t, types.OnPluginStartStatusOK, status)
|
||||||
|
|
||||||
|
host.CallOnHttpRequestHeaders([][2]string{
|
||||||
|
{":authority", "example.com"},
|
||||||
|
{":path", "/v1/conversations"},
|
||||||
|
{":method", "POST"},
|
||||||
|
{"Content-Type", "application/json"},
|
||||||
|
})
|
||||||
|
|
||||||
|
requestBody := `{
|
||||||
|
"metadata":{"topic":"demo"},
|
||||||
|
"items":[{"type":"message","role":"system","content":"test content"}]
|
||||||
|
}`
|
||||||
|
action := host.CallOnHttpRequestBody([]byte(requestBody))
|
||||||
|
|
||||||
|
require.Equal(t, types.ActionContinue, action)
|
||||||
|
|
||||||
|
processedBody := host.GetRequestBody()
|
||||||
|
require.NotNil(t, processedBody)
|
||||||
|
require.Contains(t, string(processedBody), "\"metadata\"", "Conversations metadata should be preserved")
|
||||||
|
require.Contains(t, string(processedBody), "\"items\"", "Conversations items should be preserved")
|
||||||
|
require.NotContains(t, string(processedBody), "\"model\":", "Conversations request should not inject model field")
|
||||||
|
})
|
||||||
|
|
||||||
// 测试qwen请求体处理(非兼容模式 responses接口应报不支持)
|
// 测试qwen请求体处理(非兼容模式 responses接口应报不支持)
|
||||||
t.Run("qwen non-compatible mode responses request body unsupported", func(t *testing.T) {
|
t.Run("qwen non-compatible mode responses request body unsupported", func(t *testing.T) {
|
||||||
host, status := test.NewTestHost(basicQwenConfig)
|
host, status := test.NewTestHost(basicQwenConfig)
|
||||||
@@ -755,9 +910,11 @@ func RunQwenOnHttpRequestBodyTests(t *testing.T) {
|
|||||||
// 覆盖 qwen.GetApiName 中以下分支:
|
// 覆盖 qwen.GetApiName 中以下分支:
|
||||||
// - qwenCompatibleTextEmbeddingPath => ApiNameEmbeddings
|
// - qwenCompatibleTextEmbeddingPath => ApiNameEmbeddings
|
||||||
// - qwenCompatibleResponsesPath => ApiNameResponses
|
// - qwenCompatibleResponsesPath => ApiNameResponses
|
||||||
|
// - qwenCompatibleTextRerankPath => ApiNameQwenV1Rerank
|
||||||
|
// - qwenCompatibleConversationsPath => ApiNameQwenV1Conversations
|
||||||
// - qwenAsyncAIGCPath => ApiNameQwenAsyncAIGC
|
// - qwenAsyncAIGCPath => ApiNameQwenAsyncAIGC
|
||||||
// - qwenAsyncTaskPath => ApiNameQwenAsyncTask
|
// - qwenAsyncTaskPath => ApiNameQwenAsyncTask
|
||||||
t.Run("qwen original protocol get api name coverage for compatible embeddings responses and async paths", func(t *testing.T) {
|
t.Run("qwen original protocol get api name coverage for compatible paths and async paths", func(t *testing.T) {
|
||||||
cases := []struct {
|
cases := []struct {
|
||||||
name string
|
name string
|
||||||
path string
|
path string
|
||||||
@@ -770,6 +927,14 @@ func RunQwenOnHttpRequestBodyTests(t *testing.T) {
|
|||||||
name: "compatible responses path",
|
name: "compatible responses path",
|
||||||
path: "/compatible-mode/v1/responses",
|
path: "/compatible-mode/v1/responses",
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
name: "compatible reranks path",
|
||||||
|
path: "/compatible-api/v1/reranks",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "compatible conversations path",
|
||||||
|
path: "/compatible-mode/v1/conversations",
|
||||||
|
},
|
||||||
{
|
{
|
||||||
name: "async aigc path",
|
name: "async aigc path",
|
||||||
path: "/api/v1/services/aigc/custom-async-endpoint",
|
path: "/api/v1/services/aigc/custom-async-endpoint",
|
||||||
|
|||||||
Reference in New Issue
Block a user