mirror of
https://github.com/alibaba/higress.git
synced 2026-06-26 02:35:02 +08:00
feat(ai-proxy): vLLM passthrough for Anthropic Messages and newer OpenAI endpoints (#3989)
Signed-off-by: Rand01ph <tanyawei1991@gmail.com> Co-authored-by: Claude Opus 4.8 (1M context) <noreply@anthropic.com> Co-authored-by: woody <yaodiwu618@gmail.com>
This commit is contained in:
@@ -69,6 +69,7 @@ var (
|
|||||||
{provider.PathOpenAIResponses, provider.ApiNameResponses},
|
{provider.PathOpenAIResponses, provider.ApiNameResponses},
|
||||||
{provider.PathOpenAIVideos, provider.ApiNameVideos},
|
{provider.PathOpenAIVideos, provider.ApiNameVideos},
|
||||||
// Anthropic style
|
// Anthropic style
|
||||||
|
{provider.PathAnthropicMessagesCountTokens, provider.ApiNameAnthropicCountTokens},
|
||||||
{provider.PathAnthropicMessages, provider.ApiNameAnthropicMessages},
|
{provider.PathAnthropicMessages, provider.ApiNameAnthropicMessages},
|
||||||
{provider.PathAnthropicComplete, provider.ApiNameAnthropicComplete},
|
{provider.PathAnthropicComplete, provider.ApiNameAnthropicComplete},
|
||||||
// Cohere style
|
// Cohere style
|
||||||
|
|||||||
@@ -54,6 +54,7 @@ func Test_getApiName(t *testing.T) {
|
|||||||
{"openai delete fine tuning checkpoint permission", "/v1/fine_tuning/checkpoints/checkpointid/permissions/permissionid", provider.ApiNameDeleteFineTuningCheckpointPermission},
|
{"openai delete fine tuning checkpoint permission", "/v1/fine_tuning/checkpoints/checkpointid/permissions/permissionid", provider.ApiNameDeleteFineTuningCheckpointPermission},
|
||||||
{"openai responses", "/v1/responses", provider.ApiNameResponses},
|
{"openai responses", "/v1/responses", provider.ApiNameResponses},
|
||||||
// Anthropic
|
// Anthropic
|
||||||
|
{"anthropic count_tokens", "/v1/messages/count_tokens", provider.ApiNameAnthropicCountTokens},
|
||||||
{"anthropic messages", "/v1/messages", provider.ApiNameAnthropicMessages},
|
{"anthropic messages", "/v1/messages", provider.ApiNameAnthropicMessages},
|
||||||
{"anthropic complete", "/v1/complete", provider.ApiNameAnthropicComplete},
|
{"anthropic complete", "/v1/complete", provider.ApiNameAnthropicComplete},
|
||||||
// Gemini
|
// Gemini
|
||||||
|
|||||||
@@ -77,6 +77,7 @@ const (
|
|||||||
ApiNameGeminiGenerateContent ApiName = "gemini/v1beta/generatecontent"
|
ApiNameGeminiGenerateContent ApiName = "gemini/v1beta/generatecontent"
|
||||||
ApiNameGeminiStreamGenerateContent ApiName = "gemini/v1beta/streamgeneratecontent"
|
ApiNameGeminiStreamGenerateContent ApiName = "gemini/v1beta/streamgeneratecontent"
|
||||||
ApiNameAnthropicMessages ApiName = "anthropic/v1/messages"
|
ApiNameAnthropicMessages ApiName = "anthropic/v1/messages"
|
||||||
|
ApiNameAnthropicCountTokens ApiName = "anthropic/v1/messages/count_tokens"
|
||||||
ApiNameAnthropicComplete ApiName = "anthropic/v1/complete"
|
ApiNameAnthropicComplete ApiName = "anthropic/v1/complete"
|
||||||
ApiNameVertexRaw ApiName = "vertex/raw"
|
ApiNameVertexRaw ApiName = "vertex/raw"
|
||||||
|
|
||||||
@@ -115,8 +116,9 @@ const (
|
|||||||
PathOpenAIRetrieveVideoContent = "/v1/videos/{video_id}/content"
|
PathOpenAIRetrieveVideoContent = "/v1/videos/{video_id}/content"
|
||||||
|
|
||||||
// Anthropic
|
// Anthropic
|
||||||
PathAnthropicMessages = "/v1/messages"
|
PathAnthropicMessages = "/v1/messages"
|
||||||
PathAnthropicComplete = "/v1/complete"
|
PathAnthropicMessagesCountTokens = "/v1/messages/count_tokens"
|
||||||
|
PathAnthropicComplete = "/v1/complete"
|
||||||
|
|
||||||
// Cohere
|
// Cohere
|
||||||
PathCohereV1Rerank = "/v1/rerank"
|
PathCohereV1Rerank = "/v1/rerank"
|
||||||
@@ -1478,7 +1480,8 @@ func (c *ProviderConfig) needToProcessRequestBody(apiName ApiName) bool {
|
|||||||
ApiNameResponses,
|
ApiNameResponses,
|
||||||
ApiNameGeminiGenerateContent,
|
ApiNameGeminiGenerateContent,
|
||||||
ApiNameGeminiStreamGenerateContent,
|
ApiNameGeminiStreamGenerateContent,
|
||||||
ApiNameAnthropicMessages:
|
ApiNameAnthropicMessages,
|
||||||
|
ApiNameAnthropicCountTokens:
|
||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
return false
|
return false
|
||||||
|
|||||||
@@ -14,10 +14,20 @@ const (
|
|||||||
defaultVllmDomain = "vllm-service.cluster.local"
|
defaultVllmDomain = "vllm-service.cluster.local"
|
||||||
)
|
)
|
||||||
|
|
||||||
// isVllmDirectPath reports whether path is a known standard vLLM interface path,
// i.e. the configured vllmCustomUrl already points at a concrete endpoint rather
// than a base path. Such paths are forwarded as-is; base paths get the per-API
// suffix appended. It must cover every endpoint in DefaultCapabilities that a
// user might configure directly, otherwise the path is mistakenly treated as a
// base and double-appended (e.g. /v1/responses -> /v1/responses/responses).
//
// One trailing slash is ignored when matching, so "/v1/responses/" is still
// recognized as a direct endpoint, while "/", "/v1" and "/v1/" remain base paths.
//
// NOTE(review): "/models" is also part of DefaultCapabilities but is not matched
// here; confirm whether a direct /v1/models URL should be recognized as well.
func isVllmDirectPath(path string) bool {
	// Drop a single trailing slash so "/v1/messages/" is not mistaken for a base
	// path and turned into "/v1/messages/messages".
	endpoint := strings.TrimSuffix(path, "/")

	directSuffixes := []string{
		"/completions", // covers both /v1/completions and /v1/chat/completions
		"/embeddings",
		"/rerank",
		"/responses",
		"/messages",
		"/count_tokens",
		"/transcriptions",
		"/translations",
	}
	for _, suffix := range directSuffixes {
		if strings.HasSuffix(endpoint, suffix) {
			return true
		}
	}
	return false
}
|
||||||
|
|
||||||
type vllmProviderInitializer struct{}
|
type vllmProviderInitializer struct{}
|
||||||
@@ -36,6 +46,13 @@ func (m *vllmProviderInitializer) DefaultCapabilities() map[string]string {
|
|||||||
string(ApiNameModels): PathOpenAIModels,
|
string(ApiNameModels): PathOpenAIModels,
|
||||||
string(ApiNameEmbeddings): PathOpenAIEmbeddings,
|
string(ApiNameEmbeddings): PathOpenAIEmbeddings,
|
||||||
string(ApiNameCohereV1Rerank): PathCohereV1Rerank,
|
string(ApiNameCohereV1Rerank): PathCohereV1Rerank,
|
||||||
|
// vLLM also natively serves the Anthropic Messages API and newer OpenAI
|
||||||
|
// endpoints; expose them as passthrough (no protocol translation).
|
||||||
|
string(ApiNameAnthropicMessages): PathAnthropicMessages,
|
||||||
|
string(ApiNameAnthropicCountTokens): PathAnthropicMessagesCountTokens,
|
||||||
|
string(ApiNameResponses): PathOpenAIResponses,
|
||||||
|
string(ApiNameAudioTranscription): PathOpenAIAudioTranscriptions,
|
||||||
|
string(ApiNameAudioTranslation): PathOpenAIAudioTranslations,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -154,6 +171,22 @@ func (m *vllmProvider) GetApiName(path string) ApiName {
|
|||||||
if strings.Contains(path, PathCohereV1Rerank) {
|
if strings.Contains(path, PathCohereV1Rerank) {
|
||||||
return ApiNameCohereV1Rerank
|
return ApiNameCohereV1Rerank
|
||||||
}
|
}
|
||||||
|
// count_tokens must be matched before /v1/messages: the former contains the latter.
|
||||||
|
if strings.Contains(path, PathAnthropicMessagesCountTokens) {
|
||||||
|
return ApiNameAnthropicCountTokens
|
||||||
|
}
|
||||||
|
if strings.Contains(path, PathAnthropicMessages) {
|
||||||
|
return ApiNameAnthropicMessages
|
||||||
|
}
|
||||||
|
if strings.Contains(path, PathOpenAIResponses) {
|
||||||
|
return ApiNameResponses
|
||||||
|
}
|
||||||
|
if strings.Contains(path, PathOpenAIAudioTranscriptions) {
|
||||||
|
return ApiNameAudioTranscription
|
||||||
|
}
|
||||||
|
if strings.Contains(path, PathOpenAIAudioTranslations) {
|
||||||
|
return ApiNameAudioTranslation
|
||||||
|
}
|
||||||
return ""
|
return ""
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
193
plugins/wasm-go/extensions/ai-proxy/provider/vllm_test.go
Normal file
193
plugins/wasm-go/extensions/ai-proxy/provider/vllm_test.go
Normal file
@@ -0,0 +1,193 @@
|
|||||||
|
package provider
|
||||||
|
|
||||||
|
import (
|
||||||
|
"testing"
|
||||||
|
|
||||||
|
"github.com/stretchr/testify/assert"
|
||||||
|
)
|
||||||
|
|
||||||
|
func TestVllmProviderInitializer_DefaultCapabilities(t *testing.T) {
|
||||||
|
initializer := &vllmProviderInitializer{}
|
||||||
|
|
||||||
|
capabilities := initializer.DefaultCapabilities()
|
||||||
|
expected := map[string]string{
|
||||||
|
string(ApiNameChatCompletion): PathOpenAIChatCompletions,
|
||||||
|
string(ApiNameCompletion): PathOpenAICompletions,
|
||||||
|
string(ApiNameModels): PathOpenAIModels,
|
||||||
|
string(ApiNameEmbeddings): PathOpenAIEmbeddings,
|
||||||
|
string(ApiNameCohereV1Rerank): PathCohereV1Rerank,
|
||||||
|
string(ApiNameAnthropicMessages): PathAnthropicMessages,
|
||||||
|
string(ApiNameAnthropicCountTokens): PathAnthropicMessagesCountTokens,
|
||||||
|
string(ApiNameResponses): PathOpenAIResponses,
|
||||||
|
string(ApiNameAudioTranscription): PathOpenAIAudioTranscriptions,
|
||||||
|
string(ApiNameAudioTranslation): PathOpenAIAudioTranslations,
|
||||||
|
}
|
||||||
|
|
||||||
|
assert.Equal(t, expected, capabilities)
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestVllmProvider_GetApiName(t *testing.T) {
|
||||||
|
provider := &vllmProvider{}
|
||||||
|
|
||||||
|
cases := []struct {
|
||||||
|
path string
|
||||||
|
expected ApiName
|
||||||
|
}{
|
||||||
|
// existing (regression guard)
|
||||||
|
{PathOpenAIChatCompletions, ApiNameChatCompletion},
|
||||||
|
{PathOpenAICompletions, ApiNameCompletion},
|
||||||
|
{PathOpenAIModels, ApiNameModels},
|
||||||
|
{PathOpenAIEmbeddings, ApiNameEmbeddings},
|
||||||
|
{PathCohereV1Rerank, ApiNameCohereV1Rerank},
|
||||||
|
// new passthrough endpoints
|
||||||
|
// count_tokens must be checked before /v1/messages (substring) — guards the ordering
|
||||||
|
{PathAnthropicMessagesCountTokens, ApiNameAnthropicCountTokens},
|
||||||
|
{PathAnthropicMessages, ApiNameAnthropicMessages},
|
||||||
|
{PathOpenAIResponses, ApiNameResponses},
|
||||||
|
{PathOpenAIAudioTranscriptions, ApiNameAudioTranscription},
|
||||||
|
{PathOpenAIAudioTranslations, ApiNameAudioTranslation},
|
||||||
|
// unknown path
|
||||||
|
{"/v1/unknown", ApiName("")},
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, c := range cases {
|
||||||
|
t.Run(c.path, func(t *testing.T) {
|
||||||
|
assert.Equal(t, c.expected, provider.GetApiName(c.path))
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestVllm_isVllmDirectPath(t *testing.T) {
|
||||||
|
cases := []struct {
|
||||||
|
path string
|
||||||
|
want bool
|
||||||
|
}{
|
||||||
|
// existing direct endpoints
|
||||||
|
{"/v1/chat/completions", true},
|
||||||
|
{"/v1/completions", true},
|
||||||
|
{"/v1/rerank", true},
|
||||||
|
// newly added passthrough endpoints
|
||||||
|
{"/v1/responses", true},
|
||||||
|
{"/v1/messages", true},
|
||||||
|
{"/v1/messages/count_tokens", true},
|
||||||
|
{"/v1/audio/transcriptions", true},
|
||||||
|
{"/v1/audio/translations", true},
|
||||||
|
// base paths must NOT be treated as direct endpoints
|
||||||
|
{"/v1", false},
|
||||||
|
{"/", false},
|
||||||
|
{"/custom", false},
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, c := range cases {
|
||||||
|
t.Run(c.path, func(t *testing.T) {
|
||||||
|
assert.Equal(t, c.want, isVllmDirectPath(c.path))
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// TestVllmProviderInitializer_CreateProvider_customUrl verifies vllmCustomUrl
|
||||||
|
// handling: a base path gets the per-API suffix appended, while a direct endpoint
|
||||||
|
// URL is forwarded as-is (no double-append such as /v1/responses/responses).
|
||||||
|
func TestVllmProviderInitializer_CreateProvider_customUrl(t *testing.T) {
|
||||||
|
initializer := &vllmProviderInitializer{}
|
||||||
|
|
||||||
|
cases := []struct {
|
||||||
|
name string
|
||||||
|
customUrl string
|
||||||
|
wantDirect bool
|
||||||
|
wantPath string // expected customPath when direct
|
||||||
|
wantDomain string
|
||||||
|
capability ApiName // sample capability to check for base paths
|
||||||
|
wantCap string
|
||||||
|
}{
|
||||||
|
{
|
||||||
|
name: "base path v1",
|
||||||
|
customUrl: "http://host:8000/v1",
|
||||||
|
wantDirect: false,
|
||||||
|
wantDomain: "host:8000",
|
||||||
|
capability: ApiNameResponses,
|
||||||
|
wantCap: "/v1/responses",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "custom base path",
|
||||||
|
customUrl: "http://host:8000/custom",
|
||||||
|
wantDirect: false,
|
||||||
|
wantDomain: "host:8000",
|
||||||
|
capability: ApiNameAnthropicMessages,
|
||||||
|
wantCap: "/custom/messages",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "direct responses endpoint",
|
||||||
|
customUrl: "http://host:8000/v1/responses",
|
||||||
|
wantDirect: true,
|
||||||
|
wantPath: "/v1/responses",
|
||||||
|
wantDomain: "host:8000",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "direct anthropic messages endpoint",
|
||||||
|
customUrl: "http://host:8000/v1/messages",
|
||||||
|
wantDirect: true,
|
||||||
|
wantPath: "/v1/messages",
|
||||||
|
wantDomain: "host:8000",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "direct count_tokens endpoint",
|
||||||
|
customUrl: "http://host:8000/v1/messages/count_tokens",
|
||||||
|
wantDirect: true,
|
||||||
|
wantPath: "/v1/messages/count_tokens",
|
||||||
|
wantDomain: "host:8000",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "direct audio transcription endpoint",
|
||||||
|
customUrl: "http://host:8000/v1/audio/transcriptions",
|
||||||
|
wantDirect: true,
|
||||||
|
wantPath: "/v1/audio/transcriptions",
|
||||||
|
wantDomain: "host:8000",
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, c := range cases {
|
||||||
|
t.Run(c.name, func(t *testing.T) {
|
||||||
|
p, err := initializer.CreateProvider(ProviderConfig{vllmCustomUrl: c.customUrl})
|
||||||
|
assert.NoError(t, err)
|
||||||
|
vp, ok := p.(*vllmProvider)
|
||||||
|
assert.True(t, ok)
|
||||||
|
assert.Equal(t, c.wantDirect, vp.isDirectCustomPath)
|
||||||
|
assert.Equal(t, c.wantDomain, vp.customDomain)
|
||||||
|
if c.wantDirect {
|
||||||
|
assert.Equal(t, c.wantPath, vp.customPath)
|
||||||
|
}
|
||||||
|
if c.capability != "" {
|
||||||
|
assert.Equal(t, c.wantCap, vp.config.capabilities[string(c.capability)])
|
||||||
|
}
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// TestVllm_passthroughBodyAndSupport guards the body-handling and capability
|
||||||
|
// behaviour of the passthrough endpoints.
|
||||||
|
func TestVllm_passthroughBodyAndSupport(t *testing.T) {
|
||||||
|
cfg := &ProviderConfig{}
|
||||||
|
// Audio endpoints carry multipart/form-data bodies and must be passed through
|
||||||
|
// untouched (no JSON processing).
|
||||||
|
assert.False(t, cfg.needToProcessRequestBody(ApiNameAudioTranscription))
|
||||||
|
assert.False(t, cfg.needToProcessRequestBody(ApiNameAudioTranslation))
|
||||||
|
// Anthropic messages / count_tokens / responses carry JSON bodies (model
|
||||||
|
// mapping etc.), so they are processed.
|
||||||
|
assert.True(t, cfg.needToProcessRequestBody(ApiNameAnthropicMessages))
|
||||||
|
assert.True(t, cfg.needToProcessRequestBody(ApiNameAnthropicCountTokens))
|
||||||
|
assert.True(t, cfg.needToProcessRequestBody(ApiNameResponses))
|
||||||
|
|
||||||
|
// A vLLM provider declares the count_tokens capability and supports it.
|
||||||
|
vllmCfg := &ProviderConfig{}
|
||||||
|
vllmCfg.setDefaultCapabilities((&vllmProviderInitializer{}).DefaultCapabilities())
|
||||||
|
assert.True(t, vllmCfg.isSupportedAPI(ApiNameAnthropicCountTokens))
|
||||||
|
|
||||||
|
// A provider that does not declare it (the path is now globally recognized)
|
||||||
|
// rejects the request via isSupportedAPI instead of mishandling it.
|
||||||
|
otherCfg := &ProviderConfig{}
|
||||||
|
otherCfg.setDefaultCapabilities(map[string]string{
|
||||||
|
string(ApiNameChatCompletion): PathOpenAIChatCompletions,
|
||||||
|
})
|
||||||
|
assert.False(t, otherCfg.isSupportedAPI(ApiNameAnthropicCountTokens))
|
||||||
|
}
|
||||||
Reference in New Issue
Block a user