diff --git a/plugins/wasm-go/extensions/ai-proxy/README.md b/plugins/wasm-go/extensions/ai-proxy/README.md index f7c9b3fbb..80fd0a082 100644 --- a/plugins/wasm-go/extensions/ai-proxy/README.md +++ b/plugins/wasm-go/extensions/ai-proxy/README.md @@ -19,7 +19,6 @@ description: AI 代理插件配置参考 `provider`的配置字段说明如下: - | 名称 | 数据类型 | 填写要求 | 默认值 | 描述 | | -------------- | --------------- | -------- | ------ | ------------------------------------------------------------ | | `type` | string | 必填 | - | AI 服务提供商名称 | @@ -106,9 +105,9 @@ MiniMax所对应的 `type` 为 `minimax`。它特有的配置字段如下: Anthropic Claude 所对应的 `type` 为 `claude`。它特有的配置字段如下: -| 名称 | 数据类型 | 填写要求 | 默认值 | 描述 | -|-----------|--------|-----|-----|-------------------| -| `version` | string | 必填 | - | Claude 服务的 API 版本 | +| 名称 | 数据类型 | 填写要求 | 默认值 | 描述 | +|-----------|--------|------|-----|----------------------------------| +| `claudeVersion` | string | 可选 | - | Claude 服务的 API 版本,默认为 2023-06-01 | #### Ollama @@ -533,6 +532,7 @@ provider: type: claude apiTokens: - "YOUR_CLAUDE_API_TOKEN" + version: "2023-06-01" ``` **请求示例** @@ -554,22 +554,25 @@ provider: ```json { - "id": "msg_01K8iLH18FGN7Xd9deurwtoD", - "type": "message", - "role": "assistant", - "model": "claude-3-opus-20240229", - "stop_sequence": null, - "usage": { - "input_tokens": 16, - "output_tokens": 141 - }, - "content": [ + "id": "msg_01Jt3GzyjuzymnxmZERJguLK", + "choices": [ { - "type": "text", - "text": "你好!我是Claude,一个由Anthropic公司开发的人工智能助手。我的任务是尽我所能帮助人类,比如回答问题,提供建议和意见,协助完成任务等。我掌握了很多知识,也具备一定的分析和推理能力,但我不是人类,也没有实体的身体。很高兴认识你!如果有什么需要帮助的地方,欢迎随时告诉我。" + "index": 0, + "message": { + "role": "assistant", + "content": "您好,我是一个由人工智能公司Anthropic开发的聊天助手。我的名字叫Claude,是一个聪明友善、知识渊博的对话系统。很高兴认识您!我可以就各种话题与您聊天,回答问题,提供建议和帮助。我会尽最大努力给您有帮助的回复。希望我们能有个愉快的交流!" + }, + "finish_reason": "stop" } ], - "stop_reason": "end_turn" + "created": 1717385918, + "model": "claude-3-opus-20240229", + "object": "chat.completion", + "usage": { + "prompt_tokens": 16, + "completion_tokens": 126, + "total_tokens": 142 + } } ``` ### 使用 OpenAI 协议代理混元服务 @@ -757,6 +760,8 @@ provider: ## 完整配置示例 +### Kubernetes 示例 + 以下以使用 OpenAI 协议代理 Groq 服务为例,展示完整的插件配置示例。 ```yaml @@ -828,3 +833,130 @@ curl "http:///v1/chat/completions" -H "Content-Type: application/js ] }' ``` + +### Docker-Compose 示例 + +`docker-compose.yml` 配置文件: + +```yaml +version: '3.7' +services: + envoy: + image: higress-registry.cn-hangzhou.cr.aliyuncs.com/higress/envoy:1.20 + entrypoint: /usr/local/bin/envoy + # 开启了 debug 级别日志方便调试 + command: -c /etc/envoy/envoy.yaml --component-log-level wasm:debug + networks: + - higress-net + ports: + - "10000:10000" + volumes: + - ./envoy.yaml:/etc/envoy/envoy.yaml + - ./plugin.wasm:/etc/envoy/plugin.wasm +networks: + higress-net: {} +``` + +`envoy.yaml` 配置文件: + +```yaml +admin: + address: + socket_address: + protocol: TCP + address: 0.0.0.0 + port_value: 9901 +static_resources: + listeners: + - name: listener_0 + address: + socket_address: + protocol: TCP + address: 0.0.0.0 + port_value: 10000 + filter_chains: + - filters: + - name: envoy.filters.network.http_connection_manager + typed_config: + "@type": type.googleapis.com/envoy.extensions.filters.network.http_connection_manager.v3.HttpConnectionManager + scheme_header_transformation: + scheme_to_overwrite: https + stat_prefix: ingress_http + # Output envoy logs to stdout + access_log: + - name: envoy.access_loggers.stdout + typed_config: + "@type": type.googleapis.com/envoy.extensions.access_loggers.stream.v3.StdoutAccessLog + # Modify as required + route_config: + name: local_route + virtual_hosts: + - name: local_service + domains: [ "*" ] + routes: + - match: + prefix: "/" + route: + cluster: claude + timeout: 300s + http_filters: + - name: claude + typed_config: + "@type": type.googleapis.com/udpa.type.v1.TypedStruct + type_url: type.googleapis.com/envoy.extensions.filters.http.wasm.v3.Wasm + value: + config: + name: claude + vm_config: + runtime: envoy.wasm.runtime.v8 + code: + local: + filename: /etc/envoy/plugin.wasm + configuration: + "@type": "type.googleapis.com/google.protobuf.StringValue" + value: | # 插件配置 + { + "provider": { + "type": "claude", + "apiTokens": [ + "YOUR_API_TOKEN" + ] + } + } + - name: envoy.filters.http.router + clusters: + - name: claude + connect_timeout: 30s + type: LOGICAL_DNS + dns_lookup_family: V4_ONLY + lb_policy: ROUND_ROBIN + load_assignment: + cluster_name: claude + endpoints: + - lb_endpoints: + - endpoint: + address: + socket_address: + address: api.anthropic.com # API 服务地址 + port_value: 443 + transport_socket: + name: envoy.transport_sockets.tls + typed_config: + "@type": type.googleapis.com/envoy.extensions.transport_sockets.tls.v3.UpstreamTlsContext + "sni": "api.anthropic.com" +``` + +访问示例: + +```bash +curl "http://localhost:10000/v1/chat/completions" -H "Content-Type: application/json" -d '{ + "model": "claude-3-opus-20240229", + "max_tokens": 1024, + "messages": [ + { + "role": "user", + "content": "你好,你是谁?" + } + ] +}' +``` diff --git a/plugins/wasm-go/extensions/ai-proxy/provider/claude.go b/plugins/wasm-go/extensions/ai-proxy/provider/claude.go new file mode 100644 index 000000000..d67eb52c1 --- /dev/null +++ b/plugins/wasm-go/extensions/ai-proxy/provider/claude.go @@ -0,0 +1,367 @@ +package provider + +import ( + "encoding/json" + "errors" + "fmt" + "github.com/alibaba/higress/plugins/wasm-go/extensions/ai-proxy/util" + "github.com/alibaba/higress/plugins/wasm-go/pkg/wrapper" + "github.com/higress-group/proxy-wasm-go-sdk/proxywasm" + "github.com/higress-group/proxy-wasm-go-sdk/proxywasm/types" + "strings" + "time" +) + +// claudeProvider is the provider for Claude service. +const ( + claudeDomain = "api.anthropic.com" + claudeChatCompletionPath = "/v1/messages" + defaultVersion = "2023-06-01" + defaultMaxTokens = 4096 +) + +type claudeProviderInitializer struct{} + +type claudeTextGenRequest struct { + Model string `json:"model"` + Messages []chatMessage `json:"messages"` + System string `json:"system,omitempty"` + MaxTokens int `json:"max_tokens,omitempty"` + StopSequences []string `json:"stop_sequences,omitempty"` + Stream bool `json:"stream,omitempty"` + Temperature float64 `json:"temperature,omitempty"` + TopP float64 `json:"top_p,omitempty"` + TopK int `json:"top_k,omitempty"` +} + +type claudeTextGenResponse struct { + Id string `json:"id"` + Type string `json:"type"` + Role string `json:"role"` + Content []claudeTextGenContent `json:"content"` + Model string `json:"model"` + StopReason *string `json:"stop_reason"` + StopSequence *string `json:"stop_sequence"` + Usage claudeTextGenUsage `json:"usage"` + Error *claudeTextGenError `json:"error"` +} + +type claudeTextGenContent struct { + Type string `json:"type"` + Text string `json:"text,omitempty"` +} + +type claudeTextGenUsage struct { + InputTokens int `json:"input_tokens"` + OutputTokens int `json:"output_tokens"` +} + +type claudeTextGenError struct { + Type string `json:"type"` + Message string `json:"message"` +} + +type claudeTextGenStreamResponse struct { + Type string `json:"type"` + Message claudeTextGenResponse `json:"message"` + Index int `json:"index"` + ContentBlock *claudeTextGenContent `json:"content_block"` + Delta *claudeTextGenDelta `json:"delta"` + Usage claudeTextGenUsage `json:"usage"` +} + +type claudeTextGenDelta struct { + Type string `json:"type"` + Text string `json:"text"` + StopReason *string `json:"stop_reason"` + StopSequence *string `json:"stop_sequence"` +} + +func (c *claudeProviderInitializer) ValidateConfig(config ProviderConfig) error { + return nil +} + +func (c *claudeProviderInitializer) CreateProvider(config ProviderConfig) (Provider, error) { + return &claudeProvider{ + config: config, + contextCache: createContextCache(&config), + }, nil +} + +type claudeProvider struct { + config ProviderConfig + contextCache *contextCache +} + +func (c *claudeProvider) GetProviderType() string { + return providerTypeClaude +} + +func (c *claudeProvider) OnRequestHeaders(ctx wrapper.HttpContext, apiName ApiName, log wrapper.Log) (types.Action, error) { + if apiName != ApiNameChatCompletion { + return types.ActionContinue, errUnsupportedApiName + } + + _ = util.OverwriteRequestPath(claudeChatCompletionPath) + _ = util.OverwriteRequestHost(claudeDomain) + _ = proxywasm.ReplaceHttpRequestHeader("x-api-key", c.config.GetRandomToken()) + + if c.config.claudeVersion == "" { + c.config.claudeVersion = defaultVersion + } + _ = proxywasm.AddHttpRequestHeader("anthropic-version", c.config.claudeVersion) + _ = proxywasm.RemoveHttpRequestHeader("Accept-Encoding") + _ = proxywasm.RemoveHttpRequestHeader("Content-Length") + + return types.ActionContinue, nil +} + +func (c *claudeProvider) OnRequestBody(ctx wrapper.HttpContext, apiName ApiName, body []byte, log wrapper.Log) (types.Action, error) { + if apiName != ApiNameChatCompletion { + return types.ActionContinue, errUnsupportedApiName + } + + // use original protocol + if c.config.protocol == protocolOriginal { + if c.config.context == nil { + return types.ActionContinue, nil + } + + request := &claudeTextGenRequest{} + if err := json.Unmarshal(body, request); err != nil { + return types.ActionContinue, fmt.Errorf("unable to unmarshal request: %v", err) + } + + err := c.contextCache.GetContent(func(content string, err error) { + defer func() { + _ = proxywasm.ResumeHttpRequest() + }() + + if err != nil { + log.Errorf("failed to load context file: %v", err) + _ = util.SendResponse(500, util.MimeTypeTextPlain, fmt.Sprintf("failed to load context file: %v", err)) + } + if err := replaceJsonRequestBody(request, log); err != nil { + _ = util.SendResponse(500, util.MimeTypeTextPlain, fmt.Sprintf("failed to replace request body: %v", err)) + } + }, log) + if err == nil { + return types.ActionPause, nil + } + return types.ActionContinue, err + } + + // use openai protocol + request := &chatCompletionRequest{} + if err := decodeChatCompletionRequest(body, request); err != nil { + return types.ActionContinue, err + } + + model := request.Model + if model == "" { + return types.ActionContinue, errors.New("missing model in chat completion request") + } + ctx.SetContext(ctxKeyOriginalRequestModel, model) + mappedModel := getMappedModel(model, c.config.modelMapping, log) + if mappedModel == "" { + return types.ActionContinue, errors.New("model becomes empty after applying the configured mapping") + } + request.Model = mappedModel + ctx.SetContext(ctxKeyFinalRequestModel, request.Model) + + streaming := request.Stream + if streaming { + _ = proxywasm.ReplaceHttpRequestHeader("Accept", "text/event-stream") + } + + if c.config.context == nil { + claudeRequest := c.buildClaudeTextGenRequest(request) + return types.ActionContinue, replaceJsonRequestBody(claudeRequest, log) + } + + err := c.contextCache.GetContent(func(content string, err error) { + defer func() { + _ = proxywasm.ResumeHttpRequest() + }() + if err != nil { + log.Errorf("failed to load context file: %v", err) + _ = util.SendResponse(500, util.MimeTypeTextPlain, fmt.Sprintf("failed to load context file: %v", err)) + } + insertContextMessage(request, content) + claudeRequest := c.buildClaudeTextGenRequest(request) + if err := replaceJsonRequestBody(claudeRequest, log); err != nil { + _ = util.SendResponse(500, util.MimeTypeTextPlain, fmt.Sprintf("failed to replace request body: %v", err)) + } + }, log) + if err == nil { + return types.ActionPause, nil + } + return types.ActionContinue, err +} + +func (c *claudeProvider) OnResponseBody(ctx wrapper.HttpContext, apiName ApiName, body []byte, log wrapper.Log) (types.Action, error) { + claudeResponse := &claudeTextGenResponse{} + if err := json.Unmarshal(body, claudeResponse); err != nil { + return types.ActionContinue, fmt.Errorf("unable to unmarshal claude response: %v", err) + } + if claudeResponse.Error != nil { + return types.ActionContinue, fmt.Errorf("claude response error, error_type: %s, error_message: %s", claudeResponse.Error.Type, claudeResponse.Error.Message) + } + response := c.responseClaude2OpenAI(ctx, claudeResponse) + return types.ActionContinue, replaceJsonResponseBody(response, log) +} + +func (c *claudeProvider) OnResponseHeaders(ctx wrapper.HttpContext, apiName ApiName, log wrapper.Log) (types.Action, error) { + // use original protocol, skip OnStreamingResponseBody() and OnResponseBody() + if c.config.protocol == protocolOriginal { + ctx.DontReadResponseBody() + return types.ActionContinue, nil + } + + _ = proxywasm.RemoveHttpResponseHeader("Content-Length") + return types.ActionContinue, nil +} + +func (c *claudeProvider) OnStreamingResponseBody(ctx wrapper.HttpContext, name ApiName, chunk []byte, isLastChunk bool, log wrapper.Log) ([]byte, error) { + if isLastChunk || len(chunk) == 0 { + return nil, nil + } + + responseBuilder := &strings.Builder{} + lines := strings.Split(string(chunk), "\n") + for _, data := range lines { + // only process the line starting with "data:" + if strings.HasPrefix(data, "data:") { + // extract json data from the line + jsonData := strings.TrimPrefix(data, "data:") + var claudeResponse claudeTextGenStreamResponse + if err := json.Unmarshal([]byte(jsonData), &claudeResponse); err != nil { + log.Errorf("unable to unmarshal claude response: %v", err) + continue + } + response := c.streamResponseClaude2OpenAI(ctx, &claudeResponse, log) + if response != nil { + responseBody, err := json.Marshal(response) + if err != nil { + log.Errorf("unable to marshal response: %v", err) + return nil, err + } + c.appendResponse(responseBuilder, string(responseBody)) + } + } + } + modifiedResponseChunk := responseBuilder.String() + log.Debugf("modified response chunk: %s", modifiedResponseChunk) + return []byte(modifiedResponseChunk), nil +} + +func (c *claudeProvider) buildClaudeTextGenRequest(origRequest *chatCompletionRequest) *claudeTextGenRequest { + claudeRequest := claudeTextGenRequest{ + Model: origRequest.Model, + MaxTokens: origRequest.MaxTokens, + StopSequences: origRequest.Stop, + Stream: origRequest.Stream, + Temperature: origRequest.Temperature, + TopP: origRequest.TopP, + } + if claudeRequest.MaxTokens == 0 { + claudeRequest.MaxTokens = defaultMaxTokens + } + + for _, message := range origRequest.Messages { + if message.Role == roleSystem { + claudeRequest.System = message.Content + continue + } + claudeMessage := chatMessage{ + Role: message.Role, + Content: message.Content, + } + claudeRequest.Messages = append(claudeRequest.Messages, claudeMessage) + } + return &claudeRequest +} + +func (c *claudeProvider) responseClaude2OpenAI(ctx wrapper.HttpContext, origResponse *claudeTextGenResponse) *chatCompletionResponse { + choice := chatCompletionChoice{ + Index: 0, + Message: &chatMessage{Role: roleAssistant, Content: origResponse.Content[0].Text}, + FinishReason: stopReasonClaude2OpenAI(origResponse.StopReason), + } + + return &chatCompletionResponse{ + Id: origResponse.Id, + Created: time.Now().UnixMilli() / 1000, + Model: ctx.GetContext(ctxKeyFinalRequestModel).(string), + SystemFingerprint: "", + Object: objectChatCompletion, + Choices: []chatCompletionChoice{choice}, + Usage: chatCompletionUsage{ + PromptTokens: origResponse.Usage.InputTokens, + CompletionTokens: origResponse.Usage.OutputTokens, + TotalTokens: origResponse.Usage.InputTokens + origResponse.Usage.OutputTokens, + }, + } +} + +func stopReasonClaude2OpenAI(reason *string) string { + if reason == nil { + return "" + } + switch *reason { + case "end_turn": + return finishReasonStop + case "stop_sequence": + return finishReasonStop + case "max_tokens": + return finishReasonLength + default: + return *reason + } +} + +func (c *claudeProvider) streamResponseClaude2OpenAI(ctx wrapper.HttpContext, origResponse *claudeTextGenStreamResponse, log wrapper.Log) *chatCompletionResponse { + switch origResponse.Type { + case "message_start": + choice := chatCompletionChoice{ + Index: 0, + Delta: &chatMessage{Role: roleAssistant, Content: ""}, + } + return createChatCompletionResponse(ctx, origResponse, choice) + + case "content_block_delta": + choice := chatCompletionChoice{ + Index: 0, + Delta: &chatMessage{Content: origResponse.Delta.Text}, + } + return createChatCompletionResponse(ctx, origResponse, choice) + + case "message_delta": + choice := chatCompletionChoice{ + Index: 0, + Delta: &chatMessage{}, + FinishReason: stopReasonClaude2OpenAI(origResponse.Delta.StopReason), + } + return createChatCompletionResponse(ctx, origResponse, choice) + case "content_block_stop", "message_stop": + log.Debugf("skip processing response type: %s", origResponse.Type) + return nil + default: + log.Errorf("Unexpected response type: %s", origResponse.Type) + return nil + } +} + +func createChatCompletionResponse(ctx wrapper.HttpContext, response *claudeTextGenStreamResponse, choice chatCompletionChoice) *chatCompletionResponse { + return &chatCompletionResponse{ + Id: response.Message.Id, + Created: time.Now().UnixMilli() / 1000, + Model: ctx.GetContext(ctxKeyFinalRequestModel).(string), + Object: objectChatCompletionChunk, + Choices: []chatCompletionChoice{choice}, + } +} + +func (c *claudeProvider) appendResponse(responseBuilder *strings.Builder, responseBody string) { + responseBuilder.WriteString(fmt.Sprintf("%s %s\n\n", streamDataItemKey, responseBody)) +} diff --git a/plugins/wasm-go/extensions/ai-proxy/provider/model.go b/plugins/wasm-go/extensions/ai-proxy/provider/model.go index 7e709abd0..bc658a852 100644 --- a/plugins/wasm-go/extensions/ai-proxy/provider/model.go +++ b/plugins/wasm-go/extensions/ai-proxy/provider/model.go @@ -30,6 +30,7 @@ type chatCompletionRequest struct { Tools []tool `json:"tools,omitempty"` ToolChoice *toolChoice `json:"tool_choice,omitempty"` User string `json:"user,omitempty"` + Stop []string `json:"stop,omitempty"` } type streamOptions struct { diff --git a/plugins/wasm-go/extensions/ai-proxy/provider/provider.go b/plugins/wasm-go/extensions/ai-proxy/provider/provider.go index 7104340f8..b56c52ca7 100644 --- a/plugins/wasm-go/extensions/ai-proxy/provider/provider.go +++ b/plugins/wasm-go/extensions/ai-proxy/provider/provider.go @@ -25,6 +25,7 @@ const ( providerTypeDeepSeek = "deepseek" providerTypeZhipuAi = "zhipuai" providerTypeOllama = "ollama" + providerTypeClaude = "claude" providerTypeBaidu = "baidu" providerTypeHunyuan = "hunyuan" providerTypeStepfun = "stepfun" @@ -34,10 +35,11 @@ const ( protocolOriginal = "original" roleSystem = "system" - roleUser = "user" roleAssistant = "assistant" + roleUser = "user" - finishReasonStop = "stop" + finishReasonStop = "stop" + finishReasonLength = "length" ctxKeyIncrementalStreaming = "incrementalStreaming" ctxKeyApiName = "apiKey" @@ -73,6 +75,7 @@ var ( providerTypeDeepSeek: &deepseekProviderInitializer{}, providerTypeZhipuAi: &zhipuAiProviderInitializer{}, providerTypeOllama: &ollamaProviderInitializer{}, + providerTypeClaude: &claudeProviderInitializer{}, providerTypeBaidu: &baiduProviderInitializer{}, providerTypeHunyuan: &hunyuanProviderInitializer{}, providerTypeStepfun: &stepfunProviderInitializer{}, @@ -150,6 +153,9 @@ type ProviderConfig struct { // @Title zh-CN 模型对话上下文 // @Description zh-CN 配置一个外部获取对话上下文的文件来源,用于在AI请求中补充对话上下文 context *ContextConfig `required:"false" yaml:"context" json:"context"` + // @Title zh-CN 版本 + // @Description zh-CN 请求AI服务的版本,目前仅适用于Claude AI服务 + claudeVersion string `required:"false" yaml:"version" json:"version"` } func (c *ProviderConfig) FromJson(json gjson.Result) { @@ -184,7 +190,7 @@ func (c *ProviderConfig) FromJson(json gjson.Result) { c.context = &ContextConfig{} c.context.FromJson(contextJson) } - + c.claudeVersion = json.Get("claudeVersion").String() c.hunyuanAuthId = json.Get("hunyuanAuthId").String() c.hunyuanAuthKey = json.Get("hunyuanAuthKey").String() c.minimaxGroupId = json.Get("minimaxGroupId").String()