feat: support claude ai model (#969)

Signed-off-by: chengzw <chengzw258@163.com>
2026-06-10 05:07:30 +08:00 · 2024-06-19 13:53:21 +08:00
parent ab1bc0a73a
commit 51dca7055a
4 changed files with 526 additions and 20 deletions
--- a/plugins/wasm-go/extensions/ai-proxy/README.md
+++ b/plugins/wasm-go/extensions/ai-proxy/README.md
@@ -19,7 +19,6 @@ description: AI 代理插件配置参考
 `provider`的配置字段说明如下：
 | 名称           | 数据类型        | 填写要求 | 默认值 | 描述                                                         |
 | -------------- | --------------- | -------- | ------ | ------------------------------------------------------------ |
 | `type`         | string          | 必填     | -      | AI 服务提供商名称 |
@@ -107,8 +106,8 @@ MiniMax所对应的 `type` 为 `minimax`。它特有的配置字段如下：
 Anthropic Claude 所对应的 `type` 为 `claude`。它特有的配置字段如下：
 | 名称        | 数据类型   | 填写要求 | 默认值 | 描述                               |
-|-----------|--------|-----|-----|-------------------|
+|-----------|--------|------|-----|----------------------------------|
-| `version` | string | 必填  | -   | Claude 服务的 API 版本 |
+| `claudeVersion` | string | 可选   | -   | Claude 服务的 API 版本，默认为 2023-06-01 |
 #### Ollama
@@ -533,6 +532,7 @@ provider:
  type: claude
  apiTokens:
    - "YOUR_CLAUDE_API_TOKEN"
  version: "2023-06-01"
 ```
 **请求示例**
@@ -554,22 +554,25 @@ provider:
 ```json
 {
-  "id": "msg_01K8iLH18FGN7Xd9deurwtoD",
+  "id": "msg_01Jt3GzyjuzymnxmZERJguLK",
-  "type": "message",
+  "choices": [
  "role": "assistant",
  "model": "claude-3-opus-20240229",
  "stop_sequence": null,
  "usage": {
    "input_tokens": 16,
    "output_tokens": 141
  },
  "content": [
    {
-      "type": "text",
+      "index": 0,
-      "text": "你好!我是Claude,一个由Anthropic公司开发的人工智能助手。我的任务是尽我所能帮助人类,比如回答问题,提供建议和意见,协助完成任务等。我掌握了很多知识,也具备一定的分析和推理能力,但我不是人类,也没有实体的身体。很高兴认识你!如果有什么需要帮助的地方,欢迎随时告诉我。"
+      "message": {
        "role": "assistant",
        "content": "您好,我是一个由人工智能公司Anthropic开发的聊天助手。我的名字叫Claude,是一个聪明友善、知识渊博的对话系统。很高兴认识您!我可以就各种话题与您聊天,回答问题,提供建议和帮助。我会尽最大努力给您有帮助的回复。希望我们能有个愉快的交流!"
      },
      "finish_reason": "stop"
    }
  ],
-  "stop_reason": "end_turn"
+  "created": 1717385918,
  "model": "claude-3-opus-20240229",
  "object": "chat.completion",
  "usage": {
    "prompt_tokens": 16,
    "completion_tokens": 126,
    "total_tokens": 142
  }
 }
 ```
 ### 使用 OpenAI 协议代理混元服务
@@ -757,6 +760,8 @@ provider:
 ## 完整配置示例
 ### Kubernetes 示例
 以下以使用 OpenAI 协议代理 Groq 服务为例，展示完整的插件配置示例。
 ```yaml
@@ -828,3 +833,130 @@ curl "http://<YOUR-DOMAIN>/v1/chat/completions" -H "Content-Type: application/js
  ]
 }'
 ```
 ### Docker-Compose 示例
 `docker-compose.yml` 配置文件：
 ```yaml
 version: '3.7'
 services:
  envoy:
    image: higress-registry.cn-hangzhou.cr.aliyuncs.com/higress/envoy:1.20
    entrypoint: /usr/local/bin/envoy
    # 开启了 debug 级别日志方便调试
    command: -c /etc/envoy/envoy.yaml --component-log-level wasm:debug
    networks:
      - higress-net
    ports:
      - "10000:10000"
    volumes:
      - ./envoy.yaml:/etc/envoy/envoy.yaml
      - ./plugin.wasm:/etc/envoy/plugin.wasm
 networks:
  higress-net: {}
 ```
 `envoy.yaml` 配置文件：
 ```yaml
 admin:
  address:
    socket_address:
      protocol: TCP
      address: 0.0.0.0
      port_value: 9901
 static_resources:
  listeners:
    - name: listener_0
      address:
        socket_address:
          protocol: TCP
          address: 0.0.0.0
          port_value: 10000
      filter_chains:
        - filters:
            - name: envoy.filters.network.http_connection_manager
              typed_config:
                "@type": type.googleapis.com/envoy.extensions.filters.network.http_connection_manager.v3.HttpConnectionManager
                scheme_header_transformation:
                  scheme_to_overwrite: https
                stat_prefix: ingress_http
                # Output envoy logs to stdout
                access_log:
                  - name: envoy.access_loggers.stdout
                    typed_config:
                      "@type": type.googleapis.com/envoy.extensions.access_loggers.stream.v3.StdoutAccessLog
                # Modify as required
                route_config:
                  name: local_route
                  virtual_hosts:
                    - name: local_service
                      domains: [ "*" ]
                      routes:
                        - match:
                            prefix: "/"
                          route:
                            cluster: claude
                            timeout: 300s
                http_filters:
                  - name: claude
                    typed_config:
                      "@type": type.googleapis.com/udpa.type.v1.TypedStruct
                      type_url: type.googleapis.com/envoy.extensions.filters.http.wasm.v3.Wasm
                      value:
                        config:
                          name: claude
                          vm_config:
                            runtime: envoy.wasm.runtime.v8
                            code:
                              local:
                                filename: /etc/envoy/plugin.wasm
                          configuration:
                            "@type": "type.googleapis.com/google.protobuf.StringValue"
                            value: | # 插件配置
                              {
                                "provider": {
                                  "type": "claude",                                
                                  "apiTokens": [
                                    "YOUR_API_TOKEN"
                                  ]                  
                                }
                              }
                  - name: envoy.filters.http.router
  clusters:
    - name: claude
      connect_timeout: 30s
      type: LOGICAL_DNS
      dns_lookup_family: V4_ONLY
      lb_policy: ROUND_ROBIN
      load_assignment:
        cluster_name: claude
        endpoints:
          - lb_endpoints:
              - endpoint:
                  address:
                    socket_address:
                      address: api.anthropic.com # API 服务地址
                      port_value: 443
      transport_socket:
        name: envoy.transport_sockets.tls
        typed_config:
          "@type": type.googleapis.com/envoy.extensions.transport_sockets.tls.v3.UpstreamTlsContext
          "sni": "api.anthropic.com"
 ```
 访问示例：
 ```bash
 curl "http://localhost:10000/v1/chat/completions"  -H "Content-Type: application/json"  -d '{
  "model": "claude-3-opus-20240229",
  "max_tokens": 1024,
  "messages": [
    {
      "role": "user",
      "content": "你好，你是谁？"
    }
  ]
 }'
 ```
--- a/plugins/wasm-go/extensions/ai-proxy/provider/claude.go
+++ b/plugins/wasm-go/extensions/ai-proxy/provider/claude.go
@@ -0,0 +1,367 @@
 package provider
 import (
 	"encoding/json"
 	"errors"
 	"fmt"
 	"github.com/alibaba/higress/plugins/wasm-go/extensions/ai-proxy/util"
 	"github.com/alibaba/higress/plugins/wasm-go/pkg/wrapper"
 	"github.com/higress-group/proxy-wasm-go-sdk/proxywasm"
 	"github.com/higress-group/proxy-wasm-go-sdk/proxywasm/types"
 	"strings"
 	"time"
 )
 // claudeProvider is the provider for Claude service.
 const (
 	claudeDomain             = "api.anthropic.com"
 	claudeChatCompletionPath = "/v1/messages"
 	defaultVersion           = "2023-06-01"
 	defaultMaxTokens         = 4096
 )
 type claudeProviderInitializer struct{}
 type claudeTextGenRequest struct {
 	Model         string        `json:"model"`
 	Messages      []chatMessage `json:"messages"`
 	System        string        `json:"system,omitempty"`
 	MaxTokens     int           `json:"max_tokens,omitempty"`
 	StopSequences []string      `json:"stop_sequences,omitempty"`
 	Stream        bool          `json:"stream,omitempty"`
 	Temperature   float64       `json:"temperature,omitempty"`
 	TopP          float64       `json:"top_p,omitempty"`
 	TopK          int           `json:"top_k,omitempty"`
 }
 type claudeTextGenResponse struct {
 	Id           string                 `json:"id"`
 	Type         string                 `json:"type"`
 	Role         string                 `json:"role"`
 	Content      []claudeTextGenContent `json:"content"`
 	Model        string                 `json:"model"`
 	StopReason   *string                `json:"stop_reason"`
 	StopSequence *string                `json:"stop_sequence"`
 	Usage        claudeTextGenUsage     `json:"usage"`
 	Error        *claudeTextGenError    `json:"error"`
 }
 type claudeTextGenContent struct {
 	Type string `json:"type"`
 	Text string `json:"text,omitempty"`
 }
 type claudeTextGenUsage struct {
 	InputTokens  int `json:"input_tokens"`
 	OutputTokens int `json:"output_tokens"`
 }
 type claudeTextGenError struct {
 	Type    string `json:"type"`
 	Message string `json:"message"`
 }
 type claudeTextGenStreamResponse struct {
 	Type         string                `json:"type"`
 	Message      claudeTextGenResponse `json:"message"`
 	Index        int                   `json:"index"`
 	ContentBlock *claudeTextGenContent `json:"content_block"`
 	Delta        *claudeTextGenDelta   `json:"delta"`
 	Usage        claudeTextGenUsage    `json:"usage"`
 }
 type claudeTextGenDelta struct {
 	Type         string  `json:"type"`
 	Text         string  `json:"text"`
 	StopReason   *string `json:"stop_reason"`
 	StopSequence *string `json:"stop_sequence"`
 }
 func (c *claudeProviderInitializer) ValidateConfig(config ProviderConfig) error {
 	return nil
 }
 func (c *claudeProviderInitializer) CreateProvider(config ProviderConfig) (Provider, error) {
 	return &claudeProvider{
 		config:       config,
 		contextCache: createContextCache(&config),
 	}, nil
 }
 type claudeProvider struct {
 	config       ProviderConfig
 	contextCache *contextCache
 }
 func (c *claudeProvider) GetProviderType() string {
 	return providerTypeClaude
 }
 func (c *claudeProvider) OnRequestHeaders(ctx wrapper.HttpContext, apiName ApiName, log wrapper.Log) (types.Action, error) {
 	if apiName != ApiNameChatCompletion {
 		return types.ActionContinue, errUnsupportedApiName
 	}
 	_ = util.OverwriteRequestPath(claudeChatCompletionPath)
 	_ = util.OverwriteRequestHost(claudeDomain)
 	_ = proxywasm.ReplaceHttpRequestHeader("x-api-key", c.config.GetRandomToken())
 	if c.config.claudeVersion == "" {
 		c.config.claudeVersion = defaultVersion
 	}
 	_ = proxywasm.AddHttpRequestHeader("anthropic-version", c.config.claudeVersion)
 	_ = proxywasm.RemoveHttpRequestHeader("Accept-Encoding")
 	_ = proxywasm.RemoveHttpRequestHeader("Content-Length")
 	return types.ActionContinue, nil
 }
 func (c *claudeProvider) OnRequestBody(ctx wrapper.HttpContext, apiName ApiName, body []byte, log wrapper.Log) (types.Action, error) {
 	if apiName != ApiNameChatCompletion {
 		return types.ActionContinue, errUnsupportedApiName
 	}
 	// use original protocol
 	if c.config.protocol == protocolOriginal {
 		if c.config.context == nil {
 			return types.ActionContinue, nil
 		}
 		request := &claudeTextGenRequest{}
 		if err := json.Unmarshal(body, request); err != nil {
 			return types.ActionContinue, fmt.Errorf("unable to unmarshal request: %v", err)
 		}
 		err := c.contextCache.GetContent(func(content string, err error) {
 			defer func() {
 				_ = proxywasm.ResumeHttpRequest()
 			}()
 			if err != nil {
 				log.Errorf("failed to load context file: %v", err)
 				_ = util.SendResponse(500, util.MimeTypeTextPlain, fmt.Sprintf("failed to load context file: %v", err))
 			}
 			if err := replaceJsonRequestBody(request, log); err != nil {
 				_ = util.SendResponse(500, util.MimeTypeTextPlain, fmt.Sprintf("failed to replace request body: %v", err))
 			}
 		}, log)
 		if err == nil {
 			return types.ActionPause, nil
 		}
 		return types.ActionContinue, err
 	}
 	// use openai protocol
 	request := &chatCompletionRequest{}
 	if err := decodeChatCompletionRequest(body, request); err != nil {
 		return types.ActionContinue, err
 	}
 	model := request.Model
 	if model == "" {
 		return types.ActionContinue, errors.New("missing model in chat completion request")
 	}
 	ctx.SetContext(ctxKeyOriginalRequestModel, model)
 	mappedModel := getMappedModel(model, c.config.modelMapping, log)
 	if mappedModel == "" {
 		return types.ActionContinue, errors.New("model becomes empty after applying the configured mapping")
 	}
 	request.Model = mappedModel
 	ctx.SetContext(ctxKeyFinalRequestModel, request.Model)
 	streaming := request.Stream
 	if streaming {
 		_ = proxywasm.ReplaceHttpRequestHeader("Accept", "text/event-stream")
 	}
 	if c.config.context == nil {
 		claudeRequest := c.buildClaudeTextGenRequest(request)
 		return types.ActionContinue, replaceJsonRequestBody(claudeRequest, log)
 	}
 	err := c.contextCache.GetContent(func(content string, err error) {
 		defer func() {
 			_ = proxywasm.ResumeHttpRequest()
 		}()
 		if err != nil {
 			log.Errorf("failed to load context file: %v", err)
 			_ = util.SendResponse(500, util.MimeTypeTextPlain, fmt.Sprintf("failed to load context file: %v", err))
 		}
 		insertContextMessage(request, content)
 		claudeRequest := c.buildClaudeTextGenRequest(request)
 		if err := replaceJsonRequestBody(claudeRequest, log); err != nil {
 			_ = util.SendResponse(500, util.MimeTypeTextPlain, fmt.Sprintf("failed to replace request body: %v", err))
 		}
 	}, log)
 	if err == nil {
 		return types.ActionPause, nil
 	}
 	return types.ActionContinue, err
 }
 func (c *claudeProvider) OnResponseBody(ctx wrapper.HttpContext, apiName ApiName, body []byte, log wrapper.Log) (types.Action, error) {
 	claudeResponse := &claudeTextGenResponse{}
 	if err := json.Unmarshal(body, claudeResponse); err != nil {
 		return types.ActionContinue, fmt.Errorf("unable to unmarshal claude response: %v", err)
 	}
 	if claudeResponse.Error != nil {
 		return types.ActionContinue, fmt.Errorf("claude response error, error_type: %s, error_message: %s", claudeResponse.Error.Type, claudeResponse.Error.Message)
 	}
 	response := c.responseClaude2OpenAI(ctx, claudeResponse)
 	return types.ActionContinue, replaceJsonResponseBody(response, log)
 }
 func (c *claudeProvider) OnResponseHeaders(ctx wrapper.HttpContext, apiName ApiName, log wrapper.Log) (types.Action, error) {
 	// use original protocol, skip OnStreamingResponseBody() and OnResponseBody()
 	if c.config.protocol == protocolOriginal {
 		ctx.DontReadResponseBody()
 		return types.ActionContinue, nil
 	}
 	_ = proxywasm.RemoveHttpResponseHeader("Content-Length")
 	return types.ActionContinue, nil
 }
 func (c *claudeProvider) OnStreamingResponseBody(ctx wrapper.HttpContext, name ApiName, chunk []byte, isLastChunk bool, log wrapper.Log) ([]byte, error) {
 	if isLastChunk || len(chunk) == 0 {
 		return nil, nil
 	}
 	responseBuilder := &strings.Builder{}
 	lines := strings.Split(string(chunk), "\n")
 	for _, data := range lines {
 		// only process the line starting with "data:"
 		if strings.HasPrefix(data, "data:") {
 			// extract json data from the line
 			jsonData := strings.TrimPrefix(data, "data:")
 			var claudeResponse claudeTextGenStreamResponse
 			if err := json.Unmarshal([]byte(jsonData), &claudeResponse); err != nil {
 				log.Errorf("unable to unmarshal claude response: %v", err)
 				continue
 			}
 			response := c.streamResponseClaude2OpenAI(ctx, &claudeResponse, log)
 			if response != nil {
 				responseBody, err := json.Marshal(response)
 				if err != nil {
 					log.Errorf("unable to marshal response: %v", err)
 					return nil, err
 				}
 				c.appendResponse(responseBuilder, string(responseBody))
 			}
 		}
 	}
 	modifiedResponseChunk := responseBuilder.String()
 	log.Debugf("modified response chunk: %s", modifiedResponseChunk)
 	return []byte(modifiedResponseChunk), nil
 }
 func (c *claudeProvider) buildClaudeTextGenRequest(origRequest *chatCompletionRequest) *claudeTextGenRequest {
 	claudeRequest := claudeTextGenRequest{
 		Model:         origRequest.Model,
 		MaxTokens:     origRequest.MaxTokens,
 		StopSequences: origRequest.Stop,
 		Stream:        origRequest.Stream,
 		Temperature:   origRequest.Temperature,
 		TopP:          origRequest.TopP,
 	}
 	if claudeRequest.MaxTokens == 0 {
 		claudeRequest.MaxTokens = defaultMaxTokens
 	}
 	for _, message := range origRequest.Messages {
 		if message.Role == roleSystem {
 			claudeRequest.System = message.Content
 			continue
 		}
 		claudeMessage := chatMessage{
 			Role:    message.Role,
 			Content: message.Content,
 		}
 		claudeRequest.Messages = append(claudeRequest.Messages, claudeMessage)
 	}
 	return &claudeRequest
 }
 func (c *claudeProvider) responseClaude2OpenAI(ctx wrapper.HttpContext, origResponse *claudeTextGenResponse) *chatCompletionResponse {
 	choice := chatCompletionChoice{
 		Index:        0,
 		Message:      &chatMessage{Role: roleAssistant, Content: origResponse.Content[0].Text},
 		FinishReason: stopReasonClaude2OpenAI(origResponse.StopReason),
 	}
 	return &chatCompletionResponse{
 		Id:                origResponse.Id,
 		Created:           time.Now().UnixMilli() / 1000,
 		Model:             ctx.GetContext(ctxKeyFinalRequestModel).(string),
 		SystemFingerprint: "",
 		Object:            objectChatCompletion,
 		Choices:           []chatCompletionChoice{choice},
 		Usage: chatCompletionUsage{
 			PromptTokens:     origResponse.Usage.InputTokens,
 			CompletionTokens: origResponse.Usage.OutputTokens,
 			TotalTokens:      origResponse.Usage.InputTokens + origResponse.Usage.OutputTokens,
 		},
 	}
 }
 func stopReasonClaude2OpenAI(reason *string) string {
 	if reason == nil {
 		return ""
 	}
 	switch *reason {
 	case "end_turn":
 		return finishReasonStop
 	case "stop_sequence":
 		return finishReasonStop
 	case "max_tokens":
 		return finishReasonLength
 	default:
 		return *reason
 	}
 }
 func (c *claudeProvider) streamResponseClaude2OpenAI(ctx wrapper.HttpContext, origResponse *claudeTextGenStreamResponse, log wrapper.Log) *chatCompletionResponse {
 	switch origResponse.Type {
 	case "message_start":
 		choice := chatCompletionChoice{
 			Index: 0,
 			Delta: &chatMessage{Role: roleAssistant, Content: ""},
 		}
 		return createChatCompletionResponse(ctx, origResponse, choice)
 	case "content_block_delta":
 		choice := chatCompletionChoice{
 			Index: 0,
 			Delta: &chatMessage{Content: origResponse.Delta.Text},
 		}
 		return createChatCompletionResponse(ctx, origResponse, choice)
 	case "message_delta":
 		choice := chatCompletionChoice{
 			Index:        0,
 			Delta:        &chatMessage{},
 			FinishReason: stopReasonClaude2OpenAI(origResponse.Delta.StopReason),
 		}
 		return createChatCompletionResponse(ctx, origResponse, choice)
 	case "content_block_stop", "message_stop":
 		log.Debugf("skip processing response type: %s", origResponse.Type)
 		return nil
 	default:
 		log.Errorf("Unexpected response type: %s", origResponse.Type)
 		return nil
 	}
 }
 func createChatCompletionResponse(ctx wrapper.HttpContext, response *claudeTextGenStreamResponse, choice chatCompletionChoice) *chatCompletionResponse {
 	return &chatCompletionResponse{
 		Id:      response.Message.Id,
 		Created: time.Now().UnixMilli() / 1000,
 		Model:   ctx.GetContext(ctxKeyFinalRequestModel).(string),
 		Object:  objectChatCompletionChunk,
 		Choices: []chatCompletionChoice{choice},
 	}
 }
 func (c *claudeProvider) appendResponse(responseBuilder *strings.Builder, responseBody string) {
 	responseBuilder.WriteString(fmt.Sprintf("%s %s\n\n", streamDataItemKey, responseBody))
 }
--- a/plugins/wasm-go/extensions/ai-proxy/provider/model.go
+++ b/plugins/wasm-go/extensions/ai-proxy/provider/model.go
@@ -30,6 +30,7 @@ type chatCompletionRequest struct {
 	Tools            []tool         `json:"tools,omitempty"`
 	ToolChoice       *toolChoice    `json:"tool_choice,omitempty"`
 	User             string         `json:"user,omitempty"`
 	Stop             []string       `json:"stop,omitempty"`
 }
 type streamOptions struct {
--- a/plugins/wasm-go/extensions/ai-proxy/provider/provider.go
+++ b/plugins/wasm-go/extensions/ai-proxy/provider/provider.go
@@ -25,6 +25,7 @@ const (
 	providerTypeDeepSeek = "deepseek"
 	providerTypeZhipuAi  = "zhipuai"
 	providerTypeOllama   = "ollama"
 	providerTypeClaude   = "claude"
 	providerTypeBaidu    = "baidu"
 	providerTypeHunyuan  = "hunyuan"
 	providerTypeStepfun  = "stepfun"
@@ -34,10 +35,11 @@ const (
 	protocolOriginal = "original"
 	roleSystem    = "system"
 	roleUser      = "user"
 	roleAssistant = "assistant"
 	roleUser      = "user"
 	finishReasonStop   = "stop"
 	finishReasonLength = "length"
 	ctxKeyIncrementalStreaming = "incrementalStreaming"
 	ctxKeyApiName              = "apiKey"
@@ -73,6 +75,7 @@ var (
 		providerTypeDeepSeek: &deepseekProviderInitializer{},
 		providerTypeZhipuAi:  &zhipuAiProviderInitializer{},
 		providerTypeOllama:   &ollamaProviderInitializer{},
 		providerTypeClaude:   &claudeProviderInitializer{},
 		providerTypeBaidu:    &baiduProviderInitializer{},
 		providerTypeHunyuan:  &hunyuanProviderInitializer{},
 		providerTypeStepfun:  &stepfunProviderInitializer{},
@@ -150,6 +153,9 @@ type ProviderConfig struct {
 	// @Title zh-CN 模型对话上下文
 	// @Description zh-CN 配置一个外部获取对话上下文的文件来源，用于在AI请求中补充对话上下文
 	context *ContextConfig `required:"false" yaml:"context" json:"context"`
 	// @Title zh-CN 版本
 	// @Description zh-CN 请求AI服务的版本，目前仅适用于Claude AI服务
 	claudeVersion string `required:"false" yaml:"version" json:"version"`
 }
 func (c *ProviderConfig) FromJson(json gjson.Result) {
@@ -184,7 +190,7 @@ func (c *ProviderConfig) FromJson(json gjson.Result) {
 		c.context = &ContextConfig{}
 		c.context.FromJson(contextJson)
 	}
-
+	c.claudeVersion = json.Get("claudeVersion").String()
 	c.hunyuanAuthId = json.Get("hunyuanAuthId").String()
 	c.hunyuanAuthKey = json.Get("hunyuanAuthKey").String()
 	c.minimaxGroupId = json.Get("minimaxGroupId").String()