package provider import ( "bytes" "encoding/json" "errors" "fmt" "hash/fnv" "math/rand" "net/http" "path" "regexp" "strconv" "strings" "github.com/alibaba/higress/plugins/wasm-go/extensions/ai-proxy/util" "github.com/higress-group/proxy-wasm-go-sdk/proxywasm" "github.com/higress-group/proxy-wasm-go-sdk/proxywasm/types" "github.com/higress-group/wasm-go/pkg/log" "github.com/higress-group/wasm-go/pkg/wrapper" "github.com/tidwall/gjson" "github.com/tidwall/sjson" ) type ( ApiName string Pointcut string basePathHandling string ) const ( // ApiName 格式 {vendor}/{version}/{apitype} // 表示遵循 厂商/版本/接口类型 的格式 // 目前openai是事实意义上的标准,但是也有其他厂商存在其他任务的一些可能的标准,比如cohere的rerank ApiNameCompletion ApiName = "openai/v1/completions" ApiNameChatCompletion ApiName = "openai/v1/chatcompletions" ApiNameEmbeddings ApiName = "openai/v1/embeddings" ApiNameImageGeneration ApiName = "openai/v1/imagegeneration" ApiNameImageEdit ApiName = "openai/v1/imageedit" ApiNameImageVariation ApiName = "openai/v1/imagevariation" ApiNameAudioSpeech ApiName = "openai/v1/audiospeech" ApiNameAudioTranscription ApiName = "openai/v1/audiotranscription" ApiNameAudioTranslation ApiName = "openai/v1/audiotranslation" ApiNameRealtime ApiName = "openai/v1/realtime" ApiNameFiles ApiName = "openai/v1/files" ApiNameRetrieveFile ApiName = "openai/v1/retrievefile" ApiNameRetrieveFileContent ApiName = "openai/v1/retrievefilecontent" ApiNameBatches ApiName = "openai/v1/batches" ApiNameRetrieveBatch ApiName = "openai/v1/retrievebatch" ApiNameCancelBatch ApiName = "openai/v1/cancelbatch" ApiNameModels ApiName = "openai/v1/models" ApiNameResponses ApiName = "openai/v1/responses" ApiNameFineTuningJobs ApiName = "openai/v1/fine-tuningjobs" ApiNameRetrieveFineTuningJob ApiName = "openai/v1/retrievefine-tuningjob" ApiNameFineTuningJobEvents ApiName = "openai/v1/fine-tuningjobsevents" ApiNameFineTuningJobCheckpoints ApiName = "openai/v1/fine-tuningjobcheckpoints" ApiNameCancelFineTuningJob ApiName = "openai/v1/cancelfine-tuningjob" 
ApiNameResumeFineTuningJob ApiName = "openai/v1/resumefine-tuningjob" ApiNamePauseFineTuningJob ApiName = "openai/v1/pausefine-tuningjob" ApiNameFineTuningCheckpointPermissions ApiName = "openai/v1/fine-tuningjobcheckpointpermissions" ApiNameDeleteFineTuningCheckpointPermission ApiName = "openai/v1/deletefine-tuningjobcheckpointpermission" ApiNameVideos ApiName = "openai/v1/videos" ApiNameRetrieveVideo ApiName = "openai/v1/retrievevideo" ApiNameVideoRemix ApiName = "openai/v1/videoremix" ApiNameRetrieveVideoContent ApiName = "openai/v1/retrievevideocontent" // TODO: 以下是一些非标准的API名称,需要进一步确认是否支持 ApiNameCohereV1Rerank ApiName = "cohere/v1/rerank" ApiNameQwenAsyncAIGC ApiName = "qwen/v1/services/aigc" ApiNameQwenAsyncTask ApiName = "qwen/v1/tasks" ApiNameQwenV1Rerank ApiName = "qwen/v1/rerank" ApiNameGeminiGenerateContent ApiName = "gemini/v1beta/generatecontent" ApiNameGeminiStreamGenerateContent ApiName = "gemini/v1beta/streamgeneratecontent" ApiNameAnthropicMessages ApiName = "anthropic/v1/messages" ApiNameAnthropicComplete ApiName = "anthropic/v1/complete" ApiNameVertexRaw ApiName = "vertex/raw" // OpenAI PathOpenAIPrefix = "/v1" PathOpenAICompletions = "/v1/completions" PathOpenAIChatCompletions = "/v1/chat/completions" PathOpenAIEmbeddings = "/v1/embeddings" PathOpenAIFiles = "/v1/files" PathOpenAIRetrieveFile = "/v1/files/{file_id}" PathOpenAIRetrieveFileContent = "/v1/files/{file_id}/content" PathOpenAIBatches = "/v1/batches" PathOpenAIRetrieveBatch = "/v1/batches/{batch_id}" PathOpenAICancelBatch = "/v1/batches/{batch_id}/cancel" PathOpenAIModels = "/v1/models" PathOpenAIImageGeneration = "/v1/images/generations" PathOpenAIImageEdit = "/v1/images/edits" PathOpenAIImageVariation = "/v1/images/variations" PathOpenAIAudioSpeech = "/v1/audio/speech" PathOpenAIAudioTranscriptions = "/v1/audio/transcriptions" PathOpenAIAudioTranslations = "/v1/audio/translations" PathOpenAIRealtime = "/v1/realtime" PathOpenAIResponses = "/v1/responses" PathOpenAIFineTuningJobs = 
"/v1/fine_tuning/jobs" PathOpenAIRetrieveFineTuningJob = "/v1/fine_tuning/jobs/{fine_tuning_job_id}" PathOpenAIFineTuningJobEvents = "/v1/fine_tuning/jobs/{fine_tuning_job_id}/events" PathOpenAIFineTuningJobCheckpoints = "/v1/fine_tuning/jobs/{fine_tuning_job_id}/checkpoints" PathOpenAICancelFineTuningJob = "/v1/fine_tuning/jobs/{fine_tuning_job_id}/cancel" PathOpenAIResumeFineTuningJob = "/v1/fine_tuning/jobs/{fine_tuning_job_id}/resume" PathOpenAIPauseFineTuningJob = "/v1/fine_tuning/jobs/{fine_tuning_job_id}/pause" PathOpenAIFineTuningCheckpointPermissions = "/v1/fine_tuning/checkpoints/{fine_tuned_model_checkpoint}/permissions" PathOpenAIFineDeleteTuningCheckpointPermission = "/v1/fine_tuning/checkpoints/{fine_tuned_model_checkpoint}/permissions/{permission_id}" PathOpenAIVideos = "/v1/videos" PathOpenAIRetrieveVideo = "/v1/videos/{video_id}" PathOpenAIVideoRemix = "/v1/videos/{video_id}/remix" PathOpenAIRetrieveVideoContent = "/v1/videos/{video_id}/content" // Anthropic PathAnthropicMessages = "/v1/messages" PathAnthropicComplete = "/v1/complete" // Cohere PathCohereV1Rerank = "/v1/rerank" providerTypeMoonshot = "moonshot" providerTypeAzure = "azure" providerTypeAi360 = "ai360" providerTypeGithub = "github" providerTypeQwen = "qwen" providerTypeOpenAI = "openai" providerTypeGroq = "groq" providerTypeGrok = "grok" providerTypeBaichuan = "baichuan" providerTypeYi = "yi" providerTypeDeepSeek = "deepseek" providerTypeZhipuAi = "zhipuai" providerTypeOllama = "ollama" providerTypeClaude = "claude" providerTypeBaidu = "baidu" providerTypeHunyuan = "hunyuan" providerTypeStepfun = "stepfun" providerTypeMinimax = "minimax" providerTypeCloudflare = "cloudflare" providerTypeSpark = "spark" providerTypeGemini = "gemini" providerTypeDeepl = "deepl" providerTypeMistral = "mistral" providerTypeCohere = "cohere" providerTypeDoubao = "doubao" providerTypeCoze = "coze" providerTypeTogetherAI = "together-ai" providerTypeDify = "dify" providerTypeBedrock = "bedrock" 
providerTypeVertex = "vertex" providerTypeTriton = "triton" providerTypeOpenRouter = "openrouter" providerTypeLongcat = "longcat" providerTypeFireworks = "fireworks" providerTypeVllm = "vllm" providerTypeGeneric = "generic" protocolOpenAI = "openai" protocolOriginal = "original" roleSystem = "system" roleDeveloper = "developer" roleAssistant = "assistant" roleUser = "user" roleTool = "tool" finishReasonStop = "stop" finishReasonLength = "length" finishReasonToolCall = "tool_calls" ctxKeyClaudeBudgetTokens = "claudeBudgetTokens" ctxKeyClaudeThinkingType = "claudeThinkingType" ctxKeyIncrementalStreaming = "incrementalStreaming" ctxKeyApiKey = "apiKey" CtxKeyApiName = "apiName" ctxKeyIsStreaming = "isStreaming" ctxKeyStreamingBody = "streamingBody" ctxKeyOriginalRequestModel = "originalRequestModel" ctxKeyFinalRequestModel = "finalRequestModel" ctxKeyPushedMessage = "pushedMessage" ctxKeyContentPushed = "contentPushed" ctxKeyReasoningContentPushed = "reasoningContentPushed" ctxKeyHasContentDelta = "hasContentDelta" ctxKeyBufferedReasoning = "bufferedReasoning" objectChatCompletion = "chat.completion" objectChatCompletionChunk = "chat.completion.chunk" reasoningBehaviorPassThrough = "passthrough" reasoningBehaviorIgnore = "ignore" reasoningBehaviorConcat = "concat" wildcard = "*" defaultTimeout = 2 * 60 * 1000 // ms basePathHandlingRemovePrefix basePathHandling = "removePrefix" basePathHandlingPrepend basePathHandling = "prepend" ) type providerInitializer interface { ValidateConfig(*ProviderConfig) error CreateProvider(ProviderConfig) (Provider, error) } var ( errUnsupportedApiName = errors.New("unsupported API name") // Providers that support the "developer" role. Other providers will have "developer" roles converted to "system". 
developerRoleSupportedProviders = map[string]bool{ providerTypeAzure: true, } providerInitializers = map[string]providerInitializer{ providerTypeMoonshot: &moonshotProviderInitializer{}, providerTypeAzure: &azureProviderInitializer{}, providerTypeAi360: &ai360ProviderInitializer{}, providerTypeGithub: &githubProviderInitializer{}, providerTypeQwen: &qwenProviderInitializer{}, providerTypeOpenAI: &openaiProviderInitializer{}, providerTypeGroq: &groqProviderInitializer{}, providerTypeGrok: &grokProviderInitializer{}, providerTypeBaichuan: &baichuanProviderInitializer{}, providerTypeYi: &yiProviderInitializer{}, providerTypeDeepSeek: &deepseekProviderInitializer{}, providerTypeZhipuAi: &zhipuAiProviderInitializer{}, providerTypeOllama: &ollamaProviderInitializer{}, providerTypeClaude: &claudeProviderInitializer{}, providerTypeBaidu: &baiduProviderInitializer{}, providerTypeHunyuan: &hunyuanProviderInitializer{}, providerTypeStepfun: &stepfunProviderInitializer{}, providerTypeMinimax: &minimaxProviderInitializer{}, providerTypeCloudflare: &cloudflareProviderInitializer{}, providerTypeSpark: &sparkProviderInitializer{}, providerTypeGemini: &geminiProviderInitializer{}, providerTypeDeepl: &deeplProviderInitializer{}, providerTypeMistral: &mistralProviderInitializer{}, providerTypeCohere: &cohereProviderInitializer{}, providerTypeDoubao: &doubaoProviderInitializer{}, providerTypeCoze: &cozeProviderInitializer{}, providerTypeTogetherAI: &togetherAIProviderInitializer{}, providerTypeDify: &difyProviderInitializer{}, providerTypeBedrock: &bedrockProviderInitializer{}, providerTypeVertex: &vertexProviderInitializer{}, providerTypeTriton: &tritonProviderInitializer{}, providerTypeOpenRouter: &openrouterProviderInitializer{}, providerTypeLongcat: &longcatProviderInitializer{}, providerTypeFireworks: &fireworksProviderInitializer{}, providerTypeVllm: &vllmProviderInitializer{}, providerTypeGeneric: &genericProviderInitializer{}, } ) type Provider interface { GetProviderType() 
string } type RequestHeadersHandler interface { OnRequestHeaders(ctx wrapper.HttpContext, apiName ApiName) error } type RequestBodyHandler interface { OnRequestBody(ctx wrapper.HttpContext, apiName ApiName, body []byte) (types.Action, error) } type StreamingResponseBodyHandler interface { OnStreamingResponseBody(ctx wrapper.HttpContext, name ApiName, chunk []byte, isLastChunk bool) ([]byte, error) } type StreamingEventHandler interface { OnStreamingEvent(ctx wrapper.HttpContext, name ApiName, event StreamEvent) ([]StreamEvent, error) } type ApiNameHandler interface { GetApiName(path string) ApiName } type TransformRequestHeadersHandler interface { TransformRequestHeaders(ctx wrapper.HttpContext, apiName ApiName, headers http.Header) } type TransformRequestBodyHandler interface { TransformRequestBody(ctx wrapper.HttpContext, apiName ApiName, body []byte) ([]byte, error) } // TransformRequestBodyHeadersHandler allows to transform request headers based on the request body. // Some providers (e.g. gemini) transform request headers (e.g., path) based on the request body (e.g., model). 
type TransformRequestBodyHeadersHandler interface {
	TransformRequestBodyHeaders(ctx wrapper.HttpContext, apiName ApiName, body []byte, headers http.Header) ([]byte, error)
}

// TransformResponseHeadersHandler declares the hook that rewrites the
// response headers in place.
type TransformResponseHeadersHandler interface {
	TransformResponseHeaders(ctx wrapper.HttpContext, apiName ApiName, headers http.Header)
}

// TransformResponseBodyHandler declares the hook that rewrites the response
// body and returns the new body.
type TransformResponseBodyHandler interface {
	TransformResponseBody(ctx wrapper.HttpContext, apiName ApiName, body []byte) ([]byte, error)
}

// ProviderConfig holds the configuration of a single AI service provider.
// Its fields are populated from JSON by FromJson.
type ProviderConfig struct {
	// @Title en-US ID
	// @Description en-US Identifier of the AI service provider.
	id string `required:"true" yaml:"id" json:"id"`
	// @Title en-US Type
	// @Description en-US Type of the AI service provider.
	typ string `required:"true" yaml:"type" json:"type"`
	// @Title en-US API Tokens
	// @Description en-US List of API tokens used for authentication when calling the AI service. Different providers may use different names for it. Some providers (e.g. Azure OpenAI) only support a single API token.
	// NOTE(review): the yaml tag is "apiToken" while the json tag and FromJson use "apiTokens" — confirm which is intended.
	apiTokens []string `required:"false" yaml:"apiToken" json:"apiTokens"`
	// @Title en-US Request timeout
	// @Description en-US Timeout for requests to the AI service, in milliseconds. Defaults to 120000 (2 minutes). Currently only used when fetching context information; it does not affect the forwarded model request itself.
	timeout uint32 `required:"false" yaml:"timeout" json:"timeout"`
	// @Title en-US apiToken failover
	// @Description en-US When an apiToken becomes unavailable it is removed from the apiTokens list and health-checked; once it is available again it is added back to the list.
	failover *failover `required:"false" yaml:"failover" json:"failover"`
	// @Title en-US Retry failed requests
	// @Description en-US Retry failed requests immediately.
	retryOnFailure *retryOnFailure `required:"false" yaml:"retryOnFailure" json:"retryOnFailure"`
	// @Title en-US Reasoning content handling
	// @Description en-US How to handle reasoning content returned by the model service. Supported values: passthrough (output the reasoning content as is), ignore (do not output the reasoning content), concat (prepend the reasoning content to the regular output). Defaults to passthrough; unrecognized values also fall back to passthrough. Only supported for the Qwen (Tongyi Qianwen) service.
	reasoningContentMode string `required:"false" yaml:"reasoningContentMode" json:"reasoningContentMode"`
	// @Title en-US Custom backend URL based on the OpenAI protocol
	// @Description en-US Only applies to services that support the openai protocol.
	openaiCustomUrl string `required:"false" yaml:"openaiCustomUrl" json:"openaiCustomUrl"`
	// @Title en-US Moonshot File ID
	// @Description en-US Only applies to the Moonshot AI service. ID of a file in the Moonshot AI service whose content is used to supplement the AI request context.
	moonshotFileId string `required:"false" yaml:"moonshotFileId" json:"moonshotFileId"`
	// @Title en-US Azure OpenAI Service URL
	// @Description en-US Only applies to the Azure OpenAI service. Full URL of the OpenAI service to call, including parameters such as api-version.
	azureServiceUrl string `required:"false" yaml:"azureServiceUrl" json:"azureServiceUrl"`
	// @Title en-US Qwen File ID
	// @Description en-US Only applies to the Qwen service. IDs of files uploaded to Dashscope whose content is used to supplement the AI request context. Only the qwen-long model is supported.
	qwenFileIds []string `required:"false" yaml:"qwenFileIds" json:"qwenFileIds"`
	// @Title en-US Enable Qwen search
	// @Description en-US Only applies to the Qwen service; whether to enable Qwen's internet search feature.
	qwenEnableSearch bool `required:"false" yaml:"qwenEnableSearch" json:"qwenEnableSearch"`
	// @Title en-US Qwen service domain
	// @Description en-US Only applies to the Qwen service. Requests are forwarded to dashscope.aliyuncs.com by default; when using the finance cloud service it can be set to dashscope-finance.aliyuncs.com.
	qwenDomain string `required:"false" yaml:"qwenDomain" json:"qwenDomain"`
	// @Title en-US Enable Qwen compatible mode
	// @Description en-US When enabled, Qwen's compatible-mode API is called and requests/responses are not modified. Treated as enabled when the option is omitted.
	qwenEnableCompatible bool `required:"false" yaml:"qwenEnableCompatible" json:"qwenEnableCompatible"`
	// @Title en-US Ollama Server IP/Domain
	// @Description en-US Only applies to the Ollama service. Host address of the Ollama server.
	ollamaServerHost string `required:"false" yaml:"ollamaServerHost" json:"ollamaServerHost"`
	// @Title en-US Ollama Server Port
	// @Description en-US Only applies to the Ollama service. Port number of the Ollama server.
	ollamaServerPort uint32 `required:"false" yaml:"ollamaServerPort" json:"ollamaServerPort"`
	// @Title en-US hunyuan api key for authorization
	// @Description en-US Only used for Hunyuan AI service authentication. API key/id reference: https://cloud.tencent.com/document/api/1729/101843#Golang
	hunyuanAuthKey string `required:"false" yaml:"hunyuanAuthKey" json:"hunyuanAuthKey"`
	// @Title en-US hunyuan api id for authorization
	// @Description en-US Only used for Hunyuan AI service authentication.
	hunyuanAuthId string `required:"false" yaml:"hunyuanAuthId" json:"hunyuanAuthId"`
	// @Title en-US Amazon Bedrock AccessKey for authorization
	// @Description en-US Only used for Amazon Bedrock service authentication. API key/id reference: https://docs.aws.amazon.com/zh_cn/IAM/latest/UserGuide/reference_sigv.html
	awsAccessKey string `required:"false" yaml:"awsAccessKey" json:"awsAccessKey"`
	// @Title en-US Amazon Bedrock SecretKey for authorization
	// @Description en-US Only used for Amazon Bedrock service authentication.
	awsSecretKey string `required:"false" yaml:"awsSecretKey" json:"awsSecretKey"`
	// @Title en-US Amazon Bedrock Region
	// @Description en-US Only used for accessing the Amazon Bedrock service.
	awsRegion string `required:"false" yaml:"awsRegion" json:"awsRegion"`
	// @Title en-US Amazon Bedrock additional model request fields
	// @Description en-US Only applies to the Amazon Bedrock service; sets model-specific inference parameters.
	bedrockAdditionalFields map[string]interface{} `required:"false" yaml:"bedrockAdditionalFields" json:"bedrockAdditionalFields"`
	// @Title en-US Amazon Bedrock prompt cachePoint insertion positions
	// @Description en-US Only applies to the Amazon Bedrock service. Configures where cachePoint is inserted; multiple positions may be selected: systemPrompt, lastUserMessage, lastMessage. A value of true enables that position.
	bedrockPromptCachePointPositions map[string]bool `required:"false" yaml:"bedrockPromptCachePointPositions" json:"bedrockPromptCachePointPositions"`
	// @Title en-US Amazon Bedrock prompt cache retention policy (default value)
	// @Description en-US Only applies to the Amazon Bedrock service. Used as the default when prompt_cache_retention is absent from the request; supports in_memory and 24h.
	promptCacheRetention string `required:"false" yaml:"promptCacheRetention" json:"promptCacheRetention"`
	// @Title en-US minimax API type
	// @Description en-US Only applies to the minimax service. The minimax API type, one of v2 or pro; defaults to v2.
	minimaxApiType string `required:"false" yaml:"minimaxApiType" json:"minimaxApiType"`
	// @Title en-US minimax group id
	// @Description en-US Only applies to the minimax service. Required when the minimax API type is pro.
	minimaxGroupId string `required:"false" yaml:"minimaxGroupId" json:"minimaxGroupId"`
	// @Title en-US Model name mapping
	// @Description en-US Maps model names in requests to model names supported by the target AI provider. A global mapping can be configured with "*".
	modelMapping map[string]string `required:"false" yaml:"modelMapping" json:"modelMapping"`
	// @Title en-US External API protocol
	// @Description en-US Protocol of the AI service API exposed through this plugin. Defaults to "openai", i.e. the OpenAI API protocol. Set to "original" to keep the provider's original protocol.
	protocol string `required:"false" yaml:"protocol" json:"protocol"`
	// @Title en-US Model conversation context
	// @Description en-US Configures an external file source from which conversation context is fetched to supplement AI requests.
	context *ContextConfig `required:"false" yaml:"context" json:"context"`
	// @Title en-US Version
	// @Description en-US Version of the AI service to call; currently only applies to the Gemini and Claude AI services.
	apiVersion string `required:"false" yaml:"apiVersion" json:"apiVersion"`
	// @Title en-US Cloudflare Account ID
	// @Description en-US Only applies to the Cloudflare Workers AI service. Reference: https://developers.cloudflare.com/workers-ai/get-started/rest-api/#2-run-a-model-via-api
	cloudflareAccountId string `required:"false" yaml:"cloudflareAccountId" json:"cloudflareAccountId"`
	// @Title en-US Gemini content filtering and safety level settings
	// @Description en-US Only applies to the Gemini AI service. Reference: https://ai.google.dev/gemini-api/docs/safety-settings
	geminiSafetySetting map[string]string `required:"false" yaml:"geminiSafetySetting" json:"geminiSafetySetting"`
	// @Title en-US Gemini Thinking Budget
	// @Description en-US Only applies to the Gemini AI service; controls the thinking budget.
	geminiThinkingBudget int64 `required:"false" yaml:"geminiThinkingBudget" json:"geminiThinkingBudget"`
	// @Title en-US Vertex AI region
	// @Description en-US Only applies to the Vertex AI service. For the full list of supported regions see https://cloud.google.com/vertex-ai/generative-ai/docs/learn/locations?hl=zh-cn#available-regions
	vertexRegion string `required:"false" yaml:"vertexRegion" json:"vertexRegion"`
	// @Title en-US Vertex AI project ID
	// @Description en-US Only applies to the Vertex AI service. For creating and managing projects see https://cloud.google.com/resource-manager/docs/creating-managing-projects?hl=zh-cn#identifiers
	vertexProjectId string `required:"false" yaml:"vertexProjectId" json:"vertexProjectId"`
	// @Title en-US Vertex auth key
	// @Description en-US Full content of the JSON key file used for Google service account authentication; see https://cloud.google.com/iam/docs/keys-create-delete?hl=zh-cn#iam-service-account-keys-create-console
	vertexAuthKey string `required:"false" yaml:"vertexAuthKey" json:"vertexAuthKey"`
	// @Title en-US Vertex auth service name
	// @Description en-US Service used for Google service account authentication; a DNS-type service name.
	vertexAuthServiceName string `required:"false" yaml:"vertexAuthServiceName" json:"vertexAuthServiceName"`
	// @Title en-US Vertex token refresh lead time
	// @Description en-US Used for Google service account authentication: how far ahead of access token expiry the token is refreshed, in seconds. Defaults to 60 seconds.
	vertexTokenRefreshAhead int64 `required:"false" yaml:"vertexTokenRefreshAhead" json:"vertexTokenRefreshAhead"`
	// @Title en-US Vertex AI OpenAI-compatible mode
	// @Description en-US When enabled, Vertex AI's OpenAI-compatible API is used; requests and responses both use the OpenAI format and no protocol conversion is needed. Mutually exclusive with Express Mode (apiTokens).
	vertexOpenAICompatible bool `required:"false" yaml:"vertexOpenAICompatible" json:"vertexOpenAICompatible"`
	// @Title en-US Target language for translation services
	// @Description en-US Language of the translation result; currently only applies to the DeepL service.
	targetLang string `required:"false" yaml:"targetLang" json:"targetLang"`
	// @Title en-US JSON Schema the service response must satisfy
	// @Description en-US Currently only applies to some OpenAI models. Reference: https://platform.openai.com/docs/guides/structured-outputs
	responseJsonSchema map[string]interface{} `required:"false" yaml:"responseJsonSchema" json:"responseJsonSchema"`
	// @Title en-US Custom auth header name
	// @Description en-US Custom header name used to extract the auth token from the request. If not configured, the x-api-key, x-authorization, anthropic-api-key and Authorization headers are checked in the default priority order.
	authHeaderKey string `required:"false" yaml:"authHeaderKey" json:"authHeaderKey"`
	// @Title en-US Custom model parameter settings
	// @Description en-US Used to fill in or override parameters of the model call.
	customSettings []CustomSetting
	// @Title en-US URL of a privately deployed dify
	difyApiUrl string `required:"false" yaml:"difyApiUrl" json:"difyApiUrl"`
	// @Title en-US dify application type: Chat/Completion/Agent/Workflow
	botType string `required:"false" yaml:"botType" json:"botType"`
	// @Title en-US Input variable that must be set when the dify application type is workflow; used together with botType workflow
	inputVariable string `required:"false" yaml:"inputVariable" json:"inputVariable"`
	// @Title en-US Output variable that must be set when the dify application type is workflow; used together with botType workflow
	outputVariable string `required:"false" yaml:"outputVariable" json:"outputVariable"`
	// @Title en-US Additionally supported AI capabilities
	// @Description en-US Mapping from exposed AI capabilities to URL paths, e.g. {"openai/v1/chatcompletions": "/v1/chat/completions"}
	capabilities map[string]string
	// @Title en-US If basePath is configured, it can be removed as a prefix from the request path or prepended to the request path; removal is the default
	basePath string `required:"false" yaml:"basePath" json:"basePath"`
	// @Title en-US basePathHandling specifies how basePath is handled; allowed values: removePrefix, prepend
	basePathHandling basePathHandling `required:"false" yaml:"basePathHandling" json:"basePathHandling"`
	// @Title en-US Host for the generic provider
	// @Description en-US Only applies to the generic provider; overrides the target Host of forwarded requests.
	genericHost string `required:"false" yaml:"genericHost" json:"genericHost"`
	// @Title en-US Context cleanup commands
	// @Description en-US List of cleanup command texts. When the request's messages contain a user message that exactly matches any of the commands, that message and all non-system messages before it are removed, allowing the context to be cleared on demand.
	contextCleanupCommands []string `required:"false" yaml:"contextCleanupCommands" json:"contextCleanupCommands"`
	// @Title en-US First byte timeout
	// @Description en-US Timeout for receiving the first response packet from the upstream service in streaming requests, in milliseconds. Defaults to 0, which disables the first byte timeout.
	firstByteTimeout uint32 `required:"false" yaml:"firstByteTimeout" json:"firstByteTimeout"`
	// @Title en-US Triton Model Version
	// @Description en-US Only applies to NVIDIA Triton Inference Server. The modelVersion used in :path. Reference: "https://docs.nvidia.com/deeplearning/triton-inference-server/user-guide/docs/protocol/extension_generate.html"
	tritonModelVersion string `required:"false" yaml:"tritonModelVersion" json:"tritonModelVersion"`
	// @Title en-US Domain where the Triton Server is deployed
	// @Description en-US Only applies to NVIDIA Triton Inference Server. The modelVersion used in :path. Reference: "https://docs.nvidia.com/deeplearning/triton-inference-server/user-guide/docs/protocol/extension_generate.html"
	// NOTE(review): this description looks copied from tritonModelVersion; it presumably should describe the server domain — confirm.
	tritonDomain string `required:"false" yaml:"tritonDomain" json:"tritonDomain"`
	// @Title en-US vLLM custom backend URL
	// @Description en-US Only applies to the vLLM service. Full URL of the vLLM service, including scheme, domain, port, etc.
	vllmCustomUrl string `required:"false" yaml:"vllmCustomUrl" json:"vllmCustomUrl"`
	// @Title en-US vLLM host address
	// @Description en-US Only applies to the vLLM service; host address of the vLLM server, e.g. vllm-service.cluster.local
	vllmServerHost string `required:"false" yaml:"vllmServerHost" json:"vllmServerHost"`
	// @Title en-US Doubao service domain
	// @Description en-US Only applies to the Doubao service; requests are forwarded to ark.cn-beijing.volces.com by default.
	doubaoDomain string `required:"false" yaml:"doubaoDomain" json:"doubaoDomain"`
	// @Title en-US Claude Code mode
	// @Description en-US Only applies to the Claude service. When enabled, requests are sent posing as the Claude Code client, and authentication with a Claude Code OAuth token is supported.
	claudeCodeMode bool `required:"false" yaml:"claudeCodeMode" json:"claudeCodeMode"`
	// @Title en-US Zhipu AI service domain
	// @Description en-US Only applies to the Zhipu AI service. Defaults to open.bigmodel.cn (China); can be set to api.z.ai (international).
	zhipuDomain string `required:"false" yaml:"zhipuDomain" json:"zhipuDomain"`
	// @Title en-US Zhipu AI Code Plan mode
	// @Description en-US Only applies to the Zhipu AI service. When enabled, the /api/coding/paas/v4/chat/completions endpoint is used.
	zhipuCodePlanMode bool `required:"false" yaml:"zhipuCodePlanMode" json:"zhipuCodePlanMode"`
	// @Title en-US Merge consecutive same-role messages
	// @Description en-US When enabled, consecutive messages with the same role in the request (e.g. two user messages in a row) are merged into one, to satisfy providers that require strictly alternating turns (user→assistant→user→...).
	mergeConsecutiveMessages bool `required:"false" yaml:"mergeConsecutiveMessages" json:"mergeConsecutiveMessages"`
	// @Title en-US Generic provider domain
	// @Description en-US Service domain setting that applies to every provider. When set, this domain takes precedence over the default hard-coded domain. Commonly used in proxy server scenarios.
	providerDomain string `required:"false" yaml:"providerDomain" json:"providerDomain"`
	// @Title en-US Promote thinking to content when content is empty
	// @Description en-US When enabled, if the model response contains only reasoning_content/thinking and no regular content, the reasoning content is promoted to the regular content so the client does not receive an empty reply.
	promoteThinkingOnEmpty bool `required:"false" yaml:"promoteThinkingOnEmpty" json:"promoteThinkingOnEmpty"`
	// @Title en-US HiClaw mode
	// @Description en-US When enabled, both mergeConsecutiveMessages and promoteThinkingOnEmpty are enabled; intended for HiClaw multi-agent collaboration scenarios.
	hiclawMode bool `required:"false" yaml:"hiclawMode" json:"hiclawMode"`
	// @Title en-US Provider base path
	// @Description en-US When set, each provider prepends this value to the path when rewriting the request path; e.g. with "/api/ai" configured, the request path "/v1/chat/completions" is rewritten to "/api/ai/v1/chat/completions".
	providerBasePath string `required:"false" yaml:"providerBasePath" json:"providerBasePath"`
}

// GetId returns the configured provider ID.
func (c *ProviderConfig) GetId() string {
	return c.id
}

// GetType returns the configured provider type.
func (c *ProviderConfig) GetType() string {
	return c.typ
}

// GetProtocol returns the configured external API protocol.
func (c *ProviderConfig) GetProtocol() string {
	return c.protocol
}

// GetVllmCustomUrl returns the configured vLLM custom backend URL.
func (c *ProviderConfig) GetVllmCustomUrl() string {
	return c.vllmCustomUrl
}

// GetVllmServerHost returns the configured vLLM server host.
func (c *ProviderConfig) GetVllmServerHost() string {
	return c.vllmServerHost
}

// GetContextCleanupCommands returns the configured context cleanup commands.
// The returned slice is the config's own slice, not a copy.
func (c *ProviderConfig) GetContextCleanupCommands() []string {
	return c.contextCleanupCommands
}

// IsOpenAIProtocol reports whether the configured protocol is "openai".
func (c *ProviderConfig) IsOpenAIProtocol() bool {
	return c.protocol == protocolOpenAI
}

// FromJson populates the config from the given JSON value, applying defaults
// for options that are omitted.
func (c *ProviderConfig) FromJson(json gjson.Result) {
	c.id = json.Get("id").String()
	c.typ = json.Get("type").String()
	c.apiTokens = make([]string, 0)
	for _, token := range json.Get("apiTokens").Array() {
		c.apiTokens = append(c.apiTokens, token.String())
	}
	c.timeout = uint32(json.Get("timeout").Uint())
	if c.timeout == 0 {
		c.timeout = defaultTimeout
	}
	// First byte timeout; 0 is kept as is (no default is applied here).
	c.firstByteTimeout = uint32(json.Get("firstByteTimeout").Uint())
	c.openaiCustomUrl = json.Get("openaiCustomUrl").String()
	c.moonshotFileId = json.Get("moonshotFileId").String()
	c.azureServiceUrl = json.Get("azureServiceUrl").String()
	c.qwenFileIds = make([]string, 0)
	for _, fileId := range json.Get("qwenFileIds").Array() {
		c.qwenFileIds = append(c.qwenFileIds, fileId.String())
	}
	c.qwenEnableSearch = json.Get("qwenEnableSearch").Bool()
	if compatible := json.Get("qwenEnableCompatible"); compatible.Exists() {
		c.qwenEnableCompatible = compatible.Bool()
	} else {
		// Default to the official compatible mode when the option is omitted.
		c.qwenEnableCompatible = true
	}
	c.qwenDomain = json.Get("qwenDomain").String()
	if c.qwenDomain != "" {
		// TODO: validate the domain, if not valid, set to default
	}
	c.ollamaServerHost = json.Get("ollamaServerHost").String()
	c.ollamaServerPort = uint32(json.Get("ollamaServerPort").Uint())
	c.modelMapping = make(map[string]string)
	for k, v := range json.Get("modelMapping").Map() {
		c.modelMapping[k] = v.String()
	}
	c.protocol = json.Get("protocol").String()
	if c.protocol == "" {
		c.protocol = protocolOpenAI
	}
	contextJson := json.Get("context")
	if contextJson.Exists() {
		c.context = &ContextConfig{}
		c.context.FromJson(contextJson)
	}
	// The "claudeVersion" key is read here, which does not match the
	// yaml/json tag declared on the struct field.
	c.apiVersion = json.Get("claudeVersion").String()
	if c.apiVersion == "" {
		// Fall back to the "apiVersion" key so that other models' configs
		// work and the key matches the tag declared on the struct field.
		c.apiVersion = json.Get("apiVersion").String()
	}
	c.hunyuanAuthId = json.Get("hunyuanAuthId").String()
	c.hunyuanAuthKey = json.Get("hunyuanAuthKey").String()
	c.awsAccessKey = json.Get("awsAccessKey").String()
	c.awsSecretKey = json.Get("awsSecretKey").String()
	c.awsRegion = json.Get("awsRegion").String()
	// Bedrock-specific options are only read for the bedrock provider type.
	if c.typ == providerTypeBedrock {
		c.bedrockAdditionalFields = make(map[string]interface{})
		for k, v := range json.Get("bedrockAdditionalFields").Map() {
			c.bedrockAdditionalFields[k] = v.Value()
		}
		c.promptCacheRetention = json.Get("promptCacheRetention").String()
		// The positions map stays nil unless the option is present.
		if rawPositions := json.Get("bedrockPromptCachePointPositions"); rawPositions.Exists() {
			c.bedrockPromptCachePointPositions = make(map[string]bool)
			for k, v := range rawPositions.Map() {
				c.bedrockPromptCachePointPositions[k] = v.Bool()
			}
		}
	}
	c.minimaxApiType = json.Get("minimaxApiType").String()
	c.minimaxGroupId = json.Get("minimaxGroupId").String()
	c.cloudflareAccountId = json.Get("cloudflareAccountId").String()
	// Safety settings are only read for the gemini and vertex provider types.
	if c.typ == providerTypeGemini || c.typ == providerTypeVertex {
		c.geminiSafetySetting = make(map[string]string)
		for k, v := range json.Get("geminiSafetySetting").Map() {
			c.geminiSafetySetting[k] = v.String()
		}
	}
	c.geminiThinkingBudget = json.Get("geminiThinkingBudget").Int()
	c.vertexRegion = json.Get("vertexRegion").String()
	c.vertexProjectId = json.Get("vertexProjectId").String()
	c.vertexAuthKey = json.Get("vertexAuthKey").String()
	c.vertexAuthServiceName = json.Get("vertexAuthServiceName").String()
	c.vertexTokenRefreshAhead = json.Get("vertexTokenRefreshAhead").Int()
	if c.vertexTokenRefreshAhead == 0 {
		c.vertexTokenRefreshAhead = 60
	}
	c.vertexOpenAICompatible = json.Get("vertexOpenAICompatible").Bool()
	c.targetLang = json.Get("targetLang").String()
	// Only a JSON object is accepted as the schema; anything else clears it.
	if schemaValue, ok := json.Get("responseJsonSchema").Value().(map[string]interface{}); ok {
		c.responseJsonSchema = schemaValue
	} else {
		c.responseJsonSchema = nil
	}
	c.customSettings = make([]CustomSetting, 0)
	customSettingsJson := json.Get("customSettings")
	if customSettingsJson.Exists() {
		protocol := protocolOpenAI
		if c.protocol == protocolOriginal {
			// use provider name to represent original protocol name
			protocol = c.typ
		}
		for _, settingJson := range customSettingsJson.Array() {
			setting := CustomSetting{}
			setting.FromJson(settingJson)
			// use protocol info to rewrite setting
			setting.AdjustWithProtocol(protocol)
			// Settings that fail validation are dropped.
			if setting.Validate() {
				c.customSettings = append(c.customSettings, setting)
			}
		}
	}
	c.reasoningContentMode = json.Get("reasoningContentMode").String()
	if c.reasoningContentMode == "" {
		c.reasoningContentMode = reasoningBehaviorPassThrough
	} else {
		// The value is matched case-insensitively; unknown values fall back
		// to passthrough.
		c.reasoningContentMode = strings.ToLower(c.reasoningContentMode)
		switch c.reasoningContentMode {
		case reasoningBehaviorPassThrough, reasoningBehaviorIgnore, reasoningBehaviorConcat:
			// valid values, no action needed
		default:
			c.reasoningContentMode = reasoningBehaviorPassThrough
		}
	}
	failoverJson := json.Get("failover")
	// failover is always non-nil after this point; it starts out disabled.
	c.failover = &failover{
		enabled: false,
	}
	if failoverJson.Exists() {
		c.failover.FromJson(failoverJson)
	}
	retryOnFailureJson := json.Get("retryOnFailure")
	// retryOnFailure is always non-nil after this point; it starts out disabled.
	c.retryOnFailure = &retryOnFailure{
		enabled: false,
	}
	if retryOnFailureJson.Exists() {
		c.retryOnFailure.FromJson(retryOnFailureJson)
	}
	c.difyApiUrl = json.Get("difyApiUrl").String()
	c.botType = json.Get("botType").String()
	c.inputVariable = json.Get("inputVariable").String()
	c.outputVariable = json.Get("outputVariable").String()
	// NVIDIA triton
	c.tritonModelVersion = json.Get("tritonModelVersion").String()
	c.tritonDomain = json.Get("tritonDomain").String()
	c.capabilities = make(map[string]string)
	for capability, pathJson := range json.Get("capabilities").Map() {
		// Filter out capabilities that are not supported.
		switch capability {
		case string(ApiNameChatCompletion),
			string(ApiNameEmbeddings),
			string(ApiNameImageGeneration),
			string(ApiNameImageVariation),
			string(ApiNameImageEdit),
			string(ApiNameAudioSpeech),
			string(ApiNameAudioTranscription),
string(ApiNameAudioTranslation), string(ApiNameRealtime), string(ApiNameResponses), string(ApiNameCohereV1Rerank), string(ApiNameVideos), string(ApiNameRetrieveVideo), string(ApiNameRetrieveVideoContent), string(ApiNameVideoRemix): c.capabilities[capability] = pathJson.String() } } c.basePath = json.Get("basePath").String() c.basePathHandling = basePathHandling(json.Get("basePathHandling").String()) if c.basePath != "" && c.basePathHandling == "" { c.basePathHandling = basePathHandlingRemovePrefix } c.genericHost = json.Get("genericHost").String() c.vllmServerHost = json.Get("vllmServerHost").String() c.vllmCustomUrl = json.Get("vllmCustomUrl").String() c.doubaoDomain = json.Get("doubaoDomain").String() c.claudeCodeMode = json.Get("claudeCodeMode").Bool() c.zhipuDomain = json.Get("zhipuDomain").String() c.zhipuCodePlanMode = json.Get("zhipuCodePlanMode").Bool() c.contextCleanupCommands = make([]string, 0) for _, cmd := range json.Get("contextCleanupCommands").Array() { if cmd.String() != "" { c.contextCleanupCommands = append(c.contextCleanupCommands, cmd.String()) } } c.mergeConsecutiveMessages = json.Get("mergeConsecutiveMessages").Bool() c.providerDomain = json.Get("providerDomain").String() c.promoteThinkingOnEmpty = json.Get("promoteThinkingOnEmpty").Bool() c.hiclawMode = json.Get("hiclawMode").Bool() if c.hiclawMode { c.mergeConsecutiveMessages = true c.promoteThinkingOnEmpty = true } c.providerBasePath = json.Get("providerBasePath").String() } func (c *ProviderConfig) Validate() error { if c.protocol != protocolOpenAI && c.protocol != protocolOriginal { return errors.New("invalid protocol in config") } if c.context != nil { if err := c.context.Validate(); err != nil { return err } } if c.failover.enabled { if err := c.failover.Validate(); err != nil { return err } } if c.typ == "" { return errors.New("missing type in provider config") } initializer, has := providerInitializers[c.typ] if !has { return errors.New("unknown provider type: " + c.typ) } if err := 
initializer.ValidateConfig(c); err != nil { return err } return nil } func (c *ProviderConfig) GetOrSetTokenWithContext(ctx wrapper.HttpContext) string { ctxApiKey := ctx.GetContext(ctxKeyApiKey) if ctxApiKey == nil { token := c.selectApiToken(ctx) ctxApiKey = token ctx.SetContext(ctxKeyApiKey, ctxApiKey) } return ctxApiKey.(string) } // selectApiToken selects an API token based on the request context // For stateful APIs, it uses consumer affinity if available func (c *ProviderConfig) selectApiToken(ctx wrapper.HttpContext) string { // Get API name from context if available ctxApiName := ctx.GetContext(CtxKeyApiName) var apiName string if ctxApiName != nil { // ctxApiName is of type ApiName, need to convert to string apiName = string(ctxApiName.(ApiName)) } // For stateful APIs, try to use consumer affinity if isStatefulAPI(apiName) { consumer := c.getConsumerFromContext(ctx) if consumer != "" { return c.GetTokenWithConsumerAffinity(ctx, consumer) } } // Fall back to random selection return c.GetRandomToken() } // getConsumerFromContext retrieves the consumer identifier from the request context func (c *ProviderConfig) getConsumerFromContext(ctx wrapper.HttpContext) string { consumer, err := proxywasm.GetHttpRequestHeader("x-mse-consumer") if err == nil && consumer != "" { return consumer } return "" } func (c *ProviderConfig) GetRandomToken() string { apiTokens := c.apiTokens count := len(apiTokens) switch count { case 0: return "" case 1: return apiTokens[0] default: return apiTokens[rand.Intn(count)] } } // isStatefulAPI checks if the given API name is a stateful API that requires consumer affinity func isStatefulAPI(apiName string) bool { // These APIs maintain session state and should be routed to the same provider consistently statefulAPIs := map[string]bool{ string(ApiNameResponses): true, // Response API - uses previous_response_id string(ApiNameFiles): true, // Files API - maintains file state string(ApiNameRetrieveFile): true, // File retrieval - depends 
on file upload string(ApiNameRetrieveFileContent): true, // File content - depends on file upload string(ApiNameBatches): true, // Batch API - maintains batch state string(ApiNameRetrieveBatch): true, // Batch status - depends on batch creation string(ApiNameCancelBatch): true, // Batch operations - depends on batch state string(ApiNameFineTuningJobs): true, // Fine-tuning - maintains job state string(ApiNameRetrieveFineTuningJob): true, // Fine-tuning job status string(ApiNameFineTuningJobEvents): true, // Fine-tuning events string(ApiNameFineTuningJobCheckpoints): true, // Fine-tuning checkpoints string(ApiNameCancelFineTuningJob): true, // Cancel fine-tuning job string(ApiNameResumeFineTuningJob): true, // Resume fine-tuning job } return statefulAPIs[apiName] } // GetTokenWithConsumerAffinity selects an API token based on consumer affinity // If x-mse-consumer header is present and API is stateful, it will consistently select the same token func (c *ProviderConfig) GetTokenWithConsumerAffinity(ctx wrapper.HttpContext, consumer string) string { apiTokens := c.apiTokens count := len(apiTokens) switch count { case 0: return "" case 1: return apiTokens[0] default: // Use FNV-1a hash for consistent token selection h := fnv.New32a() h.Write([]byte(consumer)) hashValue := h.Sum32() index := int(hashValue) % count if index < 0 { index += count } return apiTokens[index] } } func (c *ProviderConfig) IsOriginal() bool { return c.protocol == protocolOriginal } func (c *ProviderConfig) IsGeneric() bool { return c.typ == providerTypeGeneric } func (c *ProviderConfig) GetPromoteThinkingOnEmpty() bool { return c.promoteThinkingOnEmpty } func (c *ProviderConfig) ReplaceByCustomSettings(body []byte) ([]byte, error) { return ReplaceByCustomSettings(body, c.customSettings) } func CreateProvider(pc ProviderConfig) (Provider, error) { initializer, has := providerInitializers[pc.typ] if !has { return nil, errors.New("unknown provider type: " + pc.typ) } return 
initializer.CreateProvider(pc) } // applyProviderBasePath prepends the ProviderBasePath to the given path if configured. func (c *ProviderConfig) applyProviderBasePath(path string) string { if c.providerBasePath != "" && !strings.HasPrefix(path, c.providerBasePath) { return c.providerBasePath + path } return path } func (c *ProviderConfig) parseRequestAndMapModel(ctx wrapper.HttpContext, request interface{}, body []byte) error { switch req := request.(type) { case *chatCompletionRequest: if err := decodeChatCompletionRequest(body, req); err != nil { return err } streaming := req.Stream if streaming { _ = proxywasm.ReplaceHttpRequestHeader("Accept", "text/event-stream") ctx.SetContext(ctxKeyIsStreaming, true) } else { ctx.SetContext(ctxKeyIsStreaming, false) } return c.setRequestModel(ctx, req) case *embeddingsRequest: if err := decodeEmbeddingsRequest(body, req); err != nil { return err } return c.setRequestModel(ctx, req) case *imageGenerationRequest: if err := decodeImageGenerationRequest(body, req); err != nil { return err } return c.setRequestModel(ctx, req) case *imageEditRequest: if err := decodeImageEditRequest(body, req); err != nil { return err } return c.setRequestModel(ctx, req) case *imageVariationRequest: if err := decodeImageVariationRequest(body, req); err != nil { return err } return c.setRequestModel(ctx, req) default: return errors.New("unsupported request type") } } func (c *ProviderConfig) setRequestModel(ctx wrapper.HttpContext, request interface{}) error { var model *string switch req := request.(type) { case *chatCompletionRequest: model = &req.Model case *embeddingsRequest: model = &req.Model case *imageGenerationRequest: model = &req.Model case *imageEditRequest: model = &req.Model case *imageVariationRequest: model = &req.Model default: return errors.New("unsupported request type") } return c.mapModel(ctx, model) } func (c *ProviderConfig) mapModel(ctx wrapper.HttpContext, model *string) error { if *model == "" { return errors.New("missing 
model in request") } ctx.SetContext(ctxKeyOriginalRequestModel, *model) mappedModel := getMappedModel(*model, c.modelMapping) if mappedModel == "" { return errors.New("model becomes empty after applying the configured mapping") } *model = mappedModel ctx.SetContext(ctxKeyFinalRequestModel, *model) return nil } func getMappedModel(model string, modelMapping map[string]string) string { mappedModel := doGetMappedModel(model, modelMapping) if len(mappedModel) != 0 { return mappedModel } return model } func doGetMappedModel(model string, modelMapping map[string]string) string { if len(modelMapping) == 0 { return "" } if v, ok := modelMapping[model]; ok { log.Debugf("model [%s] is mapped to [%s] explictly", model, v) return v } for k, v := range modelMapping { if k == wildcard { continue } if strings.HasSuffix(k, wildcard) { k = strings.TrimSuffix(k, wildcard) if strings.HasPrefix(model, k) { log.Debugf("model [%s] is mapped to [%s] via prefix [%s]", model, v, k) return v } } if strings.HasPrefix(k, "~") { k = strings.TrimPrefix(k, "~") re := regexp.MustCompile(k) if re.MatchString(model) { v = re.ReplaceAllString(model, v) log.Debugf("model [%s] is mapped to [%s] via regex [%s]", model, v, k) return v } } } if v, ok := modelMapping[wildcard]; ok { log.Debugf("model [%s] is mapped to [%s] via wildcard", model, v) return v } return "" } // isDeveloperRoleSupported checks if the provider supports the "developer" role. func isDeveloperRoleSupported(providerType string) bool { return developerRoleSupportedProviders[providerType] } // convertDeveloperRoleToSystem converts "developer" roles to "system" role in the request body. // This is used for providers that don't support the "developer" role. 
func convertDeveloperRoleToSystem(body []byte) ([]byte, error) { request := &chatCompletionRequest{} if err := json.Unmarshal(body, request); err != nil { return body, fmt.Errorf("unable to unmarshal request for developer role conversion: %v", err) } converted := false for i := range request.Messages { if request.Messages[i].Role == roleDeveloper { request.Messages[i].Role = roleSystem converted = true } } if converted { return json.Marshal(request) } return body, nil } func ExtractStreamingEvents(ctx wrapper.HttpContext, chunk []byte) []StreamEvent { body := chunk if bufferedStreamingBody, has := ctx.GetContext(ctxKeyStreamingBody).([]byte); has { body = append(bufferedStreamingBody, chunk...) } body = bytes.ReplaceAll(body, []byte("\r\n"), []byte("\n")) body = bytes.ReplaceAll(body, []byte("\r"), []byte("\n")) eventStartIndex, lineStartIndex, valueStartIndex := -1, -1, -1 defer func() { if eventStartIndex >= 0 && eventStartIndex < len(body) { // Just in case the received chunk is not a complete event. 
ctx.SetContext(ctxKeyStreamingBody, body[eventStartIndex:]) } else { ctx.SetContext(ctxKeyStreamingBody, nil) } }() // Sample Qwen event response: // // event:result // :HTTP_STATUS/200 // data:{"output":{"choices":[{"message":{"content":"你好!","role":"assistant"},"finish_reason":"null"}]},"usage":{"total_tokens":116,"input_tokens":114,"output_tokens":2},"request_id":"71689cfc-1f42-9949-86e8-9563b7f832b1"} // // event:error // :HTTP_STATUS/400 // data:{"code":"InvalidParameter","message":"Preprocessor error","request_id":"0cbe6006-faec-9854-bf8b-c906d75c3bd8"} // var events []StreamEvent currentKey := "" currentEvent := &StreamEvent{} i, length := 0, len(body) for i = 0; i < length; i++ { ch := body[i] if ch != '\n' { if lineStartIndex == -1 { if eventStartIndex == -1 { eventStartIndex = i } lineStartIndex = i valueStartIndex = -1 } if valueStartIndex == -1 { if ch == ':' { valueStartIndex = i + 1 currentKey = string(body[lineStartIndex:valueStartIndex]) } } else if valueStartIndex == i && ch == ' ' { // Skip leading spaces in data. valueStartIndex = i + 1 } continue } if lineStartIndex != -1 { value := string(body[valueStartIndex:i]) currentEvent.SetValue(currentKey, value) } else if eventStartIndex != -1 { currentEvent.RawEvent = string(body[eventStartIndex : i+1]) // Extra new line. The current event is complete. events = append(events, *currentEvent) // Reset event parsing state. eventStartIndex = -1 currentEvent = &StreamEvent{} } // Reset line parsing state. 
lineStartIndex = -1 valueStartIndex = -1 currentKey = "" } return events } func (c *ProviderConfig) isSupportedAPI(apiName ApiName) bool { _, exist := c.capabilities[string(apiName)] return exist } func (c *ProviderConfig) IsSupportedAPI(apiName ApiName) bool { return c.isSupportedAPI(apiName) } func (c *ProviderConfig) setDefaultCapabilities(capabilities map[string]string) { if c.capabilities == nil { c.capabilities = make(map[string]string) } for capability, path := range capabilities { c.capabilities[capability] = path } } func (c *ProviderConfig) handleRequestBody( provider Provider, contextCache *contextCache, ctx wrapper.HttpContext, apiName ApiName, body []byte, ) (types.Action, error) { // add the first byte timeout header to the request if c.firstByteTimeout != 0 && c.isStreamingAPI(apiName, body) { err := proxywasm.ReplaceHttpRequestHeader("x-envoy-upstream-rq-first-byte-timeout-ms", strconv.FormatUint(uint64(c.firstByteTimeout), 10)) if err != nil { log.Errorf("failed to set x-envoy-upstream-rq-first-byte-timeout-ms header: %v", err) } log.Debugf("[firstByteTimeout] %d", c.firstByteTimeout) } // use original protocol if c.IsOriginal() { return types.ActionContinue, nil } var err error // handle claude protocol input - auto-detect based on conversion marker // If main.go detected a Claude request that needs conversion, convert the body needClaudeConversion, _ := ctx.GetContext("needClaudeResponseConversion").(bool) if needClaudeConversion { // Extract thinking config from original Claude body before conversion, // so downstream providers (OpenRouter, ZhipuAI) can access it. 
thinkingType := gjson.GetBytes(body, "thinking.type").String() if thinkingType == "" { // Claude request had no thinking field at all - treat as disabled thinkingType = "disabled" } ctx.SetContext(ctxKeyClaudeThinkingType, thinkingType) // Only extract budget_tokens when thinking is explicitly enabled if thinkingType == "enabled" { if budgetTokens := gjson.GetBytes(body, "thinking.budget_tokens").Int(); budgetTokens > 0 { ctx.SetContext(ctxKeyClaudeBudgetTokens, int(budgetTokens)) } } // Convert Claude protocol to OpenAI protocol converter := &ClaudeToOpenAIConverter{} body, err = converter.ConvertClaudeRequestToOpenAI(body) if err != nil { return types.ActionContinue, fmt.Errorf("failed to convert claude request to openai: %v", err) } log.Debugf("[Auto Protocol] converted Claude request body to OpenAI format") } // handle context cleanup command for chat completion requests if apiName == ApiNameChatCompletion && len(c.contextCleanupCommands) > 0 { body, err = cleanupContextMessages(body, c.contextCleanupCommands) if err != nil { log.Warnf("[contextCleanup] failed to cleanup context messages: %v", err) // Continue processing even if cleanup fails err = nil } } // merge consecutive same-role messages for providers that require strict role alternation if apiName == ApiNameChatCompletion && c.mergeConsecutiveMessages { body, err = mergeConsecutiveMessages(body) if err != nil { log.Warnf("[mergeConsecutiveMessages] failed to merge messages: %v", err) err = nil } else { log.Debugf("[mergeConsecutiveMessages] merged consecutive messages for provider: %s", c.typ) } } // convert developer role to system role for providers that don't support it if apiName == ApiNameChatCompletion && !isDeveloperRoleSupported(c.typ) { body, err = convertDeveloperRoleToSystem(body) if err != nil { log.Warnf("[developerRole] failed to convert developer role to system: %v", err) // Continue processing even if conversion fails err = nil } else { log.Debugf("[developerRole] converted developer 
role to system for provider: %s", c.typ) } } // use openai protocol (either original openai or converted from claude) if handler, ok := provider.(TransformRequestBodyHandler); ok { body, err = handler.TransformRequestBody(ctx, apiName, body) } else if handler, ok := provider.(TransformRequestBodyHeadersHandler); ok { headers := util.GetRequestHeaders() body, err = handler.TransformRequestBodyHeaders(ctx, apiName, body, headers) // Apply providerBasePath if configured if c.providerBasePath != "" { headers.Set(":path", c.applyProviderBasePath(headers.Get(":path"))) } util.ReplaceRequestHeaders(headers) } else { body, err = c.defaultTransformRequestBody(ctx, apiName, body) } if err != nil { return types.ActionContinue, err } if apiName == ApiNameChatCompletion { if c.context == nil { return types.ActionContinue, replaceRequestBody(body) } err = contextCache.GetContextFromFile(ctx, provider, body) if err == nil { return types.ActionPause, nil } return types.ActionContinue, err } return types.ActionContinue, replaceRequestBody(body) } func (c *ProviderConfig) handleRequestHeaders(provider Provider, ctx wrapper.HttpContext, apiName ApiName) { headers := util.GetRequestHeaders() originPath := headers.Get(":path") // Record the path after removePrefix processing var removePrefixPath string if c.basePath != "" && c.basePathHandling == basePathHandlingRemovePrefix { removePrefixPath = strings.TrimPrefix(originPath, c.basePath) headers.Set(":path", removePrefixPath) } if handler, ok := provider.(TransformRequestHeadersHandler); ok { handler.TransformRequestHeaders(ctx, apiName, headers) } // When using original protocol with removePrefix, restore the basePath-processed path. // This ensures basePathHandling works correctly even when TransformRequestHeaders // overwrites the path (which most providers do). // // TODO: Most providers (OpenAI, vLLM, DeepSeek, Claude, etc.) unconditionally overwrite // the path in TransformRequestHeaders without checking IsOriginal(). 
Ideally, each provider // should check IsOriginal() before overwriting the path (like Qwen does). Once all providers // are updated to handle protocol correctly, this workaround can be removed. // Affected providers: OpenAI, vLLM, ZhipuAI, Moonshot, Longcat, DeepSeek, Azure, Yi, // TogetherAI, Stepfun, Ollama, Hunyuan, GitHub, Doubao, Cohere, Baichuan, AI360, Claude, // Groq, Grok, Spark, Fireworks, Cloudflare, Baidu, OpenRouter, DeepL (24+ providers) if c.IsOriginal() && removePrefixPath != "" { headers.Set(":path", removePrefixPath) } if c.basePath != "" && c.basePathHandling == basePathHandlingPrepend && !strings.HasPrefix(headers.Get(":path"), c.basePath) { headers.Set(":path", path.Join(c.basePath, headers.Get(":path"))) } // Apply providerBasePath if configured currentPath := headers.Get(":path") if c.providerBasePath != "" { headers.Set(":path", c.applyProviderBasePath(currentPath)) } // Apply providerDomain if configured (overrides any domain set by the provider) if c.providerDomain != "" { util.OverwriteRequestHostHeader(headers, c.providerDomain) } util.ReplaceRequestHeaders(headers) } // defaultTransformRequestBody 默认的请求体转换方法,只做模型映射,用slog替换模型名称,不用序列化和反序列化,提高性能 func (c *ProviderConfig) defaultTransformRequestBody(ctx wrapper.HttpContext, apiName ApiName, body []byte) ([]byte, error) { if contentType, err := proxywasm.GetHttpRequestHeader(util.HeaderContentType); err == nil && isMultipartFormData(contentType) { return c.defaultTransformMultipartRequestBody(ctx, apiName, body, contentType) } switch apiName { case ApiNameChatCompletion, ApiNameVideos, ApiNameVideoRemix: stream := gjson.GetBytes(body, "stream").Bool() if stream { _ = proxywasm.ReplaceHttpRequestHeader("Accept", "text/event-stream") ctx.SetContext(ctxKeyIsStreaming, true) } else { ctx.SetContext(ctxKeyIsStreaming, false) } } model := gjson.GetBytes(body, "model").String() ctx.SetContext(ctxKeyOriginalRequestModel, model) mappedModel := getMappedModel(model, c.modelMapping) 
ctx.SetContext(ctxKeyFinalRequestModel, mappedModel) return sjson.SetBytes(body, "model", mappedModel) } func (c *ProviderConfig) defaultTransformMultipartRequestBody(ctx wrapper.HttpContext, apiName ApiName, body []byte, contentType string) ([]byte, error) { if apiName != ApiNameImageEdit && apiName != ApiNameImageVariation { return body, nil } model, err := extractMultipartModel(body, contentType) if err != nil { return nil, err } ctx.SetContext(ctxKeyOriginalRequestModel, model) mappedModel := getMappedModel(model, c.modelMapping) ctx.SetContext(ctxKeyFinalRequestModel, mappedModel) if mappedModel == model || (mappedModel == "" && model == "") { return body, nil } return rewriteMultipartFormModel(body, contentType, mappedModel) } func (c *ProviderConfig) DefaultTransformResponseHeaders(ctx wrapper.HttpContext, headers http.Header) { if c.protocol == protocolOriginal { ctx.DontReadResponseBody() } else { headers.Del("Content-Length") } } func (c *ProviderConfig) isStreamingAPI(apiName ApiName, body []byte) bool { stream := false switch apiName { case ApiNameCompletion, ApiNameChatCompletion, ApiNameImageGeneration, ApiNameImageEdit, ApiNameResponses, ApiNameQwenAsyncAIGC, ApiNameAnthropicMessages, ApiNameAnthropicComplete: stream = gjson.GetBytes(body, "stream").Bool() case ApiNameGeminiStreamGenerateContent: stream = true } return stream } func (c *ProviderConfig) needToProcessRequestBody(apiName ApiName) bool { switch apiName { case ApiNameChatCompletion, ApiNameVideos, ApiNameVideoRemix, ApiNameCompletion, ApiNameEmbeddings, ApiNameImageGeneration, ApiNameImageEdit, ApiNameImageVariation, ApiNameAudioSpeech, ApiNameFineTuningJobs, ApiNameResponses, ApiNameGeminiGenerateContent, ApiNameGeminiStreamGenerateContent, ApiNameAnthropicMessages: return true } return false }