feat: Add an AI-Proxy Wasm plugin (#921)

Co-authored-by: 澄潭 <zty98751@alibaba-inc.com>
This commit is contained in:
Kent Dong
2024-05-14 17:00:12 +08:00
committed by GitHub
parent 5c7736980c
commit 333f9b48f3
21 changed files with 2124 additions and 1 deletions

View File

@@ -5,6 +5,9 @@ FROM $BUILDER as builder
ARG GOPROXY
ENV GOPROXY=${GOPROXY}
ARG EXTRA_TAGS=""
ENV EXTRA_TAGS=${EXTRA_TAGS}
ARG PLUGIN_NAME=hello-world
WORKDIR /workspace
@@ -14,7 +17,7 @@ COPY . .
WORKDIR /workspace/extensions/$PLUGIN_NAME
RUN go mod tidy
RUN tinygo build -o /main.wasm -scheduler=none -gc=custom -tags='custommalloc nottinygc_finalizer' -target=wasi ./
RUN tinygo build -o /main.wasm -scheduler=none -gc=custom -tags="custommalloc nottinygc_finalizer $EXTRA_TAGS" -target=wasi ./
FROM scratch as output

View File

@@ -0,0 +1,19 @@
# File generated by hgctl. Modify as required.
*
!/.gitignore
!*.go
!go.sum
!go.mod
!LICENSE
!*.md
!*.yaml
!*.yml
!*/
/out
/test

View File

@@ -0,0 +1,340 @@
---
title: AI 代理
keywords: [ higress,ai,proxy,rag ]
description: AI 代理插件配置参考
---
## 功能说明
`AI 代理`插件实现了基于 OpenAI API 契约的 AI 代理功能。目前支持 OpenAI、Azure OpenAI、月之暗面Moonshot和通义千问等 AI
服务提供商。
## 配置字段
### 基本配置
| 名称 | 数据类型 | 填写要求 | 默认值 | 描述 |
|------------|--------|------|-----|------------------|
| `provider` | object | 必填 | - | 配置目标 AI 服务提供商的信息 |
`provider`的配置字段说明如下:
| 名称 | 数据类型 | 填写要求 | 默认值 | 描述 |
|----------------|-------------------------|------|-----|-----------------------------------------------------------------------|
| `type` | string | 必填 | - | AI 服务提供商名称。目前支持以下值openai, azure, moonshot, qwen |
| `apiTokens` | array of string | 必填 | - | 用于在访问 AI 服务时进行认证的令牌。如果配置了多个 token插件会在请求时随机进行选择。部分服务提供商只支持配置一个 token。 |
| `timeout` | number | 非必填 | - | 访问 AI 服务的超时时间。单位为毫秒。默认值为 120000即 2 分钟 |
| `modelMapping` | map of string to string | 非必填 | - | AI 模型映射表,用于将请求中的模型名称映射为服务提供商支持模型名称。<br/>可以使用 "*" 为键来配置通用兜底映射关系 |
| `context` | object | 非必填 | - | 配置 AI 对话上下文信息 |
`context`的配置字段说明如下:
| 名称 | 数据类型 | 填写要求 | 默认值 | 描述 |
|---------------|--------|------|-----|----------------------------------|
| `fileUrl` | string | 必填 | - | 保存 AI 对话上下文的文件 URL。仅支持纯文本类型的文件内容 |
| `serviceName` | string | 必填 | - | URL 所对应的 Higress 后端服务完整名称 |
| `servicePort` | number | 必填 | - | URL 所对应的 Higress 后端服务访问端口 |
### 提供商特有配置
#### OpenAI
OpenAI 所对应的 `type``openai`。它并无特有的配置字段。
#### Azure OpenAI
Azure OpenAI 所对应的 `type``azure`。它特有的配置字段如下:
| 名称 | 数据类型 | 填写要求 | 默认值 | 描述 |
|-------------------|--------|------|-----|----------------------------------------------|
| `azureServiceUrl` | string | 必填 | - | Azure OpenAI 服务的 URL须包含 `api-version` 查询参数。 |
**注意:** Azure OpenAI 只支持配置一个 API Token。
#### 月之暗面Moonshot
月之暗面所对应的 `type``moonshot`。它特有的配置字段如下:
| 名称 | 数据类型 | 填写要求 | 默认值 | 描述 |
|------------------|--------|------|-----|-------------------------------------------------------------|
| `moonshotFileId` | string | 非必填 | - | 通过文件接口上传至月之暗面的文件 ID其内容将被用做 AI 对话的上下文。不可与 `context` 字段同时配置。 |
#### 通义千问Qwen
通义千问所对应的 `type``qwen`。它并无特有的配置字段。
## 用法示例
### 使用 OpenAI 协议代理 Azure OpenAI 服务
使用最基本的 Azure OpenAI 服务,不配置任何上下文。
**配置信息**
```yaml
provider:
type: azure
apiTokens:
- "YOUR_AZURE_OPENAI_API_TOKEN"
azureServiceUrl: "https://YOUR_RESOURCE_NAME.openai.azure.com/openai/deployments/YOUR_DEPLOYMENT_NAME/chat/completions?api-version=2024-02-15-preview",
```
**请求示例**
```json
{
"model": "gpt-3",
"messages": [
{
"role": "user",
"content": "你好,你是谁?"
}
],
"temperature": 0.3
}
```
**响应示例**
```json
{
"choices": [
{
"content_filter_results": {
"hate": {
"filtered": false,
"severity": "safe"
},
"self_harm": {
"filtered": false,
"severity": "safe"
},
"sexual": {
"filtered": false,
"severity": "safe"
},
"violence": {
"filtered": false,
"severity": "safe"
}
},
"finish_reason": "stop",
"index": 0,
"logprobs": null,
"message": {
"content": "你好我是一个AI助手可以回答你的问题和提供帮助。有什么我可以帮到你的吗",
"role": "assistant"
}
}
],
"created": 1714807624,
"id": "chatcmpl-abcdefg1234567890",
"model": "gpt-35-turbo-16k",
"object": "chat.completion",
"prompt_filter_results": [
{
"prompt_index": 0,
"content_filter_results": {
"hate": {
"filtered": false,
"severity": "safe"
},
"self_harm": {
"filtered": false,
"severity": "safe"
},
"sexual": {
"filtered": false,
"severity": "safe"
},
"violence": {
"filtered": false,
"severity": "safe"
}
}
}
],
"system_fingerprint": null,
"usage": {
"completion_tokens": 40,
"prompt_tokens": 15,
"total_tokens": 55
}
}
```
### 使用 OpenAI 协议代理通义千问服务
使用通义千问服务,并配置从 OpenAI 大模型到通义千问的模型映射关系。
**配置信息**
```yaml
provider:
type: qwen
apiTokens:
- "YOUR_QWEN_API_TOKEN"
modelMapping:
'gpt-3': "qwen-turbo"
'gpt-35-turbo': "qwen-plus"
'gpt-4-turbo': "qwen-max"
'*': "qwen-turbo"
```
**请求示例**
```json
{
"model": "gpt-3",
"messages": [
{
"role": "user",
"content": "你好,你是谁?"
}
],
"temperature": 0.3
}
```
**响应示例**
```json
{
"id": "c2518bd3-0f46-97d1-be34-bb5777cb3108",
"choices": [
{
"index": 0,
"message": {
"role": "assistant",
"content": "我是通义千问由阿里云开发的AI助手。我可以回答各种问题、提供信息和与用户进行对话。有什么我可以帮助你的吗"
},
"finish_reason": "stop"
}
],
"created": 1715175072,
"model": "qwen-turbo",
"object": "chat.completion",
"usage": {
"prompt_tokens": 24,
"completion_tokens": 33,
"total_tokens": 57
}
}
```
### 使用通义千问配合纯文本上下文信息
使用通义千问服务,同时配置纯文本上下文信息。
**配置信息**
```yaml
provider:
type: qwen
apiTokens:
- "YOUR_QWEN_API_TOKEN"
modelMapping:
"*": "qwen-turbo"
context:
- fileUrl: "http://file.default.svc.cluster.local/ai/context.txt",
serviceName: "file.dns",
servicePort: 80
```
**请求示例**
```json
{
"model": "gpt-3",
"messages": [
{
"role": "user",
"content": "请概述文案内容"
}
],
"temperature": 0.3
}
```
**响应示例**
```json
{
"id": "cmpl-77861a17681f4987ab8270dbf8001936",
"object": "chat.completion",
"created": 9756990,
"model": "moonshot-v1-128k",
"choices": [
{
"index": 0,
"message": {
"role": "assistant",
"content": "这份文案是一份关于..."
},
"finish_reason": "stop"
}
],
"usage": {
"prompt_tokens": 20181,
"completion_tokens": 439,
"total_tokens": 20620
}
}
```
### 使用月之暗面配合其原生的文件上下文
提前上传文件至月之暗面,以文件内容作为上下文使用其 AI 服务。
**配置信息**
```yaml
provider:
type: moonshot
apiTokens:
- "YOUR_MOONSHOT_API_TOKEN"
moonshotFileId: "YOUR_MOONSHOT_FILE_ID",
modelMapping:
'*': "moonshot-v1-32k"
```
**请求示例**
```json
{
"model": "gpt-4-turbo",
"messages": [
{
"role": "user",
"content": "请概述文案内容"
}
],
"temperature": 0.3
}
```
**响应示例**
```json
{
"id": "cmpl-e5ca873642ca4f5d8b178c1742f9a8e8",
"object": "chat.completion",
"created": 1872961,
"model": "moonshot-v1-128k",
"choices": [
{
"index": 0,
"message": {
"role": "assistant",
"content": "文案内容是关于一个名为“xxxx”的支付平台..."
},
"finish_reason": "stop"
}
],
"usage": {
"prompt_tokens": 11,
"completion_tokens": 498,
"total_tokens": 509
}
}
```

View File

@@ -0,0 +1,52 @@
package config
import (
"github.com/tidwall/gjson"
"github.com/alibaba/higress/plugins/wasm-go/extensions/ai-proxy/provider"
)
// @Name ai-proxy
// @Category custom
// @Phase UNSPECIFIED_PHASE
// @Priority 0
// @Title zh-CN AI代理
// @Description zh-CN 通过AI助手提供智能对话服务
// @IconUrl https://img.alicdn.com/imgextra/i1/O1CN018iKKih1iVx287RltL_!!6000000004419-2-tps-42-42.png
// @Version 0.1.0
//
// @Contact.name CH3CHO
// @Contact.url https://github.com/CH3CHO
// @Contact.email ch3cho@qq.com
//
// @Example
// { "provider": { "type": "qwen", "apiToken": "YOUR_DASHSCOPE_API_TOKEN", "modelMapping": { "*": "qwen-turbo" } } }
// @End
type PluginConfig struct {
// @Title zh-CN AI服务提供商配置
// @Description zh-CN AI服务提供商配置包含API接口、模型和知识库文件等信息
providerConfig provider.ProviderConfig `required:"true" yaml:"provider"`
provider provider.Provider `yaml:"-"`
}
func (c *PluginConfig) FromJson(json gjson.Result) {
c.providerConfig.FromJson(json.Get("provider"))
}
func (c *PluginConfig) Validate() error {
if err := c.providerConfig.Validate(); err != nil {
return err
}
return nil
}
func (c *PluginConfig) Complete() error {
var err error
c.provider, err = provider.CreateProvider(c.providerConfig)
return err
}
func (c *PluginConfig) GetProvider() provider.Provider {
return c.provider
}

View File

@@ -0,0 +1,110 @@
# File generated by hgctl. Modify as required.
admin:
address:
socket_address:
protocol: TCP
address: 0.0.0.0
port_value: 9901
static_resources:
listeners:
- name: listener_0
address:
socket_address:
protocol: TCP
address: 0.0.0.0
port_value: 10000
filter_chains:
- filters:
- name: envoy.filters.network.http_connection_manager
typed_config:
"@type": type.googleapis.com/envoy.extensions.filters.network.http_connection_manager.v3.HttpConnectionManager
scheme_header_transformation:
scheme_to_overwrite: https
stat_prefix: ingress_http
# Output envoy logs to stdout
access_log:
- name: envoy.access_loggers.stdout
typed_config:
"@type": type.googleapis.com/envoy.extensions.access_loggers.stream.v3.StdoutAccessLog
# Modify as required
route_config:
name: local_route
virtual_hosts:
- name: local_service
domains: [ "*" ]
routes:
- match:
prefix: "/"
route:
cluster: moonshot
timeout: 300s
http_filters:
- name: wasmtest
typed_config:
"@type": type.googleapis.com/udpa.type.v1.TypedStruct
type_url: type.googleapis.com/envoy.extensions.filters.http.wasm.v3.Wasm
value:
config:
name: wasmtest
vm_config:
runtime: envoy.wasm.runtime.v8
code:
local:
filename: /etc/envoy/plugin.wasm
configuration:
"@type": "type.googleapis.com/google.protobuf.StringValue"
value: |
{
"provider": {
"type": "moonshot",
"domain": "api.moonshot.cn",
"apiTokens": [
"****",
"****"
],
"timeout": 1200000,
"modelMapping": {
"gpt-3": "moonshot-v1-8k",
"gpt-35-turbo": "moonshot-v1-32k",
"gpt-4-turbo": "moonshot-v1-128k",
"*": "moonshot-v1-8k"
},
}
}
- name: envoy.filters.http.router
clusters:
- name: httpbin
connect_timeout: 30s
type: LOGICAL_DNS
# Comment out the following line to test on v6 networks
dns_lookup_family: V4_ONLY
lb_policy: ROUND_ROBIN
load_assignment:
cluster_name: httpbin
endpoints:
- lb_endpoints:
- endpoint:
address:
socket_address:
address: httpbin
port_value: 80
- name: moonshot
connect_timeout: 30s
type: LOGICAL_DNS
dns_lookup_family: V4_ONLY
lb_policy: ROUND_ROBIN
load_assignment:
cluster_name: moonshot
endpoints:
- lb_endpoints:
- endpoint:
address:
socket_address:
address: api.moonshot.cn
port_value: 443
transport_socket:
name: envoy.transport_sockets.tls
typed_config:
"@type": type.googleapis.com/envoy.extensions.transport_sockets.tls.v3.UpstreamTlsContext
"sni": "api.moonshot.cn"

View File

@@ -0,0 +1,26 @@
// File generated by hgctl. Modify as required.
module github.com/alibaba/higress/plugins/wasm-go/extensions/ai-proxy
go 1.19
replace github.com/alibaba/higress/plugins/wasm-go => ../..
require (
github.com/alibaba/higress/plugins/wasm-go v0.0.0
github.com/higress-group/proxy-wasm-go-sdk v0.0.0-20240318034951-d5306e367c43
github.com/stretchr/testify v1.8.4
github.com/tidwall/gjson v1.14.3
)
require (
github.com/davecgh/go-spew v1.1.1 // indirect
github.com/google/uuid v1.3.0 // indirect
github.com/higress-group/nottinygc v0.0.0-20231101025119-e93c4c2f8520 // indirect
github.com/magefile/mage v1.14.0 // indirect
github.com/pmezard/go-difflib v1.0.0 // indirect
github.com/tidwall/match v1.1.1 // indirect
github.com/tidwall/pretty v1.2.0 // indirect
github.com/tidwall/resp v0.1.1 // indirect
gopkg.in/yaml.v3 v3.0.1 // indirect
)

View File

@@ -0,0 +1,26 @@
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/google/uuid v1.3.0 h1:t6JiXgmwXMjEs8VusXIJk2BXHsn+wx8BZdTaoZ5fu7I=
github.com/google/uuid v1.3.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
github.com/higress-group/nottinygc v0.0.0-20231101025119-e93c4c2f8520 h1:IHDghbGQ2DTIXHBHxWfqCYQW1fKjyJ/I7W1pMyUDeEA=
github.com/higress-group/nottinygc v0.0.0-20231101025119-e93c4c2f8520/go.mod h1:Nz8ORLaFiLWotg6GeKlJMhv8cci8mM43uEnLA5t8iew=
github.com/higress-group/proxy-wasm-go-sdk v0.0.0-20240318034951-d5306e367c43 h1:dCw7F/9ciw4NZN7w68wQRaygZ2zGOWMTIEoRvP1tlWs=
github.com/higress-group/proxy-wasm-go-sdk v0.0.0-20240318034951-d5306e367c43/go.mod h1:hNFjhrLUIq+kJ9bOcs8QtiplSQ61GZXtd2xHKx4BYRo=
github.com/magefile/mage v1.14.0 h1:6QDX3g6z1YvJ4olPhT1wksUcSa/V0a1B+pJb73fBjyo=
github.com/magefile/mage v1.14.0/go.mod h1:z5UZb/iS3GoOSn0JgWuiw7dxlurVYTu+/jHXqQg881A=
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/stretchr/testify v1.8.4 h1:CcVxjf3Q8PM0mHUKJCdn+eZZtm5yQwehR5yeSVQQcUk=
github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo=
github.com/tidwall/gjson v1.14.3 h1:9jvXn7olKEHU1S9vwoMGliaT8jq1vJ7IH/n9zD9Dnlw=
github.com/tidwall/gjson v1.14.3/go.mod h1:/wbyibRr2FHMks5tjHJ5F8dMZh3AcwJEMf5vlfC0lxk=
github.com/tidwall/match v1.1.1 h1:+Ho715JplO36QYgwN9PGYNhgZvoUSc9X2c80KVTi+GA=
github.com/tidwall/match v1.1.1/go.mod h1:eRSPERbgtNPcGhD8UCthc6PmLEQXEWd3PRB5JTxsfmM=
github.com/tidwall/pretty v1.2.0 h1:RWIZEg2iJ8/g6fDDYzMpobmaoGh5OLl4AXtGUGPcqCs=
github.com/tidwall/pretty v1.2.0/go.mod h1:ITEVvHYasfjBbM0u2Pg8T2nJnzm8xPwvNhhsoaGGjNU=
github.com/tidwall/resp v0.1.1 h1:Ly20wkhqKTmDUPlyM1S7pWo5kk0tDu8OoC/vFArXmwE=
github.com/tidwall/resp v0.1.1/go.mod h1:3/FrruOBAxPTPtundW0VXgmsQ4ZBA0Aw714lVYgwFa0=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=

View File

@@ -0,0 +1,199 @@
// File generated by hgctl. Modify as required.
// See: https://higress.io/zh-cn/docs/user/wasm-go#2-%E7%BC%96%E5%86%99-maingo-%E6%96%87%E4%BB%B6
package main
import (
"fmt"
"github.com/higress-group/proxy-wasm-go-sdk/proxywasm"
"net/url"
"strings"
"github.com/alibaba/higress/plugins/wasm-go/extensions/ai-proxy/config"
"github.com/alibaba/higress/plugins/wasm-go/extensions/ai-proxy/provider"
"github.com/alibaba/higress/plugins/wasm-go/extensions/ai-proxy/util"
"github.com/alibaba/higress/plugins/wasm-go/pkg/wrapper"
"github.com/higress-group/proxy-wasm-go-sdk/proxywasm/types"
"github.com/tidwall/gjson"
)
const (
pluginName = "ai-proxy"
ctxKeyApiName = "apiKey"
)
func main() {
wrapper.SetCtx(
pluginName,
wrapper.ParseConfigBy(parseConfig),
wrapper.ProcessRequestHeadersBy(onHttpRequestHeader),
wrapper.ProcessRequestBodyBy(onHttpRequestBody),
wrapper.ProcessResponseHeadersBy(onHttpResponseHeaders),
wrapper.ProcessStreamingResponseBodyBy(onStreamingResponseBody),
wrapper.ProcessResponseBodyBy(onHttpResponseBody),
)
}
func parseConfig(json gjson.Result, pluginConfig *config.PluginConfig, log wrapper.Log) error {
//log.Debugf("loading config: %s", json.String())
pluginConfig.FromJson(json)
if err := pluginConfig.Validate(); err != nil {
return err
}
if err := pluginConfig.Complete(); err != nil {
return err
}
return nil
}
func onHttpRequestHeader(ctx wrapper.HttpContext, pluginConfig config.PluginConfig, log wrapper.Log) types.Action {
activeProvider := pluginConfig.GetProvider()
if activeProvider == nil {
log.Debugf("[onHttpRequestHeader] no active provider, skip processing")
ctx.DontReadRequestBody()
return types.ActionContinue
}
rawPath := ctx.Path()
path, _ := url.Parse(rawPath)
apiName := getApiName(path.Path)
if apiName == "" {
log.Debugf("[onHttpRequestHeader] unsupported path: %s", path.Path)
_ = util.SendResponse(404, util.MimeTypeTextPlain, "API not found: "+path.Path)
return types.ActionContinue
}
ctx.SetContext(ctxKeyApiName, apiName)
if handler, ok := activeProvider.(provider.RequestHeadersHandler); ok {
action, err := handler.OnRequestHeaders(ctx, apiName, log)
if err == nil {
return action
}
_ = util.SendResponse(404, util.MimeTypeTextPlain, fmt.Sprintf("failed to process request headers: %v", err))
return types.ActionContinue
}
if _, needHandleBody := activeProvider.(provider.RequestBodyHandler); needHandleBody {
ctx.DontReadRequestBody()
}
return types.ActionContinue
}
func onHttpRequestBody(ctx wrapper.HttpContext, pluginConfig config.PluginConfig, body []byte, log wrapper.Log) types.Action {
activeProvider := pluginConfig.GetProvider()
if activeProvider == nil {
log.Debugf("[onHttpRequestBody] no active provider, skip processing")
return types.ActionContinue
}
log.Debugf("[onHttpRequestBody] provider=%s", activeProvider.GetProviderType())
if handler, ok := activeProvider.(provider.RequestBodyHandler); ok {
apiName := ctx.GetContext(ctxKeyApiName).(provider.ApiName)
action, err := handler.OnRequestBody(ctx, apiName, body, log)
if err == nil {
return action
}
_ = util.SendResponse(404, util.MimeTypeTextPlain, fmt.Sprintf("failed to process request body: %v", err))
return types.ActionContinue
}
return types.ActionContinue
}
func onHttpResponseHeaders(ctx wrapper.HttpContext, pluginConfig config.PluginConfig, log wrapper.Log) types.Action {
activeProvider := pluginConfig.GetProvider()
if activeProvider == nil {
log.Debugf("[onHttpResponseHeaders] no active provider, skip processing")
ctx.DontReadResponseBody()
return types.ActionContinue
}
log.Debugf("[onHttpResponseHeaders] provider=%s", activeProvider.GetProviderType())
status, err := proxywasm.GetHttpResponseHeader(":status")
if err != nil || status != "200" {
if err != nil {
log.Errorf("unable to load :status header from response: %v", err)
}
ctx.DontReadResponseBody()
return types.ActionContinue
}
if handler, ok := activeProvider.(provider.ResponseHeadersHandler); ok {
apiName := ctx.GetContext(ctxKeyApiName).(provider.ApiName)
action, err := handler.OnResponseHeaders(ctx, apiName, log)
if err == nil {
return action
}
_ = util.SendResponse(404, util.MimeTypeTextPlain, fmt.Sprintf("failed to process response headers: %v", err))
return types.ActionContinue
}
_, needHandleBody := activeProvider.(provider.ResponseBodyHandler)
_, needHandleStreamingBody := activeProvider.(provider.StreamingResponseBodyHandler)
if !needHandleBody && !needHandleStreamingBody {
ctx.DontReadResponseBody()
} else if !needHandleStreamingBody {
ctx.BufferResponseBody()
}
return types.ActionContinue
}
func onStreamingResponseBody(ctx wrapper.HttpContext, pluginConfig config.PluginConfig, chunk []byte, isLastChunk bool, log wrapper.Log) []byte {
activeProvider := pluginConfig.GetProvider()
if activeProvider == nil {
log.Debugf("[onStreamingResponseBody] no active provider, skip processing")
return chunk
}
log.Debugf("[onStreamingResponseBody] provider=%s", activeProvider.GetProviderType())
log.Debugf("isLastChunk=%v chunk: %s", isLastChunk, string(chunk))
if handler, ok := activeProvider.(provider.StreamingResponseBodyHandler); ok {
apiName := ctx.GetContext(ctxKeyApiName).(provider.ApiName)
modifiedChunk, err := handler.OnStreamingResponseBody(ctx, apiName, chunk, isLastChunk, log)
if err == nil && modifiedChunk != nil {
return modifiedChunk
}
return chunk
}
return chunk
}
func onHttpResponseBody(ctx wrapper.HttpContext, pluginConfig config.PluginConfig, body []byte, log wrapper.Log) types.Action {
activeProvider := pluginConfig.GetProvider()
if activeProvider == nil {
log.Debugf("[onHttpResponseBody] no active provider, skip processing")
return types.ActionContinue
}
log.Debugf("[onHttpResponseBody] provider=%s", activeProvider.GetProviderType())
//log.Debugf("response body: %s", string(body))
if handler, ok := activeProvider.(provider.ResponseBodyHandler); ok {
apiName := ctx.GetContext(ctxKeyApiName).(provider.ApiName)
action, err := handler.OnResponseBody(ctx, apiName, body, log)
if err == nil {
return action
}
_ = util.SendResponse(404, util.MimeTypeTextPlain, fmt.Sprintf("failed to process response body: %v", err))
return types.ActionContinue
}
return types.ActionContinue
}
func getApiName(path string) provider.ApiName {
if strings.HasSuffix(path, "/v1/chat/completions") {
return provider.ApiNameChatCompletion
}
return ""
}

View File

@@ -0,0 +1,52 @@
# File generated by hgctl. Modify as required.
version: 1.0.0
build:
# The official builder image version
builder:
go: 1.19
tinygo: 0.28.1
oras: 1.0.0
# The WASM plugin project directory
input: ./
# The output of the build products
output:
# Choose between 'files' and 'image'
type: files
# Destination address: when type=files, specify the local directory path, e.g., './out' or
# type=image, specify the remote docker repository, e.g., 'docker.io/<your_username>/<your_image>'
dest: ./out
# The authentication configuration for pushing image to the docker repository
docker-auth: ~/.docker/config.json
# The directory for the WASM plugin configuration structure
model-dir: ./
# The WASM plugin configuration structure name
model: config.PluginConfig
# Enable debug mode
debug: false
test:
# Test environment name, that is a docker compose project name
name: wasm-test
# The output path to build products, that is the source of test configuration parameters
from-path: ./out
# The test configuration source
test-path: ./test
# Docker compose configuration, which is empty, looks for the following files from 'test-path':
# compose.yaml, compose.yml, docker-compose.yml, docker-compose.yaml
compose-file:
# Detached mode: Run containers in the background
detach: false
install:
# The namespace of the installation
namespace: higress-system
# Use to validate WASM plugin configuration when install by yaml
spec-yaml: ./out/spec.yaml
# Installation source. Choose between 'from-yaml' and 'from-go-project'
from-yaml: ./test/plugin-conf.yaml
# If 'from-go-src' is non-empty, the output type of the build option must be 'image'
from-go-src:
# Enable debug mode
debug: false

View File

@@ -0,0 +1,99 @@
package provider
import (
"errors"
"fmt"
"net/url"
"github.com/alibaba/higress/plugins/wasm-go/extensions/ai-proxy/util"
"github.com/alibaba/higress/plugins/wasm-go/pkg/wrapper"
"github.com/higress-group/proxy-wasm-go-sdk/proxywasm"
"github.com/higress-group/proxy-wasm-go-sdk/proxywasm/types"
)
// azureProvider is the provider for Azure OpenAI service.
type azureProviderInitializer struct {
}
func (m *azureProviderInitializer) ValidateConfig(config ProviderConfig) error {
if config.azureServiceUrl == "" {
return errors.New("missing azureServiceUrl in provider config")
}
if _, err := url.Parse(config.azureServiceUrl); err != nil {
return fmt.Errorf("invalid azureServiceUrl: %w", err)
}
return nil
}
func (m *azureProviderInitializer) CreateProvider(config ProviderConfig) (Provider, error) {
var serviceUrl *url.URL
if u, err := url.Parse(config.azureServiceUrl); err != nil {
return nil, fmt.Errorf("invalid azureServiceUrl: %w", err)
} else {
serviceUrl = u
}
return &azureProvider{
config: config,
serviceUrl: serviceUrl,
contextCache: createContextCache(&config),
}, nil
}
type azureProvider struct {
config ProviderConfig
contextCache *contextCache
serviceUrl *url.URL
}
func (m *azureProvider) GetProviderType() string {
return providerTypeAzure
}
func (m *azureProvider) OnRequestHeaders(ctx wrapper.HttpContext, apiName ApiName, log wrapper.Log) (types.Action, error) {
if apiName != ApiNameChatCompletion {
return types.ActionContinue, errUnsupportedApiName
}
_ = util.OverwriteRequestPath(m.serviceUrl.RequestURI())
_ = util.OverwriteRequestHost(m.serviceUrl.Host)
_ = proxywasm.ReplaceHttpRequestHeader("api-key", m.config.apiTokens[0])
if m.contextCache == nil {
ctx.DontReadRequestBody()
} else {
_ = proxywasm.RemoveHttpRequestHeader("Content-Length")
}
return types.ActionContinue, nil
}
func (m *azureProvider) OnRequestBody(ctx wrapper.HttpContext, apiName ApiName, body []byte, log wrapper.Log) (types.Action, error) {
if apiName != ApiNameChatCompletion {
return types.ActionContinue, errUnsupportedApiName
}
if m.contextCache == nil {
return types.ActionContinue, nil
}
request := &chatCompletionRequest{}
if err := decodeChatCompletionRequest(body, request); err != nil {
return types.ActionContinue, err
}
err := m.contextCache.GetContent(func(content string, err error) {
defer func() {
_ = proxywasm.ResumeHttpRequest()
}()
if err != nil {
log.Errorf("failed to load context file: %v", err)
_ = util.SendResponse(500, util.MimeTypeTextPlain, fmt.Sprintf("failed to load context file: %v", err))
}
insertContextMessage(request, content)
if err := replaceJsonRequestBody(request, log); err != nil {
_ = util.SendResponse(500, util.MimeTypeTextPlain, fmt.Sprintf("failed to replace request body: %v", err))
}
}, log)
if err == nil {
return types.ActionPause, nil
}
return types.ActionContinue, err
}

View File

@@ -0,0 +1,17 @@
package provider
import "fmt"
type plainCluster struct {
serviceName string
servicePort int64
hostName string
}
func (c plainCluster) ClusterName() string {
return fmt.Sprintf("outbound|%d||%s", c.servicePort, c.serviceName)
}
func (c plainCluster) HostName() string {
return c.hostName
}

View File

@@ -0,0 +1,100 @@
package provider
import (
"errors"
"fmt"
"net/http"
"net/url"
"github.com/alibaba/higress/plugins/wasm-go/pkg/wrapper"
"github.com/tidwall/gjson"
)
type ContextConfig struct {
// @Title zh-CN 文件URL
// @Description zh-CN 用于获取对话上下文的文件的URL。目前仅支持HTTP和HTTPS协议纯文本格式文件
fileUrl string `required:"true" yaml:"url" json:"url"`
// @Title zh-CN 上游服务名称
// @Description zh-CN 文件服务所对应的网关内上游服务名称
serviceName string `required:"true" yaml:"serviceName" json:"serviceName"`
// @Title zh-CN 上游服务端口
// @Description zh-CN 文件服务所对应的网关内上游服务名称
servicePort int64 `required:"true" yaml:"serviceName" json:"serviceName"`
fileUrlObj *url.URL `yaml:"-"`
}
func (c *ContextConfig) FromJson(json gjson.Result) {
c.fileUrl = json.Get("fileUrl").String()
c.serviceName = json.Get("serviceName").String()
c.servicePort = json.Get("servicePort").Int()
}
func (c *ContextConfig) Validate() error {
if c.fileUrl == "" {
return errors.New("missing fileUrl in context config")
}
if fileUrlObj, err := url.Parse(c.fileUrl); err != nil {
return fmt.Errorf("invalid fileUrl in context config: %v", err)
} else {
c.fileUrlObj = fileUrlObj
}
if c.serviceName == "" {
return errors.New("missing serviceName in context config")
}
if c.servicePort == 0 {
return errors.New("missing servicePort in context config")
}
return nil
}
type contextCache struct {
client wrapper.HttpClient
fileUrl *url.URL
timeout uint32
loaded bool
content string
}
func (c *contextCache) GetContent(callback func(string, error), log wrapper.Log) error {
if callback == nil {
return errors.New("callback is nil")
}
if c.loaded {
log.Debugf("context file loaded from cache")
callback(c.content, nil)
return nil
}
log.Infof("loading context file from %s", c.fileUrl.String())
return c.client.Get(c.fileUrl.Path, nil, func(statusCode int, responseHeaders http.Header, responseBody []byte) {
if statusCode != http.StatusOK {
callback("", fmt.Errorf("failed to load context file, status: %d", statusCode))
return
}
c.content = string(responseBody)
c.loaded = true
log.Debugf("content: %s", c.content)
callback(c.content, nil)
}, c.timeout)
}
func createContextCache(providerConfig *ProviderConfig) *contextCache {
contextConfig := providerConfig.context
if contextConfig == nil {
return nil
}
fileUrlObj, _ := url.Parse(contextConfig.fileUrl)
cluster := plainCluster{
serviceName: contextConfig.serviceName,
servicePort: contextConfig.servicePort,
hostName: fileUrlObj.Host,
}
return &contextCache{
client: wrapper.NewClusterClient(cluster),
fileUrl: fileUrlObj,
timeout: providerConfig.timeout,
}
}

View File

@@ -0,0 +1,44 @@
package provider
type chatCompletionRequest struct {
Model string `json:"model"`
Messages []chatMessage `json:"messages"`
MaxTokens int `json:"max_tokens,omitempty"`
FrequencyPenalty float64 `json:"frequency_penalty,omitempty"`
N int `json:"n,omitempty"`
PresencePenalty float64 `json:"presence_penalty,omitempty"`
Seed int `json:"seed,omitempty"`
Stream bool `json:"stream,omitempty"`
Temperature float64 `json:"temperature,omitempty"`
TopP float64 `json:"top_p,omitempty"`
User string `json:"user,omitempty"`
}
type chatCompletionResponse struct {
Id string `json:"id,omitempty"`
Choices []chatCompletionChoice `json:"choices,omitempty"`
Created int64 `json:"created,omitempty"`
Model string `json:"model,omitempty"`
SystemFingerprint string `json:"system_fingerprint,omitempty"`
Object string `json:"object,omitempty"`
Usage chatCompletionUsage `json:"usage,omitempty"`
}
type chatCompletionChoice struct {
Index int `json:"index"`
Message *chatMessage `json:"message,omitempty"`
Delta *chatMessage `json:"delta,omitempty"`
FinishReason string `json:"finish_reason,omitempty"`
}
type chatCompletionUsage struct {
PromptTokens int `json:"prompt_tokens,omitempty"`
CompletionTokens int `json:"completion_tokens,omitempty"`
TotalTokens int `json:"total_tokens,omitempty"`
}
type chatMessage struct {
Name string `json:"name,omitempty"`
Role string `json:"role,omitempty"`
Content string `json:"content,omitempty"`
}

View File

@@ -0,0 +1,152 @@
package provider
import (
"errors"
"fmt"
"net/http"
"github.com/alibaba/higress/plugins/wasm-go/extensions/ai-proxy/util"
"github.com/alibaba/higress/plugins/wasm-go/pkg/wrapper"
"github.com/higress-group/proxy-wasm-go-sdk/proxywasm"
"github.com/higress-group/proxy-wasm-go-sdk/proxywasm/types"
"github.com/tidwall/gjson"
)
// moonshotProvider is the provider for Moonshot AI service.
const (
moonshotDomain = "api.moonshot.cn"
moonshotChatCompletionPath = "/v1/chat/completions"
)
type moonshotProviderInitializer struct {
}
func (m *moonshotProviderInitializer) ValidateConfig(config ProviderConfig) error {
if config.moonshotFileId != "" && config.context != nil {
return errors.New("moonshotFileId and context cannot be configured at the same time")
}
return nil
}
func (m *moonshotProviderInitializer) CreateProvider(config ProviderConfig) (Provider, error) {
return &moonshotProvider{
config: config,
client: wrapper.NewClusterClient(wrapper.RouteCluster{
Host: moonshotDomain,
}),
contextCache: createContextCache(&config),
}, nil
}
type moonshotProvider struct {
config ProviderConfig
client wrapper.HttpClient
fileContent string
contextCache *contextCache
}
func (m *moonshotProvider) GetProviderType() string {
return providerTypeMoonshot
}
func (m *moonshotProvider) OnRequestHeaders(ctx wrapper.HttpContext, apiName ApiName, log wrapper.Log) (types.Action, error) {
if apiName != ApiNameChatCompletion {
return types.ActionContinue, errUnsupportedApiName
}
_ = util.OverwriteRequestPath(moonshotChatCompletionPath)
_ = util.OverwriteRequestHost(moonshotDomain)
_ = proxywasm.ReplaceHttpRequestHeader("Authorization", "Bearer "+m.config.GetRandomToken())
_ = proxywasm.RemoveHttpRequestHeader("Content-Length")
return types.ActionContinue, nil
}
func (m *moonshotProvider) OnRequestBody(ctx wrapper.HttpContext, apiName ApiName, body []byte, log wrapper.Log) (types.Action, error) {
if apiName != ApiNameChatCompletion {
return types.ActionContinue, errUnsupportedApiName
}
request := &chatCompletionRequest{}
if err := decodeChatCompletionRequest(body, request); err != nil {
return types.ActionContinue, err
}
model := request.Model
if model == "" {
return types.ActionContinue, errors.New("missing model in chat completion request")
}
mappedModel := getMappedModel(model, m.config.modelMapping, log)
if mappedModel == "" {
return types.ActionContinue, errors.New("model becomes empty after applying the configured mapping")
}
request.Model = mappedModel
if m.config.moonshotFileId == "" && m.contextCache == nil {
return types.ActionContinue, replaceJsonRequestBody(request, log)
}
err := m.getContextContent(func(content string, err error) {
defer func() {
_ = proxywasm.ResumeHttpRequest()
}()
if err != nil {
log.Errorf("failed to load context file: %v", err)
_ = util.SendResponse(500, util.MimeTypeTextPlain, fmt.Sprintf("failed to load context file: %v", err))
return
}
err = m.performChatCompletion(ctx, content, request, log)
if err != nil {
_ = util.SendResponse(500, util.MimeTypeTextPlain, fmt.Sprintf("failed to perform chat completion: %v", err))
}
}, log)
if err == nil {
return types.ActionPause, nil
}
return types.ActionContinue, err
}
func (m *moonshotProvider) performChatCompletion(ctx wrapper.HttpContext, fileContent string, request *chatCompletionRequest, log wrapper.Log) error {
insertContextMessage(request, fileContent)
return replaceJsonRequestBody(request, log)
}
func (m *moonshotProvider) getContextContent(callback func(string, error), log wrapper.Log) error {
if m.config.moonshotFileId != "" {
if m.fileContent != "" {
callback(m.fileContent, nil)
return nil
}
return m.sendRequest(http.MethodGet, "/v1/files/"+m.config.moonshotFileId+"/content", "",
func(statusCode int, responseHeaders http.Header, responseBody []byte) {
responseString := string(responseBody)
if statusCode != http.StatusOK {
log.Errorf("failed to load knowledge base file from AI service, status: %d body: %s", statusCode, responseString)
callback("", fmt.Errorf("failed to load knowledge base file from moonshot service, status: %d", statusCode))
return
}
responseJson := gjson.Parse(responseString)
m.fileContent = responseJson.Get("content").String()
callback(m.fileContent, nil)
})
}
if m.contextCache != nil {
return m.contextCache.GetContent(callback, log)
}
return errors.New("both moonshotFileId and context are not configured")
}
func (m *moonshotProvider) sendRequest(method, path string, body string, callback wrapper.ResponseCallback) error {
switch method {
case http.MethodGet:
headers := util.CreateHeaders("Authorization", "Bearer "+m.config.GetRandomToken())
return m.client.Get(path, headers, callback, m.config.timeout)
case http.MethodPost:
headers := util.CreateHeaders("Authorization", "Bearer "+m.config.GetRandomToken(), "Content-Type", "application/json")
return m.client.Post(path, headers, []byte(body), callback, m.config.timeout)
default:
return errors.New("unsupported method: " + method)
}
}

View File

@@ -0,0 +1,87 @@
package provider
import (
"fmt"
"github.com/alibaba/higress/plugins/wasm-go/extensions/ai-proxy/util"
"github.com/alibaba/higress/plugins/wasm-go/pkg/wrapper"
"github.com/higress-group/proxy-wasm-go-sdk/proxywasm"
"github.com/higress-group/proxy-wasm-go-sdk/proxywasm/types"
)
// azureProvider is the provider for Azure OpenAI service.
const (
openaiDomain = "api.openai.com"
openaiChatCompletionPath = "/v1/chat/completions"
)
type openaiProviderInitializer struct {
}
func (m *openaiProviderInitializer) ValidateConfig(config ProviderConfig) error {
return nil
}
func (m *openaiProviderInitializer) CreateProvider(config ProviderConfig) (Provider, error) {
return &openaiProvider{
config: config,
contextCache: createContextCache(&config),
}, nil
}
type openaiProvider struct {
config ProviderConfig
contextCache *contextCache
}
func (m *openaiProvider) GetProviderType() string {
return providerTypeOpenAI
}
func (m *openaiProvider) OnRequestHeaders(ctx wrapper.HttpContext, apiName ApiName, log wrapper.Log) (types.Action, error) {
if apiName != ApiNameChatCompletion {
return types.ActionContinue, errUnsupportedApiName
}
_ = util.OverwriteRequestPath(openaiChatCompletionPath)
_ = util.OverwriteRequestHost(openaiDomain)
_ = proxywasm.ReplaceHttpRequestHeader("Authorization", "Bearer "+m.config.GetRandomToken())
if m.contextCache == nil {
ctx.DontReadRequestBody()
} else {
_ = proxywasm.RemoveHttpRequestHeader("Content-Length")
}
return types.ActionContinue, nil
}
func (m *openaiProvider) OnRequestBody(ctx wrapper.HttpContext, apiName ApiName, body []byte, log wrapper.Log) (types.Action, error) {
if apiName != ApiNameChatCompletion {
return types.ActionContinue, errUnsupportedApiName
}
if m.contextCache == nil {
return types.ActionContinue, nil
}
request := &chatCompletionRequest{}
if err := decodeChatCompletionRequest(body, request); err != nil {
return types.ActionContinue, err
}
err := m.contextCache.GetContent(func(content string, err error) {
defer func() {
_ = proxywasm.ResumeHttpRequest()
}()
if err != nil {
log.Errorf("failed to load context file: %v", err)
_ = util.SendResponse(500, util.MimeTypeTextPlain, fmt.Sprintf("failed to load context file: %v", err))
}
insertContextMessage(request, content)
if err := replaceJsonRequestBody(request, log); err != nil {
_ = util.SendResponse(500, util.MimeTypeTextPlain, fmt.Sprintf("failed to replace request body: %v", err))
}
}, log)
if err == nil {
return types.ActionPause, nil
}
return types.ActionContinue, err
}

View File

@@ -0,0 +1,199 @@
package provider
import (
"errors"
"math/rand"
"github.com/alibaba/higress/plugins/wasm-go/pkg/wrapper"
"github.com/higress-group/proxy-wasm-go-sdk/proxywasm/types"
"github.com/tidwall/gjson"
)
type ApiName string
type Pointcut string
const (
ApiNameChatCompletion ApiName = "chatCompletion"
providerTypeMoonshot = "moonshot"
providerTypeAzure = "azure"
providerTypeQwen = "qwen"
providerTypeOpenAI = "openai"
protocolOpenAI = "openai"
protocolOriginal = "original"
roleSystem = "system"
ctxKeyStreamingBody = "streamingBody"
ctxKeyOriginalRequestModel = "originalRequestModel"
ctxKeyFinalRequestModel = "finalRequestModel"
objectChatCompletion = "chat.completion"
objectChatCompletionChunk = "chat.completion.chunk"
finishReasonStop = "stop"
wildcard = "*"
defaultTimeout = 2 * 60 * 1000 // ms
)
type providerInitializer interface {
ValidateConfig(ProviderConfig) error
CreateProvider(ProviderConfig) (Provider, error)
}
var (
errUnsupportedApiName = errors.New("unsupported API name")
providerInitializers = map[string]providerInitializer{
providerTypeMoonshot: &moonshotProviderInitializer{},
providerTypeAzure: &azureProviderInitializer{},
providerTypeQwen: &qwenProviderInitializer{},
providerTypeOpenAI: &openaiProviderInitializer{},
}
)
type Provider interface {
GetProviderType() string
}
type RequestHeadersHandler interface {
OnRequestHeaders(ctx wrapper.HttpContext, apiName ApiName, log wrapper.Log) (types.Action, error)
}
type RequestBodyHandler interface {
OnRequestBody(ctx wrapper.HttpContext, apiName ApiName, body []byte, log wrapper.Log) (types.Action, error)
}
type ResponseHeadersHandler interface {
OnResponseHeaders(ctx wrapper.HttpContext, apiName ApiName, log wrapper.Log) (types.Action, error)
}
type StreamingResponseBodyHandler interface {
OnStreamingResponseBody(ctx wrapper.HttpContext, name ApiName, chunk []byte, isLastChunk bool, log wrapper.Log) ([]byte, error)
}
type ResponseBodyHandler interface {
OnResponseBody(ctx wrapper.HttpContext, apiName ApiName, body []byte, log wrapper.Log) (types.Action, error)
}
type ProviderConfig struct {
// @Title zh-CN AI服务提供商
// @Description zh-CN AI服务提供商类型目前支持的取值为"moonshot"
typ string `required:"true" yaml:"type" json:"type"`
// @Title zh-CN API Tokens
// @Description zh-CN 在请求AI服务时用于认证的API Token列表。不同的AI服务提供商可能有不同的名称。部分供应商只支持配置一个API Token如Azure OpenAI
apiTokens []string `required:"false" yaml:"apiToken" json:"apiTokens"`
// @Title zh-CN 请求超时
// @Description zh-CN 请求AI服务的超时时间单位为毫秒。默认值为120000即2分钟
timeout uint32 `required:"false" yaml:"timeout" json:"timeout"`
// @Title zh-CN Moonshot File ID
// @Description zh-CN 仅适用于Moonshot AI服务。Moonshot AI服务的文件 ID其内容用于补充 AI 请求上下文
moonshotFileId string `required:"false" yaml:"moonshotFileId" json:"moonshotFileId"`
// @Title zh-CN Azure OpenAI Service URL
// @Description zh-CN 仅适用于Azure OpenAI服务。要请求的OpenAI服务的完整URL包含api-version等参数
azureServiceUrl string `required:"false" yaml:"azureServiceUrl" json:"azureServiceUrl"`
// @Title zh-CN 模型名称映射表
// @Description zh-CN 用于将请求中的模型名称映射为目标AI服务商支持的模型名称。支持通过“*”来配置全局映射
modelMapping map[string]string `required:"false" yaml:"modelMapping" json:"modelMapping"`
// @Title zh-CN 对外接口协议
// @Description zh-CN 通过本插件对外提供的AI服务接口协议。默认值为“openai”即OpenAI的接口协议。如需保留原有接口协议可配置为“original"
protocol string `required:"false" yaml:"protocol" json:"protocol"`
// @Title zh-CN 模型对话上下文
// @Description zh-CN 配置一个外部获取对话上下文的文件来源用于在AI请求中补充对话上下文
context *ContextConfig `required:"false" yaml:"context" json:"context"`
}
func (c *ProviderConfig) FromJson(json gjson.Result) {
c.typ = json.Get("type").String()
c.apiTokens = make([]string, 0)
for _, token := range json.Get("apiTokens").Array() {
c.apiTokens = append(c.apiTokens, token.String())
}
c.timeout = uint32(json.Get("timeout").Uint())
if c.timeout == 0 {
c.timeout = defaultTimeout
}
c.moonshotFileId = json.Get("moonshotFileId").String()
c.azureServiceUrl = json.Get("azureServiceUrl").String()
c.modelMapping = make(map[string]string)
for k, v := range json.Get("modelMapping").Map() {
c.modelMapping[k] = v.String()
}
c.protocol = json.Get("protocol").String()
if c.protocol == "" {
c.protocol = protocolOpenAI
}
contextJson := json.Get("context")
if contextJson.Exists() {
c.context = &ContextConfig{}
c.context.FromJson(contextJson)
}
}
func (c *ProviderConfig) Validate() error {
if c.apiTokens == nil || len(c.apiTokens) == 0 {
return errors.New("no apiToken found in provider config")
}
if c.timeout < 0 {
return errors.New("invalid timeout in config")
}
if c.protocol != protocolOpenAI && c.protocol != protocolOriginal {
return errors.New("invalid protocol in config")
}
if c.context != nil {
if err := c.context.Validate(); err != nil {
return err
}
}
if c.typ == "" {
return errors.New("missing type in provider config")
}
initializer, has := providerInitializers[c.typ]
if !has {
return errors.New("unknown provider type: " + c.typ)
}
if err := initializer.ValidateConfig(*c); err != nil {
return err
}
return nil
}
func (c *ProviderConfig) GetRandomToken() string {
apiTokens := c.apiTokens
count := len(apiTokens)
switch count {
case 0:
return ""
case 1:
return apiTokens[0]
default:
return apiTokens[rand.Intn(count)]
}
}
func CreateProvider(pc ProviderConfig) (Provider, error) {
initializer, has := providerInitializers[pc.typ]
if !has {
return nil, errors.New("unknown provider type: " + pc.typ)
}
return initializer.CreateProvider(pc)
}
func getMappedModel(model string, modelMapping map[string]string, log wrapper.Log) string {
if modelMapping == nil || len(modelMapping) == 0 {
return model
}
if v, ok := modelMapping[model]; ok && len(v) != 0 {
log.Debugf("model %s is mapped to %s explictly", model, v)
return v
}
if v, ok := modelMapping[wildcard]; ok {
log.Debugf("model %s is mapped to %s via wildcard", model, v)
return v
}
return model
}

View File

@@ -0,0 +1,447 @@
package provider
import (
"encoding/json"
"errors"
"fmt"
"math"
"strings"
"time"
"github.com/alibaba/higress/plugins/wasm-go/extensions/ai-proxy/util"
"github.com/alibaba/higress/plugins/wasm-go/pkg/wrapper"
"github.com/higress-group/proxy-wasm-go-sdk/proxywasm"
"github.com/higress-group/proxy-wasm-go-sdk/proxywasm/types"
)
// qwenProvider is the provider for Qwen service.
const (
qwenResultFormatMessage = "message"
qwenDomain = "dashscope.aliyuncs.com"
qwenChatCompletionPath = "/api/v1/services/aigc/text-generation/generation"
qwenTopPMin = 0.000001
qwenTopPMax = 0.999999
ctxKeyPushedMessageContent = "pushedMessageContent"
streamIdItemKey = "id:"
streamDataItemKey = "data:"
streamEndDataValue = "[DONE]"
streamEventHeader = "event: result\n:HTTP_STATUS/200\n"
)
type qwenProviderInitializer struct {
}
func (m *qwenProviderInitializer) ValidateConfig(config ProviderConfig) error {
return nil
}
func (m *qwenProviderInitializer) CreateProvider(config ProviderConfig) (Provider, error) {
return &qwenProvider{
config: config,
contextCache: createContextCache(&config),
}, nil
}
type qwenProvider struct {
config ProviderConfig
contextCache *contextCache
}
func (m *qwenProvider) GetProviderType() string {
return providerTypeQwen
}
const (
forceStreaming = true
)
func (m *qwenProvider) OnRequestHeaders(ctx wrapper.HttpContext, apiName ApiName, log wrapper.Log) (types.Action, error) {
if apiName != ApiNameChatCompletion {
return types.ActionContinue, errUnsupportedApiName
}
_ = util.OverwriteRequestPath(qwenChatCompletionPath)
_ = util.OverwriteRequestHost(qwenDomain)
_ = proxywasm.ReplaceHttpRequestHeader("Authorization", "Bearer "+m.config.GetRandomToken())
if m.config.protocol == protocolOriginal && m.config.context == nil {
ctx.DontReadRequestBody()
return types.ActionContinue, nil
}
if forceStreaming {
_ = proxywasm.RemoveHttpRequestHeader("Accept-Encoding")
_ = proxywasm.RemoveHttpRequestHeader("Content-Length")
_ = proxywasm.ReplaceHttpRequestHeader("Accept", "text/event-stream")
_ = proxywasm.ReplaceHttpRequestHeader("X-DashScope-SSE", "enable")
return types.ActionContinue, nil
} else {
// Delay the header processing to allow changing streaming mode in OnRequestBody
return types.HeaderStopIteration, nil
}
}
func (m *qwenProvider) OnRequestBody(ctx wrapper.HttpContext, apiName ApiName, body []byte, log wrapper.Log) (types.Action, error) {
if apiName != ApiNameChatCompletion {
return types.ActionContinue, errUnsupportedApiName
}
if m.config.protocol == protocolOriginal {
if m.config.context == nil {
return types.ActionContinue, nil
}
request := &qwenTextGenRequest{}
if err := json.Unmarshal(body, request); err != nil {
return types.ActionContinue, fmt.Errorf("unable to unmarshal request: %v", err)
}
err := m.contextCache.GetContent(func(content string, err error) {
defer func() {
_ = proxywasm.ResumeHttpRequest()
}()
if err != nil {
log.Errorf("failed to load context file: %v", err)
_ = util.SendResponse(500, util.MimeTypeTextPlain, fmt.Sprintf("failed to load context file: %v", err))
}
m.insertContextMessage(request, content)
if err := replaceJsonRequestBody(request, log); err != nil {
_ = util.SendResponse(500, util.MimeTypeTextPlain, fmt.Sprintf("failed to replace request body: %v", err))
}
}, log)
if err == nil {
return types.ActionPause, nil
}
return types.ActionContinue, err
}
request := &chatCompletionRequest{}
if err := decodeChatCompletionRequest(body, request); err != nil {
return types.ActionContinue, err
}
model := request.Model
if model == "" {
return types.ActionContinue, errors.New("missing model in chat completion request")
}
ctx.SetContext(ctxKeyOriginalRequestModel, model)
mappedModel := getMappedModel(model, m.config.modelMapping, log)
if mappedModel == "" {
return types.ActionContinue, errors.New("model becomes empty after applying the configured mapping")
}
request.Model = mappedModel
ctx.SetContext(ctxKeyFinalRequestModel, request.Model)
if !forceStreaming {
if request.Stream {
_ = proxywasm.ReplaceHttpRequestHeader("Accept", "text/event-stream")
_ = proxywasm.ReplaceHttpRequestHeader("X-DashScope-SSE", "enable")
} else {
_ = proxywasm.ReplaceHttpRequestHeader("Accept", "*/*")
_ = proxywasm.RemoveHttpRequestHeader("X-DashScope-SSE")
}
}
if m.config.context == nil {
qwenRequest := m.buildQwenTextGenerationRequest(request)
return types.ActionContinue, replaceJsonRequestBody(qwenRequest, log)
}
err := m.contextCache.GetContent(func(content string, err error) {
defer func() {
_ = proxywasm.ResumeHttpRequest()
}()
if err != nil {
log.Errorf("failed to load context file: %v", err)
_ = util.SendResponse(500, util.MimeTypeTextPlain, fmt.Sprintf("failed to load context file: %v", err))
}
insertContextMessage(request, content)
qwenRequest := m.buildQwenTextGenerationRequest(request)
if err := replaceJsonRequestBody(qwenRequest, log); err != nil {
_ = util.SendResponse(500, util.MimeTypeTextPlain, fmt.Sprintf("failed to replace request body: %v", err))
}
}, log)
if err == nil {
return types.ActionPause, nil
}
return types.ActionContinue, err
}
func (m *qwenProvider) OnResponseHeaders(ctx wrapper.HttpContext, apiName ApiName, log wrapper.Log) (types.Action, error) {
if m.config.protocol == protocolOriginal {
ctx.DontReadResponseBody()
return types.ActionContinue, nil
}
_ = proxywasm.RemoveHttpResponseHeader("Content-Length")
return types.ActionContinue, nil
}
func (m *qwenProvider) OnStreamingResponseBody(ctx wrapper.HttpContext, name ApiName, chunk []byte, isLastChunk bool, log wrapper.Log) ([]byte, error) {
receivedBody := chunk
if bufferedStreamingBody, has := ctx.GetContext(ctxKeyStreamingBody).([]byte); has {
receivedBody = append(bufferedStreamingBody, chunk...)
}
eventStartIndex, lineStartIndex, valueStartIndex := 0, -1, -1
defer func() {
if eventStartIndex != -1 {
// Just in case the received chunk is not a complete event.
ctx.SetContext(ctxKeyStreamingBody, receivedBody[eventStartIndex:])
} else {
ctx.SetContext(ctxKeyStreamingBody, nil)
}
}()
// Sample event response:
var responseBuilder strings.Builder
currentEventId, currentKey := "", ""
i, length := 0, len(receivedBody)
for i = 0; i < length; i++ {
ch := receivedBody[i]
if ch != '\n' {
if lineStartIndex == -1 {
lineStartIndex = i
valueStartIndex = -1
}
if valueStartIndex == -1 {
if ch == ':' {
valueStartIndex = i + 1
currentKey = string(receivedBody[lineStartIndex:valueStartIndex])
}
} else if valueStartIndex == i && ch == ' ' {
// Skip leading spaces in data.
valueStartIndex = i + 1
}
continue
}
if lineStartIndex == -1 {
// Extra new line, Should be an event separator.
eventStartIndex = i + 1
continue
}
key := currentKey
value := receivedBody[valueStartIndex:i]
// Reset message parsing state.
eventStartIndex = -1
lineStartIndex = -1
valueStartIndex = -1
currentKey = ""
switch key {
case streamIdItemKey:
currentEventId = string(value)
break
case streamDataItemKey:
if err := m.convertStreamEvent(ctx, &responseBuilder, currentEventId, value, log); err != nil {
return nil, err
}
break
default:
break
}
}
modifiedResponseChunk := responseBuilder.String()
log.Debugf("=== modified response chunk: %s", modifiedResponseChunk)
return []byte(modifiedResponseChunk), nil
}
func (m *qwenProvider) OnResponseBody(ctx wrapper.HttpContext, apiName ApiName, body []byte, log wrapper.Log) (types.Action, error) {
qwenResponse := &qwenTextGenResponse{}
if err := json.Unmarshal(body, qwenResponse); err != nil {
return types.ActionContinue, fmt.Errorf("unable to unmarshal Qwen response: %v", err)
}
response := m.buildChatCompletionResponse(ctx, qwenResponse)
return types.ActionContinue, replaceJsonResponseBody(response, log)
}
func (m *qwenProvider) buildQwenTextGenerationRequest(origRequest *chatCompletionRequest) *qwenTextGenRequest {
return &qwenTextGenRequest{
Model: origRequest.Model,
Input: qwenTextGenInput{
Messages: origRequest.Messages,
},
Parameters: qwenTextGenParameters{
ResultFormat: qwenResultFormatMessage,
MaxTokens: origRequest.MaxTokens,
N: origRequest.N,
Seed: origRequest.Seed,
Temperature: origRequest.Temperature,
TopP: math.Max(qwenTopPMin, math.Min(origRequest.TopP, qwenTopPMax)),
},
}
}
func (m *qwenProvider) buildChatCompletionResponse(ctx wrapper.HttpContext, qwenResponse *qwenTextGenResponse) *chatCompletionResponse {
choices := make([]chatCompletionChoice, 0, len(qwenResponse.Output.Choices))
for _, qwenChoice := range qwenResponse.Output.Choices {
choices = append(choices, chatCompletionChoice{
Message: &qwenChoice.Message,
FinishReason: qwenChoice.FinishReason,
})
}
return &chatCompletionResponse{
Id: qwenResponse.RequestId,
Created: time.Now().UnixMilli() / 1000,
Model: ctx.GetContext(ctxKeyFinalRequestModel).(string),
SystemFingerprint: "",
Object: objectChatCompletion,
Choices: choices,
Usage: chatCompletionUsage{
PromptTokens: qwenResponse.Usage.InputTokens,
CompletionTokens: qwenResponse.Usage.OutputTokens,
TotalTokens: qwenResponse.Usage.TotalTokens,
},
}
}
func (m *qwenProvider) buildChatCompletionStreamingResponse(ctx wrapper.HttpContext, qwenResponse *qwenTextGenResponse) []*chatCompletionResponse {
baseMessage := chatCompletionResponse{
Id: qwenResponse.RequestId,
Created: time.Now().UnixMilli() / 1000,
Model: ctx.GetContext(ctxKeyFinalRequestModel).(string),
SystemFingerprint: "",
Object: objectChatCompletionChunk,
}
responses := make([]*chatCompletionResponse, 0)
qwenChoice := qwenResponse.Output.Choices[0]
message := qwenChoice.Message
content := message.Content
if rawPushedContent := ctx.GetContext(ctxKeyPushedMessageContent); rawPushedContent != nil {
if pushedContent := rawPushedContent.(string); pushedContent != "" && strings.HasPrefix(content, pushedContent) {
content = content[len(pushedContent):]
}
}
if content != "" {
deltaResponse := *&baseMessage
deltaResponse.Choices = append(deltaResponse.Choices, chatCompletionChoice{Delta: &chatMessage{Role: message.Role, Content: content}})
responses = append(responses, &deltaResponse)
ctx.SetContext(ctxKeyPushedMessageContent, message.Content)
}
// Yes, Qwen uses a string "null" as null.
if qwenChoice.FinishReason != "" && qwenChoice.FinishReason != "null" {
finishResponse := *&baseMessage
finishResponse.Choices = append(finishResponse.Choices, chatCompletionChoice{FinishReason: qwenChoice.FinishReason})
responses = append(responses, &finishResponse)
}
return responses
}
func (m *qwenProvider) convertStreamEvent(ctx wrapper.HttpContext, responseBuilder *strings.Builder, eventId string, eventData []byte, log wrapper.Log) error {
if string(eventData) == streamEndDataValue {
responseBuilder.WriteString(streamIdItemKey)
responseBuilder.WriteString(eventId)
responseBuilder.WriteString("\n")
responseBuilder.WriteString(streamEventHeader)
responseBuilder.WriteString(streamDataItemKey)
responseBuilder.WriteString(streamEndDataValue)
responseBuilder.WriteString("\n\n")
return nil
}
qwenResponse := &qwenTextGenResponse{}
if err := json.Unmarshal(eventData, qwenResponse); err != nil {
log.Errorf("unable to unmarshal Qwen response: %v", err)
return fmt.Errorf("unable to unmarshal Qwen response: %v", err)
}
responses := m.buildChatCompletionStreamingResponse(ctx, qwenResponse)
for _, response := range responses {
responseBody, err := json.Marshal(response)
if err != nil {
log.Errorf("unable to marshal response: %v", err)
return fmt.Errorf("unable to marshal response: %v", err)
}
responseBuilder.WriteString(streamIdItemKey)
responseBuilder.WriteString(eventId)
responseBuilder.WriteString("\n")
responseBuilder.WriteString(streamEventHeader)
responseBuilder.WriteString(streamDataItemKey)
responseBuilder.Write(responseBody)
responseBuilder.WriteString("\n\n")
}
return nil
}
func (m *qwenProvider) insertContextMessage(request *qwenTextGenRequest, content string) {
fileMessage := chatMessage{
Role: roleSystem,
Content: content,
}
firstNonSystemMessageIndex := -1
messages := request.Input.Messages
if messages != nil {
for i, message := range request.Input.Messages {
if message.Role != roleSystem {
firstNonSystemMessageIndex = i
break
}
}
}
if firstNonSystemMessageIndex == -1 {
request.Input.Messages = append(request.Input.Messages, fileMessage)
} else {
request.Input.Messages = append(request.Input.Messages[:firstNonSystemMessageIndex], append([]chatMessage{fileMessage}, request.Input.Messages[firstNonSystemMessageIndex:]...)...)
}
}
type qwenTextGenRequest struct {
Model string `json:"model"`
Input qwenTextGenInput `json:"input"`
Parameters qwenTextGenParameters `json:"parameters,omitempty"`
}
type qwenTextGenInput struct {
Messages []chatMessage `json:"messages"`
}
type qwenTextGenParameters struct {
ResultFormat string `json:"result_format,omitempty"`
MaxTokens int `json:"max_tokens,omitempty"`
RepetitionPenalty float64 `json:"repetition_penalty,omitempty"`
N int `json:"n,omitempty"`
Seed int `json:"seed,omitempty"`
Temperature float64 `json:"temperature,omitempty"`
TopP float64 `json:"top_p,omitempty"`
}
type qwenTextGenResponse struct {
RequestId string `json:"request_id"`
Output qwenTextGenOutput `json:"output"`
Usage qwenTextGenUsage `json:"usage"`
}
type qwenTextGenOutput struct {
FinishReason string `json:"finish_reason"`
Choices []qwenTextGenChoice `json:"choices"`
}
type qwenTextGenChoice struct {
FinishReason string `json:"finish_reason"`
Message chatMessage `json:"message"`
}
type qwenTextGenUsage struct {
InputTokens int `json:"input_tokens"`
OutputTokens int `json:"output_tokens"`
TotalTokens int `json:"total_tokens"`
}

View File

@@ -0,0 +1,65 @@
package provider
import (
"encoding/json"
"errors"
"fmt"
"github.com/alibaba/higress/plugins/wasm-go/pkg/wrapper"
"github.com/higress-group/proxy-wasm-go-sdk/proxywasm"
)
func decodeChatCompletionRequest(body []byte, request *chatCompletionRequest) error {
if err := json.Unmarshal(body, request); err != nil {
return fmt.Errorf("unable to unmarshal request: %v", err)
}
if request.Messages == nil || len(request.Messages) == 0 {
return errors.New("no message found in the request body")
}
return nil
}
func replaceJsonRequestBody(request interface{}, log wrapper.Log) error {
body, err := json.Marshal(request)
if err != nil {
return fmt.Errorf("unable to marshal request: %v", err)
}
log.Debugf("request body: %s", string(body))
err = proxywasm.ReplaceHttpRequestBody(body)
if err != nil {
return fmt.Errorf("unable to replace the original request body: %v", err)
}
return err
}
func insertContextMessage(request *chatCompletionRequest, content string) {
fileMessage := chatMessage{
Role: roleSystem,
Content: content,
}
firstNonSystemMessageIndex := -1
for i, message := range request.Messages {
if message.Role != roleSystem {
firstNonSystemMessageIndex = i
break
}
}
if firstNonSystemMessageIndex == -1 {
request.Messages = append(request.Messages, fileMessage)
} else {
request.Messages = append(request.Messages[:firstNonSystemMessageIndex], append([]chatMessage{fileMessage}, request.Messages[firstNonSystemMessageIndex:]...)...)
}
}
func replaceJsonResponseBody(response interface{}, log wrapper.Log) error {
body, err := json.Marshal(response)
if err != nil {
return fmt.Errorf("unable to marshal response: %v", err)
}
log.Debugf("response body: %s", string(body))
err = proxywasm.ReplaceHttpResponseBody(body)
if err != nil {
return fmt.Errorf("unable to replace the original response body: %v", err)
}
return err
}

View File

@@ -0,0 +1,36 @@
package util
import "github.com/higress-group/proxy-wasm-go-sdk/proxywasm"
const (
HeaderContentType = "Content-Type"
MimeTypeTextPlain = "text/plain"
MimeTypeApplicationJson = "application/json"
)
func SendResponse(statusCode uint32, contentType, body string) error {
return proxywasm.SendHttpResponse(statusCode, CreateHeaders(HeaderContentType, contentType), []byte(body), -1)
}
func CreateHeaders(kvs ...string) [][2]string {
headers := make([][2]string, 0, len(kvs)/2)
for i := 0; i < len(kvs); i += 2 {
headers = append(headers, [2]string{kvs[i], kvs[i+1]})
}
return headers
}
func OverwriteRequestHost(host string) error {
if originHost, err := proxywasm.GetHttpRequestHeader(":authority"); err == nil {
_ = proxywasm.ReplaceHttpRequestHeader("X-ENVOY-ORIGINAL-HOST", originHost)
}
return proxywasm.ReplaceHttpRequestHeader(":authority", host)
}
func OverwriteRequestPath(path string) error {
if originPath, err := proxywasm.GetHttpRequestHeader(":path"); err == nil {
_ = proxywasm.ReplaceHttpRequestHeader("X-ENVOY-ORIGINAL-PATH", originPath)
}
return proxywasm.ReplaceHttpRequestHeader(":path", path)
}

View File

@@ -0,0 +1,22 @@
package util
import (
"strconv"
"strings"
)
func EscapeStringForJson(s string) string {
var builder strings.Builder
for _, c := range s { //iterate through rune
switch c {
case '"':
builder.WriteRune('\\')
builder.WriteRune(c)
break
default:
quoted := strconv.QuoteRune(c)
builder.WriteString(quoted[1 : len(quoted)-1])
}
}
return builder.String()
}

View File

@@ -0,0 +1,28 @@
package util
import (
"testing"
"github.com/stretchr/testify/assert"
)
func TestEscapeForJsonString(t *testing.T) {
var tests = []struct {
input, output string
}{
{"hello", "hello"},
{"hello\"world", "hello\\\"world"},
{"h\be\vl\tlo\rworld\n", "h\\be\\vl\\tlo\\rworld\\n"},
}
for _, tt := range tests {
// t.Run enables running "subtests", one for each
// table entry. These are shown separately
// when executing `go test -v`.
testName := tt.input
t.Run(testName, func(t *testing.T) {
output := EscapeStringForJson(tt.input)
assert.Equal(t, tt.output, output)
})
}
}