Compare commits

...

13 Commits

Author SHA1 Message Date
澄潭
3e7a63bd9b rel: Release v1.4.0 (#1014) 2024-05-29 21:20:50 +08:00
澄潭
206152daa0 Update Makefile.core.mk 2024-05-29 21:16:09 +08:00
澄潭
812edf1490 add ai cache plugin (#1010) 2024-05-29 21:10:22 +08:00
澄潭
b00f79f3af optimize mcp cds (#1013) 2024-05-29 19:43:40 +08:00
澄潭
ed05da13f4 Update manifests.yaml 2024-05-29 19:25:03 +08:00
澄潭
53bccf89f4 Update Makefile.core.mk 2024-05-28 14:05:22 +08:00
Yang Beining
51b9d9ec4b feat: Add the ZhipuAI (ChatGLM) provider to the ai-proxy wasm plugin #950 (#1007)
Co-authored-by: Kent Dong <ch3cho@qq.com>
2024-05-28 11:29:36 +08:00
Yifan Gao
50f79c9099 feat: support ollama ai model (#1001)
Co-authored-by: Kent Dong <ch3cho@qq.com>
2024-05-28 10:55:59 +08:00
澄潭
93966bf14b fix vs merge (#1006) 2024-05-27 23:05:39 +08:00
澄潭
ffa690994b fix wasm recover (#1003) 2024-05-27 18:08:43 +08:00
澄潭
ca1ad1dc73 Fixed the issue where an empty string system prompt would be set when enabling file id. (#999) 2024-05-24 16:27:02 +08:00
澄潭
e09edff827 Fix the issue with multiple system prompts when using qwen-long file id mode. (#994)
Signed-off-by: johnlanni <zty98751@alibaba-inc.com>
2024-05-24 14:43:34 +08:00
Ink33
2fee28d4e8 feat: run the specific e2e test with environment variable (#975)
Signed-off-by: Ink33 <Ink33@smlk.org>
2024-05-23 17:36:24 +08:00
32 changed files with 947 additions and 40 deletions

View File

@@ -177,8 +177,8 @@ install: pre-install
cd helm/higress; helm dependency build
helm install higress helm/higress -n higress-system --create-namespace --set 'global.local=true'
ENVOY_LATEST_IMAGE_TAG ?= sha-d91b22f
ISTIO_LATEST_IMAGE_TAG ?= sha-d91b22f
ENVOY_LATEST_IMAGE_TAG ?= sha-93966bf
ISTIO_LATEST_IMAGE_TAG ?= sha-b00f79f
install-dev: pre-install
helm install higress helm/core -n higress-system --create-namespace --set 'controller.tag=$(TAG)' --set 'gateway.replicas=1' --set 'pilot.tag=$(ISTIO_LATEST_IMAGE_TAG)' --set 'gateway.tag=$(ENVOY_LATEST_IMAGE_TAG)' --set 'global.local=true'
@@ -305,7 +305,7 @@ run-higress-e2e-test:
kubectl wait --timeout=10m -n higress-system deployment/higress-controller --for=condition=Available
@echo -e "\n\033[36mWaiting higress-gateway to be ready...\033[0m\n"
kubectl wait --timeout=10m -n higress-system deployment/higress-gateway --for=condition=Available
go test -v -tags conformance ./test/e2e/e2e_test.go --ingress-class=higress --debug=true --test-area=all
go test -v -tags conformance ./test/e2e/e2e_test.go --ingress-class=higress --debug=true --test-area=all --execute-tests=$(TEST_SHORTNAME)
# run-higress-e2e-test-run starts to run ingress e2e conformance tests.
.PHONY: run-higress-e2e-test-run
@@ -315,7 +315,7 @@ run-higress-e2e-test-run:
kubectl wait --timeout=10m -n higress-system deployment/higress-controller --for=condition=Available
@echo -e "\n\033[36mWaiting higress-gateway to be ready...\033[0m\n"
kubectl wait --timeout=10m -n higress-system deployment/higress-gateway --for=condition=Available
go test -v -tags conformance ./test/e2e/e2e_test.go --ingress-class=higress --debug=true --test-area=run
go test -v -tags conformance ./test/e2e/e2e_test.go --ingress-class=higress --debug=true --test-area=run --execute-tests=$(TEST_SHORTNAME)
# run-higress-e2e-test-clean starts to clean ingress e2e tests.
.PHONY: run-higress-e2e-test-clean
@@ -345,7 +345,7 @@ run-higress-e2e-test-wasmplugin:
kubectl wait --timeout=10m -n higress-system deployment/higress-controller --for=condition=Available
@echo -e "\n\033[36mWaiting higress-gateway to be ready...\033[0m\n"
kubectl wait --timeout=10m -n higress-system deployment/higress-gateway --for=condition=Available
go test -v -tags conformance ./test/e2e/e2e_test.go -isWasmPluginTest=true -wasmPluginType=$(PLUGIN_TYPE) -wasmPluginName=$(PLUGIN_NAME) --ingress-class=higress --debug=true --test-area=all
go test -v -tags conformance ./test/e2e/e2e_test.go -isWasmPluginTest=true -wasmPluginType=$(PLUGIN_TYPE) -wasmPluginName=$(PLUGIN_NAME) --ingress-class=higress --debug=true --test-area=all --execute-tests=$(TEST_SHORTNAME)
# run-higress-e2e-test-wasmplugin-run starts to run ingress e2e conformance tests.
.PHONY: run-higress-e2e-test-wasmplugin-run
@@ -355,7 +355,7 @@ run-higress-e2e-test-wasmplugin-run:
kubectl wait --timeout=10m -n higress-system deployment/higress-controller --for=condition=Available
@echo -e "\n\033[36mWaiting higress-gateway to be ready...\033[0m\n"
kubectl wait --timeout=10m -n higress-system deployment/higress-gateway --for=condition=Available
go test -v -tags conformance ./test/e2e/e2e_test.go -isWasmPluginTest=true -wasmPluginType=$(PLUGIN_TYPE) -wasmPluginName=$(PLUGIN_NAME) --ingress-class=higress --debug=true --test-area=run
go test -v -tags conformance ./test/e2e/e2e_test.go -isWasmPluginTest=true -wasmPluginType=$(PLUGIN_TYPE) -wasmPluginName=$(PLUGIN_NAME) --ingress-class=higress --debug=true --test-area=run --execute-tests=$(TEST_SHORTNAME)
# run-higress-e2e-test-wasmplugin-clean starts to clean ingress e2e tests.
.PHONY: run-higress-e2e-test-wasmplugin-clean

View File

@@ -1 +1 @@
v1.4.0-rc.1
v1.4.0

View File

@@ -0,0 +1,25 @@
diff -Naur envoy/bazel/repository_locations.bzl envoy-new/bazel/repository_locations.bzl
--- envoy/bazel/repository_locations.bzl 2024-05-27 18:04:13.116443196 +0800
+++ envoy-new/bazel/repository_locations.bzl 2024-05-27 18:02:24.812441069 +0800
@@ -1031,8 +1031,8 @@
project_name = "WebAssembly for Proxies (C++ host implementation)",
project_desc = "WebAssembly for Proxies (C++ host implementation)",
project_url = "https://github.com/higress-group/proxy-wasm-cpp-host",
- version = "cad2eb04d402dbf559101f3cb4f44da0d9c5b0b0",
- sha256 = "4efbcc97c58994fab92c9dc50c051ad16463647d4c0c6df36a7204d2984c1e63",
+ version = "28a33a5a3e6c1ff8f53128a74e89aeca47850f68",
+ sha256 = "1aaa5898c169aeff115eff2fedf58095b3509d2e59861ad498e661a990d78b3d",
strip_prefix = "proxy-wasm-cpp-host-{version}",
urls = ["https://github.com/higress-group/proxy-wasm-cpp-host/archive/{version}.tar.gz"],
use_category = ["dataplane_ext"],
diff -Naur envoy/source/extensions/filters/http/wasm/wasm_filter.h envoy-new/source/extensions/filters/http/wasm/wasm_filter.h
--- envoy/source/extensions/filters/http/wasm/wasm_filter.h 2024-05-27 18:04:13.112443196 +0800
+++ envoy-new/source/extensions/filters/http/wasm/wasm_filter.h 2024-05-27 18:03:25.360442258 +0800
@@ -51,6 +51,7 @@
if (opt_ref->recover()) {
ENVOY_LOG(info, "wasm vm recover success");
wasm = opt_ref->handle()->wasmHandle()->wasm().get();
+ handle = opt_ref->handle();
} else {
ENVOY_LOG(info, "wasm vm recover failed");
failed = true;

View File

@@ -1,5 +1,5 @@
apiVersion: v2
appVersion: 1.4.0-rc.1
appVersion: 1.4.0
description: Helm chart for deploying higress gateways
icon: https://higress.io/img/higress_logo_small.png
home: http://higress.io/
@@ -10,4 +10,4 @@ name: higress-core
sources:
- http://github.com/alibaba/higress
type: application
version: 1.4.0-rc.1
version: 1.4.0

View File

@@ -1,9 +1,9 @@
dependencies:
- name: higress-core
repository: file://../core
version: 1.4.0-rc.1
version: 1.4.0
- name: higress-console
repository: https://higress.io/helm-charts/
version: 1.4.0
digest: sha256:320b1b3ed08fad56dff0d21faaffe41a0325fdcdb96847e53a588d6b0df7e73e
generated: "2024-05-19T17:52:19.676747+08:00"
digest: sha256:bf4c58ac28d4691907eab44a13eee398fc05ade95cdae07cb91d7e20ce4ba382
generated: "2024-05-29T21:18:32.791995+08:00"

View File

@@ -1,5 +1,5 @@
apiVersion: v2
appVersion: 1.4.0-rc.1
appVersion: 1.4.0
description: Helm chart for deploying Higress gateways
icon: https://higress.io/img/higress_logo_small.png
home: http://higress.io/
@@ -12,9 +12,9 @@ sources:
dependencies:
- name: higress-core
repository: "file://../core"
version: 1.4.0-rc.1
version: 1.4.0
- name: higress-console
repository: "https://higress.io/helm-charts/"
version: 1.4.0
type: application
version: 1.4.0-rc.1
version: 1.4.0

View File

@@ -0,0 +1,69 @@
diff -Naur istio/pilot/pkg/model/push_context.go istio-new/pilot/pkg/model/push_context.go
--- istio/pilot/pkg/model/push_context.go 2024-05-27 23:03:09.000000000 +0800
+++ istio-new/pilot/pkg/model/push_context.go 2024-05-27 21:33:45.000000000 +0800
@@ -1482,8 +1482,14 @@
ns := virtualService.Namespace
rule := virtualService.Spec.(*networking.VirtualService)
// Added by ingress
- for _, host := range rule.Hosts {
- ps.virtualServiceIndex.byHost[host] = append(ps.virtualServiceIndex.byHost[host], virtualService)
+ if len(rule.Gateways) > 0 {
+ if len(rule.Hosts) == 0 {
+ ps.virtualServiceIndex.byHost[constants.GlobalWildcardHost] = append(ps.virtualServiceIndex.byHost[constants.GlobalWildcardHost], virtualService)
+ } else {
+ for _, host := range rule.Hosts {
+ ps.virtualServiceIndex.byHost[host] = append(ps.virtualServiceIndex.byHost[host], virtualService)
+ }
+ }
}
// End added by ingress
gwNames := getGatewayNames(rule)
diff -Naur istio/pilot/pkg/networking/core/v1alpha3/gateway.go istio-new/pilot/pkg/networking/core/v1alpha3/gateway.go
--- istio/pilot/pkg/networking/core/v1alpha3/gateway.go 2024-05-27 23:03:09.000000000 +0800
+++ istio-new/pilot/pkg/networking/core/v1alpha3/gateway.go 2024-05-27 22:58:33.000000000 +0800
@@ -376,8 +376,15 @@
gatewayVirtualServices[gatewayName] = virtualServices
}
for _, virtualService := range virtualServices {
- for _, host := range virtualService.Spec.(*networking.VirtualService).Hosts {
- hostSet.Insert(host)
+ rule := virtualService.Spec.(*networking.VirtualService)
+ if len(rule.Gateways) > 0 {
+ if len(rule.Hosts) == 0 {
+ hostSet.Insert(constants.GlobalWildcardHost)
+ break
+ }
+ for _, host := range rule.Hosts {
+ hostSet.Insert(host)
+ }
}
}
}
@@ -689,7 +696,7 @@
vHost = &route.VirtualHost{
Name: util.DomainName(hostRDSHost, port),
Domains: buildGatewayVirtualHostDomains(hostRDSHost, port),
- Routes: routes,
+ Routes: append(routes[:0:0], routes...),
IncludeRequestAttemptCount: true,
TypedPerFilterConfig: mseingress.ConstructTypedPerFilterConfigForVHost(globalHTTPFilters, virtualService),
}
@@ -884,7 +891,7 @@
newVHost := &route.VirtualHost{
Name: util.DomainName(string(hostname), port),
Domains: buildGatewayVirtualHostDomains(string(hostname), port),
- Routes: routes,
+ Routes: append(routes[:0:0], routes...),
IncludeRequestAttemptCount: true,
TypedPerFilterConfig: mseingress.ConstructTypedPerFilterConfigForVHost(globalHTTPFilters, virtualService),
}
diff -Naur istio/pkg/config/constants/constants.go istio-new/pkg/config/constants/constants.go
--- istio/pkg/config/constants/constants.go 2024-05-27 23:03:09.000000000 +0800
+++ istio-new/pkg/config/constants/constants.go 2024-05-27 21:31:58.000000000 +0800
@@ -145,5 +145,6 @@
// Added by ingress
HigressHostRDSNamePrefix = "higress-rds-"
DefaultScopedRouteName = "scoped-route"
+ GlobalWildcardHost = "*"
// End added by ingress
)

View File

@@ -0,0 +1,17 @@
diff -Naur istio/pilot/pkg/model/push_context.go istio-new/pilot/pkg/model/push_context.go
--- istio/pilot/pkg/model/push_context.go 2024-05-29 19:29:45.000000000 +0800
+++ istio-new/pilot/pkg/model/push_context.go 2024-05-29 19:11:03.000000000 +0800
@@ -769,6 +769,13 @@
for _, s := range svcs {
svcHost := string(s.Hostname)
+ // Added by ingress
+ if s.Attributes.Namespace == "mcp" {
+ gwSvcs = append(gwSvcs, s)
+ continue
+ }
+ // End added by ingress
+
if _, ok := hostsFromGateways[svcHost]; ok {
gwSvcs = append(gwSvcs, s)
}

View File

@@ -18,6 +18,7 @@ import (
"fmt"
"io"
"os"
"os/exec"
"github.com/alibaba/higress/pkg/cmd/hgctl/plugin/option"
"github.com/alibaba/higress/pkg/cmd/hgctl/plugin/utils"
@@ -86,6 +87,12 @@ func runInit(w io.Writer, target string) (err error) {
return errors.Wrap(err, "failed to create option.yaml")
}
cmd := exec.Command("go", "mod", "tidy")
cmd.Dir = dir
if err := cmd.Run(); err != nil {
return errors.Wrap(err, "failed to run go mod tidy")
}
fmt.Fprintf(w, "Initialized the project in %q\n", dir)
return nil

View File

@@ -31,8 +31,8 @@ package main
import (
"github.com/tidwall/gjson"
"github.com/tetratelabs/proxy-wasm-go-sdk/proxywasm"
"github.com/tetratelabs/proxy-wasm-go-sdk/proxywasm/types"
"github.com/higress-group/proxy-wasm-go-sdk/proxywasm"
"github.com/higress-group/proxy-wasm-go-sdk/proxywasm/types"
"github.com/alibaba/higress/plugins/wasm-go/pkg/wrapper"
)
@@ -93,8 +93,8 @@ module {{ .Name }}
go 1.19
require (
github.com/alibaba/higress/plugins/wasm-go v0.0.0-20231019123123-86b223bc75f1
github.com/tetratelabs/proxy-wasm-go-sdk v0.22.0
github.com/alibaba/higress/plugins/wasm-go main
github.com/higress-group/proxy-wasm-go-sdk main
github.com/tidwall/gjson v1.14.3
)
`

View File

@@ -1,6 +1,6 @@
PLUGIN_NAME ?= hello-world
BUILDER_REGISTRY ?= higress-registry.cn-hangzhou.cr.aliyuncs.com/plugins/
REGISTRY ?=
REGISTRY ?= higress-registry.cn-hangzhou.cr.aliyuncs.com/plugins/
GO_VERSION ?= 1.19
TINYGO_VERSION ?= 0.28.1
ORAS_VERSION ?= 1.0.0
@@ -12,12 +12,14 @@ COMMIT_ID := $(shell git rev-parse --short HEAD 2>/dev/null)
IMAGE_TAG = $(if $(strip $(PLUGIN_VERSION)),${PLUGIN_VERSION},${BUILD_TIME}-${COMMIT_ID})
IMG ?= ${REGISTRY}${PLUGIN_NAME}:${IMAGE_TAG}
GOPROXY := $(shell go env GOPROXY)
EXTRA_TAGS ?=
.DEFAULT:
build:
DOCKER_BUILDKIT=1 docker build --build-arg PLUGIN_NAME=${PLUGIN_NAME} \
--build-arg BUILDER=${BUILDER} \
--build-arg GOPROXY=$(GOPROXY) \
--build-arg EXTRA_TAGS=$(EXTRA_TAGS) \
-t ${IMG} \
--output extensions/${PLUGIN_NAME} \
.
@@ -28,6 +30,7 @@ build-image:
DOCKER_BUILDKIT=1 docker build --build-arg PLUGIN_NAME=${PLUGIN_NAME} \
--build-arg BUILDER=${BUILDER} \
--build-arg GOPROXY=$(GOPROXY) \
--build-arg EXTRA_TAGS=$(EXTRA_TAGS) \
-t ${IMG} \
.
@echo ""

View File

@@ -0,0 +1,19 @@
# File generated by hgctl. Modify as required.
*
!/.gitignore
!*.go
!go.sum
!go.mod
!LICENSE
!*.md
!*.yaml
!*.yml
!*/
/out
/test

View File

@@ -0,0 +1,34 @@
## 简介
**Note**
> 需要数据面的proxy wasm版本大于等于0.2.100
> 编译时需要带上版本的tag例如`tinygo build -o main.wasm -scheduler=none -target=wasi -gc=custom -tags="custommalloc nottinygc_finalizer proxy_wasm_version_0_2_100" ./`
LLM 结果缓存插件,默认配置方式可以直接用于 openai 协议的结果缓存,同时支持流式和非流式响应的缓存。
## 配置说明
| Name | Type | Requirement | Default | Description |
| -------- | -------- | -------- | -------- | -------- |
| cacheKeyFrom.requestBody | string | optional | "messages.@reverse.0.content" | 从请求 Body 中基于 [GJSON PATH](https://github.com/tidwall/gjson/blob/master/SYNTAX.md) 语法提取字符串 |
| cacheValueFrom.responseBody | string | optional | "choices.0.message.content" | 从响应 Body 中基于 [GJSON PATH](https://github.com/tidwall/gjson/blob/master/SYNTAX.md) 语法提取字符串 |
| cacheStreamValueFrom.responseBody | string | optional | "choices.0.delta.content" | 从流式响应 Body 中基于 [GJSON PATH](https://github.com/tidwall/gjson/blob/master/SYNTAX.md) 语法提取字符串 |
| cacheKeyPrefix | string | optional | "higress-ai-cache:" | Redis缓存Key的前缀 |
| cacheTTL | integer | optional | 0 | 缓存的过期时间单位是秒默认值为0即永不过期 |
| redis.serviceName | string | requried | - | redis 服务名称,带服务类型的完整 FQDN 名称,例如 my-redis.dns、redis.my-ns.svc.cluster.local |
| redis.servicePort | integer | optional | 6379 | redis 服务端口 |
| redis.timeout | integer | optional | 1000 | 请求 redis 的超时时间,单位为毫秒 |
| redis.username | string | optional | - | 登陆 redis 的用户名 |
| redis.password | string | optional | - | 登陆 redis 的密码 |
| returnResponseTemplate | string | optional | `{"id":"from-cache","choices":[%s],"model":"gpt-4o","object":"chat.completion","usage":{"prompt_tokens":0,"completion_tokens":0,"total_tokens":0}}` | 返回 HTTP 响应的模版,用 %s 标记需要被 cache value 替换的部分 |
| returnStreamResponseTemplate | string | optional | `data:{"id":"from-cache","choices":[{"index":0,"delta":{"role":"assistant","content":"%s"},"finish_reason":"stop"}],"model":"gpt-4o","object":"chat.completion","usage":{"prompt_tokens":0,"completion_tokens":0,"total_tokens":0}}\n\ndata:[DONE]\n\n` | 返回流式 HTTP 响应的模版,用 %s 标记需要被 cache value 替换的部分 |
## 配置示例
```yaml
redis:
serviceName: my-redis.dns
timeout: 2000
```

View File

@@ -0,0 +1,23 @@
// File generated by hgctl. Modify as required.
module github.com/alibaba/higress/plugins/wasm-go/extensions/ai-cache
go 1.19
replace github.com/alibaba/higress/plugins/wasm-go => ../..
require (
github.com/alibaba/higress/plugins/wasm-go v1.3.6-0.20240528060522-53bccf89f441
github.com/higress-group/proxy-wasm-go-sdk v0.0.0-20240327114451-d6b7174a84fc
github.com/tidwall/gjson v1.14.3
github.com/tidwall/resp v0.1.1
github.com/tidwall/sjson v1.2.5
)
require (
github.com/google/uuid v1.3.0 // indirect
github.com/higress-group/nottinygc v0.0.0-20231101025119-e93c4c2f8520 // indirect
github.com/magefile/mage v1.14.0 // indirect
github.com/tidwall/match v1.1.1 // indirect
github.com/tidwall/pretty v1.2.0 // indirect
)

View File

@@ -0,0 +1,23 @@
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
github.com/google/uuid v1.3.0 h1:t6JiXgmwXMjEs8VusXIJk2BXHsn+wx8BZdTaoZ5fu7I=
github.com/google/uuid v1.3.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
github.com/higress-group/nottinygc v0.0.0-20231101025119-e93c4c2f8520 h1:IHDghbGQ2DTIXHBHxWfqCYQW1fKjyJ/I7W1pMyUDeEA=
github.com/higress-group/nottinygc v0.0.0-20231101025119-e93c4c2f8520/go.mod h1:Nz8ORLaFiLWotg6GeKlJMhv8cci8mM43uEnLA5t8iew=
github.com/higress-group/proxy-wasm-go-sdk v0.0.0-20240327114451-d6b7174a84fc h1:t2AT8zb6N/59Y78lyRWedVoVWHNRSCBh0oWCC+bluTQ=
github.com/higress-group/proxy-wasm-go-sdk v0.0.0-20240327114451-d6b7174a84fc/go.mod h1:hNFjhrLUIq+kJ9bOcs8QtiplSQ61GZXtd2xHKx4BYRo=
github.com/magefile/mage v1.14.0 h1:6QDX3g6z1YvJ4olPhT1wksUcSa/V0a1B+pJb73fBjyo=
github.com/magefile/mage v1.14.0/go.mod h1:z5UZb/iS3GoOSn0JgWuiw7dxlurVYTu+/jHXqQg881A=
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
github.com/stretchr/testify v1.8.4 h1:CcVxjf3Q8PM0mHUKJCdn+eZZtm5yQwehR5yeSVQQcUk=
github.com/tidwall/gjson v1.14.2/go.mod h1:/wbyibRr2FHMks5tjHJ5F8dMZh3AcwJEMf5vlfC0lxk=
github.com/tidwall/gjson v1.14.3 h1:9jvXn7olKEHU1S9vwoMGliaT8jq1vJ7IH/n9zD9Dnlw=
github.com/tidwall/gjson v1.14.3/go.mod h1:/wbyibRr2FHMks5tjHJ5F8dMZh3AcwJEMf5vlfC0lxk=
github.com/tidwall/match v1.1.1 h1:+Ho715JplO36QYgwN9PGYNhgZvoUSc9X2c80KVTi+GA=
github.com/tidwall/match v1.1.1/go.mod h1:eRSPERbgtNPcGhD8UCthc6PmLEQXEWd3PRB5JTxsfmM=
github.com/tidwall/pretty v1.2.0 h1:RWIZEg2iJ8/g6fDDYzMpobmaoGh5OLl4AXtGUGPcqCs=
github.com/tidwall/pretty v1.2.0/go.mod h1:ITEVvHYasfjBbM0u2Pg8T2nJnzm8xPwvNhhsoaGGjNU=
github.com/tidwall/resp v0.1.1 h1:Ly20wkhqKTmDUPlyM1S7pWo5kk0tDu8OoC/vFArXmwE=
github.com/tidwall/resp v0.1.1/go.mod h1:3/FrruOBAxPTPtundW0VXgmsQ4ZBA0Aw714lVYgwFa0=
github.com/tidwall/sjson v1.2.5 h1:kLy8mja+1c9jlljvWTlSazM7cKDRfJuR/bOJhcY5NcY=
github.com/tidwall/sjson v1.2.5/go.mod h1:Fvgq9kS/6ociJEDnK0Fk1cpYF4FIW6ZF7LAe+6jwd28=
gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=

View File

@@ -0,0 +1,361 @@
// File generated by hgctl. Modify as required.
// See: https://higress.io/zh-cn/docs/user/wasm-go#2-%E7%BC%96%E5%86%99-maingo-%E6%96%87%E4%BB%B6
package main
import (
"errors"
"fmt"
"strings"
"github.com/alibaba/higress/plugins/wasm-go/pkg/wrapper"
"github.com/higress-group/proxy-wasm-go-sdk/proxywasm"
"github.com/higress-group/proxy-wasm-go-sdk/proxywasm/types"
"github.com/tidwall/gjson"
"github.com/tidwall/resp"
)
const (
CacheKeyContextKey = "cacheKey"
CacheContentContextKey = "cacheContent"
PartialMessageContextKey = "partialMessage"
ToolCallsContextKey = "toolCalls"
StreamContextKey = "stream"
DefaultCacheKeyPrefix = "higress-ai-cache:"
)
func main() {
wrapper.SetCtx(
"ai-cache",
wrapper.ParseConfigBy(parseConfig),
wrapper.ProcessRequestHeadersBy(onHttpRequestHeaders),
wrapper.ProcessRequestBodyBy(onHttpRequestBody),
wrapper.ProcessStreamingResponseBodyBy(onHttpResponseBody),
)
}
// @Name ai-cache
// @Category protocol
// @Phase AUTHN
// @Priority 10
// @Title zh-CN AI Cache
// @Description zh-CN 大模型结果缓存
// @IconUrl
// @Version 0.1.0
//
// @Contact.name johnlanni
// @Contact.url
// @Contact.email
//
// @Example
// redis:
// serviceName: my-redis.dns
// timeout: 2000
// cacheKeyFrom:
// requestBody: "messages.@reverse.0.content"
// cacheValueFrom:
// responseBody: "choices.0"
// returnResponseTemplate: |
// {"id":"from-cache","choices":[%s],"model":"gpt-4o","object":"chat.completion","usage":{"prompt_tokens":0,"completion_tokens":0,"total_tokens":0}}
// @End
type RedisInfo struct {
// @Title zh-CN redis 服务名称
// @Description zh-CN 带服务类型的完整 FQDN 名称,例如 my-redis.dns、redis.my-ns.svc.cluster.local
ServiceName string `required:"true" yaml:"serviceName" json:"serviceName"`
// @Title zh-CN redis 服务端口
// @Description zh-CN 默认值为6379
ServicePort int `required:"false" yaml:"servicePort" json:"servicePort"`
// @Title zh-CN 用户名
// @Description zh-CN 登陆 redis 的用户名,非必填
Username string `required:"false" yaml:"username" json:"username"`
// @Title zh-CN 密码
// @Description zh-CN 登陆 redis 的密码,非必填,可以只填密码
Password string `required:"false" yaml:"password" json:"password"`
// @Title zh-CN 请求超时
// @Description zh-CN 请求 redis 的超时时间单位为毫秒。默认值是1000即1秒
Timeout int `required:"false" yaml:"timeout" json:"timeout"`
}
type KVExtractor struct {
// @Title zh-CN 从请求 Body 中基于 [GJSON PATH](https://github.com/tidwall/gjson/blob/master/SYNTAX.md) 语法提取字符串
RequestBody string `required:"false" yaml:"requestBody" json:"requestBody"`
// @Title zh-CN 从响应 Body 中基于 [GJSON PATH](https://github.com/tidwall/gjson/blob/master/SYNTAX.md) 语法提取字符串
ResponseBody string `required:"false" yaml:"responseBody" json:"responseBody"`
}
type PluginConfig struct {
// @Title zh-CN Redis 地址信息
// @Description zh-CN 用于存储缓存结果的 Redis 地址
RedisInfo RedisInfo `required:"true" yaml:"redis" json:"redis"`
// @Title zh-CN 缓存 key 的来源
// @Description zh-CN 往 redis 里存时,使用的 key 的提取方式
CacheKeyFrom KVExtractor `required:"true" yaml:"cacheKeyFrom" json:"cacheKeyFrom"`
// @Title zh-CN 缓存 value 的来源
// @Description zh-CN 往 redis 里存时,使用的 value 的提取方式
CacheValueFrom KVExtractor `required:"true" yaml:"cacheValueFrom" json:"cacheValueFrom"`
// @Title zh-CN 流式响应下,缓存 value 的来源
// @Description zh-CN 往 redis 里存时,使用的 value 的提取方式
CacheStreamValueFrom KVExtractor `required:"true" yaml:"cacheStreamValueFrom" json:"cacheStreamValueFrom"`
// @Title zh-CN 返回 HTTP 响应的模版
// @Description zh-CN 用 %s 标记需要被 cache value 替换的部分
ReturnResponseTemplate string `required:"true" yaml:"returnResponseTemplate" json:"returnResponseTemplate"`
// @Title zh-CN 返回流式 HTTP 响应的模版
// @Description zh-CN 用 %s 标记需要被 cache value 替换的部分
ReturnStreamResponseTemplate string `required:"true" yaml:"returnStreamResponseTemplate" json:"returnStreamResponseTemplate"`
// @Title zh-CN 缓存的过期时间
// @Description zh-CN 单位是秒默认值为0即永不过期
CacheTTL int `required:"false" yaml:"cacheTTL" json:"cacheTTL"`
// @Title zh-CN Redis缓存Key的前缀
// @Description zh-CN 默认值是"higress-ai-cache:"
CacheKeyPrefix string `required:"false" yaml:"cacheKeyPrefix" json:"cacheKeyPrefix"`
redisClient wrapper.RedisClient `yaml:"-" json:"-"`
}
func parseConfig(json gjson.Result, c *PluginConfig, log wrapper.Log) error {
c.RedisInfo.ServiceName = json.Get("redis.serviceName").String()
if c.RedisInfo.ServiceName == "" {
return errors.New("redis service name must not by empty")
}
c.RedisInfo.ServicePort = int(json.Get("redis.servicePort").Int())
if c.RedisInfo.ServicePort == 0 {
if strings.HasSuffix(c.RedisInfo.ServiceName, ".static") {
// use default logic port which is 80 for static service
c.RedisInfo.ServicePort = 80
} else {
c.RedisInfo.ServicePort = 6379
}
}
c.RedisInfo.Username = json.Get("redis.username").String()
c.RedisInfo.Password = json.Get("redis.password").String()
c.RedisInfo.Timeout = int(json.Get("redis.timeout").Int())
if c.RedisInfo.Timeout == 0 {
c.RedisInfo.Timeout = 1000
}
c.CacheKeyFrom.RequestBody = json.Get("cacheKeyFrom.requestBody").String()
if c.CacheKeyFrom.RequestBody == "" {
c.CacheKeyFrom.RequestBody = "messages.@reverse.0.content"
}
c.CacheValueFrom.ResponseBody = json.Get("cacheValueFrom.responseBody").String()
if c.CacheValueFrom.ResponseBody == "" {
c.CacheValueFrom.ResponseBody = "choices.0.message.content"
}
c.CacheStreamValueFrom.ResponseBody = json.Get("cacheStreamValueFrom.responseBody").String()
if c.CacheStreamValueFrom.ResponseBody == "" {
c.CacheStreamValueFrom.ResponseBody = "choices.0.delta.content"
}
c.ReturnResponseTemplate = json.Get("returnResponseTemplate").String()
if c.ReturnResponseTemplate == "" {
c.ReturnResponseTemplate = `{"id":"from-cache","choices":[{"index":0,"message":{"role":"assistant","content":"%s"},"finish_reason":"stop"}],"model":"gpt-4o","object":"chat.completion","usage":{"prompt_tokens":0,"completion_tokens":0,"total_tokens":0}}`
}
c.ReturnStreamResponseTemplate = json.Get("returnStreamResponseTemplate").String()
if c.ReturnStreamResponseTemplate == "" {
c.ReturnStreamResponseTemplate = `data:{"id":"from-cache","choices":[{"index":0,"delta":{"role":"assistant","content":"%s"},"finish_reason":"stop"}],"model":"gpt-4o","object":"chat.completion","usage":{"prompt_tokens":0,"completion_tokens":0,"total_tokens":0}}` + "\n\ndata:[DONE]\n\n"
}
c.CacheKeyPrefix = json.Get("cacheKeyPrefix").String()
if c.CacheKeyPrefix == "" {
c.CacheKeyPrefix = DefaultCacheKeyPrefix
}
c.redisClient = wrapper.NewRedisClusterClient(wrapper.FQDNCluster{
FQDN: c.RedisInfo.ServiceName,
Port: int64(c.RedisInfo.ServicePort),
})
return c.redisClient.Init(c.RedisInfo.Username, c.RedisInfo.Password, int64(c.RedisInfo.Timeout))
}
func onHttpRequestHeaders(ctx wrapper.HttpContext, config PluginConfig, log wrapper.Log) types.Action {
contentType, _ := proxywasm.GetHttpRequestHeader("content-type")
// The request does not have a body.
if contentType == "" {
return types.ActionContinue
}
if !strings.Contains(contentType, "application/json") {
log.Warnf("content is not json, can't process:%s", contentType)
ctx.DontReadRequestBody()
return types.ActionContinue
}
// compatiable with qwen
x_dashscope_sse, _ := proxywasm.GetHttpRequestHeader("X-DashScope-SSE")
accept, _ := proxywasm.GetHttpRequestHeader("Accept")
if x_dashscope_sse == "enable" || strings.Contains(accept, "text/event-stream") {
ctx.SetContext(StreamContextKey, struct{}{})
}
proxywasm.RemoveHttpRequestHeader("Accept-Encoding")
// The request has a body and requires delaying the header transmission until a cache miss occurs,
// at which point the header should be sent.
return types.HeaderStopIteration
}
func TrimQuote(source string) string {
return strings.Trim(source, `"`)
}
func onHttpRequestBody(ctx wrapper.HttpContext, config PluginConfig, body []byte, log wrapper.Log) types.Action {
bodyJson := gjson.ParseBytes(body)
// TODO: It may be necessary to support stream mode determination for different LLM providers.
stream := false
if bodyJson.Get("stream").Bool() {
stream = true
ctx.SetContext(StreamContextKey, struct{}{})
} else if ctx.GetContext(StreamContextKey) != nil {
stream = true
}
key := TrimQuote(bodyJson.Get(config.CacheKeyFrom.RequestBody).Raw)
if key == "" {
log.Debug("parse key from request body failed")
return types.ActionContinue
}
ctx.SetContext(CacheKeyContextKey, key)
err := config.redisClient.Get(config.CacheKeyPrefix+key, func(response resp.Value) {
if err := response.Error(); err != nil {
log.Errorf("redis get key:%s failed, err:%v", key, err)
proxywasm.ResumeHttpRequest()
return
}
if response.IsNull() {
log.Debugf("cache miss, key:%s", key)
proxywasm.ResumeHttpRequest()
return
}
log.Debugf("cache hit, key:%s", key)
ctx.SetContext(CacheKeyContextKey, nil)
if !stream {
proxywasm.SendHttpResponse(200, [][2]string{{"content-type", "application/json; charset=utf-8"}}, []byte(fmt.Sprintf(config.ReturnResponseTemplate, response.String())), -1)
} else {
proxywasm.SendHttpResponse(200, [][2]string{{"content-type", "text/event-stream; charset=utf-8"}}, []byte(fmt.Sprintf(config.ReturnStreamResponseTemplate, response.String())), -1)
}
})
if err != nil {
log.Error("redis access failed")
return types.ActionContinue
}
return types.ActionPause
}
func processSSEMessage(ctx wrapper.HttpContext, config PluginConfig, sseMessage string, log wrapper.Log) string {
subMessages := strings.Split(sseMessage, "\n")
var message string
for _, msg := range subMessages {
if strings.HasPrefix(msg, "data:") {
message = msg
break
}
}
if len(message) < 6 {
log.Errorf("invalid message:%s", message)
return ""
}
// skip the prefix "data:"
bodyJson := message[5:]
if gjson.Get(bodyJson, config.CacheStreamValueFrom.ResponseBody).Exists() {
tempContentI := ctx.GetContext(CacheContentContextKey)
if tempContentI == nil {
content := TrimQuote(gjson.Get(bodyJson, config.CacheStreamValueFrom.ResponseBody).Raw)
ctx.SetContext(CacheContentContextKey, content)
return content
}
append := TrimQuote(gjson.Get(bodyJson, config.CacheStreamValueFrom.ResponseBody).Raw)
content := tempContentI.(string) + append
ctx.SetContext(CacheContentContextKey, content)
return content
} else if gjson.Get(bodyJson, "choices.0.delta.content.tool_calls").Exists() {
// TODO: compatible with other providers
ctx.SetContext(ToolCallsContextKey, struct{}{})
return ""
}
log.Debugf("unknown message:%s", bodyJson)
return ""
}
func onHttpResponseBody(ctx wrapper.HttpContext, config PluginConfig, chunk []byte, isLastChunk bool, log wrapper.Log) []byte {
if ctx.GetContext(ToolCallsContextKey) != nil {
// we should not cache tool call result
return chunk
}
keyI := ctx.GetContext(CacheKeyContextKey)
if keyI == nil {
return chunk
}
if !isLastChunk {
stream := ctx.GetContext(StreamContextKey)
if stream == nil {
tempContentI := ctx.GetContext(CacheContentContextKey)
if tempContentI == nil {
ctx.SetContext(CacheContentContextKey, chunk)
return chunk
}
tempContent := tempContentI.([]byte)
tempContent = append(tempContent, chunk...)
ctx.SetContext(CacheContentContextKey, tempContent)
} else {
var partialMessage []byte
partialMessageI := ctx.GetContext(PartialMessageContextKey)
if partialMessageI != nil {
partialMessage = append(partialMessageI.([]byte), chunk...)
} else {
partialMessage = chunk
}
messages := strings.Split(string(partialMessage), "\n\n")
for i, msg := range messages {
if i < len(messages)-1 {
// process complete message
processSSEMessage(ctx, config, msg, log)
}
}
if !strings.HasSuffix(string(partialMessage), "\n\n") {
ctx.SetContext(PartialMessageContextKey, []byte(messages[len(messages)-1]))
} else {
ctx.SetContext(PartialMessageContextKey, nil)
}
}
return chunk
}
// last chunk
key := keyI.(string)
stream := ctx.GetContext(StreamContextKey)
var value string
if stream == nil {
var body []byte
tempContentI := ctx.GetContext(CacheContentContextKey)
if tempContentI != nil {
body = append(tempContentI.([]byte), chunk...)
} else {
body = chunk
}
bodyJson := gjson.ParseBytes(body)
value = TrimQuote(bodyJson.Get(config.CacheValueFrom.ResponseBody).Raw)
if value == "" {
log.Warnf("parse value from response body failded, body:%s", body)
return chunk
}
} else {
if len(chunk) > 0 {
var lastMessage []byte
partialMessageI := ctx.GetContext(PartialMessageContextKey)
if partialMessageI != nil {
lastMessage = append(partialMessageI.([]byte), chunk...)
} else {
lastMessage = chunk
}
if !strings.HasSuffix(string(lastMessage), "\n\n") {
log.Warnf("invalid lastMessage:%s", lastMessage)
return chunk
}
// remove the last \n\n
lastMessage = lastMessage[:len(lastMessage)-2]
value = processSSEMessage(ctx, config, string(lastMessage), log)
} else {
tempContentI := ctx.GetContext(CacheContentContextKey)
if tempContentI == nil {
return chunk
}
value = tempContentI.(string)
}
}
config.redisClient.Set(config.CacheKeyPrefix+key, value, nil)
if config.CacheTTL != 0 {
config.redisClient.Expire(config.CacheKeyPrefix+key, config.CacheTTL, nil)
}
return chunk
}

View File

@@ -0,0 +1,52 @@
# File generated by hgctl. Modify as required.
version: 1.0.0
build:
# The official builder image version
builder:
go: 1.19
tinygo: 0.28.1
oras: 1.0.0
# The WASM plugin project directory
input: ./
# The output of the build products
output:
# Choose between 'files' and 'image'
type: files
# Destination address: when type=files, specify the local directory path, e.g., './out' or
# type=image, specify the remote docker repository, e.g., 'docker.io/<your_username>/<your_image>'
dest: ./out
# The authentication configuration for pushing image to the docker repository
docker-auth: ~/.docker/config.json
# The directory for the WASM plugin configuration structure
model-dir: ./
# The WASM plugin configuration structure name
model: PluginConfig
# Enable debug mode
debug: false
test:
# Test environment name, that is a docker compose project name
name: wasm-test
# The output path to build products, that is the source of test configuration parameters
from-path: ./out
# The test configuration source
test-path: ./test
# Docker compose configuration, which is empty, looks for the following files from 'test-path':
# compose.yaml, compose.yml, docker-compose.yml, docker-compose.yaml
compose-file:
# Detached mode: Run containers in the background
detach: false
install:
# The namespace of the installation
namespace: higress-system
# Use to validate WASM plugin configuration when install by yaml
spec-yaml: ./out/spec.yaml
# Installation source. Choose between 'from-yaml' and 'from-go-project'
from-yaml: ./test/plugin-conf.yaml
# If 'from-go-src' is non-empty, the output type of the build option must be 'image'
from-go-src:
# Enable debug mode
debug: false

View File

@@ -21,7 +21,7 @@ description: AI 代理插件配置参考
| 名称 | 数据类型 | 填写要求 | 默认值 | 描述 |
|----------------|-----------------|------|-----|----------------------------------------------------------------------------------|
| `type` | string | 必填 | - | AI 服务提供商名称。目前支持以下取值openai, azure, moonshot, qwen |
| `type` | string | 必填 | - | AI 服务提供商名称。目前支持以下取值openai, azure, moonshot, qwen, zhipuai |
| `apiTokens` | array of string | 必填 | - | 用于在访问 AI 服务时进行认证的令牌。如果配置了多个 token插件会在请求时随机进行选择。部分服务提供商只支持配置一个 token。 |
| `timeout` | number | 非必填 | - | 访问 AI 服务的超时时间。单位为毫秒。默认值为 120000即 2 分钟 |
| `modelMapping` | map of string | 非必填 | - | AI 模型映射表,用于将请求中的模型名称映射为服务提供商支持模型名称。<br/>可以使用 "*" 为键来配置通用兜底映射关系 |
@@ -77,6 +77,10 @@ Azure OpenAI 所对应的 `type` 为 `azure`。它特有的配置字段如下:
零一万物所对应的 `type``yi`。它并无特有的配置字段。
#### 智谱AIZhipu AI
智谱AI所对应的 `type``zhipuai`。它并无特有的配置字段。
#### DeepSeekDeepSeek
DeepSeek所对应的 `type``deepseek`。它并无特有的配置字段。
@@ -93,6 +97,15 @@ Anthropic Claude 所对应的 `type` 为 `claude`。它特有的配置字段如
|-----------|--------|-----|-----|-------------------|
| `version` | string | 必填 | - | Claude 服务的 API 版本 |
#### Ollama
Ollama 所对应的 `type``ollama`。它特有的配置字段如下:
| 名称 | 数据类型 | 填写要求 | 默认值 | 描述 |
|-------------------|--------|------|-----|----------------------------------------------|
| `ollamaServerHost` | string | 必填 | - | Ollama 服务器的主机地址 |
| `ollamaServerPort` | number | 必填 | - | Ollama 服务器的端口号默认为11434 |
## 用法示例
### 使用 OpenAI 协议代理 Azure OpenAI 服务

View File

@@ -36,7 +36,7 @@ func main() {
}
func parseConfig(json gjson.Result, pluginConfig *config.PluginConfig, log wrapper.Log) error {
//log.Debugf("loading config: %s", json.String())
// log.Debugf("loading config: %s", json.String())
pluginConfig.FromJson(json)
if err := pluginConfig.Validate(); err != nil {

View File

@@ -0,0 +1,114 @@
package provider
import (
"errors"
"fmt"
"github.com/alibaba/higress/plugins/wasm-go/extensions/ai-proxy/util"
"github.com/alibaba/higress/plugins/wasm-go/pkg/wrapper"
"github.com/higress-group/proxy-wasm-go-sdk/proxywasm"
"github.com/higress-group/proxy-wasm-go-sdk/proxywasm/types"
)
// ollamaProvider is the provider for Ollama service.
const (
ollamaChatCompletionPath = "/v1/chat/completions"
)
type ollamaProviderInitializer struct {
}
func (m *ollamaProviderInitializer) ValidateConfig(config ProviderConfig) error {
if config.ollamaServerHost == "" {
return errors.New("missing ollamaServerHost in provider config")
}
if config.ollamaServerPort == 0 {
return errors.New("missing ollamaServerPort in provider config")
}
return nil
}
func (m *ollamaProviderInitializer) CreateProvider(config ProviderConfig) (Provider, error) {
serverPortStr := fmt.Sprintf("%d", config.ollamaServerPort)
serviceDomain := config.ollamaServerHost + ":" + serverPortStr
return &ollamaProvider{
config: config,
serviceDomain: serviceDomain,
contextCache: createContextCache(&config),
}, nil
}
type ollamaProvider struct {
config ProviderConfig
serviceDomain string
contextCache *contextCache
}
func (m *ollamaProvider) GetProviderType() string {
return providerTypeOllama
}
func (m *ollamaProvider) OnRequestHeaders(ctx wrapper.HttpContext, apiName ApiName, log wrapper.Log) (types.Action, error) {
if apiName != ApiNameChatCompletion {
return types.ActionContinue, errUnsupportedApiName
}
_ = util.OverwriteRequestPath(ollamaChatCompletionPath)
_ = util.OverwriteRequestHost(m.serviceDomain)
_ = proxywasm.RemoveHttpRequestHeader("Content-Length")
return types.ActionContinue, nil
}
func (m *ollamaProvider) OnRequestBody(ctx wrapper.HttpContext, apiName ApiName, body []byte, log wrapper.Log) (types.Action, error) {
if apiName != ApiNameChatCompletion {
return types.ActionContinue, errUnsupportedApiName
}
if m.config.modelMapping == nil && m.contextCache == nil {
return types.ActionContinue, nil
}
request := &chatCompletionRequest{}
if err := decodeChatCompletionRequest(body, request); err != nil {
return types.ActionContinue, err
}
model := request.Model
if model == "" {
return types.ActionContinue, errors.New("missing model in chat completion request")
}
mappedModel := getMappedModel(model, m.config.modelMapping, log)
if mappedModel == "" {
return types.ActionContinue, errors.New("model becomes empty after applying the configured mapping")
}
request.Model = mappedModel
if m.contextCache != nil {
err := m.contextCache.GetContent(func(content string, err error) {
defer func() {
_ = proxywasm.ResumeHttpRequest()
}()
if err != nil {
log.Errorf("failed to load context file: %v", err)
_ = util.SendResponse(500, util.MimeTypeTextPlain, fmt.Sprintf("failed to load context file: %v", err))
}
insertContextMessage(request, content)
if err := replaceJsonRequestBody(request, log); err != nil {
_ = util.SendResponse(500, util.MimeTypeTextPlain, fmt.Sprintf("failed to replace request body: %v", err))
}
}, log)
if err == nil {
return types.ActionPause, nil
} else {
return types.ActionContinue, err
}
} else {
if err := replaceJsonRequestBody(request, log); err != nil {
_ = util.SendResponse(500, util.MimeTypeTextPlain, fmt.Sprintf("failed to replace request body: %v", err))
return types.ActionContinue, err
}
_ = proxywasm.ResumeHttpRequest()
return types.ActionPause, nil
}
}

View File

@@ -23,6 +23,8 @@ const (
providerTypeBaichuan = "baichuan"
providerTypeYi = "yi"
providerTypeDeepSeek = "deepseek"
providerTypeZhipuAi = "zhipuai"
providerTypeOllama = "ollama"
protocolOpenAI = "openai"
protocolOriginal = "original"
@@ -60,6 +62,8 @@ var (
providerTypeBaichuan: &baichuanProviderInitializer{},
providerTypeYi: &yiProviderInitializer{},
providerTypeDeepSeek: &deepseekProviderInitializer{},
providerTypeZhipuAi: &zhipuAiProviderInitializer{},
providerTypeOllama: &ollamaProviderInitializer{},
}
)
@@ -89,7 +93,7 @@ type ResponseBodyHandler interface {
type ProviderConfig struct {
// @Title zh-CN AI服务提供商
// @Description zh-CN AI服务提供商类型目前支持的取值为"moonshot"、"qwen"、"openai"、"azure"、"baichuan"、"yi"
// @Description zh-CN AI服务提供商类型目前支持的取值为"moonshot"、"qwen"、"openai"、"azure"、"baichuan"、"yi"、"zhipuai"、"ollama"
typ string `required:"true" yaml:"type" json:"type"`
// @Title zh-CN API Tokens
// @Description zh-CN 在请求AI服务时用于认证的API Token列表。不同的AI服务提供商可能有不同的名称。部分供应商只支持配置一个API Token如Azure OpenAI
@@ -109,6 +113,12 @@ type ProviderConfig struct {
// @Title zh-CN 启用通义千问搜索服务
// @Description zh-CN 仅适用于通义千问服务,表示是否启用通义千问的互联网搜索功能。
qwenEnableSearch bool `required:"false" yaml:"qwenEnableSearch" json:"qwenEnableSearch"`
// @Title zh-CN Ollama Server IP/Domain
// @Description zh-CN 仅适用于 Ollama 服务。Ollama 服务器的主机地址。
ollamaServerHost string `required:"false" yaml:"ollamaServerHost" json:"ollamaServerHost"`
// @Title zh-CN Ollama Server Port
// @Description zh-CN 仅适用于 Ollama 服务。Ollama 服务器的端口号。
ollamaServerPort uint32 `required:"false" yaml:"ollamaServerPort" json:"ollamaServerPort"`
// @Title zh-CN 模型名称映射表
// @Description zh-CN 用于将请求中的模型名称映射为目标AI服务商支持的模型名称。支持通过“*”来配置全局映射
modelMapping map[string]string `required:"false" yaml:"modelMapping" json:"modelMapping"`
@@ -137,6 +147,8 @@ func (c *ProviderConfig) FromJson(json gjson.Result) {
c.qwenFileIds = append(c.qwenFileIds, fileId.String())
}
c.qwenEnableSearch = json.Get("qwenEnableSearch").Bool()
c.ollamaServerHost = json.Get("ollamaServerHost").String()
c.ollamaServerPort = uint32(json.Get("ollamaServerPort").Uint())
c.modelMapping = make(map[string]string)
for k, v := range json.Get("modelMapping").Map() {
c.modelMapping[k] = v.String()
@@ -170,6 +182,7 @@ func (c *ProviderConfig) Validate() error {
if c.typ == "" {
return errors.New("missing type in provider config")
}
initializer, has := providerInitializers[c.typ]
if !has {

View File

@@ -26,6 +26,8 @@ const (
qwenTopPMax = 0.999999
qwenDummySystemMessageContent = "You are a helpful assistant."
qwenLongModelName = "qwen-long"
)
type qwenProviderInitializer struct {
@@ -99,7 +101,7 @@ func (m *qwenProvider) OnRequestBody(ctx wrapper.HttpContext, apiName ApiName, b
log.Errorf("failed to load context file: %v", err)
_ = util.SendResponse(500, util.MimeTypeTextPlain, fmt.Sprintf("failed to load context file: %v", err))
}
m.insertContextMessage(request, content)
m.insertContextMessage(request, content, false)
if err := replaceJsonRequestBody(request, log); err != nil {
_ = util.SendResponse(500, util.MimeTypeTextPlain, fmt.Sprintf("failed to replace request body: %v", err))
}
@@ -292,7 +294,7 @@ func (m *qwenProvider) buildQwenTextGenerationRequest(origRequest *chatCompletio
Tools: origRequest.Tools,
},
}
if len(m.config.qwenFileIds) != 0 {
if len(m.config.qwenFileIds) != 0 && origRequest.Model == qwenLongModelName {
builder := strings.Builder{}
for _, fileId := range m.config.qwenFileIds {
if builder.Len() != 0 {
@@ -301,7 +303,7 @@ func (m *qwenProvider) buildQwenTextGenerationRequest(origRequest *chatCompletio
builder.WriteString("fileid://")
builder.WriteString(fileId)
}
contextMessageId := m.insertContextMessage(request, builder.String())
contextMessageId := m.insertContextMessage(request, builder.String(), true)
if contextMessageId == 0 {
// The context message cannot come first. We need to add another dummy system message before it.
request.Input.Messages = append([]qwenMessage{{Role: roleSystem, Content: qwenDummySystemMessageContent}}, request.Input.Messages...)
@@ -417,12 +419,12 @@ func (m *qwenProvider) convertStreamEvent(ctx wrapper.HttpContext, responseBuild
return nil
}
func (m *qwenProvider) insertContextMessage(request *qwenTextGenRequest, content string) int {
func (m *qwenProvider) insertContextMessage(request *qwenTextGenRequest, content string, onlyOneSystemBeforeFile bool) int {
fileMessage := qwenMessage{
Role: roleSystem,
Content: content,
}
firstNonSystemMessageIndex := -1
var firstNonSystemMessageIndex int
messages := request.Input.Messages
if messages != nil {
for i, message := range request.Input.Messages {
@@ -432,12 +434,22 @@ func (m *qwenProvider) insertContextMessage(request *qwenTextGenRequest, content
}
}
}
if firstNonSystemMessageIndex == -1 {
if firstNonSystemMessageIndex == 0 {
request.Input.Messages = append([]qwenMessage{fileMessage}, request.Input.Messages...)
return 0
} else {
} else if !onlyOneSystemBeforeFile {
request.Input.Messages = append(request.Input.Messages[:firstNonSystemMessageIndex], append([]qwenMessage{fileMessage}, request.Input.Messages[firstNonSystemMessageIndex:]...)...)
return firstNonSystemMessageIndex
} else {
builder := strings.Builder{}
for _, message := range request.Input.Messages[:firstNonSystemMessageIndex] {
if builder.Len() != 0 {
builder.WriteString("\n")
}
builder.WriteString(message.Content)
}
request.Input.Messages = append([]qwenMessage{{Role: roleSystem, Content: builder.String()}, fileMessage}, request.Input.Messages[firstNonSystemMessageIndex:]...)
return 1
}
}

View File

@@ -37,14 +37,14 @@ func insertContextMessage(request *chatCompletionRequest, content string) {
Role: roleSystem,
Content: content,
}
firstNonSystemMessageIndex := -1
var firstNonSystemMessageIndex int
for i, message := range request.Messages {
if message.Role != roleSystem {
firstNonSystemMessageIndex = i
break
}
}
if firstNonSystemMessageIndex == -1 {
if firstNonSystemMessageIndex == 0 {
request.Messages = append([]chatMessage{fileMessage}, request.Messages...)
} else {
request.Messages = append(request.Messages[:firstNonSystemMessageIndex], append([]chatMessage{fileMessage}, request.Messages[firstNonSystemMessageIndex:]...)...)

View File

@@ -0,0 +1,84 @@
package provider
import (
"fmt"
"github.com/alibaba/higress/plugins/wasm-go/extensions/ai-proxy/util"
"github.com/alibaba/higress/plugins/wasm-go/pkg/wrapper"
"github.com/higress-group/proxy-wasm-go-sdk/proxywasm"
"github.com/higress-group/proxy-wasm-go-sdk/proxywasm/types"
)
const (
zhipuAiDomain = "open.bigmodel.cn"
zhipuAiChatCompletionPath = "/api/paas/v4/chat/completions"
)
type zhipuAiProviderInitializer struct{}
func (m *zhipuAiProviderInitializer) ValidateConfig(config ProviderConfig) error {
return nil
}
func (m *zhipuAiProviderInitializer) CreateProvider(config ProviderConfig) (Provider, error) {
return &zhipuAiProvider{
config: config,
contextCache: createContextCache(&config),
}, nil
}
type zhipuAiProvider struct {
config ProviderConfig
contextCache *contextCache
}
func (m *zhipuAiProvider) GetProviderType() string {
return providerTypeZhipuAi
}
func (m *zhipuAiProvider) OnRequestHeaders(ctx wrapper.HttpContext, apiName ApiName, log wrapper.Log) (types.Action, error) {
if apiName != ApiNameChatCompletion {
return types.ActionContinue, errUnsupportedApiName
}
_ = util.OverwriteRequestPath(zhipuAiChatCompletionPath)
_ = util.OverwriteRequestHost(zhipuAiDomain)
_ = proxywasm.ReplaceHttpRequestHeader("Authorization", "Bearer "+m.config.GetRandomToken())
if m.contextCache == nil {
ctx.DontReadRequestBody()
} else {
_ = proxywasm.RemoveHttpRequestHeader("Content-Length")
}
return types.ActionContinue, nil
}
func (m *zhipuAiProvider) OnRequestBody(ctx wrapper.HttpContext, apiName ApiName, body []byte, log wrapper.Log) (types.Action, error) {
if apiName != ApiNameChatCompletion {
return types.ActionContinue, errUnsupportedApiName
}
if m.contextCache == nil {
return types.ActionContinue, nil
}
request := &chatCompletionRequest{}
if err := decodeChatCompletionRequest(body, request); err != nil {
return types.ActionContinue, err
}
err := m.contextCache.GetContent(func(content string, err error) {
defer func() {
_ = proxywasm.ResumeHttpRequest()
}()
if err != nil {
log.Errorf("failed to load context file: %v", err)
_ = util.SendResponse(500, util.MimeTypeTextPlain, fmt.Sprintf("failed to load context file: %v", err))
}
insertContextMessage(request, content)
if err := replaceJsonRequestBody(request, log); err != nil {
_ = util.SendResponse(500, util.MimeTypeTextPlain, fmt.Sprintf("failed to replace request body: %v", err))
}
}, log)
if err == nil {
return types.ActionPause, nil
}
return types.ActionContinue, err
}

View File

@@ -151,3 +151,20 @@ func (c ConsulCluster) HostName() string {
}
return c.ServiceName
}
type FQDNCluster struct {
FQDN string
Host string
Port int64
}
func (c FQDNCluster) ClusterName() string {
return fmt.Sprintf("outbound|%d||%s", c.Port, c.FQDN)
}
func (c FQDNCluster) HostName() string {
if c.Host != "" {
return c.Host
}
return c.FQDN
}

View File

@@ -142,7 +142,9 @@ func RedisCall(cluster Cluster, respQuery []byte, callback RedisResponseCallback
}
}
}
callback(responseValue)
if callback != nil {
callback(responseValue)
}
})
if err != nil {
proxywasm.LogCriticalf("redis call failed, request-id: %s, error: %v", requestID, err)

View File

@@ -22,6 +22,7 @@ Higress provides make target to run ingress api conformance tests and wasmplugin
+ Build ALL GO WasmPlugins for testing: `make higress-wasmplugin-test`
+ Only build one GO WasmPlugin for testing: `PLUGIN_NAME=request-block make higress-wasmplugin-test`
+ Only build one CPP WasmPlugin for testing: `PLUGIN_TYPE=CPP PLUGIN_NAME=key_auth make higress-wasmplugin-test`
+ Only run the specific test, splited by comma `TEST_SHORTNAME=WasmPluginsIPRestrictionAllow,WasmPluginsIPRestrictionDeny make higress-wasmplugin-test`
It can be divided into below steps:

View File

@@ -22,6 +22,7 @@ Higress 提供了运行 Ingress API 一致性测试和 wasmplugin 测试的 make
+ 为测试构建所有 GO WasmPlugins: `make higress-wasmplugin-test`
+ 仅为一个 GO WasmPlugin 构建测试: `PLUGIN_NAME=request-block make higress-wasmplugin-test`
+ 仅为一个 CPP WasmPlugin 构建测试: `PLUGIN_TYPE=CPP PLUGIN_NAME=key_auth make higress-wasmplugin-test`
+ 仅运行指定测试,用逗号分隔 `TEST_SHORTNAME=WasmPluginsIPRestrictionAllow,WasmPluginsIPRestrictionDeny make higress-wasmplugin-test`
可以分为以下步骤:

View File

@@ -64,7 +64,7 @@ spec:
# image: higress-registry.cn-hangzhou.cr.aliyuncs.com/higress/echoserver:v20221109-7ee2f3e
# From https://github.com/Uncle-Justice/echo-server
image: 873292889/echo-server:1.3.0
image: higress-registry.cn-hangzhou.cr.aliyuncs.com/higress/echo-server:1.3.0
env:
- name: POD_NAME
valueFrom:
@@ -113,7 +113,7 @@ spec:
# image: higress-registry.cn-hangzhou.cr.aliyuncs.com/higress/echoserver:v20221109-7ee2f3e
# From https://github.com/Uncle-Justice/echo-server
image: 873292889/echo-server:1.3.0
image: higress-registry.cn-hangzhou.cr.aliyuncs.com/higress/echo-server:1.3.0
env:
- name: POD_NAME
valueFrom:
@@ -162,7 +162,7 @@ spec:
# image: higress-registry.cn-hangzhou.cr.aliyuncs.com/higress/echoserver:v20221109-7ee2f3e
# From https://github.com/Uncle-Justice/echo-server
image: 873292889/echo-server:1.3.0
image: higress-registry.cn-hangzhou.cr.aliyuncs.com/higress/echo-server:1.3.0
env:
- name: POD_NAME
valueFrom:
@@ -209,7 +209,7 @@ spec:
containers:
- name: infra-backend-echo-body-v1
# FROM https://github.com/higress-group/echo-body
image: 873292889/echo-body:1.0.0
image: higress-registry.cn-hangzhou.cr.aliyuncs.com/higress/echo-server:1.3.0
env:
- name: POD_NAME
valueFrom:
@@ -311,7 +311,7 @@ spec:
# image: higress-registry.cn-hangzhou.cr.aliyuncs.com/higress/echoserver:v20221109-7ee2f3e
# From https://github.com/Uncle-Justice/echo-server
image: 873292889/echo-server:1.3.0
image: higress-registry.cn-hangzhou.cr.aliyuncs.com/higress/echo-server:1.3.0
env:
- name: POD_NAME
valueFrom:
@@ -360,7 +360,7 @@ spec:
# image: higress-registry.cn-hangzhou.cr.aliyuncs.com/higress/echoserver:v20221109-7ee2f3e
# From https://github.com/Uncle-Justice/echo-server
image: 873292889/echo-server:1.3.0
image: higress-registry.cn-hangzhou.cr.aliyuncs.com/higress/echo-server:1.3.0
env:
- name: POD_NAME
valueFrom:
@@ -416,7 +416,7 @@ spec:
# image: higress-registry.cn-hangzhou.cr.aliyuncs.com/higress/echoserver:v20221109-7ee2f3e
# From https://github.com/Uncle-Justice/echo-server
image: 873292889/echo-server:1.3.0
image: higress-registry.cn-hangzhou.cr.aliyuncs.com/higress/echo-server:1.3.0
env:
- name: POD_NAME
valueFrom:

View File

@@ -23,6 +23,7 @@ var (
CleanupBaseResources = flag.Bool("cleanup-base-resources", true, "Whether to cleanup base test resources after the run")
SupportedFeatures = flag.String("supported-features", "", "Supported features included in conformance tests suites")
ExemptFeatures = flag.String("exempt-features", "", "Exempt Features excluded from conformance tests suites")
ExecuteTests = flag.String("execute-tests", "", "Execute the specific conformance tests")
IsWasmPluginTest = flag.Bool("isWasmPluginTest", false, "Determine if run wasm plugin conformance test")
WasmPluginType = flag.String("wasmPluginType", "GO", "Define wasm plugin type, currently supports GO, CPP")
WasmPluginName = flag.String("wasmPluginName", "", "Define wasm plugin name")

View File

@@ -15,6 +15,7 @@ package suite
import (
"fmt"
"strings"
"testing"
"github.com/alibaba/higress/test/e2e/conformance/utils/config"
@@ -43,6 +44,7 @@ type ConformanceTestSuite struct {
BaseManifests []string
Applier kubernetes.Applier
SkipTests sets.Set
ExecuteTests sets.Set
TimeoutConfig config.TimeoutConfig
SupportedFeatures sets.Set
}
@@ -51,6 +53,7 @@ type ConformanceTestSuite struct {
type Options struct {
SupportedFeatures sets.Set
ExemptFeatures sets.Set
ExecuteTests string
EnableAllSupportedFeatures bool
Client client.Client
@@ -116,6 +119,7 @@ func New(s Options) *ConformanceTestSuite {
BaseManifests: s.BaseManifests,
SupportedFeatures: s.SupportedFeatures,
GatewayAddress: s.GatewayAddress,
ExecuteTests: sets.NewSet(),
Applier: kubernetes.Applier{
NamespaceLabels: s.NamespaceLabels,
},
@@ -134,6 +138,13 @@ func New(s Options) *ConformanceTestSuite {
}
}
testNames := strings.Split(s.ExecuteTests, ",")
for i := range testNames {
if testNames[i] != "" {
suite.ExecuteTests = suite.ExecuteTests.Insert(testNames[i])
}
}
return suite
}
@@ -232,6 +243,10 @@ func (test *ConformanceTest) Run(t *testing.T, suite *ConformanceTestSuite) {
t.Skipf("🏊🏼 Skipping %s: test explicitly skipped", test.ShortName)
}
if len(suite.ExecuteTests) > 0 && !suite.ExecuteTests.Contains(test.ShortName) {
t.Skipf("🏊🏼 Skipping %s: test explicitly skipped", test.ShortName)
}
t.Logf("🔥 Running Conformance Test: %s", test.ShortName)
for _, manifestLocation := range test.PreDeleteRs {

View File

@@ -50,6 +50,7 @@ func TestHigressConformanceTests(t *testing.T) {
WasmPluginName: *flags.WasmPluginName,
WasmPluginType: *flags.WasmPluginType,
},
ExecuteTests: *flags.ExecuteTests,
GatewayAddress: "localhost",
EnableAllSupportedFeatures: true,
IsEnvoyConfigTest: *flags.IsEnvoyConfigTest,