Compare commits

..

24 Commits

Author SHA1 Message Date
007gzs
102596360a rustup target add wasm32-wasip1 2025-02-27 13:28:20 +08:00
007gzs
7354d80263 fix prebuild 2025-02-27 11:42:34 +08:00
007gzs
3646502248 fix clippy 2025-02-27 11:12:51 +08:00
007gzs
8be591d052 fix rust_wasm_build 2025-02-27 10:30:34 +08:00
Kent Dong
45fdd95a9c feat: Support pushing multi-arch images to a custom image registry (#1815) 2025-02-26 21:15:53 +08:00
Se7en
d3afe345ad fix: remove last failed apiToken from retry apiToken list (#1802) 2025-02-26 21:11:51 +08:00
韩贤涛
90ca903d2e feat: ext-auth plugin: Blacklist and whitelist modes support HTTP request method matching (#1798) 2025-02-26 20:54:52 +08:00
007gzs
2d8a8f26da Ai data masking msg window (#1775) 2025-02-26 20:48:37 +08:00
Se7en
9ea2410388 feat: update ai-token-ratelimit documentation by removing ai-statistics plugin (#1767) 2025-02-26 20:47:37 +08:00
littlejian
9e1792c245 add notes to gateway.rollingMaxUnavailable (#1819) 2025-02-26 20:46:53 +08:00
rinfx
3eda7def89 ai-search support quark (#1811) 2025-02-26 18:42:22 +08:00
澄潭
1787553294 set include_usage by default for all model providers (#1818) 2025-02-26 16:49:16 +08:00
澄潭
f6c48415d1 Add database configuration for plugins that use Redis. (#1814) 2025-02-26 10:52:54 +08:00
MARATRIX Li
e27d3d0971 fix(typo): use the correct bing name for ai-search. (#1807)
Signed-off-by: maratrixx <maratrix@163.com>
2025-02-25 13:37:32 +08:00
Kent Dong
49617c7a98 feat: Unify the SSE processing logic (#1800) 2025-02-25 11:00:18 +08:00
澄潭
53a015d8fe Update arxiv.md 2025-02-24 11:27:55 +08:00
澄潭
e711e9f997 Update full.md 2025-02-24 11:27:33 +08:00
澄潭
8530742472 Update README_EN.md 2025-02-24 11:16:09 +08:00
澄潭
c0c1f5113a Update README.md 2025-02-24 11:15:55 +08:00
澄潭
2e6ddd7e35 Add ai search plugin (#1804) 2025-02-24 11:14:47 +08:00
Kent Dong
2328e19c9d fix: Fix a bug in openaiCustomUrl support (#1790) 2025-02-22 12:12:49 +08:00
Kent Dong
fabc22f218 feat: Support transforming reasoning_content returned by Qwen to OpenAI contract (#1791) 2025-02-21 17:32:02 +08:00
Yiiong
2986e1911d feat: add ollama embedding to ai-cache (#1794) 2025-02-21 15:21:49 +08:00
澄潭
a566f7257d update helm docs (#1782) 2025-02-19 17:48:20 +08:00
71 changed files with 4178 additions and 842 deletions

View File

@@ -133,6 +133,11 @@ jobs:
command="
set -e
cd /workspace/plugins/wasm-rust/extensions/${PLUGIN_NAME}
if [ -f ./.prebuild ]; then
echo 'Found .prebuild file, sourcing it...'
. ./.prebuild
fi
rustup target add wasm32-wasip1
cargo build --target wasm32-wasip1 --release
cp target/wasm32-wasip1/release/*.wasm plugin.wasm
tar czvf plugin.tar.gz plugin.wasm

View File

@@ -1,229 +1,258 @@
name: Build Docker Images and Push to Image Registry
on:
push:
tags:
- "v*.*.*"
workflow_dispatch: ~
jobs:
build-controller-image:
runs-on: ubuntu-latest
environment:
name: image-registry-controller
env:
CONTROLLER_IMAGE_REGISTRY: ${{ vars.IMAGE_REGISTRY || 'higress-registry.cn-hangzhou.cr.aliyuncs.com' }}
CONTROLLER_IMAGE_NAME: ${{ vars.CONTROLLER_IMAGE_NAME || 'higress/higress' }}
steps:
- name: "Checkout ${{ github.ref }}"
uses: actions/checkout@v4
with:
fetch-depth: 1
- name: Free Up GitHub Actions Ubuntu Runner Disk Space 🔧
uses: jlumbroso/free-disk-space@main
with:
tool-cache: false
android: true
dotnet: true
haskell: true
large-packages: true
swap-storage: true
- name: "Setup Go"
uses: actions/setup-go@v5
with:
go-version: 1.21.5
- name: Setup Golang Caches
uses: actions/cache@v4
with:
path: |-
~/.cache/go-build
~/go/pkg/mod
key: ${{ runner.os }}-go-${{ github.run_id }}
restore-keys: ${{ runner.os }}-go
- name: Calculate Docker metadata
id: docker-meta
uses: docker/metadata-action@v5
with:
images: |
${{ env.CONTROLLER_IMAGE_REGISTRY }}/${{ env.CONTROLLER_IMAGE_NAME }}
tags: |
type=sha
type=ref,event=tag
type=semver,pattern={{version}}
type=raw,value=latest,enable=${{ github.ref == format('refs/heads/{0}', 'main') }}
- name: Login to Docker Registry
uses: docker/login-action@v3
with:
registry: ${{ env.CONTROLLER_IMAGE_REGISTRY }}
username: ${{ secrets.REGISTRY_USERNAME }}
password: ${{ secrets.REGISTRY_PASSWORD }}
- name: Build Docker Image and Push
run: |
GOPROXY="https://proxy.golang.org,direct" make docker-buildx-push
BUILT_IMAGE="higress-registry.cn-hangzhou.cr.aliyuncs.com/higress/higress"
readarray -t IMAGES <<< "${{ steps.docker-meta.outputs.tags }}"
for image in ${IMAGES[@]}; do
echo "Image: $image"
docker buildx imagetools create $BUILT_IMAGE:$GITHUB_SHA --tag $image
done
build-pilot-image:
runs-on: ubuntu-latest
environment:
name: image-registry-pilot
env:
PILOT_IMAGE_REGISTRY: ${{ vars.IMAGE_REGISTRY || 'higress-registry.cn-hangzhou.cr.aliyuncs.com' }}
PILOT_IMAGE_NAME: ${{ vars.PILOT_IMAGE_NAME || 'higress/pilot' }}
steps:
- name: "Checkout ${{ github.ref }}"
uses: actions/checkout@v4
with:
fetch-depth: 1
- name: Free Up GitHub Actions Ubuntu Runner Disk Space 🔧
uses: jlumbroso/free-disk-space@main
with:
tool-cache: false
android: true
dotnet: true
haskell: true
large-packages: true
swap-storage: true
- name: "Setup Go"
uses: actions/setup-go@v5
with:
go-version: 1.21.5
- name: Setup Golang Caches
uses: actions/cache@v4
with:
path: |-
~/.cache/go-build
~/go/pkg/mod
key: ${{ runner.os }}-go-${{ github.run_id }}
restore-keys: ${{ runner.os }}-go
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v1
- name: Cache Docker layers
uses: actions/cache@v2
with:
path: /tmp/.buildx-cache
key: ${{ runner.os }}-buildx-${{ github.sha }}
restore-keys: |
${{ runner.os }}-buildx-
- name: Calculate Docker metadata
id: docker-meta
uses: docker/metadata-action@v5
with:
images: |
${{ env.PILOT_IMAGE_REGISTRY }}/${{ env.PILOT_IMAGE_NAME }}
tags: |
type=sha
type=ref,event=tag
type=semver,pattern={{version}}
type=raw,value=latest,enable=${{ github.ref == format('refs/heads/{0}', 'main') }}
- name: Login to Docker Registry
uses: docker/login-action@v3
with:
registry: ${{ env.PILOT_IMAGE_REGISTRY }}
username: ${{ secrets.REGISTRY_USERNAME }}
password: ${{ secrets.REGISTRY_PASSWORD }}
- name: Build Pilot-Discovery Image and Push
run: |
GOPROXY="https://proxy.golang.org,direct" make build-istio
BUILT_IMAGE="higress-registry.cn-hangzhou.cr.aliyuncs.com/higress/pilot"
readarray -t IMAGES <<< "${{ steps.docker-meta.outputs.tags }}"
for image in ${IMAGES[@]}; do
echo "Image: $image"
docker buildx imagetools create $BUILT_IMAGE:$GITHUB_SHA --tag $image
done
build-gateway-image:
runs-on: ubuntu-latest
environment:
name: image-registry-pilot
env:
GATEWAY_IMAGE_REGISTRY: ${{ vars.IMAGE_REGISTRY || 'higress-registry.cn-hangzhou.cr.aliyuncs.com' }}
GATEWAY_IMAGE_NAME: ${{ vars.GATEWAY_IMAGE_NAME || 'higress/gateway' }}
steps:
- name: "Checkout ${{ github.ref }}"
uses: actions/checkout@v4
with:
fetch-depth: 1
- name: Free Up GitHub Actions Ubuntu Runner Disk Space 🔧
uses: jlumbroso/free-disk-space@main
with:
tool-cache: false
android: true
dotnet: true
haskell: true
large-packages: true
swap-storage: true
- name: "Setup Go"
uses: actions/setup-go@v5
with:
go-version: 1.21.5
- name: Setup Golang Caches
uses: actions/cache@v4
with:
path: |-
~/.cache/go-build
~/go/pkg/mod
key: ${{ runner.os }}-go-${{ github.run_id }}
restore-keys: ${{ runner.os }}-go
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v1
- name: Cache Docker layers
uses: actions/cache@v2
with:
path: /tmp/.buildx-cache
key: ${{ runner.os }}-buildx-${{ github.sha }}
restore-keys: |
${{ runner.os }}-buildx-
- name: Calculate Docker metadata
id: docker-meta
uses: docker/metadata-action@v5
with:
images: |
${{ env.GATEWAY_IMAGE_REGISTRY }}/${{ env.GATEWAY_IMAGE_NAME }}
tags: |
type=sha
type=ref,event=tag
type=semver,pattern={{version}}
type=raw,value=latest,enable=${{ github.ref == format('refs/heads/{0}', 'main') }}
- name: Login to Docker Registry
uses: docker/login-action@v3
with:
registry: ${{ env.GATEWAY_IMAGE_REGISTRY }}
username: ${{ secrets.REGISTRY_USERNAME }}
password: ${{ secrets.REGISTRY_PASSWORD }}
- name: Build Gateway Image and Push
run: |
GOPROXY="https://proxy.golang.org,direct" make build-gateway
BUILT_IMAGE="higress-registry.cn-hangzhou.cr.aliyuncs.com/higress/proxyv2"
readarray -t IMAGES <<< "${{ steps.docker-meta.outputs.tags }}"
for image in ${IMAGES[@]}; do
echo "Image: $image"
docker buildx imagetools create $BUILT_IMAGE:$GITHUB_SHA --tag $image
done
name: Build Docker Images and Push to Image Registry
on:
push:
tags:
- "v*.*.*"
workflow_dispatch: ~
jobs:
build-controller-image:
runs-on: ubuntu-latest
environment:
name: image-registry-controller
env:
CONTROLLER_IMAGE_REGISTRY: ${{ vars.IMAGE_REGISTRY || 'higress-registry.cn-hangzhou.cr.aliyuncs.com' }}
CONTROLLER_IMAGE_NAME: ${{ vars.CONTROLLER_IMAGE_NAME || 'higress/higress' }}
steps:
- name: "Checkout ${{ github.ref }}"
uses: actions/checkout@v4
with:
fetch-depth: 1
- name: Free Up GitHub Actions Ubuntu Runner Disk Space 🔧
uses: jlumbroso/free-disk-space@main
with:
tool-cache: false
android: true
dotnet: true
haskell: true
large-packages: true
swap-storage: true
- name: "Setup Go"
uses: actions/setup-go@v5
with:
go-version: 1.21.5
- name: Setup Golang Caches
uses: actions/cache@v4
with:
path: |-
~/.cache/go-build
~/go/pkg/mod
key: ${{ runner.os }}-go-${{ github.run_id }}
restore-keys: ${{ runner.os }}-go
- name: Calculate Docker metadata
id: docker-meta
uses: docker/metadata-action@v5
with:
images: |
${{ env.CONTROLLER_IMAGE_REGISTRY }}/${{ env.CONTROLLER_IMAGE_NAME }}
tags: |
type=sha
type=ref,event=tag
type=semver,pattern={{version}}
type=raw,value=latest,enable=${{ github.ref == format('refs/heads/{0}', 'main') }}
- name: Login to Docker Registry
uses: docker/login-action@v3
with:
registry: ${{ env.CONTROLLER_IMAGE_REGISTRY }}
username: ${{ secrets.REGISTRY_USERNAME }}
password: ${{ secrets.REGISTRY_PASSWORD }}
- name: Build Docker Image and Push
run: |
BUILT_IMAGE=""
readarray -t IMAGES <<< "${{ steps.docker-meta.outputs.tags }}"
for image in ${IMAGES[@]}; do
echo "Image: $image"
if [ "$BUILT_IMAGE" == "" ]; then
GOPROXY="https://proxy.golang.org,direct" IMG_URL="$image" make docker-buildx-push
BUILT_IMAGE="$image"
else
docker buildx imagetools create $BUILT_IMAGE --tag $image
fi
done
build-pilot-image:
runs-on: ubuntu-latest
environment:
name: image-registry-pilot
env:
PILOT_IMAGE_REGISTRY: ${{ vars.IMAGE_REGISTRY || 'higress-registry.cn-hangzhou.cr.aliyuncs.com' }}
PILOT_IMAGE_NAME: ${{ vars.PILOT_IMAGE_NAME || 'higress/pilot' }}
steps:
- name: "Checkout ${{ github.ref }}"
uses: actions/checkout@v4
with:
fetch-depth: 1
- name: Free Up GitHub Actions Ubuntu Runner Disk Space 🔧
uses: jlumbroso/free-disk-space@main
with:
tool-cache: false
android: true
dotnet: true
haskell: true
large-packages: true
swap-storage: true
- name: "Setup Go"
uses: actions/setup-go@v5
with:
go-version: 1.21.5
- name: Setup Golang Caches
uses: actions/cache@v4
with:
path: |-
~/.cache/go-build
~/go/pkg/mod
key: ${{ runner.os }}-go-${{ github.run_id }}
restore-keys: ${{ runner.os }}-go
- name: Set up QEMU
uses: docker/setup-qemu-action@v3
with:
image: tonistiigi/binfmt:qemu-v7.0.0
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v3
- name: Cache Docker layers
uses: actions/cache@v2
with:
path: /tmp/.buildx-cache
key: ${{ runner.os }}-buildx-${{ github.sha }}
restore-keys: |
${{ runner.os }}-buildx-
- name: Calculate Docker metadata
id: docker-meta
uses: docker/metadata-action@v5
with:
images: |
${{ env.PILOT_IMAGE_REGISTRY }}/${{ env.PILOT_IMAGE_NAME }}
tags: |
type=sha
type=ref,event=tag
type=semver,pattern={{version}}
type=raw,value=latest,enable=${{ github.ref == format('refs/heads/{0}', 'main') }}
- name: Login to Docker Registry
uses: docker/login-action@v3
with:
registry: ${{ env.PILOT_IMAGE_REGISTRY }}
username: ${{ secrets.REGISTRY_USERNAME }}
password: ${{ secrets.REGISTRY_PASSWORD }}
- name: Build Pilot-Discovery Image and Push
run: |
BUILT_IMAGE=""
readarray -t IMAGES <<< "${{ steps.docker-meta.outputs.tags }}"
for image in ${IMAGES[@]}; do
echo "Image: $image"
if [ "$BUILT_IMAGE" == "" ]; then
TAG=${image#*:}
HUB=${image%:*}
HUB=${HUB%/*}
BUILT_IMAGE="$HUB/pilot:$TAG"
GOPROXY="https://proxy.golang.org,direct" IMG_URL="$BUILT_IMAGE" make build-istio
fi
if [ "$BUILT_IMAGE" != "$image" ]; then
docker buildx imagetools create $BUILT_IMAGE --tag $image
fi
done
build-gateway-image:
runs-on: ubuntu-latest
environment:
name: image-registry-gateway
env:
GATEWAY_IMAGE_REGISTRY: ${{ vars.IMAGE_REGISTRY || 'higress-registry.cn-hangzhou.cr.aliyuncs.com' }}
GATEWAY_IMAGE_NAME: ${{ vars.GATEWAY_IMAGE_NAME || 'higress/gateway' }}
steps:
- name: "Checkout ${{ github.ref }}"
uses: actions/checkout@v4
with:
fetch-depth: 1
- name: Free Up GitHub Actions Ubuntu Runner Disk Space 🔧
uses: jlumbroso/free-disk-space@main
with:
tool-cache: false
android: true
dotnet: true
haskell: true
large-packages: true
swap-storage: true
- name: "Setup Go"
uses: actions/setup-go@v5
with:
go-version: 1.21.5
- name: Setup Golang Caches
uses: actions/cache@v4
with:
path: |-
~/.cache/go-build
~/go/pkg/mod
key: ${{ runner.os }}-go-${{ github.run_id }}
restore-keys: ${{ runner.os }}-go
- name: Set up QEMU
uses: docker/setup-qemu-action@v3
with:
image: tonistiigi/binfmt:qemu-v7.0.0
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v3
- name: Cache Docker layers
uses: actions/cache@v2
with:
path: /tmp/.buildx-cache
key: ${{ runner.os }}-buildx-${{ github.sha }}
restore-keys: |
${{ runner.os }}-buildx-
- name: Calculate Docker metadata
id: docker-meta
uses: docker/metadata-action@v5
with:
images: |
${{ env.GATEWAY_IMAGE_REGISTRY }}/${{ env.GATEWAY_IMAGE_NAME }}
tags: |
type=sha
type=ref,event=tag
type=semver,pattern={{version}}
type=raw,value=latest,enable=${{ github.ref == format('refs/heads/{0}', 'main') }}
- name: Login to Docker Registry
uses: docker/login-action@v3
with:
registry: ${{ env.GATEWAY_IMAGE_REGISTRY }}
username: ${{ secrets.REGISTRY_USERNAME }}
password: ${{ secrets.REGISTRY_PASSWORD }}
- name: Build Gateway Image and Push
run: |
BUILT_IMAGE=""
readarray -t IMAGES <<< "${{ steps.docker-meta.outputs.tags }}"
for image in ${IMAGES[@]}; do
echo "Image: $image"
if [ "$BUILT_IMAGE" == "" ]; then
TAG=${image#*:}
HUB=${image%:*}
HUB=${HUB%/*}
BUILT_IMAGE="$HUB/proxyv2:$TAG"
GOPROXY="https://proxy.golang.org,direct" IMG_URL="$BUILT_IMAGE" make build-gateway
fi
if [ "$BUILT_IMAGE" != "$image" ]; then
docker buildx imagetools create $BUILT_IMAGE --tag $image
fi
done

View File

@@ -162,13 +162,13 @@ buildx-prepare:
build-gateway: prebuild buildx-prepare
USE_REAL_USER=1 TARGET_ARCH=amd64 DOCKER_TARGETS="docker.proxyv2" ./tools/hack/build-istio-image.sh init
USE_REAL_USER=1 TARGET_ARCH=arm64 DOCKER_TARGETS="docker.proxyv2" ./tools/hack/build-istio-image.sh init
DOCKER_TARGETS="docker.proxyv2" ./tools/hack/build-istio-image.sh docker.buildx
DOCKER_TARGETS="docker.proxyv2" IMG_URL="${IMG_URL}" ./tools/hack/build-istio-image.sh docker.buildx
build-gateway-local: prebuild
TARGET_ARCH=${TARGET_ARCH} DOCKER_TARGETS="docker.proxyv2" ./tools/hack/build-istio-image.sh docker
build-istio: prebuild buildx-prepare
DOCKER_TARGETS="docker.pilot" ./tools/hack/build-istio-image.sh docker.buildx
DOCKER_TARGETS="docker.pilot" IMG_URL="${IMG_URL}" ./tools/hack/build-istio-image.sh docker.buildx
build-istio-local: prebuild
TARGET_ARCH=${TARGET_ARCH} DOCKER_TARGETS="docker.pilot" ./tools/hack/build-istio-image.sh docker

View File

@@ -35,6 +35,8 @@ DOCKER_ALL_VARIANTS ?= debug distroless
INCLUDE_UNTAGGED_DEFAULT ?= false
DEFAULT_DISTRIBUTION=debug
HIGRESS_DOCKER_BUILDX_RULE ?= $(foreach VARIANT,$(DOCKER_BUILD_VARIANTS), time (mkdir -p $(HIGRESS_DOCKER_BUILD_TOP)/$@ && TARGET_ARCH=$(TARGET_ARCH) ./docker/docker-copy.sh $^ $(HIGRESS_DOCKER_BUILD_TOP)/$@ && cd $(HIGRESS_DOCKER_BUILD_TOP)/$@ $(BUILD_PRE) && docker buildx create --name higress --node higress0 --platform linux/amd64,linux/arm64 --use && docker buildx build --no-cache --platform linux/amd64,linux/arm64 $(BUILD_ARGS) --build-arg BASE_DISTRIBUTION=$(call normalize-tag,$(VARIANT)) -t $(HUB)/higress:$(TAG)$(call variant-tag,$(VARIANT)) -f Dockerfile.higress . --push ); )
HIGRESS_DOCKER_RULE ?= $(foreach VARIANT,$(DOCKER_BUILD_VARIANTS), time (mkdir -p $(HIGRESS_DOCKER_BUILD_TOP)/$@ && TARGET_ARCH=$(TARGET_ARCH) ./docker/docker-copy.sh $^ $(HIGRESS_DOCKER_BUILD_TOP)/$@ && cd $(HIGRESS_DOCKER_BUILD_TOP)/$@ $(BUILD_PRE) && docker build $(BUILD_ARGS) --build-arg BASE_DISTRIBUTION=$(call normalize-tag,$(VARIANT)) -t $(HUB)/higress:$(TAG)$(call variant-tag,$(VARIANT)) -f Dockerfile.higress . ); )
IMG ?= higress
IMG_URL ?= $(HUB)/$(IMG):$(TAG)
HIGRESS_DOCKER_BUILDX_RULE ?= $(foreach VARIANT,$(DOCKER_BUILD_VARIANTS), time (mkdir -p $(HIGRESS_DOCKER_BUILD_TOP)/$@ && TARGET_ARCH=$(TARGET_ARCH) ./docker/docker-copy.sh $^ $(HIGRESS_DOCKER_BUILD_TOP)/$@ && cd $(HIGRESS_DOCKER_BUILD_TOP)/$@ $(BUILD_PRE) && docker buildx create --name higress --node higress0 --platform linux/amd64,linux/arm64 --use && docker buildx build --no-cache --platform linux/amd64,linux/arm64 $(BUILD_ARGS) --build-arg BASE_DISTRIBUTION=$(call normalize-tag,$(VARIANT)) -t $(IMG_URL)$(call variant-tag,$(VARIANT)) -f Dockerfile.higress . --push ); )
HIGRESS_DOCKER_RULE ?= $(foreach VARIANT,$(DOCKER_BUILD_VARIANTS), time (mkdir -p $(HIGRESS_DOCKER_BUILD_TOP)/$@ && TARGET_ARCH=$(TARGET_ARCH) ./docker/docker-copy.sh $^ $(HIGRESS_DOCKER_BUILD_TOP)/$@ && cd $(HIGRESS_DOCKER_BUILD_TOP)/$@ $(BUILD_PRE) && docker build $(BUILD_ARGS) --build-arg BASE_DISTRIBUTION=$(call normalize-tag,$(VARIANT)) -t $(IMG_URL)$(call variant-tag,$(VARIANT)) -f Dockerfile.higress . ); )

View File

@@ -488,6 +488,7 @@ gateway:
externalTrafficPolicy: ""
rollingMaxSurge: 100%
# -- If global.local is true, the default value is 100%, otherwise it is 25%
rollingMaxUnavailable: 25%
resources:

View File

@@ -128,7 +128,7 @@ The command removes all the Kubernetes components associated with the chart and
| gateway.resources.requests.memory | string | `"2048Mi"` | |
| gateway.revision | string | `""` | revision declares which revision this gateway is a part of |
| gateway.rollingMaxSurge | string | `"100%"` | |
| gateway.rollingMaxUnavailable | string | `"25%"` | |
| gateway.rollingMaxUnavailable | string | `"25%"` | If global.local is true, the default value is 100%, otherwise it is 25% |
| gateway.securityContext | string | `nil` | Define the security context for the pod. If unset, this will be automatically set to the minimum privileges required to bind to port 80 and 443. On Kubernetes 1.22+, this only requires the `net.ipv4.ip_unprivileged_port_start` sysctl. |
| gateway.service.annotations | object | `{}` | |
| gateway.service.externalTrafficPolicy | string | `""` | |
@@ -162,7 +162,7 @@ The command removes all the Kubernetes components associated with the chart and
| global.enableH3 | bool | `false` | |
| global.enableIPv6 | bool | `false` | |
| global.enableIstioAPI | bool | `true` | If true, Higress Controller will monitor istio resources as well |
| global.enableLDSCache | bool | `true` | |
| global.enableLDSCache | bool | `false` | |
| global.enableProxyProtocol | bool | `false` | |
| global.enablePushAllMCPClusters | bool | `true` | |
| global.enableSRDS | bool | `true` | |

View File

@@ -86,7 +86,8 @@ LLM 结果缓存插件,默认配置方式可以直接用于 openai 协议的
| cache.password | string | optional | "" | 缓存服务密码 |
| cache.timeout | uint32 | optional | 10000 | 缓存服务的超时时间,单位为毫秒。默认值是 10000,即 10 秒 |
| cache.cacheTTL | int | optional | 0 | 缓存过期时间,单位为秒。默认值是 0,即永不过期 |
| cacheKeyPrefix | string | optional | "higress-ai-cache:" | 缓存 Key 的前缀,默认值为 "higress-ai-cache:" |
| cache.cacheKeyPrefix | string | optional | "higress-ai-cache:" | 缓存 Key 的前缀,默认值为 "higress-ai-cache:" |
| cache.database | int | optional | 0 | 使用的数据库 id,仅限 redis,例如配置为 1,对应 `SELECT 1` |
## 其他配置
@@ -168,6 +169,7 @@ redis:
serviceName: my_redis.dns
servicePort: 6379
timeout: 100
database: 1
```
## 进阶用法

View File

@@ -15,26 +15,29 @@ Plugin Execution Phase: `Authentication Phase`
Plugin Execution Priority: `10`
## Configuration Description
| Name | Type | Requirement | Default | Description |
| -------- | -------- | -------- | -------- | -------- |
| cacheKeyFrom.requestBody | string | optional | "messages.@reverse.0.content" | Extracts a string from the request Body based on [GJSON PATH](https://github.com/tidwall/gjson/blob/master/SYNTAX.md) syntax |
| cacheValueFrom.responseBody | string | optional | "choices.0.message.content" | Extracts a string from the response Body based on [GJSON PATH](https://github.com/tidwall/gjson/blob/master/SYNTAX.md) syntax |
| Name | Type | Requirement | Default | Description |
| -------- | -------- | -------- | -------- | -------- |
| cacheKeyFrom.requestBody | string | optional | "messages.@reverse.0.content" | Extracts a string from the request Body based on [GJSON PATH](https://github.com/tidwall/gjson/blob/master/SYNTAX.md) syntax |
| cacheValueFrom.responseBody | string | optional | "choices.0.message.content" | Extracts a string from the response Body based on [GJSON PATH](https://github.com/tidwall/gjson/blob/master/SYNTAX.md) syntax |
| cacheStreamValueFrom.responseBody | string | optional | "choices.0.delta.content" | Extracts a string from the streaming response Body based on [GJSON PATH](https://github.com/tidwall/gjson/blob/master/SYNTAX.md) syntax |
| cacheKeyPrefix | string | optional | "higress-ai-cache:" | Prefix for the Redis cache key |
| cacheTTL | integer | optional | 0 | Cache expiration time in seconds, default value is 0, which means never expire |
| redis.serviceName | string | required | - | The complete FQDN name of the Redis service, including the service type, e.g., my-redis.dns, redis.my-ns.svc.cluster.local |
| redis.servicePort | integer | optional | 6379 | Redis service port |
| redis.timeout | integer | optional | 1000 | Timeout for requests to Redis, in milliseconds |
| redis.username | string | optional | - | Username for logging into Redis |
| redis.password | string | optional | - | Password for logging into Redis |
| returnResponseTemplate | string | optional | `{"id":"from-cache","choices":[%s],"model":"gpt-4o","object":"chat.completion","usage":{"prompt_tokens":0,"completion_tokens":0,"total_tokens":0}}` | Template for returning HTTP response, with %s marking the part to be replaced by cache value |
| returnStreamResponseTemplate | string | optional | `data:{"id":"from-cache","choices":[{"index":0,"delta":{"role":"assistant","content":"%s"},"finish_reason":"stop"}],"model":"gpt-4o","object":"chat.completion","usage":{"prompt_tokens":0,"completion_tokens":0,"total_tokens":0}}\n\ndata:[DONE]\n\n` | Template for returning streaming HTTP response, with %s marking the part to be replaced by cache value |
| cacheKeyPrefix | string | optional | "higress-ai-cache:" | Prefix for the Redis cache key |
| cacheTTL | integer | optional | 0 | Cache expiration time in seconds, default value is 0, which means never expire |
| redis.serviceName | string | required | - | The complete FQDN name of the Redis service, including the service type, e.g., my-redis.dns, redis.my-ns.svc.cluster.local |
| redis.servicePort | integer | optional | 6379 | Redis service port |
| redis.timeout | integer | optional | 1000 | Timeout for requests to Redis, in milliseconds |
| redis.username | string | optional | - | Username for logging into Redis |
| redis.database | integer | optional | 0 | The Redis database ID to use (Redis only); for example, a value of 1 corresponds to `SELECT 1` |
| redis.password | string | optional | - | Password for logging into Redis |
| returnResponseTemplate | string | optional | `{"id":"from-cache","choices":[%s],"model":"gpt-4o","object":"chat.completion","usage":{"prompt_tokens":0,"completion_tokens":0,"total_tokens":0}}` | Template for returning HTTP response, with %s marking the part to be replaced by cache value |
| returnStreamResponseTemplate | string | optional | `data:{"id":"from-cache","choices":[{"index":0,"delta":{"role":"assistant","content":"%s"},"finish_reason":"stop"}],"model":"gpt-4o","object":"chat.completion","usage":{"prompt_tokens":0,"completion_tokens":0,"total_tokens":0}}\n\ndata:[DONE]\n\n` | Template for returning streaming HTTP response, with %s marking the part to be replaced by cache value |
## Configuration Example
```yaml
redis:
serviceName: my-redis.dns
timeout: 2000
servicePort: 6379
database: 1
```
## Advanced Usage

View File

@@ -52,6 +52,9 @@ type ProviderConfig struct {
// @Title 缓存 Key 前缀
// @Description 缓存 Key 的前缀,默认值为 "higressAiCache:"
cacheKeyPrefix string
// @Title redis database
// @Description 指定 redis 的 database,默认使用 0
database int
}
func (c *ProviderConfig) GetProviderType() string {
@@ -79,6 +82,7 @@ func (c *ProviderConfig) FromJson(json gjson.Result) {
if !json.Get("password").Exists() {
c.password = ""
}
c.database = int(json.Get("database").Int())
c.timeout = uint32(json.Get("timeout").Int())
if !json.Get("timeout").Exists() {
c.timeout = 10000

View File

@@ -38,7 +38,7 @@ func (rp *redisProvider) GetProviderType() string {
}
func (rp *redisProvider) Init(username string, password string, timeout uint32) error {
return rp.client.Init(rp.config.username, rp.config.password, int64(rp.config.timeout))
return rp.client.Init(rp.config.username, rp.config.password, int64(rp.config.timeout), wrapper.WithDataBase(rp.config.database))
}
func (rp *redisProvider) Get(key string, cb wrapper.RedisResponseCallback) error {

View File

@@ -28,9 +28,9 @@ type PluginConfig struct {
embeddingProvider embedding.Provider
vectorProvider vector.Provider
embeddingProviderConfig embedding.ProviderConfig
vectorProviderConfig vector.ProviderConfig
cacheProviderConfig cache.ProviderConfig
embeddingProviderConfig *embedding.ProviderConfig
vectorProviderConfig *vector.ProviderConfig
cacheProviderConfig *cache.ProviderConfig
CacheKeyFrom string
CacheValueFrom string
@@ -47,7 +47,9 @@ type PluginConfig struct {
}
func (c *PluginConfig) FromJson(json gjson.Result, log wrapper.Log) {
c.embeddingProviderConfig = &embedding.ProviderConfig{}
c.vectorProviderConfig = &vector.ProviderConfig{}
c.cacheProviderConfig = &cache.ProviderConfig{}
c.vectorProviderConfig.FromJson(json.Get("vector"))
c.embeddingProviderConfig.FromJson(json.Get("embedding"))
c.cacheProviderConfig.FromJson(json.Get("cache"))
@@ -142,7 +144,7 @@ func (c *PluginConfig) Complete(log wrapper.Log) error {
var err error
if c.embeddingProviderConfig.GetProviderType() != "" {
log.Debugf("embedding provider is set to %s", c.embeddingProviderConfig.GetProviderType())
c.embeddingProvider, err = embedding.CreateProvider(c.embeddingProviderConfig)
c.embeddingProvider, err = embedding.CreateProvider(*c.embeddingProviderConfig)
if err != nil {
return err
}
@@ -152,7 +154,7 @@ func (c *PluginConfig) Complete(log wrapper.Log) error {
}
if c.cacheProviderConfig.GetProviderType() != "" {
log.Debugf("cache provider is set to %s", c.cacheProviderConfig.GetProviderType())
c.cacheProvider, err = cache.CreateProvider(c.cacheProviderConfig)
c.cacheProvider, err = cache.CreateProvider(*c.cacheProviderConfig)
if err != nil {
return err
}
@@ -162,7 +164,7 @@ func (c *PluginConfig) Complete(log wrapper.Log) error {
}
if c.vectorProviderConfig.GetProviderType() != "" {
log.Debugf("vector provider is set to %s", c.vectorProviderConfig.GetProviderType())
c.vectorProvider, err = vector.CreateProvider(c.vectorProviderConfig)
c.vectorProvider, err = vector.CreateProvider(*c.vectorProviderConfig)
if err != nil {
return err
}
@@ -182,7 +184,7 @@ func (c *PluginConfig) GetVectorProvider() vector.Provider {
}
func (c *PluginConfig) GetVectorProviderConfig() vector.ProviderConfig {
return c.vectorProviderConfig
return *c.vectorProviderConfig
}
func (c *PluginConfig) GetCacheProvider() cache.Provider {

View File

@@ -0,0 +1,151 @@
package embedding
import (
"encoding/json"
"errors"
"fmt"
"github.com/alibaba/higress/plugins/wasm-go/pkg/wrapper"
"github.com/tidwall/gjson"
"net/http"
"strconv"
)
// Defaults for the Ollama embedding backend. Host/port target a local Ollama
// instance (Ollama listens on 11434 by default) and OLLAMA_ENDPOINT is
// Ollama's embedding API path.
const (
	OLLAMA_DOMAIN             = "localhost"
	OLLAMA_PORT               = 11434
	OLLAMA_DEFAULT_MODEL_NAME = "llama3.2"
	OLLAMA_ENDPOINT           = "/api/embed"
)

// ollamaProviderInitializer builds ollamaProvider instances; it carries no
// state of its own.
type ollamaProviderInitializer struct {
}
// InitConfig is a no-op: this provider takes all of its settings from the
// shared ProviderConfig fields consumed in CreateProvider.
func (c *ollamaProviderInitializer) InitConfig(json gjson.Result) {}

// ValidateConfig always succeeds; missing fields are filled with package
// defaults in CreateProvider rather than rejected here.
func (c *ollamaProviderInitializer) ValidateConfig() error {
	return nil
}
// ollamaProvider implements the embedding Provider interface backed by an
// Ollama server reached through the wasm FQDN cluster HTTP client.
type ollamaProvider struct {
	config ProviderConfig
	client *wrapper.ClusterClient[wrapper.FQDNCluster]
}
// CreateProvider builds an ollamaProvider from the given config, substituting
// the package defaults for any unset host, port, or model name.
func (t *ollamaProviderInitializer) CreateProvider(cfg ProviderConfig) (Provider, error) {
	if cfg.servicePort == 0 {
		cfg.servicePort = OLLAMA_PORT
	}
	if cfg.serviceHost == "" {
		cfg.serviceHost = OLLAMA_DOMAIN
	}
	if cfg.model == "" {
		cfg.model = OLLAMA_DEFAULT_MODEL_NAME
	}
	// The FQDN cluster client routes the call to the configured service.
	cluster := wrapper.FQDNCluster{
		FQDN: cfg.serviceName,
		Host: cfg.serviceHost,
		Port: cfg.servicePort,
	}
	provider := &ollamaProvider{
		config: cfg,
		client: wrapper.NewClusterClient(cluster),
	}
	return provider, nil
}
// GetProviderType reports the provider identifier ("ollama") used for
// provider registration and lookup.
func (t *ollamaProvider) GetProviderType() string {
	return PROVIDER_TYPE_OLLAMA
}
// ollamaResponse mirrors the JSON body returned by Ollama's /api/embed
// endpoint. Only Embeddings is consumed by this provider; the timing and
// count fields are decoded for completeness.
type ollamaResponse struct {
	Model           string      `json:"model"`
	Embeddings      [][]float64 `json:"embeddings"`
	TotalDuration   int64       `json:"total_duration"`
	LoadDuration    int64       `json:"load_duration"`
	PromptEvalCount int64       `json:"prompt_eval_count"`
}

// ollamaEmbeddingRequest is the JSON body sent to /api/embed: the text to
// embed plus the model to embed it with.
type ollamaEmbeddingRequest struct {
	Input string `json:"input"`
	Model string `json:"model"`
}
// constructParameters assembles the endpoint path, request headers, and JSON
// body for an embedding call. It fails on empty input text or a marshal error.
func (t *ollamaProvider) constructParameters(text string, log wrapper.Log) (string, [][2]string, []byte, error) {
	if len(text) == 0 {
		return "", nil, nil, errors.New("queryString text cannot be empty")
	}
	payload := ollamaEmbeddingRequest{
		Input: text,
		Model: t.config.model,
	}
	body, marshalErr := json.Marshal(payload)
	if marshalErr != nil {
		log.Errorf("failed to marshal request data: %v", marshalErr)
		return "", nil, nil, marshalErr
	}
	log.Debugf("constructParameters: %s", string(body))
	requestHeaders := [][2]string{
		{"Content-Type", "application/json"},
	}
	return OLLAMA_ENDPOINT, requestHeaders, body, nil
}
// parseTextEmbedding decodes an Ollama /api/embed response body into an
// ollamaResponse, wrapping any JSON error with context.
func (t *ollamaProvider) parseTextEmbedding(responseBody []byte) (*ollamaResponse, error) {
	parsed := &ollamaResponse{}
	if err := json.Unmarshal(responseBody, parsed); err != nil {
		return nil, fmt.Errorf("failed to parse response: %w", err)
	}
	return parsed, nil
}
// GetEmbedding requests an embedding for queryString from the Ollama service
// and delivers the result (or an error) through callback. The HTTP call goes
// through the wasm cluster client, so the response closure runs when the
// callout completes. Returns a non-nil error when the request could not be
// constructed or issued.
func (t *ollamaProvider) GetEmbedding(
	queryString string,
	ctx wrapper.HttpContext,
	log wrapper.Log,
	callback func(emb []float64, err error)) error {
	embUrl, embHeaders, embRequestBody, err := t.constructParameters(queryString, log)
	if err != nil {
		log.Errorf("failed to construct parameters: %v", err)
		return err
	}
	var resp *ollamaResponse
	// Fires when GetEmbedding returns: surfaces a synchronous failure from
	// t.client.Post to the callback. NOTE(review): the response closure below
	// also writes the shared err and calls callback; if the closure can run
	// before GetEmbedding returns (synchronous completion), callback may be
	// invoked twice for one failure — confirm the host callout semantics.
	defer func() {
		if err != nil {
			callback(nil, err)
		}
	}()
	err = t.client.Post(embUrl, embHeaders, embRequestBody,
		func(statusCode int, responseHeaders http.Header, responseBody []byte) {
			// Any non-200 status is treated as a failure.
			if statusCode != http.StatusOK {
				err = errors.New("failed to get embedding due to status code: " + strconv.Itoa(statusCode))
				callback(nil, err)
				return
			}
			resp, err = t.parseTextEmbedding(responseBody)
			if err != nil {
				err = fmt.Errorf("failed to parse response: %v", err)
				callback(nil, err)
				return
			}
			log.Debugf("get embedding response: %d, %s", statusCode, responseBody)
			// Ollama returns a batch of embeddings; a single input is sent,
			// so only the first vector is delivered.
			if len(resp.Embeddings) == 0 {
				err = errors.New("no embedding found in response")
				callback(nil, err)
				return
			}
			callback(resp.Embeddings[0], nil)
		}, t.config.timeout)
	return err
}

View File

@@ -12,6 +12,7 @@ const (
PROVIDER_TYPE_TEXTIN = "textin"
PROVIDER_TYPE_COHERE = "cohere"
PROVIDER_TYPE_OPENAI = "openai"
PROVIDER_TYPE_OLLAMA = "ollama"
)
type providerInitializer interface {
@@ -26,6 +27,7 @@ var (
PROVIDER_TYPE_TEXTIN: &textInProviderInitializer{},
PROVIDER_TYPE_COHERE: &cohereProviderInitializer{},
PROVIDER_TYPE_OPENAI: &openAIProviderInitializer{},
PROVIDER_TYPE_OLLAMA: &ollamaProviderInitializer{},
}
)

View File

@@ -23,7 +23,7 @@ const (
SKIP_CACHE_HEADER = "x-higress-skip-ai-cache"
ERROR_PARTIAL_MESSAGE_KEY = "errorPartialMessage"
DEFAULT_MAX_BODY_BYTES uint32 = 10 * 1024 * 1024
DEFAULT_MAX_BODY_BYTES uint32 = 100 * 1024 * 1024
)
func main() {

View File

@@ -20,17 +20,18 @@ description: AI 历史对话插件配置参考
## 配置字段
| 名称 | 数据类型 | 填写要求 | 默认值 | Description |
|-------------------|---------|----------|-----------------------|---------------------------------------------------------------------------|
| identityHeader | string | optional | "Authorization" | 身份解析对应的请求头,可用 Authorization,X-Mse-Consumer等 |
| fillHistoryCnt | integer | optional | 3 | 默认填充历史对话轮次 |
| cacheKeyPrefix | string | optional | "higress-ai-history:" | Redis缓存Key的前缀 |
| cacheTTL | integer | optional | 0 | 缓存的过期时间单位是秒默认值为0即永不过期 |
| redis.serviceName | string | required | - | redis 服务名称,带服务类型的完整 FQDN 名称,例如 my-redis.dns、redis.my-ns.svc.cluster.local |
| redis.servicePort | integer | optional | 6379 | redis 服务端口 |
| redis.timeout | integer | optional | 1000 | 请求 redis 的超时时间,单位为毫秒 |
| redis.username | string | optional | - | 登陆 redis 的用户名 |
| redis.password | string | optional | - | 登陆 redis 的密码 |
| 名称 | 数据类型 | 填写要求 | 默认值 | Description |
|-------------------|----------|----------|-----------------------|----------------------------------------------------------------------------------------------|
| identityHeader | string | optional | "Authorization" | 身份解析对应的请求头,可用 Authorization,X-Mse-Consumer等 |
| fillHistoryCnt | integer | optional | 3 | 默认填充历史对话轮次 |
| cacheKeyPrefix | string | optional | "higress-ai-history:" | Redis缓存Key的前缀 |
| cacheTTL | integer | optional | 0 | 缓存的过期时间单位是秒默认值为0即永不过期 |
| redis.serviceName | string | required | - | redis 服务名称,带服务类型的完整 FQDN 名称,例如 my-redis.dns、redis.my-ns.svc.cluster.local |
| redis.servicePort | integer | optional | 6379 | redis 服务端口 |
| redis.timeout | integer | optional | 1000 | 请求 redis 的超时时间,单位为毫秒 |
| redis.username | string | optional | - | 登陆 redis 的用户名 |
| redis.password | string | optional | - | 登陆 redis 的密码 |
| redis.database | int | optional | 0 | 使用的数据库id例如配置为1对应`SELECT 1` |
## 用法示例

View File

@@ -15,17 +15,19 @@ Plugin Execution Phase: `Default Phase`
Plugin Execution Priority: `650`
## Configuration Fields
| Name | Data Type | Required | Default Value | Description |
|-------------------|---------|----------|-----------------------|---------------------------------------------------------------------------|
| identityHeader | string | optional | "Authorization" | The request header for identity resolution, can be Authorization, X-Mse-Consumer, etc. |
| fillHistoryCnt | integer | optional | 3 | Default number of historical dialogues to be filled. |
| cacheKeyPrefix | string | optional | "higress-ai-history:" | Prefix for Redis cache key. |
| cacheTTL | integer | optional | 0 | Cache expiration time in seconds, default value is 0, meaning it never expires. |
| redis.serviceName | string | required | - | Redis service name, full FQDN name with service type, e.g., my-redis.dns, redis.my-ns.svc.cluster.local |
| redis.servicePort | integer | optional | 6379 | Redis service port. |
| redis.timeout | integer | optional | 1000 | Timeout for requests to Redis, in milliseconds. |
| redis.username | string | optional | - | Username for logging into Redis. |
| redis.password | string | optional | - | Password for logging into Redis. |
| Name | Data Type | Required | Default Value | Description |
|-------------------|-----------|----------|-----------------------|---------------------------------------------------------------------------------------------------------|
| identityHeader | string | optional | "Authorization" | The request header for identity resolution, can be Authorization, X-Mse-Consumer, etc. |
| fillHistoryCnt | integer | optional | 3 | Default number of historical dialogues to be filled. |
| cacheKeyPrefix | string | optional | "higress-ai-history:" | Prefix for Redis cache key. |
| cacheTTL | integer | optional | 0 | Cache expiration time in seconds, default value is 0, meaning it never expires. |
| redis.serviceName | string | required | - | Redis service name, full FQDN name with service type, e.g., my-redis.dns, redis.my-ns.svc.cluster.local |
| redis.servicePort | integer | optional | 6379 | Redis service port. |
| redis.timeout | integer | optional | 1000 | Timeout for requests to Redis, in milliseconds. |
| redis.username | string | optional | - | Username for logging into Redis. |
| redis.password | string | optional | - | Password for logging into Redis. |
| redis.database | int | optional | 0 | The database ID used, for example, configured as 1, corresponds to `SELECT 1`. |
## Usage Example
### Configuration Information

View File

@@ -76,6 +76,9 @@ type RedisInfo struct {
// @Title zh-CN 请求超时
// @Description zh-CN 请求 redis 的超时时间单位为毫秒。默认值是1000即1秒
Timeout int `required:"false" yaml:"timeout" json:"timeout"`
// @Title zh-CN Database
// @Description zh-CN redis database
Database int `required:"false" yaml:"database" json:"database"`
}
type KVExtractor struct {
@@ -138,6 +141,7 @@ func parseConfig(json gjson.Result, c *PluginConfig, log wrapper.Log) error {
if c.RedisInfo.Timeout == 0 {
c.RedisInfo.Timeout = 1000
}
c.RedisInfo.Database = int(json.Get("redis.database").Int())
c.QuestionFrom.RequestBody = "messages.@reverse.0.content"
c.AnswerValueFrom.ResponseBody = "choices.0.message.content"
c.AnswerStreamValueFrom.ResponseBody = "choices.0.delta.content"
@@ -159,7 +163,7 @@ func parseConfig(json gjson.Result, c *PluginConfig, log wrapper.Log) error {
FQDN: c.RedisInfo.ServiceName,
Port: int64(c.RedisInfo.ServicePort),
})
return c.redisClient.Init(c.RedisInfo.Username, c.RedisInfo.Password, int64(c.RedisInfo.Timeout))
return c.redisClient.Init(c.RedisInfo.Username, c.RedisInfo.Password, int64(c.RedisInfo.Timeout), wrapper.WithDataBase(c.RedisInfo.Database))
}
func onHttpRequestHeaders(ctx wrapper.HttpContext, config PluginConfig, log wrapper.Log) types.Action {

View File

@@ -31,18 +31,19 @@ description: AI 代理插件配置参考
`provider`的配置字段说明如下:
| 名称 | 数据类型 | 填写要求 | 默认值 | 描述 |
|------------------| --------------- | -------- | ------ |-----------------------------------------------------------------------------------------------------------------------------------------------------------|
| `type` | string | 必填 | - | AI 服务提供商名称 |
| `apiTokens` | array of string | 非必填 | - | 用于在访问 AI 服务时进行认证的令牌。如果配置了多个 token插件会在请求时随机进行选择。部分服务提供商只支持配置一个 token。 |
| `timeout` | number | 非必填 | - | 访问 AI 服务的超时时间。单位为毫秒。默认值为 120000即 2 分钟 |
| `modelMapping` | map of string | 非必填 | - | AI 模型映射表,用于将请求中的模型名称映射为服务提供商支持模型名称。<br/>1. 支持前缀匹配。例如用 "gpt-3-*" 匹配所有名称以“gpt-3-”开头的模型;<br/>2. 支持使用 "*" 为键来配置通用兜底映射关系;<br/>3. 如果映射的目标名称为空字符串 "",则表示保留原模型名称。 |
| `protocol` | string | 非必填 | - | 插件对外提供的 API 接口契约。目前支持以下取值openai默认值使用 OpenAI 的接口契约、original使用目标服务提供商的原始接口契约 |
| `context` | object | 非必填 | - | 配置 AI 对话上下文信息 |
| `customSettings` | array of customSetting | 非必填 | - | 为AI请求指定覆盖或者填充参数 |
| `failover` | object | 非必填 | - | 配置 apiToken 的 failover 策略,当 apiToken 不可用时,将其移出 apiToken 列表,待健康检测通过后重新添加回 apiToken 列表 |
| `retryOnFailure` | object | 非必填 | - | 当请求失败时立即进行重试 |
| `capabilities` | map of string | 非必填 | - | 部分provider的部分ai能力原生兼容openai/v1格式不需要重写可以直接转发通过此配置项指定来开启转发, key表示的是采用的厂商协议能力values表示的真实的厂商该能力的api path, 厂商协议能力当前支持: openai/v1/chatcompletions, openai/v1/embeddings, openai/v1/imagegeneration, openai/v1/audiospeech, cohere/v1/rerank |
| 名称 | 数据类型 | 填写要求 | 默认值 | 描述 |
|------------------| --------------- | -------- | ------ |-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
| `type` | string | 必填 | - | AI 服务提供商名称 |
| `apiTokens` | array of string | 非必填 | - | 用于在访问 AI 服务时进行认证的令牌。如果配置了多个 token插件会在请求时随机进行选择。部分服务提供商只支持配置一个 token。 |
| `timeout` | number | 非必填 | - | 访问 AI 服务的超时时间。单位为毫秒。默认值为 120000即 2 分钟 |
| `modelMapping` | map of string | 非必填 | - | AI 模型映射表,用于将请求中的模型名称映射为服务提供商支持模型名称。<br/>1. 支持前缀匹配。例如用 "gpt-3-*" 匹配所有名称以“gpt-3-”开头的模型;<br/>2. 支持使用 "*" 为键来配置通用兜底映射关系;<br/>3. 如果映射的目标名称为空字符串 "",则表示保留原模型名称。 |
| `protocol` | string | 非必填 | - | 插件对外提供的 API 接口契约。目前支持以下取值openai默认值使用 OpenAI 的接口契约、original使用目标服务提供商的原始接口契约 |
| `context` | object | 非必填 | - | 配置 AI 对话上下文信息 |
| `customSettings` | array of customSetting | 非必填 | - | 为AI请求指定覆盖或者填充参数 |
| `failover` | object | 非必填 | - | 配置 apiToken 的 failover 策略,当 apiToken 不可用时,将其移出 apiToken 列表,待健康检测通过后重新添加回 apiToken 列表 |
| `retryOnFailure` | object | 非必填 | - | 当请求失败时立即进行重试 |
| `reasoningContentMode` | string | 非必填 | - | 如何处理大模型服务返回的推理内容。目前支持以下取值passthrough正常输出推理内容、ignore不输出推理内容、concat将推理内容拼接在常规输出内容之前。默认为 passthrough。仅支持通义千问服务。 |
| `capabilities` | map of string | 非必填 | - | 部分provider的部分ai能力原生兼容openai/v1格式不需要重写可以直接转发通过此配置项指定来开启转发, key表示的是采用的厂商协议能力values表示的真实的厂商该能力的api path, 厂商协议能力当前支持: openai/v1/chatcompletions, openai/v1/embeddings, openai/v1/imagegeneration, openai/v1/audiospeech, cohere/v1/rerank |
`context`的配置字段说明如下:

View File

@@ -80,13 +80,16 @@ func (c *PluginConfig) Complete(log wrapper.Log) error {
c.activeProvider = nil
return nil
}
var err error
c.activeProvider, err = provider.CreateProvider(*c.activeProviderConfig)
if err != nil {
return err
}
providerConfig := c.GetProviderConfig()
err = providerConfig.SetApiTokensFailover(log, c.activeProvider)
return err
return providerConfig.SetApiTokensFailover(log, c.activeProvider)
}
func (c *PluginConfig) GetProvider() provider.Provider {

View File

@@ -15,12 +15,13 @@ import (
"github.com/higress-group/proxy-wasm-go-sdk/proxywasm"
"github.com/higress-group/proxy-wasm-go-sdk/proxywasm/types"
"github.com/tidwall/gjson"
"github.com/tidwall/sjson"
)
const (
pluginName = "ai-proxy"
defaultMaxBodyBytes uint32 = 10 * 1024 * 1024
defaultMaxBodyBytes uint32 = 100 * 1024 * 1024
)
func main() {
@@ -40,9 +41,11 @@ func parseGlobalConfig(json gjson.Result, pluginConfig *config.PluginConfig, log
pluginConfig.FromJson(json)
if err := pluginConfig.Validate(); err != nil {
log.Errorf("global rule config is invalid: %v", err)
return err
}
if err := pluginConfig.Complete(log); err != nil {
log.Errorf("failed to apply global rule config: %v", err)
return err
}
@@ -56,9 +59,11 @@ func parseOverrideRuleConfig(json gjson.Result, global config.PluginConfig, plug
pluginConfig.FromJson(json)
if err := pluginConfig.Validate(); err != nil {
log.Errorf("overriden rule config is invalid: %v", err)
return err
}
if err := pluginConfig.Complete(log); err != nil {
log.Errorf("failed to apply overriden rule config: %v", err)
return err
}
@@ -98,21 +103,23 @@ func onHttpRequestHeader(ctx wrapper.HttpContext, pluginConfig config.PluginConf
// Always remove the Accept-Encoding header to prevent the LLM from sending compressed responses,
// allowing plugins to inspect or modify the response correctly
proxywasm.RemoveHttpRequestHeader("Accept-Encoding")
_ = proxywasm.RemoveHttpRequestHeader("Accept-Encoding")
if handler, ok := activeProvider.(provider.RequestHeadersHandler); ok {
// Set the apiToken for the current request.
providerConfig.SetApiTokenInUse(ctx, log)
// Set available apiTokens of current request in the context, will be used in the retryOnFailure
providerConfig.SetAvailableApiTokens(ctx, log)
err := handler.OnRequestHeaders(ctx, apiName, log)
if err != nil {
util.ErrorHandler("ai-proxy.proc_req_headers_failed", fmt.Errorf("failed to process request headers: %v", err))
_ = util.ErrorHandler("ai-proxy.proc_req_headers_failed", fmt.Errorf("failed to process request headers: %v", err))
return types.ActionContinue
}
hasRequestBody := wrapper.HasRequestBody()
if hasRequestBody {
proxywasm.RemoveHttpRequestHeader("Content-Length")
_ = proxywasm.RemoveHttpRequestHeader("Content-Length")
ctx.SetRequestBodyBufferLimit(defaultMaxBodyBytes)
// Delay the header processing to allow changing in OnRequestBody
return types.HeaderStopIteration
@@ -136,23 +143,21 @@ func onHttpRequestBody(ctx wrapper.HttpContext, pluginConfig config.PluginConfig
if handler, ok := activeProvider.(provider.RequestBodyHandler); ok {
apiName, _ := ctx.GetContext(provider.CtxKeyApiName).(provider.ApiName)
newBody, settingErr := pluginConfig.GetProviderConfig().ReplaceByCustomSettings(body)
providerConfig := pluginConfig.GetProviderConfig()
newBody, settingErr := providerConfig.ReplaceByCustomSettings(body)
if settingErr != nil {
util.ErrorHandler(
"ai-proxy.proc_req_body_failed",
fmt.Errorf("failed to replace request body by custom settings: %v", settingErr),
)
return types.ActionContinue
log.Errorf("failed to replace request body by custom settings: %v", settingErr)
}
if providerConfig.IsOpenAIProtocol() {
newBody = normalizeOpenAiRequestBody(newBody, log)
}
log.Debugf("[onHttpRequestBody] newBody=%s", newBody)
body = newBody
action, err := handler.OnRequestBody(ctx, apiName, body, log)
if err == nil {
return action
}
util.ErrorHandler("ai-proxy.proc_req_body_failed", fmt.Errorf("failed to process request body: %v", err))
_ = util.ErrorHandler("ai-proxy.proc_req_body_failed", fmt.Errorf("failed to process request body: %v", err))
}
return types.ActionContinue
}
@@ -176,6 +181,7 @@ func onHttpResponseHeaders(ctx wrapper.HttpContext, pluginConfig config.PluginCo
providerConfig := pluginConfig.GetProviderConfig()
apiTokenInUse := providerConfig.GetApiTokenInUse(ctx)
apiTokens := providerConfig.GetAvailableApiToken(ctx)
status, err := proxywasm.GetHttpResponseHeader(":status")
if err != nil || status != "200" {
@@ -183,7 +189,7 @@ func onHttpResponseHeaders(ctx wrapper.HttpContext, pluginConfig config.PluginCo
log.Errorf("unable to load :status header from response: %v", err)
}
ctx.DontReadResponseBody()
return providerConfig.OnRequestFailed(activeProvider, ctx, apiTokenInUse, log)
return providerConfig.OnRequestFailed(activeProvider, ctx, apiTokenInUse, apiTokens, log)
}
// Reset ctxApiTokenRequestFailureCount if the request is successful,
@@ -201,7 +207,11 @@ func onHttpResponseHeaders(ctx wrapper.HttpContext, pluginConfig config.PluginCo
checkStream(ctx, log)
_, needHandleBody := activeProvider.(provider.TransformResponseBodyHandler)
_, needHandleStreamingBody := activeProvider.(provider.StreamingResponseBodyHandler)
var needHandleStreamingBody bool
_, needHandleStreamingBody = activeProvider.(provider.StreamingResponseBodyHandler)
if !needHandleStreamingBody {
_, needHandleStreamingBody = activeProvider.(provider.StreamingEventHandler)
}
if !needHandleBody && !needHandleStreamingBody {
ctx.DontReadResponseBody()
} else if !needHandleStreamingBody {
@@ -220,7 +230,7 @@ func onStreamingResponseBody(ctx wrapper.HttpContext, pluginConfig config.Plugin
}
log.Debugf("[onStreamingResponseBody] provider=%s", activeProvider.GetProviderType())
log.Debugf("isLastChunk=%v chunk: %s", isLastChunk, string(chunk))
log.Debugf("[onStreamingResponseBody] isLastChunk=%v chunk: %s", isLastChunk, string(chunk))
if handler, ok := activeProvider.(provider.StreamingResponseBodyHandler); ok {
apiName, _ := ctx.GetContext(provider.CtxKeyApiName).(provider.ApiName)
@@ -230,6 +240,38 @@ func onStreamingResponseBody(ctx wrapper.HttpContext, pluginConfig config.Plugin
}
return chunk
}
if handler, ok := activeProvider.(provider.StreamingEventHandler); ok {
apiName, _ := ctx.GetContext(provider.CtxKeyApiName).(provider.ApiName)
events := provider.ExtractStreamingEvents(ctx, chunk, log)
log.Debugf("[onStreamingResponseBody] %d events received", len(events))
if len(events) == 0 {
// No events are extracted, return the original chunk
return chunk
}
var responseBuilder strings.Builder
for _, event := range events {
log.Debugf("processing event: %v", event)
if event.IsEndData() {
responseBuilder.WriteString(event.ToHttpString())
continue
}
outputEvents, err := handler.OnStreamingEvent(ctx, apiName, event, log)
if err != nil {
log.Errorf("[onStreamingResponseBody] failed to process streaming event: %v\n%s", err, chunk)
return chunk
}
if outputEvents == nil || len(outputEvents) == 0 {
responseBuilder.WriteString(event.ToHttpString())
} else {
for _, outputEvent := range outputEvents {
responseBuilder.WriteString(outputEvent.ToHttpString())
}
}
}
return []byte(responseBuilder.String())
}
return chunk
}
@@ -247,16 +289,28 @@ func onHttpResponseBody(ctx wrapper.HttpContext, pluginConfig config.PluginConfi
apiName, _ := ctx.GetContext(provider.CtxKeyApiName).(provider.ApiName)
body, err := handler.TransformResponseBody(ctx, apiName, body, log)
if err != nil {
util.ErrorHandler("ai-proxy.proc_resp_body_failed", fmt.Errorf("failed to process response body: %v", err))
_ = util.ErrorHandler("ai-proxy.proc_resp_body_failed", fmt.Errorf("failed to process response body: %v", err))
return types.ActionContinue
}
if err = provider.ReplaceResponseBody(body, log); err != nil {
util.ErrorHandler("ai-proxy.replace_resp_body_failed", fmt.Errorf("failed to replace response body: %v", err))
_ = util.ErrorHandler("ai-proxy.replace_resp_body_failed", fmt.Errorf("failed to replace response body: %v", err))
}
}
return types.ActionContinue
}
func normalizeOpenAiRequestBody(body []byte, log wrapper.Log) []byte {
var err error
// Default setting include_usage.
if gjson.GetBytes(body, "stream").Bool() {
body, err = sjson.SetBytes(body, "stream_options.include_usage", true)
if err != nil {
log.Errorf("set include_usage failed, err:%s", err)
}
}
return body
}
func checkStream(ctx wrapper.HttpContext, log wrapper.Log) {
contentType, err := proxywasm.GetHttpResponseHeader("Content-Type")
if err != nil || !strings.HasPrefix(contentType, "text/event-stream") {

View File

@@ -32,6 +32,8 @@ type failover struct {
healthCheckModel string `required:"false" yaml:"healthCheckModel" json:"healthCheckModel"`
// @Title zh-CN 本次请求使用的 apiToken
ctxApiTokenInUse string
// @Title zh-CN 记录本次请求时所有可用的 apiToken
ctxAvailableApiTokensInRequest string
// @Title zh-CN 记录 apiToken 请求失败的次数key 为 apiTokenvalue 为失败次数
ctxApiTokenRequestFailureCount string
// @Title zh-CN 记录 apiToken 健康检测成功的次数key 为 apiTokenvalue 为成功次数
@@ -527,6 +529,22 @@ func (c *ProviderConfig) GetGlobalRandomToken(log wrapper.Log) string {
}
}
func (c *ProviderConfig) GetAvailableApiToken(ctx wrapper.HttpContext) []string {
apiTokens, _ := ctx.GetContext(c.failover.ctxAvailableApiTokensInRequest).([]string)
return apiTokens
}
// SetAvailableApiTokens set available apiTokens of current request in the context, will be used in the retryOnFailure
func (c *ProviderConfig) SetAvailableApiTokens(ctx wrapper.HttpContext, log wrapper.Log) {
var apiTokens []string
if c.isFailoverEnabled() {
apiTokens, _, _ = getApiTokens(c.failover.ctxApiTokens)
} else {
apiTokens = c.apiTokens
}
ctx.SetContext(c.failover.ctxAvailableApiTokensInRequest, apiTokens)
}
func (c *ProviderConfig) isFailoverEnabled() bool {
return c.failover.enabled
}
@@ -539,12 +557,12 @@ func (c *ProviderConfig) resetSharedData() {
_ = proxywasm.SetSharedData(c.failover.ctxApiTokenRequestFailureCount, nil, 0)
}
func (c *ProviderConfig) OnRequestFailed(activeProvider Provider, ctx wrapper.HttpContext, apiTokenInUse string, log wrapper.Log) types.Action {
func (c *ProviderConfig) OnRequestFailed(activeProvider Provider, ctx wrapper.HttpContext, apiTokenInUse string, apiTokens []string, log wrapper.Log) types.Action {
if c.isFailoverEnabled() {
c.handleUnavailableApiToken(ctx, apiTokenInUse, log)
}
if c.isRetryOnFailureEnabled() && ctx.GetContext(ctxKeyIsStreaming) != nil && !ctx.GetContext(ctxKeyIsStreaming).(bool) {
c.retryFailedRequest(activeProvider, ctx, log)
c.retryFailedRequest(activeProvider, ctx, apiTokenInUse, apiTokens, log)
return types.HeaderStopAllIterationAndWatermark
}
return types.ActionContinue

View File

@@ -1,6 +1,9 @@
package provider
import "strings"
import (
"fmt"
"strings"
)
const (
streamEventIdItemKey = "id:"
@@ -110,9 +113,16 @@ type chatCompletionChoice struct {
}
type usage struct {
PromptTokens int `json:"prompt_tokens,omitempty"`
CompletionTokens int `json:"completion_tokens,omitempty"`
TotalTokens int `json:"total_tokens,omitempty"`
PromptTokens int `json:"prompt_tokens,omitempty"`
CompletionTokens int `json:"completion_tokens,omitempty"`
TotalTokens int `json:"total_tokens,omitempty"`
CompletionTokensDetails *completionTokensDetails `json:"completion_tokens_details,omitempty"`
}
type completionTokensDetails struct {
ReasoningTokens int `json:"reasoning_tokens,omitempty"`
AcceptedPredictionTokens int `json:"accepted_prediction_tokens,omitempty"`
RejectedPredictionTokens int `json:"rejected_prediction_tokens,omitempty"`
}
type chatMessage struct {
@@ -126,6 +136,24 @@ type chatMessage struct {
Refusal string `json:"refusal,omitempty"`
}
func (m *chatMessage) handleReasoningContent(reasoningContentMode string) {
if m.ReasoningContent == "" {
return
}
switch reasoningContentMode {
case reasoningBehaviorIgnore:
m.ReasoningContent = ""
break
case reasoningBehaviorConcat:
m.Content = fmt.Sprintf("%v\n%v", m.ReasoningContent, m.Content)
m.ReasoningContent = ""
break
case reasoningBehaviorPassThrough:
default:
break
}
}
type messageContent struct {
Type string `json:"type,omitempty"`
Text string `json:"text"`
@@ -138,6 +166,9 @@ type imageUrl struct {
}
func (m *chatMessage) IsEmpty() bool {
if m.ReasoningContent != "" {
return false
}
if m.IsStringContent() && m.Content != "" {
return false
}
@@ -247,14 +278,18 @@ func (m *functionCall) IsEmpty() bool {
return m.Name == "" && m.Arguments == ""
}
type streamEvent struct {
type StreamEvent struct {
Id string `json:"id"`
Event string `json:"event"`
Data string `json:"data"`
HttpStatus string `json:"http_status"`
}
func (e *streamEvent) setValue(key, value string) {
func (e *StreamEvent) IsEndData() bool {
return e.Data == streamEndDataValue
}
func (e *StreamEvent) SetValue(key, value string) {
switch key {
case streamEventIdItemKey:
e.Id = value
@@ -269,6 +304,10 @@ func (e *streamEvent) setValue(key, value string) {
}
}
func (e *StreamEvent) ToHttpString() string {
return fmt.Sprintf("%s %s\n\n", streamDataItemKey, e.Data)
}
// https://platform.openai.com/docs/guides/images
type imageGenerationRequest struct {
Model string `json:"model"`

View File

@@ -102,12 +102,12 @@ func (m *moonshotProvider) OnRequestBody(ctx wrapper.HttpContext, apiName ApiNam
}()
if err != nil {
log.Errorf("failed to load context file: %v", err)
util.ErrorHandler("ai-proxy.moonshot.load_ctx_failed", fmt.Errorf("failed to load context file: %v", err))
_ = util.ErrorHandler("ai-proxy.moonshot.load_ctx_failed", fmt.Errorf("failed to load context file: %v", err))
return
}
err = m.performChatCompletion(ctx, content, request, log)
if err != nil {
util.ErrorHandler("ai-proxy.moonshot.insert_ctx_failed", fmt.Errorf("failed to perform chat completion: %v", err))
_ = util.ErrorHandler("ai-proxy.moonshot.insert_ctx_failed", fmt.Errorf("failed to perform chat completion: %v", err))
}
}, log)
if err == nil {
@@ -161,79 +161,9 @@ func (m *moonshotProvider) sendRequest(method, path, body, apiKey string, callba
}
}
func (m *moonshotProvider) OnStreamingResponseBody(ctx wrapper.HttpContext, name ApiName, chunk []byte, isLastChunk bool, log wrapper.Log) ([]byte, error) {
func (m *moonshotProvider) OnStreamingEvent(ctx wrapper.HttpContext, name ApiName, event StreamEvent, log wrapper.Log) ([]StreamEvent, error) {
if name != ApiNameChatCompletion {
return chunk, nil
}
receivedBody := chunk
if bufferedStreamingBody, has := ctx.GetContext(ctxKeyStreamingBody).([]byte); has {
receivedBody = append(bufferedStreamingBody, chunk...)
}
eventStartIndex, lineStartIndex, valueStartIndex := -1, -1, -1
defer func() {
if eventStartIndex >= 0 && eventStartIndex < len(receivedBody) {
// Just in case the received chunk is not a complete event.
ctx.SetContext(ctxKeyStreamingBody, receivedBody[eventStartIndex:])
} else {
ctx.SetContext(ctxKeyStreamingBody, nil)
}
}()
var responseBuilder strings.Builder
currentKey := ""
currentEvent := &streamEvent{}
i, length := 0, len(receivedBody)
for i = 0; i < length; i++ {
ch := receivedBody[i]
if ch != '\n' {
if lineStartIndex == -1 {
if eventStartIndex == -1 {
eventStartIndex = i
}
lineStartIndex = i
valueStartIndex = -1
}
if valueStartIndex == -1 {
if ch == ':' {
valueStartIndex = i + 1
currentKey = string(receivedBody[lineStartIndex:valueStartIndex])
}
} else if valueStartIndex == i && ch == ' ' {
// Skip leading spaces in data.
valueStartIndex = i + 1
}
continue
}
if lineStartIndex != -1 {
value := string(receivedBody[valueStartIndex:i])
currentEvent.setValue(currentKey, value)
} else {
// Extra new line. The current event is complete.
log.Debugf("processing event: %v", currentEvent)
m.convertStreamEvent(&responseBuilder, currentEvent, log)
// Reset event parsing state.
eventStartIndex = -1
currentEvent = &streamEvent{}
}
// Reset line parsing state.
lineStartIndex = -1
valueStartIndex = -1
currentKey = ""
}
modifiedResponseChunk := responseBuilder.String()
log.Debugf("=== modified response chunk: %s", modifiedResponseChunk)
return []byte(modifiedResponseChunk), nil
}
func (m *moonshotProvider) convertStreamEvent(responseBuilder *strings.Builder, event *streamEvent, log wrapper.Log) error {
if event.Data == streamEndDataValue {
m.appendStreamEvent(responseBuilder, event)
return nil
return nil, nil
}
if gjson.Get(event.Data, "choices.0.usage").Exists() {
@@ -241,20 +171,19 @@ func (m *moonshotProvider) convertStreamEvent(responseBuilder *strings.Builder,
newData, err := sjson.Delete(event.Data, "choices.0.usage")
if err != nil {
log.Errorf("convert usage event error: %v", err)
return err
return nil, err
}
newData, err = sjson.SetRaw(newData, "usage", usageStr)
if err != nil {
log.Errorf("convert usage event error: %v", err)
return err
return nil, err
}
event.Data = newData
}
m.appendStreamEvent(responseBuilder, event)
return nil
return []StreamEvent{event}, nil
}
func (m *moonshotProvider) appendStreamEvent(responseBuilder *strings.Builder, event *streamEvent) {
func (m *moonshotProvider) appendStreamEvent(responseBuilder *strings.Builder, event *StreamEvent) {
responseBuilder.WriteString(streamDataItemKey)
responseBuilder.WriteString(event.Data)
responseBuilder.WriteString("\n\n")

View File

@@ -2,7 +2,6 @@ package provider
import (
"encoding/json"
"fmt"
"net/http"
"path"
"strings"
@@ -58,10 +57,10 @@ func (m *openaiProviderInitializer) CreateProvider(config ProviderConfig) (Provi
}
customUrl := strings.TrimPrefix(strings.TrimPrefix(config.openaiCustomUrl, "http://"), "https://")
pairs := strings.SplitN(customUrl, "/", 2)
if len(pairs) != 2 {
return nil, fmt.Errorf("invalid openaiCustomUrl:%s", config.openaiCustomUrl)
customPath := "/"
if len(pairs) == 2 {
customPath += pairs[1]
}
customPath := "/" + pairs[1]
isDirectCustomPath := isDirectPath(customPath)
capabilities := m.DefaultCapabilities()
if !isDirectCustomPath {
@@ -128,21 +127,14 @@ func (m *openaiProvider) OnRequestBody(ctx wrapper.HttpContext, apiName ApiName,
}
func (m *openaiProvider) TransformRequestBody(ctx wrapper.HttpContext, apiName ApiName, body []byte, log wrapper.Log) ([]byte, error) {
request := &chatCompletionRequest{}
if err := decodeChatCompletionRequest(body, request); err != nil {
return nil, err
}
if m.config.responseJsonSchema != nil {
request := &chatCompletionRequest{}
if err := decodeChatCompletionRequest(body, request); err != nil {
return nil, err
}
log.Debugf("[ai-proxy] set response format to %s", m.config.responseJsonSchema)
request.ResponseFormat = m.config.responseJsonSchema
body, _ = json.Marshal(request)
}
if request.Stream {
// For stream requests, we need to include usage in the response.
if request.StreamOptions == nil {
request.StreamOptions = &streamOptions{IncludeUsage: true}
} else if !request.StreamOptions.IncludeUsage {
request.StreamOptions.IncludeUsage = true
}
}
return json.Marshal(request)
return m.config.defaultTransformRequestBody(ctx, apiName, body, log)
}

View File

@@ -85,6 +85,10 @@ const (
objectChatCompletion = "chat.completion"
objectChatCompletionChunk = "chat.completion.chunk"
reasoningBehaviorPassThrough = "passthrough"
reasoningBehaviorIgnore = "ignore"
reasoningBehaviorConcat = "concat"
wildcard = "*"
defaultTimeout = 2 * 60 * 1000 // ms
@@ -145,6 +149,10 @@ type StreamingResponseBodyHandler interface {
OnStreamingResponseBody(ctx wrapper.HttpContext, name ApiName, chunk []byte, isLastChunk bool, log wrapper.Log) ([]byte, error)
}
type StreamingEventHandler interface {
OnStreamingEvent(ctx wrapper.HttpContext, name ApiName, event StreamEvent, log wrapper.Log) ([]StreamEvent, error)
}
type ApiNameHandler interface {
GetApiName(path string) ApiName
}
@@ -190,6 +198,9 @@ type ProviderConfig struct {
// @Title zh-CN 失败请求重试
// @Description zh-CN 对失败的请求立即进行重试
retryOnFailure *retryOnFailure `required:"false" yaml:"retryOnFailure" json:"retryOnFailure"`
// @Title zh-CN 推理内容处理方式
// @Description zh-CN 如何处理大模型服务返回的推理内容。目前支持以下取值passthrough正常输出推理内容、ignore不输出推理内容、concat将推理内容拼接在常规输出内容之前。默认为 passthrough。仅支持通义千问服务。
reasoningContentMode string `required:"false" yaml:"reasoningContentMode" json:"reasoningContentMode"`
// @Title zh-CN 基于OpenAI协议的自定义后端URL
// @Description zh-CN 仅适用于支持 openai 协议的服务。
openaiCustomUrl string `required:"false" yaml:"openaiCustomUrl" json:"openaiCustomUrl"`
@@ -281,6 +292,10 @@ func (c *ProviderConfig) GetProtocol() string {
return c.protocol
}
func (c *ProviderConfig) IsOpenAIProtocol() bool {
return c.protocol == protocolOpenAI
}
func (c *ProviderConfig) FromJson(json gjson.Result) {
c.id = json.Get("id").String()
c.typ = json.Get("type").String()
@@ -359,6 +374,20 @@ func (c *ProviderConfig) FromJson(json gjson.Result) {
}
}
c.reasoningContentMode = json.Get("reasoningContentMode").String()
if c.reasoningContentMode == "" {
c.reasoningContentMode = reasoningBehaviorPassThrough
} else {
c.reasoningContentMode = strings.ToLower(c.reasoningContentMode)
switch c.reasoningContentMode {
case reasoningBehaviorPassThrough, reasoningBehaviorIgnore, reasoningBehaviorConcat:
break
default:
c.reasoningContentMode = reasoningBehaviorPassThrough
break
}
}
failoverJson := json.Get("failover")
c.failover = &failover{
enabled: false,
@@ -554,6 +583,81 @@ func doGetMappedModel(model string, modelMapping map[string]string, log wrapper.
return ""
}
func ExtractStreamingEvents(ctx wrapper.HttpContext, chunk []byte, log wrapper.Log) []StreamEvent {
body := chunk
if bufferedStreamingBody, has := ctx.GetContext(ctxKeyStreamingBody).([]byte); has {
body = append(bufferedStreamingBody, chunk...)
}
eventStartIndex, lineStartIndex, valueStartIndex := -1, -1, -1
defer func() {
if eventStartIndex >= 0 && eventStartIndex < len(body) {
// Just in case the received chunk is not a complete event.
ctx.SetContext(ctxKeyStreamingBody, body[eventStartIndex:])
} else {
ctx.SetContext(ctxKeyStreamingBody, nil)
}
}()
// Sample Qwen event response:
//
// event:result
// :HTTP_STATUS/200
// data:{"output":{"choices":[{"message":{"content":"你好!","role":"assistant"},"finish_reason":"null"}]},"usage":{"total_tokens":116,"input_tokens":114,"output_tokens":2},"request_id":"71689cfc-1f42-9949-86e8-9563b7f832b1"}
//
// event:error
// :HTTP_STATUS/400
// data:{"code":"InvalidParameter","message":"Preprocessor error","request_id":"0cbe6006-faec-9854-bf8b-c906d75c3bd8"}
//
var events []StreamEvent
currentKey := ""
currentEvent := &StreamEvent{}
i, length := 0, len(body)
for i = 0; i < length; i++ {
ch := body[i]
if ch != '\n' {
if lineStartIndex == -1 {
if eventStartIndex == -1 {
eventStartIndex = i
}
lineStartIndex = i
valueStartIndex = -1
}
if valueStartIndex == -1 {
if ch == ':' {
valueStartIndex = i + 1
currentKey = string(body[lineStartIndex:valueStartIndex])
}
} else if valueStartIndex == i && ch == ' ' {
// Skip leading spaces in data.
valueStartIndex = i + 1
}
continue
}
if lineStartIndex != -1 {
value := string(body[valueStartIndex:i])
currentEvent.SetValue(currentKey, value)
} else {
// Extra new line. The current event is complete.
events = append(events, *currentEvent)
// Reset event parsing state.
eventStartIndex = -1
currentEvent = &StreamEvent{}
}
// Reset line parsing state.
lineStartIndex = -1
valueStartIndex = -1
currentKey = ""
}
return events
}
func (c *ProviderConfig) isSupportedAPI(apiName ApiName) bool {
_, exist := c.capabilities[string(apiName)]
return exist

View File

@@ -188,89 +188,32 @@ func (m *qwenProvider) onEmbeddingsRequestBody(ctx wrapper.HttpContext, body []b
return json.Marshal(qwenRequest)
}
func (m *qwenProvider) OnStreamingResponseBody(ctx wrapper.HttpContext, name ApiName, chunk []byte, isLastChunk bool, log wrapper.Log) ([]byte, error) {
func (m *qwenProvider) OnStreamingEvent(ctx wrapper.HttpContext, name ApiName, event StreamEvent, log wrapper.Log) ([]StreamEvent, error) {
if m.config.qwenEnableCompatible || name != ApiNameChatCompletion {
return chunk, nil
}
receivedBody := chunk
if bufferedStreamingBody, has := ctx.GetContext(ctxKeyStreamingBody).([]byte); has {
receivedBody = append(bufferedStreamingBody, chunk...)
return nil, nil
}
incrementalStreaming := ctx.GetBoolContext(ctxKeyIncrementalStreaming, false)
eventStartIndex, lineStartIndex, valueStartIndex := -1, -1, -1
defer func() {
if eventStartIndex >= 0 && eventStartIndex < len(receivedBody) {
// Just in case the received chunk is not a complete event.
ctx.SetContext(ctxKeyStreamingBody, receivedBody[eventStartIndex:])
} else {
ctx.SetContext(ctxKeyStreamingBody, nil)
}
}()
// Sample Qwen event response:
//
// event:result
// :HTTP_STATUS/200
// data:{"output":{"choices":[{"message":{"content":"你好!","role":"assistant"},"finish_reason":"null"}]},"usage":{"total_tokens":116,"input_tokens":114,"output_tokens":2},"request_id":"71689cfc-1f42-9949-86e8-9563b7f832b1"}
//
// event:error
// :HTTP_STATUS/400
// data:{"code":"InvalidParameter","message":"Preprocessor error","request_id":"0cbe6006-faec-9854-bf8b-c906d75c3bd8"}
//
var responseBuilder strings.Builder
currentKey := ""
currentEvent := &streamEvent{}
i, length := 0, len(receivedBody)
for i = 0; i < length; i++ {
ch := receivedBody[i]
if ch != '\n' {
if lineStartIndex == -1 {
if eventStartIndex == -1 {
eventStartIndex = i
}
lineStartIndex = i
valueStartIndex = -1
}
if valueStartIndex == -1 {
if ch == ':' {
valueStartIndex = i + 1
currentKey = string(receivedBody[lineStartIndex:valueStartIndex])
}
} else if valueStartIndex == i && ch == ' ' {
// Skip leading spaces in data.
valueStartIndex = i + 1
}
continue
}
if lineStartIndex != -1 {
value := string(receivedBody[valueStartIndex:i])
currentEvent.setValue(currentKey, value)
} else {
// Extra new line. The current event is complete.
log.Debugf("processing event: %v", currentEvent)
if err := m.convertStreamEvent(ctx, &responseBuilder, currentEvent, incrementalStreaming, log); err != nil {
return nil, err
}
// Reset event parsing state.
eventStartIndex = -1
currentEvent = &streamEvent{}
}
// Reset line parsing state.
lineStartIndex = -1
valueStartIndex = -1
currentKey = ""
qwenResponse := &qwenTextGenResponse{}
if err := json.Unmarshal([]byte(event.Data), qwenResponse); err != nil {
log.Errorf("unable to unmarshal Qwen response: %v", err)
return nil, fmt.Errorf("unable to unmarshal Qwen response: %v", err)
}
modifiedResponseChunk := responseBuilder.String()
log.Debugf("=== modified response chunk: %s", modifiedResponseChunk)
return []byte(modifiedResponseChunk), nil
var outputEvents []StreamEvent
responses := m.buildChatCompletionStreamingResponse(ctx, qwenResponse, incrementalStreaming, log)
for _, response := range responses {
responseBody, err := json.Marshal(response)
if err != nil {
log.Errorf("unable to marshal response: %v", err)
return nil, fmt.Errorf("unable to marshal response: %v", err)
}
modifiedEvent := event
modifiedEvent.Data = string(responseBody)
outputEvents = append(outputEvents, modifiedEvent)
}
return outputEvents, nil
}
func (m *qwenProvider) TransformResponseBody(ctx wrapper.HttpContext, apiName ApiName, body []byte, log wrapper.Log) ([]byte, error) {
@@ -357,7 +300,7 @@ func (m *qwenProvider) buildQwenTextGenerationRequest(ctx wrapper.HttpContext, o
func (m *qwenProvider) buildChatCompletionResponse(ctx wrapper.HttpContext, qwenResponse *qwenTextGenResponse) *chatCompletionResponse {
choices := make([]chatCompletionChoice, 0, len(qwenResponse.Output.Choices))
for _, qwenChoice := range qwenResponse.Output.Choices {
message := qwenMessageToChatMessage(qwenChoice.Message)
message := qwenMessageToChatMessage(qwenChoice.Message, m.config.reasoningContentMode)
choices = append(choices, chatCompletionChoice{
Message: &message,
FinishReason: qwenChoice.FinishReason,
@@ -395,7 +338,8 @@ func (m *qwenProvider) buildChatCompletionStreamingResponse(ctx wrapper.HttpCont
finished := qwenChoice.FinishReason != "" && qwenChoice.FinishReason != "null"
message := qwenChoice.Message
deltaContentMessage := &chatMessage{Role: message.Role, Content: message.Content}
deltaContentMessage := &chatMessage{Role: message.Role, Content: message.Content, ReasoningContent: message.ReasoningContent}
deltaContentMessage.handleReasoningContent(m.config.reasoningContentMode)
deltaToolCallsMessage := &chatMessage{Role: message.Role, ToolCalls: append([]toolCall{}, message.ToolCalls...)}
if !incrementalStreaming {
for _, tc := range message.ToolCalls {
@@ -430,6 +374,11 @@ func (m *qwenProvider) buildChatCompletionStreamingResponse(ctx wrapper.HttpCont
}
}
}
if message.ReasoningContent == "" {
message.ReasoningContent = pushedMessage.ReasoningContent
} else {
deltaContentMessage.ReasoningContent = util.StripPrefix(deltaContentMessage.ReasoningContent, pushedMessage.ReasoningContent)
}
if len(deltaToolCallsMessage.ToolCalls) > 0 && pushedMessage.ToolCalls != nil {
for i, tc := range deltaToolCallsMessage.ToolCalls {
if i >= len(pushedMessage.ToolCalls) {
@@ -475,39 +424,6 @@ func (m *qwenProvider) buildChatCompletionStreamingResponse(ctx wrapper.HttpCont
return responses
}
func (m *qwenProvider) convertStreamEvent(ctx wrapper.HttpContext, responseBuilder *strings.Builder, event *streamEvent, incrementalStreaming bool, log wrapper.Log) error {
if event.Data == streamEndDataValue {
m.appendStreamEvent(responseBuilder, event)
return nil
}
if event.Event != eventResult || event.HttpStatus != httpStatus200 {
// Something goes wrong. Just pass through the event.
m.appendStreamEvent(responseBuilder, event)
return nil
}
qwenResponse := &qwenTextGenResponse{}
if err := json.Unmarshal([]byte(event.Data), qwenResponse); err != nil {
log.Errorf("unable to unmarshal Qwen response: %v", err)
return fmt.Errorf("unable to unmarshal Qwen response: %v", err)
}
responses := m.buildChatCompletionStreamingResponse(ctx, qwenResponse, incrementalStreaming, log)
for _, response := range responses {
responseBody, err := json.Marshal(response)
if err != nil {
log.Errorf("unable to marshal response: %v", err)
return fmt.Errorf("unable to marshal response: %v", err)
}
modifiedEvent := &*event
modifiedEvent.Data = string(responseBody)
m.appendStreamEvent(responseBuilder, modifiedEvent)
}
return nil
}
func (m *qwenProvider) insertHttpContextMessage(body []byte, content string, onlyOneSystemBeforeFile bool) ([]byte, error) {
request := &qwenTextGenRequest{}
if err := json.Unmarshal(body, request); err != nil {
@@ -552,7 +468,7 @@ func (m *qwenProvider) insertHttpContextMessage(body []byte, content string, onl
return json.Marshal(request)
}
func (m *qwenProvider) appendStreamEvent(responseBuilder *strings.Builder, event *streamEvent) {
func (m *qwenProvider) appendStreamEvent(responseBuilder *strings.Builder, event *StreamEvent) {
responseBuilder.WriteString(streamDataItemKey)
responseBuilder.WriteString(event.Data)
responseBuilder.WriteString("\n\n")
@@ -690,13 +606,16 @@ type qwenTextEmbeddings struct {
Embedding []float64 `json:"embedding"`
}
func qwenMessageToChatMessage(qwenMessage qwenMessage) chatMessage {
return chatMessage{
Name: qwenMessage.Name,
Role: qwenMessage.Role,
Content: qwenMessage.Content,
ToolCalls: qwenMessage.ToolCalls,
// qwenMessageToChatMessage converts a Qwen-native message into the
// chatMessage representation, copying the name, role, content, reasoning
// content, and tool calls, then applying the configured reasoning-content
// handling mode to the result.
func qwenMessageToChatMessage(qwenMessage qwenMessage, reasoningContentMode string) chatMessage {
	msg := chatMessage{
		Name:             qwenMessage.Name,
		Role:             qwenMessage.Role,
		Content:          qwenMessage.Content,
		ReasoningContent: qwenMessage.ReasoningContent,
		ToolCalls:        qwenMessage.ToolCalls,
	}
	// Post-process reasoning content according to reasoningContentMode
	// (handleReasoningContent is defined elsewhere in this file).
	msg.handleReasoningContent(reasoningContentMode)
	return msg
}
func (m *qwenMessage) IsStringContent() bool {

View File

@@ -1,11 +1,13 @@
package provider
import (
"math/rand"
"net/http"
"github.com/alibaba/higress/plugins/wasm-go/extensions/ai-proxy/util"
"github.com/alibaba/higress/plugins/wasm-go/pkg/wrapper"
"github.com/higress-group/proxy-wasm-go-sdk/proxywasm"
"github.com/tidwall/gjson"
"net/http"
)
const (
@@ -38,12 +40,12 @@ func (c *ProviderConfig) isRetryOnFailureEnabled() bool {
return c.retryOnFailure.enabled
}
func (c *ProviderConfig) retryFailedRequest(activeProvider Provider, ctx wrapper.HttpContext, log wrapper.Log) {
func (c *ProviderConfig) retryFailedRequest(activeProvider Provider, ctx wrapper.HttpContext, apiTokenInUse string, apiTokens []string, log wrapper.Log) {
log.Debugf("Retry failed request: provider=%s", activeProvider.GetProviderType())
retryClient := createRetryClient(ctx)
apiName, _ := ctx.GetContext(CtxKeyApiName).(ApiName)
ctx.SetContext(ctxRetryCount, 1)
c.sendRetryRequest(ctx, apiName, activeProvider, retryClient, log)
c.sendRetryRequest(ctx, apiName, activeProvider, retryClient, apiTokenInUse, apiTokens, log)
}
func (c *ProviderConfig) transformResponseHeadersAndBody(ctx wrapper.HttpContext, activeProvider Provider, apiName ApiName, headers http.Header, body []byte, log wrapper.Log) ([][2]string, []byte) {
@@ -67,7 +69,8 @@ func (c *ProviderConfig) transformResponseHeadersAndBody(ctx wrapper.HttpContext
func (c *ProviderConfig) retryCall(
ctx wrapper.HttpContext, log wrapper.Log, activeProvider Provider,
apiName ApiName, statusCode int, responseHeaders http.Header, responseBody []byte,
retryClient *wrapper.ClusterClient[wrapper.RouteCluster]) {
retryClient *wrapper.ClusterClient[wrapper.RouteCluster],
apiTokenInUse string, apiTokens []string) {
retryCount := ctx.GetContext(ctxRetryCount).(int)
log.Debugf("Sent retry request: %d/%d", retryCount, c.retryOnFailure.maxRetries)
@@ -76,6 +79,7 @@ func (c *ProviderConfig) retryCall(
log.Debugf("Retry request succeeded")
headers, body := c.transformResponseHeadersAndBody(ctx, activeProvider, apiName, responseHeaders, responseBody, log)
proxywasm.SendHttpResponse(200, headers, body, -1)
return
} else {
log.Debugf("The retry request still failed, status: %d, responseHeaders: %v, responseBody: %s", statusCode, responseHeaders, string(responseBody))
}
@@ -83,26 +87,41 @@ func (c *ProviderConfig) retryCall(
retryCount++
if retryCount <= int(c.retryOnFailure.maxRetries) {
ctx.SetContext(ctxRetryCount, retryCount)
c.sendRetryRequest(ctx, apiName, activeProvider, retryClient, log)
c.sendRetryRequest(ctx, apiName, activeProvider, retryClient, apiTokenInUse, apiTokens, log)
} else {
log.Debugf("Reached the maximum retry count: %d", c.retryOnFailure.maxRetries)
proxywasm.ResumeHttpResponse()
return
}
}
func (c *ProviderConfig) sendRetryRequest(
ctx wrapper.HttpContext, apiName ApiName, activeProvider Provider,
retryClient *wrapper.ClusterClient[wrapper.RouteCluster], log wrapper.Log) {
retryClient *wrapper.ClusterClient[wrapper.RouteCluster],
apiTokenInUse string, apiTokens []string, log wrapper.Log) {
// Remove last failed token from retry apiTokens list
apiTokens = removeApiTokenFromRetryList(apiTokens, apiTokenInUse, log)
if len(apiTokens) == 0 {
log.Debugf("No more apiTokens to retry")
proxywasm.ResumeHttpResponse()
return
}
// Set apiTokenInUse for the retry request
apiTokenInUse = GetRandomToken(apiTokens)
log.Debugf("Retry request with apiToken: %s", apiTokenInUse)
ctx.SetContext(c.failover.ctxApiTokenInUse, apiTokenInUse)
requestHeaders, requestBody := c.getRetryRequestHeadersAndBody(ctx, activeProvider, apiName, log)
path := getRetryPath(ctx)
err := retryClient.Post(path, util.HeaderToSlice(requestHeaders), requestBody, func(statusCode int, responseHeaders http.Header, responseBody []byte) {
c.retryCall(ctx, log, activeProvider, apiName, statusCode, responseHeaders, responseBody, retryClient)
c.retryCall(ctx, log, activeProvider, apiName, statusCode, responseHeaders, responseBody, retryClient, apiTokenInUse, apiTokens)
}, uint32(c.retryOnFailure.retryTimeout))
if err != nil {
log.Errorf("Failed to send retry request: %v", err)
proxywasm.ResumeHttpResponse()
return
}
}
@@ -126,9 +145,7 @@ func getRetryPath(ctx wrapper.HttpContext) string {
}
func (c *ProviderConfig) getRetryRequestHeadersAndBody(ctx wrapper.HttpContext, activeProvider Provider, apiName ApiName, log wrapper.Log) (http.Header, []byte) {
// The retry request may be sent with different apiToken, so the header needs to be regenerated
c.SetApiTokenInUse(ctx, log)
// The retry request is sent with different apiToken, so the header needs to be regenerated
requestHeaders := http.Header{
"Content-Type": []string{"application/json"},
}
@@ -139,3 +156,27 @@ func (c *ProviderConfig) getRetryRequestHeadersAndBody(ctx wrapper.HttpContext,
return requestHeaders, requestBody
}
// removeApiTokenFromRetryList returns a copy of apiTokens with every
// occurrence of removedApiToken filtered out, logging both the removal and
// the tokens that remain available for retries.
func removeApiTokenFromRetryList(apiTokens []string, removedApiToken string, log wrapper.Log) []string {
	var availableApiTokens []string
	for _, token := range apiTokens {
		if token == removedApiToken {
			continue
		}
		availableApiTokens = append(availableApiTokens, token)
	}
	log.Debugf("Remove apiToken %s from retry apiTokens list", removedApiToken)
	log.Debugf("Available retry apiTokens: %v", availableApiTokens)
	return availableApiTokens
}
// GetRandomToken picks one token at random from apiTokens.
// It returns "" for an empty slice and the sole element for a
// single-element slice; otherwise a uniformly random element.
func GetRandomToken(apiTokens []string) string {
	if len(apiTokens) == 0 {
		return ""
	}
	if len(apiTokens) == 1 {
		return apiTokens[0]
	}
	return apiTokens[rand.Intn(len(apiTokens))]
}

View File

@@ -26,14 +26,14 @@ description: AI 配额管理插件配置参考
`redis`中每一项的配置字段说明
| 配置项 | 类型 | 必填 | 默认值 | 说明 |
| ------------ | ------ | ---- | ---------------------------------------------------------- | --------------------------- |
| service_name | string | 必填 | - | redis 服务名称,带服务类型的完整 FQDN 名称,例如 my-redis.dns、redis.my-ns.svc.cluster.local |
| service_port | int | 否 | 服务类型为固定地址static service默认值为80其他为6379 | 输入redis服务的服务端口 |
| username | string | 否 | - | redis用户名 |
| password | string | 否 | - | redis密码 |
| timeout | int | 否 | 1000 | redis连接超时时间单位毫秒 |
| 配置项 | 类型 | 必填 | 默认值 | 说明 |
| ------------ | ------ | ---- | ---------------------------------------------------------- | --------------------------- |
| service_name | string | 必填 | - | redis 服务名称,带服务类型的完整 FQDN 名称,例如 my-redis.dns、redis.my-ns.svc.cluster.local |
| service_port | int | 否 | 服务类型为固定地址static service默认值为80其他为6379 | 输入redis服务的服务端口 |
| username | string | 否 | - | redis用户名 |
| password | string | 否 | - | redis密码 |
| timeout | int | 否 | 1000 | redis连接超时时间单位毫秒 |
| database | int | 否 | 0 | 使用的数据库id例如配置为1对应`SELECT 1` |
## 配置示例

View File

@@ -18,13 +18,14 @@ Plugin execution priority: `750`
| `admin_path` | string | Optional | /quota | Prefix for the path to manage quota requests |
| `redis` | object | Yes | | Redis related configuration |
Explanation of each configuration field in `redis`
| Configuration Item | Type | Required | Default Value | Explanation |
|---------------------|------------------|----------|---------------------------------------------------------|-----------------------------------------------|
| service_name | string | Required | - | Redis service name, full FQDN name with service type, e.g., my-redis.dns, redis.my-ns.svc.cluster.local |
| service_port | int | No | Default value for static service is 80; others are 6379 | Service port for the redis service |
| username | string | No | - | Redis username |
| password | string | No | - | Redis password |
| timeout | int | No | 1000 | Redis connection timeout in milliseconds |
| Configuration Item | Type | Required | Default Value | Explanation |
|--------------------|--------|----------|---------------------------------------------------------|---------------------------------------------------------------------------------------------------------|
| service_name | string | Required | - | Redis service name, full FQDN name with service type, e.g., my-redis.dns, redis.my-ns.svc.cluster.local |
| service_port | int | No | Default value for static service is 80; others are 6379 | Service port for the redis service |
| username | string | No | - | Redis username |
| password | string | No | - | Redis password |
| timeout | int | No | 1000 | Redis connection timeout in milliseconds |
| database | int | No | 0 | The database ID used, for example, configured as 1, corresponds to `SELECT 1`. |
## Configuration Example
### Identify request parameter apikey and apply rate limiting accordingly

View File

@@ -2,11 +2,11 @@ module github.com/alibaba/higress/plugins/wasm-go/extensions/ai-quota
go 1.19
//replace github.com/alibaba/higress/plugins/wasm-go => ../..
replace github.com/alibaba/higress/plugins/wasm-go => ../..
require (
github.com/alibaba/higress/plugins/wasm-go v1.4.3-0.20240808022948-34f5722d93de
github.com/higress-group/proxy-wasm-go-sdk v0.0.0-20240711023527-ba358c48772f
github.com/higress-group/proxy-wasm-go-sdk v1.0.0
github.com/tidwall/gjson v1.17.3
github.com/tidwall/resp v0.1.1
)

View File

@@ -1,12 +1,10 @@
github.com/alibaba/higress/plugins/wasm-go v1.4.3-0.20240808022948-34f5722d93de h1:lDLqj7Hw41ox8VdsP7oCTPhjPa3+QJUCKApcLh2a45Y=
github.com/alibaba/higress/plugins/wasm-go v1.4.3-0.20240808022948-34f5722d93de/go.mod h1:359don/ahMxpfeLMzr29Cjwcu8IywTTDUzWlBPRNLHw=
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
github.com/google/uuid v1.3.0 h1:t6JiXgmwXMjEs8VusXIJk2BXHsn+wx8BZdTaoZ5fu7I=
github.com/google/uuid v1.3.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
github.com/higress-group/nottinygc v0.0.0-20231101025119-e93c4c2f8520 h1:IHDghbGQ2DTIXHBHxWfqCYQW1fKjyJ/I7W1pMyUDeEA=
github.com/higress-group/nottinygc v0.0.0-20231101025119-e93c4c2f8520/go.mod h1:Nz8ORLaFiLWotg6GeKlJMhv8cci8mM43uEnLA5t8iew=
github.com/higress-group/proxy-wasm-go-sdk v0.0.0-20240711023527-ba358c48772f h1:ZIiIBRvIw62gA5MJhuwp1+2wWbqL9IGElQ499rUsYYg=
github.com/higress-group/proxy-wasm-go-sdk v0.0.0-20240711023527-ba358c48772f/go.mod h1:hNFjhrLUIq+kJ9bOcs8QtiplSQ61GZXtd2xHKx4BYRo=
github.com/higress-group/proxy-wasm-go-sdk v1.0.0 h1:BZRNf4R7jr9hwRivg/E29nkVaKEak5MWjBDhWjuHijU=
github.com/higress-group/proxy-wasm-go-sdk v1.0.0/go.mod h1:iiSyFbo+rAtbtGt/bsefv8GU57h9CCLYGJA74/tF5/0=
github.com/magefile/mage v1.14.0 h1:6QDX3g6z1YvJ4olPhT1wksUcSa/V0a1B+pJb73fBjyo=
github.com/magefile/mage v1.14.0/go.mod h1:z5UZb/iS3GoOSn0JgWuiw7dxlurVYTu+/jHXqQg881A=
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=

View File

@@ -69,6 +69,7 @@ type RedisInfo struct {
Username string `required:"false" yaml:"username" json:"username"`
Password string `required:"false" yaml:"password" json:"password"`
Timeout int `required:"false" yaml:"timeout" json:"timeout"`
Database int `required:"false" yaml:"database" json:"database"`
}
func parseConfig(json gjson.Result, config *QuotaConfig, log wrapper.Log) error {
@@ -110,17 +111,19 @@ func parseConfig(json gjson.Result, config *QuotaConfig, log wrapper.Log) error
if timeout == 0 {
timeout = 1000
}
database := int(redisConfig.Get("database").Int())
config.redisInfo.ServiceName = serviceName
config.redisInfo.ServicePort = servicePort
config.redisInfo.Username = username
config.redisInfo.Password = password
config.redisInfo.Timeout = timeout
config.redisInfo.Database = database
config.redisClient = wrapper.NewRedisClusterClient(wrapper.FQDNCluster{
FQDN: serviceName,
Port: int64(servicePort),
})
return config.redisClient.Init(username, password, int64(timeout))
return config.redisClient.Init(username, password, int64(timeout), wrapper.WithDataBase(database))
}
func onHttpRequestHeaders(context wrapper.HttpContext, config QuotaConfig, log wrapper.Log) types.Action {

View File

@@ -0,0 +1,244 @@
---
title: AI 搜索增强
keywords: [higress,ai search]
description: higress 支持通过集成搜索引擎Google/Bing/Arxiv/Elasticsearch等的实时结果增强DeepSeek-R1等模型的回答准确性和时效性
---
## 功能说明
`ai-search`插件通过集成搜索引擎Google/Bing/Arxiv/Elasticsearch等的实时结果增强AI模型的回答准确性和时效性。插件会自动将搜索结果注入到提示模板中并根据配置决定是否在最终回答中添加引用来源。
## 运行属性
插件执行阶段:`默认阶段`
插件执行优先级:`440`
## 配置字段
| 名称 | 数据类型 | 填写要求 | 默认值 | 描述 |
|------|----------|----------|--------|------|
| needReference | bool | 选填 | false | 是否在回答中添加引用来源 |
| referenceFormat | string | 选填 | `"**References:**\n%s"` | 引用内容格式,必须包含%s占位符 |
| defaultLang | string | 选填 | - | 默认搜索语言代码如zh-CN/en-US |
| promptTemplate | string | 选填 | 内置模板 | 提示模板,必须包含`{search_results}``{question}`占位符 |
| searchFrom | array of object | 必填 | - | 参考下面搜索引擎配置,至少配置一个引擎 |
| searchRewrite | object | 选填 | - | 搜索重写配置用于使用LLM服务优化搜索查询 |
## 搜索重写说明
搜索重写功能使用LLM服务对用户的原始查询进行分析和优化可以
1. 将用户的自然语言查询转换为更适合搜索引擎的关键词组合
2. 对于Arxiv论文搜索自动识别相关的论文类别并添加类别限定
3. 对于私有知识库搜索,将长查询拆分成多个精准的关键词组合
强烈建议在使用Arxiv或Elasticsearch引擎时启用此功能。对于Arxiv搜索它能准确识别论文所属领域并优化英文关键词对于私有知识库搜索它能提供更精准的关键词匹配显著提升搜索效果。
## 搜索重写配置
| 名称 | 数据类型 | 填写要求 | 默认值 | 描述 |
|------|----------|----------|--------|------|
| llmServiceName | string | 必填 | - | LLM服务名称 |
| llmServicePort | number | 必填 | - | LLM服务端口 |
| llmApiKey | string | 必填 | - | LLM服务API密钥 |
| llmUrl | string | 必填 | - | LLM服务API地址 |
| llmModelName | string | 必填 | - | LLM模型名称 |
| timeoutMillisecond | number | 选填 | 30000 | API调用超时时间毫秒 |
## 搜索引擎通用配置
| 名称 | 数据类型 | 填写要求 | 默认值 | 描述 |
|------|----------|----------|--------|------|
| type | string | 必填 | - | 引擎类型google/bing/arxiv/elasticsearch/quark |
| serviceName | string | 必填 | - | 后端服务名称 |
| servicePort | number | 必填 | - | 后端服务端口 |
| apiKey | string | 必填 | - | 搜索引擎API密钥/Aliyun AccessKey |
| count | number | 选填 | 10 | 单次搜索返回结果数量 |
| start | number | 选填 | 0 | 搜索结果偏移量从第start+1条结果开始返回 |
| timeoutMillisecond | number | 选填 | 5000 | API调用超时时间毫秒 |
| optionArgs | map | 选填 | - | 搜索引擎特定参数key-value格式 |
## Google 特定配置
| 名称 | 数据类型 | 填写要求 | 默认值 | 描述 |
|------|----------|----------|--------|------|
| cx | string | 必填 | - | Google自定义搜索引擎ID用于指定搜索范围 |
## Arxiv 特定配置
| 名称 | 数据类型 | 填写要求 | 默认值 | 描述 |
|------|----------|----------|--------|------|
| arxivCategory | string | 选填 | - | 搜索的论文[类别](https://arxiv.org/category_taxonomy)如cs.AI, cs.CL等 |
## Elasticsearch 特定配置
| 名称 | 数据类型 | 填写要求 | 默认值 | 描述 |
|------|----------|----------|--------|------|
| index | string | 必填 | - | 要搜索的Elasticsearch索引名称 |
| contentField | string | 必填 | - | 要查询的内容字段名称 |
| linkField | string | 必填 | - | 结果链接字段名称 |
| titleField | string | 必填 | - | 结果标题字段名称 |
## Quark 特定配置
| 名称 | 数据类型 | 填写要求 | 默认值 | 描述 |
|------|----------|----------|--------|------|
| secretKey | string | 必填 | - | Aliyun SecretKey |
| endpoint | string | 选填 | iqs.cn-zhangjiakou.aliyuncs.com | 请求搜索引擎服务时的接入点 |
## 配置示例
### 基础配置(单搜索引擎)
```yaml
needReference: true
searchFrom:
- type: google
apiKey: "your-google-api-key"
cx: "search-engine-id"
serviceName: "google-svc.dns"
servicePort: 443
count: 5
optionArgs:
fileType: "pdf"
```
### Arxiv搜索配置
```yaml
searchFrom:
- type: arxiv
serviceName: "arxiv-svc.dns"
servicePort: 443
arxivCategory: "cs.AI"
count: 10
```
### 夸克搜索配置
```yaml
searchFrom:
- type: quark
serviceName: "quark-svc.dns"
servicePort: 443
apiKey: "aliyun accessKey"
count: 10 # 搜索网页数最多10条
secretKey: "aliyun secretKey"
endpoint: "iqs.cn-zhangjiakou.aliyuncs.com"
```
### 多搜索引擎配置
```yaml
defaultLang: "en-US"
promptTemplate: |
# Search Results:
{search_results}
# Please answer this question:
{question}
searchFrom:
- type: google
apiKey: "google-key"
cx: "github-search-id" # 专门搜索GitHub内容的搜索引擎ID
serviceName: "google-svc.dns"
servicePort: 443
- type: google
apiKey: "google-key"
cx: "news-search-id" # 专门搜索Google News内容的搜索引擎ID
serviceName: "google-svc.dns"
servicePort: 443
- type: bing
apiKey: "bing-key"
serviceName: "bing-svc.dns"
servicePort: 443
optionArgs:
answerCount: "5"
```
### 并发查询配置
由于搜索引擎对单次查询返回结果数量有限制如Google限制单次最多返回100条结果可以通过以下方式获取更多结果
1. 设置较小的count值如10
2. 通过start参数指定结果偏移量
3. 并发发起多个查询请求每个请求的start值按count递增
例如要获取30条结果可以配置count=10并并发发起3个查询每个查询的start值分别为0,10,20
```yaml
searchFrom:
- type: google
apiKey: "your-google-api-key"
cx: "search-engine-id"
serviceName: "google-svc.dns"
servicePort: 443
start: 0
count: 10
- type: google
apiKey: "your-google-api-key"
cx: "search-engine-id"
serviceName: "google-svc.dns"
servicePort: 443
start: 10
count: 10
- type: google
apiKey: "your-google-api-key"
cx: "search-engine-id"
serviceName: "google-svc.dns"
servicePort: 443
start: 20
count: 10
```
注意,过高的并发可能会导致限流,需要根据实际情况调整。
### Elasticsearch 配置(用于对接私有知识库)
```yaml
searchFrom:
- type: elasticsearch
serviceName: "es-svc.static"
# 固定地址服务的端口默认是80
servicePort: 80
index: "knowledge_base"
contentField: "content"
linkField: "url"
titleField: "title"
```
### 自定义引用格式
```yaml
needReference: true
referenceFormat: "### 数据来源\n%s"
searchFrom:
- type: bing
apiKey: "your-bing-key"
serviceName: "search-service.dns"
servicePort: 8080
```
### 搜索重写配置
```yaml
searchFrom:
- type: google
apiKey: "your-google-api-key"
cx: "search-engine-id"
serviceName: "google-svc.dns"
servicePort: 443
searchRewrite:
llmServiceName: "llm-svc.dns"
llmServicePort: 443
llmApiKey: "your-llm-api-key"
llmUrl: "https://api.example.com/v1/chat/completions"
llmModelName: "gpt-3.5-turbo"
timeoutMillisecond: 15000
```
## 注意事项
1. 提示词模版必须包含`{search_results}``{question}`占位符,可选使用`{cur_date}`插入当前日期格式2006年1月2日
2. 默认模板包含搜索结果处理指引和回答规范,如无特殊需要可以直接用默认模板,否则请根据实际情况修改
3. 多个搜索引擎是并行查询,总超时时间 = 所有搜索引擎配置中最大timeoutMillisecond值 + 处理时间
4. Arxiv搜索不需要API密钥但可以指定论文类别arxivCategory来缩小搜索范围

View File

@@ -0,0 +1,243 @@
---
title: AI Search Enhancement
keywords: [higress, ai search]
description: Higress supports enhancing the accuracy and timeliness of responses from models like DeepSeek-R1 by integrating real-time results from search engines (Google/Bing/Arxiv/Elasticsearch etc.)
---
## Feature Description
The `ai-search` plugin enhances the accuracy and timeliness of AI model responses by integrating real-time results from search engines (Google/Bing/Arxiv/Elasticsearch etc.). The plugin automatically injects search results into the prompt template and determines whether to add reference sources in the final response based on configuration.
## Runtime Properties
Plugin execution stage: `Default stage`
Plugin execution priority: `440`
## Configuration Fields
| Name | Data Type | Requirement | Default Value | Description |
|------|-----------|-------------|---------------|-------------|
| needReference | bool | Optional | false | Whether to add reference sources in the response |
| referenceFormat | string | Optional | `"**References:**\n%s"` | Reference content format, must include %s placeholder |
| defaultLang | string | Optional | - | Default search language code (e.g. zh-CN/en-US) |
| promptTemplate | string | Optional | Built-in template | Prompt template, must include `{search_results}` and `{question}` placeholders |
| searchFrom | array of object | Required | - | Refer to search engine configuration below, at least one engine must be configured |
| searchRewrite | object | Optional | - | Search rewrite configuration, used to optimize search queries using an LLM service |
## Search Rewrite Description
The search rewrite feature uses an LLM service to analyze and optimize the user's original query, which can:
1. Convert natural language queries into keyword combinations better suited for search engines
2. For Arxiv paper searches, automatically identify relevant paper categories and add category constraints
3. For private knowledge base searches, break down long queries into multiple precise keyword combinations
It is strongly recommended to enable this feature when using Arxiv or Elasticsearch engines. For Arxiv searches, it can accurately identify paper domains and optimize English keywords; for private knowledge base searches, it can provide more precise keyword matching, significantly improving search effectiveness.
## Search Rewrite Configuration
| Name | Data Type | Requirement | Default Value | Description |
|------|-----------|-------------|---------------|-------------|
| llmServiceName | string | Required | - | LLM service name |
| llmServicePort | number | Required | - | LLM service port |
| llmApiKey | string | Required | - | LLM service API key |
| llmUrl | string | Required | - | LLM service API URL |
| llmModelName | string | Required | - | LLM model name |
| timeoutMillisecond | number | Optional | 30000 | API call timeout (milliseconds) |
## Search Engine Common Configuration
| Name | Data Type | Requirement | Default Value | Description |
|------|-----------|-------------|---------------|-------------|
| type | string | Required | - | Engine type (google/bing/arxiv/elasticsearch/quark) |
| apiKey | string | Required | - | Search engine API key/Aliyun AccessKey |
| serviceName | string | Required | - | Backend service name |
| servicePort | number | Required | - | Backend service port |
| count | number | Optional | 10 | Number of results returned per search |
| start | number | Optional | 0 | Search result offset (start returning from the start+1 result) |
| timeoutMillisecond | number | Optional | 5000 | API call timeout (milliseconds) |
| optionArgs | map | Optional | - | Search engine specific parameters (key-value format) |
## Google Specific Configuration
| Name | Data Type | Requirement | Default Value | Description |
|------|-----------|-------------|---------------|-------------|
| cx | string | Required | - | Google Custom Search Engine ID, used to specify search scope |
## Arxiv Specific Configuration
| Name | Data Type | Requirement | Default Value | Description |
|------|-----------|-------------|---------------|-------------|
| arxivCategory | string | Optional | - | Search paper [category](https://arxiv.org/category_taxonomy) (e.g. cs.AI, cs.CL etc.) |
## Elasticsearch Specific Configuration
| Name | Data Type | Requirement | Default Value | Description |
|------|-----------|-------------|---------------|-------------|
| index | string | Required | - | Elasticsearch index name to search |
| contentField | string | Required | - | Content field name to query |
| linkField | string | Required | - | Result link field name |
| titleField | string | Required | - | Result title field name |
## Quark Specific Configuration
| Name | Data Type | Requirement | Default Value | Description |
|------|----------|----------|--------|------|
| secretKey | string | Required | - | Aliyun SecretKey |
| endpoint | string | Optional | iqs.cn-zhangjiakou.aliyuncs.com | Endpoint for accessing quark |
## Configuration Examples
### Basic Configuration (Single Search Engine)
```yaml
needReference: true
searchFrom:
- type: google
apiKey: "your-google-api-key"
cx: "search-engine-id"
serviceName: "google-svc.dns"
servicePort: 443
count: 5
optionArgs:
fileType: "pdf"
```
### Arxiv Search Configuration
```yaml
searchFrom:
- type: arxiv
serviceName: "arxiv-svc.dns"
servicePort: 443
arxivCategory: "cs.AI"
count: 10
```
### Quark Search Configuration
```yaml
searchFrom:
- type: quark
serviceName: "quark-svc.dns"
servicePort: 443
apiKey: "aliyun accessKey"
count: 10
secretKey: "aliyun secretKey"
endpoint: "iqs.cn-zhangjiakou.aliyuncs.com"
```
### Multiple Search Engines Configuration
```yaml
defaultLang: "en-US"
promptTemplate: |
# Search Results:
{search_results}
# Please answer this question:
{question}
searchFrom:
- type: google
apiKey: "google-key"
cx: "github-search-id" # Search engine ID specifically for GitHub content
serviceName: "google-svc.dns"
servicePort: 443
- type: google
apiKey: "google-key"
cx: "news-search-id" # Search engine ID specifically for Google News content
serviceName: "google-svc.dns"
servicePort: 443
- type: bing
apiKey: "bing-key"
serviceName: "bing-svc.dns"
servicePort: 443
optionArgs:
answerCount: "5"
```
### Concurrent Query Configuration
Since search engines limit the number of results per query (e.g. Google limits to 100 results per query), you can get more results by:
1. Setting a smaller count value (e.g. 10)
2. Specifying result offset with start parameter
3. Concurrently initiating multiple query requests, with each request's start value incrementing by count
For example, to get 30 results, configure count=10 and concurrently initiate 3 queries with start values 0,10,20 respectively:
```yaml
searchFrom:
- type: google
apiKey: "your-google-api-key"
cx: "search-engine-id"
serviceName: "google-svc.dns"
servicePort: 443
start: 0
count: 10
- type: google
apiKey: "your-google-api-key"
cx: "search-engine-id"
serviceName: "google-svc.dns"
servicePort: 443
start: 10
count: 10
- type: google
apiKey: "your-google-api-key"
cx: "search-engine-id"
serviceName: "google-svc.dns"
servicePort: 443
start: 20
count: 10
```
Note that excessive concurrency may lead to rate limiting, adjust according to actual situation.
### Elasticsearch Configuration (For Private Knowledge Base Integration)
```yaml
searchFrom:
- type: elasticsearch
serviceName: "es-svc.static"
# static ip service use 80 as default port
servicePort: 80
index: "knowledge_base"
contentField: "content"
linkField: "url"
titleField: "title"
```
### Custom Reference Format
```yaml
needReference: true
referenceFormat: "### Data Sources\n%s"
searchFrom:
- type: bing
apiKey: "your-bing-key"
serviceName: "search-service.dns"
servicePort: 8080
```
### Search Rewrite Configuration
```yaml
searchFrom:
- type: google
apiKey: "your-google-api-key"
cx: "search-engine-id"
serviceName: "google-svc.dns"
servicePort: 443
searchRewrite:
llmServiceName: "llm-svc.dns"
llmServicePort: 443
llmApiKey: "your-llm-api-key"
llmUrl: "https://api.example.com/v1/chat/completions"
llmModelName: "gpt-3.5-turbo"
timeoutMillisecond: 15000
```
## Notes
1. The prompt template must include `{search_results}` and `{question}` placeholders, optionally use `{cur_date}` to insert current date (format: January 2, 2006)
2. The default template includes search results processing instructions and response specifications, you can use the default template unless there are special needs
3. Multiple search engines query in parallel, total timeout = maximum timeoutMillisecond value among all search engine configurations + processing time
4. Arxiv search doesn't require API key, but you can specify paper category (arxivCategory) to narrow search scope

View File

@@ -0,0 +1,134 @@
package arxiv
import (
"bytes"
"errors"
"fmt"
"net/http"
"net/url"
"strings"
"github.com/alibaba/higress/plugins/wasm-go/pkg/wrapper"
"github.com/antchfx/xmlquery"
"github.com/tidwall/gjson"
"github.com/alibaba/higress/plugins/wasm-go/extensions/ai-search/engine"
)
// ArxivSearch queries the arXiv Atom API (export.arxiv.org) for papers.
type ArxivSearch struct {
    optionArgs map[string]string  // extra query parameters appended verbatim to the URL
    start int                     // result offset (0-based)
    count int                     // max results per request (default 10)
    timeoutMillisecond uint32     // HTTP timeout (default 5000ms)
    client wrapper.HttpClient     // client bound to the configured upstream service
    arxivCategory string          // optional default "cat:" filter when the context sets none
}
// NewArxivSearch builds an ArxivSearch engine from the plugin's JSON
// configuration. serviceName and servicePort are required; count defaults
// to 10 results and timeoutMillisecond to 5000ms when unset.
func NewArxivSearch(config *gjson.Result) (*ArxivSearch, error) {
    serviceName := config.Get("serviceName").String()
    if serviceName == "" {
        return nil, errors.New("serviceName not found")
    }
    servicePort := config.Get("servicePort").Int()
    if servicePort == 0 {
        return nil, errors.New("servicePort not found")
    }
    search := &ArxivSearch{
        client: wrapper.NewClusterClient(wrapper.FQDNCluster{
            FQDN: serviceName,
            Port: servicePort,
        }),
        start:              int(config.Get("start").Uint()),
        count:              int(config.Get("count").Uint()),
        timeoutMillisecond: uint32(config.Get("timeoutMillisecond").Uint()),
        arxivCategory:      config.Get("arxivCategory").String(),
        optionArgs:         map[string]string{},
    }
    if search.count == 0 {
        search.count = 10
    }
    if search.timeoutMillisecond == 0 {
        search.timeoutMillisecond = 5000
    }
    // Empty-valued option args are dropped, matching the original behavior.
    for key, value := range config.Get("optionArgs").Map() {
        if s := value.String(); s != "" {
            search.optionArgs[key] = s
        }
    }
    return search, nil
}
// NeedExectue reports whether this engine should handle the request; arxiv
// only runs when the context explicitly asks for engine type "arxiv".
// NOTE(review): "Exectue" is a typo for "Execute", but the name is fixed by
// the engine.SearchEngine interface and cannot be changed here alone.
func (a ArxivSearch) NeedExectue(ctx engine.SearchContext) bool {
    return ctx.EngineType == "arxiv"
}

// Client returns the HTTP client bound to the configured arxiv service.
func (a ArxivSearch) Client() wrapper.HttpClient {
    return a.client
}
// CallArgs builds the arXiv API query. Each query term is wrapped as
// "all:<term>" and the terms are AND-ed together; an optional category
// filter (from the request context, falling back to the configured
// default) is appended as "cat:<category>".
func (a ArxivSearch) CallArgs(ctx engine.SearchContext) engine.CallArgs {
    terms := make([]string, 0, len(ctx.Querys))
    for _, q := range ctx.Querys {
        terms = append(terms, fmt.Sprintf("all:%s", url.QueryEscape(q)))
    }
    searchQuery := strings.Join(terms, "+AND+")
    category := ctx.ArxivCategory
    if category == "" {
        category = a.arxivCategory
    }
    if category != "" {
        searchQuery = fmt.Sprintf("%s+AND+cat:%s", searchQuery, category)
    }
    queryUrl := fmt.Sprintf("https://export.arxiv.org/api/query?search_query=%s&max_results=%d&start=%d",
        searchQuery, a.count, a.start)
    var extraArgs []string
    for key, value := range a.optionArgs {
        extraArgs = append(extraArgs, fmt.Sprintf("%s=%s", key, url.QueryEscape(value)))
    }
    if len(extraArgs) > 0 {
        queryUrl = queryUrl + "&" + strings.Join(extraArgs, "&")
    }
    return engine.CallArgs{
        Method:             http.MethodGet,
        Url:                queryUrl,
        Headers:            [][2]string{{"Accept", "application/atom+xml"}},
        TimeoutMillisecond: a.timeoutMillisecond,
    }
}
// ParseResult converts the arXiv Atom feed into normalized SearchResults.
// Fix: xmlquery's SelectElement returns nil when a child element is
// missing; the original called InnerText() on those nodes unconditionally
// and would panic on a malformed or partial entry. Malformed entries are
// now skipped instead.
func (a ArxivSearch) ParseResult(ctx engine.SearchContext, response []byte) []engine.SearchResult {
    var results []engine.SearchResult
    doc, err := xmlquery.Parse(bytes.NewReader(response))
    if err != nil {
        return results
    }
    for _, entry := range xmlquery.Find(doc, "//entry") {
        titleNode := entry.SelectElement("title")
        summaryNode := entry.SelectElement("summary")
        publishedNode := entry.SelectElement("published")
        if titleNode == nil || summaryNode == nil || publishedNode == nil {
            // Entry is missing a required element; skip rather than panic.
            continue
        }
        // Prefer the human-readable abstract page link.
        link := ""
        for _, l := range entry.SelectElements("link") {
            if l.SelectAttr("rel") == "alternate" && l.SelectAttr("type") == "text/html" {
                link = l.SelectAttr("href")
                break
            }
        }
        var authorNames []string
        for _, author := range entry.SelectElements("author") {
            if nameNode := author.SelectElement("name"); nameNode != nil {
                authorNames = append(authorNames, nameNode.InnerText())
            }
        }
        content := fmt.Sprintf("%s\nAuthors: %s\nPublication time: %s",
            summaryNode.InnerText(), strings.Join(authorNames, ", "), publishedNode.InnerText())
        result := engine.SearchResult{
            Title:   titleNode.InnerText(),
            Link:    link,
            Content: content,
        }
        if result.Valid() {
            results = append(results, result)
        }
    }
    return results
}

View File

@@ -0,0 +1,128 @@
package bing
import (
"errors"
"fmt"
"net/http"
"net/url"
"strings"
"github.com/alibaba/higress/plugins/wasm-go/pkg/wrapper"
"github.com/tidwall/gjson"
"github.com/alibaba/higress/plugins/wasm-go/extensions/ai-search/engine"
)
// BingSearch queries the Bing Web Search v7 API.
type BingSearch struct {
    optionArgs map[string]string  // extra query parameters appended verbatim to the URL
    apiKey string                 // sent as the Ocp-Apim-Subscription-Key header
    start int                     // result offset (0-based, API "offset")
    count int                     // max results per request (default 10)
    timeoutMillisecond uint32     // HTTP timeout (default 5000ms)
    client wrapper.HttpClient     // client bound to the configured upstream service
}
// NewBingSearch builds a BingSearch engine from the plugin's JSON
// configuration. apiKey, serviceName and servicePort are required; count
// defaults to 10 results and timeoutMillisecond to 5000ms when unset.
func NewBingSearch(config *gjson.Result) (*BingSearch, error) {
    apiKey := config.Get("apiKey").String()
    if apiKey == "" {
        return nil, errors.New("apiKey not found")
    }
    serviceName := config.Get("serviceName").String()
    if serviceName == "" {
        return nil, errors.New("serviceName not found")
    }
    servicePort := config.Get("servicePort").Int()
    if servicePort == 0 {
        return nil, errors.New("servicePort not found")
    }
    search := &BingSearch{
        apiKey: apiKey,
        client: wrapper.NewClusterClient(wrapper.FQDNCluster{
            FQDN: serviceName,
            Port: servicePort,
        }),
        start:              int(config.Get("start").Uint()),
        count:              int(config.Get("count").Uint()),
        timeoutMillisecond: uint32(config.Get("timeoutMillisecond").Uint()),
        optionArgs:         map[string]string{},
    }
    if search.count == 0 {
        search.count = 10
    }
    if search.timeoutMillisecond == 0 {
        search.timeoutMillisecond = 5000
    }
    // Empty-valued option args are dropped, matching the original behavior.
    for key, value := range config.Get("optionArgs").Map() {
        if s := value.String(); s != "" {
            search.optionArgs[key] = s
        }
    }
    return search, nil
}
// NeedExectue reports whether BingSearch should handle the request: it
// runs for general internet searches (empty engine type or "internet").
func (b BingSearch) NeedExectue(ctx engine.SearchContext) bool {
    return ctx.EngineType == "" || ctx.EngineType == "internet"
}

// Client returns the HTTP client bound to the configured Bing service.
func (b BingSearch) Client() wrapper.HttpClient {
    return b.client
}
// CallArgs builds the Bing Web Search v7 request: query terms are joined
// with spaces, paging uses count/offset, and optionArgs plus the context
// language (as "mkt") are appended as extra query parameters.
func (b BingSearch) CallArgs(ctx engine.SearchContext) engine.CallArgs {
    queryUrl := fmt.Sprintf("https://api.bing.microsoft.com/v7.0/search?q=%s&count=%d&offset=%d",
        url.QueryEscape(strings.Join(ctx.Querys, " ")), b.count, b.start)
    params := []string{}
    for key, value := range b.optionArgs {
        params = append(params, fmt.Sprintf("%s=%s", key, url.QueryEscape(value)))
    }
    if ctx.Language != "" {
        params = append(params, fmt.Sprintf("mkt=%s", ctx.Language))
    }
    if len(params) > 0 {
        queryUrl = queryUrl + "&" + strings.Join(params, "&")
    }
    return engine.CallArgs{
        Method:             http.MethodGet,
        Url:                queryUrl,
        Headers:            [][2]string{{"Ocp-Apim-Subscription-Key", b.apiKey}},
        TimeoutMillisecond: b.timeoutMillisecond,
    }
}
// ParseResult converts a Bing v7 response into normalized SearchResults:
// web pages (including their deep links) first, then news articles.
func (b BingSearch) ParseResult(ctx engine.SearchContext, response []byte) []engine.SearchResult {
    var results []engine.SearchResult
    appendValid := func(r engine.SearchResult) {
        if r.Valid() {
            results = append(results, r)
        }
    }
    root := gjson.ParseBytes(response)
    for _, page := range root.Get("webPages.value").Array() {
        appendValid(engine.SearchResult{
            Title:   page.Get("name").String(),
            Link:    page.Get("url").String(),
            Content: page.Get("snippet").String(),
        })
        for _, deep := range page.Get("deepLinks").Array() {
            appendValid(engine.SearchResult{
                Title:   deep.Get("name").String(),
                Link:    deep.Get("url").String(),
                Content: deep.Get("snippet").String(),
            })
        }
    }
    for _, article := range root.Get("news.value").Array() {
        appendValid(engine.SearchResult{
            Title:   article.Get("name").String(),
            Link:    article.Get("url").String(),
            Content: article.Get("description").String(),
        })
    }
    return results
}

View File

@@ -0,0 +1,114 @@
package elasticsearch
import (
    "encoding/json"
    "errors"
    "fmt"
    "net/http"
    "strings"

    "github.com/alibaba/higress/plugins/wasm-go/extensions/ai-search/engine"
    "github.com/alibaba/higress/plugins/wasm-go/pkg/wrapper"
    "github.com/tidwall/gjson"
)
// ElasticsearchSearch queries a private Elasticsearch index as a
// knowledge-base backend.
type ElasticsearchSearch struct {
    client wrapper.HttpClient  // client bound to the configured ES service
    index string               // index name used in the /<index>/_search path
    contentField string        // _source field holding the document body
    linkField string           // _source field holding the document URL
    titleField string          // _source field holding the document title
    start int                  // "from" paging offset
    count int                  // "size" page size (default 10)
    timeoutMillisecond uint32  // HTTP timeout (default 5000ms)
}
// NewElasticsearchSearch builds an ElasticsearchSearch engine from the
// plugin's JSON configuration. serviceName, servicePort, index,
// contentField, linkField and titleField are all required; count defaults
// to 10 and timeoutMillisecond to 5000ms when unset.
func NewElasticsearchSearch(config *gjson.Result) (*ElasticsearchSearch, error) {
    serviceName := config.Get("serviceName").String()
    if serviceName == "" {
        return nil, errors.New("serviceName not found")
    }
    servicePort := config.Get("servicePort").Int()
    if servicePort == 0 {
        return nil, errors.New("servicePort not found")
    }
    search := &ElasticsearchSearch{
        client: wrapper.NewClusterClient(wrapper.FQDNCluster{
            FQDN: serviceName,
            Port: servicePort,
        }),
        index:              config.Get("index").String(),
        contentField:       config.Get("contentField").String(),
        linkField:          config.Get("linkField").String(),
        titleField:         config.Get("titleField").String(),
        timeoutMillisecond: uint32(config.Get("timeoutMillisecond").Uint()),
        start:              int(config.Get("start").Uint()),
        count:              int(config.Get("count").Uint()),
    }
    if search.index == "" {
        return nil, errors.New("index not found")
    }
    if search.contentField == "" {
        return nil, errors.New("contentField not found")
    }
    if search.linkField == "" {
        return nil, errors.New("linkField not found")
    }
    if search.titleField == "" {
        return nil, errors.New("titleField not found")
    }
    if search.timeoutMillisecond == 0 {
        search.timeoutMillisecond = 5000
    }
    if search.count == 0 {
        search.count = 10
    }
    return search, nil
}
// NeedExectue reports whether ElasticsearchSearch should handle the
// request: it runs only when the context asks for the "private"
// (knowledge-base) engine type.
func (e ElasticsearchSearch) NeedExectue(ctx engine.SearchContext) bool {
    return ctx.EngineType == "private"
}

// Client returns the HTTP client bound to the configured ES service.
func (e ElasticsearchSearch) Client() wrapper.HttpClient {
    return e.client
}
// CallArgs builds a match query against the configured content field with
// AND semantics, paged via from/size.
// Fix: the original interpolated the raw user query (and field name) into
// a JSON string with fmt.Sprintf, so a quote or backslash in the query
// produced invalid JSON and allowed the query text to inject arbitrary
// keys into the request body. The body is now built with encoding/json,
// which escapes all string content correctly.
func (e ElasticsearchSearch) CallArgs(ctx engine.SearchContext) engine.CallArgs {
    query := map[string]interface{}{
        "query": map[string]interface{}{
            "match": map[string]interface{}{
                e.contentField: map[string]interface{}{
                    "query":    strings.Join(ctx.Querys, " "),
                    "operator": "AND",
                },
            },
        },
    }
    searchBody, err := json.Marshal(query)
    if err != nil {
        // Marshaling string-keyed maps of strings cannot realistically
        // fail; fall back to a match-all-free empty body to avoid a panic.
        searchBody = []byte(`{}`)
    }
    return engine.CallArgs{
        Method: http.MethodPost,
        Url:    fmt.Sprintf("/%s/_search?from=%d&size=%d", e.index, e.start, e.count),
        Headers: [][2]string{
            {"Content-Type", "application/json"},
        },
        Body:               searchBody,
        TimeoutMillisecond: e.timeoutMillisecond,
    }
}
// ParseResult maps Elasticsearch hits (hits.hits[]._source) onto
// SearchResult using the configured title/link/content field names.
func (e ElasticsearchSearch) ParseResult(ctx engine.SearchContext, response []byte) []engine.SearchResult {
    var results []engine.SearchResult
    hits := gjson.ParseBytes(response).Get("hits.hits").Array()
    for _, hit := range hits {
        src := hit.Get("_source")
        candidate := engine.SearchResult{
            Title:   src.Get(e.titleField).String(),
            Link:    src.Get(e.linkField).String(),
            Content: src.Get(e.contentField).String(),
        }
        if candidate.Valid() {
            results = append(results, candidate)
        }
    }
    return results
}

View File

@@ -0,0 +1,120 @@
package google
import (
"errors"
"fmt"
"net/http"
"net/url"
"strings"
"github.com/alibaba/higress/plugins/wasm-go/pkg/wrapper"
"github.com/tidwall/gjson"
"github.com/alibaba/higress/plugins/wasm-go/extensions/ai-search/engine"
)
// GoogleSearch queries the Google Programmable Search (Custom Search JSON)
// API.
type GoogleSearch struct {
    optionArgs map[string]string  // extra query parameters appended verbatim to the URL
    apiKey string                 // API key passed as the "key" parameter
    cx string                     // Programmable Search engine ID
    start int                     // 0-based offset; the API "start" parameter is start+1
    count int                     // results per request ("num", default 10; API max is 10)
    timeoutMillisecond uint32     // HTTP timeout (default 5000ms)
    client wrapper.HttpClient     // client bound to the configured upstream service
}
// NewGoogleSearch builds a GoogleSearch engine from the plugin's JSON
// configuration. apiKey, cx, serviceName and servicePort are required;
// count defaults to 10 and timeoutMillisecond to 5000ms. The Custom Search
// JSON API caps num at 10 and start+num at 100, which is validated here.
// Fix: the validation error text said "count must be less than 10" while
// the check actually permits count == 10; it also violated the Go
// convention that error strings are lower-case without trailing
// punctuation. The message now matches the check.
func NewGoogleSearch(config *gjson.Result) (*GoogleSearch, error) {
    engine := &GoogleSearch{}
    engine.apiKey = config.Get("apiKey").String()
    if engine.apiKey == "" {
        return nil, errors.New("apiKey not found")
    }
    engine.cx = config.Get("cx").String()
    if engine.cx == "" {
        return nil, errors.New("cx not found")
    }
    serviceName := config.Get("serviceName").String()
    if serviceName == "" {
        return nil, errors.New("serviceName not found")
    }
    servicePort := config.Get("servicePort").Int()
    if servicePort == 0 {
        return nil, errors.New("servicePort not found")
    }
    engine.client = wrapper.NewClusterClient(wrapper.FQDNCluster{
        FQDN: serviceName,
        Port: servicePort,
    })
    engine.start = int(config.Get("start").Uint())
    engine.count = int(config.Get("count").Uint())
    if engine.count == 0 {
        engine.count = 10
    }
    if engine.count > 10 || engine.start+engine.count > 100 {
        return nil, errors.New("count must be less than or equal to 10, and start + count must be less than or equal to 100")
    }
    engine.timeoutMillisecond = uint32(config.Get("timeoutMillisecond").Uint())
    if engine.timeoutMillisecond == 0 {
        engine.timeoutMillisecond = 5000
    }
    engine.optionArgs = map[string]string{}
    // Empty-valued option args are dropped.
    for key, value := range config.Get("optionArgs").Map() {
        if valStr := value.String(); valStr != "" {
            engine.optionArgs[key] = valStr
        }
    }
    return engine, nil
}
// NeedExectue reports whether GoogleSearch should handle the request: it
// runs for general internet searches (empty engine type or "internet").
func (g GoogleSearch) NeedExectue(ctx engine.SearchContext) bool {
    return ctx.EngineType == "" || ctx.EngineType == "internet"
}

// Client returns the HTTP client bound to the configured Google service.
func (g GoogleSearch) Client() wrapper.HttpClient {
    return g.client
}
// CallArgs builds the Custom Search JSON API request. The API's "start"
// parameter is 1-based, hence the +1 offset; optionArgs and the context
// language (as an "lr=lang_xx" restriction) are appended as extra
// parameters.
func (g GoogleSearch) CallArgs(ctx engine.SearchContext) engine.CallArgs {
    queryUrl := fmt.Sprintf("https://customsearch.googleapis.com/customsearch/v1?cx=%s&q=%s&num=%d&key=%s&start=%d",
        g.cx, url.QueryEscape(strings.Join(ctx.Querys, " ")), g.count, g.apiKey, g.start+1)
    params := []string{}
    for key, value := range g.optionArgs {
        params = append(params, fmt.Sprintf("%s=%s", key, url.QueryEscape(value)))
    }
    if ctx.Language != "" {
        params = append(params, fmt.Sprintf("lr=lang_%s", ctx.Language))
    }
    if len(params) > 0 {
        queryUrl = queryUrl + "&" + strings.Join(params, "&")
    }
    return engine.CallArgs{
        Method:             http.MethodGet,
        Url:                queryUrl,
        Headers:            [][2]string{{"Accept", "application/json"}},
        TimeoutMillisecond: g.timeoutMillisecond,
    }
}
// ParseResult maps Custom Search items onto SearchResult. When the page's
// Open Graph description is present in the pagemap metatags, it is
// appended to the snippet for richer context.
func (g GoogleSearch) ParseResult(ctx engine.SearchContext, response []byte) []engine.SearchResult {
    var results []engine.SearchResult
    for _, item := range gjson.ParseBytes(response).Get("items").Array() {
        snippet := item.Get("snippet").String()
        if ogDesc := item.Get("pagemap.metatags.0.og:description").String(); ogDesc != "" {
            snippet = fmt.Sprintf("%s\n...\n%s", snippet, ogDesc)
        }
        candidate := engine.SearchResult{
            Title:   item.Get("title").String(),
            Link:    item.Get("link").String(),
            Content: snippet,
        }
        if candidate.Valid() {
            results = append(results, candidate)
        }
    }
    return results
}

View File

@@ -0,0 +1,194 @@
package quark
import (
"crypto/hmac"
"crypto/rand"
"crypto/sha256"
"encoding/hex"
"errors"
"fmt"
"net/http"
"net/url"
"sort"
"strings"
"time"
"github.com/alibaba/higress/plugins/wasm-go/pkg/wrapper"
"github.com/tidwall/gjson"
"github.com/alibaba/higress/plugins/wasm-go/extensions/ai-search/engine"
)
// QuarkSearch queries Alibaba Cloud IQS (Quark) generic web search using
// ACS3-HMAC-SHA256 request signing.
type QuarkSearch struct {
    apiKey string              // Alibaba Cloud AccessKey ID (the Credential in the auth header)
    secretKey string           // AccessKey secret used to sign requests
    timeoutMillisecond uint32  // HTTP timeout (default 5000ms)
    client wrapper.HttpClient  // client bound to the configured upstream service
    count uint32               // max number of page items kept from the response (default 10)
    endpoint string            // IQS endpoint host; defaults to iqs.cn-zhangjiakou.aliyuncs.com
}
const (
    // Path is the genericSearch command path of the linked-retrieval API.
    Path = "/linked-retrieval/linked-retrieval-entry/v2/linkedRetrieval/commands/genericSearch"
    // ContentSha256 is sha256("") — the request is a GET with an empty body.
    ContentSha256 = "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855" // for empty body
    Action = "GenericSearch"
    Version = "2024-11-11"
    SignatureAlgorithm = "ACS3-HMAC-SHA256"
    // SignedHeaders lists, in sorted order, the headers covered by the signature.
    SignedHeaders = "host;x-acs-action;x-acs-content-sha256;x-acs-date;x-acs-signature-nonce;x-acs-version"
)
// urlEncoding percent-encodes rawStr for the canonical query string.
// url.PathEscape leaves the sub-delimiters + : = & $ @ unescaped, but the
// ACS3 canonical form requires them encoded, so they are replaced
// explicitly afterwards.
func urlEncoding(rawStr string) string {
    encoded := url.PathEscape(rawStr)
    replacements := [][2]string{
        {"+", "%2B"},
        {":", "%3A"},
        {"=", "%3D"},
        {"&", "%26"},
        {"$", "%24"},
        {"@", "%40"},
    }
    for _, r := range replacements {
        encoded = strings.ReplaceAll(encoded, r[0], r[1])
    }
    return encoded
}
// getSignature returns the lowercase-hex HMAC-SHA256 of stringToSign keyed
// with secret, used as the Signature component of the Authorization header.
func getSignature(stringToSign, secret string) string {
    mac := hmac.New(sha256.New, []byte(secret))
    mac.Write([]byte(stringToSign))
    return hex.EncodeToString(mac.Sum(nil))
}
// getCanonicalHeaders renders the signed headers as "key:value" lines in
// lexicographic order, each terminated by '\n', as required by the
// ACS3-HMAC-SHA256 canonical request format.
// Fix: the original used sort.Slice with a `<=` comparator, which violates
// sort's strict-ordering ("less") contract; sort.Strings gives the same
// deterministic lexicographic order correctly.
func getCanonicalHeaders(params map[string]string) string {
    paramArray := make([]string, 0, len(params))
    for k, v := range params {
        paramArray = append(paramArray, k+":"+v)
    }
    sort.Strings(paramArray)
    return strings.Join(paramArray, "\n") + "\n"
}
// getHasedString returns the lowercase-hex SHA-256 digest of input.
// NOTE(review): "Hased" is a typo for "Hashed", but the name is kept
// because CallArgs in this package calls it by this name.
func getHasedString(input string) string {
    sum := sha256.Sum256([]byte(input))
    return hex.EncodeToString(sum[:])
}
func generateHexID(length int) (string, error) {
bytes := make([]byte, length/2)
if _, err := rand.Read(bytes); err != nil {
return "", err
}
return hex.EncodeToString(bytes), nil
}
// NewQuarkSearch builds a QuarkSearch engine from the plugin's JSON
// configuration. apiKey, secretKey, serviceName and servicePort are
// required; endpoint defaults to the Zhangjiakou IQS endpoint, count to 10
// and timeoutMillisecond to 5000ms when unset.
func NewQuarkSearch(config *gjson.Result) (*QuarkSearch, error) {
    apiKey := config.Get("apiKey").String()
    if apiKey == "" {
        return nil, errors.New("apiKey not found")
    }
    secretKey := config.Get("secretKey").String()
    if secretKey == "" {
        return nil, errors.New("secretKey not found")
    }
    serviceName := config.Get("serviceName").String()
    if serviceName == "" {
        return nil, errors.New("serviceName not found")
    }
    servicePort := config.Get("servicePort").Int()
    if servicePort == 0 {
        return nil, errors.New("servicePort not found")
    }
    search := &QuarkSearch{
        apiKey:             apiKey,
        secretKey:          secretKey,
        endpoint:           config.Get("endpoint").String(),
        count:              uint32(config.Get("count").Int()),
        timeoutMillisecond: uint32(config.Get("timeoutMillisecond").Uint()),
        client: wrapper.NewClusterClient(wrapper.FQDNCluster{
            FQDN: serviceName,
            Port: servicePort,
        }),
    }
    if search.endpoint == "" {
        search.endpoint = "iqs.cn-zhangjiakou.aliyuncs.com"
    }
    if search.count == 0 {
        search.count = 10
    }
    if search.timeoutMillisecond == 0 {
        search.timeoutMillisecond = 5000
    }
    return search, nil
}
// NeedExectue reports whether QuarkSearch should handle the request: it
// runs for general internet searches (empty engine type or "internet").
func (g QuarkSearch) NeedExectue(ctx engine.SearchContext) bool {
    return ctx.EngineType == "" || ctx.EngineType == "internet"
}

// Client returns the HTTP client bound to the configured IQS service.
func (g QuarkSearch) Client() wrapper.HttpClient {
    return g.client
}
// CallArgs builds the signed GET request for the IQS genericSearch API
// following the ACS3-HMAC-SHA256 process: a canonical request (method,
// path, sorted query string, canonical headers, signed-header list, body
// hash) is SHA-256 hashed, HMAC-signed with the secret key, and the
// signature is carried in the Authorization header.
// Fix: the canonical query string was built by ranging over a map, and Go
// randomizes map iteration order, so the two parameters could be emitted
// in either order while url.Values.Encode (used for the actual request
// URL) always sorts by key — causing intermittent signature mismatches.
// The parameters are now sorted before joining.
func (g QuarkSearch) CallArgs(ctx engine.SearchContext) engine.CallArgs {
    query := strings.Join(ctx.Querys, " ")
    canonicalURI := Path
    queryParams := map[string]string{
        "query":     query,
        "timeRange": "NoLimit",
    }
    queryParamsStr := make([]string, 0, len(queryParams))
    for k, v := range queryParams {
        queryParamsStr = append(queryParamsStr, k+"="+urlEncoding(v))
    }
    // ACS3 requires query parameters in lexicographic order; this also
    // matches the order produced by reqParams.Encode() below.
    sort.Strings(queryParamsStr)
    canonicalQueryString := strings.Join(queryParamsStr, "&")
    timeStamp := time.Now().UTC().Format("2006-01-02T15:04:05Z")
    // Random nonce for replay protection; crypto/rand read errors are
    // effectively impossible, matching the original's ignored error.
    randomID, _ := generateHexID(32)
    params := map[string]string{
        "host":                  g.endpoint,
        "x-acs-action":          Action,
        "x-acs-content-sha256":  ContentSha256,
        "x-acs-date":            timeStamp,
        "x-acs-signature-nonce": randomID,
        "x-acs-version":         Version,
    }
    canonicalHeaders := getCanonicalHeaders(params)
    canonicalRequest := http.MethodGet + "\n" + canonicalURI + "\n" + canonicalQueryString + "\n" + canonicalHeaders + "\n" + SignedHeaders + "\n" + ContentSha256
    stringToSign := SignatureAlgorithm + "\n" + getHasedString(canonicalRequest)
    authHeaderFmt := "%s Credential=%s,SignedHeaders=%s,Signature=%s"
    authHeader := fmt.Sprintf(authHeaderFmt, SignatureAlgorithm, g.apiKey, SignedHeaders, getSignature(stringToSign, g.secretKey))
    reqParams := url.Values{}
    for k, v := range queryParams {
        reqParams.Add(k, v)
    }
    requestURL := fmt.Sprintf("https://%s%s?%s", g.endpoint, Path, reqParams.Encode())
    return engine.CallArgs{
        Method: http.MethodGet,
        Url:    requestURL,
        Headers: [][2]string{
            {"x-acs-date", timeStamp},
            {"x-acs-signature-nonce", randomID},
            {"x-acs-content-sha256", ContentSha256},
            {"x-acs-version", Version},
            {"x-acs-action", Action},
            {"Authorization", authHeader},
        },
        Body:               nil,
        TimeoutMillisecond: g.timeoutMillisecond,
    }
}
// ParseResult maps IQS pageItems onto SearchResult, keeping at most
// g.count items.
// Fix: the original kept iterating over every remaining page item after
// the count limit was reached (the index check only suppressed the
// append); the loop now breaks as soon as the limit is hit, with
// identical output.
func (g QuarkSearch) ParseResult(ctx engine.SearchContext, response []byte) []engine.SearchResult {
    jsonObj := gjson.ParseBytes(response)
    var results []engine.SearchResult
    for index, item := range jsonObj.Get("pageItems").Array() {
        if index >= int(g.count) {
            break
        }
        result := engine.SearchResult{
            Title:   item.Get("title").String(),
            Link:    item.Get("link").String(),
            Content: item.Get("mainText").String(),
        }
        if result.Valid() {
            results = append(results, result)
        }
    }
    return results
}

View File

@@ -0,0 +1,37 @@
package engine
import (
"github.com/alibaba/higress/plugins/wasm-go/pkg/wrapper"
)
// SearchResult is one normalized hit returned by any search engine.
type SearchResult struct {
    Title string    // result title
    Link string     // result URL
    Content string  // snippet/body text used to build the prompt
}

// Valid reports whether all three fields are populated; callers drop
// incomplete hits.
func (result SearchResult) Valid() bool {
    return result.Title != "" && result.Link != "" && result.Content != ""
}

// SearchContext carries the per-request search parameters shared by all
// engines.
type SearchContext struct {
    EngineType string     // requested engine type ("", "internet", "arxiv", "private", ...)
    Querys []string       // search query terms
    Language string       // optional language hint (e.g. for mkt/lr parameters)
    ArxivCategory string  // optional arXiv category filter
}

// CallArgs describes the concrete HTTP request an engine wants to perform.
type CallArgs struct {
    Method string              // HTTP method
    Url string                 // absolute or service-relative request URL
    Headers [][2]string        // request headers as name/value pairs
    Body []byte                // request body, nil for GET
    TimeoutMillisecond uint32  // per-request timeout
}

// SearchEngine is implemented by each search backend (google, bing, arxiv,
// elasticsearch, quark).
type SearchEngine interface {
    // NeedExectue reports whether this engine applies to the request
    // context. (The name is a long-standing typo for "NeedExecute".)
    NeedExectue(ctx SearchContext) bool
    // Client returns the HTTP client used to perform the call.
    Client() wrapper.HttpClient
    // CallArgs builds the concrete HTTP request for the context.
    CallArgs(ctx SearchContext) CallArgs
    // ParseResult converts the raw HTTP response into normalized results.
    ParseResult(ctx SearchContext, response []byte) []SearchResult
}

View File

@@ -0,0 +1,26 @@
module github.com/alibaba/higress/plugins/wasm-go/extensions/ai-search
go 1.18
replace github.com/alibaba/higress/plugins/wasm-go => ../..
require (
github.com/alibaba/higress/plugins/wasm-go v0.0.0
github.com/antchfx/xmlquery v1.4.4
github.com/higress-group/proxy-wasm-go-sdk v1.0.0
github.com/tidwall/gjson v1.18.0
github.com/tidwall/sjson v1.2.5
)
require (
github.com/antchfx/xpath v1.3.3 // indirect
github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da // indirect
github.com/google/uuid v1.3.0 // indirect
github.com/higress-group/nottinygc v0.0.0-20231101025119-e93c4c2f8520 // indirect
github.com/magefile/mage v1.14.0 // indirect
github.com/tidwall/match v1.1.1 // indirect
github.com/tidwall/pretty v1.2.0 // indirect
github.com/tidwall/resp v0.1.1 // indirect
golang.org/x/net v0.33.0 // indirect
golang.org/x/text v0.21.0 // indirect
)

View File

@@ -0,0 +1,96 @@
github.com/antchfx/xmlquery v1.4.4 h1:mxMEkdYP3pjKSftxss4nUHfjBhnMk4imGoR96FRY2dg=
github.com/antchfx/xmlquery v1.4.4/go.mod h1:AEPEEPYE9GnA2mj5Ur2L5Q5/2PycJ0N9Fusrx9b12fc=
github.com/antchfx/xpath v1.3.3 h1:tmuPQa1Uye0Ym1Zn65vxPgfltWb/Lxu2jeqIGteJSRs=
github.com/antchfx/xpath v1.3.3/go.mod h1:i54GszH55fYfBmoZXapTHN8T8tkcHfRgLyVwwqzXNcs=
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da h1:oI5xCqsCo564l8iNU+DwB5epxmsaqB+rhGL0m5jtYqE=
github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc=
github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY=
github.com/google/uuid v1.3.0 h1:t6JiXgmwXMjEs8VusXIJk2BXHsn+wx8BZdTaoZ5fu7I=
github.com/google/uuid v1.3.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
github.com/higress-group/nottinygc v0.0.0-20231101025119-e93c4c2f8520 h1:IHDghbGQ2DTIXHBHxWfqCYQW1fKjyJ/I7W1pMyUDeEA=
github.com/higress-group/nottinygc v0.0.0-20231101025119-e93c4c2f8520/go.mod h1:Nz8ORLaFiLWotg6GeKlJMhv8cci8mM43uEnLA5t8iew=
github.com/higress-group/proxy-wasm-go-sdk v1.0.0 h1:BZRNf4R7jr9hwRivg/E29nkVaKEak5MWjBDhWjuHijU=
github.com/higress-group/proxy-wasm-go-sdk v1.0.0/go.mod h1:iiSyFbo+rAtbtGt/bsefv8GU57h9CCLYGJA74/tF5/0=
github.com/magefile/mage v1.14.0 h1:6QDX3g6z1YvJ4olPhT1wksUcSa/V0a1B+pJb73fBjyo=
github.com/magefile/mage v1.14.0/go.mod h1:z5UZb/iS3GoOSn0JgWuiw7dxlurVYTu+/jHXqQg881A=
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
github.com/stretchr/testify v1.8.4 h1:CcVxjf3Q8PM0mHUKJCdn+eZZtm5yQwehR5yeSVQQcUk=
github.com/tidwall/gjson v1.14.2/go.mod h1:/wbyibRr2FHMks5tjHJ5F8dMZh3AcwJEMf5vlfC0lxk=
github.com/tidwall/gjson v1.18.0 h1:FIDeeyB800efLX89e5a8Y0BNH+LOngJyGrIWxG2FKQY=
github.com/tidwall/gjson v1.18.0/go.mod h1:/wbyibRr2FHMks5tjHJ5F8dMZh3AcwJEMf5vlfC0lxk=
github.com/tidwall/match v1.1.1 h1:+Ho715JplO36QYgwN9PGYNhgZvoUSc9X2c80KVTi+GA=
github.com/tidwall/match v1.1.1/go.mod h1:eRSPERbgtNPcGhD8UCthc6PmLEQXEWd3PRB5JTxsfmM=
github.com/tidwall/pretty v1.2.0 h1:RWIZEg2iJ8/g6fDDYzMpobmaoGh5OLl4AXtGUGPcqCs=
github.com/tidwall/pretty v1.2.0/go.mod h1:ITEVvHYasfjBbM0u2Pg8T2nJnzm8xPwvNhhsoaGGjNU=
github.com/tidwall/resp v0.1.1 h1:Ly20wkhqKTmDUPlyM1S7pWo5kk0tDu8OoC/vFArXmwE=
github.com/tidwall/resp v0.1.1/go.mod h1:3/FrruOBAxPTPtundW0VXgmsQ4ZBA0Aw714lVYgwFa0=
github.com/tidwall/sjson v1.2.5 h1:kLy8mja+1c9jlljvWTlSazM7cKDRfJuR/bOJhcY5NcY=
github.com/tidwall/sjson v1.2.5/go.mod h1:Fvgq9kS/6ociJEDnK0Fk1cpYF4FIW6ZF7LAe+6jwd28=
github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY=
golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc=
golang.org/x/crypto v0.13.0/go.mod h1:y6Z2r+Rw4iayiXXAIxJIDAJ1zMW4yaTpebo8fPOliYc=
golang.org/x/crypto v0.19.0/go.mod h1:Iy9bg/ha4yyC70EfRS8jz+B6ybOBKMaSxLj6P6oBDfU=
golang.org/x/crypto v0.23.0/go.mod h1:CKFgDieR+mRhux2Lsu27y0fO304Db0wZe70UKqHu0v8=
golang.org/x/crypto v0.31.0/go.mod h1:kDsLvtWBEx7MV9tJOj9bnXsPbxwJQ6csT/x4KIN4Ssk=
golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4/go.mod h1:jJ57K6gSWd91VN4djpZkiMVwK6gcyfeH4XE8wZrZaV4=
golang.org/x/mod v0.8.0/go.mod h1:iBbtSCu2XBx23ZKBPSOrRkjjQPZFPuis4dIYUhu/chs=
golang.org/x/mod v0.12.0/go.mod h1:iBbtSCu2XBx23ZKBPSOrRkjjQPZFPuis4dIYUhu/chs=
golang.org/x/mod v0.15.0/go.mod h1:hTbmBsO62+eylJbnUtE2MGJUyE7QWk4xUqPFrRgJ+7c=
golang.org/x/mod v0.17.0/go.mod h1:hTbmBsO62+eylJbnUtE2MGJUyE7QWk4xUqPFrRgJ+7c=
golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg=
golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c=
golang.org/x/net v0.6.0/go.mod h1:2Tu9+aMcznHK/AK1HMvgo6xiTLG5rD5rZLDS+rp2Bjs=
golang.org/x/net v0.10.0/go.mod h1:0qNGK6F8kojg2nk9dLZ2mShWaEBan6FAoqfSigmmuDg=
golang.org/x/net v0.15.0/go.mod h1:idbUs1IY1+zTqbi8yxTbhexhEEk5ur9LInksu6HrEpk=
golang.org/x/net v0.21.0/go.mod h1:bIjVDfnllIU7BJ2DNgfnXvpSvtn8VRwhlsaeUTyUS44=
golang.org/x/net v0.25.0/go.mod h1:JkAGAh7GEvH74S6FOH42FLoXpXbE/aqXSrIQjXgsiwM=
golang.org/x/net v0.33.0 h1:74SYHlV8BIgHIFC/LrYkOGIwL19eTYXQ5wc6TBuO36I=
golang.org/x/net v0.33.0/go.mod h1:HXLR5J+9DxmrqMwG9qjGCxZ+zKXxBru04zlTvWlWuN4=
golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.1.0/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.3.0/go.mod h1:FU7BRWz2tNW+3quACPkgCx/L+uEAv1htQ0V83Z9Rj+Y=
golang.org/x/sync v0.6.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk=
golang.org/x/sync v0.7.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk=
golang.org/x/sync v0.10.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk=
golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.8.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.12.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.17.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
golang.org/x/sys v0.20.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
golang.org/x/sys v0.28.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
golang.org/x/telemetry v0.0.0-20240228155512-f48c80bd79b2/go.mod h1:TeRTkGYfJXctD9OcfyVLyj2J3IxLnKwHJR8f4D8a3YE=
golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8=
golang.org/x/term v0.5.0/go.mod h1:jMB1sMXY+tzblOD4FWmEbocvup2/aLOaQEp7JmGp78k=
golang.org/x/term v0.8.0/go.mod h1:xPskH00ivmX89bAKVGSKKtLOWNx2+17Eiy94tnKShWo=
golang.org/x/term v0.12.0/go.mod h1:owVbMEjm3cBLCHdkQu9b1opXd4ETQWc3BhuQGKgXgvU=
golang.org/x/term v0.17.0/go.mod h1:lLRBjIVuehSbZlaOtGMbcMncT+aqLLLmKrsjNrUguwk=
golang.org/x/term v0.20.0/go.mod h1:8UkIAJTvZgivsXaD6/pH6U9ecQzZ45awqEOzuCvwpFY=
golang.org/x/term v0.27.0/go.mod h1:iMsnZpn0cago0GOrHO2+Y7u7JPn5AylBrcoWkElMTSM=
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ=
golang.org/x/text v0.7.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8=
golang.org/x/text v0.9.0/go.mod h1:e1OnstbJyHTd6l/uOt8jFFHp6TRDWZR/bV3emEE/zU8=
golang.org/x/text v0.13.0/go.mod h1:TvPlkZtksWOMsz7fbANvkp4WM8x/WCo/om8BMLbz+aE=
golang.org/x/text v0.14.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU=
golang.org/x/text v0.15.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU=
golang.org/x/text v0.21.0 h1:zyQAAkrwaneQ066sspRyJaG9VNi/YJ1NfzcGB3hZ/qo=
golang.org/x/text v0.21.0/go.mod h1:4IBbMaMmOPCJ8SecivzSH54+73PCFmPWxNTLm+vZkEQ=
golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
golang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc=
golang.org/x/tools v0.6.0/go.mod h1:Xwgl3UAJ/d3gWutnCtw505GrjyAbvKui8lOU390QaIU=
golang.org/x/tools v0.13.0/go.mod h1:HvlwmtVNQAhOuCjW7xxvovg8wbNq7LwfXh/k7wXUl58=
golang.org/x/tools v0.21.1-0.20240508182429-e35e4ccd0d2d/go.mod h1:aiJjzUbINMkxbQROHiO6hDPo2LHcIPhhQsa9DLh0yGk=
golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=

View File

@@ -0,0 +1,568 @@
// Copyright (c) 2022 Alibaba Group Holding Ltd.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package main
import (
_ "embed"
"errors"
"fmt"
"net/http"
"strings"
"time"
"github.com/higress-group/proxy-wasm-go-sdk/proxywasm"
"github.com/higress-group/proxy-wasm-go-sdk/proxywasm/types"
"github.com/tidwall/gjson"
"github.com/tidwall/sjson"
"github.com/alibaba/higress/plugins/wasm-go/pkg/wrapper"
"github.com/alibaba/higress/plugins/wasm-go/extensions/ai-search/engine"
"github.com/alibaba/higress/plugins/wasm-go/extensions/ai-search/engine/arxiv"
"github.com/alibaba/higress/plugins/wasm-go/extensions/ai-search/engine/bing"
"github.com/alibaba/higress/plugins/wasm-go/extensions/ai-search/engine/elasticsearch"
"github.com/alibaba/higress/plugins/wasm-go/extensions/ai-search/engine/google"
"github.com/alibaba/higress/plugins/wasm-go/extensions/ai-search/engine/quark"
)
// SearchRewrite holds the optional LLM-based query rewriting settings: the
// user question can be sent to an LLM chat-completions endpoint that
// rewrites it into better search queries before the engines are called.
type SearchRewrite struct {
    client wrapper.HttpClient  // client for the LLM service
    url string                 // chat-completions endpoint URL (llmUrl)
    apiKey string              // API key for the LLM service (llmApiKey)
    modelName string           // model used for rewriting (llmModelName)
    timeoutMillisecond uint32  // LLM request timeout
    prompt string              // rewrite prompt template
}
// Config is the parsed plugin configuration.
type Config struct {
    engine []engine.SearchEngine  // configured search engines, queried in parallel
    promptTemplate string         // template containing {search_results} and {question} placeholders
    referenceFormat string        // format string (with %s) for the reference list when needReference is set
    defaultLanguage string        // fallback language hint, from "defaultLang"
    needReference bool            // whether answers should cite search sources
    searchRewrite *SearchRewrite  // optional LLM query rewriting; nil when disabled
}
const (
    // Upper bound on the buffered body size when processing bodies.
    DEFAULT_MAX_BODY_BYTES uint32 = 100 * 1024 * 1024
)
// Prompt templates embedded at build time; presumably selected per search
// scenario (full / arxiv / internet / private) — selection logic lives in
// code outside this view.
//go:embed prompts/full.md
var fullSearchPrompts string
//go:embed prompts/arxiv.md
var arxivSearchPrompts string
//go:embed prompts/internet.md
var internetSearchPrompts string
//go:embed prompts/private.md
var privateSearchPrompts string
// main registers the ai-search plugin with the Higress wasm-go wrapper:
// configuration parsing plus request/response header and body hooks
// (including streaming response handling).
func main() {
    wrapper.SetCtx(
        "ai-search",
        wrapper.ParseConfigBy(parseConfig),
        wrapper.ProcessRequestHeadersBy(onHttpRequestHeaders),
        wrapper.ProcessRequestBodyBy(onHttpRequestBody),
        wrapper.ProcessResponseHeadersBy(onHttpResponseHeaders),
        wrapper.ProcessStreamingResponseBodyBy(onStreamingResponseBody),
        wrapper.ProcessResponseBodyBy(onHttpResponseBody),
    )
}
// parseConfig parses the plugin JSON configuration into config.
//
// It validates the reference format, installs a default prompt template when
// none is provided, instantiates every engine listed under "searchFrom", and
// optionally configures the LLM-based search-rewrite step, choosing the
// rewrite prompt according to which engine kinds are present.
//
// Returns an error when the configuration is invalid or no engine could be
// initialized.
func parseConfig(json gjson.Result, config *Config, log wrapper.Log) error {
	config.needReference = json.Get("needReference").Bool()
	if config.needReference {
		config.referenceFormat = json.Get("referenceFormat").String()
		if config.referenceFormat == "" {
			config.referenceFormat = "**References:**\n%s"
		} else if !strings.Contains(config.referenceFormat, "%s") {
			// The format is applied via fmt.Sprintf, so %s is mandatory.
			return fmt.Errorf("invalid referenceFormat:%s", config.referenceFormat)
		}
	}
	config.defaultLanguage = json.Get("defaultLang").String()
	config.promptTemplate = json.Get("promptTemplate").String()
	if config.promptTemplate == "" {
		// Built-in templates: the referenced variant asks the model to cite
		// numbered [webpage X] sources; the other one forbids citations.
		if config.needReference {
			config.promptTemplate = `# 以下内容是基于用户发送的消息的搜索结果:
{search_results}
在我给你的搜索结果中,每个结果都是[webpage X begin]...[webpage X end]格式的X代表每篇文章的数字索引。请在适当的情况下在句子末尾引用上下文。请按照引用编号[X]的格式在答案中对应部分引用上下文。如果一句话源自多个上下文,请列出所有相关的引用编号,例如[3][5],切记不要将引用集中在最后返回引用编号,而是在答案对应部分列出。
在回答时,请注意以下几点:
- 今天是北京时间:{cur_date}。
- 并非搜索结果的所有内容都与用户的问题密切相关,你需要结合问题,对搜索结果进行甄别、筛选。
- 对于列举类的问题如列举所有航班信息尽量将答案控制在10个要点以内并告诉用户可以查看搜索来源、获得完整信息。优先提供信息完整、最相关的列举项如非必要不要主动告诉用户搜索结果未提供的内容。
- 对于创作类的问题(如写论文),请务必在正文的段落中引用对应的参考编号,例如[3][5],不能只在文章末尾引用。你需要解读并概括用户的题目要求,选择合适的格式,充分利用搜索结果并抽取重要信息,生成符合用户要求、极具思想深度、富有创造力与专业性的答案。你的创作篇幅需要尽可能延长,对于每一个要点的论述要推测用户的意图,给出尽可能多角度的回答要点,且务必信息量大、论述详尽。
- 如果回答很长请尽量结构化、分段落总结。如果需要分点作答尽量控制在5个点以内并合并相关的内容。
- 对于客观类的问答,如果问题的答案非常简短,可以适当补充一到两句相关信息,以丰富内容。
- 你需要根据用户要求和回答内容选择合适、美观的回答格式,确保可读性强。
- 你的回答应该综合多个相关网页来回答,不能重复引用一个网页。
- 除非用户要求,否则你回答的语言需要和用户提问的语言保持一致。
# 用户消息为:
{question}`
		} else {
			config.promptTemplate = `# 以下内容是基于用户发送的消息的搜索结果:
{search_results}
在我给你的搜索结果中,每个结果都是[webpage begin]...[webpage end]格式的。
在回答时,请注意以下几点:
- 今天是北京时间:{cur_date}。
- 并非搜索结果的所有内容都与用户的问题密切相关,你需要结合问题,对搜索结果进行甄别、筛选。
- 对于列举类的问题如列举所有航班信息尽量将答案控制在10个要点以内。如非必要不要主动告诉用户搜索结果未提供的内容。
- 对于创作类的问题(如写论文),你需要解读并概括用户的题目要求,选择合适的格式,充分利用搜索结果并抽取重要信息,生成符合用户要求、极具思想深度、富有创造力与专业性的答案。你的创作篇幅需要尽可能延长,对于每一个要点的论述要推测用户的意图,给出尽可能多角度的回答要点,且务必信息量大、论述详尽。
- 如果回答很长请尽量结构化、分段落总结。如果需要分点作答尽量控制在5个点以内并合并相关的内容。
- 对于客观类的问答,如果问题的答案非常简短,可以适当补充一到两句相关信息,以丰富内容。
- 你需要根据用户要求和回答内容选择合适、美观的回答格式,确保可读性强。
- 你的回答应该综合多个相关网页来回答,但回答中不要给出网页的引用来源。
- 除非用户要求,否则你回答的语言需要和用户提问的语言保持一致。
# 用户消息为:
{question}`
		}
	}
	if !strings.Contains(config.promptTemplate, "{search_results}") ||
		!strings.Contains(config.promptTemplate, "{question}") {
		return fmt.Errorf("invalid promptTemplate, must contains {search_results} and {question}:%s", config.promptTemplate)
	}
	// Track which engine kinds are configured; this drives the rewrite-prompt
	// selection below.
	var internetExists, privateExists, arxivExists bool
	for _, e := range json.Get("searchFrom").Array() {
		switch e.Get("type").String() {
		case "bing":
			searchEngine, err := bing.NewBingSearch(&e)
			if err != nil {
				return fmt.Errorf("bing search engine init failed:%s", err)
			}
			config.engine = append(config.engine, searchEngine)
			internetExists = true
		case "google":
			searchEngine, err := google.NewGoogleSearch(&e)
			if err != nil {
				return fmt.Errorf("google search engine init failed:%s", err)
			}
			config.engine = append(config.engine, searchEngine)
			internetExists = true
		case "arxiv":
			searchEngine, err := arxiv.NewArxivSearch(&e)
			if err != nil {
				return fmt.Errorf("arxiv search engine init failed:%s", err)
			}
			config.engine = append(config.engine, searchEngine)
			arxivExists = true
		case "elasticsearch":
			searchEngine, err := elasticsearch.NewElasticsearchSearch(&e)
			if err != nil {
				return fmt.Errorf("elasticsearch search engine init failed:%s", err)
			}
			config.engine = append(config.engine, searchEngine)
			privateExists = true
		case "quark":
			searchEngine, err := quark.NewQuarkSearch(&e)
			if err != nil {
				// Fixed copy-paste bug: this previously reported
				// "elasticsearch search engine init failed".
				return fmt.Errorf("quark search engine init failed:%s", err)
			}
			config.engine = append(config.engine, searchEngine)
			internetExists = true
		default:
			return fmt.Errorf("unknown search engine:%s", e.Get("type").String())
		}
	}
	searchRewriteJson := json.Get("searchRewrite")
	if searchRewriteJson.Exists() {
		searchRewrite := &SearchRewrite{}
		llmServiceName := searchRewriteJson.Get("llmServiceName").String()
		if llmServiceName == "" {
			// Error message matches the actual config key (was "llm_service_name").
			return errors.New("llmServiceName not found")
		}
		llmServicePort := searchRewriteJson.Get("llmServicePort").Int()
		if llmServicePort == 0 {
			return errors.New("llmServicePort not found")
		}
		searchRewrite.client = wrapper.NewClusterClient(wrapper.FQDNCluster{
			FQDN: llmServiceName,
			Port: llmServicePort,
		})
		llmApiKey := searchRewriteJson.Get("llmApiKey").String()
		if llmApiKey == "" {
			return errors.New("llmApiKey not found")
		}
		searchRewrite.apiKey = llmApiKey
		llmUrl := searchRewriteJson.Get("llmUrl").String()
		if llmUrl == "" {
			return errors.New("llmUrl not found")
		}
		searchRewrite.url = llmUrl
		llmModelName := searchRewriteJson.Get("llmModelName").String()
		if llmModelName == "" {
			return errors.New("llmModelName not found")
		}
		searchRewrite.modelName = llmModelName
		llmTimeout := searchRewriteJson.Get("timeoutMillisecond").Uint()
		if llmTimeout == 0 {
			llmTimeout = 30000
		}
		searchRewrite.timeoutMillisecond = uint32(llmTimeout)
		// The consideration here is that internet searches are generally available, but arxiv and private sources may not be.
		if arxivExists {
			if privateExists {
				// private + internet + arxiv
				searchRewrite.prompt = fullSearchPrompts
			} else {
				// internet + arxiv
				searchRewrite.prompt = arxivSearchPrompts
			}
		} else if privateExists {
			// private + internet
			searchRewrite.prompt = privateSearchPrompts
		} else if internetExists {
			// only internet
			searchRewrite.prompt = internetSearchPrompts
		}
		config.searchRewrite = searchRewrite
	}
	if len(config.engine) == 0 {
		return fmt.Errorf("no available search engine found")
	}
	log.Debugf("ai search enabled, config: %#v", config)
	return nil
}
// onHttpRequestHeaders decides whether the request body should be read:
// JSON bodies are buffered (with Accept-Encoding stripped so the upstream
// response is not compressed); other content types are passed through.
func onHttpRequestHeaders(ctx wrapper.HttpContext, config Config, log wrapper.Log) types.Action {
	// An absent content-type means the request carries no body to process.
	if contentType, _ := proxywasm.GetHttpRequestHeader("content-type"); contentType != "" {
		if strings.Contains(contentType, "application/json") {
			ctx.SetRequestBodyBufferLimit(DEFAULT_MAX_BODY_BYTES)
			_ = proxywasm.RemoveHttpRequestHeader("Accept-Encoding")
		} else {
			log.Warnf("content is not json, can't process: %s", contentType)
			ctx.DontReadRequestBody()
		}
	}
	return types.ActionContinue
}
// onHttpRequestBody extracts the most recent user message from the OpenAI-style
// request body and triggers the search flow. When a search-rewrite LLM is
// configured, the user query is first sent to it asynchronously (the request is
// paused and resumed from the callback); otherwise the query is searched as-is.
func onHttpRequestBody(ctx wrapper.HttpContext, config Config, body []byte, log wrapper.Log) types.Action {
	var queryIndex int
	var query string
	// Walk messages from the end to find the latest user turn.
	messages := gjson.GetBytes(body, "messages").Array()
	for i := len(messages) - 1; i >= 0; i-- {
		if messages[i].Get("role").String() == "user" {
			queryIndex = i
			query = messages[i].Get("content").String()
			break
		}
	}
	if query == "" {
		log.Errorf("not found user query in body:%s", body)
		return types.ActionContinue
	}
	searchRewrite := config.searchRewrite
	if searchRewrite != nil {
		startTime := time.Now()
		// Build a non-streaming chat-completions request carrying the rewrite
		// prompt; sjson sets the content field so the prompt is JSON-escaped.
		rewritePrompt := strings.Replace(searchRewrite.prompt, "{question}", query, 1)
		rewriteBody, _ := sjson.SetBytes([]byte(fmt.Sprintf(
			`{"stream":false,"max_tokens":100,"model":"%s","messages":[{"role":"user","content":""}]}`,
			searchRewrite.modelName)), "messages.0.content", rewritePrompt)
		err := searchRewrite.client.Post(searchRewrite.url,
			[][2]string{
				{"Content-Type", "application/json"},
				{"Authorization", fmt.Sprintf("Bearer %s", searchRewrite.apiKey)},
			}, rewriteBody,
			func(statusCode int, responseHeaders http.Header, responseBody []byte) {
				if statusCode != http.StatusOK {
					log.Errorf("search rewrite failed, status: %d", statusCode)
					// After a rewrite failure, no further search is performed, thus quickly identifying the failure.
					proxywasm.ResumeHttpRequest()
					return
				}
				content := gjson.GetBytes(responseBody, "choices.0.message.content").String()
				log.Infof("LLM rewritten query response: %s (took %v), original search query:%s",
					strings.ReplaceAll(content, "\n", `\n`), time.Since(startTime), query)
				// The rewrite prompt instructs the model to answer "none" when
				// no search is needed.
				if strings.Contains(content, "none") {
					log.Debugf("no search required")
					proxywasm.ResumeHttpRequest()
					return
				}
				// Parse search queries from LLM response; each line has the
				// form "<engineType>: <query>" per the prompt contract.
				var searchContexts []engine.SearchContext
				for _, line := range strings.Split(content, "\n") {
					line = strings.TrimSpace(line)
					if line == "" {
						continue
					}
					parts := strings.SplitN(line, ":", 2)
					if len(parts) != 2 {
						continue
					}
					engineType := strings.TrimSpace(parts[0])
					queryStr := strings.TrimSpace(parts[1])
					// NOTE(review): this local shadows the wrapper.HttpContext
					// parameter ctx within the loop body; the outer ctx is only
					// used again after the loop, so behavior is correct.
					var ctx engine.SearchContext
					ctx.Language = config.defaultLanguage
					switch {
					case engineType == "internet":
						ctx.EngineType = engineType
						ctx.Querys = []string{queryStr}
					case engineType == "private":
						ctx.EngineType = engineType
						ctx.Querys = strings.Split(queryStr, ",")
						for i := range ctx.Querys {
							ctx.Querys[i] = strings.TrimSpace(ctx.Querys[i])
						}
					default:
						// Arxiv category
						ctx.EngineType = "arxiv"
						ctx.ArxivCategory = engineType
						ctx.Querys = strings.Split(queryStr, ",")
						for i := range ctx.Querys {
							ctx.Querys[i] = strings.TrimSpace(ctx.Querys[i])
						}
					}
					if len(ctx.Querys) > 0 {
						searchContexts = append(searchContexts, ctx)
						if ctx.ArxivCategory != "" {
							// Conduct inquiries in all areas to increase recall.
							backupCtx := ctx
							backupCtx.ArxivCategory = ""
							searchContexts = append(searchContexts, backupCtx)
						}
					}
				}
				if len(searchContexts) == 0 {
					log.Errorf("no valid search contexts found")
					proxywasm.ResumeHttpRequest()
					return
				}
				// executeSearch resumes the request itself when it pauses; only
				// resume here if it finished synchronously.
				if types.ActionContinue == executeSearch(ctx, config, queryIndex, body, searchContexts, log) {
					proxywasm.ResumeHttpRequest()
				}
			}, searchRewrite.timeoutMillisecond)
		if err != nil {
			log.Errorf("search rewrite call llm service failed:%s", err)
			// After a rewrite failure, no further search is performed, thus quickly identifying the failure.
			return types.ActionContinue
		}
		return types.ActionPause
	}
	// Execute search without rewrite
	return executeSearch(ctx, config, queryIndex, body, []engine.SearchContext{{
		Querys:   []string{query},
		Language: config.defaultLanguage,
	}}, log)
}
// executeSearch fans the search contexts out to every configured engine,
// merges the asynchronous results (deduplicated by link), renders them into
// the prompt template, and rewrites the buffered request body before resuming
// the request.
//
// Returns types.ActionPause when at least one engine call is in flight (the
// request is resumed from the last completing callback), otherwise
// types.ActionContinue.
func executeSearch(ctx wrapper.HttpContext, config Config, queryIndex int, body []byte, searchContexts []engine.SearchContext, log wrapper.Log) types.Action {
	// One result slot per engine so concurrent callbacks never share a slice.
	searchResultGroups := make([][]engine.SearchResult, len(config.engine))
	var finished int
	var searching int
	for i := 0; i < len(config.engine); i++ {
		configEngine := config.engine[i]
		// Skip engines that none of the search contexts apply to.
		var needsExecute bool
		for _, searchCtx := range searchContexts {
			if configEngine.NeedExectue(searchCtx) {
				needsExecute = true
				break
			}
		}
		if !needsExecute {
			continue
		}
		// Process all search contexts for this engine
		for _, searchCtx := range searchContexts {
			if !configEngine.NeedExectue(searchCtx) {
				continue
			}
			// Capture per-iteration copies for the async callback. With
			// pre-Go1.22 loop semantics the range variable is shared across
			// iterations; the original code copied i into index but not
			// searchCtx, so ParseResult could run with the wrong context.
			searchCtx := searchCtx
			index := i
			args := configEngine.CallArgs(searchCtx)
			err := configEngine.Client().Call(args.Method, args.Url, args.Headers, args.Body,
				func(statusCode int, responseHeaders http.Header, responseBody []byte) {
					defer func() {
						finished++
						if finished == searching {
							// All engine calls completed: merge results from
							// all engines, deduplicating by link.
							var mergedResults []engine.SearchResult
							seenLinks := make(map[string]bool)
							for _, results := range searchResultGroups {
								for _, result := range results {
									if !seenLinks[result.Link] {
										seenLinks[result.Link] = true
										mergedResults = append(mergedResults, result)
									}
								}
							}
							// Format search results for the prompt template;
							// numbered [webpage X] markers only when references
							// are requested.
							var formattedResults []string
							var formattedReferences []string
							for j, result := range mergedResults {
								if config.needReference {
									formattedResults = append(formattedResults,
										fmt.Sprintf("[webpage %d begin]\n%s\n[webpage %d end]", j+1, result.Content, j+1))
									formattedReferences = append(formattedReferences,
										fmt.Sprintf("[%d] [%s](%s)", j+1, result.Title, result.Link))
								} else {
									formattedResults = append(formattedResults,
										fmt.Sprintf("[webpage begin]\n%s\n[webpage end]", result.Content))
								}
							}
							// Prepare template variables (date rendered in Beijing time, UTC+8).
							curDate := time.Now().In(time.FixedZone("CST", 8*3600)).Format("2006年1月2日")
							searchResults := strings.Join(formattedResults, "\n")
							log.Debugf("searchResults: %s", searchResults)
							// Fill prompt template
							prompt := strings.Replace(config.promptTemplate, "{search_results}", searchResults, 1)
							prompt = strings.Replace(prompt, "{question}", searchContexts[0].Querys[0], 1)
							prompt = strings.Replace(prompt, "{cur_date}", curDate, 1)
							// Update request body with processed prompt
							modifiedBody, err := sjson.SetBytes(body, fmt.Sprintf("messages.%d.content", queryIndex), prompt)
							if err != nil {
								log.Errorf("modify request message content failed, err:%v, body:%s", err, body)
							} else {
								log.Debugf("modified body:%s", modifiedBody)
								proxywasm.ReplaceHttpRequestBody(modifiedBody)
								if config.needReference {
									ctx.SetContext("References", strings.Join(formattedReferences, "\n"))
								}
							}
							proxywasm.ResumeHttpRequest()
						}
					}()
					if statusCode != http.StatusOK {
						log.Errorf("search call failed, status: %d, engine: %#v", statusCode, configEngine)
						return
					}
					// Append results to existing slice for this engine
					searchResultGroups[index] = append(searchResultGroups[index], configEngine.ParseResult(searchCtx, responseBody)...)
				}, args.TimeoutMillisecond)
			if err != nil {
				log.Errorf("search call failed, engine: %#v", configEngine)
				continue
			}
			searching++
		}
	}
	if searching > 0 {
		return types.ActionPause
	}
	return types.ActionContinue
}
// onHttpResponseHeaders prepares response-body handling when references must
// be injected: non-SSE responses are buffered whole, SSE responses are left to
// the streaming callback. Without references the body is skipped entirely.
func onHttpResponseHeaders(ctx wrapper.HttpContext, config Config, log wrapper.Log) types.Action {
	if !config.needReference {
		// Nothing to inject, so the response body never needs inspection.
		ctx.DontReadResponseBody()
		return types.ActionContinue
	}
	// The body may be rewritten, so the original length no longer applies.
	proxywasm.RemoveHttpResponseHeader("content-length")
	contentType, err := proxywasm.GetHttpResponseHeader("Content-Type")
	if err != nil {
		log.Errorf("unable to load content-type header from response: %v", err)
	}
	if err != nil || !strings.HasPrefix(contentType, "text/event-stream") {
		// Not a recognizable SSE stream: buffer the whole body for
		// onHttpResponseBody.
		ctx.BufferResponseBody()
		ctx.SetResponseBodyBufferLimit(DEFAULT_MAX_BODY_BYTES)
	}
	return types.ActionContinue
}
// onHttpResponseBody prepends the formatted references block to the assistant
// message content of a fully buffered (non-streaming) response.
func onHttpResponseBody(ctx wrapper.HttpContext, config Config, body []byte, log wrapper.Log) types.Action {
	refs := ctx.GetStringContext("References", "")
	if refs == "" {
		// No references were stored during the request phase.
		return types.ActionContinue
	}
	original := gjson.GetBytes(body, "choices.0.message.content")
	merged := fmt.Sprintf("%s\n\n%s", fmt.Sprintf(config.referenceFormat, refs), original)
	body, err := sjson.SetBytes(body, "choices.0.message.content", merged)
	if err != nil {
		log.Errorf("modify response message content failed, err:%v, body:%s", err, body)
		return types.ActionContinue
	}
	proxywasm.ReplaceHttpResponseBody(body)
	return types.ActionContinue
}
// onStreamingResponseBody injects the references block into the first SSE
// message of a streaming response, buffering chunks until a full event is
// available. Subsequent chunks pass through unchanged.
func onStreamingResponseBody(ctx wrapper.HttpContext, config Config, chunk []byte, isLastChunk bool, log wrapper.Log) []byte {
	// Once the references have been injected, pass everything through.
	if ctx.GetBoolContext("ReferenceAppended", false) {
		return chunk
	}
	refs := ctx.GetStringContext("References", "")
	if refs == "" {
		return chunk
	}
	formatted := fmt.Sprintf(config.referenceFormat, refs)
	if out, ready := setReferencesToFirstMessage(ctx, chunk, formatted, log); ready {
		ctx.SetContext("ReferenceAppended", true)
		return out
	}
	// Not enough data yet to locate the first complete SSE message; withhold
	// the chunk (it is buffered inside setReferencesToFirstMessage).
	return []byte("")
}
// PARTIAL_MESSAGE_CONTEXT_KEY is the per-request context key under which an
// incomplete SSE chunk is buffered until a full "\n\n"-terminated event arrives.
const PARTIAL_MESSAGE_CONTEXT_KEY = "partialMessage"
// setReferencesToFirstMessage prepends the formatted references string to the
// delta content of the first SSE message found in the stream.
//
// Incomplete data is accumulated in the request context under
// PARTIAL_MESSAGE_CONTEXT_KEY until at least one complete event (terminated by
// "\n\n") is available. Returns (modifiedData, true) once the first message
// has been rewritten, or (nil, false) while more data is still needed.
func setReferencesToFirstMessage(ctx wrapper.HttpContext, chunk []byte, references string, log wrapper.Log) ([]byte, bool) {
	if len(chunk) == 0 {
		log.Debugf("chunk is empty")
		return nil, false
	}
	// Prepend any previously buffered partial message to the new chunk.
	var partialMessage []byte
	partialMessageI := ctx.GetContext(PARTIAL_MESSAGE_CONTEXT_KEY)
	if partialMessageI != nil {
		if pMsg, ok := partialMessageI.([]byte); ok {
			partialMessage = append(pMsg, chunk...)
		} else {
			log.Warnf("invalid partial message type: %T", partialMessageI)
			partialMessage = chunk
		}
	} else {
		partialMessage = chunk
	}
	if len(partialMessage) == 0 {
		log.Debugf("partial message is empty")
		return nil, false
	}
	// A complete SSE event is delimited by a blank line ("\n\n").
	messages := strings.Split(string(partialMessage), "\n\n")
	if len(messages) > 1 {
		firstMessage := messages[0]
		log.Debugf("first message: %s", firstMessage)
		// Strip the SSE framing ("data: " prefix) to reach the JSON payload.
		firstMessage = strings.TrimPrefix(firstMessage, "data:")
		firstMessage = strings.TrimPrefix(firstMessage, " ")
		firstMessage = strings.TrimSuffix(firstMessage, "\n")
		deltaContent := gjson.Get(firstMessage, "choices.0.delta.content")
		modifiedMessage, err := sjson.Set(firstMessage, "choices.0.delta.content", fmt.Sprintf("%s\n\n%s", references, deltaContent))
		if err != nil {
			// On failure, forward the data unmodified rather than dropping it.
			log.Errorf("modify response delta content failed, err:%v", err)
			return partialMessage, true
		}
		modifiedMessage = fmt.Sprintf("data: %s", modifiedMessage)
		// Fixed: previously logged the pre-rewrite firstMessage here.
		log.Debugf("modified message: %s", modifiedMessage)
		// modifiedMessage is already a string; the redundant string()
		// conversion was removed.
		messages[0] = modifiedMessage
		return []byte(strings.Join(messages, "\n\n")), true
	}
	// No complete event yet; buffer and wait for the next chunk.
	ctx.SetContext(PARTIAL_MESSAGE_CONTEXT_KEY, partialMessage)
	return nil, false
}

View File

@@ -0,0 +1,214 @@
# 目标
你需要分析**用户发送的消息**,是否需要查询搜索引擎(Google/Bing)/论文资料库(Arxiv),并按照如下情况回复相应内容:
## 情况一:不需要查询搜索引擎/论文资料/私有知识库
### 情况举例:
1. **用户发送的消息**不是在提问或寻求帮助
2. **用户发送的消息**是要求翻译文字
### 思考过程
根据上面的**情况举例**,如果符合,则按照下面**回复内容示例**进行回复,注意不要输出思考过程
### 回复内容示例:
none
## 情况二:需要查询搜索引擎/论文资料
### 情况举例:
1. 答复**用户发送的消息**,需依赖互联网上最新的资料
2. 答复**用户发送的消息**,需依赖论文等专业资料
3. 通过查询资料,可以更好地答复**用户发送的消息**
### 思考过程
根据上面的**情况举例**,以及其他需要查询资料的情况,如果符合,按照以下步骤思考,并按照下面**回复内容示例**进行回复,注意不要输出思考过程:
1. What: 分析要答复**用户发送的消息**,需要了解什么知识和资料
2. Where: 判断了解这个知识和资料要向Google等搜索引擎提问还是向Arxiv论文资料库进行查询或者需要同时查询多个地方
3. How: 分析对于要查询的知识和资料,应该提出什么样的问题
4. Adjust: 明确要向什么地方查询什么问题后,按下面方式对问题进行调整
4.1. 向搜索引擎提问:用一句话概括问题,并且针对搜索引擎做问题优化
4.2. 向Arxiv论文资料库提问
4.2.1. 明确问题所属领域,然后确定Arxiv的Category值。Category可选的枚举如下:
- cs.AI: Artificial Intelligence
- cs.AR: Hardware Architecture
- cs.CC: Computational Complexity
- cs.CE: Computational Engineering, Finance, and Science
- cs.CG: Computational Geometry
- cs.CL: Computation and Language
- cs.CR: Cryptography and Security
- cs.CV: Computer Vision and Pattern Recognition
- cs.CY: Computers and Society
- cs.DB: Databases
- cs.DC: Distributed, Parallel, and Cluster Computing
- cs.DL: Digital Libraries
- cs.DM: Discrete Mathematics
- cs.DS: Data Structures and Algorithms
- cs.ET: Emerging Technologies
- cs.FL: Formal Languages and Automata Theory
- cs.GL: General Literature
- cs.GR: Graphics
- cs.GT: Computer Science and Game Theory
- cs.HC: Human-Computer Interaction
- cs.IR: Information Retrieval
- cs.IT: Information Theory
- cs.LG: Machine Learning
- cs.LO: Logic in Computer Science
- cs.MA: Multiagent Systems
- cs.MM: Multimedia
- cs.MS: Mathematical Software
- cs.NA: Numerical Analysis
- cs.NE: Neural and Evolutionary Computing
- cs.NI: Networking and Internet Architecture
- cs.OH: Other Computer Science
- cs.OS: Operating Systems
- cs.PF: Performance
- cs.PL: Programming Languages
- cs.RO: Robotics
- cs.SC: Symbolic Computation
- cs.SD: Sound
- cs.SE: Software Engineering
- cs.SI: Social and Information Networks
- cs.SY: Systems and Control
- econ.EM: Econometrics
- econ.GN: General Economics
- econ.TH: Theoretical Economics
- eess.AS: Audio and Speech Processing
- eess.IV: Image and Video Processing
- eess.SP: Signal Processing
- eess.SY: Systems and Control
- math.AC: Commutative Algebra
- math.AG: Algebraic Geometry
- math.AP: Analysis of PDEs
- math.AT: Algebraic Topology
- math.CA: Classical Analysis and ODEs
- math.CO: Combinatorics
- math.CT: Category Theory
- math.CV: Complex Variables
- math.DG: Differential Geometry
- math.DS: Dynamical Systems
- math.FA: Functional Analysis
- math.GM: General Mathematics
- math.GN: General Topology
- math.GR: Group Theory
- math.GT: Geometric Topology
- math.HO: History and Overview
- math.IT: Information Theory
- math.KT: K-Theory and Homology
- math.LO: Logic
- math.MG: Metric Geometry
- math.MP: Mathematical Physics
- math.NA: Numerical Analysis
- math.NT: Number Theory
- math.OA: Operator Algebras
- math.OC: Optimization and Control
- math.PR: Probability
- math.QA: Quantum Algebra
- math.RA: Rings and Algebras
- math.RT: Representation Theory
- math.SG: Symplectic Geometry
- math.SP: Spectral Theory
- math.ST: Statistics Theory
- astro-ph.CO: Cosmology and Nongalactic Astrophysics
- astro-ph.EP: Earth and Planetary Astrophysics
- astro-ph.GA: Astrophysics of Galaxies
- astro-ph.HE: High Energy Astrophysical Phenomena
- astro-ph.IM: Instrumentation and Methods for Astrophysics
- astro-ph.SR: Solar and Stellar Astrophysics
- cond-mat.dis-nn: Disordered Systems and Neural Networks
- cond-mat.mes-hall: Mesoscale and Nanoscale Physics
- cond-mat.mtrl-sci: Materials Science
- cond-mat.other: Other Condensed Matter
- cond-mat.quant-gas: Quantum Gases
- cond-mat.soft: Soft Condensed Matter
- cond-mat.stat-mech: Statistical Mechanics
- cond-mat.str-el: Strongly Correlated Electrons
- cond-mat.supr-con: Superconductivity
- gr-qc: General Relativity and Quantum Cosmology
- hep-ex: High Energy Physics - Experiment
- hep-lat: High Energy Physics - Lattice
- hep-ph: High Energy Physics - Phenomenology
- hep-th: High Energy Physics - Theory
- math-ph: Mathematical Physics
- nlin.AO: Adaptation and Self-Organizing Systems
- nlin.CD: Chaotic Dynamics
- nlin.CG: Cellular Automata and Lattice Gases
- nlin.PS: Pattern Formation and Solitons
- nlin.SI: Exactly Solvable and Integrable Systems
- nucl-ex: Nuclear Experiment
- nucl-th: Nuclear Theory
- physics.acc-ph: Accelerator Physics
- physics.ao-ph: Atmospheric and Oceanic Physics
- physics.app-ph: Applied Physics
- physics.atm-clus: Atomic and Molecular Clusters
- physics.atom-ph: Atomic Physics
- physics.bio-ph: Biological Physics
- physics.chem-ph: Chemical Physics
- physics.class-ph: Classical Physics
- physics.comp-ph: Computational Physics
- physics.data-an: Data Analysis, Statistics and Probability
- physics.ed-ph: Physics Education
- physics.flu-dyn: Fluid Dynamics
- physics.gen-ph: General Physics
- physics.geo-ph: Geophysics
- physics.hist-ph: History and Philosophy of Physics
- physics.ins-det: Instrumentation and Detectors
- physics.med-ph: Medical Physics
- physics.optics: Optics
- physics.plasm-ph: Plasma Physics
- physics.pop-ph: Popular Physics
- physics.soc-ph: Physics and Society
- physics.space-ph: Space Physics
- quant-ph: Quantum Physics
- q-bio.BM: Biomolecules
- q-bio.CB: Cell Behavior
- q-bio.GN: Genomics
- q-bio.MN: Molecular Networks
- q-bio.NC: Neurons and Cognition
- q-bio.OT: Other Quantitative Biology
- q-bio.PE: Populations and Evolution
- q-bio.QM: Quantitative Methods
- q-bio.SC: Subcellular Processes
- q-bio.TO: Tissues and Organs
- q-fin.CP: Computational Finance
- q-fin.EC: Economics
- q-fin.GN: General Finance
- q-fin.MF: Mathematical Finance
- q-fin.PM: Portfolio Management
- q-fin.PR: Pricing of Securities
- q-fin.RM: Risk Management
- q-fin.ST: Statistical Finance
- q-fin.TR: Trading and Market Microstructure
- stat.AP: Applications
- stat.CO: Computation
- stat.ME: Methodology
- stat.ML: Machine Learning
- stat.OT: Other Statistics
- stat.TH: Statistics Theory
4.2.2. 根据问题所属领域,将问题拆分成多组关键词的组合,同时组合中的关键词个数尽量不要超过3个
5. Final: 按照下面**回复内容示例**进行回复,注意:
- 不要输出思考过程
- 可以向多个查询目标分别查询多次多个查询用换行分隔总查询次数控制在5次以内
- 查询搜索引擎时,需要以"internet:"开头
- 查询Arxiv论文时需要以Arxiv的Category值开头例如"cs.AI:"
- 查询Arxiv论文时优先用英文表述关键词进行搜索
- 当用多个关键词查询时,关键词之间用","分隔
- 尽量满足**用户发送的消息**中的搜索要求,例如用户要求用英文搜索,则需用英文表述问题和关键词
- 用户如果没有要求搜索语言,则用和**用户发送的消息**一致的语言表述问题和关键词
- 如果**用户发送的消息**使用中文,至少要有一条向搜索引擎查询的中文问题
### 回复内容示例:
#### 用不同语言查询多次搜索引擎
internet: 黄金价格走势
internet: The trend of gold prices
#### 向Arxiv的多个类目查询多次
cs.AI: attention mechanism
cs.AI: neuron
q-bio.NC: brain,attention mechanism
#### 向多个查询目标查询多次
internet: 中国未来房价趋势
internet: 最新中国经济政策
econ.TH: policy, real estate
# 用户发送的消息为:
{question}

View File

@@ -0,0 +1,221 @@
# 目标
你需要分析**用户发送的消息**,是否需要查询搜索引擎(Google/Bing)/论文资料库(Arxiv)/私有知识库,并按照如下情况回复相应内容:
## 情况一:不需要查询搜索引擎/论文资料/私有知识库
### 情况举例:
1. **用户发送的消息**不是在提问或寻求帮助
2. **用户发送的消息**是要求翻译文字
### 思考过程
根据上面的**情况举例**,如果符合,则按照下面**回复内容示例**进行回复,注意不要输出思考过程
### 回复内容示例:
none
## 情况二:需要查询搜索引擎/论文资料/私有知识库
### 情况举例:
1. 答复**用户发送的消息**,需依赖互联网上最新的资料
2. 答复**用户发送的消息**,需依赖论文等专业资料
3. 通过查询资料,可以更好地答复**用户发送的消息**
### 思考过程
根据上面的**情况举例**,以及其他需要查询资料的情况,如果符合,按照以下步骤思考,并按照下面**回复内容示例**进行回复,注意不要输出思考过程:
1. What: 分析要答复**用户发送的消息**,需要了解什么知识和资料
2. Where: 判断了解这个知识和资料要向Google等搜索引擎提问还是向Arxiv论文资料库进行查询还是向私有知识库进行查询或者需要同时查询多个地方
3. How: 分析对于要查询的知识和资料,应该提出什么样的问题
4. Adjust: 明确要向什么地方查询什么问题后,按下面方式对问题进行调整
4.1. 向搜索引擎提问:用一句话概括问题,并且针对搜索引擎做问题优化
4.2. 向私有知识库提问将问题拆分成多组关键词的组合同时组合中的关键词个数尽量不要超过3个
4.3. 向Arxiv论文资料库提问
4.3.1. 明确问题所属领域,然后确定Arxiv的Category值。Category可选的枚举如下:
- cs.AI: Artificial Intelligence
- cs.AR: Hardware Architecture
- cs.CC: Computational Complexity
- cs.CE: Computational Engineering, Finance, and Science
- cs.CG: Computational Geometry
- cs.CL: Computation and Language
- cs.CR: Cryptography and Security
- cs.CV: Computer Vision and Pattern Recognition
- cs.CY: Computers and Society
- cs.DB: Databases
- cs.DC: Distributed, Parallel, and Cluster Computing
- cs.DL: Digital Libraries
- cs.DM: Discrete Mathematics
- cs.DS: Data Structures and Algorithms
- cs.ET: Emerging Technologies
- cs.FL: Formal Languages and Automata Theory
- cs.GL: General Literature
- cs.GR: Graphics
- cs.GT: Computer Science and Game Theory
- cs.HC: Human-Computer Interaction
- cs.IR: Information Retrieval
- cs.IT: Information Theory
- cs.LG: Machine Learning
- cs.LO: Logic in Computer Science
- cs.MA: Multiagent Systems
- cs.MM: Multimedia
- cs.MS: Mathematical Software
- cs.NA: Numerical Analysis
- cs.NE: Neural and Evolutionary Computing
- cs.NI: Networking and Internet Architecture
- cs.OH: Other Computer Science
- cs.OS: Operating Systems
- cs.PF: Performance
- cs.PL: Programming Languages
- cs.RO: Robotics
- cs.SC: Symbolic Computation
- cs.SD: Sound
- cs.SE: Software Engineering
- cs.SI: Social and Information Networks
- cs.SY: Systems and Control
- econ.EM: Econometrics
- econ.GN: General Economics
- econ.TH: Theoretical Economics
- eess.AS: Audio and Speech Processing
- eess.IV: Image and Video Processing
- eess.SP: Signal Processing
- eess.SY: Systems and Control
- math.AC: Commutative Algebra
- math.AG: Algebraic Geometry
- math.AP: Analysis of PDEs
- math.AT: Algebraic Topology
- math.CA: Classical Analysis and ODEs
- math.CO: Combinatorics
- math.CT: Category Theory
- math.CV: Complex Variables
- math.DG: Differential Geometry
- math.DS: Dynamical Systems
- math.FA: Functional Analysis
- math.GM: General Mathematics
- math.GN: General Topology
- math.GR: Group Theory
- math.GT: Geometric Topology
- math.HO: History and Overview
- math.IT: Information Theory
- math.KT: K-Theory and Homology
- math.LO: Logic
- math.MG: Metric Geometry
- math.MP: Mathematical Physics
- math.NA: Numerical Analysis
- math.NT: Number Theory
- math.OA: Operator Algebras
- math.OC: Optimization and Control
- math.PR: Probability
- math.QA: Quantum Algebra
- math.RA: Rings and Algebras
- math.RT: Representation Theory
- math.SG: Symplectic Geometry
- math.SP: Spectral Theory
- math.ST: Statistics Theory
- astro-ph.CO: Cosmology and Nongalactic Astrophysics
- astro-ph.EP: Earth and Planetary Astrophysics
- astro-ph.GA: Astrophysics of Galaxies
- astro-ph.HE: High Energy Astrophysical Phenomena
- astro-ph.IM: Instrumentation and Methods for Astrophysics
- astro-ph.SR: Solar and Stellar Astrophysics
- cond-mat.dis-nn: Disordered Systems and Neural Networks
- cond-mat.mes-hall: Mesoscale and Nanoscale Physics
- cond-mat.mtrl-sci: Materials Science
- cond-mat.other: Other Condensed Matter
- cond-mat.quant-gas: Quantum Gases
- cond-mat.soft: Soft Condensed Matter
- cond-mat.stat-mech: Statistical Mechanics
- cond-mat.str-el: Strongly Correlated Electrons
- cond-mat.supr-con: Superconductivity
- gr-qc: General Relativity and Quantum Cosmology
- hep-ex: High Energy Physics - Experiment
- hep-lat: High Energy Physics - Lattice
- hep-ph: High Energy Physics - Phenomenology
- hep-th: High Energy Physics - Theory
- math-ph: Mathematical Physics
- nlin.AO: Adaptation and Self-Organizing Systems
- nlin.CD: Chaotic Dynamics
- nlin.CG: Cellular Automata and Lattice Gases
- nlin.PS: Pattern Formation and Solitons
- nlin.SI: Exactly Solvable and Integrable Systems
- nucl-ex: Nuclear Experiment
- nucl-th: Nuclear Theory
- physics.acc-ph: Accelerator Physics
- physics.ao-ph: Atmospheric and Oceanic Physics
- physics.app-ph: Applied Physics
- physics.atm-clus: Atomic and Molecular Clusters
- physics.atom-ph: Atomic Physics
- physics.bio-ph: Biological Physics
- physics.chem-ph: Chemical Physics
- physics.class-ph: Classical Physics
- physics.comp-ph: Computational Physics
- physics.data-an: Data Analysis, Statistics and Probability
- physics.ed-ph: Physics Education
- physics.flu-dyn: Fluid Dynamics
- physics.gen-ph: General Physics
- physics.geo-ph: Geophysics
- physics.hist-ph: History and Philosophy of Physics
- physics.ins-det: Instrumentation and Detectors
- physics.med-ph: Medical Physics
- physics.optics: Optics
- physics.plasm-ph: Plasma Physics
- physics.pop-ph: Popular Physics
- physics.soc-ph: Physics and Society
- physics.space-ph: Space Physics
- quant-ph: Quantum Physics
- q-bio.BM: Biomolecules
- q-bio.CB: Cell Behavior
- q-bio.GN: Genomics
- q-bio.MN: Molecular Networks
- q-bio.NC: Neurons and Cognition
- q-bio.OT: Other Quantitative Biology
- q-bio.PE: Populations and Evolution
- q-bio.QM: Quantitative Methods
- q-bio.SC: Subcellular Processes
- q-bio.TO: Tissues and Organs
- q-fin.CP: Computational Finance
- q-fin.EC: Economics
- q-fin.GN: General Finance
- q-fin.MF: Mathematical Finance
- q-fin.PM: Portfolio Management
- q-fin.PR: Pricing of Securities
- q-fin.RM: Risk Management
- q-fin.ST: Statistical Finance
- q-fin.TR: Trading and Market Microstructure
- stat.AP: Applications
- stat.CO: Computation
- stat.ME: Methodology
- stat.ML: Machine Learning
- stat.OT: Other Statistics
- stat.TH: Statistics Theory
4.3.2. 根据问题所属领域将问题拆分成多组关键词的组合同时组合中的关键词个数尽量不要超过3个
5. Final: 按照下面**回复内容示例**进行回复,注意:
- 不要输出思考过程
- 可以向多个查询目标分别查询多次多个查询用换行分隔总查询次数控制在5次以内
- 查询搜索引擎时,需要以"internet:"开头
- 查询私有知识库时,需要以"private:"开头
- 查询Arxiv论文时需要以Arxiv的Category值开头例如"cs.AI:"
- 查询Arxiv论文时优先用英文表述关键词进行搜索
- 当用多个关键词查询时,关键词之间用","分隔
- 尽量满足**用户发送的消息**中的搜索要求,例如用户要求用英文搜索,则需用英文表述问题和关键词
- 用户如果没有要求搜索语言,则用和**用户发送的消息**一致的语言表述问题和关键词
- 如果**用户发送的消息**使用中文,至少要有一条向搜索引擎查询的中文问题
### 回复内容示例:
#### 用不同语言查询多次搜索引擎
internet: 黄金价格走势
internet: The trend of gold prices
#### 向Arxiv的多个类目查询多次
cs.AI: attention mechanism
cs.AI: neuron
q-bio.NC: brain,attention mechanism
#### 向私有知识库查询多次
private: 电子钱包,密码
private: 张三,身份证号
#### 向多个查询目标查询多次
internet: 中国未来房价趋势
internet: 最新中国经济政策
econ.TH: policy, real estate
private: 财务状况
# 用户发送的消息为:
{question}

View File

@@ -0,0 +1,41 @@
# 目标
你需要分析**用户发送的消息**,是否需要查询搜索引擎(Google/Bing),并按照如下情况回复相应内容:
## 情况一:不需要查询搜索引擎
### 情况举例:
1. **用户发送的消息**不是在提问或寻求帮助
2. **用户发送的消息**是要求翻译文字
### 思考过程
根据上面的**情况举例**,如果符合,则按照下面**回复内容示例**进行回复,注意不要输出思考过程
### 回复内容示例:
none
## 情况二:需要查询搜索引擎
### 情况举例:
1. 答复**用户发送的消息**,需依赖互联网上最新的资料
2. 答复**用户发送的消息**,需依赖论文等专业资料
3. 通过查询资料,可以更好地答复**用户发送的消息**
### 思考过程
根据上面的**情况举例**,以及其他需要查询资料的情况,如果符合,按照以下步骤思考,并按照下面**回复内容示例**进行回复,注意不要输出思考过程:
1. What: 分析要答复**用户发送的消息**,需要了解什么知识和资料
2. How: 分析对于要查询的知识和资料,应该提出什么样的问题
3. Adjust: 明确查询什么问题后,用一句话概括问题,并且针对搜索引擎做问题优化
4. Final: 按照下面**回复内容示例**进行回复,注意:
- 不要输出思考过程
- 可以查询多次多个查询用换行分隔总查询次数控制在5次以内
- 需要以"internet:"开头
- 尽量满足**用户发送的消息**中的搜索要求,例如用户要求用英文搜索,则需用英文表述问题和关键词
- 用户如果没有要求搜索语言,则用和**用户发送的消息**一致的语言表述问题和关键词
- 如果**用户发送的消息**使用中文,至少要有一条向搜索引擎查询的中文问题
### 回复内容示例:
#### 用不同语言查询多次搜索引擎
internet: 黄金价格走势
internet: The trend of gold prices
# 用户发送的消息为:
{question}

View File

@@ -0,0 +1,55 @@
# 目标
你需要分析**用户发送的消息**,是否需要查询搜索引擎(Google/Bing)/私有知识库,并按照如下情况回复相应内容:
## 情况一:不需要查询搜索引擎/私有知识库
### 情况举例:
1. **用户发送的消息**不是在提问或寻求帮助
2. **用户发送的消息**是要求翻译文字
### 思考过程
根据上面的**情况举例**,如果符合,则按照下面**回复内容示例**进行回复,注意不要输出思考过程
### 回复内容示例:
none
## 情况二:需要查询搜索引擎/私有知识库
### 情况举例:
1. 答复**用户发送的消息**,需依赖互联网上最新的资料
2. 答复**用户发送的消息**,需依赖论文等专业资料
3. 通过查询资料,可以更好地答复**用户发送的消息**
### 思考过程
根据上面的**情况举例**,以及其他需要查询资料的情况,如果符合,按照以下步骤思考,并按照下面**回复内容示例**进行回复,注意不要输出思考过程:
1. What: 分析要答复**用户发送的消息**,需要了解什么知识和资料
2. Where: 判断了解这个知识和资料要向Google等搜索引擎提问还是向私有知识库进行查询或者需要同时查询多个地方
3. How: 分析对于要查询的知识和资料,应该提出什么样的问题
4. Adjust: 明确要向什么地方查询什么问题后,按下面方式对问题进行调整
4.1. 向搜索引擎提问:用一句话概括问题,并且针对搜索引擎做问题优化
4.2. 向私有知识库提问将问题拆分成多组关键词的组合同时组合中的关键词个数尽量不要超过3个
5. Final: 按照下面**回复内容示例**进行回复,注意:
- 不要输出思考过程
- 可以向多个查询目标分别查询多次多个查询用换行分隔总查询次数控制在5次以内
- 查询搜索引擎时,需要以"internet:"开头
- 查询私有知识库时,需要以"private:"开头
- 当用多个关键词查询时,关键词之间用","分隔
- 尽量满足**用户发送的消息**中的搜索要求,例如用户要求用英文搜索,则需用英文表述问题和关键词
- 用户如果没有要求搜索语言,则用和**用户发送的消息**一致的语言表述问题和关键词
- 如果**用户发送的消息**使用中文,至少要有一条向搜索引擎查询的中文问题
### 回复内容示例:
#### 用不同语言查询多次搜索引擎
internet: 黄金价格走势
internet: The trend of gold prices
#### 向私有知识库查询多次
private: 电子钱包,密码
private: 张三,身份证号
#### 向多个查询目标查询多次
internet: 中国未来房价趋势
internet: 最新中国经济政策
private: 财务状况
# 用户发送的消息为:
{question}

View File

@@ -0,0 +1,56 @@
import argparse
import requests
import time
import json


def main():
    """Fill a prompt template with a question, send it to a local
    chat-completions endpoint, and print the reply plus the elapsed time."""
    # 解析命令行参数
    cli = argparse.ArgumentParser(description='AI Search Test Script')
    cli.add_argument('--question', required=True, help='The question to analyze')
    cli.add_argument('--prompt', required=True, help='The prompt file to analyze')
    options = cli.parse_args()

    # 读取 prompt 模板并替换其中的 {question} 变量
    # (假设模板文件已复制到当前目录)
    with open(options.prompt, 'r', encoding='utf-8') as template_file:
        filled_prompt = template_file.read().replace('{question}', options.question)

    # 准备请求数据
    payload = {
        "model": "deepseek-v3",
        "max_tokens": 100,
        "messages": [
            {
                "role": "user",
                "content": filled_prompt
            }
        ]
    }

    # 发送请求并计时
    started = time.time()
    try:
        resp = requests.post(
            'http://localhost:8080/v1/chat/completions',
            headers={'Content-Type': 'application/json'},
            data=json.dumps(payload)
        )
        resp.raise_for_status()
        elapsed = time.time() - started
        # 处理响应
        result = resp.json()
        print("Response:")
        print(result['choices'][0]['message']['content'])
        print(f"\nRequest took {elapsed:.2f} seconds")
    except requests.exceptions.RequestException as exc:
        print(f"Request failed: {exc}")


if __name__ == '__main__':
    main()

View File

@@ -51,14 +51,14 @@ description: AI Token限流插件配置参考
`redis`中每一项的配置字段说明
| 配置项 | 类型 | 必填 | 默认值 | 说明 |
| ------------ | ------ | ---- | ---------------------------------------------------------- | --------------------------- |
| service_name | string | 必填 | - | redis 服务名称,带服务类型的完整 FQDN 名称,例如 my-redis.dns、redis.my-ns.svc.cluster.local |
| service_port | int | 否 | 服务类型为固定地址(static service)时,默认值为80;其他为6379 | 输入redis服务的服务端口 |
| username | string | 否 | - | redis用户名 |
| password | string | 否 | - | redis密码 |
| timeout | int | 否 | 1000 | redis连接超时时间单位毫秒 |
| 配置项 | 类型 | 必填 | 默认值 | 说明 |
| ------------ | ------ | ---- | ---------------------------------------------------------- | --------------------------- |
| service_name | string | 必填 | - | redis 服务名称,带服务类型的完整 FQDN 名称,例如 my-redis.dns、redis.my-ns.svc.cluster.local |
| service_port | int | 否 | 服务类型为固定地址static service默认值为80其他为6379 | 输入redis服务的服务端口 |
| username | string | 否 | - | redis用户名 |
| password | string | 否 | - | redis密码 |
| timeout | int | 否 | 1000 | redis连接超时时间单位毫秒 |
| database | int | 否 | 0 | 使用的数据库id例如配置为1对应`SELECT 1` |
## 配置示例
@@ -258,24 +258,12 @@ spec:
'*': "qwen-turbo"
ingress:
- qwen
url: oci://higress-registry.cn-hangzhou.cr.aliyuncs.com/plugins/ai-proxy:v1.0.0
url: oci://higress-registry.cn-hangzhou.cr.aliyuncs.com/plugins/ai-proxy:1.0.0
phase: UNSPECIFIED_PHASE
priority: 100
---
apiVersion: extensions.higress.io/v1alpha1
kind: WasmPlugin
metadata:
name: ai-statistics
namespace: higress-system
spec:
defaultConfig:
enable: true
url: oci://higress-registry.cn-hangzhou.cr.aliyuncs.com/plugins/ai-token-statistics:v1.0.0
phase: UNSPECIFIED_PHASE
priority: 200
---
apiVersion: extensions.higress.io/v1alpha1
kind: WasmPlugin
metadata:
name: ai-token-ratelimit
namespace: higress-system
@@ -294,7 +282,7 @@ spec:
# service_name: redis.default.svc.cluster.local
service_name: redis.dns
service_port: 6379
url: oci://higress-registry.cn-hangzhou.cr.aliyuncs.com/plugins/ai-token-ratelimit:v1.0.0
url: oci://higress-registry.cn-hangzhou.cr.aliyuncs.com/plugins/ai-token-ratelimit:1.0.0
phase: UNSPECIFIED_PHASE
priority: 600
```
@@ -370,10 +358,19 @@ spec:
pathType: Prefix
```
转发 higress-gateway 的流量到本地,方便进行测试。
```bash
kubectl port-forward svc/higress-gateway -n higress-system 18000:80
```
触发限流效果如下:
```bash
curl "http://qwen-test.com:18000/v1/chat/completions?apikey=123456" -H "Content-Type: application/json" -d '{
curl "http://localhost:18000/v1/chat/completions?apikey=123456" \
-H "Host: qwen-test.com" \
-H "Content-Type: application/json" \
-d '{
"model": "gpt-3",
"messages": [
{

View File

@@ -43,13 +43,14 @@ Field descriptions for each item in `limit_keys`
| token_per_day | int | No, optionally select one in `token_per_second`, `token_per_minute`, `token_per_hour`, `token_per_day` | - | Allowed number of token requests per day |
Field descriptions for each item in `redis`
| Configuration Item | Type | Required | Default Value | Description |
| ----------------------- | ----------------- | -------- | --------------------------------------------------------------- | ----------------------------------------------- |
| service_name | string | Required | - | Full FQDN name of the redis service, including service type, e.g., my-redis.dns, redis.my-ns.svc.cluster.local |
| service_port | int | No | Default value for static addresses (static service) is 80; otherwise, it is 6379 | Input the service port of the redis service |
| username | string | No | - | Redis username |
| password | string | No | - | Redis password |
| timeout | int | No | 1000 | Redis connection timeout in milliseconds |
| Configuration Item | Type | Required | Default Value | Description |
| ----------------------- | ----------------- | -------- | --------------------------------------------------------------- | ----------------------------------------------- |
| service_name | string | Required | - | Full FQDN name of the redis service, including service type, e.g., my-redis.dns, redis.my-ns.svc.cluster.local |
| service_port | int | No | Default value for static addresses (static service) is 80; otherwise, it is 6379 | Input the service port of the redis service |
| username | string | No | - | Redis username |
| password | string | No | - | Redis password |
| timeout | int | No | 1000 | Redis connection timeout in milliseconds |
| database | int | No | 0 | The database ID used, for example, configured as 1, corresponds to `SELECT 1`. |
## Configuration Examples
### Identify request parameter apikey for differentiated rate limiting
@@ -233,24 +234,12 @@ spec:
'*': "qwen-turbo"
ingress:
- qwen
url: oci://higress-registry.cn-hangzhou.cr.aliyuncs.com/plugins/ai-proxy:v1.0.0
url: oci://higress-registry.cn-hangzhou.cr.aliyuncs.com/plugins/ai-proxy:1.0.0
phase: UNSPECIFIED_PHASE
priority: 100
---
apiVersion: extensions.higress.io/v1alpha1
kind: WasmPlugin
metadata:
name: ai-statistics
namespace: higress-system
spec:
defaultConfig:
enable: true
url: oci://higress-registry.cn-hangzhou.cr.aliyuncs.com/plugins/ai-token-statistics:v1.0.0
phase: UNSPECIFIED_PHASE
priority: 200
---
apiVersion: extensions.higress.io/v1alpha1
kind: WasmPlugin
metadata:
name: ai-token-ratelimit
namespace: higress-system
@@ -269,7 +258,7 @@ spec:
# service_name: redis.default.svc.cluster.local
service_name: redis.dns
service_port: 6379
url: oci://higress-registry.cn-hangzhou.cr.aliyuncs.com/plugins/ai-token-ratelimit:v1.0.0
url: oci://higress-registry.cn-hangzhou.cr.aliyuncs.com/plugins/ai-token-ratelimit:1.0.0
phase: UNSPECIFIED_PHASE
priority: 600
```
@@ -346,10 +335,19 @@ spec:
pathType: Prefix
```
Forward the traffic of higress-gateway to the local, making it convenient for testing.
```bash
kubectl port-forward svc/higress-gateway -n higress-system 18000:80
```
The rate limiting effect is triggered as follows:
```bash
curl "http://qwen-test.com:18000/v1/chat/completions?apikey=123456" -H "Content-Type: application/json" -d '{
curl "http://localhost:18000/v1/chat/completions?apikey=123456" \
-H "Host: qwen-test.com" \
-H "Content-Type: application/json" \
-d '{
"model": "gpt-3",
"messages": [
{

View File

@@ -110,7 +110,8 @@ func initRedisClusterClient(json gjson.Result, config *ClusterKeyRateLimitConfig
FQDN: serviceName,
Port: int64(servicePort),
})
return config.redisClient.Init(username, password, int64(timeout))
database := int(redisConfig.Get("database").Int())
return config.redisClient.Init(username, password, int64(timeout), wrapper.WithDataBase(database))
}
func parseClusterKeyRateLimitConfig(json gjson.Result, config *ClusterKeyRateLimitConfig) error {

View File

@@ -52,13 +52,14 @@ description: 基于 Key 集群限流插件配置参考
`redis` 中每一项的配置字段说明。
| 配置项 | 类型 | 必填 | 默认值 | 说明 |
| ------------ | ------ | ---- | ---------------------------------------------------------- |---------------------------------------------------------------------------|
| 配置项 | 类型 | 必填 | 默认值 | 说明 |
| ------------ | ------ | ---- | ---------------------------------------------------------- | --------------------------------------------------------------------------- |
| service_name | string | 必填 | - | redis 服务名称,带服务类型的完整 FQDN 名称,例如 my-redis.dns、redis.my-ns.svc.cluster.local |
| service_port | int | 否 | 服务类型为固定地址static service默认值为80其他为6379 | 输入redis服务的服务端口 |
| username | string | 否 | - | redis 用户名 |
| password | string | 否 | - | redis 密码 |
| timeout | int | 否 | 1000 | redis 连接超时时间,单位毫秒 |
| service_port | int | 否 | 服务类型为固定地址static service默认值为80其他为6379 | 输入redis服务的服务端口 |
| username | string | 否 | - | redis 用户名 |
| password | string | 否 | - | redis 密码 |
| timeout | int | 否 | 1000 | redis 连接超时时间,单位毫秒 |
| database | int | 否 | 0 | 使用的数据库id,例如配置为1,对应`SELECT 1` |
## 配置示例

View File

@@ -46,13 +46,15 @@ Description of configuration fields for each item in `limit_keys`.
| query_per_day | int | No, one of `query_per_second`, `query_per_minute`, `query_per_hour`, `query_per_day` is optional. | - | Allowed number of requests per day. |
Description of configuration fields for each item in `redis`.
| Configuration Item | Type | Required | Default Value | Description |
|---------------------------|---------------|----------|------------------------------------------------------------|---------------------------------------------------------------------------|
| service_name | string | Required | - | Full FQDN name of the Redis service, including service type, e.g., my-redis.dns, redis.my-ns.svc.cluster.local. |
| service_port | int | No | 80 for static services; otherwise 6379 | Service port for the Redis service. |
| username | string | No | - | Redis username. |
| password | string | No | - | Redis password. |
| timeout | int | No | 1000 | Redis connection timeout in milliseconds. |
| Configuration Item | Type | Required | Default Value | Description |
|--------------------|--------|----------|----------------------------------------|-----------------------------------------------------------------------------------------------------------------|
| service_name | string | Required | - | Full FQDN name of the Redis service, including service type, e.g., my-redis.dns, redis.my-ns.svc.cluster.local. |
| service_port | int | No | 80 for static services; otherwise 6379 | Service port for the Redis service. |
| username | string | No | - | Redis username. |
| password | string | No | - | Redis password. |
| timeout | int | No | 1000 | Redis connection timeout in milliseconds. |
| database | int | No | 0 | The database ID used, for example, configured as 1, corresponds to `SELECT 1`. |
## Configuration Examples

View File

@@ -110,7 +110,8 @@ func initRedisClusterClient(json gjson.Result, config *ClusterKeyRateLimitConfig
FQDN: serviceName,
Port: int64(servicePort),
})
return config.redisClient.Init(username, password, int64(timeout))
database := int(redisConfig.Get("database").Int())
return config.redisClient.Init(username, password, int64(timeout), wrapper.WithDataBase(database))
}
func parseClusterKeyRateLimitConfig(json gjson.Result, config *ClusterKeyRateLimitConfig) error {

View File

@@ -77,6 +77,7 @@ MatchRule 类型每一项的配置字段说明,在使用 `array of MatchRule`
| 名称 | 数据类型 | 必填 | 默认值 | 描述 |
| ------------------- | -------- | ---- | ------ | ------------------------------------------------------------ |
| `match_rule_domain` | string | 否 | - | 匹配规则域名,支持通配符模式,例如 `*.bar.com` |
| `match_rule_method` | []string | 否 | - | 匹配请求方法 |
| `match_rule_path` | string | 否 | - | 匹配请求路径的规则 |
| `match_rule_type` | string | 否 | - | 匹配请求路径的规则类型,可选 `exact` , `prefix` , `suffix`, `contains`, `regex` |
@@ -100,27 +101,41 @@ MatchRule 类型每一项的配置字段说明,在使用 `array of MatchRule`
**白名单模式**
```yaml
# 白名单模式配置,符合白名单规则的请求无需验证
match_type: 'whitelist'
match_list:
- match_rule_domain: '*.bar.com'
match_rule_path: '/foo'
match_rule_type: 'prefix'
# 所有以 api.example.com 为域名,且路径前缀为 /public 的请求无需验证
- match_rule_domain: 'api.example.com'
match_rule_path: '/public'
match_rule_type: 'prefix'
# 针对图片资源服务器 images.example.com所有 GET 请求无需验证
- match_rule_domain: 'images.example.com'
match_rule_method: ["GET"]
# 所有域名下,路径精确匹配 /health-check 的 HEAD 请求无需验证
- match_rule_method: ["HEAD"]
match_rule_path: '/health-check'
match_rule_type: 'exact'
```
泛域名 `*.bar.com` 下前缀匹配 `/foo` 的请求无需验证
**黑名单模式**
```yaml
# 黑名单模式配置,符合黑名单规则的请求需要验证
match_type: 'blacklist'
match_list:
- match_rule_domain: '*.bar.com'
match_rule_path: '/headers'
match_rule_type: 'prefix'
# 所有以 admin.example.com 为域名,且路径前缀为 /sensitive 的请求需要验证
- match_rule_domain: 'admin.example.com'
match_rule_path: '/sensitive'
match_rule_type: 'prefix'
# 所有域名下,路径精确匹配 /user 的 DELETE 请求需要验证
- match_rule_method: ["DELETE"]
match_rule_path: '/user'
match_rule_type: 'exact'
# 所有以 legacy.example.com 为域名的 POST 请求需要验证
- match_rule_domain: 'legacy.example.com'
match_rule_method: ["POST"]
```
只有泛域名 `*.bar.com` 下前缀匹配 `/headers` 的请求需要验证
## 配置示例
下面假设 `ext-auth` 服务在 Kubernetes 中 serviceName 为 `ext-auth`,端口 `8090`,路径为 `/auth`,命名空间为 `backend`
@@ -185,13 +200,13 @@ content-length: 0
http_service:
authorization_request:
allowed_headers:
- exact: x-auth-version
- exact: x-auth-version
headers_to_add:
x-envoy-header: true
authorization_response:
allowed_upstream_headers:
- exact: x-user-id
- exact: x-auth-version
- exact: x-user-id
- exact: x-auth-version
endpoint_mode: envoy
endpoint:
service_name: ext-auth.backend.svc.cluster.local
@@ -287,13 +302,13 @@ content-length: 0
http_service:
authorization_request:
allowed_headers:
- exact: x-auth-version
- exact: x-auth-version
headers_to_add:
x-envoy-header: true
authorization_response:
allowed_upstream_headers:
- exact: x-user-id
- exact: x-auth-version
- exact: x-user-id
- exact: x-auth-version
endpoint_mode: forward_auth
endpoint:
service_name: ext-auth.backend.svc.cluster.local

View File

@@ -77,6 +77,7 @@ Configuration fields for each item of `MatchRule` type. When using `array of Mat
| Name | Data Type | Required | Default Value | Description |
| --- | --- | --- | --- | --- |
| `match_rule_domain` | string | No | - | The domain of the matching rule, supports wildcard patterns, e.g., `*.bar.com` |
| `match_rule_method` | []string | No | - | Matching rule for the request method |
| `match_rule_path` | string | No | - | The rule for matching the request path |
| `match_rule_type` | string | No | - | The type of the rule for matching the request path, can be `exact`, `prefix`, `suffix`, `contains`, `regex` |
@@ -100,27 +101,41 @@ Supports blacklist and whitelist mode configuration. The default is the whitelis
**Whitelist Mode**
```yaml
# Configuration for the whitelist mode. Requests that match the whitelist rules do not need verification.
match_type: 'whitelist'
match_list:
- match_rule_domain: '*.bar.com'
match_rule_path: '/foo'
match_rule_type: 'prefix'
# Requests with the domain name api.example.com and a path prefixed with /public do not need verification.
- match_rule_domain: 'api.example.com'
match_rule_path: '/public'
match_rule_type: 'prefix'
# For the image resource server images.example.com, all GET requests do not need verification.
- match_rule_domain: 'images.example.com'
match_rule_method: ["GET"]
# For all domains, HEAD requests with an exact path match of /health-check do not need verification.
- match_rule_method: ["HEAD"]
match_rule_path: '/health-check'
match_rule_type: 'exact'
```
Requests with a prefix match of `/foo` under the wildcard domain `*.bar.com` do not need to be verified.
**Blacklist Mode**
```yaml
# Configuration for the blacklist mode. Requests that match the blacklist rules need verification.
match_type: 'blacklist'
match_list:
- match_rule_domain: '*.bar.com'
match_rule_path: '/headers'
match_rule_type: 'prefix'
# Requests with the domain name admin.example.com and a path prefixed with /sensitive need verification.
- match_rule_domain: 'admin.example.com'
match_rule_path: '/sensitive'
match_rule_type: 'prefix'
# For all domains, DELETE requests with an exact path match of /user need verification.
- match_rule_method: ["DELETE"]
match_rule_path: '/user'
match_rule_type: 'exact'
# For the domain legacy.example.com, all POST requests need verification.
- match_rule_domain: 'legacy.example.com'
match_rule_method: ["POST"]
```
Only requests with a prefix match of `/headers` under the wildcard domain `*.bar.com` need to be verified.
## Configuration Examples
@@ -186,13 +201,13 @@ Configuration of the `ext-auth` plugin:
http_service:
authorization_request:
allowed_headers:
- exact: x-auth-version
- exact: x-auth-version
headers_to_add:
x-envoy-header: true
authorization_response:
allowed_upstream_headers:
- exact: x-user-id
- exact: x-auth-version
- exact: x-user-id
- exact: x-auth-version
endpoint_mode: envoy
endpoint:
service_name: ext-auth.backend.svc.cluster.local
@@ -286,13 +301,13 @@ Configuration of the `ext-auth` plugin:
http_service:
authorization_request:
allowed_headers:
- exact: x-auth-version
- exact: x-auth-version
headers_to_add:
x-envoy-header: true
authorization_response:
allowed_upstream_headers:
- exact: x-user-id
- exact: x-auth-version
- exact: x-user-id
- exact: x-auth-version
endpoint_mode: forward_auth
endpoint:
service_name: ext-auth.backend.svc.cluster.local

View File

@@ -260,19 +260,28 @@ func parseMatchRules(json gjson.Result, config *ExtAuthConfig) error {
var err error
matchListConfig.ForEach(func(key, value gjson.Result) bool {
pathMatcher, buildErr := expr.BuildStringMatcher(
value.Get("match_rule_type").Str,
value.Get("match_rule_path").Str, false)
if buildErr != nil {
err = fmt.Errorf("failed to build string matcher for rule with domain %q, path %q, type %q: %w",
value.Get("match_rule_domain").Str,
value.Get("match_rule_path").Str,
value.Get("match_rule_type").Str,
buildErr)
return false // stop iterating
domain := value.Get("match_rule_domain").Str
methodArray := value.Get("match_rule_method").Array()
matchRuleType := value.Get("match_rule_type").Str
matchRulePath := value.Get("match_rule_path").Str
var pathMatcher expr.Matcher
var buildErr error
if matchRuleType == "" && matchRulePath == "" {
pathMatcher = nil
} else {
pathMatcher, buildErr = expr.BuildStringMatcher(matchRuleType, matchRulePath, false)
if buildErr != nil {
err = fmt.Errorf("failed to build string matcher for rule with domain %q, method %v, path %q, type %q: %w",
domain, methodArray, matchRulePath, matchRuleType, buildErr)
return false // stop iterating
}
}
ruleList = append(ruleList, expr.Rule{
Domain: value.Get("match_rule_domain").Str,
Domain: domain,
Method: convertToStringList(methodArray),
Path: pathMatcher,
})
return true // keep iterating
@@ -297,3 +306,11 @@ func convertToStringMap(result gjson.Result) map[string]string {
})
return m
}
func convertToStringList(results []gjson.Result) []string {
interfaces := make([]string, len(results))
for i, result := range results {
interfaces[i] = result.String()
}
return interfaces
}

View File

@@ -218,6 +218,7 @@ func TestParseConfig(t *testing.T) {
RuleList: []expr.Rule{
{
Domain: "*.bar.com",
Method: []string{},
Path: func() expr.Matcher {
pathMatcher, err := expr.BuildStringMatcher(expr.MatchPatternPrefix, "/headers", false)
if err != nil {
@@ -248,6 +249,7 @@ func TestParseConfig(t *testing.T) {
"match_list": [
{
"match_rule_domain": "*.foo.com",
"match_rule_method": ["GET"],
"match_rule_path": "/api",
"match_rule_type": "exact"
}
@@ -269,6 +271,7 @@ func TestParseConfig(t *testing.T) {
RuleList: []expr.Rule{
{
Domain: "*.foo.com",
Method: []string{"GET"},
Path: func() expr.Matcher {
pathMatcher, err := expr.BuildStringMatcher(expr.MatchPatternExact, "/api", false)
if err != nil {
@@ -284,6 +287,50 @@ func TestParseConfig(t *testing.T) {
StatusOnError: 403,
},
},
{
name: "Valid Match Rules with Whitelist - Only Method",
json: `{
"http_service": {
"endpoint_mode": "envoy",
"endpoint": {
"service_name": "example.com",
"service_port": 80,
"path_prefix": "/auth"
}
},
"match_type": "whitelist",
"match_list": [
{
"match_rule_method": ["GET"]
}
]
}`,
expected: ExtAuthConfig{
HttpService: HttpService{
EndpointMode: "envoy",
Client: wrapper.NewClusterClient(wrapper.FQDNCluster{
FQDN: "example.com",
Port: 80,
Host: "",
}),
PathPrefix: "/auth",
Timeout: 1000,
},
MatchRules: expr.MatchRules{
Mode: "whitelist",
RuleList: []expr.Rule{
{
Domain: "",
Method: []string{"GET"},
Path: nil,
},
},
},
FailureModeAllow: false,
FailureModeAllowHeaderAdd: false,
StatusOnError: 403,
},
},
{
name: "Missing Match Type",
json: `{
@@ -342,12 +389,13 @@ func TestParseConfig(t *testing.T) {
"match_list": [
{
"match_rule_domain": "*.bar.com",
"match_rule_method": ["POST","PUT","DELETE"],
"match_rule_path": "/headers",
"match_rule_type": "invalid_type"
}
]
}`,
expectedErr: `failed to build string matcher for rule with domain "*.bar.com", path "/headers", type "invalid_type": unknown string matcher type`,
expectedErr: `failed to build string matcher for rule with domain "*.bar.com", method [POST PUT DELETE], path "/headers", type "invalid_type": unknown string matcher type`,
},
}

View File

@@ -3,6 +3,7 @@ package expr
import (
"strings"
"ext-auth/util"
regexp "github.com/wasilibs/go-re2"
)
@@ -18,6 +19,7 @@ type MatchRules struct {
type Rule struct {
Domain string
Method []string
Path Matcher
}
@@ -28,19 +30,19 @@ func MatchRulesDefaults() MatchRules {
}
}
// IsAllowedByMode checks if the given domain and path are allowed based on the configuration mode.
func (config *MatchRules) IsAllowedByMode(domain, path string) bool {
// IsAllowedByMode checks if the given domain, method and path are allowed based on the configuration mode.
func (config *MatchRules) IsAllowedByMode(domain, method, path string) bool {
switch config.Mode {
case ModeWhitelist:
for _, rule := range config.RuleList {
if rule.matchDomainAndPath(domain, path) {
if rule.matchesAllConditions(domain, method, path) {
return true
}
}
return false
case ModeBlacklist:
for _, rule := range config.RuleList {
if rule.matchDomainAndPath(domain, path) {
if rule.matchesAllConditions(domain, method, path) {
return false
}
}
@@ -50,17 +52,21 @@ func (config *MatchRules) IsAllowedByMode(domain, path string) bool {
}
}
// matchDomainAndPath checks if the given domain and path match the rule.
// If rule.Domain is empty, it only checks rule.Path.
// If rule.Path is empty, it only checks rule.Domain.
// If both are empty, it returns false.
func (rule *Rule) matchDomainAndPath(domain, path string) bool {
if rule.Domain == "" && rule.Path == nil {
// matchesAllConditions checks if the given domain, method and path match all conditions of the rule.
func (rule *Rule) matchesAllConditions(domain, method, path string) bool {
// If all conditions are empty, return false
if rule.Domain == "" && rule.Path == nil && len(rule.Method) == 0 {
return false
}
// Check domain and path matching
domainMatch := rule.Domain == "" || matchDomain(domain, rule.Domain)
pathMatch := rule.Path == nil || rule.Path.Match(path)
return domainMatch && pathMatch
// Check HTTP method matching: if no methods are specified, any method is allowed
methodMatch := len(rule.Method) == 0 || util.ContainsString(rule.Method, method)
return domainMatch && pathMatch && methodMatch
}
// matchDomain checks if the given domain matches the pattern.

View File

@@ -6,11 +6,20 @@ import (
"github.com/stretchr/testify/assert"
)
func createMatcher(pattern string, caseSensitive bool) Matcher {
pathMatcher, err := newStringExactMatcher(pattern, caseSensitive)
if err != nil {
panic(err)
}
return pathMatcher
}
func TestIsAllowedByMode(t *testing.T) {
tests := []struct {
name string
config MatchRules
domain string
method string
path string
expected bool
}{
@@ -21,17 +30,13 @@ func TestIsAllowedByMode(t *testing.T) {
RuleList: []Rule{
{
Domain: "example.com",
Path: func() Matcher {
pathMatcher, err := newStringExactMatcher("/foo", true)
if err != nil {
t.Fatalf("Failed to create Matcher: %v", err)
}
return pathMatcher
}(),
Method: []string{"GET"},
Path: createMatcher("/foo", true),
},
},
},
domain: "example.com",
method: "GET",
path: "/foo",
expected: true,
},
@@ -42,18 +47,14 @@ func TestIsAllowedByMode(t *testing.T) {
RuleList: []Rule{
{
Domain: "example.com",
Path: func() Matcher {
pathMatcher, err := newStringExactMatcher("/foo", true)
if err != nil {
t.Fatalf("Failed to create Matcher: %v", err)
}
return pathMatcher
}(),
Method: []string{"GET"},
Path: createMatcher("/foo", true),
},
},
},
domain: "example.com",
path: "/bar",
method: "POST",
path: "/foo",
expected: false,
},
{
@@ -63,17 +64,13 @@ func TestIsAllowedByMode(t *testing.T) {
RuleList: []Rule{
{
Domain: "example.com",
Path: func() Matcher {
pathMatcher, err := newStringExactMatcher("/foo", true)
if err != nil {
t.Fatalf("Failed to create Matcher: %v", err)
}
return pathMatcher
}(),
Method: []string{"GET"},
Path: createMatcher("/foo", true),
},
},
},
domain: "example.com",
method: "GET",
path: "/foo",
expected: false,
},
@@ -84,18 +81,14 @@ func TestIsAllowedByMode(t *testing.T) {
RuleList: []Rule{
{
Domain: "example.com",
Path: func() Matcher {
pathMatcher, err := newStringExactMatcher("/foo", true)
if err != nil {
t.Fatalf("Failed to create Matcher: %v", err)
}
return pathMatcher
}(),
Method: []string{"GET"},
Path: createMatcher("/foo", true),
},
},
},
domain: "example.com",
path: "/bar",
method: "POST",
path: "/foo",
expected: true,
},
{
@@ -107,6 +100,7 @@ func TestIsAllowedByMode(t *testing.T) {
},
},
domain: "example.com",
method: "GET",
path: "/foo",
expected: true,
},
@@ -117,29 +111,25 @@ func TestIsAllowedByMode(t *testing.T) {
RuleList: []Rule{
{
Domain: "",
Path: func() Matcher {
pathMatcher, err := newStringExactMatcher("/foo", true)
if err != nil {
t.Fatalf("Failed to create Matcher: %v", err)
}
return pathMatcher
}(),
Path: createMatcher("/foo", true),
},
},
},
domain: "example.com",
method: "GET",
path: "/foo",
expected: true,
},
{
name: "Both Domain and Path are empty",
name: "All fields (Domain, Method, Path) are empty",
config: MatchRules{
Mode: ModeWhitelist,
RuleList: []Rule{
{Domain: "", Path: nil},
{Domain: "", Method: []string{}, Path: nil},
},
},
domain: "example.com",
method: "GET",
path: "/foo",
expected: false,
},
@@ -150,17 +140,13 @@ func TestIsAllowedByMode(t *testing.T) {
RuleList: []Rule{
{
Domain: "example.com",
Path: func() Matcher {
pathMatcher, err := newStringExactMatcher("/foo", true)
if err != nil {
t.Fatalf("Failed to create Matcher: %v", err)
}
return pathMatcher
}(),
Method: []string{"GET"},
Path: createMatcher("/foo", true),
},
},
},
domain: "example.com",
method: "GET",
path: "/foo",
expected: false,
},
@@ -171,17 +157,13 @@ func TestIsAllowedByMode(t *testing.T) {
RuleList: []Rule{
{
Domain: "*.example.com",
Path: func() Matcher {
pathMatcher, err := newStringExactMatcher("/foo", true)
if err != nil {
t.Fatalf("Failed to create Matcher: %v", err)
}
return pathMatcher
}(),
Method: []string{"GET"},
Path: createMatcher("/foo", true),
},
},
},
domain: "sub.example.com",
method: "GET",
path: "/foo",
expected: true,
},
@@ -192,20 +174,48 @@ func TestIsAllowedByMode(t *testing.T) {
RuleList: []Rule{
{
Domain: "*.example.com",
Path: func() Matcher {
pathMatcher, err := newStringExactMatcher("/foo", true)
if err != nil {
t.Fatalf("Failed to create Matcher: %v", err)
}
return pathMatcher
}(),
Method: []string{"GET"},
Path: createMatcher("/foo", true),
},
},
},
domain: "example.com",
method: "GET",
path: "/foo",
expected: false,
},
{
name: "Whitelist mode, only method matches",
config: MatchRules{
Mode: ModeWhitelist,
RuleList: []Rule{
{
Method: []string{"GET"},
Path: nil,
},
},
},
domain: "example.com",
method: "GET",
path: "/foo",
expected: true,
},
{
name: "Whitelist mode, only domain matches",
config: MatchRules{
Mode: ModeWhitelist,
RuleList: []Rule{
{
Domain: "example.com",
Path: nil,
},
},
},
domain: "example.com",
method: "GET",
path: "/foo",
expected: true,
},
{
name: "Blacklist mode, generic domain matches",
config: MatchRules{
@@ -213,17 +223,13 @@ func TestIsAllowedByMode(t *testing.T) {
RuleList: []Rule{
{
Domain: "*.example.com",
Path: func() Matcher {
pathMatcher, err := newStringExactMatcher("/foo", true)
if err != nil {
t.Fatalf("Failed to create Matcher: %v", err)
}
return pathMatcher
}(),
Method: []string{"GET"},
Path: createMatcher("/foo", true),
},
},
},
domain: "sub.example.com",
method: "GET",
path: "/foo",
expected: false,
},
@@ -234,25 +240,89 @@ func TestIsAllowedByMode(t *testing.T) {
RuleList: []Rule{
{
Domain: "*.example.com",
Path: func() Matcher {
pathMatcher, err := newStringExactMatcher("/foo", true)
if err != nil {
t.Fatalf("Failed to create Matcher: %v", err)
}
return pathMatcher
}(),
Method: []string{"GET"},
Path: createMatcher("/foo", true),
},
},
},
domain: "example.com",
method: "GET",
path: "/foo",
expected: true,
},
{
name: "Domain with special characters",
config: MatchRules{
Mode: ModeWhitelist,
RuleList: []Rule{
{
Domain: "example-*.com",
Method: []string{"GET"},
Path: createMatcher("/foo", true),
},
},
},
domain: "example-test.com",
method: "GET",
path: "/foo",
expected: true,
},
{
name: "Path with special characters",
config: MatchRules{
Mode: ModeWhitelist,
RuleList: []Rule{
{
Domain: "example.com",
Method: []string{"GET"},
Path: createMatcher("/foo-bar", true),
},
},
},
domain: "example.com",
method: "GET",
path: "/foo-bar",
expected: true,
},
{
name: "Multiple methods, one matches",
config: MatchRules{
Mode: ModeWhitelist,
RuleList: []Rule{
{
Domain: "example.com",
Method: []string{"GET", "POST"},
Path: createMatcher("/foo", true),
},
},
},
domain: "example.com",
method: "POST",
path: "/foo",
expected: true,
},
{
name: "Multiple methods, none match",
config: MatchRules{
Mode: ModeWhitelist,
RuleList: []Rule{
{
Domain: "example.com",
Method: []string{"GET", "POST"},
Path: createMatcher("/foo", true),
},
},
},
domain: "example.com",
method: "PUT",
path: "/foo",
expected: false,
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
result := tt.config.IsAllowedByMode(tt.domain, tt.path)
result := tt.config.IsAllowedByMode(tt.domain, tt.method, tt.path)
assert.Equal(t, tt.expected, result)
})
}

View File

@@ -51,9 +51,8 @@ const (
)
func onHttpRequestHeaders(ctx wrapper.HttpContext, config config.ExtAuthConfig, log wrapper.Log) types.Action {
path := wrapper.GetRequestPathWithoutQuery()
// If the request's domain and path match the MatchRules, skip authentication
if config.MatchRules.IsAllowedByMode(ctx.Host(), path) {
if config.MatchRules.IsAllowedByMode(ctx.Host(), ctx.Method(), wrapper.GetRequestPathWithoutQuery()) {
ctx.DontReadRequestBody()
return types.ActionContinue
}

View File

@@ -37,3 +37,12 @@ func ExtractFromHeader(headers [][2]string, headerKey string) string {
}
return ""
}
func ContainsString(slice []string, s string) bool {
for _, item := range slice {
if strings.EqualFold(item, s) {
return true
}
}
return false
}

View File

@@ -3,10 +3,10 @@ WORKDIR /workspace
RUN rustup target add wasm32-wasip1
ARG PLUGIN_NAME="say-hello"
ARG BUILD_OPTS="--release"
ARG BUILDRC=".buildrc"
ARG PREBUILD=".prebuild"
COPY . .
WORKDIR /workspace/extensions/$PLUGIN_NAME
RUN if [ -f $BUILDRC ]; then sh $BUILDRC; fi
RUN if [ -f $PREBUILD ]; then sh $PREBUILD; fi
RUN cargo build --target wasm32-wasip1 $BUILD_OPTS \
&& cp target/wasm32-wasip1/release/*.wasm /main.wasm

View File

@@ -1 +0,0 @@
apt update && apt-get install gcc gcc-multilib llvm clang -y && apt clean

View File

@@ -0,0 +1,3 @@
# Pre-build step for the Rust wasm plugin build image: install the C
# toolchain (gcc, gcc-multilib, llvm, clang) required to compile native
# dependencies, then drop the apt package cache to keep the image small.
apt-get update
apt-get install gcc gcc-multilib llvm clang -y
apt-get clean

View File

@@ -13,8 +13,10 @@
// limitations under the License.
mod deny_word;
mod msg_window;
use crate::deny_word::DenyWord;
use crate::msg_window::MsgWindow;
use fancy_regex::Regex;
use grok::patterns;
use higress_wasm_rust::log::Log;
@@ -27,8 +29,8 @@ use proxy_wasm::traits::{Context, HttpContext, RootContext};
use proxy_wasm::types::{Bytes, ContextType, DataAction, HeaderAction, LogLevel};
use rust_embed::Embed;
use serde::de::Error;
use serde::Deserialize;
use serde::Deserializer;
use serde::{Deserialize, Serialize};
use serde_json::{json, Value};
use std::cell::RefCell;
use std::collections::{BTreeMap, HashMap, VecDeque};
@@ -66,9 +68,12 @@ struct AiDataMasking {
config: Option<Rc<AiDataMaskingConfig>>,
mask_map: HashMap<String, Option<String>>,
is_openai: bool,
is_openai_stream: Option<bool>,
stream: bool,
res_body: Bytes,
log: Log,
msg_window: MsgWindow,
char_window_size: usize,
byte_window_size: usize,
}
fn deserialize_regexp<'de, D>(deserializer: D) -> Result<Regex, D::Error>
where
@@ -213,10 +218,33 @@ struct ResMessage {
#[serde(default)]
delta: Option<Message>,
}
#[derive(Default, Debug, Deserialize, Serialize, Clone)]
struct Usage {
completion_tokens: i32,
prompt_tokens: i32,
total_tokens: i32,
}
impl Usage {
pub fn add(&mut self, usage: &Usage) {
self.completion_tokens += usage.completion_tokens;
self.prompt_tokens += usage.prompt_tokens;
self.total_tokens += usage.total_tokens;
}
pub fn reset(&mut self) {
self.completion_tokens = 0;
self.prompt_tokens = 0;
self.total_tokens = 0;
}
}
#[derive(Default, Debug, Deserialize)]
struct Res {
#[serde(default)]
choices: Vec<ResMessage>,
#[serde(default)]
usage: Usage,
}
static SYSTEM_PATTERNS: &[(&str, &str)] = &[
@@ -334,9 +362,12 @@ impl RootContextWrapper<AiDataMaskingConfig> for AiDataMaskingRoot {
mask_map: HashMap::new(),
config: None,
is_openai: false,
is_openai_stream: None,
stream: false,
res_body: Bytes::new(),
msg_window: MsgWindow::new(),
log: Log::new(PLUGIN_NAME.to_string()),
char_window_size: 0,
byte_window_size: 0,
}))
}
}
@@ -416,32 +447,6 @@ impl AiDataMasking {
DataAction::StopIterationAndBuffer
}
fn process_sse_message(&mut self, sse_message: &str) -> Vec<String> {
let mut messages = Vec::new();
for msg in sse_message.split('\n') {
if !msg.starts_with("data:") {
continue;
}
let res: Res = if let Some(m) = msg.strip_prefix("data:") {
match serde_json::from_str(m) {
Ok(r) => r,
Err(_) => continue,
}
} else {
continue;
};
if res.choices.is_empty() {
continue;
}
for choice in &res.choices {
if let Some(delta) = &choice.delta {
messages.push(delta.content.clone());
}
}
}
messages
}
fn replace_request_msg(&mut self, message: &str) -> String {
let config = self.config.as_ref().unwrap();
let mut msg = message.to_string();
@@ -464,6 +469,13 @@ impl AiDataMasking {
}
Type::Replace => rule.regex.replace(from_word, &rule.value).to_string(),
};
if to_word.len() > self.byte_window_size {
self.byte_window_size = to_word.len();
}
if to_word.chars().count() > self.char_window_size {
self.char_window_size = to_word.chars().count();
}
replace_pair.push((from_word.to_string(), to_word.clone()));
if rule.restore && !to_word.is_empty() {
@@ -499,6 +511,7 @@ impl HttpContext for AiDataMasking {
_end_of_stream: bool,
) -> HeaderAction {
if has_request_body() {
self.set_http_request_header("Content-Length", None);
HeaderAction::StopIteration
} else {
HeaderAction::Continue
@@ -512,58 +525,41 @@ impl HttpContext for AiDataMasking {
self.set_http_response_header("Content-Length", None);
HeaderAction::Continue
}
fn on_http_response_body(&mut self, body_size: usize, _end_of_stream: bool) -> DataAction {
fn on_http_response_body(&mut self, body_size: usize, end_of_stream: bool) -> DataAction {
if !self.stream {
return DataAction::Continue;
}
if let Some(body) = self.get_http_response_body(0, body_size) {
self.res_body.extend(&body);
if let Ok(body_str) = String::from_utf8(self.res_body.clone()) {
if self.is_openai {
let messages = self.process_sse_message(&body_str);
if self.check_message(&messages.join("")) {
if body_size > 0 {
if let Some(body) = self.get_http_response_body(0, body_size) {
if self.is_openai && self.is_openai_stream.is_none() {
self.is_openai_stream = Some(body.starts_with(b"data:"));
}
self.msg_window.push(&body, self.is_openai_stream.unwrap());
if let Ok(mut msg) = String::from_utf8(self.msg_window.message.clone()) {
if self.check_message(&msg) {
return self.deny(true);
}
} else if self.check_message(&body_str) {
return self.deny(true);
}
}
if self.mask_map.is_empty() {
return DataAction::Continue;
}
if let Ok(body_str) = std::str::from_utf8(&body) {
let mut new_str = body_str.to_string();
if self.is_openai {
let messages = self.process_sse_message(body_str);
for message in messages {
let mut new_message = message.clone();
if !self.mask_map.is_empty() {
for (from_word, to_word) in self.mask_map.iter() {
if let Some(to) = to_word {
new_message = new_message.replace(from_word, to);
msg = msg.replace(from_word, to);
}
}
if new_message != message {
new_str = new_str.replace(
&json!(message).to_string(),
&json!(new_message).to_string(),
);
}
}
} else {
for (from_word, to_word) in self.mask_map.iter() {
if let Some(to) = to_word {
new_str = new_str.replace(from_word, to);
}
}
}
if new_str != body_str {
self.replace_http_response_body(new_str.as_bytes());
self.msg_window.message = msg.as_bytes().to_vec();
}
}
}
let new_body = if end_of_stream {
self.msg_window.finish(self.is_openai_stream.unwrap())
} else {
self.msg_window.pop(
self.char_window_size * 2,
self.byte_window_size * 2,
self.is_openai_stream.unwrap(),
)
};
self.replace_http_response_body(&new_body);
DataAction::Continue
}
}
@@ -586,7 +582,6 @@ impl HttpContextWrapper<AiDataMaskingConfig> for AiDataMasking {
return DataAction::Continue;
}
let config = self.config.as_ref().unwrap();
let mut req_body = match String::from_utf8(req_body.clone()) {
Ok(r) => r,
Err(_) => return DataAction::Continue,

View File

@@ -0,0 +1,338 @@
use higress_wasm_rust::event_stream::EventStream;
use serde_json::json;
use crate::{Res, Usage};
#[derive(Default)]
pub(crate) struct MsgWindow {
    // Incremental SSE parser used to split incoming body chunks into events.
    stream_parser: EventStream,
    // Buffered message bytes not yet released downstream; for OpenAI streams
    // this holds the concatenated delta contents extracted from parsed events.
    pub(crate) message: Vec<u8>,
    // Token usage accumulated from parsed chunks; re-attached when the
    // buffered text is re-emitted (see the impl's pop/finish methods).
    usage: Usage,
}
impl MsgWindow {
    /// Creates an empty message window.
    pub fn new() -> Self {
        MsgWindow::default()
    }

    /// Consumes one SSE event.
    ///
    /// * Events that are empty or do not start with `data:` are returned
    ///   unchanged so the caller can forward them verbatim.
    /// * `data:` events whose payload parses as a `Res` chunk have each
    ///   choice's delta content appended to `self.message` and their usage
    ///   folded into `self.usage`; the event itself is dropped (it will be
    ///   re-synthesized from the window in `pop`/`finish`).
    /// * `data: [DONE]` is dropped; `finish` emits a fresh terminator.
    /// * Any other unparsable `data:` event is returned unchanged.
    fn update_event(&mut self, event: Vec<u8>) -> Option<Vec<u8>> {
        if event.is_empty() || !event.starts_with(b"data:") {
            Some(event)
        } else if let Ok(res) = serde_json::from_slice::<Res>(&event[b"data:".len()..]) {
            for choice in &res.choices {
                if let Some(delta) = &choice.delta {
                    self.message.extend(delta.content.as_bytes());
                }
            }
            self.usage.add(&res.usage);
            None
        } else if event.starts_with(b"data: [DONE]") {
            None
        } else {
            Some(event)
        }
    }

    /// Feeds a chunk of response body into the window.
    ///
    /// When `is_openai` is true the chunk is run through the SSE parser and
    /// each complete event is processed by `update_event`; events that are
    /// passed through (non-`data:` lines) are appended to the buffer as-is.
    /// Otherwise the raw bytes are buffered directly.
    pub fn push(&mut self, data: &[u8], is_openai: bool) {
        if is_openai {
            self.stream_parser.update(data.to_vec());
            while let Some(event) = self.stream_parser.next() {
                if let Some(msg) = self.update_event(event) {
                    self.message.extend(msg);
                }
            }
        } else {
            self.message.extend(data);
        }
    }

    /// Releases all buffered content except a trailing window that is kept
    /// back so that a masked word split across chunk boundaries can still be
    /// matched later. The window is `char_window_size` characters when the
    /// buffer is valid UTF-8, otherwise `byte_window_size` bytes.
    ///
    /// For OpenAI streams the released text is re-wrapped into a single SSE
    /// `data:` chunk carrying the usage accumulated so far (which is then
    /// reset); otherwise the raw bytes are returned.
    pub fn pop(
        &mut self,
        char_window_size: usize,
        byte_window_size: usize,
        is_openai: bool,
    ) -> Vec<u8> {
        if let Ok(message) = String::from_utf8(self.message.clone()) {
            let chars = message.chars().collect::<Vec<char>>();
            if chars.len() <= char_window_size {
                return Vec::new();
            }
            let ret = chars[..chars.len() - char_window_size]
                .iter()
                .collect::<String>();
            self.message = chars[chars.len() - char_window_size..]
                .iter()
                .collect::<String>()
                .as_bytes()
                .to_vec();
            if is_openai {
                let usage = self.usage.clone();
                self.usage.reset();
                format!(
                    "data: {}\n\n",
                    json!({"choices": [{"index": 0, "delta": {"role": "assistant", "content": ret}}], "usage": usage})
                ).as_bytes().to_vec()
            } else {
                ret.as_bytes().to_vec()
            }
        } else {
            // Not valid UTF-8 (e.g. a multi-byte sequence split across
            // chunks): fall back to a byte-based window. Guard against
            // underflow — without this check, a buffer smaller than the
            // window would panic on the slice below.
            if self.message.len() <= byte_window_size {
                return Vec::new();
            }
            let ret = self.message[..self.message.len() - byte_window_size].to_vec();
            self.message = self.message[self.message.len() - byte_window_size..].to_vec();
            ret
        }
    }

    /// Flushes the SSE parser and releases everything still buffered.
    ///
    /// For OpenAI streams the remaining text is emitted as one final `data:`
    /// chunk (with the accumulated usage) followed by `data: [DONE]`;
    /// otherwise the raw buffered bytes are returned.
    pub fn finish(&mut self, is_openai: bool) -> Vec<u8> {
        if let Some(event) = self.stream_parser.flush() {
            // NOTE(review): a pass-through (non-`data:`) event returned here
            // is discarded rather than appended — confirm this is intended.
            self.update_event(event);
        }
        if self.message.is_empty() {
            Vec::new()
        } else if is_openai {
            format!(
                "data: {}\n\ndata: [DONE]\n\n",
                json!({"choices": [{"index": 0, "delta": {"role": "assistant", "content": String::from_utf8_lossy(&self.message)}}], "usage": self.usage})
            ).as_bytes().to_vec()
        } else {
            self.message.clone()
        }
    }
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_msg() {
let mut msg_win = MsgWindow::default();
let data = r#"data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872009,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"H"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872009,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"ig"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872009,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"ress"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872009,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":" 是"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872009,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"一个"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872009,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"基于"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872009,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":" Ist"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872009,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"io"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872009,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":" 的"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872009,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"高性能"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872009,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"服务"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872009,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"网格"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872009,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"数据"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872009,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"平面"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872009,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"项目"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872009,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":""},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872009,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"旨在"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872009,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"提供"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872009,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"高"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872009,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"吞"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872010,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"吐"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872010,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"量"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872010,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"、"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872010,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"低"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872010,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"延迟"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872010,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"和"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872010,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"可"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872010,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"扩展"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872010,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"的服务"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872010,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"通信"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872010,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"管理"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872010,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"。"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872010,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"它"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872010,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"为企业"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872010,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"级"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872010,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"应用"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872010,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"提供了"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872010,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"丰富的"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872010,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"流量"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872010,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"治理"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872010,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"功能"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872010,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":""},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872010,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"如"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872010,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"负载"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872010,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"均衡"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872010,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"、"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872010,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"熔"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872010,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"断"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872010,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"、"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872010,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"限"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872010,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"流"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872010,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"等"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872011,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":",并"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872011,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"支持"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872011,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"多"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872011,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"协议"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872011,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"代理"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872011,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":""},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872011,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"包括"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872011,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":" HTTP"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872011,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"/"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872011,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"1"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872011,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"."},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872011,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"1"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872011,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":","},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872011,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":" HTTP"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872011,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"/"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872011,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"2"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872011,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":","},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872011,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":" g"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872011,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"RPC"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872011,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":")。"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872011,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"H"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872011,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"ig"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872011,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"ress"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872011,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":" 的"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872011,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"设计"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872011,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"目标"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872011,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"是"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872011,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"优化"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872011,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":" Ist"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872011,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"io"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872011,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":" 在"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872011,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"大规模"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872011,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"集群"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872011,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"中的"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872012,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"性能"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872012,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"表现"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872012,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":""},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872012,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"满足"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872012,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"高"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872012,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"并发"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872012,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"场景"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872012,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"下的"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872012,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"需求"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872012,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"。"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872012,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":""},"finish_reason":"stop"}]}
data: [DONE]
"#;
let mut buffer = Vec::new();
for line in data.split("\n\n") {
msg_win.push(line.as_bytes(), true);
msg_win.push(b"\n\n", true);
if let Ok(mut msg) = String::from_utf8(msg_win.message.clone()) {
msg = msg.replace("Higress", "***higress***");
msg_win.message = msg.as_bytes().to_vec();
}
buffer.extend(msg_win.pop(7, 7, true));
}
buffer.extend(msg_win.finish(true));
let mut message = String::new();
for line in buffer.split(|&x| x == b'\n') {
if line.is_empty() {
continue;
}
assert!(line.starts_with(b"data:"));
if line.starts_with(b"data: [DONE]") {
continue;
}
let des = serde_json::from_slice(&line[b"data:".len()..]);
assert!(des.is_ok());
let res: Res = des.unwrap();
for choice in &res.choices {
if let Some(delta) = &choice.delta {
message.push_str(&delta.content);
}
}
}
assert_eq!(message, "***higress*** 是一个基于 Istio 的高性能服务网格数据平面项目,旨在提供高吞吐量、低延迟和可扩展的服务通信管理。它为企业级应用提供了丰富的流量治理功能,如负载均衡、熔断、限流等,并支持多协议代理(包括 HTTP/1.1, HTTP/2, gRPC。***higress*** 的设计目标是优化 Istio 在大规模集群中的性能表现,满足高并发场景下的需求。");
}
}

View File

@@ -108,10 +108,7 @@ impl EventStream {
}
fn is_2eol(&self, i: usize) -> Option<usize> {
let size1 = match self.is_eol(i) {
None => return None,
Some(size1) => size1,
};
let size1 = self.is_eol(i)?;
if i + size1 < self.buffer.len() {
match self.is_eol(i + size1) {
None => {

View File

@@ -30,9 +30,11 @@ fi
CONDITIONAL_HOST_MOUNTS+="--mount type=bind,source=${ROOT}/external/package,destination=/home/package "
CONDITIONAL_HOST_MOUNTS+="--mount type=bind,source=${ROOT}/external/envoy,destination=/home/envoy "
BUILD_TOOLS_IMG=${BUILD_TOOLS_IMG:-"higress-registry.cn-hangzhou.cr.aliyuncs.com/higress/build-tools:release-1.19-ef344298e65eeb2d9e2d07b87eb4e715c2def613"}
BUILD_WITH_CONTAINER=1 \
CONDITIONAL_HOST_MOUNTS=${CONDITIONAL_HOST_MOUNTS} \
BUILD_ENVOY_BINARY_ONLY=1 \
DOCKER_RUN_OPTIONS="--user root -e HTTP_PROXY -e HTTPS_PROXY" \
IMG=higress-registry.cn-hangzhou.cr.aliyuncs.com/higress/build-tools-proxy:release-1.19-ef344298e65eeb2d9e2d07b87eb4e715c2def613 \
IMG=${BUILD_TOOLS_IMG} \
make test_release

View File

@@ -25,14 +25,34 @@ CONDITIONAL_HOST_MOUNTS+="--mount type=bind,source=${ROOT}/external/package,dest
DOCKER_RUN_OPTIONS+="-e HTTP_PROXY -e HTTPS_PROXY"
BUILD_TOOLS_IMG=${BUILD_TOOLS_IMG:-"higress-registry.cn-hangzhou.cr.aliyuncs.com/higress/build-tools:release-1.19-ef344298e65eeb2d9e2d07b87eb4e715c2def613"}
ORIGINAL_HUB=${HUB}
echo "IMG_URL=$IMG_URL"
if [ -n "$IMG_URL" ]; then
TAG=${IMG_URL#*:}
HUB=${IMG_URL%:*}
HUB=${HUB%/*}
if [ "$TAG" == "${IMG_URL}" ]; then
TAG=latest
fi
fi
echo "HUB=$HUB"
echo "TAG=$TAG"
GOOS_LOCAL=linux TARGET_OS=linux TARGET_ARCH=${TARGET_ARCH} \
ISTIO_ENVOY_LINUX_RELEASE_URL=${ISTIO_ENVOY_LINUX_RELEASE_URL} \
BUILD_WITH_CONTAINER=1 \
USE_REAL_USER=${USE_REAL_USER:-0} \
CONDITIONAL_HOST_MOUNTS=${CONDITIONAL_HOST_MOUNTS} \
DOCKER_BUILD_VARIANTS=default DOCKER_TARGETS="${DOCKER_TARGETS}" \
ISTIO_BASE_REGISTRY="${HUB}" \
ISTIO_BASE_REGISTRY="${ORIGINAL_HUB}" \
BASE_VERSION="${HIGRESS_BASE_VERSION}" \
DOCKER_RUN_OPTIONS=${DOCKER_RUN_OPTIONS} \
IMG=higress-registry.cn-hangzhou.cr.aliyuncs.com/higress/build-tools:release-1.19-ef344298e65eeb2d9e2d07b87eb4e715c2def613 \
HUB="${HUB}" \
TAG="${TAG}" \
IMG=${BUILD_TOOLS_IMG} \
make "$@"

View File

@@ -19,7 +19,9 @@ set -euo pipefail
source "$(dirname -- "$0")/setup-istio-env.sh"
cd ${ROOT}/external/istio
rm -rf out/linux_${TARGET_ARCH};
rm -rf out/linux_${TARGET_ARCH};
BUILD_TOOLS_IMG=${BUILD_TOOLS_IMG:-"higress-registry.cn-hangzhou.cr.aliyuncs.com/higress/build-tools:release-1.19-ef344298e65eeb2d9e2d07b87eb4e715c2def613"}
GOOS_LOCAL=linux TARGET_OS=linux TARGET_ARCH=${TARGET_ARCH} \
ISTIO_ENVOY_LINUX_RELEASE_URL=${ISTIO_ENVOY_LINUX_RELEASE_URL} \
@@ -28,5 +30,5 @@ GOOS_LOCAL=linux TARGET_OS=linux TARGET_ARCH=${TARGET_ARCH} \
ISTIO_BASE_REGISTRY="${HUB}" \
BASE_VERSION="${HIGRESS_BASE_VERSION}" \
DOCKER_RUN_OPTIONS="--user root -e HTTP_PROXY -e HTTPS_PROXY" \
IMG=higress-registry.cn-hangzhou.cr.aliyuncs.com/higress/build-tools:release-1.19-ef344298e65eeb2d9e2d07b87eb4e715c2def613 \
IMG=${BUILD_TOOLS_IMG} \
make build-linux