feat: update ai-token-ratelimit documentation by removing ai-statistics plugin (#1767)

This commit is contained in:
Se7en
2025-02-26 20:47:37 +08:00
committed by GitHub
parent 9e1792c245
commit 9ea2410388
2 changed files with 24 additions and 30 deletions

View File

@@ -258,24 +258,12 @@ spec:
'*': "qwen-turbo" '*': "qwen-turbo"
ingress: ingress:
- qwen - qwen
url: oci://higress-registry.cn-hangzhou.cr.aliyuncs.com/plugins/ai-proxy:v1.0.0 url: oci://higress-registry.cn-hangzhou.cr.aliyuncs.com/plugins/ai-proxy:1.0.0
phase: UNSPECIFIED_PHASE phase: UNSPECIFIED_PHASE
priority: 100 priority: 100
--- ---
apiVersion: extensions.higress.io/v1alpha1 apiVersion: extensions.higress.io/v1alpha1
kind: WasmPlugin kind: WasmPlugin
metadata:
name: ai-statistics
namespace: higress-system
spec:
defaultConfig:
enable: true
url: oci://higress-registry.cn-hangzhou.cr.aliyuncs.com/plugins/ai-token-statistics:v1.0.0
phase: UNSPECIFIED_PHASE
priority: 200
---
apiVersion: extensions.higress.io/v1alpha1
kind: WasmPlugin
metadata: metadata:
name: ai-token-ratelimit name: ai-token-ratelimit
namespace: higress-system namespace: higress-system
@@ -294,7 +282,7 @@ spec:
# service_name: redis.default.svc.cluster.local # service_name: redis.default.svc.cluster.local
service_name: redis.dns service_name: redis.dns
service_port: 6379 service_port: 6379
url: oci://higress-registry.cn-hangzhou.cr.aliyuncs.com/plugins/ai-token-ratelimit:v1.0.0 url: oci://higress-registry.cn-hangzhou.cr.aliyuncs.com/plugins/ai-token-ratelimit:1.0.0
phase: UNSPECIFIED_PHASE phase: UNSPECIFIED_PHASE
priority: 600 priority: 600
``` ```
@@ -370,10 +358,19 @@ spec:
pathType: Prefix pathType: Prefix
``` ```
转发 higress-gateway 的流量到本地,方便进行测试。
```bash
kubectl port-forward svc/higress-gateway -n higress-system 18000:80
```
触发限流效果如下: 触发限流效果如下:
```bash ```bash
curl "http://qwen-test.com:18000/v1/chat/completions?apikey=123456" -H "Content-Type: application/json" -d '{ curl "http://localhost:18000/v1/chat/completions?apikey=123456" \
-H "Host: qwen-test.com" \
-H "Content-Type: application/json" \
-d '{
"model": "gpt-3", "model": "gpt-3",
"messages": [ "messages": [
{ {

View File

@@ -234,24 +234,12 @@ spec:
'*': "qwen-turbo" '*': "qwen-turbo"
ingress: ingress:
- qwen - qwen
url: oci://higress-registry.cn-hangzhou.cr.aliyuncs.com/plugins/ai-proxy:v1.0.0 url: oci://higress-registry.cn-hangzhou.cr.aliyuncs.com/plugins/ai-proxy:1.0.0
phase: UNSPECIFIED_PHASE phase: UNSPECIFIED_PHASE
priority: 100 priority: 100
--- ---
apiVersion: extensions.higress.io/v1alpha1 apiVersion: extensions.higress.io/v1alpha1
kind: WasmPlugin kind: WasmPlugin
metadata:
name: ai-statistics
namespace: higress-system
spec:
defaultConfig:
enable: true
url: oci://higress-registry.cn-hangzhou.cr.aliyuncs.com/plugins/ai-token-statistics:v1.0.0
phase: UNSPECIFIED_PHASE
priority: 200
---
apiVersion: extensions.higress.io/v1alpha1
kind: WasmPlugin
metadata: metadata:
name: ai-token-ratelimit name: ai-token-ratelimit
namespace: higress-system namespace: higress-system
@@ -270,7 +258,7 @@ spec:
# service_name: redis.default.svc.cluster.local # service_name: redis.default.svc.cluster.local
service_name: redis.dns service_name: redis.dns
service_port: 6379 service_port: 6379
url: oci://higress-registry.cn-hangzhou.cr.aliyuncs.com/plugins/ai-token-ratelimit:v1.0.0 url: oci://higress-registry.cn-hangzhou.cr.aliyuncs.com/plugins/ai-token-ratelimit:1.0.0
phase: UNSPECIFIED_PHASE phase: UNSPECIFIED_PHASE
priority: 600 priority: 600
``` ```
@@ -347,10 +335,19 @@ spec:
pathType: Prefix pathType: Prefix
``` ```
Forward the higress-gateway traffic to your local machine so that it is easy to test.
```bash
kubectl port-forward svc/higress-gateway -n higress-system 18000:80
```
The rate limiting effect is triggered as follows: The rate limiting effect is triggered as follows:
```bash ```bash
curl "http://qwen-test.com:18000/v1/chat/completions?apikey=123456" -H "Content-Type: application/json" -d '{ curl "http://localhost:18000/v1/chat/completions?apikey=123456" \
-H "Host: qwen-test.com" \
-H "Content-Type: application/json" \
-d '{
"model": "gpt-3", "model": "gpt-3",
"messages": [ "messages": [
{ {