mirror of
https://github.com/alibaba/higress.git
synced 2026-03-17 00:40:48 +08:00
feat: update ai-token-ratelimit documentation by removing ai-statistics plugin (#1767)
This commit is contained in:
@@ -258,24 +258,12 @@ spec:
|
||||
'*': "qwen-turbo"
|
||||
ingress:
|
||||
- qwen
|
||||
url: oci://higress-registry.cn-hangzhou.cr.aliyuncs.com/plugins/ai-proxy:v1.0.0
|
||||
url: oci://higress-registry.cn-hangzhou.cr.aliyuncs.com/plugins/ai-proxy:1.0.0
|
||||
phase: UNSPECIFIED_PHASE
|
||||
priority: 100
|
||||
---
|
||||
apiVersion: extensions.higress.io/v1alpha1
|
||||
kind: WasmPlugin
|
||||
metadata:
|
||||
name: ai-statistics
|
||||
namespace: higress-system
|
||||
spec:
|
||||
defaultConfig:
|
||||
enable: true
|
||||
url: oci://higress-registry.cn-hangzhou.cr.aliyuncs.com/plugins/ai-token-statistics:v1.0.0
|
||||
phase: UNSPECIFIED_PHASE
|
||||
priority: 200
|
||||
---
|
||||
apiVersion: extensions.higress.io/v1alpha1
|
||||
kind: WasmPlugin
|
||||
metadata:
|
||||
name: ai-token-ratelimit
|
||||
namespace: higress-system
|
||||
@@ -294,7 +282,7 @@ spec:
|
||||
# service_name: redis.default.svc.cluster.local
|
||||
service_name: redis.dns
|
||||
service_port: 6379
|
||||
url: oci://higress-registry.cn-hangzhou.cr.aliyuncs.com/plugins/ai-token-ratelimit:v1.0.0
|
||||
url: oci://higress-registry.cn-hangzhou.cr.aliyuncs.com/plugins/ai-token-ratelimit:1.0.0
|
||||
phase: UNSPECIFIED_PHASE
|
||||
priority: 600
|
||||
```
|
||||
@@ -370,10 +358,19 @@ spec:
|
||||
pathType: Prefix
|
||||
```
|
||||
|
||||
转发 higress-gateway 的流量到本地,方便进行测试。
|
||||
|
||||
```bash
|
||||
kubectl port-forward svc/higress-gateway -n higress-system 18000:80
|
||||
```
|
||||
|
||||
触发限流效果如下:
|
||||
|
||||
```bash
|
||||
curl "http://qwen-test.com:18000/v1/chat/completions?apikey=123456" -H "Content-Type: application/json" -d '{
|
||||
curl "http://localhost:18000/v1/chat/completions?apikey=123456" \
|
||||
-H "Host: qwen-test.com" \
|
||||
-H "Content-Type: application/json" \
|
||||
-d '{
|
||||
"model": "gpt-3",
|
||||
"messages": [
|
||||
{
|
||||
|
||||
@@ -234,24 +234,12 @@ spec:
|
||||
'*': "qwen-turbo"
|
||||
ingress:
|
||||
- qwen
|
||||
url: oci://higress-registry.cn-hangzhou.cr.aliyuncs.com/plugins/ai-proxy:v1.0.0
|
||||
url: oci://higress-registry.cn-hangzhou.cr.aliyuncs.com/plugins/ai-proxy:1.0.0
|
||||
phase: UNSPECIFIED_PHASE
|
||||
priority: 100
|
||||
---
|
||||
apiVersion: extensions.higress.io/v1alpha1
|
||||
kind: WasmPlugin
|
||||
metadata:
|
||||
name: ai-statistics
|
||||
namespace: higress-system
|
||||
spec:
|
||||
defaultConfig:
|
||||
enable: true
|
||||
url: oci://higress-registry.cn-hangzhou.cr.aliyuncs.com/plugins/ai-token-statistics:v1.0.0
|
||||
phase: UNSPECIFIED_PHASE
|
||||
priority: 200
|
||||
---
|
||||
apiVersion: extensions.higress.io/v1alpha1
|
||||
kind: WasmPlugin
|
||||
metadata:
|
||||
name: ai-token-ratelimit
|
||||
namespace: higress-system
|
||||
@@ -270,7 +258,7 @@ spec:
|
||||
# service_name: redis.default.svc.cluster.local
|
||||
service_name: redis.dns
|
||||
service_port: 6379
|
||||
url: oci://higress-registry.cn-hangzhou.cr.aliyuncs.com/plugins/ai-token-ratelimit:v1.0.0
|
||||
url: oci://higress-registry.cn-hangzhou.cr.aliyuncs.com/plugins/ai-token-ratelimit:1.0.0
|
||||
phase: UNSPECIFIED_PHASE
|
||||
priority: 600
|
||||
```
|
||||
@@ -347,10 +335,19 @@ spec:
|
||||
pathType: Prefix
|
||||
```
|
||||
|
||||
Forward the higress-gateway traffic to your local machine to make testing easier.
|
||||
|
||||
```bash
|
||||
kubectl port-forward svc/higress-gateway -n higress-system 18000:80
|
||||
```
|
||||
|
||||
Trigger the rate limit as follows:
|
||||
|
||||
```bash
|
||||
curl "http://qwen-test.com:18000/v1/chat/completions?apikey=123456" -H "Content-Type: application/json" -d '{
|
||||
curl "http://localhost:18000/v1/chat/completions?apikey=123456" \
|
||||
-H "Host: qwen-test.com" \
|
||||
-H "Content-Type: application/json" \
|
||||
-d '{
|
||||
"model": "gpt-3",
|
||||
"messages": [
|
||||
{
|
||||
|
||||
Reference in New Issue
Block a user