mirror of
https://github.com/alibaba/higress.git
synced 2026-02-26 21:50:53 +08:00
Compare commits
45 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
36bcb595d6 | ||
|
|
783a8db512 | ||
|
|
44566f5259 | ||
|
|
73ba9238bd | ||
|
|
41a1455874 | ||
|
|
9d68ccbf35 | ||
|
|
db7dbb24a2 | ||
|
|
9a0cf9b762 | ||
|
|
bb786c9618 | ||
|
|
ef49d2f5f6 | ||
|
|
864bf5af39 | ||
|
|
527e922d50 | ||
|
|
1fe5eb6e13 | ||
|
|
87185baff2 | ||
|
|
76ada0b844 | ||
|
|
f4d3fec228 | ||
|
|
e94ac43dd1 | ||
|
|
dd29267fd7 | ||
|
|
01a9161153 | ||
|
|
ceb8b557dc | ||
|
|
753022e093 | ||
|
|
04cbbfc7e8 | ||
|
|
db66df39c4 | ||
|
|
dad6278a6d | ||
|
|
272d693df3 | ||
|
|
69bc800198 | ||
|
|
1daaa4b880 | ||
|
|
6e31a7b67c | ||
|
|
91f070906a | ||
|
|
e3aeddcc24 | ||
|
|
926913f0e7 | ||
|
|
c471bb2003 | ||
|
|
0b9256617e | ||
|
|
2670ecbf8e | ||
|
|
7040e4bd34 | ||
|
|
de8a4d0b03 | ||
|
|
b33a3a4d2e | ||
|
|
087cb48fc5 | ||
|
|
95f32002d2 | ||
|
|
fb8dd819e9 | ||
|
|
86934b3203 | ||
|
|
38068ee43d | ||
|
|
d81573e0d2 | ||
|
|
312b80f91d | ||
|
|
e42e6eeee6 |
123
.github/workflows/helm-docs.yaml
vendored
123
.github/workflows/helm-docs.yaml
vendored
@@ -39,126 +39,3 @@ jobs:
|
||||
fi
|
||||
git diff --exit-code
|
||||
rm -f ./helm-docs
|
||||
|
||||
translate-readme:
|
||||
needs: helm
|
||||
runs-on: ubuntu-latest
|
||||
|
||||
steps:
|
||||
- name: Checkout repository
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Install dependencies
|
||||
run: |
|
||||
sudo apt-get update
|
||||
sudo apt-get install -y jq
|
||||
|
||||
- name: Compare README.md
|
||||
id: compare_readme
|
||||
run: |
|
||||
cd ./helm/higress
|
||||
|
||||
BASE_BRANCH=${GITHUB_BASE_REF:-main}
|
||||
git fetch origin $BASE_BRANCH
|
||||
|
||||
if git diff --quiet origin/$BASE_BRANCH -- README.md; then
|
||||
echo "README.md has no local changes compared to $BASE_BRANCH. Skipping translation."
|
||||
echo "skip_translation=true" >> $GITHUB_ENV
|
||||
else
|
||||
echo "README.md has local changes compared to $BASE_BRANCH. Proceeding with translation."
|
||||
echo "skip_translation=false" >> $GITHUB_ENV
|
||||
echo "--------- diff ---------"
|
||||
git diff origin/$BASE_BRANCH -- README.md
|
||||
echo "------------------------"
|
||||
fi
|
||||
|
||||
- name: Translate README.md to Chinese
|
||||
if: env.skip_translation == 'false'
|
||||
env:
|
||||
API_URL: ${{ secrets.HIGRESS_OPENAI_API_URL }}
|
||||
API_KEY: ${{ secrets.HIGRESS_OPENAI_API_KEY }}
|
||||
API_MODEL: ${{ secrets.HIGRESS_OPENAI_API_MODEL }}
|
||||
run: |
|
||||
cat << 'EOF' > translate_readme.py
|
||||
import os
|
||||
import json
|
||||
import requests
|
||||
|
||||
API_URL = os.environ["API_URL"]
|
||||
API_KEY = os.environ["API_KEY"]
|
||||
API_MODEL = os.environ["API_MODEL"]
|
||||
README_PATH = "./helm/higress/README.md"
|
||||
OUTPUT_PATH = "./helm/higress/README.zh.md"
|
||||
|
||||
def stream_translation(api_url, api_key, payload):
|
||||
headers = {
|
||||
"Content-Type": "application/json",
|
||||
"Authorization": f"Bearer {api_key}",
|
||||
}
|
||||
response = requests.post(api_url, headers=headers, json=payload, stream=True)
|
||||
response.raise_for_status()
|
||||
|
||||
with open(OUTPUT_PATH, "w", encoding="utf-8") as out_file:
|
||||
for line in response.iter_lines(decode_unicode=True):
|
||||
if line.strip() == "" or not line.startswith("data: "):
|
||||
continue
|
||||
data = line[6:]
|
||||
if data.strip() == "[DONE]":
|
||||
break
|
||||
try:
|
||||
chunk = json.loads(data)
|
||||
content = chunk["choices"][0]["delta"].get("content", "")
|
||||
if content:
|
||||
out_file.write(content)
|
||||
except Exception as e:
|
||||
print("Error parsing chunk:", e)
|
||||
|
||||
def main():
|
||||
if not os.path.exists(README_PATH):
|
||||
print("README.md not found!")
|
||||
return
|
||||
|
||||
with open(README_PATH, "r", encoding="utf-8") as f:
|
||||
content = f.read()
|
||||
|
||||
payload = {
|
||||
"model": API_MODEL,
|
||||
"messages": [
|
||||
{
|
||||
"role": "system",
|
||||
"content": "You are a translation assistant that translates English Markdown text to Chinese. Preserve original Markdown formatting and line breaks."
|
||||
},
|
||||
{
|
||||
"role": "user",
|
||||
"content": content
|
||||
}
|
||||
],
|
||||
"temperature": 0.3,
|
||||
"stream": True
|
||||
}
|
||||
|
||||
print("Streaming translation started...")
|
||||
stream_translation(API_URL, API_KEY, payload)
|
||||
print(f"Translation completed and saved to {OUTPUT_PATH}.")
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
EOF
|
||||
|
||||
python3 translate_readme.py
|
||||
rm -rf translate_readme.py
|
||||
|
||||
- name: Create Pull Request
|
||||
if: env.skip_translation == 'false'
|
||||
uses: peter-evans/create-pull-request@v7
|
||||
with:
|
||||
token: ${{ secrets.GITHUB_TOKEN }}
|
||||
commit-message: "Update helm translated README.zh.md"
|
||||
branch: update-helm-readme-zh
|
||||
title: "Update helm translated README.zh.md"
|
||||
body: |
|
||||
This PR updates the translated README.zh.md file.
|
||||
|
||||
- Automatically generated by GitHub Actions
|
||||
labels: translation, automated
|
||||
base: main
|
||||
131
.github/workflows/translate-readme.yaml
vendored
Normal file
131
.github/workflows/translate-readme.yaml
vendored
Normal file
@@ -0,0 +1,131 @@
|
||||
name: "Helm Docs"
|
||||
|
||||
on:
|
||||
workflow_dispatch: ~
|
||||
push:
|
||||
branches: [ main ]
|
||||
paths:
|
||||
- 'helm/higress/README.md'
|
||||
|
||||
jobs:
|
||||
translate-readme:
|
||||
runs-on: ubuntu-latest
|
||||
|
||||
steps:
|
||||
- name: Checkout repository
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Install dependencies
|
||||
run: |
|
||||
sudo apt-get update
|
||||
sudo apt-get install -y jq
|
||||
|
||||
- name: Compare README.md
|
||||
id: compare_readme
|
||||
run: |
|
||||
cd ./helm/higress
|
||||
|
||||
BASE_BRANCH=${GITHUB_BASE_REF:-main}
|
||||
git fetch origin $BASE_BRANCH
|
||||
|
||||
if git diff --quiet origin/$BASE_BRANCH -- README.md; then
|
||||
echo "README.md has no local changes compared to $BASE_BRANCH. Skipping translation."
|
||||
echo "skip_translation=true" >> $GITHUB_ENV
|
||||
else
|
||||
echo "README.md has local changes compared to $BASE_BRANCH. Proceeding with translation."
|
||||
echo "skip_translation=false" >> $GITHUB_ENV
|
||||
echo "--------- diff ---------"
|
||||
git diff origin/$BASE_BRANCH -- README.md
|
||||
echo "------------------------"
|
||||
fi
|
||||
|
||||
- name: Translate README.md to Chinese
|
||||
if: env.skip_translation == 'false'
|
||||
env:
|
||||
API_URL: ${{ secrets.HIGRESS_OPENAI_API_URL }}
|
||||
API_KEY: ${{ secrets.HIGRESS_OPENAI_API_KEY }}
|
||||
API_MODEL: ${{ secrets.HIGRESS_OPENAI_API_MODEL }}
|
||||
run: |
|
||||
cat << 'EOF' > translate_readme.py
|
||||
import os
|
||||
import json
|
||||
import requests
|
||||
|
||||
API_URL = os.environ["API_URL"]
|
||||
API_KEY = os.environ["API_KEY"]
|
||||
API_MODEL = os.environ["API_MODEL"]
|
||||
README_PATH = "./helm/higress/README.md"
|
||||
OUTPUT_PATH = "./helm/higress/README.zh.md"
|
||||
|
||||
def stream_translation(api_url, api_key, payload):
|
||||
headers = {
|
||||
"Content-Type": "application/json",
|
||||
"Authorization": f"Bearer {api_key}",
|
||||
}
|
||||
response = requests.post(api_url, headers=headers, json=payload, stream=True)
|
||||
response.raise_for_status()
|
||||
|
||||
with open(OUTPUT_PATH, "w", encoding="utf-8") as out_file:
|
||||
for line in response.iter_lines(decode_unicode=True):
|
||||
if line.strip() == "" or not line.startswith("data: "):
|
||||
continue
|
||||
data = line[6:]
|
||||
if data.strip() == "[DONE]":
|
||||
break
|
||||
try:
|
||||
chunk = json.loads(data)
|
||||
content = chunk["choices"][0]["delta"].get("content", "")
|
||||
if content:
|
||||
out_file.write(content)
|
||||
except Exception as e:
|
||||
print("Error parsing chunk:", e)
|
||||
|
||||
def main():
|
||||
if not os.path.exists(README_PATH):
|
||||
print("README.md not found!")
|
||||
return
|
||||
|
||||
with open(README_PATH, "r", encoding="utf-8") as f:
|
||||
content = f.read()
|
||||
|
||||
payload = {
|
||||
"model": API_MODEL,
|
||||
"messages": [
|
||||
{
|
||||
"role": "system",
|
||||
"content": "You are a translation assistant that translates English Markdown text to Chinese. Preserve original Markdown formatting and line breaks."
|
||||
},
|
||||
{
|
||||
"role": "user",
|
||||
"content": content
|
||||
}
|
||||
],
|
||||
"temperature": 0.3,
|
||||
"stream": True
|
||||
}
|
||||
|
||||
print("Streaming translation started...")
|
||||
stream_translation(API_URL, API_KEY, payload)
|
||||
print(f"Translation completed and saved to {OUTPUT_PATH}.")
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
EOF
|
||||
|
||||
python3 translate_readme.py
|
||||
rm -rf translate_readme.py
|
||||
|
||||
- name: Create Pull Request
|
||||
if: env.skip_translation == 'false'
|
||||
uses: peter-evans/create-pull-request@v7
|
||||
with:
|
||||
token: ${{ secrets.GITHUB_TOKEN }}
|
||||
commit-message: "Update helm translated README.zh.md"
|
||||
branch: update-helm-readme-zh
|
||||
title: "Update helm translated README.zh.md"
|
||||
body: |
|
||||
This PR updates the translated README.zh.md file.
|
||||
|
||||
- Automatically generated by GitHub Actions
|
||||
labels: translation, automated
|
||||
base: main
|
||||
@@ -33,6 +33,7 @@ header:
|
||||
- 'hgctl/cmd/hgctl/config/testdata/config'
|
||||
- 'hgctl/pkg/manifests'
|
||||
- 'pkg/ingress/kube/gateway/istio/testdata'
|
||||
- 'release-notes/**'
|
||||
|
||||
comment: on-failure
|
||||
dependency:
|
||||
|
||||
@@ -3,7 +3,7 @@
|
||||
/istio @SpecialYang @johnlanni
|
||||
/pkg @SpecialYang @johnlanni @CH3CHO
|
||||
/plugins @johnlanni @CH3CHO @rinfx
|
||||
/plugins/wasm-go/extensions/ai-proxy @cr7258 @CH3CHO @rinfx
|
||||
/plugins/wasm-go/extensions/ai-proxy @cr7258 @CH3CHO @rinfx @wydream
|
||||
/plugins/wasm-rust @007gzs @jizhuozhi
|
||||
/registry @NameHaibinZhang @2456868764 @johnlanni
|
||||
/test @Xunzhuo @2456868764 @CH3CHO
|
||||
|
||||
@@ -144,7 +144,7 @@ docker-buildx-push: clean-env docker.higress-buildx
|
||||
export PARENT_GIT_TAG:=$(shell cat VERSION)
|
||||
export PARENT_GIT_REVISION:=$(TAG)
|
||||
|
||||
export ENVOY_PACKAGE_URL_PATTERN?=https://github.com/higress-group/proxy/releases/download/v2.1.6/envoy-symbol-ARCH.tar.gz
|
||||
export ENVOY_PACKAGE_URL_PATTERN?=https://github.com/higress-group/proxy/releases/download/v2.1.7/envoy-symbol-ARCH.tar.gz
|
||||
|
||||
build-envoy: prebuild
|
||||
./tools/hack/build-envoy.sh
|
||||
@@ -192,8 +192,8 @@ install: pre-install
|
||||
helm install higress helm/higress -n higress-system --create-namespace --set 'global.local=true'
|
||||
|
||||
HIGRESS_LATEST_IMAGE_TAG ?= latest
|
||||
ENVOY_LATEST_IMAGE_TAG ?= 958467a353d411ae3f06e03b096bfd342cddb2c6
|
||||
ISTIO_LATEST_IMAGE_TAG ?= d9c728d3b01f64855e012b08d136e306f1160397
|
||||
ENVOY_LATEST_IMAGE_TAG ?= latest
|
||||
ISTIO_LATEST_IMAGE_TAG ?= latest
|
||||
|
||||
install-dev: pre-install
|
||||
helm install higress helm/core -n higress-system --create-namespace --set 'controller.tag=$(TAG)' --set 'gateway.replicas=1' --set 'pilot.tag=$(ISTIO_LATEST_IMAGE_TAG)' --set 'gateway.tag=$(ENVOY_LATEST_IMAGE_TAG)' --set 'global.local=true'
|
||||
|
||||
Submodule envoy/envoy updated: b37a2988a1...583feb54ce
@@ -1,5 +1,5 @@
|
||||
apiVersion: v2
|
||||
appVersion: 2.1.4
|
||||
appVersion: 2.1.5
|
||||
description: Helm chart for deploying higress gateways
|
||||
icon: https://higress.io/img/higress_logo_small.png
|
||||
home: http://higress.io/
|
||||
@@ -15,4 +15,4 @@ dependencies:
|
||||
repository: "file://../redis"
|
||||
version: 0.0.1
|
||||
type: application
|
||||
version: 2.1.4
|
||||
version: 2.1.5
|
||||
|
||||
@@ -113,3 +113,36 @@ kind: VMPodScrape
|
||||
{{- fail "unexpected gateway.metrics.provider" -}}
|
||||
{{- end -}}
|
||||
{{- end -}}
|
||||
|
||||
{{- define "pluginServer.name" -}}
|
||||
{{- .Values.pluginServer.name | default "higress-plugin-server" -}}
|
||||
{{- end }}
|
||||
|
||||
{{- define "pluginServer.chart" -}}
|
||||
{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }}
|
||||
{{- end }}
|
||||
|
||||
{{- define "pluginServer.labels" -}}
|
||||
helm.sh/chart: {{ include "pluginServer.chart" . }}
|
||||
{{ include "pluginServer.selectorLabels" . }}
|
||||
{{- if .Chart.AppVersion }}
|
||||
app.kubernetes.io/version: {{ .Chart.AppVersion | quote }}
|
||||
{{- end }}
|
||||
app.kubernetes.io/managed-by: {{ .Release.Service }}
|
||||
app.kubernetes.io/name: {{ include "pluginServer.name" . }}
|
||||
{{- end }}
|
||||
|
||||
{{- define "pluginServer.selectorLabels" -}}
|
||||
{{- if hasKey .Values.pluginServer.labels "app" }}
|
||||
{{- with .Values.pluginServer.labels.app }}app: {{.|quote}}
|
||||
{{- end}}
|
||||
{{- else }}app: {{ include "pluginServer.name" . }}
|
||||
{{- end }}
|
||||
{{- if hasKey .Values.pluginServer.labels "higress" }}
|
||||
{{- with .Values.pluginServer.labels.higress }}
|
||||
higress: {{.|quote}}
|
||||
{{- end}}
|
||||
{{- else }}
|
||||
higress: {{ include "pluginServer.name" . }}
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
|
||||
@@ -9,9 +9,7 @@
|
||||
accessLogFile: "/dev/stdout"
|
||||
{{- end }}
|
||||
ingressControllerMode: "OFF"
|
||||
accessLogFormat: '{"ai_log":"%FILTER_STATE(wasm.ai_log:PLAIN)%","authority":"%REQ(X-ENVOY-ORIGINAL-HOST?:AUTHORITY)%","bytes_received":"%BYTES_RECEIVED%","bytes_sent":"%BYTES_SENT%","downstream_local_address":"%DOWNSTREAM_LOCAL_ADDRESS%","downstream_remote_address":"%DOWNSTREAM_REMOTE_ADDRESS%","duration":"%DURATION%","istio_policy_status":"%DYNAMIC_METADATA(istio.mixer:status)%","method":"%REQ(:METHOD)%","path":"%REQ(X-ENVOY-ORIGINAL-PATH?:PATH)%","protocol":"%PROTOCOL%","request_id":"%REQ(X-REQUEST-ID)%","requested_server_name":"%REQUESTED_SERVER_NAME%","response_code":"%RESPONSE_CODE%","response_flags":"%RESPONSE_FLAGS%","route_name":"%ROUTE_NAME%","start_time":"%START_TIME%","trace_id":"%REQ(X-B3-TRACEID)%","upstream_cluster":"%UPSTREAM_CLUSTER%","upstream_host":"%UPSTREAM_HOST%","upstream_local_address":"%UPSTREAM_LOCAL_ADDRESS%","upstream_service_time":"%RESP(X-ENVOY-UPSTREAM-SERVICE-TIME)%","upstream_transport_failure_reason":"%UPSTREAM_TRANSPORT_FAILURE_REASON%","user_agent":"%REQ(USER-AGENT)%","x_forwarded_for":"%REQ(X-FORWARDED-FOR)%","response_code_details":"%RESPONSE_CODE_DETAILS%"}
|
||||
|
||||
'
|
||||
accessLogFormat: '{"ai_log":"%FILTER_STATE(wasm.ai_log:PLAIN)%","authority":"%REQ(X-ENVOY-ORIGINAL-HOST?:AUTHORITY)%","bytes_received":"%BYTES_RECEIVED%","bytes_sent":"%BYTES_SENT%","downstream_local_address":"%DOWNSTREAM_LOCAL_ADDRESS%","downstream_remote_address":"%DOWNSTREAM_REMOTE_ADDRESS%","duration":"%DURATION%","istio_policy_status":"%DYNAMIC_METADATA(istio.mixer:status)%","method":"%REQ(:METHOD)%","path":"%REQ(X-ENVOY-ORIGINAL-PATH?:PATH)%","protocol":"%PROTOCOL%","request_id":"%REQ(X-REQUEST-ID)%","requested_server_name":"%REQUESTED_SERVER_NAME%","response_code":"%RESPONSE_CODE%","response_flags":"%RESPONSE_FLAGS%","route_name":"%ROUTE_NAME%","start_time":"%START_TIME%","trace_id":"%REQ(X-B3-TRACEID)%","upstream_cluster":"%UPSTREAM_CLUSTER%","upstream_host":"%UPSTREAM_HOST%","upstream_local_address":"%UPSTREAM_LOCAL_ADDRESS%","upstream_service_time":"%RESP(X-ENVOY-UPSTREAM-SERVICE-TIME)%","upstream_transport_failure_reason":"%UPSTREAM_TRANSPORT_FAILURE_REASON%","user_agent":"%REQ(USER-AGENT)%","x_forwarded_for":"%REQ(X-FORWARDED-FOR)%","response_code_details":"%RESPONSE_CODE_DETAILS%"}'
|
||||
dnsRefreshRate: 200s
|
||||
enableAutoMtls: false
|
||||
enablePrometheusMerge: false
|
||||
@@ -99,7 +97,7 @@ metadata:
|
||||
name: higress-config
|
||||
namespace: {{ .Release.Namespace }}
|
||||
labels:
|
||||
{{- include "gateway.labels" . | nindent 4 }}
|
||||
{{- include "gateway.labels" . | nindent 4 }}
|
||||
data:
|
||||
higress: |-
|
||||
{{- $existingConfig := lookup "v1" "ConfigMap" .Release.Namespace "higress-config" }}
|
||||
@@ -126,7 +124,7 @@ data:
|
||||
{{- else }}
|
||||
networks: {}
|
||||
{{- end }}
|
||||
|
||||
|
||||
mesh: |-
|
||||
{{- if .Values.meshConfig }}
|
||||
{{ $mesh | toYaml | indent 4 }}
|
||||
|
||||
@@ -6,4 +6,8 @@ metadata:
|
||||
namespace: {{ .Release.Namespace }}
|
||||
labels:
|
||||
{{- include "controller.labels" . | nindent 4 }}
|
||||
{{- with .Values.controller.serviceAccount.annotations }}
|
||||
annotations:
|
||||
{{- toYaml . | nindent 4 }}
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
|
||||
39
helm/core/templates/plugin-server-deployment.yaml
Normal file
39
helm/core/templates/plugin-server-deployment.yaml
Normal file
@@ -0,0 +1,39 @@
|
||||
{{- if .Values.global.enablePluginServer }}
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: {{ include "pluginServer.name" . }}
|
||||
namespace: {{ .Release.Namespace }}
|
||||
spec:
|
||||
replicas: {{ .Values.pluginServer.replicas }}
|
||||
selector:
|
||||
matchLabels:
|
||||
{{- include "pluginServer.selectorLabels" . | nindent 6 }}
|
||||
template:
|
||||
metadata:
|
||||
labels:
|
||||
{{- with .Values.pluginServer.podLabels }}
|
||||
{{- toYaml . | nindent 8 }}
|
||||
{{- end }}
|
||||
{{- include "pluginServer.selectorLabels" . | nindent 8 }}
|
||||
spec:
|
||||
{{- with .Values.pluginServer.imagePullSecrets }}
|
||||
imagePullSecrets:
|
||||
{{- toYaml . | nindent 8 }}
|
||||
{{- end }}
|
||||
containers:
|
||||
- name: {{ .Chart.Name }}
|
||||
image: {{ .Values.pluginServer.hub | default .Values.global.hub }}/{{ .Values.pluginServer.image | default "plugin-server" }}:{{ .Values.pluginServer.tag | default "1.0.0" }}
|
||||
{{- if .Values.global.imagePullPolicy }}
|
||||
imagePullPolicy: {{ .Values.global.imagePullPolicy }}
|
||||
{{- end }}
|
||||
ports:
|
||||
- containerPort: 8080
|
||||
resources:
|
||||
requests:
|
||||
cpu: {{ .Values.pluginServer.resources.requests.cpu }}
|
||||
memory: {{ .Values.pluginServer.resources.requests.memory }}
|
||||
limits:
|
||||
cpu: {{ .Values.pluginServer.resources.limits.cpu }}
|
||||
memory: {{ .Values.pluginServer.resources.limits.memory }}
|
||||
{{- end }}
|
||||
16
helm/core/templates/plugin-server-service.yaml
Normal file
16
helm/core/templates/plugin-server-service.yaml
Normal file
@@ -0,0 +1,16 @@
|
||||
{{- if .Values.global.enablePluginServer }}
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: {{ include "pluginServer.name" . }}
|
||||
namespace: {{ .Release.Namespace }}
|
||||
labels:
|
||||
{{- include "pluginServer.labels" . | nindent 4 }}
|
||||
spec:
|
||||
ports:
|
||||
- protocol: TCP
|
||||
port: {{ .Values.pluginServer.service.port }}
|
||||
targetPort: 8080
|
||||
selector:
|
||||
{{- include "pluginServer.selectorLabels" . | nindent 4 }}
|
||||
{{- end }}
|
||||
@@ -11,6 +11,7 @@ global:
|
||||
enableSRDS: true
|
||||
# -- Whether to enable Redis(redis-stack-server) for Higress, default is false.
|
||||
enableRedis: false
|
||||
enablePluginServer: false
|
||||
onDemandRDS: false
|
||||
hostRDSMergeSubset: false
|
||||
onlyPushRouteCluster: true
|
||||
@@ -580,8 +581,7 @@ controller:
|
||||
# -- Labels to apply to the pod
|
||||
podLabels: {}
|
||||
|
||||
podSecurityContext:
|
||||
{}
|
||||
podSecurityContext: {}
|
||||
# fsGroup: 2000
|
||||
|
||||
ports:
|
||||
@@ -708,13 +708,13 @@ tracing:
|
||||
enable: false
|
||||
sampling: 100
|
||||
timeout: 500
|
||||
skywalking:
|
||||
# access_token: ""
|
||||
service: ""
|
||||
port: 11800
|
||||
# skywalking:
|
||||
# access_token: ""
|
||||
# service: ""
|
||||
# port: 11800
|
||||
# zipkin:
|
||||
# service: ""
|
||||
# port: 9411
|
||||
# service: ""
|
||||
# port: 9411
|
||||
|
||||
# -- Downstream config settings
|
||||
downstream:
|
||||
@@ -767,4 +767,31 @@ redis:
|
||||
accessModes:
|
||||
- ReadWriteOnce
|
||||
# -- Persistent Volume size
|
||||
size: 1Gi
|
||||
size: 1Gi
|
||||
|
||||
pluginServer:
|
||||
name: "higress-plugin-server"
|
||||
# -- Number of Higress Plugin Server pods, 2 recommended for high availability
|
||||
replicas: 2
|
||||
image: plugin-server
|
||||
|
||||
hub: higress-registry.cn-hangzhou.cr.aliyuncs.com/higress
|
||||
tag: ""
|
||||
|
||||
imagePullSecrets: []
|
||||
|
||||
labels: {}
|
||||
# -- Labels to apply to the pod
|
||||
podLabels: {}
|
||||
|
||||
# Plugin-server Service configuration
|
||||
service:
|
||||
port: 80 # Container target port (usually fixed)
|
||||
|
||||
resources:
|
||||
requests:
|
||||
cpu: 200m
|
||||
memory: 128Mi
|
||||
limits:
|
||||
cpu: 500m
|
||||
memory: 256Mi
|
||||
|
||||
@@ -1,9 +1,9 @@
|
||||
dependencies:
|
||||
- name: higress-core
|
||||
repository: file://../core
|
||||
version: 2.1.4
|
||||
version: 2.1.5
|
||||
- name: higress-console
|
||||
repository: https://higress.io/helm-charts/
|
||||
version: 2.1.4
|
||||
digest: sha256:482d9c5263ed959848601ee249b4852aed842a7805f0b36e456639fd54649c45
|
||||
generated: "2025-06-10T20:57:14.150704+08:00"
|
||||
version: 2.1.5
|
||||
digest: sha256:1c7c8003686b2df2c67427054006aef21c92ab1ff86d2e5f5587daf02ebc7d61
|
||||
generated: "2025-07-02T17:38:10.089494+08:00"
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
apiVersion: v2
|
||||
appVersion: 2.1.4
|
||||
appVersion: 2.1.5
|
||||
description: Helm chart for deploying Higress gateways
|
||||
icon: https://higress.io/img/higress_logo_small.png
|
||||
home: http://higress.io/
|
||||
@@ -12,9 +12,9 @@ sources:
|
||||
dependencies:
|
||||
- name: higress-core
|
||||
repository: "file://../core"
|
||||
version: 2.1.4
|
||||
version: 2.1.5
|
||||
- name: higress-console
|
||||
repository: "https://higress.io/helm-charts/"
|
||||
version: 2.1.4
|
||||
version: 2.1.5
|
||||
type: application
|
||||
version: 2.1.4
|
||||
version: 2.1.5
|
||||
|
||||
@@ -165,6 +165,7 @@ The command removes all the Kubernetes components associated with the chart and
|
||||
| global.enableIPv6 | bool | `false` | |
|
||||
| global.enableIstioAPI | bool | `true` | If true, Higress Controller will monitor istio resources as well |
|
||||
| global.enableLDSCache | bool | `false` | |
|
||||
| global.enablePluginServer | bool | `false` | |
|
||||
| global.enableProxyProtocol | bool | `false` | |
|
||||
| global.enablePushAllMCPClusters | bool | `true` | |
|
||||
| global.enableRedis | bool | `false` | Whether to enable Redis(redis-stack-server) for Higress, default is false. |
|
||||
@@ -273,6 +274,19 @@ The command removes all the Kubernetes components associated with the chart and
|
||||
| pilot.serviceAnnotations | object | `{}` | |
|
||||
| pilot.tag | string | `""` | |
|
||||
| pilot.traceSampling | float | `1` | |
|
||||
| pluginServer.hub | string | `"higress-registry.cn-hangzhou.cr.aliyuncs.com/higress"` | |
|
||||
| pluginServer.image | string | `"plugin-server"` | |
|
||||
| pluginServer.imagePullSecrets | list | `[]` | |
|
||||
| pluginServer.labels | object | `{}` | |
|
||||
| pluginServer.name | string | `"higress-plugin-server"` | |
|
||||
| pluginServer.podLabels | object | `{}` | Labels to apply to the pod |
|
||||
| pluginServer.replicas | int | `2` | Number of Higress Plugin Server pods, 2 recommended for high availability |
|
||||
| pluginServer.resources.limits.cpu | string | `"500m"` | |
|
||||
| pluginServer.resources.limits.memory | string | `"256Mi"` | |
|
||||
| pluginServer.resources.requests.cpu | string | `"200m"` | |
|
||||
| pluginServer.resources.requests.memory | string | `"128Mi"` | |
|
||||
| pluginServer.service.port | int | `80` | |
|
||||
| pluginServer.tag | string | `""` | |
|
||||
| redis.redis.affinity | object | `{}` | Affinity for Redis |
|
||||
| redis.redis.image | string | `"redis-stack-server"` | Specify the image |
|
||||
| redis.redis.name | string | `"redis-stack-server"` | |
|
||||
@@ -292,7 +306,5 @@ The command removes all the Kubernetes components associated with the chart and
|
||||
| revision | string | `""` | |
|
||||
| tracing.enable | bool | `false` | |
|
||||
| tracing.sampling | int | `100` | |
|
||||
| tracing.skywalking.port | int | `11800` | |
|
||||
| tracing.skywalking.service | string | `""` | |
|
||||
| tracing.timeout | int | `500` | |
|
||||
| upstream | object | `{"connectionBufferLimits":10485760,"idleTimeout":10}` | Upstream config settings |
|
||||
Submodule istio/istio updated: 1492505b14...1c41de65e9
@@ -817,19 +817,23 @@ func (m *IngressConfig) convertDestinationRule(configs []common.WrapperConfig) [
|
||||
destinationRuleWrapper.DestinationRule.TrafficPolicy.Tls != nil {
|
||||
dr.DestinationRule.TrafficPolicy.Tls = destinationRuleWrapper.DestinationRule.TrafficPolicy.Tls
|
||||
}
|
||||
portTrafficPolicy := destinationRuleWrapper.DestinationRule.TrafficPolicy.PortLevelSettings[0]
|
||||
portUpdated := false
|
||||
for _, policy := range dr.DestinationRule.TrafficPolicy.PortLevelSettings {
|
||||
if policy.Port.Number == portTrafficPolicy.Port.Number {
|
||||
policy.Tls = portTrafficPolicy.Tls
|
||||
portUpdated = true
|
||||
break
|
||||
// Directly inherit or override the port policy (if it exists)
|
||||
if len(destinationRuleWrapper.DestinationRule.TrafficPolicy.PortLevelSettings) > 0 {
|
||||
portTrafficPolicy := destinationRuleWrapper.DestinationRule.TrafficPolicy.PortLevelSettings[0]
|
||||
portUpdated := false
|
||||
for _, policy := range dr.DestinationRule.TrafficPolicy.PortLevelSettings {
|
||||
if policy.Port.Number == portTrafficPolicy.Port.Number {
|
||||
policy.Tls = portTrafficPolicy.Tls
|
||||
policy.LoadBalancer = portTrafficPolicy.LoadBalancer
|
||||
portUpdated = true
|
||||
break
|
||||
}
|
||||
}
|
||||
if portUpdated {
|
||||
continue
|
||||
}
|
||||
dr.DestinationRule.TrafficPolicy.PortLevelSettings = append(dr.DestinationRule.TrafficPolicy.PortLevelSettings, portTrafficPolicy)
|
||||
}
|
||||
if portUpdated {
|
||||
continue
|
||||
}
|
||||
dr.DestinationRule.TrafficPolicy.PortLevelSettings = append(dr.DestinationRule.TrafficPolicy.PortLevelSettings, portTrafficPolicy)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -25,6 +25,14 @@ type DBClient struct {
|
||||
panicCount int32 // Add panic counter
|
||||
}
|
||||
|
||||
// supports database types
|
||||
const (
|
||||
MYSQL = "mysql"
|
||||
POSTGRES = "postgres"
|
||||
CLICKHOUSE = "clickhouse"
|
||||
SQLITE = "sqlite"
|
||||
)
|
||||
|
||||
// NewDBClient creates a new DBClient instance and establishes a connection to the database
|
||||
func NewDBClient(dsn string, dbType string, stop chan struct{}) *DBClient {
|
||||
client := &DBClient{
|
||||
@@ -53,13 +61,13 @@ func (c *DBClient) connect() error {
|
||||
}
|
||||
|
||||
switch c.dbType {
|
||||
case "postgres":
|
||||
case POSTGRES:
|
||||
db, err = gorm.Open(postgres.Open(c.dsn), &gormConfig)
|
||||
case "clickhouse":
|
||||
case CLICKHOUSE:
|
||||
db, err = gorm.Open(clickhouse.Open(c.dsn), &gormConfig)
|
||||
case "mysql":
|
||||
case MYSQL:
|
||||
db, err = gorm.Open(mysql.Open(c.dsn), &gormConfig)
|
||||
case "sqlite":
|
||||
case SQLITE:
|
||||
db, err = gorm.Open(sqlite.Open(c.dsn), &gormConfig)
|
||||
default:
|
||||
return fmt.Errorf("unsupported database type %s", c.dbType)
|
||||
@@ -125,25 +133,166 @@ func (c *DBClient) reconnectLoop() {
|
||||
}
|
||||
}
|
||||
|
||||
// ExecuteSQL executes a raw SQL query and returns the result as a slice of maps
|
||||
func (c *DBClient) ExecuteSQL(query string, args ...interface{}) ([]map[string]interface{}, error) {
|
||||
func (c *DBClient) reconnectIfDbEmpty() error {
|
||||
if c.db == nil {
|
||||
// Trigger reconnection
|
||||
select {
|
||||
case c.reconnect <- struct{}{}:
|
||||
default:
|
||||
}
|
||||
return nil, fmt.Errorf("database is not connected, attempting to reconnect")
|
||||
return fmt.Errorf("database is not connected, attempting to reconnect")
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
rows, err := c.db.Raw(query, args...).Rows()
|
||||
func (c *DBClient) handleSQLError(err error) error {
|
||||
if err != nil {
|
||||
// If execution fails, connection might be lost, trigger reconnection
|
||||
select {
|
||||
case c.reconnect <- struct{}{}:
|
||||
default:
|
||||
}
|
||||
return nil, fmt.Errorf("failed to execute SQL query: %w", err)
|
||||
return fmt.Errorf("failed to execute SQL: %w", err)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// DescribeTable Get the structure of a specific table.
|
||||
func (c *DBClient) DescribeTable(table string) ([]map[string]interface{}, error) {
|
||||
var sql string
|
||||
var args []string
|
||||
switch c.dbType {
|
||||
case MYSQL:
|
||||
sql = `
|
||||
select
|
||||
column_name,
|
||||
column_type,
|
||||
is_nullable,
|
||||
column_key,
|
||||
column_default,
|
||||
extra,
|
||||
column_comment
|
||||
from information_schema.columns
|
||||
where table_schema = database() and table_name = ?
|
||||
`
|
||||
args = []string{table}
|
||||
|
||||
case POSTGRES:
|
||||
sql = `
|
||||
select
|
||||
column_name,
|
||||
data_type as column_type,
|
||||
is_nullable,
|
||||
case
|
||||
when column_default like 'nextval%%' then 'auto_increment'
|
||||
when column_default is not null then 'default'
|
||||
else ''
|
||||
end as column_key,
|
||||
column_default,
|
||||
case
|
||||
when column_default like 'nextval%%' then 'auto_increment'
|
||||
else ''
|
||||
end as extra,
|
||||
col_description((select oid from pg_class where relname = ?), ordinal_position) as column_comment
|
||||
from information_schema.columns
|
||||
where table_name = ?
|
||||
`
|
||||
args = []string{table, table}
|
||||
|
||||
case CLICKHOUSE:
|
||||
sql = `
|
||||
select
|
||||
name as column_name,
|
||||
type as column_type,
|
||||
if(is_nullable, 'YES', 'NO') as is_nullable,
|
||||
default_kind as column_key,
|
||||
default_expression as column_default,
|
||||
default_kind as extra,
|
||||
comment as column_comment
|
||||
from system.columns
|
||||
where database = currentDatabase() and table = ?
|
||||
`
|
||||
args = []string{table}
|
||||
|
||||
case SQLITE:
|
||||
sql = `
|
||||
select
|
||||
name as column_name,
|
||||
type as column_type,
|
||||
not (notnull = 1) as is_nullable,
|
||||
pk as column_key,
|
||||
dflt_value as column_default,
|
||||
'' as extra,
|
||||
'' as column_comment
|
||||
from pragma_table_info(?)
|
||||
`
|
||||
args = []string{table}
|
||||
|
||||
default:
|
||||
return nil, fmt.Errorf("unsupported database type: %s", c.dbType)
|
||||
}
|
||||
|
||||
return c.Query(sql, args)
|
||||
}
|
||||
|
||||
// ListTables List all tables in the connected database.
|
||||
func (c *DBClient) ListTables() ([]string, error) {
|
||||
var sql string
|
||||
switch c.dbType {
|
||||
case MYSQL:
|
||||
sql = "show tables"
|
||||
case POSTGRES:
|
||||
sql = "select tablename from pg_tables where schemaname = 'public'"
|
||||
case CLICKHOUSE:
|
||||
sql = "select name from system.tables where database = currentDatabase()"
|
||||
case SQLITE:
|
||||
sql = "select name from sqlite_master where type='table'"
|
||||
default:
|
||||
return nil, fmt.Errorf("unsupported database type: %s", c.dbType)
|
||||
}
|
||||
|
||||
rows, err := c.db.Raw(sql).Rows()
|
||||
if err := c.handleSQLError(err); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
defer rows.Close()
|
||||
|
||||
var tables []string
|
||||
for rows.Next() {
|
||||
var table string
|
||||
if err := rows.Scan(&table); err != nil {
|
||||
return nil, fmt.Errorf("failed to scan table name: %w", err)
|
||||
}
|
||||
tables = append(tables, table)
|
||||
}
|
||||
|
||||
return tables, nil
|
||||
}
|
||||
|
||||
// Execute executes an INSERT, UPDATE, or DELETE raw SQL and returns the rows affected
|
||||
func (c *DBClient) Execute(sql string, args ...interface{}) (int64, error) {
|
||||
if err := c.reconnectIfDbEmpty(); err != nil {
|
||||
return 0, err
|
||||
}
|
||||
|
||||
tx := c.db.Exec(sql, args...)
|
||||
if err := c.handleSQLError(tx.Error); err != nil {
|
||||
return 0, err
|
||||
}
|
||||
defer tx.Commit()
|
||||
|
||||
return tx.RowsAffected, nil
|
||||
}
|
||||
|
||||
// Query executes a raw SQL query and returns the result as a slice of maps
|
||||
func (c *DBClient) Query(sql string, args ...interface{}) ([]map[string]interface{}, error) {
|
||||
if err := c.reconnectIfDbEmpty(); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
rows, err := c.db.Raw(sql, args...).Rows()
|
||||
if err := c.handleSQLError(err); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
defer rows.Close()
|
||||
|
||||
|
||||
@@ -49,11 +49,24 @@ func (c *DBConfig) NewServer(serverName string) (*common.MCPServer, error) {
|
||||
)
|
||||
|
||||
dbClient := NewDBClient(c.dsn, c.dbType, mcpServer.GetDestoryChannel())
|
||||
descriptionSuffix := fmt.Sprintf("in database %s. Database description: %s", c.dbType, c.description)
|
||||
// Add query tool
|
||||
mcpServer.AddTool(
|
||||
mcp.NewToolWithRawSchema("query", fmt.Sprintf("Run a read-only SQL query in database %s. Database description: %s", c.dbType, c.description), GetQueryToolSchema()),
|
||||
mcp.NewToolWithRawSchema("query", fmt.Sprintf("Run a read-only SQL query %s", descriptionSuffix), GetQueryToolSchema()),
|
||||
HandleQueryTool(dbClient),
|
||||
)
|
||||
mcpServer.AddTool(
|
||||
mcp.NewToolWithRawSchema("execute", fmt.Sprintf("Execute an insert, update, or delete SQL %s", descriptionSuffix), GetExecuteToolSchema()),
|
||||
HandleExecuteTool(dbClient),
|
||||
)
|
||||
mcpServer.AddTool(
|
||||
mcp.NewToolWithRawSchema("list tables", fmt.Sprintf("List all tables %s", descriptionSuffix), GetListTablesToolSchema()),
|
||||
HandleListTablesTool(dbClient),
|
||||
)
|
||||
mcpServer.AddTool(
|
||||
mcp.NewToolWithRawSchema("describe table", fmt.Sprintf("Get the structure of a specific table %s", descriptionSuffix), GetDescribeTableToolSchema()),
|
||||
HandleDescribeTableTool(dbClient),
|
||||
)
|
||||
|
||||
return mcpServer, nil
|
||||
}
|
||||
|
||||
@@ -18,27 +18,80 @@ func HandleQueryTool(dbClient *DBClient) common.ToolHandlerFunc {
|
||||
return nil, fmt.Errorf("invalid message argument")
|
||||
}
|
||||
|
||||
results, err := dbClient.ExecuteSQL(message)
|
||||
results, err := dbClient.Query(message)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to execute SQL query: %w", err)
|
||||
}
|
||||
|
||||
jsonData, err := json.Marshal(results)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to marshal SQL results: %w", err)
|
||||
return buildCallToolResult(results)
|
||||
}
|
||||
}
|
||||
|
||||
// HandleExecuteTool handles SQL INSERT, UPDATE, or DELETE execution
|
||||
func HandleExecuteTool(dbClient *DBClient) common.ToolHandlerFunc {
|
||||
return func(ctx context.Context, request mcp.CallToolRequest) (*mcp.CallToolResult, error) {
|
||||
arguments := request.Params.Arguments
|
||||
message, ok := arguments["sql"].(string)
|
||||
if !ok {
|
||||
return nil, fmt.Errorf("invalid message argument")
|
||||
}
|
||||
|
||||
return &mcp.CallToolResult{
|
||||
Content: []mcp.Content{
|
||||
mcp.TextContent{
|
||||
Type: "text",
|
||||
Text: string(jsonData),
|
||||
},
|
||||
},
|
||||
}, nil
|
||||
results, err := dbClient.Execute(message)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to execute SQL query: %w", err)
|
||||
}
|
||||
|
||||
return buildCallToolResult(results)
|
||||
}
|
||||
}
|
||||
|
||||
// HandleListTablesTool handles list all tables
|
||||
func HandleListTablesTool(dbClient *DBClient) common.ToolHandlerFunc {
|
||||
return func(ctx context.Context, request mcp.CallToolRequest) (*mcp.CallToolResult, error) {
|
||||
results, err := dbClient.ListTables()
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to execute SQL query: %w", err)
|
||||
}
|
||||
|
||||
return buildCallToolResult(results)
|
||||
}
|
||||
}
|
||||
|
||||
// HandleDescribeTableTool handles describe table
|
||||
func HandleDescribeTableTool(dbClient *DBClient) common.ToolHandlerFunc {
|
||||
return func(ctx context.Context, request mcp.CallToolRequest) (*mcp.CallToolResult, error) {
|
||||
arguments := request.Params.Arguments
|
||||
message, ok := arguments["table"].(string)
|
||||
if !ok {
|
||||
return nil, fmt.Errorf("invalid message argument")
|
||||
}
|
||||
|
||||
results, err := dbClient.DescribeTable(message)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to execute SQL query: %w", err)
|
||||
}
|
||||
|
||||
return buildCallToolResult(results)
|
||||
}
|
||||
}
|
||||
|
||||
// buildCallToolResult builds the call tool result
|
||||
func buildCallToolResult(results any) (*mcp.CallToolResult, error) {
|
||||
jsonData, err := json.Marshal(results)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to marshal SQL results: %w", err)
|
||||
}
|
||||
|
||||
return &mcp.CallToolResult{
|
||||
Content: []mcp.Content{
|
||||
mcp.TextContent{
|
||||
Type: "text",
|
||||
Text: string(jsonData),
|
||||
},
|
||||
},
|
||||
}, nil
|
||||
}
|
||||
|
||||
// GetQueryToolSchema returns the schema for query tool
|
||||
func GetQueryToolSchema() json.RawMessage {
|
||||
return json.RawMessage(`
|
||||
@@ -53,3 +106,44 @@ func GetQueryToolSchema() json.RawMessage {
|
||||
}
|
||||
`)
|
||||
}
|
||||
|
||||
// GetExecuteToolSchema returns the schema for execute tool
|
||||
func GetExecuteToolSchema() json.RawMessage {
|
||||
return json.RawMessage(`
|
||||
{
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"sql": {
|
||||
"type": "string",
|
||||
"description": "The sql to execute"
|
||||
}
|
||||
}
|
||||
}
|
||||
`)
|
||||
}
|
||||
|
||||
// GetDescribeTableToolSchema returns the schema for DescribeTable tool
|
||||
func GetDescribeTableToolSchema() json.RawMessage {
|
||||
return json.RawMessage(`
|
||||
{
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"table": {
|
||||
"type": "string",
|
||||
"description": "table name"
|
||||
}
|
||||
}
|
||||
}
|
||||
`)
|
||||
}
|
||||
|
||||
// GetListTablesToolSchema returns the schema for ListTables tool
|
||||
func GetListTablesToolSchema() json.RawMessage {
|
||||
return json.RawMessage(`
|
||||
{
|
||||
"type": "object",
|
||||
"properties": {
|
||||
}
|
||||
}
|
||||
`)
|
||||
}
|
||||
|
||||
@@ -4,6 +4,7 @@ import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"net/http"
|
||||
"net/url"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
@@ -94,13 +95,15 @@ func (s *SSEServer) HandleSSE(cb api.FilterCallbackHandler, stopChan chan struct
|
||||
defer s.sessions.Delete(sessionID)
|
||||
|
||||
channel := GetSSEChannelName(sessionID)
|
||||
u, err := url.Parse(s.baseURL + s.messageEndpoint)
|
||||
if err != nil {
|
||||
api.LogErrorf("Failed to parse base URL: %v", err)
|
||||
}
|
||||
|
||||
messageEndpoint := fmt.Sprintf(
|
||||
"%s%s?sessionId=%s",
|
||||
s.baseURL,
|
||||
s.messageEndpoint,
|
||||
sessionID,
|
||||
)
|
||||
q := u.Query()
|
||||
q.Set("sessionId", sessionID)
|
||||
u.RawQuery = q.Encode()
|
||||
messageEndpoint := u.String()
|
||||
|
||||
// go func() {
|
||||
// for {
|
||||
@@ -126,7 +129,7 @@ func (s *SSEServer) HandleSSE(cb api.FilterCallbackHandler, stopChan chan struct
|
||||
// }
|
||||
// }()
|
||||
|
||||
err := s.redisClient.Subscribe(channel, stopChan, func(message string) {
|
||||
err = s.redisClient.Subscribe(channel, stopChan, func(message string) {
|
||||
defer cb.EncoderFilterCallbacks().RecoverPanic()
|
||||
api.LogDebugf("SSE Send message: %s", message)
|
||||
cb.EncoderFilterCallbacks().InjectData([]byte(message))
|
||||
@@ -136,7 +139,7 @@ func (s *SSEServer) HandleSSE(cb api.FilterCallbackHandler, stopChan chan struct
|
||||
}
|
||||
|
||||
// Send the initial endpoint event
|
||||
initialEvent := fmt.Sprintf("event: endpoint\ndata: %s\r\n\r\n", messageEndpoint)
|
||||
initialEvent := fmt.Sprintf("event: endpoint\ndata: %s\n\n", messageEndpoint)
|
||||
err = s.redisClient.Publish(channel, initialEvent)
|
||||
if err != nil {
|
||||
api.LogErrorf("Failed to send initial event: %v", err)
|
||||
@@ -210,7 +213,7 @@ func (s *SSEServer) HandleMessage(w http.ResponseWriter, r *http.Request, body j
|
||||
var status int
|
||||
// Only send response if there is one (not for notifications)
|
||||
if response != nil {
|
||||
if sessionID != ""{
|
||||
if sessionID != "" {
|
||||
w.WriteHeader(http.StatusAccepted)
|
||||
status = http.StatusAccepted
|
||||
} else {
|
||||
|
||||
@@ -129,9 +129,15 @@ func (f *filter) processMcpRequestHeadersForRestUpstream(header api.RequestHeade
|
||||
if method != http.MethodGet {
|
||||
f.callbacks.DecoderFilterCallbacks().SendLocalReply(http.StatusMethodNotAllowed, "Method not allowed", nil, 0, "")
|
||||
} else {
|
||||
// to support the query param in Message Endpoint
|
||||
trimmed := strings.TrimSuffix(requestUrl.Path, GlobalSSEPathSuffix)
|
||||
if rq := requestUrl.RawQuery; rq != "" {
|
||||
trimmed += "?" + rq
|
||||
}
|
||||
|
||||
f.config.defaultServer = common.NewSSEServer(common.NewMCPServer(DefaultServerName, Version),
|
||||
common.WithSSEEndpoint(GlobalSSEPathSuffix),
|
||||
common.WithMessageEndpoint(strings.TrimSuffix(requestUrl.Path, GlobalSSEPathSuffix)),
|
||||
common.WithMessageEndpoint(trimmed),
|
||||
common.WithRedisClient(f.config.redisClient))
|
||||
f.serverName = f.config.defaultServer.GetServerName()
|
||||
body := "SSE connection create"
|
||||
|
||||
@@ -7,7 +7,7 @@
|
||||
| ----------- | --------------- | ----------------------- | ------ | ------------------------------------------- |
|
||||
| `modelKey` | string | 选填 | model | 请求body中model参数的位置 |
|
||||
| `modelMapping` | map of string | 选填 | - | AI 模型映射表,用于将请求中的模型名称映射为服务提供商支持模型名称。<br/>1. 支持前缀匹配。例如用 "gpt-3-*" 匹配所有名称以“gpt-3-”开头的模型;<br/>2. 支持使用 "*" 为键来配置通用兜底映射关系;<br/>3. 如果映射的目标名称为空字符串 "",则表示保留原模型名称。 |
|
||||
| `enableOnPathSuffix` | array of string | 选填 | ["/v1/chat/completions"] | 只对这些特定路径后缀的请求生效 |
|
||||
| `enableOnPathSuffix` | array of string | 选填 | ["/completions","/embeddings","/images/generations","/audio/speech","/fine_tuning/jobs","/moderations","/image-synthesis","/video-synthesis"] | 只对这些特定路径后缀的请求生效|
|
||||
|
||||
|
||||
## 效果说明
|
||||
|
||||
@@ -7,7 +7,7 @@ The `model-mapper` plugin implements the functionality of routing based on the m
|
||||
| ----------- | --------------- | ----------------------- | ------ | ------------------------------------------- |
|
||||
| `modelKey` | string | Optional | model | The location of the model parameter in the request body. |
|
||||
| `modelMapping` | map of string | Optional | - | AI model mapping table, used to map the model names in the request to the model names supported by the service provider.<br/>1. Supports prefix matching. For example, use "gpt-3-*" to match all models whose names start with “gpt-3-”;<br/>2. Supports using "*" as the key to configure a generic fallback mapping relationship;<br/>3. If the target name in the mapping is an empty string "", it means to keep the original model name. |
|
||||
| `enableOnPathSuffix` | array of string | Optional | ["/v1/chat/completions"] | Only applies to requests with these specific path suffixes. |
|
||||
| `enableOnPathSuffix` | array of string | Optional | ["/completions","/embeddings","/images/generations","/audio/speech","/fine_tuning/jobs","/moderations","/image-synthesis","/video-synthesis"] | Only applies to requests with these specific path suffixes. |
|
||||
|
||||
## Runtime Properties
|
||||
|
||||
|
||||
@@ -43,7 +43,8 @@ struct ModelMapperConfigRule {
|
||||
std::string default_model_mapping_;
|
||||
std::vector<std::string> enable_on_path_suffix_ = {
|
||||
"/completions", "/embeddings", "/images/generations",
|
||||
"/audio/speech", "/fine_tuning/jobs", "/moderations"};
|
||||
"/audio/speech", "/fine_tuning/jobs", "/moderations",
|
||||
"/image-synthesis", "/video-synthesis"};
|
||||
};
|
||||
|
||||
// PluginRootContext is the root context for all streams processed by the
|
||||
|
||||
@@ -8,7 +8,7 @@
|
||||
| `modelKey` | string | 选填 | model | 请求body中model参数的位置 |
|
||||
| `addProviderHeader` | string | 选填 | - | 从model参数中解析出的provider名字放到哪个请求header中 |
|
||||
| `modelToHeader` | string | 选填 | - | 直接将model参数放到哪个请求header中 |
|
||||
| `enableOnPathSuffix` | array of string | 选填 | ["/completions","/embeddings","/images/generations","/audio/speech","/fine_tuning/jobs","/moderations"] | 只对这些特定路径后缀的请求生效,可以配置为 "*" 以匹配所有路径 |
|
||||
| `enableOnPathSuffix` | array of string | 选填 | ["/completions","/embeddings","/images/generations","/audio/speech","/fine_tuning/jobs","/moderations","/image-synthesis","/video-synthesis"] | 只对这些特定路径后缀的请求生效,可以配置为 "*" 以匹配所有路径 |
|
||||
|
||||
## 运行属性
|
||||
|
||||
|
||||
@@ -8,7 +8,7 @@ The `model-router` plugin implements routing functionality based on the model pa
|
||||
| `modelKey` | string | Optional | model | Location of the model parameter in the request body |
|
||||
| `addProviderHeader` | string | Optional | - | Which request header to add the provider name parsed from the model parameter |
|
||||
| `modelToHeader` | string | Optional | - | Which request header to directly add the model parameter to |
|
||||
| `enableOnPathSuffix` | array of string | Optional | ["/v1/chat/completions"] | Only effective for requests with these specific path suffixes, can be configured as "*" to match all paths |
|
||||
| `enableOnPathSuffix` | array of string | Optional | ["/completions","/embeddings","/images/generations","/audio/speech","/fine_tuning/jobs","/moderations","/image-synthesis","/video-synthesis"] | Only effective for requests with these specific path suffixes, can be configured as "*" to match all paths |
|
||||
|
||||
## Runtime Properties
|
||||
|
||||
|
||||
@@ -49,7 +49,8 @@ struct ModelRouterConfigRule {
|
||||
std::string model_to_header_;
|
||||
std::vector<std::string> enable_on_path_suffix_ = {
|
||||
"/completions", "/embeddings", "/images/generations",
|
||||
"/audio/speech", "/fine_tuning/jobs", "/moderations"};
|
||||
"/audio/speech", "/fine_tuning/jobs", "/moderations",
|
||||
"/image-synthesis", "/video-synthesis"};
|
||||
};
|
||||
|
||||
class PluginContext;
|
||||
|
||||
98
plugins/wasm-go/extensions/ai-image-reader/README.md
Normal file
98
plugins/wasm-go/extensions/ai-image-reader/README.md
Normal file
@@ -0,0 +1,98 @@
|
||||
---
|
||||
title: AI IMAGE READER
|
||||
keywords: [ AI网关, AI IMAGE READER ]
|
||||
description: AI IMAGE READER 插件配置参考
|
||||
|
||||
---
|
||||
|
||||
## 功能说明
|
||||
|
||||
通过对接OCR服务实现AI-IMAGE-READER,目前支持阿里云模型服务灵积(dashscope)的qwen-vl-ocr模型提供OCR服务,流程如图所示:
|
||||
|
||||
<img src=".\ai-image-reader.png">
|
||||
|
||||
## 运行属性
|
||||
|
||||
插件执行阶段:`默认阶段`
|
||||
插件执行优先级:`400`
|
||||
|
||||
|
||||
## 配置说明
|
||||
|
||||
| 名称 | 数据类型 | 填写要求 | 默认值 | 描述 |
|
||||
| ------------- | -------- | -------- | ------ | -------------------------------------- |
|
||||
| `apiKey` | string | 必填 | - | 用于在访问OCR服务时进行认证的令牌。 |
|
||||
| `type` | string | 必填 | - | 后端OCR服务提供商类型(例如dashscope) |
|
||||
| `serviceHost` | string | 必填 | - | 后端OCR服务域名 |
|
||||
| `serviceName` | string | 必填 | - | 后端OCR服务名 |
|
||||
| `servicePort` | int | 必填 | - | 后端OCR服务端口 |
|
||||
| `model` | string | 必填 | - | 后端OCR服务模型名称(例如qwen-vl-ocr) |
|
||||
| `timeout` | int | 选填 | 10000 | API调用超时时间(毫秒) |
|
||||
|
||||
## 示例
|
||||
|
||||
```yaml
|
||||
"apiKey": "YOUR_API_KEY",
|
||||
"type": "dashscope",
|
||||
"model": "qwen-vl-ocr",
|
||||
"timeout": 10000,
|
||||
"serviceHost": "dashscope.aliyuncs.com",
|
||||
"serviceName": "dashscope",
|
||||
"servicePort": "443"
|
||||
```
|
||||
|
||||
请求遵循openai api协议规范:
|
||||
|
||||
URL传递图片:
|
||||
|
||||
```
|
||||
messages=[{
|
||||
"role": "user",
|
||||
"content": [
|
||||
{"type": "text", "text": "What's in this image?"},
|
||||
{
|
||||
"type": "image_url",
|
||||
"image_url": {
|
||||
"url": "https://help-static-aliyun-doc.aliyuncs.com/file-manage-files/zh-CN/20241108/ctdzex/biaozhun.jpg",
|
||||
},
|
||||
},
|
||||
],
|
||||
}],
|
||||
```
|
||||
|
||||
Base64编码传递图片:
|
||||
|
||||
```
|
||||
messages=[
|
||||
{
|
||||
"role": "user",
|
||||
"content": [
|
||||
{ "type": "text", "text": "what's in this image?" },
|
||||
{
|
||||
"type": "image_url",
|
||||
"image_url": {
|
||||
"url": f"data:image/jpeg;base64,{base64_image}",
|
||||
},
|
||||
},
|
||||
],
|
||||
}
|
||||
],
|
||||
```
|
||||
|
||||
以下为使用ai-image-reader进行增强的例子,原始请求为:
|
||||
|
||||
```
|
||||
图片内容是什么?
|
||||
```
|
||||
|
||||
未经过ai-image-reader插件处理LLM返回的结果为:
|
||||
|
||||
```
|
||||
对不起,作为一个文本AI助手,我无法查看图片内容。您可以描述一下图片的内容,我可以尽力帮助您识别。
|
||||
```
|
||||
|
||||
经过ai-image-reader插件处理后LLM返回的结果为:
|
||||
|
||||
```
|
||||
非常感谢您分享的图片内容!根据您提供的文字信息,学习编写shell脚本对Linux系统管理员来说是非常有益的。通过自动化系统管理任务,可以提高效率并减少手动操作的时间。对于家用Linux爱好者来说,了解如何在命令行下操作也是很重要的,因为在某些情况下,命令行操作可能更为便捷和高效。在本书中,您将学习如何运用shell脚本处理系统管理任务,以及如何在Linux命令行下进行操作。希望这本书能够帮助您更好地理解和应用Linux系统管理和操作的知识!如果您有任何其他问题或需要进一步帮助,请随时告诉我。
|
||||
```
|
||||
94
plugins/wasm-go/extensions/ai-image-reader/README_EN.md
Normal file
94
plugins/wasm-go/extensions/ai-image-reader/README_EN.md
Normal file
@@ -0,0 +1,94 @@
|
||||
---
|
||||
title: AI IMAGE READER
|
||||
keywords: [ AI GATEWAY, AI IMAGE READER ]
|
||||
description: AI IMAGE READER Plugin Configuration Reference
|
||||
---
|
||||
|
||||
## Function Description
|
||||
|
||||
By integrating with OCR services to implement AI-IMAGE-READER, currently, it supports Alibaba Cloud's qwen-vl-ocr model under Dashscope for OCR services, and the process is shown in the figure below:<img src=".\ai-image-reader-en.png">
|
||||
|
||||
## Running Attributes
|
||||
|
||||
Plugin execution phase:`Default Phase`
|
||||
Plugin execution priority:`400`
|
||||
|
||||
## Configuration Description
|
||||
|
||||
| Name | Data Type | Requirement | Default Value | Description |
|
||||
| ------------- | --------- | ----------- | ------------- | ------------------------------------------------------------ |
|
||||
| `apiKey` | string | Required | - | Token for authenticating access to OCR services. |
|
||||
| `type` | string | Required | - | Provider type of the backend OCR service type(e.g. dashscope). |
|
||||
| `serviceHost` | string | Required | - | Host of the backend OCR service. |
|
||||
| `serviceName` | string | Required | - | Name of the backend OCR service. |
|
||||
| `servicePort` | int | Required | - | Port of the backend OCR service. |
|
||||
| `model` | string | Required | - | Model name of the backend OCR service (e.g., qwen-vl-ocr). |
|
||||
| `timeout` | int | Required | 10000 | API call timeout duration (milliseconds). |
|
||||
|
||||
## Example
|
||||
|
||||
```yaml
|
||||
"apiKey": "YOUR_API_KEY",
|
||||
"type": "dashscope",
|
||||
"model": "qwen-vl-ocr",
|
||||
"timeout": 10000,
|
||||
"serviceHost": "dashscope.aliyuncs.com",
|
||||
"serviceName": "dashscope",
|
||||
"servicePort": "443"
|
||||
```
|
||||
|
||||
Request to follow the OpenAI API protocol specifications:
|
||||
|
||||
Pass images via URL:
|
||||
|
||||
```
|
||||
messages=[{
|
||||
"role": "user",
|
||||
"content": [
|
||||
{"type": "text", "text": "What's in this image?"},
|
||||
{
|
||||
"type": "image_url",
|
||||
"image_url": {
|
||||
"url": "https://help-static-aliyun-doc.aliyuncs.com/file-manage-files/zh-CN/20241108/ctdzex/biaozhun.jpg",
|
||||
},
|
||||
},
|
||||
],
|
||||
}],
|
||||
```
|
||||
|
||||
Pass images via Base64:
|
||||
|
||||
```
|
||||
messages=[
|
||||
{
|
||||
"role": "user",
|
||||
"content": [
|
||||
{ "type": "text", "text": "what's in this image?" },
|
||||
{
|
||||
"type": "image_url",
|
||||
"image_url": {
|
||||
"url": f"data:image/jpeg;base64,{base64_image}",
|
||||
},
|
||||
},
|
||||
],
|
||||
}
|
||||
],
|
||||
```
|
||||
|
||||
The following is an example of using ai-image-reader for enhancement. The original request was:
|
||||
|
||||
```
|
||||
What is the content of the image?
|
||||
```
|
||||
|
||||
The result returned by the LLM without processing from the ai-image-reader plugin is:
|
||||
|
||||
```
|
||||
Sorry, as a text-based AI assistant, I cannot view image content. You can describe the content of the image, and I will do my best to help you identify it.
|
||||
```
|
||||
|
||||
The result returned by the LLM after processing by the ai-image-reader plugin is:
|
||||
|
||||
```
|
||||
Thank you for sharing the image! Mastering shell scripting is highly beneficial for Linux system administrators as it automates tasks, boosts efficiency, and cuts down manual work. For home Linux users, command-line skills are equally important for quick and efficient operations. This book will teach you to handle system management tasks with shell scripts and operate in the Linux command line. Hope it aids your Linux system management learning! Feel free to ask if you have more questions.
|
||||
```
|
||||
Binary file not shown.
|
After Width: | Height: | Size: 30 KiB |
BIN
plugins/wasm-go/extensions/ai-image-reader/ai-image-reader.png
Normal file
BIN
plugins/wasm-go/extensions/ai-image-reader/ai-image-reader.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 24 KiB |
177
plugins/wasm-go/extensions/ai-image-reader/dashscope.go
Normal file
177
plugins/wasm-go/extensions/ai-image-reader/dashscope.go
Normal file
@@ -0,0 +1,177 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"fmt"
|
||||
"github.com/alibaba/higress/plugins/wasm-go/pkg/log"
|
||||
"github.com/alibaba/higress/plugins/wasm-go/pkg/wrapper"
|
||||
"github.com/tidwall/gjson"
|
||||
"net/http"
|
||||
"strconv"
|
||||
)
|
||||
|
||||
const (
|
||||
DashscopeDomain = "dashscope.aliyuncs.com"
|
||||
DashscopePort = 443
|
||||
DashscopeDefaultModelName = "qwen-vl-ocr"
|
||||
DashscopeEndpoint = "/compatible-mode/v1/chat/completions"
|
||||
MinPixels = 3136
|
||||
MaxPixels = 1003520
|
||||
)
|
||||
|
||||
type OcrReq struct {
|
||||
Model string `json:"model,omitempty"`
|
||||
Messages []chatMessage `json:"messages,omitempty"`
|
||||
}
|
||||
|
||||
type OcrResp struct {
|
||||
Choices []chatCompletionChoice `json:"choices"`
|
||||
}
|
||||
|
||||
type chatCompletionChoice struct {
|
||||
Message *chatMessageContent `json:"message,omitempty"`
|
||||
}
|
||||
|
||||
type chatMessageContent struct {
|
||||
Role string `json:"role,omitempty"`
|
||||
Content string `json:"content,omitempty"`
|
||||
}
|
||||
|
||||
type chatMessage struct {
|
||||
Role string `json:"role"`
|
||||
Content []content `json:"content"`
|
||||
}
|
||||
|
||||
type imageURL struct {
|
||||
URL string `json:"url"`
|
||||
}
|
||||
|
||||
type content struct {
|
||||
Type string `json:"type"`
|
||||
ImageUrl imageURL `json:"image_url,omitempty"`
|
||||
MinPixels int `json:"min_pixels,omitempty"`
|
||||
MaxPixels int `json:"max_pixels,omitempty"`
|
||||
Text string `json:"text,omitempty"`
|
||||
}
|
||||
|
||||
var dashScopeConfig dashScopeProviderConfig
|
||||
|
||||
type dashScopeProviderInitializer struct {
|
||||
}
|
||||
|
||||
func (d *dashScopeProviderInitializer) InitConfig(json gjson.Result) {
|
||||
dashScopeConfig.apiKey = json.Get("apiKey").String()
|
||||
}
|
||||
|
||||
func (d *dashScopeProviderInitializer) ValidateConfig() error {
|
||||
if dashScopeConfig.apiKey == "" {
|
||||
return errors.New("[DashScope] apiKey is required")
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (d *dashScopeProviderInitializer) CreateProvider(c ProviderConfig) (Provider, error) {
|
||||
if c.servicePort == 0 {
|
||||
c.servicePort = DashscopePort
|
||||
}
|
||||
if c.serviceHost == "" {
|
||||
c.serviceHost = DashscopeDomain
|
||||
}
|
||||
return &DSProvider{
|
||||
config: c,
|
||||
client: wrapper.NewClusterClient(wrapper.FQDNCluster{
|
||||
FQDN: c.serviceName,
|
||||
Host: c.serviceHost,
|
||||
Port: int64(c.servicePort),
|
||||
}),
|
||||
}, nil
|
||||
}
|
||||
|
||||
type dashScopeProviderConfig struct {
|
||||
// @Title zh-CN 文字识别服务 API Key
|
||||
// @Description zh-CN 文字识别服务 API Key
|
||||
apiKey string
|
||||
}
|
||||
|
||||
type DSProvider struct {
|
||||
config ProviderConfig
|
||||
client wrapper.HttpClient
|
||||
}
|
||||
|
||||
func (d *DSProvider) GetProviderType() string {
|
||||
return ProviderTypeDashscope
|
||||
}
|
||||
|
||||
func (d *DSProvider) CallArgs(imageUrl string) CallArgs {
|
||||
model := d.config.model
|
||||
if model == "" {
|
||||
model = DashscopeDefaultModelName
|
||||
}
|
||||
reqBody := OcrReq{
|
||||
Model: model,
|
||||
Messages: []chatMessage{
|
||||
{
|
||||
Role: "user",
|
||||
Content: []content{
|
||||
{
|
||||
Type: "image_url",
|
||||
ImageUrl: imageURL{
|
||||
URL: imageUrl,
|
||||
},
|
||||
MinPixels: MinPixels,
|
||||
MaxPixels: MaxPixels,
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
body, _ := json.Marshal(reqBody)
|
||||
return CallArgs{
|
||||
Method: http.MethodPost,
|
||||
Url: DashscopeEndpoint,
|
||||
Headers: [][2]string{
|
||||
{"Content-Type", "application/json"},
|
||||
{"Authorization", fmt.Sprintf("Bearer %s", dashScopeConfig.apiKey)},
|
||||
},
|
||||
Body: body,
|
||||
TimeoutMillisecond: d.config.timeout,
|
||||
}
|
||||
}
|
||||
|
||||
func (d *DSProvider) parseOcrResponse(responseBody []byte) (*OcrResp, error) {
|
||||
var resp OcrResp
|
||||
err := json.Unmarshal(responseBody, &resp)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return &resp, nil
|
||||
}
|
||||
|
||||
func (d *DSProvider) DoOCR(
|
||||
imageUrl string,
|
||||
callback func(imageContent string, err error)) error {
|
||||
args := d.CallArgs(imageUrl)
|
||||
err := d.client.Call(args.Method, args.Url, args.Headers, args.Body,
|
||||
func(statusCode int, responseHeaders http.Header, responseBody []byte) {
|
||||
if statusCode != http.StatusOK {
|
||||
err := errors.New("failed to do ocr due to status code: " + strconv.Itoa(statusCode))
|
||||
callback("", err)
|
||||
return
|
||||
}
|
||||
log.Debugf("do ocr response: %d, %s", statusCode, responseBody)
|
||||
resp, err := d.parseOcrResponse(responseBody)
|
||||
if err != nil {
|
||||
err = fmt.Errorf("failed to parse response: %v", err)
|
||||
callback("", err)
|
||||
return
|
||||
}
|
||||
if len(resp.Choices) == 0 {
|
||||
err = errors.New("no ocr response found")
|
||||
callback("", err)
|
||||
return
|
||||
}
|
||||
callback(resp.Choices[0].Message.Content, nil)
|
||||
}, args.TimeoutMillisecond)
|
||||
return err
|
||||
}
|
||||
19
plugins/wasm-go/extensions/ai-image-reader/go.mod
Normal file
19
plugins/wasm-go/extensions/ai-image-reader/go.mod
Normal file
@@ -0,0 +1,19 @@
|
||||
module ai-image-reader
|
||||
|
||||
go 1.19
|
||||
|
||||
require (
|
||||
github.com/alibaba/higress/plugins/wasm-go v1.4.4-0.20250621002302-e94ac43dd15c
|
||||
github.com/tidwall/gjson v1.18.0
|
||||
)
|
||||
|
||||
require (
|
||||
github.com/google/uuid v1.3.0 // indirect
|
||||
github.com/higress-group/nottinygc v0.0.0-20231101025119-e93c4c2f8520 // indirect
|
||||
github.com/higress-group/proxy-wasm-go-sdk v1.0.1 // indirect
|
||||
github.com/magefile/mage v1.14.0 // indirect
|
||||
github.com/tidwall/match v1.1.1 // indirect
|
||||
github.com/tidwall/pretty v1.2.0 // indirect
|
||||
github.com/tidwall/resp v0.1.1 // indirect
|
||||
github.com/tidwall/sjson v1.2.5 // indirect
|
||||
)
|
||||
25
plugins/wasm-go/extensions/ai-image-reader/go.sum
Normal file
25
plugins/wasm-go/extensions/ai-image-reader/go.sum
Normal file
@@ -0,0 +1,25 @@
|
||||
github.com/alibaba/higress/plugins/wasm-go v1.4.4-0.20250621002302-e94ac43dd15c h1:YGKECMrlahN6dyEaM/S5NEU4IJoFzWKsHQyawov6ep8=
|
||||
github.com/alibaba/higress/plugins/wasm-go v1.4.4-0.20250621002302-e94ac43dd15c/go.mod h1:E2xVWrIovU3rZi4HGlMfcYf+c/UVh3aCtpcJlNjpxYc=
|
||||
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
|
||||
github.com/google/uuid v1.3.0 h1:t6JiXgmwXMjEs8VusXIJk2BXHsn+wx8BZdTaoZ5fu7I=
|
||||
github.com/google/uuid v1.3.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
|
||||
github.com/higress-group/nottinygc v0.0.0-20231101025119-e93c4c2f8520 h1:IHDghbGQ2DTIXHBHxWfqCYQW1fKjyJ/I7W1pMyUDeEA=
|
||||
github.com/higress-group/nottinygc v0.0.0-20231101025119-e93c4c2f8520/go.mod h1:Nz8ORLaFiLWotg6GeKlJMhv8cci8mM43uEnLA5t8iew=
|
||||
github.com/higress-group/proxy-wasm-go-sdk v1.0.1 h1:f9X4I5Y6jK3GrdsWn/lCTI1z5Lu5GOMazqQohAC3Vzk=
|
||||
github.com/higress-group/proxy-wasm-go-sdk v1.0.1/go.mod h1:iiSyFbo+rAtbtGt/bsefv8GU57h9CCLYGJA74/tF5/0=
|
||||
github.com/magefile/mage v1.14.0 h1:6QDX3g6z1YvJ4olPhT1wksUcSa/V0a1B+pJb73fBjyo=
|
||||
github.com/magefile/mage v1.14.0/go.mod h1:z5UZb/iS3GoOSn0JgWuiw7dxlurVYTu+/jHXqQg881A=
|
||||
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
|
||||
github.com/stretchr/testify v1.8.4 h1:CcVxjf3Q8PM0mHUKJCdn+eZZtm5yQwehR5yeSVQQcUk=
|
||||
github.com/tidwall/gjson v1.14.2/go.mod h1:/wbyibRr2FHMks5tjHJ5F8dMZh3AcwJEMf5vlfC0lxk=
|
||||
github.com/tidwall/gjson v1.18.0 h1:FIDeeyB800efLX89e5a8Y0BNH+LOngJyGrIWxG2FKQY=
|
||||
github.com/tidwall/gjson v1.18.0/go.mod h1:/wbyibRr2FHMks5tjHJ5F8dMZh3AcwJEMf5vlfC0lxk=
|
||||
github.com/tidwall/match v1.1.1 h1:+Ho715JplO36QYgwN9PGYNhgZvoUSc9X2c80KVTi+GA=
|
||||
github.com/tidwall/match v1.1.1/go.mod h1:eRSPERbgtNPcGhD8UCthc6PmLEQXEWd3PRB5JTxsfmM=
|
||||
github.com/tidwall/pretty v1.2.0 h1:RWIZEg2iJ8/g6fDDYzMpobmaoGh5OLl4AXtGUGPcqCs=
|
||||
github.com/tidwall/pretty v1.2.0/go.mod h1:ITEVvHYasfjBbM0u2Pg8T2nJnzm8xPwvNhhsoaGGjNU=
|
||||
github.com/tidwall/resp v0.1.1 h1:Ly20wkhqKTmDUPlyM1S7pWo5kk0tDu8OoC/vFArXmwE=
|
||||
github.com/tidwall/resp v0.1.1/go.mod h1:3/FrruOBAxPTPtundW0VXgmsQ4ZBA0Aw714lVYgwFa0=
|
||||
github.com/tidwall/sjson v1.2.5 h1:kLy8mja+1c9jlljvWTlSazM7cKDRfJuR/bOJhcY5NcY=
|
||||
github.com/tidwall/sjson v1.2.5/go.mod h1:Fvgq9kS/6ociJEDnK0Fk1cpYF4FIW6ZF7LAe+6jwd28=
|
||||
gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
|
||||
139
plugins/wasm-go/extensions/ai-image-reader/main.go
Normal file
139
plugins/wasm-go/extensions/ai-image-reader/main.go
Normal file
@@ -0,0 +1,139 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"fmt"
|
||||
"github.com/alibaba/higress/plugins/wasm-go/pkg/log"
|
||||
"github.com/alibaba/higress/plugins/wasm-go/pkg/wrapper"
|
||||
"github.com/higress-group/proxy-wasm-go-sdk/proxywasm"
|
||||
"github.com/higress-group/proxy-wasm-go-sdk/proxywasm/types"
|
||||
"github.com/tidwall/gjson"
|
||||
"github.com/tidwall/sjson"
|
||||
"strings"
|
||||
)
|
||||
|
||||
const (
|
||||
DefaultMaxBodyBytes uint32 = 100 * 1024 * 1024
|
||||
)
|
||||
|
||||
type Config struct {
|
||||
promptTemplate string
|
||||
ocrProvider Provider
|
||||
ocrProviderConfig *ProviderConfig
|
||||
}
|
||||
|
||||
func main() {
|
||||
wrapper.SetCtx(
|
||||
"ai-image-reader",
|
||||
wrapper.ParseConfig(parseConfig),
|
||||
wrapper.ProcessRequestHeaders(onHttpRequestHeaders),
|
||||
wrapper.ProcessRequestBody(onHttpRequestBody),
|
||||
)
|
||||
}
|
||||
|
||||
func parseConfig(json gjson.Result, config *Config) error {
|
||||
config.promptTemplate = `# 用户发送的图片解析得到的文字内容如下:
|
||||
{image_content}
|
||||
在回答时,请注意以下几点:
|
||||
- 请你回答问题时结合用户图片的文字内容回答。
|
||||
- 除非用户要求,否则你回答的语言需要和用户提问的语言保持一致。
|
||||
|
||||
# 用户消息为:
|
||||
{question}`
|
||||
config.ocrProviderConfig = &ProviderConfig{}
|
||||
config.ocrProviderConfig.FromJson(json)
|
||||
if err := config.ocrProviderConfig.Validate(); err != nil {
|
||||
return err
|
||||
}
|
||||
var err error
|
||||
config.ocrProvider, err = CreateProvider(*config.ocrProviderConfig)
|
||||
if err != nil {
|
||||
return errors.New("create ocr provider failed")
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func onHttpRequestHeaders(ctx wrapper.HttpContext, config Config) types.Action {
|
||||
contentType, _ := proxywasm.GetHttpRequestHeader("content-type")
|
||||
if contentType == "" {
|
||||
return types.ActionContinue
|
||||
}
|
||||
if !strings.Contains(contentType, "application/json") {
|
||||
log.Warnf("content is not json, can't process: %s", contentType)
|
||||
ctx.DontReadRequestBody()
|
||||
return types.ActionContinue
|
||||
}
|
||||
ctx.SetRequestBodyBufferLimit(DefaultMaxBodyBytes)
|
||||
_ = proxywasm.RemoveHttpRequestHeader("Accept-Encoding")
|
||||
return types.ActionContinue
|
||||
}
|
||||
|
||||
func onHttpRequestBody(ctx wrapper.HttpContext, config Config, body []byte) types.Action {
|
||||
var queryIndex int
|
||||
var query string
|
||||
messages := gjson.GetBytes(body, "messages").Array()
|
||||
var imageUrls []string
|
||||
for i := len(messages) - 1; i >= 0; i-- {
|
||||
if messages[i].Get("role").String() == "user" {
|
||||
queryIndex = i
|
||||
content := messages[i].Get("content").Array()
|
||||
for j := len(content) - 1; j >= 0; j-- {
|
||||
contentType := content[j].Get("type").String()
|
||||
if contentType == "image_url" {
|
||||
imageUrls = append(imageUrls, content[j].Get("image_url.url").String())
|
||||
} else if contentType == "text" {
|
||||
query = content[j].Get("text").String()
|
||||
}
|
||||
}
|
||||
break
|
||||
}
|
||||
}
|
||||
if len(imageUrls) == 0 {
|
||||
return types.ActionContinue
|
||||
}
|
||||
return executeReadImage(imageUrls, config, query, queryIndex, body)
|
||||
}
|
||||
|
||||
func executeReadImage(imageUrls []string, config Config, query string, queryIndex int, body []byte) types.Action {
|
||||
var imageContents []string
|
||||
var totalImages int
|
||||
var finished int
|
||||
for _, imageUrl := range imageUrls {
|
||||
err := config.ocrProvider.DoOCR(imageUrl, func(imageContent string, err error) {
|
||||
defer func() {
|
||||
finished++
|
||||
if totalImages == finished {
|
||||
var processedContents []string
|
||||
for idx := len(imageContents) - 1; idx >= 0; idx-- {
|
||||
processedContents = append(processedContents, fmt.Sprintf("第%d张图片内容为 %s", totalImages-idx, imageContents[idx]))
|
||||
}
|
||||
imageSummary := fmt.Sprintf("总共有 %d 张图片。\n", totalImages)
|
||||
prompt := strings.Replace(config.promptTemplate, "{image_content}", imageSummary+strings.Join(processedContents, "\n"), 1)
|
||||
prompt = strings.Replace(prompt, "{question}", query, 1)
|
||||
modifiedBody, err := sjson.SetBytes(body, fmt.Sprintf("messages.%d.content", queryIndex), prompt)
|
||||
if err != nil {
|
||||
log.Errorf("modify request message content failed, err:%v, body:%s", err, body)
|
||||
} else {
|
||||
log.Debugf("modified body:%s", modifiedBody)
|
||||
proxywasm.ReplaceHttpRequestBody(modifiedBody)
|
||||
}
|
||||
proxywasm.ResumeHttpRequest()
|
||||
}
|
||||
}()
|
||||
if err != nil {
|
||||
log.Errorf("do ocr failed, err:%v", err)
|
||||
return
|
||||
}
|
||||
imageContents = append(imageContents, imageContent)
|
||||
})
|
||||
if err != nil {
|
||||
log.Errorf("ocr call failed, err:%v", err)
|
||||
continue
|
||||
}
|
||||
totalImages++
|
||||
}
|
||||
if totalImages > 0 {
|
||||
return types.ActionPause
|
||||
}
|
||||
return types.ActionContinue
|
||||
}
|
||||
109
plugins/wasm-go/extensions/ai-image-reader/provider.go
Normal file
109
plugins/wasm-go/extensions/ai-image-reader/provider.go
Normal file
@@ -0,0 +1,109 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"github.com/tidwall/gjson"
|
||||
)
|
||||
|
||||
const (
|
||||
ProviderTypeDashscope = "dashscope"
|
||||
)
|
||||
|
||||
type providerInitializer interface {
|
||||
InitConfig(json gjson.Result)
|
||||
ValidateConfig() error
|
||||
CreateProvider(ProviderConfig) (Provider, error)
|
||||
}
|
||||
|
||||
var (
|
||||
providerInitializers = map[string]providerInitializer{
|
||||
ProviderTypeDashscope: &dashScopeProviderInitializer{},
|
||||
}
|
||||
)
|
||||
|
||||
type ProviderConfig struct {
|
||||
// @Title zh-CN 文字识别服务提供者类型
|
||||
// @Description zh-CN 文字识别服务提供者类型,例如 DashScope
|
||||
typ string
|
||||
// @Title zh-CN DashScope 文字识别服务名称
|
||||
// @Description zh-CN 文字识别服务名称
|
||||
serviceName string
|
||||
// @Title zh-CN 文字识别服务域名
|
||||
// @Description zh-CN 文字识别服务域名
|
||||
serviceHost string
|
||||
// @Title zh-CN 文字识别服务端口
|
||||
// @Description zh-CN 文字识别服务端口
|
||||
servicePort int64
|
||||
// @Title zh-CN 文字识别服务超时时间
|
||||
// @Description zh-CN 文字识别服务超时时间
|
||||
timeout uint32
|
||||
// @Title zh-CN 文字识别服务使用的模型
|
||||
// @Description zh-CN 用于文字识别的模型名称, 在 DashScope 中默认为 "qwen-vl-ocr"
|
||||
model string
|
||||
|
||||
initializer providerInitializer
|
||||
}
|
||||
|
||||
func (c *ProviderConfig) FromJson(json gjson.Result) {
|
||||
c.typ = json.Get("type").String()
|
||||
i, has := providerInitializers[c.typ]
|
||||
if has {
|
||||
i.InitConfig(json)
|
||||
c.initializer = i
|
||||
}
|
||||
c.serviceName = json.Get("serviceName").String()
|
||||
c.serviceHost = json.Get("serviceHost").String()
|
||||
c.servicePort = json.Get("servicePort").Int()
|
||||
c.timeout = uint32(json.Get("timeout").Int())
|
||||
c.model = json.Get("model").String()
|
||||
if c.timeout == 0 {
|
||||
c.timeout = 10000
|
||||
}
|
||||
}
|
||||
|
||||
func (c *ProviderConfig) Validate() error {
|
||||
if c.typ == "" {
|
||||
return errors.New("ocr service provider type is required")
|
||||
}
|
||||
if c.serviceName == "" {
|
||||
return errors.New("ocr service name is required")
|
||||
}
|
||||
if c.typ == "" {
|
||||
return errors.New("ocr service type is required")
|
||||
}
|
||||
if c.initializer == nil {
|
||||
return errors.New("unknown ocr service provider type: " + c.typ)
|
||||
}
|
||||
if err := c.initializer.ValidateConfig(); err != nil {
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (c *ProviderConfig) GetProviderType() string {
|
||||
return c.typ
|
||||
}
|
||||
|
||||
func CreateProvider(pc ProviderConfig) (Provider, error) {
|
||||
initializer, has := providerInitializers[pc.typ]
|
||||
if !has {
|
||||
return nil, errors.New("unknown provider type: " + pc.typ)
|
||||
}
|
||||
return initializer.CreateProvider(pc)
|
||||
}
|
||||
|
||||
type CallArgs struct {
|
||||
Method string
|
||||
Url string
|
||||
Headers [][2]string
|
||||
Body []byte
|
||||
TimeoutMillisecond uint32
|
||||
}
|
||||
|
||||
type Provider interface {
|
||||
GetProviderType() string
|
||||
CallArgs(imageUrl string) CallArgs
|
||||
DoOCR(
|
||||
imageUrl string,
|
||||
callback func(imageContent string, err error)) error
|
||||
}
|
||||
1
plugins/wasm-go/extensions/ai-load-balancer/.gitignore
vendored
Normal file
1
plugins/wasm-go/extensions/ai-load-balancer/.gitignore
vendored
Normal file
@@ -0,0 +1 @@
|
||||
test/
|
||||
174
plugins/wasm-go/extensions/ai-load-balancer/README.md
Normal file
174
plugins/wasm-go/extensions/ai-load-balancer/README.md
Normal file
@@ -0,0 +1,174 @@
|
||||
---
|
||||
title: AI负载均衡
|
||||
keywords: [higress, llm, load balance]
|
||||
description: 针对LLM服务的负载均衡策略
|
||||
---
|
||||
|
||||
# 功能说明
|
||||
|
||||
**注意**:
|
||||
- Higress网关版本需要>=v2.1.5
|
||||
|
||||
对LLM服务提供热插拔的负载均衡策略,如果关闭插件,负载均衡策略会退化为服务本身的负载均衡策略(轮训、本地最小请求数、随机、一致性hash等)。
|
||||
|
||||
配置如下:
|
||||
|
||||
| 名称 | 数据类型 | 填写要求 | 默认值 | 描述 |
|
||||
|--------------------|-----------------|------------------|-------------|-------------------------------------|
|
||||
| `lb_policy` | string | 必填 | | 负载均衡策略类型 |
|
||||
| `lb_config` | object | 必填 | | 当前负载均衡策略类型的配置 |
|
||||
|
||||
目前支持的负载均衡策略包括:
|
||||
- `global_least_request`: 基于redis实现的全局最小请求数负载均衡
|
||||
- `prefix_cache`: 基于 prompt 前缀匹配选择后端节点,如果通过前缀匹配无法匹配到节点,则通过全局最小请求数进行服务节点的选择
|
||||
- `least_busy`: [gateway-api-inference-extension](https://github.com/kubernetes-sigs/gateway-api-inference-extension/blob/main/README.md) 的 wasm 实现
|
||||
|
||||
# 全局最小请求数
|
||||
## 功能说明
|
||||
|
||||
```mermaid
|
||||
sequenceDiagram
|
||||
participant C as Client
|
||||
participant H as Higress
|
||||
participant R as Redis
|
||||
participant H1 as Host1
|
||||
participant H2 as Host2
|
||||
|
||||
C ->> H: 发起请求
|
||||
H ->> R: 获取 host ongoing 请求数
|
||||
R ->> H: 返回结果
|
||||
H ->> R: 根据结果选择当前请求数最小的host,计数+1
|
||||
R ->> H: 返回结果
|
||||
H ->> H1: 绕过service原本的负载均衡策略,转发请求到对应host
|
||||
H1 ->> H: 返回响应
|
||||
H ->> R: host计数-1
|
||||
H ->> C: 返回响应
|
||||
```
|
||||
|
||||
## 配置说明
|
||||
|
||||
| 名称 | 数据类型 | 填写要求 | 默认值 | 描述 |
|
||||
|--------------------|-----------------|------------------|-------------|-------------------------------------|
|
||||
| `serviceFQDN` | string | 必填 | | redis 服务的FQDN,例如: `redis.dns` |
|
||||
| `servicePort` | int | 必填 | | redis 服务的port |
|
||||
| `username` | string | 必填 | | redis 用户名 |
|
||||
| `password` | string | 选填 | 空 | redis 密码 |
|
||||
| `timeout` | int | 选填 | 3000ms | redis 请求超时时间 |
|
||||
| `database` | int | 选填 | 0 | redis 数据库序号 |
|
||||
|
||||
## 配置示例
|
||||
|
||||
```yaml
|
||||
lb_policy: global_least_request
|
||||
lb_config:
|
||||
serviceFQDN: redis.static
|
||||
servicePort: 6379
|
||||
username: default
|
||||
password: '123456'
|
||||
```
|
||||
|
||||
# 前缀匹配
|
||||
## 功能说明
|
||||
根据 prompt 前缀匹配选择 pod,以复用 KV Cache,如果通过前缀匹配无法匹配到节点,则通过全局最小请求数进行服务节点的选择
|
||||
|
||||
例如以下请求被路由到了pod 1
|
||||
|
||||
```json
|
||||
{
|
||||
"model": "qwen-turbo",
|
||||
"messages": [
|
||||
{
|
||||
"role": "user",
|
||||
"content": "hi"
|
||||
}
|
||||
]
|
||||
}
|
||||
```
|
||||
|
||||
那么后续具有相同前缀的请求也会被路由到 pod 1
|
||||
```json
|
||||
{
|
||||
"model": "qwen-turbo",
|
||||
"messages": [
|
||||
{
|
||||
"role": "user",
|
||||
"content": "hi"
|
||||
},
|
||||
{
|
||||
"role": "assistant",
|
||||
"content": "Hi! How can I assist you today? 😊"
|
||||
},
|
||||
{
|
||||
"role": "user",
|
||||
"content": "write a short story aboud 100 words"
|
||||
}
|
||||
]
|
||||
}
|
||||
```
|
||||
|
||||
## 配置说明
|
||||
|
||||
| 名称 | 数据类型 | 填写要求 | 默认值 | 描述 |
|
||||
|--------------------|-----------------|------------------|-------------|-------------------------------------|
|
||||
| `serviceFQDN` | string | 必填 | | redis 服务的FQDN,例如: `redis.dns` |
|
||||
| `servicePort` | int | 必填 | | redis 服务的port |
|
||||
| `username` | string | 必填 | | redis 用户名 |
|
||||
| `password` | string | 选填 | 空 | redis 密码 |
|
||||
| `timeout` | int | 选填 | 3000ms | redis 请求超时时间 |
|
||||
| `database` | int | 选填 | 0 | redis 数据库序号 |
|
||||
| `redisKeyTTL` | int | 选填 | 1800ms | prompt 前缀对应的key的ttl |
|
||||
|
||||
## 配置示例
|
||||
|
||||
```yaml
|
||||
lb_policy: prefix_cache
|
||||
lb_config:
|
||||
serviceFQDN: redis.static
|
||||
servicePort: 6379
|
||||
username: default
|
||||
password: '123456'
|
||||
```
|
||||
|
||||
# 最小负载
|
||||
## 功能说明
|
||||
[gateway-api-inference-extension](https://github.com/kubernetes-sigs/gateway-api-inference-extension/blob/main/README.md) 的 wasm 实现
|
||||
|
||||
```mermaid
|
||||
sequenceDiagram
|
||||
participant C as Client
|
||||
participant H as Higress
|
||||
participant H1 as Host1
|
||||
participant H2 as Host2
|
||||
|
||||
loop 定期拉取metrics
|
||||
H ->> H1: /metrics
|
||||
H1 ->> H: vllm metrics
|
||||
H ->> H2: /metrics
|
||||
H2 ->> H: vllm metrics
|
||||
end
|
||||
|
||||
C ->> H: 发起请求
|
||||
H ->> H1: 根据vllm metrics选择合适的pod,绕过服务原始的lb policy直接转发
|
||||
H1 ->> H: 返回响应
|
||||
H ->> C: 返回响应
|
||||
```
|
||||
|
||||
<!-- pod选取流程图如下:
|
||||
|
||||
 -->
|
||||
|
||||
## 配置说明
|
||||
|
||||
| 名称 | 数据类型 | 填写要求 | 默认值 | 描述 |
|
||||
|--------------------|-----------------|------------------|-------------|-------------------------------------|
|
||||
| `criticalModels` | []string | 选填 | | critical的模型列表 |
|
||||
|
||||
## 配置示例
|
||||
|
||||
```yaml
|
||||
lb_policy: least_busy
|
||||
lb_config:
|
||||
criticalModels:
|
||||
- meta-llama/Llama-2-7b-hf
|
||||
- sql-lora
|
||||
```
|
||||
177
plugins/wasm-go/extensions/ai-load-balancer/README_EN.md
Normal file
177
plugins/wasm-go/extensions/ai-load-balancer/README_EN.md
Normal file
@@ -0,0 +1,177 @@
|
||||
---
|
||||
title: AI Load Balance
|
||||
keywords: [higress, llm, load balance]
|
||||
description: LLM-oriented load balance policies
|
||||
---
|
||||
|
||||
# Introduction
|
||||
|
||||
**Attention**:
|
||||
- Version of Higress should >= v2.1.5
|
||||
|
||||
This plug-in provides the llm-oriented load balancing capability in a hot-swappable manner. If the plugin is closed, the load balancing strategy will degenerate into the load balancing strategy of the service itself (round robin, local minimum request number, random, consistent hash, etc.).
|
||||
|
||||
The configuration is:
|
||||
|
||||
| Name | Type | Required | default | description |
|
||||
|--------------------|-----------------|------------------|-------------|-------------------------------------|
|
||||
| `lb_policy` | string | required | | load balance type |
|
||||
| `lb_config` | object | required | | configuration for the current load balance type |
|
||||
|
||||
Current supported load balance policies are:
|
||||
|
||||
- `global_least_request`: global least request based on redis
|
||||
- `prefix_cache`: Select the backend node based on the prompt prefix match. If the node cannot be matched by prefix matching, the service node is selected based on the global minimum number of requests.
|
||||
- `least_busy`: implementation for [gateway-api-inference-extension](https://github.com/kubernetes-sigs/gateway-api-inference-extension/blob/main/README.md)
|
||||
|
||||
# Global Least Request
|
||||
## Introduction
|
||||
|
||||
```mermaid
|
||||
sequenceDiagram
|
||||
participant C as Client
|
||||
participant H as Higress
|
||||
participant R as Redis
|
||||
participant H1 as Host1
|
||||
participant H2 as Host2
|
||||
|
||||
C ->> H: Send request
|
||||
H ->> R: Get host ongoing request number
|
||||
R ->> H: Return result
|
||||
H ->> R: According to the result, select the host with the smallest number of current requests, host rq count +1.
|
||||
R ->> H: Return result
|
||||
H ->> H1: Bypass the service's original load balancing strategy and forward the request to the corresponding host
|
||||
H1 ->> H: Return result
|
||||
H ->> R: host rq count -1
|
||||
H ->> C: Receive response
|
||||
```
|
||||
|
||||
## Configuration
|
||||
|
||||
| Name | Type | required | default | description |
|
||||
|--------------------|-----------------|------------------|-------------|-------------------------------------|
|
||||
| `serviceFQDN` | string | required | | redis FQDN, e.g. `redis.dns` |
|
||||
| `servicePort` | int | required | | redis port |
|
||||
| `username` | string | required | | redis username |
|
||||
| `password` | string | optional | `` | redis password |
|
||||
| `timeout` | int | optional | 3000ms | redis request timeout |
|
||||
| `database` | int | optional | 0 | redis database number |
|
||||
|
||||
## Configuration Example
|
||||
|
||||
```yaml
|
||||
lb_policy: global_least_request
|
||||
lb_config:
|
||||
serviceFQDN: redis.static
|
||||
servicePort: 6379
|
||||
username: default
|
||||
password: '123456'
|
||||
```
|
||||
|
||||
# Prefix Cache
|
||||
## Introduction
|
||||
Select pods based on the prompt prefix match to reuse KV Cache. If no node can be matched by prefix match, select the service node based on the global minimum number of requests.
|
||||
|
||||
For example, the following request is routed to pod 1:
|
||||
|
||||
```json
|
||||
{
|
||||
"model": "qwen-turbo",
|
||||
"messages": [
|
||||
{
|
||||
"role": "user",
|
||||
"content": "hi"
|
||||
}
|
||||
]
|
||||
}
|
||||
```
|
||||
|
||||
Then subsequent requests with the same prefix will also be routed to pod 1:
|
||||
|
||||
```json
|
||||
{
|
||||
"model": "qwen-turbo",
|
||||
"messages": [
|
||||
{
|
||||
"role": "user",
|
||||
"content": "hi"
|
||||
},
|
||||
{
|
||||
"role": "assistant",
|
||||
"content": "Hi! How can I assist you today? 😊"
|
||||
},
|
||||
{
|
||||
"role": "user",
|
||||
"content": "write a short story aboud 100 words"
|
||||
}
|
||||
]
|
||||
}
|
||||
```
|
||||
|
||||
## Configuration
|
||||
|
||||
| Name | Type | required | default | description |
|
||||
|--------------------|-----------------|-----------------------|-------------|---------------------------------|
|
||||
| `serviceFQDN` | string | required | | redis FQDN, e.g. `redis.dns` |
|
||||
| `servicePort` | int | required | | redis port |
|
||||
| `username` | string | required | | redis username |
|
||||
| `password` | string | optional | `` | redis password |
|
||||
| `timeout` | int | optional | 3000ms | redis request timeout |
|
||||
| `database` | int | optional | 0 | redis database number |
|
||||
| `redisKeyTTL` | int | optional | 1800ms | prompt prefix key's ttl |
|
||||
|
||||
## Configuration Example
|
||||
|
||||
```yaml
|
||||
lb_policy: prefix_cache
|
||||
lb_config:
|
||||
serviceFQDN: redis.static
|
||||
servicePort: 6379
|
||||
username: default
|
||||
password: '123456'
|
||||
```
|
||||
|
||||
# Least Busy
|
||||
## Introduction
|
||||
|
||||
wasm implementation for [gateway-api-inference-extension](https://github.com/kubernetes-sigs/gateway-api-inference-extension/blob/main/README.md)
|
||||
|
||||
```mermaid
|
||||
sequenceDiagram
|
||||
participant C as Client
|
||||
participant H as Higress
|
||||
participant H1 as Host1
|
||||
participant H2 as Host2
|
||||
|
||||
loop fetch metrics periodically
|
||||
H ->> H1: /metrics
|
||||
H1 ->> H: vllm metrics
|
||||
H ->> H2: /metrics
|
||||
H2 ->> H: vllm metrics
|
||||
end
|
||||
|
||||
C ->> H: request
|
||||
H ->> H1: select pod according to vllm metrics, bypassing original service load balance policy
|
||||
H1 ->> H: response
|
||||
H ->> C: response
|
||||
```
|
||||
|
||||
<!-- flowchart for pod selection:
|
||||
|
||||
 -->
|
||||
|
||||
## Configuration
|
||||
|
||||
| Name | Type | Required | default | description |
|
||||
|--------------------|-----------------|------------------|-------------|-------------------------------------|
|
||||
| `criticalModels` | []string | required | | critical model names |
|
||||
|
||||
## Configuration Example
|
||||
|
||||
```yaml
|
||||
lb_policy: least_busy
|
||||
lb_config:
|
||||
criticalModels:
|
||||
- meta-llama/Llama-2-7b-hf
|
||||
- sql-lora
|
||||
```
|
||||
@@ -0,0 +1,178 @@
|
||||
package global_least_request
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"fmt"
|
||||
"math/rand"
|
||||
"time"
|
||||
|
||||
"github.com/alibaba/higress/plugins/wasm-go/extensions/ai-load-balancer/utils"
|
||||
"github.com/higress-group/proxy-wasm-go-sdk/proxywasm"
|
||||
"github.com/higress-group/proxy-wasm-go-sdk/proxywasm/types"
|
||||
"github.com/higress-group/wasm-go/pkg/log"
|
||||
"github.com/higress-group/wasm-go/pkg/wrapper"
|
||||
"github.com/tidwall/gjson"
|
||||
"github.com/tidwall/resp"
|
||||
)
|
||||
|
||||
const (
|
||||
RedisKeyFormat = "higress:global_least_request_table:%s:%s"
|
||||
RedisLua = `local seed = KEYS[1]
|
||||
local hset_key = KEYS[2]
|
||||
local current_target = KEYS[3]
|
||||
local current_count = 0
|
||||
|
||||
math.randomseed(seed)
|
||||
|
||||
local function randomBool()
|
||||
return math.random() >= 0.5
|
||||
end
|
||||
|
||||
local function is_healthy(addr)
|
||||
for i = 4, #KEYS do
|
||||
if addr == KEYS[i] then
|
||||
return true
|
||||
end
|
||||
end
|
||||
return false
|
||||
end
|
||||
|
||||
if redis.call('HEXISTS', hset_key, current_target) ~= 0 then
|
||||
current_count = redis.call('HGET', hset_key, current_target)
|
||||
local hash = redis.call('HGETALL', hset_key)
|
||||
for i = 1, #hash, 2 do
|
||||
local addr = hash[i]
|
||||
local count = hash[i+1]
|
||||
if is_healthy(addr) then
|
||||
if count < current_count then
|
||||
current_target = addr
|
||||
current_count = count
|
||||
elseif count == current_count and randomBool() then
|
||||
current_target = addr
|
||||
current_count = count
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
redis.call("HINCRBY", hset_key, current_target, 1)
|
||||
|
||||
return current_target`
|
||||
)
|
||||
|
||||
type GlobalLeastRequestLoadBalancer struct {
|
||||
redisClient wrapper.RedisClient
|
||||
}
|
||||
|
||||
func NewGlobalLeastRequestLoadBalancer(json gjson.Result) (GlobalLeastRequestLoadBalancer, error) {
|
||||
lb := GlobalLeastRequestLoadBalancer{}
|
||||
serviceFQDN := json.Get("serviceFQDN").String()
|
||||
servicePort := json.Get("servicePort").Int()
|
||||
if serviceFQDN == "" || servicePort == 0 {
|
||||
log.Errorf("invalid redis service, serviceFQDN: %s, servicePort: %d", serviceFQDN, servicePort)
|
||||
return lb, errors.New("invalid redis service config")
|
||||
}
|
||||
lb.redisClient = wrapper.NewRedisClusterClient(wrapper.FQDNCluster{
|
||||
FQDN: serviceFQDN,
|
||||
Port: servicePort,
|
||||
})
|
||||
username := json.Get("username").String()
|
||||
password := json.Get("password").String()
|
||||
timeout := json.Get("timeout").Int()
|
||||
if timeout == 0 {
|
||||
timeout = 3000
|
||||
}
|
||||
// database default is 0
|
||||
database := json.Get("database").Int()
|
||||
return lb, lb.redisClient.Init(username, password, int64(timeout), wrapper.WithDataBase(int(database)))
|
||||
}
|
||||
|
||||
func (lb GlobalLeastRequestLoadBalancer) HandleHttpRequestHeaders(ctx wrapper.HttpContext) types.Action {
|
||||
// If return types.ActionContinue, SetUpstreamOverrideHost will not take effect
|
||||
return types.HeaderStopIteration
|
||||
}
|
||||
|
||||
func (lb GlobalLeastRequestLoadBalancer) HandleHttpRequestBody(ctx wrapper.HttpContext, body []byte) types.Action {
|
||||
routeName, err := utils.GetRouteName()
|
||||
if err != nil || routeName == "" {
|
||||
ctx.SetContext("error", true)
|
||||
return types.ActionContinue
|
||||
} else {
|
||||
ctx.SetContext("routeName", routeName)
|
||||
}
|
||||
clusterName, err := utils.GetClusterName()
|
||||
if err != nil || clusterName == "" {
|
||||
ctx.SetContext("error", true)
|
||||
return types.ActionContinue
|
||||
} else {
|
||||
ctx.SetContext("clusterName", clusterName)
|
||||
}
|
||||
hostInfos, err := proxywasm.GetUpstreamHosts()
|
||||
if err != nil {
|
||||
ctx.SetContext("error", true)
|
||||
return types.ActionContinue
|
||||
}
|
||||
// Only healthy host can be selected
|
||||
healthyHostArray := []string{}
|
||||
for _, hostInfo := range hostInfos {
|
||||
if gjson.Get(hostInfo[1], "health_status").String() == "Healthy" {
|
||||
healthyHostArray = append(healthyHostArray, hostInfo[0])
|
||||
}
|
||||
}
|
||||
if len(healthyHostArray) == 0 {
|
||||
ctx.SetContext("error", true)
|
||||
return types.ActionContinue
|
||||
}
|
||||
randomIndex := rand.Intn(len(healthyHostArray))
|
||||
hostSelected := healthyHostArray[randomIndex]
|
||||
keys := []interface{}{time.Now().Unix(), fmt.Sprintf(RedisKeyFormat, routeName, clusterName), hostSelected}
|
||||
for _, v := range healthyHostArray {
|
||||
keys = append(keys, v)
|
||||
}
|
||||
err = lb.redisClient.Eval(RedisLua, len(keys), keys, []interface{}{}, func(response resp.Value) {
|
||||
if err := response.Error(); err != nil {
|
||||
log.Errorf("HGetAll failed: %+v", err)
|
||||
ctx.SetContext("error", true)
|
||||
proxywasm.ResumeHttpRequest()
|
||||
return
|
||||
}
|
||||
hostSelected = response.String()
|
||||
if err := proxywasm.SetUpstreamOverrideHost([]byte(hostSelected)); err != nil {
|
||||
ctx.SetContext("error", true)
|
||||
log.Errorf("override upstream host failed, fallback to default lb policy, error informations: %+v", err)
|
||||
}
|
||||
log.Debugf("host_selected: %s", hostSelected)
|
||||
ctx.SetContext("host_selected", hostSelected)
|
||||
proxywasm.ResumeHttpRequest()
|
||||
})
|
||||
if err != nil {
|
||||
ctx.SetContext("error", true)
|
||||
return types.ActionContinue
|
||||
}
|
||||
return types.ActionPause
|
||||
}
|
||||
|
||||
func (lb GlobalLeastRequestLoadBalancer) HandleHttpResponseHeaders(ctx wrapper.HttpContext) types.Action {
|
||||
return types.ActionContinue
|
||||
}
|
||||
|
||||
func (lb GlobalLeastRequestLoadBalancer) HandleHttpStreamingResponseBody(ctx wrapper.HttpContext, data []byte, endOfStream bool) []byte {
|
||||
if endOfStream {
|
||||
isErr, _ := ctx.GetContext("error").(bool)
|
||||
if !isErr {
|
||||
routeName, _ := ctx.GetContext("routeName").(string)
|
||||
clusterName, _ := ctx.GetContext("clusterName").(string)
|
||||
host_selected, _ := ctx.GetContext("host_selected").(string)
|
||||
if host_selected == "" {
|
||||
log.Errorf("get host_selected failed")
|
||||
} else {
|
||||
lb.redisClient.HIncrBy(fmt.Sprintf(RedisKeyFormat, routeName, clusterName), host_selected, -1, nil)
|
||||
}
|
||||
}
|
||||
}
|
||||
return data
|
||||
}
|
||||
|
||||
func (lb GlobalLeastRequestLoadBalancer) HandleHttpResponseBody(ctx wrapper.HttpContext, body []byte) types.Action {
|
||||
return types.ActionContinue
|
||||
}
|
||||
23
plugins/wasm-go/extensions/ai-load-balancer/go.mod
Normal file
23
plugins/wasm-go/extensions/ai-load-balancer/go.mod
Normal file
@@ -0,0 +1,23 @@
|
||||
module github.com/alibaba/higress/plugins/wasm-go/extensions/ai-load-balancer
|
||||
|
||||
go 1.24.1
|
||||
|
||||
toolchain go1.24.3
|
||||
|
||||
require (
|
||||
github.com/higress-group/proxy-wasm-go-sdk v0.0.0-20250611100342-5654e89a7a80
|
||||
github.com/higress-group/wasm-go v1.0.1-0.20250628101008-bea7da01a545
|
||||
github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect
|
||||
github.com/prometheus/client_model v0.6.2
|
||||
github.com/tidwall/gjson v1.18.0
|
||||
github.com/tidwall/resp v0.1.1
|
||||
go.uber.org/multierr v1.11.0
|
||||
)
|
||||
|
||||
require (
|
||||
github.com/google/uuid v1.6.0 // indirect
|
||||
github.com/prometheus/common v0.64.0
|
||||
github.com/tidwall/match v1.1.1 // indirect
|
||||
github.com/tidwall/pretty v1.2.1 // indirect
|
||||
google.golang.org/protobuf v1.36.6 // indirect
|
||||
)
|
||||
35
plugins/wasm-go/extensions/ai-load-balancer/go.sum
Normal file
35
plugins/wasm-go/extensions/ai-load-balancer/go.sum
Normal file
@@ -0,0 +1,35 @@
|
||||
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
|
||||
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
|
||||
github.com/google/go-cmp v0.7.0 h1:wk8382ETsv4JYUZwIsn6YpYiWiBsYLSJiTsyBybVuN8=
|
||||
github.com/google/go-cmp v0.7.0/go.mod h1:pXiqmnSA92OHEEa9HXL2W4E7lf9JzCmGVUdgjX3N/iU=
|
||||
github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0=
|
||||
github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
|
||||
github.com/higress-group/proxy-wasm-go-sdk v0.0.0-20250611100342-5654e89a7a80 h1:xqmtTZI0JQ2O+Lg9/CE6c+Tw9KD6FnvWw8EpLVuuvfg=
|
||||
github.com/higress-group/proxy-wasm-go-sdk v0.0.0-20250611100342-5654e89a7a80/go.mod h1:tRI2LfMudSkKHhyv1uex3BWzcice2s/l8Ah8axporfA=
|
||||
github.com/higress-group/wasm-go v1.0.1-0.20250628101008-bea7da01a545 h1:zPXEonKCAeLvXI1IpwGpIeVSvLY5AZ9h9uTJnOuiA3Q=
|
||||
github.com/higress-group/wasm-go v1.0.1-0.20250628101008-bea7da01a545/go.mod h1:ODBV27sjmhIW8Cqv3R74EUcTnbdkE69bmXBQFuRkY1M=
|
||||
github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 h1:C3w9PqII01/Oq1c1nUAm88MOHcQC9l5mIlSMApZMrHA=
|
||||
github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822/go.mod h1:+n7T8mK8HuQTcFwEeznm/DIxMOiR9yIdICNftLE1DvQ=
|
||||
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
|
||||
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
|
||||
github.com/prometheus/client_model v0.6.2 h1:oBsgwpGs7iVziMvrGhE53c/GrLUsZdHnqNwqPLxwZyk=
|
||||
github.com/prometheus/client_model v0.6.2/go.mod h1:y3m2F6Gdpfy6Ut/GBsUqTWZqCUvMVzSfMLjcu6wAwpE=
|
||||
github.com/prometheus/common v0.64.0 h1:pdZeA+g617P7oGv1CzdTzyeShxAGrTBsolKNOLQPGO4=
|
||||
github.com/prometheus/common v0.64.0/go.mod h1:0gZns+BLRQ3V6NdaerOhMbwwRbNh9hkGINtQAsP5GS8=
|
||||
github.com/stretchr/testify v1.10.0 h1:Xv5erBjTwe/5IxqUQTdXv5kgmIvbHo3QQyRwhJsOfJA=
|
||||
github.com/stretchr/testify v1.10.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY=
|
||||
github.com/tidwall/gjson v1.18.0 h1:FIDeeyB800efLX89e5a8Y0BNH+LOngJyGrIWxG2FKQY=
|
||||
github.com/tidwall/gjson v1.18.0/go.mod h1:/wbyibRr2FHMks5tjHJ5F8dMZh3AcwJEMf5vlfC0lxk=
|
||||
github.com/tidwall/match v1.1.1 h1:+Ho715JplO36QYgwN9PGYNhgZvoUSc9X2c80KVTi+GA=
|
||||
github.com/tidwall/match v1.1.1/go.mod h1:eRSPERbgtNPcGhD8UCthc6PmLEQXEWd3PRB5JTxsfmM=
|
||||
github.com/tidwall/pretty v1.2.0/go.mod h1:ITEVvHYasfjBbM0u2Pg8T2nJnzm8xPwvNhhsoaGGjNU=
|
||||
github.com/tidwall/pretty v1.2.1 h1:qjsOFOWWQl+N3RsoF5/ssm1pHmJJwhjlSbZ51I6wMl4=
|
||||
github.com/tidwall/pretty v1.2.1/go.mod h1:ITEVvHYasfjBbM0u2Pg8T2nJnzm8xPwvNhhsoaGGjNU=
|
||||
github.com/tidwall/resp v0.1.1 h1:Ly20wkhqKTmDUPlyM1S7pWo5kk0tDu8OoC/vFArXmwE=
|
||||
github.com/tidwall/resp v0.1.1/go.mod h1:3/FrruOBAxPTPtundW0VXgmsQ4ZBA0Aw714lVYgwFa0=
|
||||
go.uber.org/multierr v1.11.0 h1:blXXJkSxSSfBVBlC76pxqeO+LN3aDfLQo+309xJstO0=
|
||||
go.uber.org/multierr v1.11.0/go.mod h1:20+QtiLqy0Nd6FdQB9TLXag12DsQkrbs3htMFfDN80Y=
|
||||
google.golang.org/protobuf v1.36.6 h1:z1NpPI8ku2WgiWnf+t9wTPsn6eP1L7ksHUlkfLvd9xY=
|
||||
google.golang.org/protobuf v1.36.6/go.mod h1:jduwjTPXsFjZGTmRluh+L6NjiWu7pchiJ2/5YcXBHnY=
|
||||
gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
|
||||
gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
|
||||
@@ -0,0 +1,68 @@
|
||||
/*
|
||||
Copyright 2025 The Kubernetes Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package backend
|
||||
|
||||
import "fmt"
|
||||
|
||||
type PodSet map[Pod]bool
|
||||
|
||||
type Pod struct {
|
||||
Name string
|
||||
Address string
|
||||
}
|
||||
|
||||
func (p Pod) String() string {
|
||||
return p.Name + ":" + p.Address
|
||||
}
|
||||
|
||||
type Metrics struct {
|
||||
// ActiveModels is a set of models(including LoRA adapters) that are currently cached to GPU.
|
||||
ActiveModels map[string]int
|
||||
// MaxActiveModels is the maximum number of models that can be loaded to GPU.
|
||||
MaxActiveModels int
|
||||
RunningQueueSize int
|
||||
WaitingQueueSize int
|
||||
KVCacheUsagePercent float64
|
||||
KvCacheMaxTokenCapacity int
|
||||
}
|
||||
|
||||
type PodMetrics struct {
|
||||
Pod
|
||||
Metrics
|
||||
}
|
||||
|
||||
func (pm *PodMetrics) String() string {
|
||||
return fmt.Sprintf("Pod: %+v; Metrics: %+v", pm.Pod, pm.Metrics)
|
||||
}
|
||||
|
||||
func (pm *PodMetrics) Clone() *PodMetrics {
|
||||
cm := make(map[string]int, len(pm.ActiveModels))
|
||||
for k, v := range pm.ActiveModels {
|
||||
cm[k] = v
|
||||
}
|
||||
clone := &PodMetrics{
|
||||
Pod: pm.Pod,
|
||||
Metrics: Metrics{
|
||||
ActiveModels: cm,
|
||||
RunningQueueSize: pm.RunningQueueSize,
|
||||
WaitingQueueSize: pm.WaitingQueueSize,
|
||||
KVCacheUsagePercent: pm.KVCacheUsagePercent,
|
||||
KvCacheMaxTokenCapacity: pm.KvCacheMaxTokenCapacity,
|
||||
},
|
||||
}
|
||||
return clone
|
||||
}
|
||||
@@ -0,0 +1,150 @@
|
||||
/*
|
||||
Copyright 2025 The Kubernetes Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
// Package vllm provides vllm specific pod metrics implementation.
|
||||
package vllm
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"strconv"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/alibaba/higress/plugins/wasm-go/extensions/ai-load-balancer/least_busy/backend"
|
||||
|
||||
dto "github.com/prometheus/client_model/go"
|
||||
"go.uber.org/multierr"
|
||||
)
|
||||
|
||||
// Names of the vLLM Prometheus metric families and labels scraped by
// PromToPodMetrics.
const (
	// Gauge family whose label set carries the LoRA adapter bookkeeping.
	LoraRequestInfoMetricName                = "vllm:lora_requests_info"
	LoraRequestInfoRunningAdaptersMetricName = "running_lora_adapters"
	LoraRequestInfoMaxAdaptersMetricName     = "max_lora"
	// TODO: Replace these with the num_tokens_running/waiting below once we add those to the fork.
	RunningQueueSizeMetricName = "vllm:num_requests_running"
	WaitingQueueSizeMetricName = "vllm:num_requests_waiting"
	/* TODO: Uncomment this once the following are added to the fork.
	RunningQueueSizeMetricName = "vllm:num_tokens_running"
	WaitingQueueSizeMetricName = "vllm:num_tokens_waiting"
	*/
	KVCacheUsagePercentMetricName     = "vllm:gpu_cache_usage_perc"
	KvCacheMaxTokenCapacityMetricName = "vllm:gpu_cache_max_token_capacity"
)
|
||||
|
||||
// promToPodMetrics updates internal pod metrics with scraped prometheus metrics.
|
||||
// A combined error is returned if errors occur in one or more metric processing.
|
||||
// it returns a new PodMetrics pointer which can be used to atomically update the pod metrics map.
|
||||
// PromToPodMetrics updates internal pod metrics with scraped prometheus metrics.
// A combined error is returned if errors occur in one or more metric processing.
// It returns a new PodMetrics pointer which can be used to atomically update
// the pod metrics map; `existing` itself is never mutated (it is cloned first).
// Note: a non-nil error does not mean the result is unusable — each metric is
// applied independently and missing families only skip their own field.
func PromToPodMetrics(
	metricFamilies map[string]*dto.MetricFamily,
	existing *backend.PodMetrics,
) (*backend.PodMetrics, error) {
	var errs error
	updated := existing.Clone()
	runningQueueSize, err := getLatestMetric(metricFamilies, RunningQueueSizeMetricName)
	errs = multierr.Append(errs, err)
	if err == nil {
		updated.RunningQueueSize = int(runningQueueSize.GetGauge().GetValue())
	}
	waitingQueueSize, err := getLatestMetric(metricFamilies, WaitingQueueSizeMetricName)
	errs = multierr.Append(errs, err)
	if err == nil {
		updated.WaitingQueueSize = int(waitingQueueSize.GetGauge().GetValue())
	}
	cachePercent, err := getLatestMetric(metricFamilies, KVCacheUsagePercentMetricName)
	errs = multierr.Append(errs, err)
	if err == nil {
		updated.KVCacheUsagePercent = cachePercent.GetGauge().GetValue()
	}

	loraMetrics, _, err := getLatestLoraMetric(metricFamilies)
	errs = multierr.Append(errs, err)
	/* TODO: uncomment once this is available in vllm.
	kvCap, _, err := getGaugeLatestValue(metricFamilies, KvCacheMaxTokenCapacityMetricName)
	errs = multierr.Append(errs, err)
	if err != nil {
		updated.KvCacheMaxTokenCapacity = int(kvCap)
	}
	*/

	if loraMetrics != nil {
		// Rebuild the active-adapter set from scratch; entries map to 0
		// because only membership (and count via len) is consumed downstream.
		updated.ActiveModels = make(map[string]int)
		for _, label := range loraMetrics.GetLabel() {
			if label.GetName() == LoraRequestInfoRunningAdaptersMetricName {
				if label.GetValue() != "" {
					adapterList := strings.Split(label.GetValue(), ",")
					for _, adapter := range adapterList {
						updated.ActiveModels[adapter] = 0
					}
				}
			}
			if label.GetName() == LoraRequestInfoMaxAdaptersMetricName {
				if label.GetValue() != "" {
					// A malformed max_lora value is recorded in errs but does
					// not abort processing of the remaining labels.
					updated.MaxActiveModels, err = strconv.Atoi(label.GetValue())
					if err != nil {
						errs = multierr.Append(errs, err)
					}
				}
			}
		}

	}

	return updated, errs
}
|
||||
|
||||
// getLatestLoraMetric gets latest lora metric series in gauge metric family `vllm:lora_requests_info`
// reason its specially fetched is because each label key value pair permutation generates new series
// and only most recent is useful. The value of each series is the creation timestamp so we can
// retrieve the latest by sorting the value.
// Returns (nil, zero time, error) when the family is absent. If no series has
// a positive gauge value, latest stays nil with a nil error.
func getLatestLoraMetric(metricFamilies map[string]*dto.MetricFamily) (*dto.Metric, time.Time, error) {
	loraRequests, ok := metricFamilies[LoraRequestInfoMetricName]
	if !ok {
		// klog.Warningf("metric family %q not found", LoraRequestInfoMetricName)
		return nil, time.Time{}, fmt.Errorf("metric family %q not found", LoraRequestInfoMetricName)
	}
	var latestTs float64
	var latest *dto.Metric
	for _, m := range loraRequests.GetMetric() {
		if m.GetGauge().GetValue() > latestTs {
			latestTs = m.GetGauge().GetValue()
			latest = m
		}
	}
	// NOTE(review): latestTs*1000 is passed as *nanoseconds* to time.Unix; if
	// the gauge is a seconds-precision creation timestamp this yields a
	// microsecond-scale time. The only caller in this file discards the time,
	// so it is harmless here — confirm before relying on the returned value.
	return latest, time.Unix(0, int64(latestTs*1000)), nil
}
|
||||
|
||||
// getLatestMetric gets the latest metric of a family. This should be used to get the latest Gauge metric.
|
||||
// Since vllm doesn't set the timestamp in metric, this metric essentially gets the first metric.
|
||||
func getLatestMetric(metricFamilies map[string]*dto.MetricFamily, metricName string) (*dto.Metric, error) {
|
||||
mf, ok := metricFamilies[metricName]
|
||||
if !ok {
|
||||
// klog.Warningf("metric family %q not found", metricName)
|
||||
return nil, fmt.Errorf("metric family %q not found", metricName)
|
||||
}
|
||||
if len(mf.GetMetric()) == 0 {
|
||||
return nil, fmt.Errorf("no metrics available for %q", metricName)
|
||||
}
|
||||
var latestTs int64
|
||||
var latest *dto.Metric
|
||||
for _, m := range mf.GetMetric() {
|
||||
if m.GetTimestampMs() >= latestTs {
|
||||
latestTs = m.GetTimestampMs()
|
||||
latest = m
|
||||
}
|
||||
}
|
||||
// klog.V(logutil.TRACE).Infof("Got metric value %+v for metric %v", latest, metricName)
|
||||
return latest, nil
|
||||
}
|
||||
@@ -0,0 +1,79 @@
|
||||
package least_busy
|
||||
|
||||
import (
|
||||
"github.com/alibaba/higress/plugins/wasm-go/extensions/ai-load-balancer/least_busy/scheduling"
|
||||
|
||||
"github.com/higress-group/proxy-wasm-go-sdk/proxywasm"
|
||||
"github.com/higress-group/proxy-wasm-go-sdk/proxywasm/types"
|
||||
"github.com/higress-group/wasm-go/pkg/log"
|
||||
"github.com/higress-group/wasm-go/pkg/wrapper"
|
||||
"github.com/tidwall/gjson"
|
||||
)
|
||||
|
||||
// LeastBusyLoadBalancer routes each request to the least-loaded healthy
// upstream endpoint, using vLLM metrics exposed per host.
type LeastBusyLoadBalancer struct {
	// criticalModels is the set of model names that must never be shed;
	// membership marks a request as Critical for the scheduler.
	criticalModels map[string]struct{}
}
||||
|
||||
func NewLeastBusyLoadBalancer(json gjson.Result) (LeastBusyLoadBalancer, error) {
|
||||
lb := LeastBusyLoadBalancer{}
|
||||
lb.criticalModels = make(map[string]struct{})
|
||||
for _, model := range json.Get("criticalModels").Array() {
|
||||
lb.criticalModels[model.String()] = struct{}{}
|
||||
}
|
||||
return lb, nil
|
||||
}
|
||||
|
||||
// Callbacks which are called in request path
|
||||
// HandleHttpRequestHeaders pauses header processing so the upstream host
// override chosen later in the body phase can still take effect.
func (lb LeastBusyLoadBalancer) HandleHttpRequestHeaders(ctx wrapper.HttpContext) types.Action {
	// If return types.ActionContinue, SetUpstreamOverrideHost will not take effect
	return types.HeaderStopIteration
}
|
||||
|
||||
func (lb LeastBusyLoadBalancer) HandleHttpRequestBody(ctx wrapper.HttpContext, body []byte) types.Action {
|
||||
requestModel := gjson.GetBytes(body, "model")
|
||||
if !requestModel.Exists() {
|
||||
return types.ActionContinue
|
||||
}
|
||||
_, isCritical := lb.criticalModels[requestModel.String()]
|
||||
llmReq := &scheduling.LLMRequest{
|
||||
Model: requestModel.String(),
|
||||
Critical: isCritical,
|
||||
}
|
||||
hostInfos, err := proxywasm.GetUpstreamHosts()
|
||||
if err != nil {
|
||||
return types.ActionContinue
|
||||
}
|
||||
hostMetrics := make(map[string]string)
|
||||
for _, hostInfo := range hostInfos {
|
||||
if gjson.Get(hostInfo[1], "health_status").String() == "Healthy" {
|
||||
hostMetrics[hostInfo[0]] = gjson.Get(hostInfo[1], "metrics").String()
|
||||
}
|
||||
}
|
||||
scheduler, err := scheduling.GetScheduler(hostMetrics)
|
||||
if err != nil {
|
||||
log.Debugf("initial scheduler failed: %v", err)
|
||||
return types.ActionContinue
|
||||
}
|
||||
targetPod, err := scheduler.Schedule(llmReq)
|
||||
log.Debugf("targetPod: %+v", targetPod.Address)
|
||||
if err != nil {
|
||||
log.Debugf("pod select failed: %v", err)
|
||||
proxywasm.SendHttpResponseWithDetail(429, "limited resources", nil, []byte("limited resources"), 0)
|
||||
} else {
|
||||
proxywasm.SetUpstreamOverrideHost([]byte(targetPod.Address))
|
||||
}
|
||||
return types.ActionContinue
|
||||
}
|
||||
|
||||
// HandleHttpResponseHeaders opts out of buffering the response body; this
// balancer makes no routing decision on the response path.
func (lb LeastBusyLoadBalancer) HandleHttpResponseHeaders(ctx wrapper.HttpContext) types.Action {
	ctx.DontReadResponseBody()
	return types.ActionContinue
}
|
||||
|
||||
// HandleHttpStreamingResponseBody passes streamed response chunks through
// unmodified.
func (lb LeastBusyLoadBalancer) HandleHttpStreamingResponseBody(ctx wrapper.HttpContext, data []byte, endOfStream bool) []byte {
	return data
}
|
||||
|
||||
// HandleHttpResponseBody is a no-op; the response body is not inspected.
func (lb LeastBusyLoadBalancer) HandleHttpResponseBody(ctx wrapper.HttpContext, body []byte) types.Action {
	return types.ActionContinue
}
|
||||
@@ -0,0 +1,203 @@
|
||||
/*
|
||||
Copyright 2025 The Kubernetes Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package scheduling
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"math"
|
||||
|
||||
"github.com/alibaba/higress/plugins/wasm-go/extensions/ai-load-balancer/least_busy/backend"
|
||||
|
||||
"github.com/higress-group/proxy-wasm-go-sdk/proxywasm"
|
||||
)
|
||||
|
||||
// Filter narrows a candidate pod set for a request. Implementations may
// return an error (typically alongside an empty slice) when no pod survives.
type Filter interface {
	Name() string
	Filter(req *LLMRequest, pods []*backend.PodMetrics) ([]*backend.PodMetrics, error)
}
|
||||
|
||||
// filter applies current filterFunc, and then recursively applies next filters depending success or
// failure of the current filterFunc.
// It can be used to construct a flow chart algorithm.
type filter struct {
	// name identifies this node in debug logs.
	name string
	// filter is the predicate/selection function this node applies.
	filter filterFunc
	// nextOnSuccess filter will be applied after successfully applying the current filter.
	// The filtered results will be passed to the next filter.
	nextOnSuccess *filter
	// nextOnFailure filter will be applied if current filter fails.
	// The original input will be passed to the next filter.
	nextOnFailure *filter
	// nextOnSuccessOrFailure is a convenience field to configure the next filter regardless of the
	// success or failure of the current filter.
	// NOTE: When using nextOnSuccessOrFailure, both nextOnSuccess and nextOnFailure SHOULD be nil.
	// However if that's not the case, nextOnSuccess and nextOnFailure will be used, instead of
	// nextOnSuccessOrFailure, in the success and failure scenarios, respectively.
	nextOnSuccessOrFailure *filter

	// callbacks api.FilterCallbackHandler
}
|
||||
|
||||
func (f *filter) Name() string {
|
||||
if f == nil {
|
||||
return "nil"
|
||||
}
|
||||
return f.name
|
||||
}
|
||||
|
||||
// Filter runs this node's filterFunc and then recurses into the appropriate
// successor. Success (no error, non-empty result) passes the *filtered* pods
// to nextOnSuccess (falling back to nextOnSuccessOrFailure); failure passes
// the *original* pods to nextOnFailure (same fallback). A leaf returns its
// own result directly.
func (f *filter) Filter(req *LLMRequest, pods []*backend.PodMetrics) ([]*backend.PodMetrics, error) {
	proxywasm.LogDebugf("Running filter %q on request %v with %v pods", f.name, req, len(pods))
	filtered, err := f.filter(req, pods)

	next := f.nextOnSuccessOrFailure
	if err == nil && len(filtered) > 0 {
		if f.nextOnSuccess == nil && f.nextOnSuccessOrFailure == nil {
			// No succeeding filters to run, return.
			return filtered, err
		}
		if f.nextOnSuccess != nil {
			next = f.nextOnSuccess
		}
		// On success, pass the filtered result to the next filter.
		return next.Filter(req, filtered)
	} else {
		if f.nextOnFailure == nil && f.nextOnSuccessOrFailure == nil {
			// No succeeding filters to run, return.
			return filtered, err
		}
		if f.nextOnFailure != nil {
			next = f.nextOnFailure
		}
		// On failure, pass the initial set of pods to the next filter.
		return next.Filter(req, pods)
	}
}
|
||||
|
||||
// filterFunc filters a set of input pods to a subset. It is the pluggable
// selection step run by each filter node.
type filterFunc func(req *LLMRequest, pods []*backend.PodMetrics) ([]*backend.PodMetrics, error)
|
||||
|
||||
// toFilterFunc is a helper function to convert a per pod filter func to the FilterFunc.
|
||||
func toFilterFunc(pp podPredicate) filterFunc {
|
||||
return func(req *LLMRequest, pods []*backend.PodMetrics) ([]*backend.PodMetrics, error) {
|
||||
filtered := []*backend.PodMetrics{}
|
||||
for _, pod := range pods {
|
||||
pass := pp(req, pod)
|
||||
if pass {
|
||||
filtered = append(filtered, pod)
|
||||
}
|
||||
}
|
||||
if len(filtered) == 0 {
|
||||
return nil, errors.New("no pods left")
|
||||
}
|
||||
return filtered, nil
|
||||
}
|
||||
}
|
||||
|
||||
// leastQueuingFilterFunc finds the max and min queue size of all pods, divides the whole range
|
||||
// (max-min) by the number of pods, and finds the pods that fall into the first range.
|
||||
// The intuition is that if there are multiple pods that share similar queue size in the low range,
|
||||
// we should consider them all instead of the absolute minimum one. This worked better than picking
|
||||
// the least one as it gives more choices for the next filter, which on aggregate gave better
|
||||
// results.
|
||||
// TODO: Compare this strategy with other strategies such as top K.
|
||||
func leastQueuingFilterFunc(req *LLMRequest, pods []*backend.PodMetrics) ([]*backend.PodMetrics, error) {
|
||||
min := math.MaxInt
|
||||
max := 0
|
||||
filtered := []*backend.PodMetrics{}
|
||||
|
||||
for _, pod := range pods {
|
||||
if pod.WaitingQueueSize <= min {
|
||||
min = pod.WaitingQueueSize
|
||||
}
|
||||
if pod.WaitingQueueSize >= max {
|
||||
max = pod.WaitingQueueSize
|
||||
}
|
||||
}
|
||||
|
||||
for _, pod := range pods {
|
||||
if pod.WaitingQueueSize >= min && pod.WaitingQueueSize <= min+(max-min)/len(pods) {
|
||||
filtered = append(filtered, pod)
|
||||
}
|
||||
}
|
||||
return filtered, nil
|
||||
}
|
||||
|
||||
// lowQueueingPodPredicate keeps pods whose waiting queue is below the LoRA
// queueing threshold; below it, LoRA affinity is worth prioritizing.
func lowQueueingPodPredicate(_ *LLMRequest, pod *backend.PodMetrics) bool {
	return pod.WaitingQueueSize < queueingThresholdLoRA
}
|
||||
|
||||
// leastKVCacheFilterFunc finds the max and min KV cache of all pods, divides the whole range
|
||||
// (max-min) by the number of pods, and finds the pods that fall into the first range.
|
||||
// The intuition is that if there are multiple pods that share similar KV cache in the low range, we
|
||||
// should consider them all instead of the absolute minimum one. This worked better than picking the
|
||||
// least one as it gives more choices for the next filter, which on aggregate gave better results.
|
||||
// TODO: Compare this strategy with other strategies such as top K.
|
||||
func leastKVCacheFilterFunc(req *LLMRequest, pods []*backend.PodMetrics) ([]*backend.PodMetrics, error) {
|
||||
min := math.MaxFloat64
|
||||
var max float64 = 0
|
||||
filtered := []*backend.PodMetrics{}
|
||||
|
||||
for _, pod := range pods {
|
||||
if pod.KVCacheUsagePercent <= min {
|
||||
min = pod.KVCacheUsagePercent
|
||||
}
|
||||
if pod.KVCacheUsagePercent >= max {
|
||||
max = pod.KVCacheUsagePercent
|
||||
}
|
||||
}
|
||||
|
||||
for _, pod := range pods {
|
||||
if pod.KVCacheUsagePercent >= min && pod.KVCacheUsagePercent <= min+(max-min)/float64(len(pods)) {
|
||||
filtered = append(filtered, pod)
|
||||
}
|
||||
}
|
||||
return filtered, nil
|
||||
}
|
||||
|
||||
// podPredicate is a filter function to check whether a pod is desired.
// It is lifted into a full filterFunc via toFilterFunc.
type podPredicate func(req *LLMRequest, pod *backend.PodMetrics) bool
|
||||
|
||||
// We consider serving an adapter low cost it the adapter is active in the model server, or the
|
||||
// model server has room to load the adapter. The lowLoRACostPredicate ensures weak affinity by
|
||||
// spreading the load of a LoRA adapter across multiple pods, avoiding "pinning" all requests to
|
||||
// a single pod. This gave good performance in our initial benchmarking results in the scenario
|
||||
// where # of lora slots > # of lora adapters.
|
||||
func lowLoRACostPredicate(req *LLMRequest, pod *backend.PodMetrics) bool {
|
||||
_, ok := pod.ActiveModels[req.Model]
|
||||
return ok || len(pod.ActiveModels) < pod.MaxActiveModels
|
||||
}
|
||||
|
||||
// loRAAffinityPredicate is a filter function to check whether a pod has affinity to the lora requested,
// i.e. the requested model is already in the pod's active adapter set.
func loRAAffinityPredicate(req *LLMRequest, pod *backend.PodMetrics) bool {
	_, ok := pod.ActiveModels[req.Model]
	return ok
}
|
||||
|
||||
// canAcceptNewLoraPredicate is a filter function to check whether a pod has room to load the adapter
// (fewer active adapters than its configured maximum).
func canAcceptNewLoraPredicate(req *LLMRequest, pod *backend.PodMetrics) bool {
	return len(pod.ActiveModels) < pod.MaxActiveModels
}
|
||||
|
||||
// criticalRequestPredicate keeps every pod for critical requests and none for
// sheddable ones; it only inspects the request, not the pod.
func criticalRequestPredicate(req *LLMRequest, pod *backend.PodMetrics) bool {
	return req.Critical
}
|
||||
|
||||
func noQueueAndLessThanKVCacheThresholdPredicate(queueThreshold int, kvCacheThreshold float64) podPredicate {
|
||||
return func(req *LLMRequest, pod *backend.PodMetrics) bool {
|
||||
return pod.WaitingQueueSize <= queueThreshold && pod.KVCacheUsagePercent <= kvCacheThreshold
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,158 @@
|
||||
/*
|
||||
Copyright 2025 The Kubernetes Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
// Package scheduling implements request scheduling algorithms.
|
||||
package scheduling
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"fmt"
|
||||
"math/rand"
|
||||
"strings"
|
||||
|
||||
"github.com/alibaba/higress/plugins/wasm-go/extensions/ai-load-balancer/least_busy/backend"
|
||||
"github.com/alibaba/higress/plugins/wasm-go/extensions/ai-load-balancer/least_busy/backend/vllm"
|
||||
|
||||
"github.com/prometheus/common/expfmt"
|
||||
)
|
||||
|
||||
// Tuning knobs for the filter chain below.
const (
	// TODO(https://github.com/kubernetes-sigs/gateway-api-inference-extension/issues/16) Make this configurable.
	// Max KV-cache usage fraction under which a pod may take sheddable requests.
	kvCacheThreshold = 0.8
	// TODO(https://github.com/kubernetes-sigs/gateway-api-inference-extension/issues/16) Make this configurable.
	// Max waiting-queue length under which a pod may take sheddable requests.
	queueThresholdCritical = 5
	// TODO(https://github.com/kubernetes-sigs/gateway-api-inference-extension/issues/16) Make this configurable.
	// the threshold for queued requests to be considered low below which we can prioritize LoRA affinity.
	// The value of 50 is arrived heuristicically based on experiments.
	queueingThresholdLoRA = 50
)
|
||||
|
||||
// The filter chain, rooted at defaultFilter: critical requests take the
// low-latency path; sheddable requests are admitted only when some pod has
// spare queue and KV-cache headroom, otherwise they are dropped.
var (
	defaultFilter = &filter{
		name:          "critical request",
		filter:        toFilterFunc(criticalRequestPredicate),
		nextOnSuccess: lowLatencyFilter,
		nextOnFailure: sheddableRequestFilter,
	}

	// queueLoRAAndKVCacheFilter applied least queue -> low cost lora -> least KV Cache filter
	queueLoRAAndKVCacheFilter = &filter{
		name:   "least queuing",
		filter: leastQueuingFilterFunc,
		nextOnSuccessOrFailure: &filter{
			name:   "low cost LoRA",
			filter: toFilterFunc(lowLoRACostPredicate),
			nextOnSuccessOrFailure: &filter{
				name:   "least KV cache percent",
				filter: leastKVCacheFilterFunc,
			},
		},
	}

	// queueAndKVCacheFilter applies least queue followed by least KV Cache filter
	queueAndKVCacheFilter = &filter{
		name:   "least queuing",
		filter: leastQueuingFilterFunc,
		nextOnSuccessOrFailure: &filter{
			name:   "least KV cache percent",
			filter: leastKVCacheFilterFunc,
		},
	}

	// lowLatencyFilter: when queues are short, prefer LoRA affinity first;
	// when queues are long, fall back to the combined queue/LoRA/KV chain.
	lowLatencyFilter = &filter{
		name:   "low queueing filter",
		filter: toFilterFunc((lowQueueingPodPredicate)),
		nextOnSuccess: &filter{
			name:          "affinity LoRA",
			filter:        toFilterFunc(loRAAffinityPredicate),
			nextOnSuccess: queueAndKVCacheFilter,
			nextOnFailure: &filter{
				name:                   "can accept LoRA Adapter",
				filter:                 toFilterFunc(canAcceptNewLoraPredicate),
				nextOnSuccessOrFailure: queueAndKVCacheFilter,
			},
		},
		nextOnFailure: queueLoRAAndKVCacheFilter,
	}

	sheddableRequestFilter = &filter{
		// When there is at least one model server that's not queuing requests, and still has KV
		// cache below a certain threshold, we consider this model server has capacity to handle
		// a sheddable request without impacting critical requests.
		name:          "has capacity for sheddable requests",
		filter:        toFilterFunc(noQueueAndLessThanKVCacheThresholdPredicate(queueThresholdCritical, kvCacheThreshold)),
		nextOnSuccess: queueLoRAAndKVCacheFilter,
		// If all pods are queuing or running above the KVCache threshold, we drop the sheddable
		// request to make room for critical requests.
		nextOnFailure: &filter{
			name: "drop request",
			filter: func(req *LLMRequest, pods []*backend.PodMetrics) ([]*backend.PodMetrics, error) {
				// api.LogDebugf("Dropping request %v", req)
				return []*backend.PodMetrics{}, errors.New("dropping request due to limited backend resources")
			},
		},
	}
)
|
||||
|
||||
func NewScheduler(pm []*backend.PodMetrics) *Scheduler {
|
||||
|
||||
return &Scheduler{
|
||||
podMetrics: pm,
|
||||
filter: defaultFilter,
|
||||
}
|
||||
}
|
||||
|
||||
// Scheduler picks a target pod for a request by running its pod metrics
// snapshot through a filter chain.
type Scheduler struct {
	// podMetrics is the candidate pod set with their latest scraped metrics.
	podMetrics []*backend.PodMetrics
	// filter is the root of the decision chain (defaultFilter by default).
	filter Filter
}
|
||||
|
||||
// Schedule finds the target pod based on metrics and the requested lora adapter.
// After filtering, the final choice among surviving pods is uniform random.
func (s *Scheduler) Schedule(req *LLMRequest) (targetPod backend.Pod, err error) {
	pods, err := s.filter.Filter(req, s.podMetrics)
	if err != nil || len(pods) == 0 {
		return backend.Pod{}, fmt.Errorf("failed to apply filter, resulted %v pods: %w", len(pods), err)
	}
	i := rand.Intn(len(pods))
	return pods[i].Pod, nil
}
|
||||
|
||||
func GetScheduler(hostMetrics map[string]string) (*Scheduler, error) {
|
||||
if len(hostMetrics) == 0 {
|
||||
return nil, errors.New("backend is not support llm scheduling")
|
||||
}
|
||||
var pms []*backend.PodMetrics
|
||||
for addr, metric := range hostMetrics {
|
||||
parser := expfmt.TextParser{}
|
||||
metricFamilies, err := parser.TextToMetricFamilies(strings.NewReader(metric))
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
pm := &backend.PodMetrics{
|
||||
Pod: backend.Pod{
|
||||
Name: addr,
|
||||
Address: addr,
|
||||
},
|
||||
Metrics: backend.Metrics{},
|
||||
}
|
||||
pm, err = vllm.PromToPodMetrics(metricFamilies, pm)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
pms = append(pms, pm)
|
||||
}
|
||||
return NewScheduler(pms), nil
|
||||
}
|
||||
@@ -0,0 +1,7 @@
|
||||
package scheduling
|
||||
|
||||
// LLMRequest is a structured representation of the fields we parse out of the LLMRequest body.
type LLMRequest struct {
	// Model is the "model" field from the request body; for LoRA it names the adapter.
	Model string
	// Critical marks requests the scheduler must not shed.
	Critical bool
}
|
||||
82
plugins/wasm-go/extensions/ai-load-balancer/main.go
Normal file
82
plugins/wasm-go/extensions/ai-load-balancer/main.go
Normal file
@@ -0,0 +1,82 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
|
||||
"github.com/higress-group/proxy-wasm-go-sdk/proxywasm/types"
|
||||
"github.com/higress-group/wasm-go/pkg/wrapper"
|
||||
"github.com/tidwall/gjson"
|
||||
|
||||
global_least_request "github.com/alibaba/higress/plugins/wasm-go/extensions/ai-load-balancer/global_least_request"
|
||||
least_busy "github.com/alibaba/higress/plugins/wasm-go/extensions/ai-load-balancer/least_busy"
|
||||
prefix_cache "github.com/alibaba/higress/plugins/wasm-go/extensions/ai-load-balancer/prefix_cache"
|
||||
)
|
||||
|
||||
// main is intentionally empty: a proxy-wasm module is driven entirely through
// the callbacks registered in init().
func main() {}
|
||||
|
||||
// init registers the plugin with the wasm wrapper runtime under the name
// "ai-load-balancer", wiring the config parser and every HTTP phase callback.
func init() {
	wrapper.SetCtx(
		"ai-load-balancer",
		wrapper.ParseConfig(parseConfig),
		wrapper.ProcessRequestHeaders(onHttpRequestHeaders),
		wrapper.ProcessRequestBody(onHttpRequestBody),
		wrapper.ProcessResponseHeaders(onHttpResponseHeaders),
		wrapper.ProcessStreamingResponseBody(onHttpStreamingResponseBody),
		wrapper.ProcessResponseBody(onHttpResponseBody),
	)
}
|
||||
|
||||
// LoadBalancer is the common contract every lb_policy implementation
// (least_busy, global_least_request, prefix_cache) satisfies; the top-level
// onHttpX handlers delegate each HTTP phase to it.
type LoadBalancer interface {
	HandleHttpRequestHeaders(ctx wrapper.HttpContext) types.Action
	HandleHttpRequestBody(ctx wrapper.HttpContext, body []byte) types.Action
	HandleHttpResponseHeaders(ctx wrapper.HttpContext) types.Action
	HandleHttpStreamingResponseBody(ctx wrapper.HttpContext, data []byte, endOfStream bool) []byte
	HandleHttpResponseBody(ctx wrapper.HttpContext, body []byte) types.Action
}
|
||||
|
||||
// Config is the parsed plugin configuration.
type Config struct {
	// policy is the raw "lb_policy" value from the plugin config.
	policy string
	// lb is the concrete implementation selected by parseConfig.
	lb LoadBalancer
}
|
||||
|
||||
// Supported values for the "lb_policy" configuration field.
const (
	LeastBusyLoadBalancerPolicy          = "least_busy"
	GlobalLeastRequestLoadBalancerPolicy = "global_least_request"
	PrefixCache                          = "prefix_cache"
)
|
||||
|
||||
func parseConfig(json gjson.Result, config *Config) error {
|
||||
config.policy = json.Get("lb_policy").String()
|
||||
var err error
|
||||
switch config.policy {
|
||||
case LeastBusyLoadBalancerPolicy:
|
||||
config.lb, err = least_busy.NewLeastBusyLoadBalancer(json.Get("lb_config"))
|
||||
case GlobalLeastRequestLoadBalancerPolicy:
|
||||
config.lb, err = global_least_request.NewGlobalLeastRequestLoadBalancer(json.Get("lb_config"))
|
||||
case PrefixCache:
|
||||
config.lb, err = prefix_cache.NewPrefixCacheLoadBalancer(json.Get("lb_config"))
|
||||
default:
|
||||
err = fmt.Errorf("lb_policy %s is not supported", config.policy)
|
||||
}
|
||||
return err
|
||||
}
|
||||
|
||||
// onHttpRequestHeaders delegates the request-header phase to the configured policy.
func onHttpRequestHeaders(ctx wrapper.HttpContext, config Config) types.Action {
	return config.lb.HandleHttpRequestHeaders(ctx)
}
|
||||
|
||||
// onHttpRequestBody delegates the request-body phase to the configured policy.
func onHttpRequestBody(ctx wrapper.HttpContext, config Config, body []byte) types.Action {
	return config.lb.HandleHttpRequestBody(ctx, body)
}
|
||||
|
||||
// onHttpResponseHeaders delegates the response-header phase to the configured policy.
func onHttpResponseHeaders(ctx wrapper.HttpContext, config Config) types.Action {
	return config.lb.HandleHttpResponseHeaders(ctx)
}
|
||||
|
||||
// onHttpStreamingResponseBody delegates streamed response chunks to the configured policy.
func onHttpStreamingResponseBody(ctx wrapper.HttpContext, config Config, data []byte, endOfStream bool) []byte {
	return config.lb.HandleHttpStreamingResponseBody(ctx, data, endOfStream)
}
|
||||
|
||||
// onHttpResponseBody delegates the buffered response body to the configured policy.
func onHttpResponseBody(ctx wrapper.HttpContext, config Config, body []byte) types.Action {
	return config.lb.HandleHttpResponseBody(ctx, body)
}
|
||||
@@ -0,0 +1,302 @@
|
||||
package prefix_cache
|
||||
|
||||
import (
|
||||
"crypto/sha1"
|
||||
"encoding/hex"
|
||||
"errors"
|
||||
"fmt"
|
||||
"math/rand"
|
||||
"strings"
|
||||
|
||||
"github.com/alibaba/higress/plugins/wasm-go/extensions/ai-load-balancer/utils"
|
||||
|
||||
"github.com/higress-group/proxy-wasm-go-sdk/proxywasm"
|
||||
"github.com/higress-group/proxy-wasm-go-sdk/proxywasm/types"
|
||||
"github.com/higress-group/wasm-go/pkg/log"
|
||||
"github.com/higress-group/wasm-go/pkg/wrapper"
|
||||
"github.com/tidwall/gjson"
|
||||
"github.com/tidwall/resp"
|
||||
)
|
||||
|
||||
const (
	// NOTE(review): this key prefix says "global_least_request_table" but is
	// declared in the prefix_cache package — confirm the intended namespace.
	RedisKeyFormat = "higress:global_least_request_table:%s:%s"
	// RedisLua routes by longest message-prefix match (prefix keys are rolled
	// hex hashes combined via XOR), falling back to a global least-request
	// pick over the per-host counter hash. KEYS[1]=TTL seconds, KEYS[2]=hash
	// key, KEYS[3]=default target, KEYS[4..]=healthy addresses; ARGV carries
	// the per-message hex digests.
	//
	// Bug fix: the least-request scan previously compared hash values with
	// `count < current_count`. Redis hash values arrive in Lua as strings, so
	// that comparison was lexicographic (e.g. "10" < "9"), picking the wrong
	// host once counters reach different digit lengths; both sides are now
	// converted with tonumber before comparing.
	RedisLua = `-- hex string => bytes
local function hex_to_bytes(hex)
    local bytes = {}
    for i = 1, #hex, 2 do
        local byte_str = hex:sub(i, i+1)
        local byte_val = tonumber(byte_str, 16)
        table.insert(bytes, byte_val)
    end
    return bytes
end

-- bytes => hex string
local function bytes_to_hex(bytes)
    local result = ""
    for _, byte in ipairs(bytes) do
        result = result .. string.format("%02X", byte)
    end
    return result
end

-- byte XOR
local function byte_xor(a, b)
    local result = 0
    for i = 0, 7 do
        local bit_val = 2^i
        if ((a % (bit_val * 2)) >= bit_val) ~= ((b % (bit_val * 2)) >= bit_val) then
            result = result + bit_val
        end
    end
    return result
end

-- hex string XOR
local function hex_xor(a, b)
    if #a ~= #b then
        error("Hex strings must be of equal length, first is " .. a .. " second is " .. b)
    end

    local a_bytes = hex_to_bytes(a)
    local b_bytes = hex_to_bytes(b)

    local result_bytes = {}
    for i = 1, #a_bytes do
        table.insert(result_bytes, byte_xor(a_bytes[i], b_bytes[i]))
    end

    return bytes_to_hex(result_bytes)
end

-- check host whether healthy
local function is_healthy(addr)
    for i = 4, #KEYS do
        if addr == KEYS[i] then
            return true
        end
    end
    return false
end

local target = ""
local key = ""
local current_key = ""
local count = #ARGV
local ttl = KEYS[1]
local hset_key = KEYS[2]
local default_target = KEYS[3]

if count == 0 then
    return target
end

-- find longest prefix
local index = 1
while index <= count do
    if current_key == "" then
        current_key = ARGV[index]
    else
        current_key = hex_xor(current_key, ARGV[index])
    end
    if redis.call("EXISTS", current_key) == 1 then
        key = current_key
        local tmp_target = redis.call("GET", key)
        if not is_healthy(tmp_target) then
            break
        end
        target = tmp_target
        -- update ttl for exist keys
        redis.call("EXPIRE", key, ttl)
        index = index + 1
    else
        break
    end
end


-- global least request
if target == "" then
    index = 1
    local current_count = 0
    target = default_target
    if redis.call('HEXISTS', hset_key, target) ~= 0 then
        current_count = redis.call('HGET', hset_key, target)
        local hash = redis.call('HGETALL', hset_key)
        for i = 1, #hash, 2 do
            local addr = hash[i]
            local count = hash[i+1]
            if tonumber(count) < tonumber(current_count) and is_healthy(addr) then
                target = addr
                current_count = count
            end
        end
    end
end

-- update request count
redis.call("HINCRBY", hset_key, target, 1)

-- add tree-path
while index <= count do
    if key == "" then
        key = ARGV[index]
    else
        key = hex_xor(key, ARGV[index])
    end
    redis.call("SET", key, target)
    redis.call("EXPIRE", key, ttl)
    index = index + 1
end

return target`
)
|
||||
|
||||
// PrefixCacheLoadBalancer routes requests sharing a message prefix to the
// same upstream host, using Redis as the shared prefix -> host table.
type PrefixCacheLoadBalancer struct {
	// redisClient talks to the Redis service holding the prefix table.
	redisClient wrapper.RedisClient
	// redisKeyTTL is the expiry (seconds, as consumed by Lua EXPIRE) for prefix keys.
	redisKeyTTL int
}
|
||||
|
||||
func NewPrefixCacheLoadBalancer(json gjson.Result) (PrefixCacheLoadBalancer, error) {
|
||||
lb := PrefixCacheLoadBalancer{}
|
||||
serviceFQDN := json.Get("serviceFQDN").String()
|
||||
servicePort := json.Get("servicePort").Int()
|
||||
if serviceFQDN == "" || servicePort == 0 {
|
||||
log.Errorf("invalid redis service, serviceFQDN: %s, servicePort: %d", serviceFQDN, servicePort)
|
||||
return lb, errors.New("invalid redis service config")
|
||||
}
|
||||
lb.redisClient = wrapper.NewRedisClusterClient(wrapper.FQDNCluster{
|
||||
FQDN: serviceFQDN,
|
||||
Port: servicePort,
|
||||
})
|
||||
username := json.Get("username").String()
|
||||
password := json.Get("password").String()
|
||||
timeout := json.Get("timeout").Int()
|
||||
if timeout == 0 {
|
||||
timeout = 3000
|
||||
}
|
||||
// database default is 0
|
||||
database := json.Get("database").Int()
|
||||
if json.Get("redisKeyTTL").Int() == 0 {
|
||||
lb.redisKeyTTL = int(json.Get("redisKeyTTL").Int())
|
||||
} else {
|
||||
lb.redisKeyTTL = 1800
|
||||
}
|
||||
return lb, lb.redisClient.Init(username, password, int64(timeout), wrapper.WithDataBase(int(database)))
|
||||
}
|
||||
|
||||
// HandleHttpRequestHeaders pauses header processing so that the host chosen
// later in the request-body phase can still override the upstream target.
func (lb PrefixCacheLoadBalancer) HandleHttpRequestHeaders(ctx wrapper.HttpContext) types.Action {
	// If return types.ActionContinue, SetUpstreamOverrideHost will not take effect
	return types.HeaderStopIteration
}
|
||||
|
||||
// HandleHttpRequestBody selects the upstream host for an LLM request based on
// its conversation prefix.
//
// It hashes cumulative message prefixes from the request body, then runs the
// RedisLua script with those hashes plus the healthy host list; the script
// returns the host to pin the request to, which is applied via
// SetUpstreamOverrideHost from the async Redis callback.
//
// Returns types.ActionPause when the Redis call was issued (the callback
// resumes the request), and types.ActionContinue on any error or when load
// balancing is skipped — in error cases the "error" context flag is set so
// the response phase does not decrement a counter that was never incremented.
func (lb PrefixCacheLoadBalancer) HandleHttpRequestBody(ctx wrapper.HttpContext, body []byte) types.Action {
	var err error
	// Route and cluster names form the Redis hash key; without them there is
	// no namespace for the counters, so fall back to the default LB policy.
	routeName, err := utils.GetRouteName()
	if err != nil || routeName == "" {
		ctx.SetContext("error", true)
		return types.ActionContinue
	} else {
		ctx.SetContext("routeName", routeName)
	}
	clusterName, err := utils.GetClusterName()
	if err != nil || clusterName == "" {
		ctx.SetContext("error", true)
		return types.ActionContinue
	} else {
		ctx.SetContext("clusterName", clusterName)
	}
	hostInfos, err := proxywasm.GetUpstreamHosts()
	if err != nil {
		ctx.SetContext("error", true)
		log.Error("get upstream cluster endpoints failed")
		return types.ActionContinue
	}
	// Keep only endpoints Envoy reports as healthy; hostInfo is
	// [address, metadata-JSON].
	healthyHosts := []string{}
	for _, hostInfo := range hostInfos {
		if gjson.Get(hostInfo[1], "health_status").String() == "Healthy" {
			healthyHosts = append(healthyHosts, hostInfo[0])
		}
	}
	if len(healthyHosts) == 0 {
		log.Info("upstream cluster has no healthy endpoints")
		return types.ActionContinue
	}
	// Random healthy host used by the Lua script when no prefix match exists.
	defaultHost := healthyHosts[rand.Intn(len(healthyHosts))]
	params := []interface{}{}
	rawStr := ""
	messages := gjson.GetBytes(body, "messages").Array()
	for index, obj := range messages {
		if !obj.Get("role").Exists() || !obj.Get("content").Exists() {
			ctx.SetContext("error", true)
			log.Info("cannot extract role or content from request body, skip llm load balancing")
			return types.ActionContinue
		}
		role := obj.Get("role").String()
		content := obj.Get("content").String()
		rawStr += role + ":" + content
		// Emit one cumulative-prefix hash per user turn (and one for the
		// final message regardless of role); these become the Lua ARGV
		// segments combined via hex_xor into tree-path keys.
		if role == "user" || index == len(messages)-1 {
			sha1Str := computeSHA1(rawStr)
			params = append(params, sha1Str)
			rawStr = ""
		}
	}
	if len(params) == 0 {
		return types.ActionContinue
	}
	// KEYS layout expected by RedisLua: [ttl, counter-hash key, default host,
	// healthy hosts...].
	keys := []interface{}{lb.redisKeyTTL, fmt.Sprintf(RedisKeyFormat, routeName, clusterName), defaultHost}
	for _, v := range healthyHosts {
		keys = append(keys, v)
	}
	err = lb.redisClient.Eval(RedisLua, len(keys), keys, params, func(response resp.Value) {
		// Always resume the paused request, whatever the outcome.
		defer proxywasm.ResumeHttpRequest()
		if err := response.Error(); err != nil {
			ctx.SetContext("error", true)
			log.Errorf("Redis eval failed: %+v", err)
			return
		}
		hostSelected := response.String()
		if err := proxywasm.SetUpstreamOverrideHost([]byte(hostSelected)); err != nil {
			ctx.SetContext("error", true)
			log.Errorf("override upstream host failed, fallback to default lb policy, error informations: %+v", err)
		}
		log.Debugf("host_selected: %s", hostSelected)
		// Saved so the response phase can decrement this host's counter.
		ctx.SetContext("host_selected", hostSelected)
	})
	if err != nil {
		ctx.SetContext("error", true)
		return types.ActionContinue
	}
	// Pause until the Redis callback resumes the request.
	return types.ActionPause
}
|
||||
|
||||
// HandleHttpResponseHeaders performs no response-header processing for this
// load balancer.
func (lb PrefixCacheLoadBalancer) HandleHttpResponseHeaders(ctx wrapper.HttpContext) types.Action {
	return types.ActionContinue
}
|
||||
|
||||
func (lb PrefixCacheLoadBalancer) HandleHttpStreamingResponseBody(ctx wrapper.HttpContext, data []byte, endOfStream bool) []byte {
|
||||
if endOfStream {
|
||||
isErr, _ := ctx.GetContext("error").(bool)
|
||||
if !isErr {
|
||||
routeName, _ := ctx.GetContext("routeName").(string)
|
||||
clusterName, _ := ctx.GetContext("clusterName").(string)
|
||||
host_selected, _ := ctx.GetContext("host_selected").(string)
|
||||
if host_selected == "" {
|
||||
log.Errorf("get host_selected failed")
|
||||
} else {
|
||||
lb.redisClient.HIncrBy(fmt.Sprintf(RedisKeyFormat, routeName, clusterName), host_selected, -1, nil)
|
||||
}
|
||||
}
|
||||
}
|
||||
return data
|
||||
}
|
||||
|
||||
// HandleHttpResponseBody performs no buffered response-body processing; the
// counter bookkeeping happens in HandleHttpStreamingResponseBody.
func (lb PrefixCacheLoadBalancer) HandleHttpResponseBody(ctx wrapper.HttpContext, body []byte) types.Action {
	return types.ActionContinue
}
|
||||
|
||||
func computeSHA1(data string) string {
|
||||
hasher := sha1.New()
|
||||
hasher.Write([]byte(data))
|
||||
return strings.ToUpper(hex.EncodeToString(hasher.Sum(nil)))
|
||||
}
|
||||
19
plugins/wasm-go/extensions/ai-load-balancer/utils/utils.go
Normal file
19
plugins/wasm-go/extensions/ai-load-balancer/utils/utils.go
Normal file
@@ -0,0 +1,19 @@
|
||||
package utils
|
||||
|
||||
import "github.com/higress-group/proxy-wasm-go-sdk/proxywasm"
|
||||
|
||||
func GetRouteName() (string, error) {
|
||||
if raw, err := proxywasm.GetProperty([]string{"route_name"}); err != nil {
|
||||
return "", err
|
||||
} else {
|
||||
return string(raw), nil
|
||||
}
|
||||
}
|
||||
|
||||
func GetClusterName() (string, error) {
|
||||
if raw, err := proxywasm.GetProperty([]string{"cluster_name"}); err != nil {
|
||||
return "", err
|
||||
} else {
|
||||
return string(raw), nil
|
||||
}
|
||||
}
|
||||
@@ -30,19 +30,20 @@ description: AI 代理插件配置参考
|
||||
|
||||
`provider`的配置字段说明如下:
|
||||
|
||||
| 名称 | 数据类型 | 填写要求 | 默认值 | 描述 |
|
||||
| ---------------------- | ---------------------- | -------- | ------ | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
|
||||
| `type` | string | 必填 | - | AI 服务提供商名称 |
|
||||
| `apiTokens` | array of string | 非必填 | - | 用于在访问 AI 服务时进行认证的令牌。如果配置了多个 token,插件会在请求时随机进行选择。部分服务提供商只支持配置一个 token。 |
|
||||
| `timeout` | number | 非必填 | - | 访问 AI 服务的超时时间。单位为毫秒。默认值为 120000,即 2 分钟。此项配置目前仅用于获取上下文信息,并不影响实际转发大模型请求。 |
|
||||
| 名称 | 数据类型 | 填写要求 | 默认值 | 描述 |
|
||||
| ---------------------- | ---------------------- | -------- | ------ | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
|
||||
| `type` | string | 必填 | - | AI 服务提供商名称 |
|
||||
| `apiTokens` | array of string | 非必填 | - | 用于在访问 AI 服务时进行认证的令牌。如果配置了多个 token,插件会在请求时随机进行选择。部分服务提供商只支持配置一个 token。 |
|
||||
| `timeout` | number | 非必填 | - | 访问 AI 服务的超时时间。单位为毫秒。默认值为 120000,即 2 分钟。此项配置目前仅用于获取上下文信息,并不影响实际转发大模型请求。 |
|
||||
| `modelMapping` | map of string | 非必填 | - | AI 模型映射表,用于将请求中的模型名称映射为服务提供商支持模型名称。<br/>1. 支持前缀匹配。例如用 "gpt-3-\*" 匹配所有名称以“gpt-3-”开头的模型;<br/>2. 支持使用 "\*" 为键来配置通用兜底映射关系;<br/>3. 如果映射的目标名称为空字符串 "",则表示保留原模型名称。<br/>4. 支持以 `~` 前缀使用正则匹配。例如用 "~gpt(.\*)" 匹配所有以 "gpt" 开头的模型并支持在目标模型中使用 capture group 引用匹配到的内容。示例: "~gpt(.\*): openai/gpt\$1" |
|
||||
| `protocol` | string | 非必填 | - | 插件对外提供的 API 接口契约。目前支持以下取值:openai(默认值,使用 OpenAI 的接口契约)、original(使用目标服务提供商的原始接口契约) |
|
||||
| `context` | object | 非必填 | - | 配置 AI 对话上下文信息 |
|
||||
| `customSettings` | array of customSetting | 非必填 | - | 为 AI 请求指定覆盖或者填充参数 |
|
||||
| `failover` | object | 非必填 | - | 配置 apiToken 的 failover 策略,当 apiToken 不可用时,将其移出 apiToken 列表,待健康检测通过后重新添加回 apiToken 列表 |
|
||||
| `retryOnFailure` | object | 非必填 | - | 当请求失败时立即进行重试 |
|
||||
| `reasoningContentMode` | string | 非必填 | - | 如何处理大模型服务返回的推理内容。目前支持以下取值:passthrough(正常输出推理内容)、ignore(不输出推理内容)、concat(将推理内容拼接在常规输出内容之前)。默认为 passthrough。仅支持通义千问服务。 |
|
||||
| `capabilities` | map of string | 非必填 | - | 部分 provider 的部分 ai 能力原生兼容 openai/v1 格式,不需要重写,可以直接转发,通过此配置项指定来开启转发, key 表示的是采用的厂商协议能力,values 表示的真实的厂商该能力的 api path, 厂商协议能力当前支持: openai/v1/chatcompletions, openai/v1/embeddings, openai/v1/imagegeneration, openai/v1/audiospeech, cohere/v1/rerank |
|
||||
| `protocol` | string | 非必填 | - | 插件对外提供的 API 接口契约。目前支持以下取值:openai(默认值,使用 OpenAI 的接口契约)、original(使用目标服务提供商的原始接口契约) |
|
||||
| `context` | object | 非必填 | - | 配置 AI 对话上下文信息 |
|
||||
| `customSettings` | array of customSetting | 非必填 | - | 为 AI 请求指定覆盖或者填充参数 |
|
||||
| `failover` | object | 非必填 | - | 配置 apiToken 的 failover 策略,当 apiToken 不可用时,将其移出 apiToken 列表,待健康检测通过后重新添加回 apiToken 列表 |
|
||||
| `retryOnFailure` | object | 非必填 | - | 当请求失败时立即进行重试 |
|
||||
| `reasoningContentMode` | string | 非必填 | - | 如何处理大模型服务返回的推理内容。目前支持以下取值:passthrough(正常输出推理内容)、ignore(不输出推理内容)、concat(将推理内容拼接在常规输出内容之前)。默认为 passthrough。仅支持通义千问服务。 |
|
||||
| `capabilities` | map of string | 非必填 | - | 部分 provider 的部分 ai 能力原生兼容 openai/v1 格式,不需要重写,可以直接转发,通过此配置项指定来开启转发, key 表示的是采用的厂商协议能力,values 表示的真实的厂商该能力的 api path, 厂商协议能力当前支持: openai/v1/chatcompletions, openai/v1/embeddings, openai/v1/imagegeneration, openai/v1/audiospeech, cohere/v1/rerank |
|
||||
| `subPath` | string | 非必填 | - | 如果配置了subPath,将会先移除请求path中该前缀,再进行后续处理 |
|
||||
|
||||
`context`的配置字段说明如下:
|
||||
|
||||
@@ -272,8 +273,19 @@ Google Vertex AI 所对应的 type 为 vertex。它特有的配置字段如下
|
||||
| `vertexRegion` | string | 必填 | - | Google Cloud 区域(如 us-central1, europe-west4 等),用于构建 Vertex API 地址 |
|
||||
| `vertexProjectId` | string | 必填 | - | Google Cloud 项目 ID,用于标识目标 GCP 项目 |
|
||||
| `vertexAuthServiceName` | string | 必填 | - | 用于 OAuth2 认证的服务名称,该服务为了访问oauth2.googleapis.com |
|
||||
| `vertexGeminiSafetySetting` | map of string | 非必填 | - | Gemini 模型的内容安全过滤设置。 |
|
||||
| `vertexTokenRefreshAhead` | number | 非必填 | - | Vertex access token刷新提前时间(单位秒) |
|
||||
| `geminiSafetySetting` | map of string | 非必填 | - | Gemini AI 内容过滤和安全级别设定。参考[Safety settings](https://ai.google.dev/gemini-api/docs/safety-settings) |
|
||||
| `vertexTokenRefreshAhead` | number | 非必填 | - | Vertex access token刷新提前时间(单位秒) |
|
||||
|
||||
#### AWS Bedrock
|
||||
|
||||
AWS Bedrock 所对应的 type 为 bedrock。它特有的配置字段如下:
|
||||
|
||||
| 名称 | 数据类型 | 填写要求 | 默认值 | 描述 |
|
||||
|---------------------------|--------|------|-----|------------------------------|
|
||||
| `awsAccessKey` | string | 必填 | - | AWS Access Key,用于身份认证 |
|
||||
| `awsSecretKey` | string | 必填 | - | AWS Secret Access Key,用于身份认证 |
|
||||
| `awsRegion` | string | 必填 | - | AWS 区域,例如:us-east-1 |
|
||||
| `bedrockAdditionalFields` | map | 非必填 | - | Bedrock 额外模型请求参数 |
|
||||
|
||||
## 用法示例
|
||||
|
||||
@@ -1418,7 +1430,7 @@ provider:
|
||||
}
|
||||
```
|
||||
|
||||
### 使用 OpenAI 协议代理 gemini 服务
|
||||
### 使用 OpenAI 协议代理 Gemini 服务
|
||||
|
||||
**配置信息**
|
||||
|
||||
@@ -1704,6 +1716,61 @@ provider:
|
||||
}
|
||||
```
|
||||
|
||||
### 使用 OpenAI 协议代理 AWS Bedrock 服务
|
||||
|
||||
**配置信息**
|
||||
|
||||
```yaml
|
||||
provider:
|
||||
type: bedrock
|
||||
awsAccessKey: "YOUR_AWS_ACCESS_KEY_ID"
|
||||
awsSecretKey: "YOUR_AWS_SECRET_ACCESS_KEY"
|
||||
awsRegion: "YOUR_AWS_REGION"
|
||||
bedrockAdditionalFields:
|
||||
top_k: 200
|
||||
```
|
||||
|
||||
**请求示例**
|
||||
|
||||
```json
|
||||
{
|
||||
"model": "arn:aws:bedrock:us-west-2::foundation-model/anthropic.claude-3-5-haiku-20241022-v1:0",
|
||||
"messages": [
|
||||
{
|
||||
"role": "user",
|
||||
"content": "你好,你是谁?"
|
||||
}
|
||||
],
|
||||
"stream": false
|
||||
}
|
||||
```
|
||||
|
||||
**响应示例**
|
||||
|
||||
```json
|
||||
{
|
||||
"id": "dc5812e2-6a62-49d6-829e-5c327b15e4e2",
|
||||
"choices": [
|
||||
{
|
||||
"index": 0,
|
||||
"message": {
|
||||
"role": "assistant",
|
||||
"content": "你好!我是Claude,一个由Anthropic开发的AI助手。很高兴认识你!我的目标是以诚实、有益且有意义的方式与人类交流。我会尽力提供准确和有帮助的信息,同时保持诚实和正直。请问我今天能为你做些什么呢?"
|
||||
},
|
||||
"finish_reason": "stop"
|
||||
}
|
||||
],
|
||||
"created": 1749657608,
|
||||
"model": "arn:aws:bedrock:us-west-2::foundation-model/anthropic.claude-3-5-haiku-20241022-v1:0",
|
||||
"object": "chat.completion",
|
||||
"usage": {
|
||||
"prompt_tokens": 16,
|
||||
"completion_tokens": 101,
|
||||
"total_tokens": 117
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
|
||||
## 完整配置示例
|
||||
|
||||
|
||||
@@ -29,15 +29,16 @@ Plugin execution priority: `100`
|
||||
|
||||
**Details for the `provider` configuration fields:**
|
||||
|
||||
| Name | Data Type | Requirement | Default | Description |
|
||||
| -------------- | --------------- | -------- | ------ | ------------------------------------------------------------------------------------------------------------------------------------------------------------- |
|
||||
| `type` | string | Required | - | Name of the AI service provider |
|
||||
| `apiTokens` | array of string | Optional | - | Tokens used for authentication when accessing AI services. If multiple tokens are configured, the plugin randomly selects one for each request. Some service providers only support configuring a single token. |
|
||||
| `timeout` | number | Optional | - | Timeout for accessing AI services, in milliseconds. The default value is 120000, which equals 2 minutes. Only used when retrieving context data. Won't affect the request forwarded to the LLM upstream. |
|
||||
| `modelMapping` | map of string | Optional | - | Mapping table for AI models, used to map model names in requests to names supported by the service provider.<br/>1. Supports prefix matching. For example, "gpt-3-\*" matches all model names starting with “gpt-3-”;<br/>2. Supports using "\*" as a key for a general fallback mapping;<br/>3. If the mapped target name is an empty string "", the original model name is preserved. |
|
||||
| `protocol` | string | Optional | - | API contract provided by the plugin. Currently supports the following values: openai (default, uses OpenAI's interface contract), original (uses the raw interface contract of the target service provider) |
|
||||
| `context` | object | Optional | - | Configuration for AI conversation context information |
|
||||
| `customSettings` | array of customSetting | Optional | - | Specifies overrides or fills parameters for AI requests |
|
||||
| Name | Data Type | Requirement | Default | Description |
|
||||
| -------------- | --------------- | -------- | ------ | ------------------------------------------------------------------------------------------------------------------------------------------------------------- |
|
||||
| `type` | string | Required | - | Name of the AI service provider |
|
||||
| `apiTokens` | array of string | Optional | - | Tokens used for authentication when accessing AI services. If multiple tokens are configured, the plugin randomly selects one for each request. Some service providers only support configuring a single token. |
|
||||
| `timeout` | number | Optional | - | Timeout for accessing AI services, in milliseconds. The default value is 120000, which equals 2 minutes. Only used when retrieving context data. Won't affect the request forwarded to the LLM upstream. |
|
||||
| `modelMapping` | map of string | Optional | - | Mapping table for AI models, used to map model names in requests to names supported by the service provider.<br/>1. Supports prefix matching. For example, "gpt-3-\*" matches all model names starting with “gpt-3-”;<br/>2. Supports using "\*" as a key for a general fallback mapping;<br/>3. If the mapped target name is an empty string "", the original model name is preserved. |
|
||||
| `protocol` | string | Optional | - | API contract provided by the plugin. Currently supports the following values: openai (default, uses OpenAI's interface contract), original (uses the raw interface contract of the target service provider) |
|
||||
| `context` | object | Optional | - | Configuration for AI conversation context information |
|
||||
| `customSettings` | array of customSetting | Optional | - | Specifies overrides or fills parameters for AI requests |
|
||||
| `subPath` | string | Optional | - | If subPath is configured, the prefix will be removed from the request path before further processing. |
|
||||
|
||||
**Details for the `context` configuration fields:**
|
||||
|
||||
@@ -220,6 +221,17 @@ For Vertex, the corresponding `type` is `vertex`. Its unique configuration field
|
||||
| `vertexGeminiSafetySetting` | map of string | Optional | - | Gemini model content safety filtering settings. |
|
||||
| `vertexTokenRefreshAhead` | number | Optional | - | Vertex access token refresh ahead time in seconds |
|
||||
|
||||
#### AWS Bedrock
|
||||
|
||||
For AWS Bedrock, the corresponding `type` is `bedrock`. Its unique configuration field is:
|
||||
|
||||
| Name | Data Type | Requirement | Default | Description |
|
||||
|---------------------------|-----------|-------------|---------|---------------------------------------------------------|
|
||||
| `awsAccessKey` | string | Required | - | AWS Access Key used for authentication |
|
||||
| `awsSecretKey` | string | Required | - | AWS Secret Access Key used for authentication |
|
||||
| `awsRegion` | string | Required | - | AWS region, e.g., us-east-1 |
|
||||
| `bedrockAdditionalFields` | map | Optional | - | Additional inference parameters that the model supports |
|
||||
|
||||
## Usage Examples
|
||||
|
||||
### Using OpenAI Protocol Proxy for Azure OpenAI Service
|
||||
@@ -1481,6 +1493,57 @@ provider:
|
||||
}
|
||||
```
|
||||
|
||||
### Utilizing OpenAI Protocol Proxy for AWS Bedrock Services
|
||||
**Configuration Information**
|
||||
```yaml
|
||||
provider:
|
||||
type: bedrock
|
||||
awsAccessKey: "YOUR_AWS_ACCESS_KEY_ID"
|
||||
awsSecretKey: "YOUR_AWS_SECRET_ACCESS_KEY"
|
||||
awsRegion: "YOUR_AWS_REGION"
|
||||
bedrockAdditionalFields:
|
||||
top_k: 200
|
||||
```
|
||||
|
||||
**Request Example**
|
||||
```json
|
||||
{
|
||||
"model": "arn:aws:bedrock:us-west-2::foundation-model/anthropic.claude-3-5-haiku-20241022-v1:0",
|
||||
"messages": [
|
||||
{
|
||||
"role": "user",
|
||||
"content": "who are you"
|
||||
}
|
||||
],
|
||||
"stream": false
|
||||
}
|
||||
```
|
||||
|
||||
**Response Example**
|
||||
```json
|
||||
{
|
||||
"id": "d52da49d-daf3-49d9-a105-0b527481fe14",
|
||||
"choices": [
|
||||
{
|
||||
"index": 0,
|
||||
"message": {
|
||||
"role": "assistant",
|
||||
"content": "I'm Claude, an AI created by Anthropic. I aim to be helpful, honest, and harmless. I won't pretend to be human, and I'll always try to be direct and truthful about what I am and what I can do."
|
||||
},
|
||||
"finish_reason": "stop"
|
||||
}
|
||||
],
|
||||
"created": 1749659050,
|
||||
"model": "arn:aws:bedrock:us-west-2::foundation-model/anthropic.claude-3-5-haiku-20241022-v1:0",
|
||||
"object": "chat.completion",
|
||||
"usage": {
|
||||
"prompt_tokens": 10,
|
||||
"completion_tokens": 57,
|
||||
"total_tokens": 67
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
## Full Configuration Example
|
||||
|
||||
### Kubernetes Example
|
||||
|
||||
@@ -161,7 +161,8 @@ func onHttpRequestBody(ctx wrapper.HttpContext, pluginConfig config.PluginConfig
|
||||
if settingErr != nil {
|
||||
log.Errorf("failed to replace request body by custom settings: %v", settingErr)
|
||||
}
|
||||
if providerConfig.IsOpenAIProtocol() {
|
||||
// 仅 /v1/chat/completions 和 /v1/completions 接口支持 stream_options 参数
|
||||
if providerConfig.IsOpenAIProtocol() && (apiName == provider.ApiNameChatCompletion || apiName == provider.ApiNameCompletion) {
|
||||
newBody = normalizeOpenAiRequestBody(newBody)
|
||||
}
|
||||
log.Debugf("[onHttpRequestBody] newBody=%s", newBody)
|
||||
@@ -315,7 +316,7 @@ func onHttpResponseBody(ctx wrapper.HttpContext, pluginConfig config.PluginConfi
|
||||
func normalizeOpenAiRequestBody(body []byte) []byte {
|
||||
var err error
|
||||
// Default setting include_usage.
|
||||
if gjson.GetBytes(body, "stream").Bool() {
|
||||
if gjson.GetBytes(body, "stream").Bool() && (!gjson.GetBytes(body, "stream_options").Exists() || !gjson.GetBytes(body, "stream_options.include_usage").Exists()) {
|
||||
body, err = sjson.SetBytes(body, "stream_options.include_usage", true)
|
||||
if err != nil {
|
||||
log.Errorf("set include_usage failed, err:%s", err)
|
||||
@@ -379,6 +380,36 @@ func getApiName(path string) provider.ApiName {
|
||||
if strings.HasSuffix(path, "/v1/models") {
|
||||
return provider.ApiNameModels
|
||||
}
|
||||
if strings.HasSuffix(path, "/v1/fine_tuning/jobs") {
|
||||
return provider.ApiNameFineTuningJobs
|
||||
}
|
||||
if util.RegRetrieveFineTuningJobPath.MatchString(path) {
|
||||
return provider.ApiNameRetrieveFineTuningJob
|
||||
}
|
||||
if util.RegRetrieveFineTuningJobEventsPath.MatchString(path) {
|
||||
return provider.ApiNameFineTuningJobEvents
|
||||
}
|
||||
if util.RegRetrieveFineTuningJobCheckpointsPath.MatchString(path) {
|
||||
return provider.ApiNameFineTuningJobCheckpoints
|
||||
}
|
||||
if util.RegCancelFineTuningJobPath.MatchString(path) {
|
||||
return provider.ApiNameCancelFineTuningJob
|
||||
}
|
||||
if util.RegResumeFineTuningJobPath.MatchString(path) {
|
||||
return provider.ApiNameResumeFineTuningJob
|
||||
}
|
||||
if util.RegPauseFineTuningJobPath.MatchString(path) {
|
||||
return provider.ApiNamePauseFineTuningJob
|
||||
}
|
||||
if util.RegFineTuningCheckpointPermissionPath.MatchString(path) {
|
||||
return provider.ApiNameFineTuningCheckpointPermissions
|
||||
}
|
||||
if util.RegDeleteFineTuningCheckpointPermissionPath.MatchString(path) {
|
||||
return provider.ApiNameDeleteFineTuningCheckpointPermission
|
||||
}
|
||||
if strings.HasSuffix(path, "/v1/responses") {
|
||||
return provider.ApiNameResponses
|
||||
}
|
||||
// cohere style
|
||||
if strings.HasSuffix(path, "/v1/rerank") {
|
||||
return provider.ApiNameCohereV1Rerank
|
||||
|
||||
@@ -42,8 +42,7 @@ const (
|
||||
requestIdHeader = "X-Amzn-Requestid"
|
||||
)
|
||||
|
||||
type bedrockProviderInitializer struct {
|
||||
}
|
||||
type bedrockProviderInitializer struct{}
|
||||
|
||||
func (b *bedrockProviderInitializer) ValidateConfig(config *ProviderConfig) error {
|
||||
if len(config.awsAccessKey) == 0 || len(config.awsSecretKey) == 0 {
|
||||
@@ -104,7 +103,7 @@ func (b *bedrockProvider) convertEventFromBedrockToOpenAI(ctx wrapper.HttpContex
|
||||
chatChoice.Delta = &chatMessage{Content: bedrockEvent.Delta.Text}
|
||||
}
|
||||
if bedrockEvent.StopReason != nil {
|
||||
chatChoice.FinishReason = stopReasonBedrock2OpenAI(*bedrockEvent.StopReason)
|
||||
chatChoice.FinishReason = util.Ptr(stopReasonBedrock2OpenAI(*bedrockEvent.StopReason))
|
||||
}
|
||||
choices = append(choices, chatChoice)
|
||||
requestId := ctx.GetStringContext(requestIdHeader, "")
|
||||
@@ -118,7 +117,7 @@ func (b *bedrockProvider) convertEventFromBedrockToOpenAI(ctx wrapper.HttpContex
|
||||
}
|
||||
if bedrockEvent.Usage != nil {
|
||||
openAIFormattedChunk.Choices = choices[:0]
|
||||
openAIFormattedChunk.Usage = usage{
|
||||
openAIFormattedChunk.Usage = &usage{
|
||||
CompletionTokens: bedrockEvent.Usage.OutputTokens,
|
||||
PromptTokens: bedrockEvent.Usage.InputTokens,
|
||||
TotalTokens: bedrockEvent.Usage.TotalTokens,
|
||||
@@ -724,21 +723,34 @@ func (b *bedrockProvider) onChatCompletionRequestBody(ctx wrapper.HttpContext, b
|
||||
|
||||
func (b *bedrockProvider) buildBedrockTextGenerationRequest(origRequest *chatCompletionRequest, headers http.Header) ([]byte, error) {
|
||||
messages := make([]bedrockMessage, 0, len(origRequest.Messages))
|
||||
for i := range origRequest.Messages {
|
||||
messages = append(messages, chatMessage2BedrockMessage(origRequest.Messages[i]))
|
||||
systemMessages := make([]systemContentBlock, 0)
|
||||
|
||||
for _, msg := range origRequest.Messages {
|
||||
if msg.Role == roleSystem {
|
||||
systemMessages = append(systemMessages, systemContentBlock{Text: msg.StringContent()})
|
||||
} else {
|
||||
messages = append(messages, chatMessage2BedrockMessage(msg))
|
||||
}
|
||||
}
|
||||
|
||||
request := &bedrockTextGenRequest{
|
||||
System: systemMessages,
|
||||
Messages: messages,
|
||||
InferenceConfig: bedrockInferenceConfig{
|
||||
MaxTokens: origRequest.MaxTokens,
|
||||
Temperature: origRequest.Temperature,
|
||||
TopP: origRequest.TopP,
|
||||
},
|
||||
AdditionalModelRequestFields: map[string]interface{}{},
|
||||
AdditionalModelRequestFields: make(map[string]interface{}),
|
||||
PerformanceConfig: PerformanceConfiguration{
|
||||
Latency: "standard",
|
||||
},
|
||||
}
|
||||
|
||||
for key, value := range b.config.bedrockAdditionalFields {
|
||||
request.AdditionalModelRequestFields[key] = value
|
||||
}
|
||||
|
||||
requestBytes, err := json.Marshal(request)
|
||||
b.setAuthHeaders(requestBytes, headers)
|
||||
return requestBytes, err
|
||||
@@ -756,18 +768,19 @@ func (b *bedrockProvider) buildChatCompletionResponse(ctx wrapper.HttpContext, b
|
||||
Role: bedrockResponse.Output.Message.Role,
|
||||
Content: outputContent,
|
||||
},
|
||||
FinishReason: stopReasonBedrock2OpenAI(bedrockResponse.StopReason),
|
||||
FinishReason: util.Ptr(stopReasonBedrock2OpenAI(bedrockResponse.StopReason)),
|
||||
},
|
||||
}
|
||||
requestId := ctx.GetStringContext(requestIdHeader, "")
|
||||
modelId, _ := url.QueryUnescape(ctx.GetStringContext(ctxKeyFinalRequestModel, ""))
|
||||
return &chatCompletionResponse{
|
||||
Id: requestId,
|
||||
Created: time.Now().UnixMilli() / 1000,
|
||||
Model: ctx.GetStringContext(ctxKeyFinalRequestModel, ""),
|
||||
Model: modelId,
|
||||
SystemFingerprint: "",
|
||||
Object: objectChatCompletion,
|
||||
Choices: choices,
|
||||
Usage: usage{
|
||||
Usage: &usage{
|
||||
PromptTokens: bedrockResponse.Usage.InputTokens,
|
||||
CompletionTokens: bedrockResponse.Usage.OutputTokens,
|
||||
TotalTokens: bedrockResponse.Usage.TotalTokens,
|
||||
@@ -908,6 +921,7 @@ func (b *bedrockProvider) setAuthHeaders(body []byte, headers http.Header) {
|
||||
}
|
||||
|
||||
func (b *bedrockProvider) generateSignature(path, amzDate, dateStamp string, body []byte) string {
|
||||
path = encodeSigV4Path(path)
|
||||
hashedPayload := sha256Hex(body)
|
||||
|
||||
endpoint := fmt.Sprintf(bedrockDefaultDomain, b.config.awsRegion)
|
||||
@@ -925,6 +939,17 @@ func (b *bedrockProvider) generateSignature(path, amzDate, dateStamp string, bod
|
||||
return signature
|
||||
}
|
||||
|
||||
func encodeSigV4Path(path string) string {
|
||||
segments := strings.Split(path, "/")
|
||||
for i, seg := range segments {
|
||||
if seg == "" {
|
||||
continue
|
||||
}
|
||||
segments[i] = url.PathEscape(seg)
|
||||
}
|
||||
return strings.Join(segments, "/")
|
||||
}
|
||||
|
||||
func getSignatureKey(key, dateStamp, region, service string) []byte {
|
||||
kDate := hmacSha256([]byte("AWS4"+key), dateStamp)
|
||||
kRegion := hmacSha256(kDate, region)
|
||||
|
||||
@@ -341,7 +341,7 @@ func (c *claudeProvider) responseClaude2OpenAI(ctx wrapper.HttpContext, origResp
|
||||
choice := chatCompletionChoice{
|
||||
Index: 0,
|
||||
Message: &chatMessage{Role: roleAssistant, Content: origResponse.Content[0].Text},
|
||||
FinishReason: stopReasonClaude2OpenAI(origResponse.StopReason),
|
||||
FinishReason: util.Ptr(stopReasonClaude2OpenAI(origResponse.StopReason)),
|
||||
}
|
||||
|
||||
return &chatCompletionResponse{
|
||||
@@ -351,7 +351,7 @@ func (c *claudeProvider) responseClaude2OpenAI(ctx wrapper.HttpContext, origResp
|
||||
SystemFingerprint: "",
|
||||
Object: objectChatCompletion,
|
||||
Choices: []chatCompletionChoice{choice},
|
||||
Usage: usage{
|
||||
Usage: &usage{
|
||||
PromptTokens: origResponse.Usage.InputTokens,
|
||||
CompletionTokens: origResponse.Usage.OutputTokens,
|
||||
TotalTokens: origResponse.Usage.InputTokens + origResponse.Usage.OutputTokens,
|
||||
@@ -404,7 +404,7 @@ func (c *claudeProvider) streamResponseClaude2OpenAI(ctx wrapper.HttpContext, or
|
||||
choice := chatCompletionChoice{
|
||||
Index: origResponse.Index,
|
||||
Delta: &chatMessage{},
|
||||
FinishReason: stopReasonClaude2OpenAI(origResponse.Delta.StopReason),
|
||||
FinishReason: util.Ptr(stopReasonClaude2OpenAI(origResponse.Delta.StopReason)),
|
||||
}
|
||||
return c.createChatCompletionResponse(ctx, origResponse, choice)
|
||||
case "message_stop":
|
||||
@@ -415,7 +415,7 @@ func (c *claudeProvider) streamResponseClaude2OpenAI(ctx wrapper.HttpContext, or
|
||||
Object: objectChatCompletionChunk,
|
||||
Choices: []chatCompletionChoice{},
|
||||
ServiceTier: c.serviceTier,
|
||||
Usage: usage{
|
||||
Usage: &usage{
|
||||
PromptTokens: c.usage.PromptTokens,
|
||||
CompletionTokens: c.usage.CompletionTokens,
|
||||
TotalTokens: c.usage.TotalTokens,
|
||||
|
||||
@@ -116,23 +116,23 @@ func (d *difyProvider) responseDify2OpenAI(ctx wrapper.HttpContext, response *Di
|
||||
choice = chatCompletionChoice{
|
||||
Index: 0,
|
||||
Message: &chatMessage{Role: roleAssistant, Content: response.Answer},
|
||||
FinishReason: finishReasonStop,
|
||||
FinishReason: util.Ptr(finishReasonStop),
|
||||
}
|
||||
//response header中增加conversationId字段
|
||||
// response header中增加conversationId字段
|
||||
_ = proxywasm.ReplaceHttpResponseHeader("ConversationId", response.ConversationId)
|
||||
id = response.ConversationId
|
||||
case BotTypeCompletion:
|
||||
choice = chatCompletionChoice{
|
||||
Index: 0,
|
||||
Message: &chatMessage{Role: roleAssistant, Content: response.Answer},
|
||||
FinishReason: finishReasonStop,
|
||||
FinishReason: util.Ptr(finishReasonStop),
|
||||
}
|
||||
id = response.MessageId
|
||||
case BotTypeWorkflow:
|
||||
choice = chatCompletionChoice{
|
||||
Index: 0,
|
||||
Message: &chatMessage{Role: roleAssistant, Content: response.Data.Outputs[d.config.outputVariable]},
|
||||
FinishReason: finishReasonStop,
|
||||
FinishReason: util.Ptr(finishReasonStop),
|
||||
}
|
||||
id = response.Data.WorkflowId
|
||||
}
|
||||
@@ -143,7 +143,7 @@ func (d *difyProvider) responseDify2OpenAI(ctx wrapper.HttpContext, response *Di
|
||||
SystemFingerprint: "",
|
||||
Object: objectChatCompletion,
|
||||
Choices: []chatCompletionChoice{choice},
|
||||
Usage: response.MetaData.Usage,
|
||||
Usage: &response.MetaData.Usage,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -188,7 +188,7 @@ func (d *difyProvider) OnStreamingResponseBody(ctx wrapper.HttpContext, name Api
|
||||
func (d *difyProvider) streamResponseDify2OpenAI(ctx wrapper.HttpContext, response *DifyChunkChatResponse) *chatCompletionResponse {
|
||||
var choice chatCompletionChoice
|
||||
var id string
|
||||
var responseUsage usage
|
||||
var responseUsage *usage
|
||||
switch d.config.botType {
|
||||
case BotTypeChat, BotTypeAgent:
|
||||
choice = chatCompletionChoice{
|
||||
@@ -211,9 +211,9 @@ func (d *difyProvider) streamResponseDify2OpenAI(ctx wrapper.HttpContext, respon
|
||||
id = response.Data.WorkflowId
|
||||
}
|
||||
if response.Event == "message_end" || response.Event == "workflow_finished" {
|
||||
choice.FinishReason = finishReasonStop
|
||||
choice.FinishReason = util.Ptr(finishReasonStop)
|
||||
if response.Event == "message_end" {
|
||||
responseUsage = usage{
|
||||
responseUsage = &usage{
|
||||
PromptTokens: response.MetaData.Usage.PromptTokens,
|
||||
CompletionTokens: response.MetaData.Usage.CompletionTokens,
|
||||
TotalTokens: response.MetaData.Usage.TotalTokens,
|
||||
|
||||
@@ -6,8 +6,11 @@ import (
|
||||
"strings"
|
||||
|
||||
"github.com/alibaba/higress/plugins/wasm-go/extensions/ai-proxy/util"
|
||||
"github.com/alibaba/higress/plugins/wasm-go/pkg/log"
|
||||
"github.com/alibaba/higress/plugins/wasm-go/pkg/wrapper"
|
||||
"github.com/higress-group/proxy-wasm-go-sdk/proxywasm/types"
|
||||
"github.com/tidwall/gjson"
|
||||
"github.com/tidwall/sjson"
|
||||
)
|
||||
|
||||
const (
|
||||
@@ -15,6 +18,7 @@ const (
|
||||
doubaoChatCompletionPath = "/api/v3/chat/completions"
|
||||
doubaoEmbeddingsPath = "/api/v3/embeddings"
|
||||
doubaoImageGenerationPath = "/api/v3/images/generations"
|
||||
doubaoResponsesPath = "/api/v3/responses"
|
||||
)
|
||||
|
||||
type doubaoProviderInitializer struct{}
|
||||
@@ -31,6 +35,7 @@ func (m *doubaoProviderInitializer) DefaultCapabilities() map[string]string {
|
||||
string(ApiNameChatCompletion): doubaoChatCompletionPath,
|
||||
string(ApiNameEmbeddings): doubaoEmbeddingsPath,
|
||||
string(ApiNameImageGeneration): doubaoImageGenerationPath,
|
||||
string(ApiNameResponses): doubaoResponsesPath,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -70,6 +75,32 @@ func (m *doubaoProvider) TransformRequestHeaders(ctx wrapper.HttpContext, apiNam
|
||||
headers.Del("Content-Length")
|
||||
}
|
||||
|
||||
func (m *doubaoProvider) TransformRequestBody(ctx wrapper.HttpContext, apiName ApiName, body []byte) ([]byte, error) {
|
||||
var err error
|
||||
switch apiName {
|
||||
case ApiNameResponses:
|
||||
// 移除火山 responses 接口暂时不支持的参数
|
||||
// 参考: https://www.volcengine.com/docs/82379/1569618
|
||||
// TODO: 这里应该用 DTO 处理
|
||||
for _, param := range []string{"parallel_tool_calls", "tool_choice"} {
|
||||
body, err = sjson.DeleteBytes(body, param)
|
||||
if err != nil {
|
||||
log.Warnf("[doubao] failed to delete %s in request body, err: %v", param, err)
|
||||
}
|
||||
}
|
||||
case ApiNameImageGeneration:
|
||||
// 火山生图接口默认会带上水印,但 OpenAI 接口不支持此参数
|
||||
// 参考: https://www.volcengine.com/docs/82379/1541523
|
||||
if res := gjson.GetBytes(body, "watermark"); !res.Exists() {
|
||||
body, err = sjson.SetBytes(body, "watermark", false)
|
||||
if err != nil {
|
||||
log.Warnf("[doubao] failed to set watermark in request body, err: %v", err)
|
||||
}
|
||||
}
|
||||
}
|
||||
return m.config.defaultTransformRequestBody(ctx, apiName, body)
|
||||
}
|
||||
|
||||
func (m *doubaoProvider) GetApiName(path string) ApiName {
|
||||
if strings.Contains(path, doubaoChatCompletionPath) {
|
||||
return ApiNameChatCompletion
|
||||
@@ -80,5 +111,8 @@ func (m *doubaoProvider) GetApiName(path string) ApiName {
|
||||
if strings.Contains(path, doubaoImageGenerationPath) {
|
||||
return ApiNameImageGeneration
|
||||
}
|
||||
if strings.Contains(path, doubaoResponsesPath) {
|
||||
return ApiNameResponses
|
||||
}
|
||||
return ""
|
||||
}
|
||||
|
||||
@@ -500,7 +500,7 @@ func (g *geminiProvider) buildChatCompletionResponse(ctx wrapper.HttpContext, re
|
||||
Object: objectChatCompletion,
|
||||
Created: time.Now().UnixMilli() / 1000,
|
||||
Model: ctx.GetStringContext(ctxKeyFinalRequestModel, ""),
|
||||
Usage: usage{
|
||||
Usage: &usage{
|
||||
PromptTokens: response.UsageMetadata.PromptTokenCount,
|
||||
CompletionTokens: response.UsageMetadata.CandidatesTokenCount,
|
||||
TotalTokens: response.UsageMetadata.TotalTokenCount,
|
||||
@@ -514,7 +514,7 @@ func (g *geminiProvider) buildChatCompletionResponse(ctx wrapper.HttpContext, re
|
||||
Message: &chatMessage{
|
||||
Role: roleAssistant,
|
||||
},
|
||||
FinishReason: finishReasonStop,
|
||||
FinishReason: util.Ptr(finishReasonStop),
|
||||
}
|
||||
if part.FunctionCall != nil {
|
||||
choice.Message.ToolCalls = g.buildToolCalls(&candidate)
|
||||
@@ -524,7 +524,7 @@ func (g *geminiProvider) buildChatCompletionResponse(ctx wrapper.HttpContext, re
|
||||
choice.Message.Content = part.Text
|
||||
}
|
||||
|
||||
choice.FinishReason = candidate.FinishReason
|
||||
choice.FinishReason = util.Ptr(strings.ToLower(candidate.FinishReason))
|
||||
fullTextResponse.Choices = append(fullTextResponse.Choices, choice)
|
||||
choiceIndex += 1
|
||||
}
|
||||
@@ -560,6 +560,9 @@ func (g *geminiProvider) buildChatCompletionStreamResponse(ctx wrapper.HttpConte
|
||||
var choice chatCompletionChoice
|
||||
if len(geminiResp.Candidates) > 0 && len(geminiResp.Candidates[0].Content.Parts) > 0 {
|
||||
choice.Delta = &chatMessage{Content: geminiResp.Candidates[0].Content.Parts[0].Text}
|
||||
if geminiResp.Candidates[0].FinishReason != "" {
|
||||
choice.FinishReason = util.Ptr(strings.ToLower(geminiResp.Candidates[0].FinishReason))
|
||||
}
|
||||
}
|
||||
streamResponse := chatCompletionResponse{
|
||||
Id: fmt.Sprintf("chatcmpl-%s", uuid.New().String()),
|
||||
@@ -567,7 +570,7 @@ func (g *geminiProvider) buildChatCompletionStreamResponse(ctx wrapper.HttpConte
|
||||
Created: time.Now().UnixMilli() / 1000,
|
||||
Model: ctx.GetStringContext(ctxKeyFinalRequestModel, ""),
|
||||
Choices: []chatCompletionChoice{choice},
|
||||
Usage: usage{
|
||||
Usage: &usage{
|
||||
PromptTokens: geminiResp.UsageMetadata.PromptTokenCount,
|
||||
CompletionTokens: geminiResp.UsageMetadata.CandidatesTokenCount,
|
||||
TotalTokens: geminiResp.UsageMetadata.TotalTokenCount,
|
||||
|
||||
@@ -387,7 +387,7 @@ func (m *hunyuanProvider) convertChunkFromHunyuanToOpenAI(ctx wrapper.HttpContex
|
||||
Model: ctx.GetStringContext(ctxKeyFinalRequestModel, ""),
|
||||
SystemFingerprint: "",
|
||||
Object: objectChatCompletionChunk,
|
||||
Usage: usage{
|
||||
Usage: &usage{
|
||||
PromptTokens: hunyuanFormattedChunk.Usage.PromptTokens,
|
||||
CompletionTokens: hunyuanFormattedChunk.Usage.CompletionTokens,
|
||||
TotalTokens: hunyuanFormattedChunk.Usage.TotalTokens,
|
||||
@@ -400,7 +400,7 @@ func (m *hunyuanProvider) convertChunkFromHunyuanToOpenAI(ctx wrapper.HttpContex
|
||||
if hunyuanFormattedChunk.Choices[0].FinishReason == hunyuanStreamEndMark {
|
||||
// log.Debugf("@@@ --- 最后chunk: ")
|
||||
openAIFormattedChunk.Choices = append(openAIFormattedChunk.Choices, chatCompletionChoice{
|
||||
FinishReason: hunyuanFormattedChunk.Choices[0].FinishReason,
|
||||
FinishReason: util.Ptr(hunyuanFormattedChunk.Choices[0].FinishReason),
|
||||
})
|
||||
} else {
|
||||
deltaMsg := chatMessage{
|
||||
@@ -495,7 +495,7 @@ func (m *hunyuanProvider) buildChatCompletionResponse(ctx wrapper.HttpContext, h
|
||||
Content: choice.Message.Content,
|
||||
ToolCalls: nil,
|
||||
},
|
||||
FinishReason: choice.FinishReason,
|
||||
FinishReason: util.Ptr(choice.FinishReason),
|
||||
})
|
||||
}
|
||||
return &chatCompletionResponse{
|
||||
@@ -505,7 +505,7 @@ func (m *hunyuanProvider) buildChatCompletionResponse(ctx wrapper.HttpContext, h
|
||||
SystemFingerprint: "",
|
||||
Object: objectChatCompletion,
|
||||
Choices: choices,
|
||||
Usage: usage{
|
||||
Usage: &usage{
|
||||
PromptTokens: hunyuanResponse.Response.Usage.PromptTokens,
|
||||
CompletionTokens: hunyuanResponse.Response.Usage.CompletionTokens,
|
||||
TotalTokens: hunyuanResponse.Response.Usage.TotalTokens,
|
||||
|
||||
@@ -36,8 +36,7 @@ const (
|
||||
defaultSenderName string = "小明"
|
||||
)
|
||||
|
||||
type minimaxProviderInitializer struct {
|
||||
}
|
||||
type minimaxProviderInitializer struct{}
|
||||
|
||||
func (m *minimaxProviderInitializer) ValidateConfig(config *ProviderConfig) error {
|
||||
// If using the chat completion Pro API, a group ID must be set.
|
||||
@@ -368,7 +367,7 @@ func (m *minimaxProvider) responseProToOpenAI(response *minimaxChatCompletionPro
|
||||
Content: message.Text,
|
||||
}
|
||||
choices = append(choices, chatCompletionChoice{
|
||||
FinishReason: choice.FinishReason,
|
||||
FinishReason: util.Ptr(choice.FinishReason),
|
||||
Index: messageIndex,
|
||||
Message: message,
|
||||
})
|
||||
@@ -381,7 +380,7 @@ func (m *minimaxProvider) responseProToOpenAI(response *minimaxChatCompletionPro
|
||||
Created: response.Created,
|
||||
Model: response.Model,
|
||||
Choices: choices,
|
||||
Usage: usage{
|
||||
Usage: &usage{
|
||||
TotalTokens: int(response.Usage.TotalTokens),
|
||||
PromptTokens: int(response.Usage.PromptTokens),
|
||||
CompletionTokens: int(response.Usage.CompletionTokens),
|
||||
|
||||
@@ -138,15 +138,15 @@ type chatCompletionResponse struct {
|
||||
ServiceTier string `json:"service_tier,omitempty"`
|
||||
SystemFingerprint string `json:"system_fingerprint,omitempty"`
|
||||
Object string `json:"object,omitempty"`
|
||||
Usage usage `json:"usage,omitempty"`
|
||||
Usage *usage `json:"usage"`
|
||||
}
|
||||
|
||||
type chatCompletionChoice struct {
|
||||
Index int `json:"index"`
|
||||
Message *chatMessage `json:"message,omitempty"`
|
||||
Delta *chatMessage `json:"delta,omitempty"`
|
||||
FinishReason string `json:"finish_reason,omitempty"`
|
||||
Logprobs map[string]interface{} `json:"logprobs,omitempty"`
|
||||
FinishReason *string `json:"finish_reason"`
|
||||
Logprobs map[string]interface{} `json:"logprobs"`
|
||||
}
|
||||
|
||||
type usage struct {
|
||||
|
||||
@@ -26,24 +26,34 @@ func (m *openaiProviderInitializer) ValidateConfig(config *ProviderConfig) error
|
||||
|
||||
func (m *openaiProviderInitializer) DefaultCapabilities() map[string]string {
|
||||
return map[string]string{
|
||||
string(ApiNameCompletion): PathOpenAICompletions,
|
||||
string(ApiNameChatCompletion): PathOpenAIChatCompletions,
|
||||
string(ApiNameEmbeddings): PathOpenAIEmbeddings,
|
||||
string(ApiNameImageGeneration): PathOpenAIImageGeneration,
|
||||
string(ApiNameImageEdit): PathOpenAIImageEdit,
|
||||
string(ApiNameImageVariation): PathOpenAIImageVariation,
|
||||
string(ApiNameAudioSpeech): PathOpenAIAudioSpeech,
|
||||
string(ApiNameModels): PathOpenAIModels,
|
||||
string(ApiNameFiles): PathOpenAIFiles,
|
||||
string(ApiNameRetrieveFile): PathOpenAIRetrieveFile,
|
||||
string(ApiNameRetrieveFileContent): PathOpenAIRetrieveFileContent,
|
||||
string(ApiNameBatches): PathOpenAIBatches,
|
||||
string(ApiNameRetrieveBatch): PathOpenAIRetrieveBatch,
|
||||
string(ApiNameCancelBatch): PathOpenAICancelBatch,
|
||||
string(ApiNameResponses): PathOpenAIResponses,
|
||||
string(ApiNameCompletion): PathOpenAICompletions,
|
||||
string(ApiNameChatCompletion): PathOpenAIChatCompletions,
|
||||
string(ApiNameEmbeddings): PathOpenAIEmbeddings,
|
||||
string(ApiNameImageGeneration): PathOpenAIImageGeneration,
|
||||
string(ApiNameImageEdit): PathOpenAIImageEdit,
|
||||
string(ApiNameImageVariation): PathOpenAIImageVariation,
|
||||
string(ApiNameAudioSpeech): PathOpenAIAudioSpeech,
|
||||
string(ApiNameModels): PathOpenAIModels,
|
||||
string(ApiNameFiles): PathOpenAIFiles,
|
||||
string(ApiNameRetrieveFile): PathOpenAIRetrieveFile,
|
||||
string(ApiNameRetrieveFileContent): PathOpenAIRetrieveFileContent,
|
||||
string(ApiNameBatches): PathOpenAIBatches,
|
||||
string(ApiNameRetrieveBatch): PathOpenAIRetrieveBatch,
|
||||
string(ApiNameCancelBatch): PathOpenAICancelBatch,
|
||||
string(ApiNameResponses): PathOpenAIResponses,
|
||||
string(ApiNameFineTuningJobs): PathOpenAIFineTuningJobs,
|
||||
string(ApiNameRetrieveFineTuningJob): PathOpenAIRetrieveFineTuningJob,
|
||||
string(ApiNameFineTuningJobEvents): PathOpenAIFineTuningJobEvents,
|
||||
string(ApiNameFineTuningJobCheckpoints): PathOpenAIFineTuningJobCheckpoints,
|
||||
string(ApiNameCancelFineTuningJob): PathOpenAICancelFineTuningJob,
|
||||
string(ApiNameResumeFineTuningJob): PathOpenAIResumeFineTuningJob,
|
||||
string(ApiNamePauseFineTuningJob): PathOpenAIPauseFineTuningJob,
|
||||
string(ApiNameFineTuningCheckpointPermissions): PathOpenAIFineTuningCheckpointPermissions,
|
||||
string(ApiNameDeleteFineTuningCheckpointPermission): PathOpenAIFineDeleteTuningCheckpointPermission,
|
||||
}
|
||||
}
|
||||
|
||||
// isDirectPath checks if the path is a known standard OpenAI interface path.
|
||||
func isDirectPath(path string) bool {
|
||||
return strings.HasSuffix(path, "/completions") ||
|
||||
strings.HasSuffix(path, "/embeddings") ||
|
||||
@@ -52,7 +62,9 @@ func isDirectPath(path string) bool {
|
||||
strings.HasSuffix(path, "/images/variations") ||
|
||||
strings.HasSuffix(path, "/images/edits") ||
|
||||
strings.HasSuffix(path, "/models") ||
|
||||
strings.HasSuffix(path, "/responses")
|
||||
strings.HasSuffix(path, "/responses") ||
|
||||
strings.HasSuffix(path, "/fine_tuning/jobs") ||
|
||||
strings.HasSuffix(path, "/fine_tuning/checkpoints")
|
||||
}
|
||||
|
||||
func (m *openaiProviderInitializer) CreateProvider(config ProviderConfig) (Provider, error) {
|
||||
@@ -108,9 +120,7 @@ func (m *openaiProvider) OnRequestHeaders(ctx wrapper.HttpContext, apiName ApiNa
|
||||
func (m *openaiProvider) TransformRequestHeaders(ctx wrapper.HttpContext, apiName ApiName, headers http.Header) {
|
||||
if m.isDirectCustomPath {
|
||||
util.OverwriteRequestPathHeader(headers, m.customPath)
|
||||
}
|
||||
|
||||
if apiName != "" {
|
||||
} else if apiName != "" {
|
||||
util.OverwriteRequestPathHeaderByCapability(headers, string(apiName), m.config.capabilities)
|
||||
}
|
||||
|
||||
|
||||
@@ -27,40 +27,60 @@ const (
|
||||
// ApiName 格式 {vendor}/{version}/{apitype}
|
||||
// 表示遵循 厂商/版本/接口类型 的格式
|
||||
// 目前openai是事实意义上的标准,但是也有其他厂商存在其他任务的一些可能的标准,比如cohere的rerank
|
||||
ApiNameCompletion ApiName = "openai/v1/completions"
|
||||
ApiNameChatCompletion ApiName = "openai/v1/chatcompletions"
|
||||
ApiNameEmbeddings ApiName = "openai/v1/embeddings"
|
||||
ApiNameImageGeneration ApiName = "openai/v1/imagegeneration"
|
||||
ApiNameImageEdit ApiName = "openai/v1/imageedit"
|
||||
ApiNameImageVariation ApiName = "openai/v1/imagevariation"
|
||||
ApiNameAudioSpeech ApiName = "openai/v1/audiospeech"
|
||||
ApiNameFiles ApiName = "openai/v1/files"
|
||||
ApiNameRetrieveFile ApiName = "openai/v1/retrievefile"
|
||||
ApiNameRetrieveFileContent ApiName = "openai/v1/retrievefilecontent"
|
||||
ApiNameBatches ApiName = "openai/v1/batches"
|
||||
ApiNameRetrieveBatch ApiName = "openai/v1/retrievebatch"
|
||||
ApiNameCancelBatch ApiName = "openai/v1/cancelbatch"
|
||||
ApiNameModels ApiName = "openai/v1/models"
|
||||
ApiNameResponses ApiName = "openai/v1/responses"
|
||||
ApiNameCompletion ApiName = "openai/v1/completions"
|
||||
ApiNameChatCompletion ApiName = "openai/v1/chatcompletions"
|
||||
ApiNameEmbeddings ApiName = "openai/v1/embeddings"
|
||||
ApiNameImageGeneration ApiName = "openai/v1/imagegeneration"
|
||||
ApiNameImageEdit ApiName = "openai/v1/imageedit"
|
||||
ApiNameImageVariation ApiName = "openai/v1/imagevariation"
|
||||
ApiNameAudioSpeech ApiName = "openai/v1/audiospeech"
|
||||
ApiNameFiles ApiName = "openai/v1/files"
|
||||
ApiNameRetrieveFile ApiName = "openai/v1/retrievefile"
|
||||
ApiNameRetrieveFileContent ApiName = "openai/v1/retrievefilecontent"
|
||||
ApiNameBatches ApiName = "openai/v1/batches"
|
||||
ApiNameRetrieveBatch ApiName = "openai/v1/retrievebatch"
|
||||
ApiNameCancelBatch ApiName = "openai/v1/cancelbatch"
|
||||
ApiNameModels ApiName = "openai/v1/models"
|
||||
ApiNameResponses ApiName = "openai/v1/responses"
|
||||
ApiNameFineTuningJobs ApiName = "openai/v1/fine-tuningjobs"
|
||||
ApiNameRetrieveFineTuningJob ApiName = "openai/v1/retrievefine-tuningjob"
|
||||
ApiNameFineTuningJobEvents ApiName = "openai/v1/fine-tuningjobsevents"
|
||||
ApiNameFineTuningJobCheckpoints ApiName = "openai/v1/fine-tuningjobcheckpoints"
|
||||
ApiNameCancelFineTuningJob ApiName = "openai/v1/cancelfine-tuningjob"
|
||||
ApiNameResumeFineTuningJob ApiName = "openai/v1/resumefine-tuningjob"
|
||||
ApiNamePauseFineTuningJob ApiName = "openai/v1/pausefine-tuningjob"
|
||||
ApiNameFineTuningCheckpointPermissions ApiName = "openai/v1/fine-tuningjobcheckpointpermissions"
|
||||
ApiNameDeleteFineTuningCheckpointPermission ApiName = "openai/v1/deletefine-tuningjobcheckpointpermission"
|
||||
|
||||
PathOpenAICompletions = "/v1/completions"
|
||||
PathOpenAIChatCompletions = "/v1/chat/completions"
|
||||
PathOpenAIEmbeddings = "/v1/embeddings"
|
||||
PathOpenAIFiles = "/v1/files"
|
||||
PathOpenAIRetrieveFile = "/v1/files/{file_id}"
|
||||
PathOpenAIRetrieveFileContent = "/v1/files/{file_id}/content"
|
||||
PathOpenAIBatches = "/v1/batches"
|
||||
PathOpenAIRetrieveBatch = "/v1/batches/{batch_id}"
|
||||
PathOpenAICancelBatch = "/v1/batches/{batch_id}/cancel"
|
||||
PathOpenAIModels = "/v1/models"
|
||||
PathOpenAIImageGeneration = "/v1/images/generations"
|
||||
PathOpenAIImageEdit = "/v1/images/edits"
|
||||
PathOpenAIImageVariation = "/v1/images/variations"
|
||||
PathOpenAIAudioSpeech = "/v1/audio/speech"
|
||||
PathOpenAIResponses = "/v1/responses"
|
||||
PathOpenAICompletions = "/v1/completions"
|
||||
PathOpenAIChatCompletions = "/v1/chat/completions"
|
||||
PathOpenAIEmbeddings = "/v1/embeddings"
|
||||
PathOpenAIFiles = "/v1/files"
|
||||
PathOpenAIRetrieveFile = "/v1/files/{file_id}"
|
||||
PathOpenAIRetrieveFileContent = "/v1/files/{file_id}/content"
|
||||
PathOpenAIBatches = "/v1/batches"
|
||||
PathOpenAIRetrieveBatch = "/v1/batches/{batch_id}"
|
||||
PathOpenAICancelBatch = "/v1/batches/{batch_id}/cancel"
|
||||
PathOpenAIModels = "/v1/models"
|
||||
PathOpenAIImageGeneration = "/v1/images/generations"
|
||||
PathOpenAIImageEdit = "/v1/images/edits"
|
||||
PathOpenAIImageVariation = "/v1/images/variations"
|
||||
PathOpenAIAudioSpeech = "/v1/audio/speech"
|
||||
PathOpenAIResponses = "/v1/responses"
|
||||
PathOpenAIFineTuningJobs = "/v1/fine_tuning/jobs"
|
||||
PathOpenAIRetrieveFineTuningJob = "/v1/fine_tuning/jobs/{fine_tuning_job_id}"
|
||||
PathOpenAIFineTuningJobEvents = "/v1/fine_tuning/jobs/{fine_tuning_job_id}/events"
|
||||
PathOpenAIFineTuningJobCheckpoints = "/v1/fine_tuning/jobs/{fine_tuning_job_id}/checkpoints"
|
||||
PathOpenAICancelFineTuningJob = "/v1/fine_tuning/jobs/{fine_tuning_job_id}/cancel"
|
||||
PathOpenAIResumeFineTuningJob = "/v1/fine_tuning/jobs/{fine_tuning_job_id}/resume"
|
||||
PathOpenAIPauseFineTuningJob = "/v1/fine_tuning/jobs/{fine_tuning_job_id}/pause"
|
||||
PathOpenAIFineTuningCheckpointPermissions = "/v1/fine_tuning/checkpoints/{fine_tuned_model_checkpoint}/permissions"
|
||||
PathOpenAIFineDeleteTuningCheckpointPermission = "/v1/fine_tuning/checkpoints/{fine_tuned_model_checkpoint}/permissions/{permission_id}"
|
||||
|
||||
// TODO: 以下是一些非标准的API名称,需要进一步确认是否支持
|
||||
ApiNameCohereV1Rerank ApiName = "cohere/v1/rerank"
|
||||
ApiNameQwenAsyncAIGC ApiName = "api/v1/services/aigc"
|
||||
ApiNameQwenAsyncTask ApiName = "api/v1/tasks/"
|
||||
|
||||
providerTypeMoonshot = "moonshot"
|
||||
providerTypeAzure = "azure"
|
||||
@@ -276,6 +296,9 @@ type ProviderConfig struct {
|
||||
// @Title zh-CN Amazon Bedrock Region
|
||||
// @Description zh-CN 仅适用于Amazon Bedrock服务访问
|
||||
awsRegion string `required:"false" yaml:"awsRegion" json:"awsRegion"`
|
||||
// @Title zh-CN Amazon Bedrock 额外模型请求参数
|
||||
// @Description zh-CN 仅适用于Amazon Bedrock服务,用于设置模型特定的推理参数
|
||||
bedrockAdditionalFields map[string]interface{} `required:"false" yaml:"bedrockAdditionalFields" json:"bedrockAdditionalFields"`
|
||||
// @Title zh-CN minimax API type
|
||||
// @Description zh-CN 仅适用于 minimax 服务。minimax API 类型,v2 和 pro 中选填一项,默认值为 v2
|
||||
minimaxApiType string `required:"false" yaml:"minimaxApiType" json:"minimaxApiType"`
|
||||
@@ -335,6 +358,8 @@ type ProviderConfig struct {
|
||||
// @Title zh-CN 额外支持的ai能力
|
||||
// @Description zh-CN 开放的ai能力和urlpath映射,例如: {"openai/v1/chatcompletions": "/v1/chat/completions"}
|
||||
capabilities map[string]string
|
||||
// @Title zh-CN 如果配置了subPath,将会先移除请求path中该前缀,再进行后续处理
|
||||
subPath string `required:"false" yaml:"subPath" json:"subPath"`
|
||||
}
|
||||
|
||||
func (c *ProviderConfig) GetId() string {
|
||||
@@ -404,6 +429,12 @@ func (c *ProviderConfig) FromJson(json gjson.Result) {
|
||||
c.awsAccessKey = json.Get("awsAccessKey").String()
|
||||
c.awsSecretKey = json.Get("awsSecretKey").String()
|
||||
c.awsRegion = json.Get("awsRegion").String()
|
||||
if c.typ == providerTypeBedrock {
|
||||
c.bedrockAdditionalFields = make(map[string]interface{})
|
||||
for k, v := range json.Get("bedrockAdditionalFields").Map() {
|
||||
c.bedrockAdditionalFields[k] = v.Value()
|
||||
}
|
||||
}
|
||||
c.minimaxApiType = json.Get("minimaxApiType").String()
|
||||
c.minimaxGroupId = json.Get("minimaxGroupId").String()
|
||||
c.cloudflareAccountId = json.Get("cloudflareAccountId").String()
|
||||
@@ -496,6 +527,7 @@ func (c *ProviderConfig) FromJson(json gjson.Result) {
|
||||
c.capabilities[capability] = pathJson.String()
|
||||
}
|
||||
}
|
||||
c.subPath = json.Get("subPath").String()
|
||||
}
|
||||
|
||||
func (c *ProviderConfig) Validate() error {
|
||||
@@ -806,10 +838,17 @@ func (c *ProviderConfig) handleRequestBody(
|
||||
|
||||
func (c *ProviderConfig) handleRequestHeaders(provider Provider, ctx wrapper.HttpContext, apiName ApiName) {
|
||||
headers := util.GetOriginalRequestHeaders()
|
||||
originPath := headers.Get(":path")
|
||||
if c.subPath != "" {
|
||||
headers.Set(":path", strings.TrimPrefix(originPath, c.subPath))
|
||||
}
|
||||
if handler, ok := provider.(TransformRequestHeadersHandler); ok {
|
||||
handler.TransformRequestHeaders(ctx, apiName, headers)
|
||||
util.ReplaceRequestHeaders(headers)
|
||||
}
|
||||
if headers.Get(":path") != originPath {
|
||||
headers.Set("X-ENVOY-ORIGINAL-PATH", originPath)
|
||||
}
|
||||
util.ReplaceRequestHeaders(headers)
|
||||
}
|
||||
|
||||
// defaultTransformRequestBody 默认的请求体转换方法,只做模型映射,用slog替换模型名称,不用序列化和反序列化,提高性能
|
||||
@@ -840,11 +879,14 @@ func (c *ProviderConfig) DefaultTransformResponseHeaders(ctx wrapper.HttpContext
|
||||
func (c *ProviderConfig) needToProcessRequestBody(apiName ApiName) bool {
|
||||
switch apiName {
|
||||
case ApiNameChatCompletion,
|
||||
ApiNameCompletion,
|
||||
ApiNameEmbeddings,
|
||||
ApiNameImageGeneration,
|
||||
ApiNameImageEdit,
|
||||
ApiNameImageVariation,
|
||||
ApiNameAudioSpeech:
|
||||
ApiNameAudioSpeech,
|
||||
ApiNameFineTuningJobs,
|
||||
ApiNameResponses:
|
||||
return true
|
||||
}
|
||||
return false
|
||||
|
||||
@@ -37,6 +37,9 @@ const (
|
||||
qwenBailianPath = "/api/v1/apps"
|
||||
qwenMultimodalGenerationPath = "/api/v1/services/aigc/multimodal-generation/generation"
|
||||
|
||||
qwenAsyncAIGCPath = "/api/v1/services/aigc/"
|
||||
qwenAsyncTaskPath = "/api/v1/tasks/"
|
||||
|
||||
qwenTopPMin = 0.000001
|
||||
qwenTopPMax = 0.999999
|
||||
|
||||
@@ -74,6 +77,8 @@ func (m *qwenProviderInitializer) DefaultCapabilities(qwenEnableCompatible bool)
|
||||
return map[string]string{
|
||||
string(ApiNameChatCompletion): qwenChatCompletionPath,
|
||||
string(ApiNameEmbeddings): qwenTextEmbeddingPath,
|
||||
string(ApiNameQwenAsyncAIGC): qwenAsyncAIGCPath,
|
||||
string(ApiNameQwenAsyncTask): qwenAsyncTaskPath,
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -302,7 +307,7 @@ func (m *qwenProvider) buildChatCompletionResponse(ctx wrapper.HttpContext, qwen
|
||||
message := qwenMessageToChatMessage(qwenChoice.Message, m.config.reasoningContentMode)
|
||||
choices = append(choices, chatCompletionChoice{
|
||||
Message: &message,
|
||||
FinishReason: qwenChoice.FinishReason,
|
||||
FinishReason: util.Ptr(qwenChoice.FinishReason),
|
||||
})
|
||||
}
|
||||
return &chatCompletionResponse{
|
||||
@@ -312,7 +317,7 @@ func (m *qwenProvider) buildChatCompletionResponse(ctx wrapper.HttpContext, qwen
|
||||
SystemFingerprint: "",
|
||||
Object: objectChatCompletion,
|
||||
Choices: choices,
|
||||
Usage: usage{
|
||||
Usage: &usage{
|
||||
PromptTokens: qwenResponse.Usage.InputTokens,
|
||||
CompletionTokens: qwenResponse.Usage.OutputTokens,
|
||||
TotalTokens: qwenResponse.Usage.TotalTokens,
|
||||
@@ -413,11 +418,11 @@ func (m *qwenProvider) buildChatCompletionStreamingResponse(ctx wrapper.HttpCont
|
||||
|
||||
if finished {
|
||||
finishResponse := *&baseMessage
|
||||
finishResponse.Choices = append(finishResponse.Choices, chatCompletionChoice{Delta: &chatMessage{}, FinishReason: qwenChoice.FinishReason})
|
||||
finishResponse.Choices = append(finishResponse.Choices, chatCompletionChoice{Delta: &chatMessage{}, FinishReason: util.Ptr(qwenChoice.FinishReason)})
|
||||
|
||||
usageResponse := *&baseMessage
|
||||
usageResponse.Choices = []chatCompletionChoice{{Delta: &chatMessage{}}}
|
||||
usageResponse.Usage = usage{
|
||||
usageResponse.Usage = &usage{
|
||||
PromptTokens: qwenResponse.Usage.InputTokens,
|
||||
CompletionTokens: qwenResponse.Usage.OutputTokens,
|
||||
TotalTokens: qwenResponse.Usage.TotalTokens,
|
||||
@@ -689,6 +694,10 @@ func (m *qwenProvider) GetApiName(path string) ApiName {
|
||||
case strings.Contains(path, qwenTextEmbeddingPath),
|
||||
strings.Contains(path, qwenCompatibleTextEmbeddingPath):
|
||||
return ApiNameEmbeddings
|
||||
case strings.Contains(path, qwenAsyncAIGCPath):
|
||||
return ApiNameQwenAsyncAIGC
|
||||
case strings.Contains(path, qwenAsyncTaskPath):
|
||||
return ApiNameQwenAsyncTask
|
||||
default:
|
||||
return ""
|
||||
}
|
||||
|
||||
@@ -150,7 +150,7 @@ func (p *sparkProvider) responseSpark2OpenAI(ctx wrapper.HttpContext, response *
|
||||
Object: objectChatCompletion,
|
||||
Model: ctx.GetStringContext(ctxKeyFinalRequestModel, ""),
|
||||
Choices: choices,
|
||||
Usage: response.Usage,
|
||||
Usage: &response.Usage,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -168,7 +168,7 @@ func (p *sparkProvider) streamResponseSpark2OpenAI(ctx wrapper.HttpContext, resp
|
||||
Model: ctx.GetStringContext(ctxKeyFinalRequestModel, ""),
|
||||
Object: objectChatCompletion,
|
||||
Choices: choices,
|
||||
Usage: response.Usage,
|
||||
Usage: &response.Usage,
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -32,8 +32,7 @@ const (
|
||||
vertexEmbeddingAction = "predict"
|
||||
)
|
||||
|
||||
type vertexProviderInitializer struct {
|
||||
}
|
||||
type vertexProviderInitializer struct{}
|
||||
|
||||
func (v *vertexProviderInitializer) ValidateConfig(config *ProviderConfig) error {
|
||||
if config.vertexAuthKey == "" {
|
||||
@@ -245,7 +244,7 @@ func (v *vertexProvider) buildChatCompletionResponse(ctx wrapper.HttpContext, re
|
||||
Created: time.Now().UnixMilli() / 1000,
|
||||
Model: ctx.GetStringContext(ctxKeyFinalRequestModel, ""),
|
||||
Choices: make([]chatCompletionChoice, 0, len(response.Candidates)),
|
||||
Usage: usage{
|
||||
Usage: &usage{
|
||||
PromptTokens: response.UsageMetadata.PromptTokenCount,
|
||||
CompletionTokens: response.UsageMetadata.CandidatesTokenCount,
|
||||
TotalTokens: response.UsageMetadata.TotalTokenCount,
|
||||
@@ -257,7 +256,7 @@ func (v *vertexProvider) buildChatCompletionResponse(ctx wrapper.HttpContext, re
|
||||
Message: &chatMessage{
|
||||
Role: roleAssistant,
|
||||
},
|
||||
FinishReason: candidate.FinishReason,
|
||||
FinishReason: util.Ptr(candidate.FinishReason),
|
||||
}
|
||||
if len(candidate.Content.Parts) > 0 {
|
||||
choice.Message.Content = candidate.Content.Parts[0].Text
|
||||
@@ -310,7 +309,7 @@ func (v *vertexProvider) buildChatCompletionStreamResponse(ctx wrapper.HttpConte
|
||||
Created: time.Now().UnixMilli() / 1000,
|
||||
Model: ctx.GetStringContext(ctxKeyFinalRequestModel, ""),
|
||||
Choices: []chatCompletionChoice{choice},
|
||||
Usage: usage{
|
||||
Usage: &usage{
|
||||
PromptTokens: vertexResp.UsageMetadata.PromptTokenCount,
|
||||
CompletionTokens: vertexResp.UsageMetadata.CandidatesTokenCount,
|
||||
TotalTokens: vertexResp.UsageMetadata.TotalTokenCount,
|
||||
|
||||
@@ -17,10 +17,18 @@ const (
|
||||
)
|
||||
|
||||
var (
|
||||
RegRetrieveBatchPath = regexp.MustCompile(`^.*/v1/batches/(?P<batch_id>[^/]+)$`)
|
||||
RegCancelBatchPath = regexp.MustCompile(`^.*/v1/batches/(?P<batch_id>[^/]+)/cancel$`)
|
||||
RegRetrieveFilePath = regexp.MustCompile(`^.*/v1/files/(?P<file_id>[^/]+)$`)
|
||||
RegRetrieveFileContentPath = regexp.MustCompile(`^.*/v1/files/(?P<file_id>[^/]+)/content$`)
|
||||
RegRetrieveBatchPath = regexp.MustCompile(`^.*/v1/batches/(?P<batch_id>[^/]+)$`)
|
||||
RegCancelBatchPath = regexp.MustCompile(`^.*/v1/batches/(?P<batch_id>[^/]+)/cancel$`)
|
||||
RegRetrieveFilePath = regexp.MustCompile(`^.*/v1/files/(?P<file_id>[^/]+)$`)
|
||||
RegRetrieveFileContentPath = regexp.MustCompile(`^.*/v1/files/(?P<file_id>[^/]+)/content$`)
|
||||
RegRetrieveFineTuningJobPath = regexp.MustCompile(`^.*/v1/fine_tuning/jobs/(?P<fine_tuning_job_id>[^/]+)$`)
|
||||
RegRetrieveFineTuningJobEventsPath = regexp.MustCompile(`^.*/v1/fine_tuning/jobs/(?P<fine_tuning_job_id>[^/]+)/events$`)
|
||||
RegRetrieveFineTuningJobCheckpointsPath = regexp.MustCompile(`^.*/v1/fine_tuning/jobs/(?P<fine_tuning_job_id>[^/]+)/checkpoints$`)
|
||||
RegCancelFineTuningJobPath = regexp.MustCompile(`^.*/v1/fine_tuning/jobs/(?P<fine_tuning_job_id>[^/]+)/cancel$`)
|
||||
RegResumeFineTuningJobPath = regexp.MustCompile(`^.*/v1/fine_tuning/jobs/(?P<fine_tuning_job_id>[^/]+)/resume$`)
|
||||
RegPauseFineTuningJobPath = regexp.MustCompile(`^.*/v1/fine_tuning/jobs/(?P<fine_tuning_job_id>[^/]+)/pause$`)
|
||||
RegFineTuningCheckpointPermissionPath = regexp.MustCompile(`^.*/v1/fine_tuning/checkpoints/(?P<fine_tuned_model_checkpoint>[^/]+)/permissions$`)
|
||||
RegDeleteFineTuningCheckpointPermissionPath = regexp.MustCompile(`^.*/v1/fine_tuning/checkpoints/(?P<fine_tuned_model_checkpoint>[^/]+)/permissions/(?P<permission_id>[^/]+)$`)
|
||||
)
|
||||
|
||||
type ErrorHandlerFunc func(statusCodeDetails string, err error) error
|
||||
|
||||
5
plugins/wasm-go/extensions/ai-proxy/util/ptr.go
Normal file
5
plugins/wasm-go/extensions/ai-proxy/util/ptr.go
Normal file
@@ -0,0 +1,5 @@
|
||||
package util
|
||||
|
||||
func Ptr[T any](v T) *T {
|
||||
return &v
|
||||
}
|
||||
@@ -23,27 +23,30 @@ description: AI可观测配置参考
|
||||
| 名称 | 数据类型 | 填写要求 | 默认值 | 描述 |
|
||||
|----------------|-------|------|-----|------------------------|
|
||||
| `attributes` | []Attribute | 非必填 | - | 用户希望记录在log/span中的信息 |
|
||||
| `disable_openai_usage` | bool | 非必填 | false | 非openai兼容协议时,model、token的支持非标,配置为true时可以避免报错 |
|
||||
|
||||
Attribute 配置说明:
|
||||
|
||||
| 名称 | 数据类型 | 填写要求 | 默认值 | 描述 |
|
||||
|----------------|-------|-----|-----|------------------------|
|
||||
| `key` | string | 必填 | - | attrribute 名称 |
|
||||
| `value_source` | string | 必填 | - | attrribute 取值来源,可选值为 `fixed_value`, `request_header`, `request_body`, `response_header`, `response_body`, `response_streaming_body` |
|
||||
| `value` | string | 必填 | - | attrribute 取值 key value/path |
|
||||
| `default_value` | string | 非必填 | - | attrribute 默认值 |
|
||||
| `rule` | string | 非必填 | - | 从流式响应中提取 attrribute 的规则,可选值为 `first`, `replace`, `append`|
|
||||
| `key` | string | 必填 | - | attribute 名称 |
|
||||
| `value_source` | string | 必填 | - | attribute 取值来源,可选值为 `fixed_value`, `request_header`, `request_body`, `response_header`, `response_body`, `response_streaming_body` |
|
||||
| `value` | string | 必填 | - | attribute 取值 key value/path |
|
||||
| `default_value` | string | 非必填 | - | attribute 默认值 |
|
||||
| `rule` | string | 非必填 | - | 从流式响应中提取 attribute 的规则,可选值为 `first`, `replace`, `append`|
|
||||
| `apply_to_log` | bool | 非必填 | false | 是否将提取的信息记录在日志中 |
|
||||
| `apply_to_span` | bool | 非必填 | false | 是否将提取的信息记录在链路追踪span中 |
|
||||
| `trace_span_key` | string | 非必填 | - | 链路追踪attribute key,默认会使用`key`的设置 |
|
||||
| `as_separate_log_field` | bool | 非必填 | false | 记录日志时是否作为单独的字段,日志字段名使用`key`的设置 |
|
||||
|
||||
`value_source` 的各种取值含义如下:
|
||||
|
||||
- `fixed_value`:固定值
|
||||
- `request_header` : attrribute 值通过 http 请求头获取,value 配置为 header key
|
||||
- `request_body` :attrribute 值通过请求 body 获取,value 配置格式为 gjson 的 jsonpath
|
||||
- `response_header` :attrribute 值通过 http 响应头获取,value 配置为header key
|
||||
- `response_body` :attrribute 值通过响应 body 获取,value 配置格式为 gjson 的 jsonpath
|
||||
- `response_streaming_body` :attrribute 值通过流式响应 body 获取,value 配置格式为 gjson 的 jsonpath
|
||||
- `request_header` : attribute 值通过 http 请求头获取,value 配置为 header key
|
||||
- `request_body` :attribute 值通过请求 body 获取,value 配置格式为 gjson 的 jsonpath
|
||||
- `response_header` :attribute 值通过 http 响应头获取,value 配置为header key
|
||||
- `response_body` :attribute 值通过响应 body 获取,value 配置格式为 gjson 的 jsonpath
|
||||
- `response_streaming_body` :attribute 值通过流式响应 body 获取,value 配置格式为 gjson 的 jsonpath
|
||||
|
||||
|
||||
当 `value_source` 为 `response_streaming_body` 时,应当配置 `rule`,用于指定如何从流式body中获取指定值,取值含义如下:
|
||||
@@ -59,6 +62,21 @@ Attribute 配置说明:
|
||||
'{"ai_log":"%FILTER_STATE(wasm.ai_log:PLAIN)%"}'
|
||||
```
|
||||
|
||||
如果字段设置了 `as_separate_log_field`,例如:
|
||||
```yaml
|
||||
attributes:
|
||||
- key: consumer
|
||||
value_source: request_header
|
||||
value: x-mse-consumer
|
||||
apply_to_log: true
|
||||
as_separate_log_field: true
|
||||
```
|
||||
|
||||
那么要在日志中打印,需要额外设置log_format:
|
||||
```
|
||||
'{"consumer":"%FILTER_STATE(wasm.consumer:PLAIN)%"}'
|
||||
```
|
||||
|
||||
### 空配置
|
||||
#### 监控
|
||||
|
||||
|
||||
@@ -22,28 +22,31 @@ Users can also expand observable values through configuration:
|
||||
|
||||
| Name | Type | Required | Default | Description |
|
||||
|----------------|-------|------|-----|------------------------|
|
||||
| `attributes` | []Attribute | required | - | Information that the user wants to record in log/span |
|
||||
| `attributes` | []Attribute | optional | - | Information that the user wants to record in log/span |
|
||||
| `disable_openai_usage` | bool | optional | false | When using a non-OpenAI-compatible protocol, the support for model and token is non-standard. Setting the configuration to true can prevent errors. |
|
||||
|
||||
Attribute Configuration instructions:
|
||||
|
||||
| Name | Type | Required | Default | Description |
|
||||
|----------------|-------|-----|-----|------------------------|
|
||||
| `key` | string | required | - | attrribute key |
|
||||
| `value_source` | string | required | - | attrribute value source, optional values are `fixed_value`, `request_header`, `request_body`, `response_header`, `response_body`, `response_streaming_body` |
|
||||
| `value` | string | required | - | how to get attrribute value |
|
||||
| `key` | string | required | - | attribute key |
|
||||
| `value_source` | string | required | - | attribute value source, optional values are `fixed_value`, `request_header`, `request_body`, `response_header`, `response_body`, `response_streaming_body` |
|
||||
| `value` | string | required | - | how to get attribute value |
|
||||
| `default_value` | string | optional | - | default value for attribute |
|
||||
| `rule` | string | optional | - | Rule to extract attribute from streaming response, optional values are `first`, `replace`, `append`|
|
||||
| `apply_to_log` | bool | optional | false | Whether to record the extracted information in the log |
|
||||
| `apply_to_span` | bool | optional | false | Whether to record the extracted information in the link tracking span |
|
||||
| `trace_span_key` | string | optional | - | span attribute key, default is the value of `key` |
|
||||
| `as_separate_log_field` | bool | optional | false | Whether to use a separate log field, the field name is equal to the value of `key` |
|
||||
|
||||
The meanings of various values for `value_source` are as follows:
|
||||
|
||||
- `fixed_value`: fixed value
|
||||
- `request_header`: The attrribute is obtained through the http request header
|
||||
- `request_body`: The attrribute is obtained through the http request body
|
||||
- `response_header`: The attrribute is obtained through the http response header
|
||||
- `response_body`: The attrribute is obtained through the http response body
|
||||
- `response_streaming_body`: The attrribute is obtained through the http streaming response body
|
||||
- `request_header`: The attribute is obtained through the http request header
|
||||
- `request_body`: The attribute is obtained through the http request body
|
||||
- `response_header`: The attribute is obtained through the http response header
|
||||
- `response_body`: The attribute is obtained through the http response body
|
||||
- `response_streaming_body`: The attribute is obtained through the http streaming response body
|
||||
|
||||
|
||||
When `value_source` is `response_streaming_body`, `rule` should be configured to specify how to obtain the specified value from the streaming body. The meaning of the value is as follows:
|
||||
@@ -59,6 +62,21 @@ If you want to record ai-statistic related statistical values in the gateway acc
|
||||
'{"ai_log":"%FILTER_STATE(wasm.ai_log:PLAIN)%"}'
|
||||
```
|
||||
|
||||
If the field is set with `as_separate_log_field`, for example:
|
||||
```yaml
|
||||
attributes:
|
||||
- key: consumer
|
||||
value_source: request_header
|
||||
value: x-mse-consumer
|
||||
apply_to_log: true
|
||||
as_separate_log_field: true
|
||||
```
|
||||
|
||||
Then to print in the log, you need to set log_format additionally:
|
||||
```
|
||||
'{"consumer":"%FILTER_STATE(wasm.consumer:PLAIN)%"}'
|
||||
```
|
||||
|
||||
### Empty
|
||||
#### Metric
|
||||
|
||||
|
||||
@@ -75,13 +75,15 @@ const (
|
||||
|
||||
// TracingSpan is the tracing span configuration.
|
||||
type Attribute struct {
|
||||
Key string `json:"key"`
|
||||
ValueSource string `json:"value_source"`
|
||||
Value string `json:"value"`
|
||||
DefaultValue string `json:"default_value,omitempty"`
|
||||
Rule string `json:"rule,omitempty"`
|
||||
ApplyToLog bool `json:"apply_to_log,omitempty"`
|
||||
ApplyToSpan bool `json:"apply_to_span,omitempty"`
|
||||
Key string `json:"key"`
|
||||
ValueSource string `json:"value_source"`
|
||||
Value string `json:"value"`
|
||||
TraceSpanKey string `json:"trace_span_key,omitempty"`
|
||||
DefaultValue string `json:"default_value,omitempty"`
|
||||
Rule string `json:"rule,omitempty"`
|
||||
ApplyToLog bool `json:"apply_to_log,omitempty"`
|
||||
ApplyToSpan bool `json:"apply_to_span,omitempty"`
|
||||
AsSeparateLogField bool `json:"as_separate_log_field,omitempty"`
|
||||
}
|
||||
|
||||
type AIStatisticsConfig struct {
|
||||
@@ -92,6 +94,8 @@ type AIStatisticsConfig struct {
|
||||
attributes []Attribute
|
||||
// If there exist attributes extracted from streaming body, chunks should be buffered
|
||||
shouldBufferStreamingBody bool
|
||||
// If disableOpenaiUsage is true, model/input_token/output_token logs will be skipped
|
||||
disableOpenaiUsage bool
|
||||
}
|
||||
|
||||
func generateMetricName(route, cluster, model, consumer, metricName string) string {
|
||||
@@ -160,6 +164,10 @@ func parseConfig(configJson gjson.Result, config *AIStatisticsConfig, log wrappe
|
||||
}
|
||||
// Metric settings
|
||||
config.counterMetrics = make(map[string]proxywasm.MetricCounter)
|
||||
|
||||
// Parse openai usage config setting.
|
||||
config.disableOpenaiUsage = configJson.Get("disable_openai_usage").Bool()
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
@@ -264,15 +272,17 @@ func onHttpStreamingBody(ctx wrapper.HttpContext, config AIStatisticsConfig, dat
|
||||
}
|
||||
|
||||
// Set information about this request
|
||||
if model, inputToken, outputToken, ok := getUsage(data); ok {
|
||||
ctx.SetUserAttribute(Model, model)
|
||||
ctx.SetUserAttribute(InputToken, inputToken)
|
||||
ctx.SetUserAttribute(OutputToken, outputToken)
|
||||
// Set span attributes for ARMS.
|
||||
setSpanAttribute(ArmsModelName, model, log)
|
||||
setSpanAttribute(ArmsInputToken, inputToken, log)
|
||||
setSpanAttribute(ArmsOutputToken, outputToken, log)
|
||||
setSpanAttribute(ArmsTotalToken, inputToken+outputToken, log)
|
||||
if !config.disableOpenaiUsage {
|
||||
if model, inputToken, outputToken, ok := getUsage(data); ok {
|
||||
ctx.SetUserAttribute(Model, model)
|
||||
ctx.SetUserAttribute(InputToken, inputToken)
|
||||
ctx.SetUserAttribute(OutputToken, outputToken)
|
||||
// Set span attributes for ARMS.
|
||||
setSpanAttribute(ArmsModelName, model, log)
|
||||
setSpanAttribute(ArmsInputToken, inputToken, log)
|
||||
setSpanAttribute(ArmsOutputToken, outputToken, log)
|
||||
setSpanAttribute(ArmsTotalToken, inputToken+outputToken, log)
|
||||
}
|
||||
}
|
||||
// If the end of the stream is reached, record metrics/logs/spans.
|
||||
if endOfStream {
|
||||
@@ -311,15 +321,17 @@ func onHttpResponseBody(ctx wrapper.HttpContext, config AIStatisticsConfig, body
|
||||
}
|
||||
|
||||
// Set information about this request
|
||||
if model, inputToken, outputToken, ok := getUsage(body); ok {
|
||||
ctx.SetUserAttribute(Model, model)
|
||||
ctx.SetUserAttribute(InputToken, inputToken)
|
||||
ctx.SetUserAttribute(OutputToken, outputToken)
|
||||
// Set span attributes for ARMS.
|
||||
setSpanAttribute(ArmsModelName, model, log)
|
||||
setSpanAttribute(ArmsInputToken, inputToken, log)
|
||||
setSpanAttribute(ArmsOutputToken, outputToken, log)
|
||||
setSpanAttribute(ArmsTotalToken, inputToken+outputToken, log)
|
||||
if !config.disableOpenaiUsage {
|
||||
if model, inputToken, outputToken, ok := getUsage(body); ok {
|
||||
ctx.SetUserAttribute(Model, model)
|
||||
ctx.SetUserAttribute(InputToken, inputToken)
|
||||
ctx.SetUserAttribute(OutputToken, outputToken)
|
||||
// Set span attributes for ARMS.
|
||||
setSpanAttribute(ArmsModelName, model, log)
|
||||
setSpanAttribute(ArmsInputToken, inputToken, log)
|
||||
setSpanAttribute(ArmsOutputToken, outputToken, log)
|
||||
setSpanAttribute(ArmsTotalToken, inputToken+outputToken, log)
|
||||
}
|
||||
}
|
||||
|
||||
// Set user defined log & span attributes.
|
||||
@@ -396,13 +408,23 @@ func setAttributeBySource(ctx wrapper.HttpContext, config AIStatisticsConfig, so
|
||||
}
|
||||
log.Debugf("[attribute] source type: %s, key: %s, value: %+v", source, key, value)
|
||||
if attribute.ApplyToLog {
|
||||
ctx.SetUserAttribute(key, value)
|
||||
if attribute.AsSeparateLogField {
|
||||
marshalledJsonStr := wrapper.MarshalStr(fmt.Sprint(value))
|
||||
if err := proxywasm.SetProperty([]string{key}, []byte(marshalledJsonStr)); err != nil {
|
||||
log.Warnf("failed to set %s in filter state, raw is %s, err is %v", key, marshalledJsonStr, err)
|
||||
}
|
||||
} else {
|
||||
ctx.SetUserAttribute(key, value)
|
||||
}
|
||||
}
|
||||
// for metrics
|
||||
if key == Model || key == InputToken || key == OutputToken {
|
||||
ctx.SetContext(key, value)
|
||||
}
|
||||
if attribute.ApplyToSpan {
|
||||
if attribute.TraceSpanKey != "" {
|
||||
key = attribute.TraceSpanKey
|
||||
}
|
||||
setSpanAttribute(key, value, log)
|
||||
}
|
||||
}
|
||||
@@ -471,6 +493,11 @@ func writeMetric(ctx wrapper.HttpContext, config AIStatisticsConfig, log wrapper
|
||||
log.Warnf("ClusterName typd assert failed, skip metric record")
|
||||
return
|
||||
}
|
||||
|
||||
if config.disableOpenaiUsage {
|
||||
return
|
||||
}
|
||||
|
||||
if ctx.GetUserAttribute(Model) == nil || ctx.GetUserAttribute(InputToken) == nil || ctx.GetUserAttribute(OutputToken) == nil {
|
||||
log.Warnf("get usage information failed, skip metric record")
|
||||
return
|
||||
|
||||
14
plugins/wasm-go/mcp-filters/Dockerfile
Normal file
14
plugins/wasm-go/mcp-filters/Dockerfile
Normal file
@@ -0,0 +1,14 @@
|
||||
# Use a minimal base image as we only need to store the wasm file.
|
||||
FROM scratch
|
||||
|
||||
# Add build argument for the filter name. This will be passed by the Makefile.
|
||||
ARG FILTER_NAME
|
||||
|
||||
# Copy the compiled WASM binary into the image's root directory.
|
||||
# The wasm file will be named after the filter.
|
||||
COPY ${FILTER_NAME}/main.wasm /plugin.wasm
|
||||
|
||||
# Metadata
|
||||
LABEL org.opencontainers.image.title="${FILTER_NAME}"
|
||||
LABEL org.opencontainers.image.description="Higress MCP filter - ${FILTER_NAME}"
|
||||
LABEL org.opencontainers.image.source="https://github.com/alibaba/higress"
|
||||
54
plugins/wasm-go/mcp-filters/Makefile
Normal file
54
plugins/wasm-go/mcp-filters/Makefile
Normal file
@@ -0,0 +1,54 @@
|
||||
# MCP Filter Makefile
|
||||
|
||||
# Variables
|
||||
FILTER_NAME ?= mcp-router
|
||||
REGISTRY ?= higress-registry.cn-hangzhou.cr.aliyuncs.com/plugins/
|
||||
BUILD_TIME := $(shell date "+%Y%m%d-%H%M%S")
|
||||
COMMIT_ID := $(shell git rev-parse --short HEAD 2>/dev/null)
|
||||
IMAGE_TAG = $(if $(strip $(FILTER_VERSION)),${FILTER_VERSION},${BUILD_TIME}-${COMMIT_ID})
|
||||
IMG ?= ${REGISTRY}${FILTER_NAME}:${IMAGE_TAG}
|
||||
|
||||
# Default target
|
||||
.DEFAULT: build
|
||||
|
||||
build:
|
||||
@echo "Building WASM binary for filter: ${FILTER_NAME}..."
|
||||
@if [ ! -d "${FILTER_NAME}" ]; then \
|
||||
echo "Error: Filter directory '${FILTER_NAME}' not found."; \
|
||||
exit 1; \
|
||||
fi
|
||||
cd ${FILTER_NAME} && GOOS=wasip1 GOARCH=wasm go build -buildmode=c-shared -o main.wasm main.go
|
||||
@echo ""
|
||||
@echo "Output WASM file: ${FILTER_NAME}/main.wasm"
|
||||
|
||||
# Build Docker image (depends on build target to ensure WASM binary exists)
|
||||
build-image: build
|
||||
@echo "Building Docker image for ${FILTER_NAME}..."
|
||||
docker build -t ${IMG} \
|
||||
--build-arg FILTER_NAME=${FILTER_NAME} \
|
||||
-f Dockerfile .
|
||||
@echo ""
|
||||
@echo "Image: ${IMG}"
|
||||
|
||||
# Build and push Docker image
|
||||
build-push: build-image
|
||||
docker push ${IMG}
|
||||
|
||||
# Clean build artifacts
|
||||
clean:
|
||||
@echo "Cleaning build artifacts for filter: ${FILTER_NAME}..."
|
||||
rm -f ${FILTER_NAME}/main.wasm
|
||||
|
||||
# Help
|
||||
help:
|
||||
@echo "Available targets:"
|
||||
@echo " build - Build WASM binary for a specific filter"
|
||||
@echo " build-image - Build Docker image"
|
||||
@echo " build-push - Build and push Docker image"
|
||||
@echo " clean - Remove build artifacts for a specific filter"
|
||||
@echo ""
|
||||
@echo "Variables:"
|
||||
@echo " FILTER_NAME - Name of the MCP filter to build (default: ${FILTER_NAME})"
|
||||
@echo " REGISTRY - Docker registry (default: ${REGISTRY})"
|
||||
@echo " FILTER_VERSION - Version tag for the image (default: timestamp-commit)"
|
||||
@echo " IMG - Full image name (default: ${IMG})"
|
||||
89
plugins/wasm-go/mcp-filters/mcp-router/README.md
Normal file
89
plugins/wasm-go/mcp-filters/mcp-router/README.md
Normal file
@@ -0,0 +1,89 @@
|
||||
# MCP Router Plugin
|
||||
|
||||
## Feature Description
|
||||
The `mcp-router` plugin provides a routing capability for MCP (Model Context Protocol) `tools/call` requests. It inspects the tool name in the request payload, and if the name is prefixed with a server identifier (e.g., `server-name/tool-name`), it dynamically reroutes the request to the appropriate backend MCP server.
|
||||
|
||||
This enables the creation of a unified MCP endpoint that can aggregate tools from multiple, distinct MCP servers. A client can make a `tools/call` request to a single endpoint, and the `mcp-router` will ensure it reaches the correct underlying server where the tool is actually hosted.
|
||||
|
||||
## Configuration Fields
|
||||
|
||||
| Name | Data Type | Required | Default Value | Description |
|
||||
|-----------|---------------|----------|---------------|---------------------------------------------------------------------------------------------------------|
|
||||
| `servers` | array of objects | Yes | - | A list of routing configurations for each backend MCP server. |
|
||||
| `servers[].name` | string | Yes | - | The unique identifier for the MCP server. This must match the prefix used in the `tools/call` request's tool name. |
|
||||
| `servers[].domain` | string | No | - | The domain (authority) of the backend MCP server. If omitted, the original request's domain will be kept. |
|
||||
| `servers[].path` | string | Yes | - | The path of the backend MCP server to which the request will be routed. |
|
||||
|
||||
## How It Works
|
||||
|
||||
When a `tools/call` request is processed by a route with the `mcp-router` plugin enabled, the following occurs:
|
||||
|
||||
1. **Tool Name Parsing**: The plugin inspects the `name` parameter within the `params` object of the JSON-RPC request.
|
||||
2. **Prefix Matching**: It checks if the tool name follows the `server-name/tool-name` format.
|
||||
- If it does not match this format, the plugin takes no action, and the request proceeds normally.
|
||||
- If it matches, the plugin extracts the `server-name` and the actual `tool-name`.
|
||||
3. **Route Lookup**: The extracted `server-name` is used to look up the corresponding routing configuration (domain and path) from the `servers` list in the plugin's configuration.
|
||||
4. **Header Modification**:
|
||||
- The `:authority` request header is replaced with the `domain` from the matched server configuration.
|
||||
- The `:path` request header is replaced with the `path` from the matched server configuration.
|
||||
5. **Request Body Modification**: The `name` parameter in the JSON-RPC request body is updated to be just the `tool-name` (the `server-name/` prefix is removed).
|
||||
6. **Rerouting**: After the headers are modified, the gateway's routing engine processes the request again with the new destination information, sending it to the correct backend MCP server.
|
||||
|
||||
### Example Configuration
|
||||
|
||||
Here is an example of how to configure the `mcp-router` plugin in a `higress-plugins.yaml` file:
|
||||
|
||||
```yaml
|
||||
servers:
|
||||
- name: random-user-server
|
||||
domain: mcp.example.com
|
||||
path: /mcp-servers/mcp-random-user-server
|
||||
- name: rest-amap-server
|
||||
domain: mcp.example.com
|
||||
path: /mcp-servers/mcp-rest-amap-server
|
||||
```
|
||||
|
||||
### Example Usage
|
||||
|
||||
Consider a `tools/call` request sent to an endpoint where the `mcp-router` is active:
|
||||
|
||||
**Original Request:**
|
||||
```json
|
||||
{
|
||||
"jsonrpc": "2.0",
|
||||
"id": 2,
|
||||
"method": "tools/call",
|
||||
"params": {
|
||||
"name": "rest-amap-server/get-weather",
|
||||
"arguments": {
|
||||
"location": "New York"
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
**Plugin Actions:**
|
||||
|
||||
1. The plugin identifies the tool name as `rest-amap-server/get-weather`.
|
||||
2. It extracts `server-name` as `rest-amap-server` and `tool-name` as `get-weather`.
|
||||
3. It finds the matching configuration: `domain: mcp.example.com`, `path: /mcp-servers/mcp-rest-amap-server`.
|
||||
4. It modifies the request headers to:
|
||||
- `:authority`: `mcp.example.com`
|
||||
- `:path`: `/mcp-servers/mcp-rest-amap-server`
|
||||
5. It modifies the request body to:
|
||||
|
||||
```json
|
||||
{
|
||||
"jsonrpc": "2.0",
|
||||
"id": 2,
|
||||
"method": "tools/call",
|
||||
"params": {
|
||||
"name": "get-weather",
|
||||
"arguments": {
|
||||
"location": "New York"
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
The request is then rerouted to the `rest-amap-server`.
|
||||
89
plugins/wasm-go/mcp-filters/mcp-router/README_ZH.md
Normal file
89
plugins/wasm-go/mcp-filters/mcp-router/README_ZH.md
Normal file
@@ -0,0 +1,89 @@
|
||||
# MCP Router 插件
|
||||
|
||||
## 功能说明
|
||||
`mcp-router` 插件为 MCP (Model Context Protocol) 的 `tools/call` 请求提供了路由能力。它会检查请求负载中的工具名称,如果名称带有服务器标识符前缀(例如 `server-name/tool-name`),它会动态地将请求重新路由到相应的后端 MCP 服务器。
|
||||
|
||||
这使得创建一个统一的 MCP 端点成为可能,该端点可以聚合来自多个不同 MCP 服务器的工具。客户端可以向单个端点发出 `tools/call` 请求,`mcp-router` 将确保请求到达托管该工具的正确底层服务器。
|
||||
|
||||
## 配置字段
|
||||
|
||||
| 名称 | 数据类型 | 填写要求 | 默认值 | 描述 |
|
||||
|---|---|---|---|---|
|
||||
| `servers` | 对象数组 | 是 | - | 每个后端 MCP 服务器的路由配置列表。 |
|
||||
| `servers[].name` | 字符串 | 是 | - | MCP 服务器的唯一标识符。这必须与 `tools/call` 请求的工具名称中使用的前缀相匹配。 |
|
||||
| `servers[].domain` | 字符串 | 否 | - | 后端 MCP 服务器的域名 (authority)。如果省略,将保留原始请求的域名。 |
|
||||
| `servers[].path` | 字符串 | 是 | - | 请求将被路由到的后端 MCP 服务器的路径。 |
|
||||
|
||||
## 工作原理
|
||||
|
||||
当一个启用了 `mcp-router` 插件的路由处理 `tools/call` 请求时,会发生以下情况:
|
||||
|
||||
1. **工具名称解析**:插件检查 JSON-RPC 请求中 `params` 对象的 `name` 参数。
|
||||
2. **前缀匹配**:它检查工具名称是否遵循 `server-name/tool-name` 格式。
|
||||
- 如果不匹配此格式,插件不执行任何操作,请求将正常继续。
|
||||
- 如果匹配,插件将提取 `server-name` 和实际的 `tool-name`。
|
||||
3. **路由查找**:提取的 `server-name` 用于从插件配置的 `servers` 列表中查找相应的路由配置(domain 和 path)。
|
||||
4. **Header 修改**:
|
||||
- `:authority` 请求头被替换为匹配的服务器配置中的 `domain`。
|
||||
- `:path` 请求头被替换为匹配的服务器配置中的 `path`。
|
||||
5. **请求体修改**:JSON-RPC 请求体中的 `name` 参数被更新为仅包含 `tool-name`(移除了 `server-name/` 前缀)。
|
||||
6. **重新路由**:在 Header 修改后,网关的路由引擎会使用新的目标信息再次处理请求,将其发送到正确的后端 MCP 服务器。
|
||||
|
||||
### 配置示例
|
||||
|
||||
以下是在 `higress-plugins.yaml` 文件中配置 `mcp-router` 插件的示例:
|
||||
|
||||
```yaml
|
||||
servers:
|
||||
- name: random-user-server
|
||||
domain: mcp.example.com
|
||||
path: /mcp-servers/mcp-random-user-server
|
||||
- name: rest-amap-server
|
||||
domain: mcp.example.com
|
||||
path: /mcp-servers/mcp-rest-amap-server
|
||||
```
|
||||
|
||||
### 使用示例
|
||||
|
||||
假设一个 `tools/call` 请求被发送到激活了 `mcp-router` 的端点:
|
||||
|
||||
**原始请求:**
|
||||
```json
|
||||
{
|
||||
"jsonrpc": "2.0",
|
||||
"id": 2,
|
||||
"method": "tools/call",
|
||||
"params": {
|
||||
"name": "rest-amap-server/get-weather",
|
||||
"arguments": {
|
||||
"location": "New York"
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
**插件行为:**
|
||||
|
||||
1. 插件识别出工具名称为 `rest-amap-server/get-weather`。
|
||||
2. 它提取出 `server-name` 为 `rest-amap-server`,`tool-name` 为 `get-weather`。
|
||||
3. 它找到匹配的配置:`domain: mcp.example.com`, `path: /mcp-servers/mcp-rest-amap-server`。
|
||||
4. 它将请求头修改为:
|
||||
- `:authority`: `mcp.example.com`
|
||||
- `:path`: `/mcp-servers/mcp-rest-amap-server`
|
||||
5. 它将请求体修改为:
|
||||
|
||||
```json
|
||||
{
|
||||
"jsonrpc": "2.0",
|
||||
"id": 2,
|
||||
"method": "tools/call",
|
||||
"params": {
|
||||
"name": "get-weather",
|
||||
"arguments": {
|
||||
"location": "New York"
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
请求随后被重新路由到 `rest-amap-server`。
|
||||
34
plugins/wasm-go/mcp-filters/mcp-router/go.mod
Normal file
34
plugins/wasm-go/mcp-filters/mcp-router/go.mod
Normal file
@@ -0,0 +1,34 @@
|
||||
module mcp-router
|
||||
|
||||
go 1.24.1
|
||||
|
||||
require (
|
||||
github.com/alibaba/higress/plugins/wasm-go v1.4.4-0.20250612125225-016b165a33c9
|
||||
github.com/higress-group/proxy-wasm-go-sdk v0.0.0-20250402062734-d50d98c305f0
|
||||
github.com/tidwall/gjson v1.18.0
|
||||
github.com/tidwall/sjson v1.2.5
|
||||
)
|
||||
|
||||
require (
|
||||
dario.cat/mergo v1.0.1 // indirect
|
||||
github.com/Masterminds/goutils v1.1.1 // indirect
|
||||
github.com/Masterminds/semver/v3 v3.3.0 // indirect
|
||||
github.com/Masterminds/sprig/v3 v3.3.0 // indirect
|
||||
github.com/bahlo/generic-list-go v0.2.0 // indirect
|
||||
github.com/buger/jsonparser v1.1.1 // indirect
|
||||
github.com/google/uuid v1.6.0 // indirect
|
||||
github.com/higress-group/gjson_template v0.0.0-20250413075336-4c4161ed428b // indirect
|
||||
github.com/huandu/xstrings v1.5.0 // indirect
|
||||
github.com/invopop/jsonschema v0.13.0 // indirect
|
||||
github.com/mailru/easyjson v0.7.7 // indirect
|
||||
github.com/mitchellh/copystructure v1.2.0 // indirect
|
||||
github.com/mitchellh/reflectwalk v1.0.2 // indirect
|
||||
github.com/shopspring/decimal v1.4.0 // indirect
|
||||
github.com/spf13/cast v1.7.0 // indirect
|
||||
github.com/tidwall/match v1.1.1 // indirect
|
||||
github.com/tidwall/pretty v1.2.1 // indirect
|
||||
github.com/tidwall/resp v0.1.1 // indirect
|
||||
github.com/wk8/go-ordered-map/v2 v2.1.8 // indirect
|
||||
golang.org/x/crypto v0.26.0 // indirect
|
||||
gopkg.in/yaml.v3 v3.0.1 // indirect
|
||||
)
|
||||
71
plugins/wasm-go/mcp-filters/mcp-router/go.sum
Normal file
71
plugins/wasm-go/mcp-filters/mcp-router/go.sum
Normal file
@@ -0,0 +1,71 @@
|
||||
dario.cat/mergo v1.0.1 h1:Ra4+bf83h2ztPIQYNP99R6m+Y7KfnARDfID+a+vLl4s=
|
||||
dario.cat/mergo v1.0.1/go.mod h1:uNxQE+84aUszobStD9th8a29P2fMDhsBdgRYvZOxGmk=
|
||||
github.com/Masterminds/goutils v1.1.1 h1:5nUrii3FMTL5diU80unEVvNevw1nH4+ZV4DSLVJLSYI=
|
||||
github.com/Masterminds/goutils v1.1.1/go.mod h1:8cTjp+g8YejhMuvIA5y2vz3BpJxksy863GQaJW2MFNU=
|
||||
github.com/Masterminds/semver/v3 v3.3.0 h1:B8LGeaivUe71a5qox1ICM/JLl0NqZSW5CHyL+hmvYS0=
|
||||
github.com/Masterminds/semver/v3 v3.3.0/go.mod h1:4V+yj/TJE1HU9XfppCwVMZq3I84lprf4nC11bSS5beM=
|
||||
github.com/Masterminds/sprig/v3 v3.3.0 h1:mQh0Yrg1XPo6vjYXgtf5OtijNAKJRNcTdOOGZe3tPhs=
|
||||
github.com/Masterminds/sprig/v3 v3.3.0/go.mod h1:Zy1iXRYNqNLUolqCpL4uhk6SHUMAOSCzdgBfDb35Lz0=
|
||||
github.com/alibaba/higress/plugins/wasm-go v1.4.4-0.20250612125225-016b165a33c9 h1:MBIjh29Qie+jmPQ9W61wOzyUoulk/lsOjdj6hoYTRpo=
|
||||
github.com/alibaba/higress/plugins/wasm-go v1.4.4-0.20250612125225-016b165a33c9/go.mod h1:yObZXF1xTx/8peEsSbtHIzz7KlTr/tZCrokIHtwF0Jk=
|
||||
github.com/bahlo/generic-list-go v0.2.0 h1:5sz/EEAK+ls5wF+NeqDpk5+iNdMDXrh3z3nPnH1Wvgk=
|
||||
github.com/bahlo/generic-list-go v0.2.0/go.mod h1:2KvAjgMlE5NNynlg/5iLrrCCZ2+5xWbdbCW3pNTGyYg=
|
||||
github.com/buger/jsonparser v1.1.1 h1:2PnMjfWD7wBILjqQbt530v576A/cAbQvEW9gGIpYMUs=
|
||||
github.com/buger/jsonparser v1.1.1/go.mod h1:6RYKKt7H4d4+iWqouImQ9R2FZql3VbhNgx27UK13J/0=
|
||||
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
|
||||
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
|
||||
github.com/frankban/quicktest v1.14.6 h1:7Xjx+VpznH+oBnejlPUj8oUpdxnVs4f8XU8WnHkI4W8=
|
||||
github.com/frankban/quicktest v1.14.6/go.mod h1:4ptaffx2x8+WTWXmUCuVU6aPUX1/Mz7zb5vbUoiM6w0=
|
||||
github.com/google/go-cmp v0.6.0 h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI=
|
||||
github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY=
|
||||
github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0=
|
||||
github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
|
||||
github.com/higress-group/gjson_template v0.0.0-20250413075336-4c4161ed428b h1:rRI9+ThQbe+nw4jUiYEyOFaREkXCMMW9k1X2gy2d6pE=
|
||||
github.com/higress-group/gjson_template v0.0.0-20250413075336-4c4161ed428b/go.mod h1:rU3M+Tq5VrQOo0dxpKHGb03Ty0sdWIZfAH+YCOACx/Y=
|
||||
github.com/higress-group/proxy-wasm-go-sdk v0.0.0-20250402062734-d50d98c305f0 h1:Ta+RBsZYML3hjoenbGJoS2L6aWJN+hqlxKoqzj/Y2SY=
|
||||
github.com/higress-group/proxy-wasm-go-sdk v0.0.0-20250402062734-d50d98c305f0/go.mod h1:tRI2LfMudSkKHhyv1uex3BWzcice2s/l8Ah8axporfA=
|
||||
github.com/huandu/xstrings v1.5.0 h1:2ag3IFq9ZDANvthTwTiqSSZLjDc+BedvHPAp5tJy2TI=
|
||||
github.com/huandu/xstrings v1.5.0/go.mod h1:y5/lhBue+AyNmUVz9RLU9xbLR0o4KIIExikq4ovT0aE=
|
||||
github.com/invopop/jsonschema v0.13.0 h1:KvpoAJWEjR3uD9Kbm2HWJmqsEaHt8lBUpd0qHcIi21E=
|
||||
github.com/invopop/jsonschema v0.13.0/go.mod h1:ffZ5Km5SWWRAIN6wbDXItl95euhFz2uON45H2qjYt+0=
|
||||
github.com/josharian/intern v1.0.0/go.mod h1:5DoeVV0s6jJacbCEi61lwdGj/aVlrQvzHFFd8Hwg//Y=
|
||||
github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE=
|
||||
github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk=
|
||||
github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY=
|
||||
github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE=
|
||||
github.com/mailru/easyjson v0.7.7 h1:UGYAvKxe3sBsEDzO8ZeWOSlIQfWFlxbzLZe7hwFURr0=
|
||||
github.com/mailru/easyjson v0.7.7/go.mod h1:xzfreul335JAWq5oZzymOObrkdz5UnU4kGfJJLY9Nlc=
|
||||
github.com/mitchellh/copystructure v1.2.0 h1:vpKXTN4ewci03Vljg/q9QvCGUDttBOGBIa15WveJJGw=
|
||||
github.com/mitchellh/copystructure v1.2.0/go.mod h1:qLl+cE2AmVv+CoeAwDPye/v+N2HKCj9FbZEVFJRxO9s=
|
||||
github.com/mitchellh/reflectwalk v1.0.2 h1:G2LzWKi524PWgd3mLHV8Y5k7s6XUvT0Gef6zxSIeXaQ=
|
||||
github.com/mitchellh/reflectwalk v1.0.2/go.mod h1:mSTlrgnPZtwu0c4WaC2kGObEpuNDbx0jmZXqmk4esnw=
|
||||
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
|
||||
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
|
||||
github.com/rogpeppe/go-internal v1.9.0 h1:73kH8U+JUqXU8lRuOHeVHaa/SZPifC7BkcraZVejAe8=
|
||||
github.com/rogpeppe/go-internal v1.9.0/go.mod h1:WtVeX8xhTBvf0smdhujwtBcq4Qrzq/fJaraNFVN+nFs=
|
||||
github.com/shopspring/decimal v1.4.0 h1:bxl37RwXBklmTi0C79JfXCEBD1cqqHt0bbgBAGFp81k=
|
||||
github.com/shopspring/decimal v1.4.0/go.mod h1:gawqmDU56v4yIKSwfBSFip1HdCCXN8/+DMd9qYNcwME=
|
||||
github.com/spf13/cast v1.7.0 h1:ntdiHjuueXFgm5nzDRdOS4yfT43P5Fnud6DH50rz/7w=
|
||||
github.com/spf13/cast v1.7.0/go.mod h1:ancEpBxwJDODSW/UG4rDrAqiKolqNNh2DX3mk86cAdo=
|
||||
github.com/stretchr/testify v1.9.0 h1:HtqpIVDClZ4nwg75+f6Lvsy/wHu+3BoSGCbBAcpTsTg=
|
||||
github.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY=
|
||||
github.com/tidwall/gjson v1.14.2/go.mod h1:/wbyibRr2FHMks5tjHJ5F8dMZh3AcwJEMf5vlfC0lxk=
|
||||
github.com/tidwall/gjson v1.18.0 h1:FIDeeyB800efLX89e5a8Y0BNH+LOngJyGrIWxG2FKQY=
|
||||
github.com/tidwall/gjson v1.18.0/go.mod h1:/wbyibRr2FHMks5tjHJ5F8dMZh3AcwJEMf5vlfC0lxk=
|
||||
github.com/tidwall/match v1.1.1 h1:+Ho715JplO36QYgwN9PGYNhgZvoUSc9X2c80KVTi+GA=
|
||||
github.com/tidwall/match v1.1.1/go.mod h1:eRSPERbgtNPcGhD8UCthc6PmLEQXEWd3PRB5JTxsfmM=
|
||||
github.com/tidwall/pretty v1.2.0/go.mod h1:ITEVvHYasfjBbM0u2Pg8T2nJnzm8xPwvNhhsoaGGjNU=
|
||||
github.com/tidwall/pretty v1.2.1 h1:qjsOFOWWQl+N3RsoF5/ssm1pHmJJwhjlSbZ51I6wMl4=
|
||||
github.com/tidwall/pretty v1.2.1/go.mod h1:ITEVvHYasfjBbM0u2Pg8T2nJnzm8xPwvNhhsoaGGjNU=
|
||||
github.com/tidwall/resp v0.1.1 h1:Ly20wkhqKTmDUPlyM1S7pWo5kk0tDu8OoC/vFArXmwE=
|
||||
github.com/tidwall/resp v0.1.1/go.mod h1:3/FrruOBAxPTPtundW0VXgmsQ4ZBA0Aw714lVYgwFa0=
|
||||
github.com/tidwall/sjson v1.2.5 h1:kLy8mja+1c9jlljvWTlSazM7cKDRfJuR/bOJhcY5NcY=
|
||||
github.com/tidwall/sjson v1.2.5/go.mod h1:Fvgq9kS/6ociJEDnK0Fk1cpYF4FIW6ZF7LAe+6jwd28=
|
||||
github.com/wk8/go-ordered-map/v2 v2.1.8 h1:5h/BUHu93oj4gIdvHHHGsScSTMijfx5PeYkE/fJgbpc=
|
||||
github.com/wk8/go-ordered-map/v2 v2.1.8/go.mod h1:5nJHM5DyteebpVlHnWMV0rPz6Zp7+xBAnxjb1X5vnTw=
|
||||
golang.org/x/crypto v0.26.0 h1:RrRspgV4mU+YwB4FYnuBoKsUapNIL5cohGAmSH3azsw=
|
||||
golang.org/x/crypto v0.26.0/go.mod h1:GY7jblb9wI+FOo5y8/S2oY4zWP07AkOJ4+jxCqdqn54=
|
||||
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM=
|
||||
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
|
||||
gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
|
||||
gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
|
||||
133
plugins/wasm-go/mcp-filters/mcp-router/main.go
Normal file
133
plugins/wasm-go/mcp-filters/mcp-router/main.go
Normal file
@@ -0,0 +1,133 @@
|
||||
// Copyright (c) 2022 Alibaba Group Holding Ltd.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package main
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"strings"
|
||||
|
||||
"github.com/alibaba/higress/plugins/wasm-go/pkg/log"
|
||||
"github.com/alibaba/higress/plugins/wasm-go/pkg/mcp"
|
||||
"github.com/alibaba/higress/plugins/wasm-go/pkg/wrapper"
|
||||
"github.com/higress-group/proxy-wasm-go-sdk/proxywasm"
|
||||
"github.com/higress-group/proxy-wasm-go-sdk/proxywasm/types"
|
||||
"github.com/tidwall/gjson"
|
||||
"github.com/tidwall/sjson"
|
||||
)
|
||||
|
||||
func main() {}
|
||||
|
||||
func init() {
|
||||
mcp.LoadMCPFilter(
|
||||
mcp.FilterName("mcp-router"),
|
||||
mcp.SetConfigParser(ParseConfig),
|
||||
mcp.SetToolCallRequestFilter(ProcessRequest),
|
||||
)
|
||||
mcp.InitMCPFilter()
|
||||
}
|
||||
|
||||
// ServerConfig represents the routing configuration for a single MCP server
|
||||
type ServerConfig struct {
|
||||
Name string `json:"name"`
|
||||
Domain string `json:"domain,omitempty"`
|
||||
Path string `json:"path"`
|
||||
}
|
||||
|
||||
// McpRouterConfig represents the configuration for the mcp-router filter
|
||||
type McpRouterConfig struct {
|
||||
Servers []ServerConfig `json:"servers"`
|
||||
}
|
||||
|
||||
func ParseConfig(configBytes []byte, filterConfig *any) error {
|
||||
var config McpRouterConfig
|
||||
if err := json.Unmarshal(configBytes, &config); err != nil {
|
||||
return fmt.Errorf("failed to parse mcp-router config: %v", err)
|
||||
}
|
||||
|
||||
log.Infof("Parsed mcp-router config with %d servers", len(config.Servers))
|
||||
for _, server := range config.Servers {
|
||||
log.Debugf("Server: %s -> %s%s", server.Name, server.Domain, server.Path)
|
||||
}
|
||||
|
||||
*filterConfig = config
|
||||
return nil
|
||||
}
|
||||
|
||||
func ProcessRequest(context wrapper.HttpContext, config any, toolName string, toolArgs gjson.Result, rawBody []byte) types.Action {
|
||||
routerConfig, ok := config.(McpRouterConfig)
|
||||
if !ok {
|
||||
log.Errorf("Invalid config type for mcp-router")
|
||||
return types.ActionContinue
|
||||
}
|
||||
|
||||
// Extract server name from tool name (format: "serverName/toolName")
|
||||
parts := strings.SplitN(toolName, "/", 2)
|
||||
if len(parts) != 2 {
|
||||
log.Debugf("Tool name '%s' does not contain server prefix, continuing without routing", toolName)
|
||||
return types.ActionContinue
|
||||
}
|
||||
|
||||
serverName := parts[0]
|
||||
actualToolName := parts[1]
|
||||
|
||||
log.Debugf("Routing tool call: server=%s, tool=%s", serverName, actualToolName)
|
||||
|
||||
// Find the server configuration
|
||||
var targetServer *ServerConfig
|
||||
for _, server := range routerConfig.Servers {
|
||||
if server.Name == serverName {
|
||||
targetServer = &server
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
if targetServer == nil {
|
||||
log.Warnf("No routing configuration found for server '%s'", serverName)
|
||||
return types.ActionContinue
|
||||
}
|
||||
|
||||
log.Infof("Routing to server '%s': domain=[%s], path=[%s]", serverName, targetServer.Domain, targetServer.Path)
|
||||
|
||||
// Modify the :authority header (domain) if it's configured
|
||||
if targetServer.Domain != "" {
|
||||
if err := proxywasm.ReplaceHttpRequestHeader(":authority", targetServer.Domain); err != nil {
|
||||
log.Errorf("Failed to set :authority header to '%s': %v", targetServer.Domain, err)
|
||||
return types.ActionContinue
|
||||
}
|
||||
}
|
||||
|
||||
// Modify the :path header
|
||||
if err := proxywasm.ReplaceHttpRequestHeader(":path", targetServer.Path); err != nil {
|
||||
log.Errorf("Failed to set :path header to '%s': %v", targetServer.Path, err)
|
||||
return types.ActionContinue
|
||||
}
|
||||
|
||||
// Create a new JSON with the modified tool name
|
||||
modifiedBody, err := sjson.SetBytes(rawBody, "params.name", actualToolName)
|
||||
if err != nil {
|
||||
log.Errorf("Failed to modify tool name, body: %s, err: %v", rawBody, err)
|
||||
return types.ActionContinue
|
||||
}
|
||||
// Replace the request body
|
||||
if err := proxywasm.ReplaceHttpRequestBody([]byte(modifiedBody)); err != nil {
|
||||
log.Errorf("Failed to replace request body: %v", err)
|
||||
return types.ActionContinue
|
||||
}
|
||||
|
||||
log.Infof("Successfully routed request for tool '%s' to server '%s'. New tool name is '%s'.",
|
||||
toolName, serverName, actualToolName)
|
||||
return types.ActionContinue
|
||||
}
|
||||
@@ -347,7 +347,7 @@ tools:
|
||||
3. 创建正确格式化 API 请求的 requestTemplate,包括带有模板值的头部
|
||||
4. 创建将 API 响应转换为适合 AI 消费的可读格式的 responseTemplate
|
||||
|
||||
模板使用 GJSON Template 语法 (https://github.com/higress-group/gjson_template),该语法结合了 Go 模板和 GJSON 路径语法进行 JSON 处理。模板引擎支持:
|
||||
模板使用 [GJSON Template 语法](https://github.com/higress-group/gjson_template),该语法结合了 Go 模板和 GJSON 路径语法进行 JSON 处理。模板引擎支持:
|
||||
|
||||
1. 基本点表示法访问字段:{{.fieldName}}
|
||||
2. 用于复杂查询的 gjson 函数:{{gjson "users.#(active==true)#.name"}}
|
||||
|
||||
@@ -8,7 +8,7 @@ replace amap-tools => ../amap-tools
|
||||
|
||||
require (
|
||||
amap-tools v0.0.0-00010101000000-000000000000
|
||||
github.com/alibaba/higress/plugins/wasm-go v1.4.4-0.20250528033743-f88b782fe131
|
||||
github.com/higress-group/wasm-go v1.0.1-0.20250626074812-b8840174d876
|
||||
quark-search v0.0.0-00010101000000-000000000000
|
||||
)
|
||||
|
||||
@@ -21,7 +21,7 @@ require (
|
||||
github.com/buger/jsonparser v1.1.1 // indirect
|
||||
github.com/google/uuid v1.6.0 // indirect
|
||||
github.com/higress-group/gjson_template v0.0.0-20250413075336-4c4161ed428b // indirect
|
||||
github.com/higress-group/proxy-wasm-go-sdk v0.0.0-20250402062734-d50d98c305f0 // indirect
|
||||
github.com/higress-group/proxy-wasm-go-sdk v0.0.0-20250611100342-5654e89a7a80 // indirect
|
||||
github.com/huandu/xstrings v1.5.0 // indirect
|
||||
github.com/invopop/jsonschema v0.13.0 // indirect
|
||||
github.com/mailru/easyjson v0.7.7 // indirect
|
||||
@@ -36,5 +36,6 @@ require (
|
||||
github.com/tidwall/sjson v1.2.5 // indirect
|
||||
github.com/wk8/go-ordered-map/v2 v2.1.8 // indirect
|
||||
golang.org/x/crypto v0.26.0 // indirect
|
||||
google.golang.org/protobuf v1.36.6 // indirect
|
||||
gopkg.in/yaml.v3 v3.0.1 // indirect
|
||||
)
|
||||
|
||||
@@ -6,16 +6,6 @@ github.com/Masterminds/semver/v3 v3.3.0 h1:B8LGeaivUe71a5qox1ICM/JLl0NqZSW5CHyL+
|
||||
github.com/Masterminds/semver/v3 v3.3.0/go.mod h1:4V+yj/TJE1HU9XfppCwVMZq3I84lprf4nC11bSS5beM=
|
||||
github.com/Masterminds/sprig/v3 v3.3.0 h1:mQh0Yrg1XPo6vjYXgtf5OtijNAKJRNcTdOOGZe3tPhs=
|
||||
github.com/Masterminds/sprig/v3 v3.3.0/go.mod h1:Zy1iXRYNqNLUolqCpL4uhk6SHUMAOSCzdgBfDb35Lz0=
|
||||
github.com/alibaba/higress/plugins/wasm-go v1.4.4-0.20250507130917-ed12a186173a h1:CvTkMBU9+SGIyJEJYFEvg/esoVbLzQP9WVeoZzMHM9E=
|
||||
github.com/alibaba/higress/plugins/wasm-go v1.4.4-0.20250507130917-ed12a186173a/go.mod h1:yObZXF1xTx/8peEsSbtHIzz7KlTr/tZCrokIHtwF0Jk=
|
||||
github.com/alibaba/higress/plugins/wasm-go v1.4.4-0.20250513083230-017f47fc2432 h1:Acw2RhWABsw3Mg+agBhKJML+Fk5CbDBJcVhM9HM2lmk=
|
||||
github.com/alibaba/higress/plugins/wasm-go v1.4.4-0.20250513083230-017f47fc2432/go.mod h1:yObZXF1xTx/8peEsSbtHIzz7KlTr/tZCrokIHtwF0Jk=
|
||||
github.com/alibaba/higress/plugins/wasm-go v1.4.4-0.20250515035738-c8f491db9030 h1:CX3lqAbgKnsrNpLYlfi6xDmnyMKsU8NJcMCCaci8BUI=
|
||||
github.com/alibaba/higress/plugins/wasm-go v1.4.4-0.20250515035738-c8f491db9030/go.mod h1:yObZXF1xTx/8peEsSbtHIzz7KlTr/tZCrokIHtwF0Jk=
|
||||
github.com/alibaba/higress/plugins/wasm-go v1.4.4-0.20250526122106-bde03cd884e5 h1:ACvlY5Vu7SN+K1posB3UP3l4G+Iw5+6iMcAEaBKJvH8=
|
||||
github.com/alibaba/higress/plugins/wasm-go v1.4.4-0.20250526122106-bde03cd884e5/go.mod h1:yObZXF1xTx/8peEsSbtHIzz7KlTr/tZCrokIHtwF0Jk=
|
||||
github.com/alibaba/higress/plugins/wasm-go v1.4.4-0.20250528033743-f88b782fe131 h1:/efvKhP31Qo4RE48mjJCNC1jpVObgAohNe23bN5hFPA=
|
||||
github.com/alibaba/higress/plugins/wasm-go v1.4.4-0.20250528033743-f88b782fe131/go.mod h1:yObZXF1xTx/8peEsSbtHIzz7KlTr/tZCrokIHtwF0Jk=
|
||||
github.com/bahlo/generic-list-go v0.2.0 h1:5sz/EEAK+ls5wF+NeqDpk5+iNdMDXrh3z3nPnH1Wvgk=
|
||||
github.com/bahlo/generic-list-go v0.2.0/go.mod h1:2KvAjgMlE5NNynlg/5iLrrCCZ2+5xWbdbCW3pNTGyYg=
|
||||
github.com/buger/jsonparser v1.1.1 h1:2PnMjfWD7wBILjqQbt530v576A/cAbQvEW9gGIpYMUs=
|
||||
@@ -30,8 +20,10 @@ github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0=
|
||||
github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
|
||||
github.com/higress-group/gjson_template v0.0.0-20250413075336-4c4161ed428b h1:rRI9+ThQbe+nw4jUiYEyOFaREkXCMMW9k1X2gy2d6pE=
|
||||
github.com/higress-group/gjson_template v0.0.0-20250413075336-4c4161ed428b/go.mod h1:rU3M+Tq5VrQOo0dxpKHGb03Ty0sdWIZfAH+YCOACx/Y=
|
||||
github.com/higress-group/proxy-wasm-go-sdk v0.0.0-20250402062734-d50d98c305f0 h1:Ta+RBsZYML3hjoenbGJoS2L6aWJN+hqlxKoqzj/Y2SY=
|
||||
github.com/higress-group/proxy-wasm-go-sdk v0.0.0-20250402062734-d50d98c305f0/go.mod h1:tRI2LfMudSkKHhyv1uex3BWzcice2s/l8Ah8axporfA=
|
||||
github.com/higress-group/proxy-wasm-go-sdk v0.0.0-20250611100342-5654e89a7a80 h1:xqmtTZI0JQ2O+Lg9/CE6c+Tw9KD6FnvWw8EpLVuuvfg=
|
||||
github.com/higress-group/proxy-wasm-go-sdk v0.0.0-20250611100342-5654e89a7a80/go.mod h1:tRI2LfMudSkKHhyv1uex3BWzcice2s/l8Ah8axporfA=
|
||||
github.com/higress-group/wasm-go v1.0.1-0.20250626074812-b8840174d876 h1:dJEP8h+FN9Q5PUs5zttXl2bS6lGf/bOsX9GMBTHaL3I=
|
||||
github.com/higress-group/wasm-go v1.0.1-0.20250626074812-b8840174d876/go.mod h1:ODBV27sjmhIW8Cqv3R74EUcTnbdkE69bmXBQFuRkY1M=
|
||||
github.com/huandu/xstrings v1.5.0 h1:2ag3IFq9ZDANvthTwTiqSSZLjDc+BedvHPAp5tJy2TI=
|
||||
github.com/huandu/xstrings v1.5.0/go.mod h1:y5/lhBue+AyNmUVz9RLU9xbLR0o4KIIExikq4ovT0aE=
|
||||
github.com/invopop/jsonschema v0.13.0 h1:KvpoAJWEjR3uD9Kbm2HWJmqsEaHt8lBUpd0qHcIi21E=
|
||||
@@ -73,6 +65,8 @@ github.com/wk8/go-ordered-map/v2 v2.1.8 h1:5h/BUHu93oj4gIdvHHHGsScSTMijfx5PeYkE/
|
||||
github.com/wk8/go-ordered-map/v2 v2.1.8/go.mod h1:5nJHM5DyteebpVlHnWMV0rPz6Zp7+xBAnxjb1X5vnTw=
|
||||
golang.org/x/crypto v0.26.0 h1:RrRspgV4mU+YwB4FYnuBoKsUapNIL5cohGAmSH3azsw=
|
||||
golang.org/x/crypto v0.26.0/go.mod h1:GY7jblb9wI+FOo5y8/S2oY4zWP07AkOJ4+jxCqdqn54=
|
||||
google.golang.org/protobuf v1.36.6 h1:z1NpPI8ku2WgiWnf+t9wTPsn6eP1L7ksHUlkfLvd9xY=
|
||||
google.golang.org/protobuf v1.36.6/go.mod h1:jduwjTPXsFjZGTmRluh+L6NjiWu7pchiJ2/5YcXBHnY=
|
||||
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM=
|
||||
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
|
||||
gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
|
||||
|
||||
@@ -18,7 +18,7 @@ import (
|
||||
amap "amap-tools/tools"
|
||||
quark "quark-search/tools"
|
||||
|
||||
"github.com/alibaba/higress/plugins/wasm-go/pkg/mcp"
|
||||
"github.com/higress-group/wasm-go/pkg/mcp"
|
||||
)
|
||||
|
||||
func main() {}
|
||||
|
||||
@@ -3,8 +3,8 @@ module amap-tools
|
||||
go 1.24.1
|
||||
|
||||
require (
|
||||
github.com/alibaba/higress/plugins/wasm-go v1.4.4-0.20250507122328-b62384cff88a
|
||||
github.com/higress-group/proxy-wasm-go-sdk v0.0.0-20250402062734-d50d98c305f0
|
||||
github.com/higress-group/proxy-wasm-go-sdk v0.0.0-20250611100342-5654e89a7a80
|
||||
github.com/higress-group/wasm-go v1.0.0
|
||||
)
|
||||
|
||||
require (
|
||||
@@ -30,5 +30,6 @@ require (
|
||||
github.com/tidwall/sjson v1.2.5 // indirect
|
||||
github.com/wk8/go-ordered-map/v2 v2.1.8 // indirect
|
||||
golang.org/x/crypto v0.26.0 // indirect
|
||||
google.golang.org/protobuf v1.36.6 // indirect
|
||||
gopkg.in/yaml.v3 v3.0.1 // indirect
|
||||
)
|
||||
|
||||
@@ -6,8 +6,6 @@ github.com/Masterminds/semver/v3 v3.3.0 h1:B8LGeaivUe71a5qox1ICM/JLl0NqZSW5CHyL+
|
||||
github.com/Masterminds/semver/v3 v3.3.0/go.mod h1:4V+yj/TJE1HU9XfppCwVMZq3I84lprf4nC11bSS5beM=
|
||||
github.com/Masterminds/sprig/v3 v3.3.0 h1:mQh0Yrg1XPo6vjYXgtf5OtijNAKJRNcTdOOGZe3tPhs=
|
||||
github.com/Masterminds/sprig/v3 v3.3.0/go.mod h1:Zy1iXRYNqNLUolqCpL4uhk6SHUMAOSCzdgBfDb35Lz0=
|
||||
github.com/alibaba/higress/plugins/wasm-go v1.4.4-0.20250507122328-b62384cff88a h1:VQrtP0CR4pgIL3FGnIAb+uY3yRwaMQk2c3AT3p+LVwk=
|
||||
github.com/alibaba/higress/plugins/wasm-go v1.4.4-0.20250507122328-b62384cff88a/go.mod h1:yObZXF1xTx/8peEsSbtHIzz7KlTr/tZCrokIHtwF0Jk=
|
||||
github.com/bahlo/generic-list-go v0.2.0 h1:5sz/EEAK+ls5wF+NeqDpk5+iNdMDXrh3z3nPnH1Wvgk=
|
||||
github.com/bahlo/generic-list-go v0.2.0/go.mod h1:2KvAjgMlE5NNynlg/5iLrrCCZ2+5xWbdbCW3pNTGyYg=
|
||||
github.com/buger/jsonparser v1.1.1 h1:2PnMjfWD7wBILjqQbt530v576A/cAbQvEW9gGIpYMUs=
|
||||
@@ -22,8 +20,10 @@ github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0=
|
||||
github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
|
||||
github.com/higress-group/gjson_template v0.0.0-20250413075336-4c4161ed428b h1:rRI9+ThQbe+nw4jUiYEyOFaREkXCMMW9k1X2gy2d6pE=
|
||||
github.com/higress-group/gjson_template v0.0.0-20250413075336-4c4161ed428b/go.mod h1:rU3M+Tq5VrQOo0dxpKHGb03Ty0sdWIZfAH+YCOACx/Y=
|
||||
github.com/higress-group/proxy-wasm-go-sdk v0.0.0-20250402062734-d50d98c305f0 h1:Ta+RBsZYML3hjoenbGJoS2L6aWJN+hqlxKoqzj/Y2SY=
|
||||
github.com/higress-group/proxy-wasm-go-sdk v0.0.0-20250402062734-d50d98c305f0/go.mod h1:tRI2LfMudSkKHhyv1uex3BWzcice2s/l8Ah8axporfA=
|
||||
github.com/higress-group/proxy-wasm-go-sdk v0.0.0-20250611100342-5654e89a7a80 h1:xqmtTZI0JQ2O+Lg9/CE6c+Tw9KD6FnvWw8EpLVuuvfg=
|
||||
github.com/higress-group/proxy-wasm-go-sdk v0.0.0-20250611100342-5654e89a7a80/go.mod h1:tRI2LfMudSkKHhyv1uex3BWzcice2s/l8Ah8axporfA=
|
||||
github.com/higress-group/wasm-go v1.0.0 h1:4Ik5n3FsJ5+r13KLQl2ky+8NuAE8dfWQwoKxXYD2KAw=
|
||||
github.com/higress-group/wasm-go v1.0.0/go.mod h1:ODBV27sjmhIW8Cqv3R74EUcTnbdkE69bmXBQFuRkY1M=
|
||||
github.com/huandu/xstrings v1.5.0 h1:2ag3IFq9ZDANvthTwTiqSSZLjDc+BedvHPAp5tJy2TI=
|
||||
github.com/huandu/xstrings v1.5.0/go.mod h1:y5/lhBue+AyNmUVz9RLU9xbLR0o4KIIExikq4ovT0aE=
|
||||
github.com/invopop/jsonschema v0.13.0 h1:KvpoAJWEjR3uD9Kbm2HWJmqsEaHt8lBUpd0qHcIi21E=
|
||||
@@ -65,6 +65,8 @@ github.com/wk8/go-ordered-map/v2 v2.1.8 h1:5h/BUHu93oj4gIdvHHHGsScSTMijfx5PeYkE/
|
||||
github.com/wk8/go-ordered-map/v2 v2.1.8/go.mod h1:5nJHM5DyteebpVlHnWMV0rPz6Zp7+xBAnxjb1X5vnTw=
|
||||
golang.org/x/crypto v0.26.0 h1:RrRspgV4mU+YwB4FYnuBoKsUapNIL5cohGAmSH3azsw=
|
||||
golang.org/x/crypto v0.26.0/go.mod h1:GY7jblb9wI+FOo5y8/S2oY4zWP07AkOJ4+jxCqdqn54=
|
||||
google.golang.org/protobuf v1.36.6 h1:z1NpPI8ku2WgiWnf+t9wTPsn6eP1L7ksHUlkfLvd9xY=
|
||||
google.golang.org/protobuf v1.36.6/go.mod h1:jduwjTPXsFjZGTmRluh+L6NjiWu7pchiJ2/5YcXBHnY=
|
||||
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM=
|
||||
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
|
||||
gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
|
||||
|
||||
@@ -17,7 +17,7 @@ package main
|
||||
import (
|
||||
"amap-tools/tools"
|
||||
|
||||
"github.com/alibaba/higress/plugins/wasm-go/pkg/mcp"
|
||||
"github.com/higress-group/wasm-go/pkg/mcp"
|
||||
)
|
||||
|
||||
func main() {}
|
||||
|
||||
@@ -15,8 +15,8 @@
|
||||
package tools
|
||||
|
||||
import (
|
||||
"github.com/alibaba/higress/plugins/wasm-go/pkg/mcp"
|
||||
"github.com/alibaba/higress/plugins/wasm-go/pkg/mcp/server"
|
||||
"github.com/higress-group/wasm-go/pkg/mcp"
|
||||
"github.com/higress-group/wasm-go/pkg/mcp/server"
|
||||
)
|
||||
|
||||
func LoadTools(server *mcp.MCPServer) server.Server {
|
||||
|
||||
@@ -23,8 +23,8 @@ import (
|
||||
|
||||
"amap-tools/config"
|
||||
|
||||
"github.com/alibaba/higress/plugins/wasm-go/pkg/mcp/server"
|
||||
"github.com/alibaba/higress/plugins/wasm-go/pkg/mcp/utils"
|
||||
"github.com/higress-group/wasm-go/pkg/mcp/server"
|
||||
"github.com/higress-group/wasm-go/pkg/mcp/utils"
|
||||
)
|
||||
|
||||
var _ server.Tool = AroundSearchRequest{}
|
||||
@@ -58,11 +58,11 @@ func (t AroundSearchRequest) Call(ctx server.HttpContext, s server.Server) error
|
||||
|
||||
url := fmt.Sprintf("http://restapi.amap.com/v3/place/around?key=%s&location=%s&radius=%s&keywords=%s&source=ts_mcp", serverConfig.ApiKey, url.QueryEscape(t.Location), url.QueryEscape(t.Radius), url.QueryEscape(t.Keywords))
|
||||
return ctx.RouteCall(http.MethodGet, url,
|
||||
[][2]string{{"Accept", "application/json"}}, nil, func(sendDirectly bool, statusCode int, responseHeaders [][2]string, responseBody []byte) {
|
||||
[][2]string{{"Accept", "application/json"}}, nil, func(statusCode int, responseHeaders [][2]string, responseBody []byte) {
|
||||
if statusCode != http.StatusOK {
|
||||
utils.OnMCPToolCallError(sendDirectly, ctx, fmt.Errorf("around search call failed, status: %d", statusCode))
|
||||
utils.OnMCPToolCallError(ctx, fmt.Errorf("around search call failed, status: %d", statusCode))
|
||||
return
|
||||
}
|
||||
utils.SendMCPToolTextResult(sendDirectly, ctx, string(responseBody))
|
||||
utils.SendMCPToolTextResult(ctx, string(responseBody))
|
||||
})
|
||||
}
|
||||
|
||||
@@ -23,8 +23,8 @@ import (
|
||||
|
||||
"amap-tools/config"
|
||||
|
||||
"github.com/alibaba/higress/plugins/wasm-go/pkg/mcp/server"
|
||||
"github.com/alibaba/higress/plugins/wasm-go/pkg/mcp/utils"
|
||||
"github.com/higress-group/wasm-go/pkg/mcp/server"
|
||||
"github.com/higress-group/wasm-go/pkg/mcp/utils"
|
||||
)
|
||||
|
||||
var _ server.Tool = BicyclingRequest{}
|
||||
@@ -57,11 +57,11 @@ func (t BicyclingRequest) Call(ctx server.HttpContext, s server.Server) error {
|
||||
|
||||
url := fmt.Sprintf("http://restapi.amap.com/v4/direction/bicycling?key=%s&origin=%s&destination=%s&source=ts_mcp", serverConfig.ApiKey, url.QueryEscape(t.Origin), url.QueryEscape(t.Destination))
|
||||
return ctx.RouteCall(http.MethodGet, url,
|
||||
[][2]string{{"Accept", "application/json"}}, nil, func(sendDirectly bool, statusCode int, responseHeaders [][2]string, responseBody []byte) {
|
||||
[][2]string{{"Accept", "application/json"}}, nil, func(statusCode int, responseHeaders [][2]string, responseBody []byte) {
|
||||
if statusCode != http.StatusOK {
|
||||
utils.OnMCPToolCallError(sendDirectly, ctx, fmt.Errorf("bicycling call failed, status: %d", statusCode))
|
||||
utils.OnMCPToolCallError(ctx, fmt.Errorf("bicycling call failed, status: %d", statusCode))
|
||||
return
|
||||
}
|
||||
utils.SendMCPToolTextResult(sendDirectly, ctx, string(responseBody))
|
||||
utils.SendMCPToolTextResult(ctx, string(responseBody))
|
||||
})
|
||||
}
|
||||
|
||||
@@ -23,8 +23,8 @@ import (
|
||||
|
||||
"amap-tools/config"
|
||||
|
||||
"github.com/alibaba/higress/plugins/wasm-go/pkg/mcp/server"
|
||||
"github.com/alibaba/higress/plugins/wasm-go/pkg/mcp/utils"
|
||||
"github.com/higress-group/wasm-go/pkg/mcp/server"
|
||||
"github.com/higress-group/wasm-go/pkg/mcp/utils"
|
||||
)
|
||||
|
||||
var _ server.Tool = DrivingRequest{}
|
||||
@@ -57,11 +57,11 @@ func (t DrivingRequest) Call(ctx server.HttpContext, s server.Server) error {
|
||||
|
||||
url := fmt.Sprintf("http://restapi.amap.com/v3/direction/driving?key=%s&origin=%s&destination=%s&source=ts_mcp", serverConfig.ApiKey, url.QueryEscape(t.Origin), url.QueryEscape(t.Destination))
|
||||
return ctx.RouteCall(http.MethodGet, url,
|
||||
[][2]string{{"Accept", "application/json"}}, nil, func(sendDirectly bool, statusCode int, responseHeaders [][2]string, responseBody []byte) {
|
||||
[][2]string{{"Accept", "application/json"}}, nil, func(statusCode int, responseHeaders [][2]string, responseBody []byte) {
|
||||
if statusCode != http.StatusOK {
|
||||
utils.OnMCPToolCallError(sendDirectly, ctx, fmt.Errorf("driving call failed, status: %d", statusCode))
|
||||
utils.OnMCPToolCallError(ctx, fmt.Errorf("driving call failed, status: %d", statusCode))
|
||||
return
|
||||
}
|
||||
utils.SendMCPToolTextResult(sendDirectly, ctx, string(responseBody))
|
||||
utils.SendMCPToolTextResult(ctx, string(responseBody))
|
||||
})
|
||||
}
|
||||
|
||||
@@ -23,8 +23,8 @@ import (
|
||||
|
||||
"amap-tools/config"
|
||||
|
||||
"github.com/alibaba/higress/plugins/wasm-go/pkg/mcp/server"
|
||||
"github.com/alibaba/higress/plugins/wasm-go/pkg/mcp/utils"
|
||||
"github.com/higress-group/wasm-go/pkg/mcp/server"
|
||||
"github.com/higress-group/wasm-go/pkg/mcp/utils"
|
||||
)
|
||||
|
||||
var _ server.Tool = TransitIntegratedRequest{}
|
||||
@@ -59,11 +59,11 @@ func (t TransitIntegratedRequest) Call(ctx server.HttpContext, s server.Server)
|
||||
|
||||
url := fmt.Sprintf("http://restapi.amap.com/v3/direction/transit/integrated?key=%s&origin=%s&destination=%s&city=%s&cityd=%s&source=ts_mcp", serverConfig.ApiKey, url.QueryEscape(t.Origin), url.QueryEscape(t.Destination), url.QueryEscape(t.City), url.QueryEscape(t.Cityd))
|
||||
return ctx.RouteCall(http.MethodGet, url,
|
||||
[][2]string{{"Accept", "application/json"}}, nil, func(sendDirectly bool, statusCode int, responseHeaders [][2]string, responseBody []byte) {
|
||||
[][2]string{{"Accept", "application/json"}}, nil, func(statusCode int, responseHeaders [][2]string, responseBody []byte) {
|
||||
if statusCode != http.StatusOK {
|
||||
utils.OnMCPToolCallError(sendDirectly, ctx, fmt.Errorf("transit integrated call failed, status: %d", statusCode))
|
||||
utils.OnMCPToolCallError(ctx, fmt.Errorf("transit integrated call failed, status: %d", statusCode))
|
||||
return
|
||||
}
|
||||
utils.SendMCPToolTextResult(sendDirectly, ctx, string(responseBody))
|
||||
utils.SendMCPToolTextResult(ctx, string(responseBody))
|
||||
})
|
||||
}
|
||||
|
||||
@@ -23,8 +23,8 @@ import (
|
||||
|
||||
"amap-tools/config"
|
||||
|
||||
"github.com/alibaba/higress/plugins/wasm-go/pkg/mcp/server"
|
||||
"github.com/alibaba/higress/plugins/wasm-go/pkg/mcp/utils"
|
||||
"github.com/higress-group/wasm-go/pkg/mcp/server"
|
||||
"github.com/higress-group/wasm-go/pkg/mcp/utils"
|
||||
)
|
||||
|
||||
var _ server.Tool = WalkingRequest{}
|
||||
@@ -57,11 +57,11 @@ func (t WalkingRequest) Call(ctx server.HttpContext, s server.Server) error {
|
||||
|
||||
url := fmt.Sprintf("http://restapi.amap.com/v3/direction/walking?key=%s&origin=%s&destination=%s&source=ts_mcp", serverConfig.ApiKey, url.QueryEscape(t.Origin), url.QueryEscape(t.Destination))
|
||||
return ctx.RouteCall(http.MethodGet, url,
|
||||
[][2]string{{"Accept", "application/json"}}, nil, func(sendDirectly bool, statusCode int, responseHeaders [][2]string, responseBody []byte) {
|
||||
[][2]string{{"Accept", "application/json"}}, nil, func(statusCode int, responseHeaders [][2]string, responseBody []byte) {
|
||||
if statusCode != http.StatusOK {
|
||||
utils.OnMCPToolCallError(sendDirectly, ctx, fmt.Errorf("walking call failed, status: %d", statusCode))
|
||||
utils.OnMCPToolCallError(ctx, fmt.Errorf("walking call failed, status: %d", statusCode))
|
||||
return
|
||||
}
|
||||
utils.SendMCPToolTextResult(sendDirectly, ctx, string(responseBody))
|
||||
utils.SendMCPToolTextResult(ctx, string(responseBody))
|
||||
})
|
||||
}
|
||||
|
||||
@@ -23,8 +23,8 @@ import (
|
||||
|
||||
"amap-tools/config"
|
||||
|
||||
"github.com/alibaba/higress/plugins/wasm-go/pkg/mcp/server"
|
||||
"github.com/alibaba/higress/plugins/wasm-go/pkg/mcp/utils"
|
||||
"github.com/higress-group/wasm-go/pkg/mcp/server"
|
||||
"github.com/higress-group/wasm-go/pkg/mcp/utils"
|
||||
)
|
||||
|
||||
var _ server.Tool = DistanceRequest{}
|
||||
@@ -57,11 +57,11 @@ func (t DistanceRequest) Call(ctx server.HttpContext, s server.Server) error {
|
||||
|
||||
url := fmt.Sprintf("http://restapi.amap.com/v3/distance?key=%s&origins=%s&destination=%s&type=%s&source=ts_mcp", serverConfig.ApiKey, url.QueryEscape(t.Origins), url.QueryEscape(t.Destination), url.QueryEscape(t.Type))
|
||||
return ctx.RouteCall(http.MethodGet, url,
|
||||
[][2]string{{"Accept", "application/json"}}, nil, func(sendDirectly bool, statusCode int, responseHeaders [][2]string, responseBody []byte) {
|
||||
[][2]string{{"Accept", "application/json"}}, nil, func(statusCode int, responseHeaders [][2]string, responseBody []byte) {
|
||||
if statusCode != http.StatusOK {
|
||||
utils.OnMCPToolCallError(sendDirectly, ctx, fmt.Errorf("distance call failed, status: %d", statusCode))
|
||||
utils.OnMCPToolCallError(ctx, fmt.Errorf("distance call failed, status: %d", statusCode))
|
||||
return
|
||||
}
|
||||
utils.SendMCPToolTextResult(sendDirectly, ctx, string(responseBody))
|
||||
utils.SendMCPToolTextResult(ctx, string(responseBody))
|
||||
})
|
||||
}
|
||||
|
||||
@@ -23,8 +23,8 @@ import (
|
||||
|
||||
"amap-tools/config"
|
||||
|
||||
"github.com/alibaba/higress/plugins/wasm-go/pkg/mcp/server"
|
||||
"github.com/alibaba/higress/plugins/wasm-go/pkg/mcp/utils"
|
||||
"github.com/higress-group/wasm-go/pkg/mcp/server"
|
||||
"github.com/higress-group/wasm-go/pkg/mcp/utils"
|
||||
)
|
||||
|
||||
var _ server.Tool = GeoRequest{}
|
||||
@@ -58,11 +58,11 @@ func (t GeoRequest) Call(ctx server.HttpContext, s server.Server) error {
|
||||
apiKey := serverConfig.ApiKey
|
||||
url := fmt.Sprintf("https://restapi.amap.com/v3/geocode/geo?key=%s&address=%s&city=%s&source=ts_mcp", apiKey, url.QueryEscape(t.Address), url.QueryEscape(t.City))
|
||||
return ctx.RouteCall(http.MethodGet, url,
|
||||
[][2]string{{"Accept", "application/json"}}, nil, func(sendDirectly bool, statusCode int, responseHeaders [][2]string, responseBody []byte) {
|
||||
[][2]string{{"Accept", "application/json"}}, nil, func(statusCode int, responseHeaders [][2]string, responseBody []byte) {
|
||||
if statusCode != http.StatusOK {
|
||||
utils.OnMCPToolCallError(sendDirectly, ctx, fmt.Errorf("geo call failed, status: %d", statusCode))
|
||||
utils.OnMCPToolCallError(ctx, fmt.Errorf("geo call failed, status: %d", statusCode))
|
||||
return
|
||||
}
|
||||
utils.SendMCPToolTextResult(sendDirectly, ctx, string(responseBody))
|
||||
utils.SendMCPToolTextResult(ctx, string(responseBody))
|
||||
})
|
||||
}
|
||||
|
||||
@@ -24,8 +24,8 @@ import (
|
||||
|
||||
"amap-tools/config"
|
||||
|
||||
"github.com/alibaba/higress/plugins/wasm-go/pkg/mcp/server"
|
||||
"github.com/alibaba/higress/plugins/wasm-go/pkg/mcp/utils"
|
||||
"github.com/higress-group/wasm-go/pkg/mcp/server"
|
||||
"github.com/higress-group/wasm-go/pkg/mcp/utils"
|
||||
"github.com/higress-group/proxy-wasm-go-sdk/proxywasm"
|
||||
)
|
||||
|
||||
@@ -70,12 +70,12 @@ func (t IPLocationRequest) Call(ctx server.HttpContext, s server.Server) error {
|
||||
}
|
||||
url := fmt.Sprintf("https://restapi.amap.com/v3/ip?ip=%s&key=%s&source=ts_mcp", url.QueryEscape(t.IP), serverConfig.ApiKey)
|
||||
return ctx.RouteCall(http.MethodGet, url,
|
||||
[][2]string{{"Accept", "application/json"}}, nil, func(sendDirectly bool, statusCode int, responseHeaders [][2]string, responseBody []byte) {
|
||||
[][2]string{{"Accept", "application/json"}}, nil, func(statusCode int, responseHeaders [][2]string, responseBody []byte) {
|
||||
if statusCode != http.StatusOK {
|
||||
utils.OnMCPToolCallError(sendDirectly, ctx, fmt.Errorf("ip location call failed, status: %d", statusCode))
|
||||
utils.OnMCPToolCallError(ctx, fmt.Errorf("ip location call failed, status: %d", statusCode))
|
||||
return
|
||||
}
|
||||
utils.SendMCPToolTextResult(sendDirectly, ctx, string(responseBody))
|
||||
utils.SendMCPToolTextResult(ctx, string(responseBody))
|
||||
})
|
||||
}
|
||||
|
||||
|
||||
@@ -23,8 +23,8 @@ import (
|
||||
|
||||
"amap-tools/config"
|
||||
|
||||
"github.com/alibaba/higress/plugins/wasm-go/pkg/mcp/server"
|
||||
"github.com/alibaba/higress/plugins/wasm-go/pkg/mcp/utils"
|
||||
"github.com/higress-group/wasm-go/pkg/mcp/server"
|
||||
"github.com/higress-group/wasm-go/pkg/mcp/utils"
|
||||
)
|
||||
|
||||
var _ server.Tool = ReGeocodeRequest{}
|
||||
@@ -56,11 +56,11 @@ func (t ReGeocodeRequest) Call(ctx server.HttpContext, s server.Server) error {
|
||||
|
||||
url := fmt.Sprintf("http://restapi.amap.com/v3/geocode/regeo?location=%s&key=%s&source=ts_mcp", url.QueryEscape(t.Location), serverConfig.ApiKey)
|
||||
return ctx.RouteCall(http.MethodGet, url,
|
||||
[][2]string{{"Accept", "application/json"}}, nil, func(sendDirectly bool, statusCode int, responseHeaders [][2]string, responseBody []byte) {
|
||||
[][2]string{{"Accept", "application/json"}}, nil, func(statusCode int, responseHeaders [][2]string, responseBody []byte) {
|
||||
if statusCode != http.StatusOK {
|
||||
utils.OnMCPToolCallError(sendDirectly, ctx, fmt.Errorf("regeocode call failed, status: %d", statusCode))
|
||||
utils.OnMCPToolCallError(ctx, fmt.Errorf("regeocode call failed, status: %d", statusCode))
|
||||
return
|
||||
}
|
||||
utils.SendMCPToolTextResult(sendDirectly, ctx, string(responseBody))
|
||||
utils.SendMCPToolTextResult(ctx, string(responseBody))
|
||||
})
|
||||
}
|
||||
|
||||
@@ -23,8 +23,8 @@ import (
|
||||
|
||||
"amap-tools/config"
|
||||
|
||||
"github.com/alibaba/higress/plugins/wasm-go/pkg/mcp/server"
|
||||
"github.com/alibaba/higress/plugins/wasm-go/pkg/mcp/utils"
|
||||
"github.com/higress-group/wasm-go/pkg/mcp/server"
|
||||
"github.com/higress-group/wasm-go/pkg/mcp/utils"
|
||||
)
|
||||
|
||||
var _ server.Tool = SearchDetailRequest{}
|
||||
@@ -56,11 +56,11 @@ func (t SearchDetailRequest) Call(ctx server.HttpContext, s server.Server) error
|
||||
|
||||
url := fmt.Sprintf("http://restapi.amap.com/v3/place/detail?id=%s&key=%s&source=ts_mcp", url.QueryEscape(t.ID), serverConfig.ApiKey)
|
||||
return ctx.RouteCall(http.MethodGet, url,
|
||||
[][2]string{{"Accept", "application/json"}}, nil, func(sendDirectly bool, statusCode int, responseHeaders [][2]string, responseBody []byte) {
|
||||
[][2]string{{"Accept", "application/json"}}, nil, func(statusCode int, responseHeaders [][2]string, responseBody []byte) {
|
||||
if statusCode != http.StatusOK {
|
||||
utils.OnMCPToolCallError(sendDirectly, ctx, fmt.Errorf("search detail call failed, status: %d", statusCode))
|
||||
utils.OnMCPToolCallError(ctx, fmt.Errorf("search detail call failed, status: %d", statusCode))
|
||||
return
|
||||
}
|
||||
utils.SendMCPToolTextResult(sendDirectly, ctx, string(responseBody))
|
||||
utils.SendMCPToolTextResult(ctx, string(responseBody))
|
||||
})
|
||||
}
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user