Compare commits

...

24 Commits

Author SHA1 Message Date
澄潭
36bcb595d6 Release 2.1.5 (#2536) 2025-07-02 18:03:46 +08:00
hongzhouzi
783a8db512 feat: add DB MCP Server execute, list tables, describe table tools (#2506)
Signed-off-by: hongzhouzi <weihongzhou.whz@alibaba-inc.com>
2025-07-02 14:47:49 +08:00
澄潭
44566f5259 feat: ai-proxy support config subPath field (#2533) 2025-07-02 11:35:28 +08:00
Xijun Dai
73ba9238bd feat(helm): comment tracing.skywalking (#2514)
Signed-off-by: Xijun Dai <daixijun1990@gmail.com>
Co-authored-by: Se7en <chengzw258@163.com>
2025-07-01 22:09:33 +08:00
Xijun Dai
41a1455874 fix(ai-proxy): restrict the stream_options parameter to be effective only in the openai/v1/chatcompletions (#2524)
Signed-off-by: Xijun Dai <daixijun1990@gmail.com>
2025-07-01 21:44:34 +08:00
rinfx
9d68ccbf35 feat: advanced load balance policys for LLM service through wasm plugin (#2531) 2025-07-01 20:08:44 +08:00
Kent Dong
db7dbb24a2 fix: Fix an incorrect config property name in the README of ai-proxy plugin (#2503) 2025-07-01 16:29:18 +08:00
HaoJie Liu
9a0cf9b762 fix(ai-proxy): add system message handling for Bedrock requests (#2516) 2025-06-30 10:35:14 +08:00
Xijun Dai
bb786c9618 feat(ai-proxy): add responses support for doubao (#2509)
Signed-off-by: Xijun Dai <daixijun1990@gmail.com>
2025-06-28 10:39:21 +08:00
johnlanni
ef49d2f5f6 fix url schema issue 2025-06-26 15:51:06 +08:00
HaoJie Liu
864bf5af39 fix(ai-proxy): bedrock support additional request fields (#2480) 2025-06-26 11:29:32 +08:00
澄潭
527e922d50 Fix the incorrect behavior of decoding when constructing and sending requests if the path in the configured URL contains URL-encoded parts. (#2497) 2025-06-26 11:22:38 +08:00
kai2321
1fe5eb6e13 Implement AI-image-reader plugin (#1925) 2025-06-25 19:28:02 +08:00
澄潭
87185baff2 Update CODEOWNERS 2025-06-25 13:41:22 +08:00
rinfx
76ada0b844 add trace_span_key & as_seperate_log_field configuration for ai-statistics (#2488) 2025-06-25 09:28:14 +08:00
澄潭
f4d3fec228 feat: mcp server support error template response (#2485) 2025-06-24 11:05:54 +08:00
Xijun Dai
e94ac43dd1 fix(ai-proxy): fix openai provider customPath compatibility (#2475)
Signed-off-by: Xijun Dai <daixijun1990@gmail.com>
2025-06-21 08:23:02 +08:00
Jacky Wu
dd29267fd7 fix: add missing controller sa annotation. (#2443) 2025-06-20 16:36:10 +08:00
woody
01a9161153 fix(ai-proxy): Unify the naming convention & fix api name mapping (#2441) 2025-06-20 16:35:30 +08:00
Kenneth
ceb8b557dc feat: add investoday MCP Server (#2450)
Co-authored-by: 澄潭 <zty98751@alibaba-inc.com>
2025-06-20 15:58:28 +08:00
007gzs
753022e093 Feat: Ai data masking msg window support reasoning_content in response and n in request (#2404) 2025-06-20 15:39:09 +08:00
xingpiaoliang
04cbbfc7e8 feat(mcp/sse): support passthourgh the query parameter in sse server to the rest api server (#2460) 2025-06-20 15:07:45 +08:00
Xin Luo
db66df39c4 fix too much logs when nacos is not avaiable (#2469) 2025-06-20 14:36:01 +08:00
澄潭
dad6278a6d refactor: mcp server depends on the latest wasm-go repository (#2458) 2025-06-18 20:32:47 +08:00
96 changed files with 5313 additions and 1223 deletions

View File

@@ -3,7 +3,7 @@
/istio @SpecialYang @johnlanni
/pkg @SpecialYang @johnlanni @CH3CHO
/plugins @johnlanni @CH3CHO @rinfx
/plugins/wasm-go/extensions/ai-proxy @cr7258 @CH3CHO @rinfx
/plugins/wasm-go/extensions/ai-proxy @cr7258 @CH3CHO @rinfx @wydream
/plugins/wasm-rust @007gzs @jizhuozhi
/registry @NameHaibinZhang @2456868764 @johnlanni
/test @Xunzhuo @2456868764 @CH3CHO

View File

@@ -192,8 +192,8 @@ install: pre-install
helm install higress helm/higress -n higress-system --create-namespace --set 'global.local=true'
HIGRESS_LATEST_IMAGE_TAG ?= latest
ENVOY_LATEST_IMAGE_TAG ?= 958467a353d411ae3f06e03b096bfd342cddb2c6
ISTIO_LATEST_IMAGE_TAG ?= d9c728d3b01f64855e012b08d136e306f1160397
ENVOY_LATEST_IMAGE_TAG ?= latest
ISTIO_LATEST_IMAGE_TAG ?= latest
install-dev: pre-install
helm install higress helm/core -n higress-system --create-namespace --set 'controller.tag=$(TAG)' --set 'gateway.replicas=1' --set 'pilot.tag=$(ISTIO_LATEST_IMAGE_TAG)' --set 'gateway.tag=$(ENVOY_LATEST_IMAGE_TAG)' --set 'global.local=true'

View File

@@ -1 +1 @@
v2.1.5-rc.1
v2.1.5

View File

@@ -1,5 +1,5 @@
apiVersion: v2
appVersion: 2.1.5-rc.1
appVersion: 2.1.5
description: Helm chart for deploying higress gateways
icon: https://higress.io/img/higress_logo_small.png
home: http://higress.io/
@@ -15,4 +15,4 @@ dependencies:
repository: "file://../redis"
version: 0.0.1
type: application
version: 2.1.5-rc.1
version: 2.1.5

View File

@@ -9,9 +9,7 @@
accessLogFile: "/dev/stdout"
{{- end }}
ingressControllerMode: "OFF"
accessLogFormat: '{"ai_log":"%FILTER_STATE(wasm.ai_log:PLAIN)%","authority":"%REQ(X-ENVOY-ORIGINAL-HOST?:AUTHORITY)%","bytes_received":"%BYTES_RECEIVED%","bytes_sent":"%BYTES_SENT%","downstream_local_address":"%DOWNSTREAM_LOCAL_ADDRESS%","downstream_remote_address":"%DOWNSTREAM_REMOTE_ADDRESS%","duration":"%DURATION%","istio_policy_status":"%DYNAMIC_METADATA(istio.mixer:status)%","method":"%REQ(:METHOD)%","path":"%REQ(X-ENVOY-ORIGINAL-PATH?:PATH)%","protocol":"%PROTOCOL%","request_id":"%REQ(X-REQUEST-ID)%","requested_server_name":"%REQUESTED_SERVER_NAME%","response_code":"%RESPONSE_CODE%","response_flags":"%RESPONSE_FLAGS%","route_name":"%ROUTE_NAME%","start_time":"%START_TIME%","trace_id":"%REQ(X-B3-TRACEID)%","upstream_cluster":"%UPSTREAM_CLUSTER%","upstream_host":"%UPSTREAM_HOST%","upstream_local_address":"%UPSTREAM_LOCAL_ADDRESS%","upstream_service_time":"%RESP(X-ENVOY-UPSTREAM-SERVICE-TIME)%","upstream_transport_failure_reason":"%UPSTREAM_TRANSPORT_FAILURE_REASON%","user_agent":"%REQ(USER-AGENT)%","x_forwarded_for":"%REQ(X-FORWARDED-FOR)%","response_code_details":"%RESPONSE_CODE_DETAILS%"}
'
accessLogFormat: '{"ai_log":"%FILTER_STATE(wasm.ai_log:PLAIN)%","authority":"%REQ(X-ENVOY-ORIGINAL-HOST?:AUTHORITY)%","bytes_received":"%BYTES_RECEIVED%","bytes_sent":"%BYTES_SENT%","downstream_local_address":"%DOWNSTREAM_LOCAL_ADDRESS%","downstream_remote_address":"%DOWNSTREAM_REMOTE_ADDRESS%","duration":"%DURATION%","istio_policy_status":"%DYNAMIC_METADATA(istio.mixer:status)%","method":"%REQ(:METHOD)%","path":"%REQ(X-ENVOY-ORIGINAL-PATH?:PATH)%","protocol":"%PROTOCOL%","request_id":"%REQ(X-REQUEST-ID)%","requested_server_name":"%REQUESTED_SERVER_NAME%","response_code":"%RESPONSE_CODE%","response_flags":"%RESPONSE_FLAGS%","route_name":"%ROUTE_NAME%","start_time":"%START_TIME%","trace_id":"%REQ(X-B3-TRACEID)%","upstream_cluster":"%UPSTREAM_CLUSTER%","upstream_host":"%UPSTREAM_HOST%","upstream_local_address":"%UPSTREAM_LOCAL_ADDRESS%","upstream_service_time":"%RESP(X-ENVOY-UPSTREAM-SERVICE-TIME)%","upstream_transport_failure_reason":"%UPSTREAM_TRANSPORT_FAILURE_REASON%","user_agent":"%REQ(USER-AGENT)%","x_forwarded_for":"%REQ(X-FORWARDED-FOR)%","response_code_details":"%RESPONSE_CODE_DETAILS%"}'
dnsRefreshRate: 200s
enableAutoMtls: false
enablePrometheusMerge: false
@@ -99,7 +97,7 @@ metadata:
name: higress-config
namespace: {{ .Release.Namespace }}
labels:
{{- include "gateway.labels" . | nindent 4 }}
{{- include "gateway.labels" . | nindent 4 }}
data:
higress: |-
{{- $existingConfig := lookup "v1" "ConfigMap" .Release.Namespace "higress-config" }}
@@ -126,7 +124,7 @@ data:
{{- else }}
networks: {}
{{- end }}
mesh: |-
{{- if .Values.meshConfig }}
{{ $mesh | toYaml | indent 4 }}

View File

@@ -6,4 +6,8 @@ metadata:
namespace: {{ .Release.Namespace }}
labels:
{{- include "controller.labels" . | nindent 4 }}
{{- with .Values.controller.serviceAccount.annotations }}
annotations:
{{- toYaml . | nindent 4 }}
{{- end }}
{{- end }}

View File

@@ -581,8 +581,7 @@ controller:
# -- Labels to apply to the pod
podLabels: {}
podSecurityContext:
{}
podSecurityContext: {}
# fsGroup: 2000
ports:
@@ -709,13 +708,13 @@ tracing:
enable: false
sampling: 100
timeout: 500
skywalking:
# access_token: ""
service: ""
port: 11800
# skywalking:
# access_token: ""
# service: ""
# port: 11800
# zipkin:
# service: ""
# port: 9411
# service: ""
# port: 9411
# -- Downstream config settings
downstream:
@@ -787,7 +786,7 @@ pluginServer:
# Plugin-server Service configuration
service:
port: 80 # Container target port (usually fixed)
port: 80 # Container target port (usually fixed)
resources:
requests:
@@ -795,4 +794,4 @@ pluginServer:
memory: 128Mi
limits:
cpu: 500m
memory: 256Mi
memory: 256Mi

View File

@@ -1,9 +1,9 @@
dependencies:
- name: higress-core
repository: file://../core
version: 2.1.5-rc.1
version: 2.1.5
- name: higress-console
repository: https://higress.io/helm-charts/
version: 2.1.4
digest: sha256:6dbbfb24eabe0927a167c11896799ea20c7f8590aa2889b853dc9a210d075d3a
generated: "2025-06-18T09:15:09.621898+08:00"
version: 2.1.5
digest: sha256:1c7c8003686b2df2c67427054006aef21c92ab1ff86d2e5f5587daf02ebc7d61
generated: "2025-07-02T17:38:10.089494+08:00"

View File

@@ -1,5 +1,5 @@
apiVersion: v2
appVersion: 2.1.5-rc.1
appVersion: 2.1.5
description: Helm chart for deploying Higress gateways
icon: https://higress.io/img/higress_logo_small.png
home: http://higress.io/
@@ -12,9 +12,9 @@ sources:
dependencies:
- name: higress-core
repository: "file://../core"
version: 2.1.5-rc.1
version: 2.1.5
- name: higress-console
repository: "https://higress.io/helm-charts/"
version: 2.1.4
version: 2.1.5
type: application
version: 2.1.5-rc.1
version: 2.1.5

View File

@@ -306,7 +306,5 @@ The command removes all the Kubernetes components associated with the chart and
| revision | string | `""` | |
| tracing.enable | bool | `false` | |
| tracing.sampling | int | `100` | |
| tracing.skywalking.port | int | `11800` | |
| tracing.skywalking.service | string | `""` | |
| tracing.timeout | int | `500` | |
| upstream | object | `{"connectionBufferLimits":10485760,"idleTimeout":10}` | Upstream config settings |

View File

@@ -25,6 +25,14 @@ type DBClient struct {
panicCount int32 // Add panic counter
}
// supports database types
const (
MYSQL = "mysql"
POSTGRES = "postgres"
CLICKHOUSE = "clickhouse"
SQLITE = "sqlite"
)
// NewDBClient creates a new DBClient instance and establishes a connection to the database
func NewDBClient(dsn string, dbType string, stop chan struct{}) *DBClient {
client := &DBClient{
@@ -53,13 +61,13 @@ func (c *DBClient) connect() error {
}
switch c.dbType {
case "postgres":
case POSTGRES:
db, err = gorm.Open(postgres.Open(c.dsn), &gormConfig)
case "clickhouse":
case CLICKHOUSE:
db, err = gorm.Open(clickhouse.Open(c.dsn), &gormConfig)
case "mysql":
case MYSQL:
db, err = gorm.Open(mysql.Open(c.dsn), &gormConfig)
case "sqlite":
case SQLITE:
db, err = gorm.Open(sqlite.Open(c.dsn), &gormConfig)
default:
return fmt.Errorf("unsupported database type %s", c.dbType)
@@ -125,25 +133,166 @@ func (c *DBClient) reconnectLoop() {
}
}
// ExecuteSQL executes a raw SQL query and returns the result as a slice of maps
func (c *DBClient) ExecuteSQL(query string, args ...interface{}) ([]map[string]interface{}, error) {
func (c *DBClient) reconnectIfDbEmpty() error {
if c.db == nil {
// Trigger reconnection
select {
case c.reconnect <- struct{}{}:
default:
}
return nil, fmt.Errorf("database is not connected, attempting to reconnect")
return fmt.Errorf("database is not connected, attempting to reconnect")
}
return nil
}
rows, err := c.db.Raw(query, args...).Rows()
func (c *DBClient) handleSQLError(err error) error {
if err != nil {
// If execution fails, connection might be lost, trigger reconnection
select {
case c.reconnect <- struct{}{}:
default:
}
return nil, fmt.Errorf("failed to execute SQL query: %w", err)
return fmt.Errorf("failed to execute SQL: %w", err)
}
return nil
}
// DescribeTable returns the column structure of the given table.
//
// Every supported dialect uses its own catalog query, but each one aliases its
// result columns to the same names: column_name, column_type, is_nullable,
// column_key, column_default, extra and column_comment.
//
// The table name is always passed as a bound parameter, never concatenated
// into the SQL text. An error is returned for an unsupported database type or
// when the underlying Query call fails.
func (c *DBClient) DescribeTable(table string) ([]map[string]interface{}, error) {
	var sql string
	// Bound parameters must be []interface{} so they can be spread into the
	// variadic Query call; passing a []string would arrive as ONE argument and
	// leave the second placeholder of the Postgres query unbound.
	var args []interface{}
	switch c.dbType {
	case MYSQL:
		sql = `
			select
				column_name,
				column_type,
				is_nullable,
				column_key,
				column_default,
				extra,
				column_comment
			from information_schema.columns
			where table_schema = database() and table_name = ?
		`
		args = []interface{}{table}
	case POSTGRES:
		// The statement is not passed through fmt here, so '%%' reaches the
		// server verbatim; two LIKE wildcards match the same rows as one.
		sql = `
			select
				column_name,
				data_type as column_type,
				is_nullable,
				case
					when column_default like 'nextval%%' then 'auto_increment'
					when column_default is not null then 'default'
					else ''
				end as column_key,
				column_default,
				case
					when column_default like 'nextval%%' then 'auto_increment'
					else ''
				end as extra,
				col_description((select oid from pg_class where relname = ?), ordinal_position) as column_comment
			from information_schema.columns
			where table_name = ?
		`
		// Two placeholders: one for the pg_class lookup, one for the filter.
		args = []interface{}{table, table}
	case CLICKHOUSE:
		sql = `
			select
				name as column_name,
				type as column_type,
				if(is_nullable, 'YES', 'NO') as is_nullable,
				default_kind as column_key,
				default_expression as column_default,
				default_kind as extra,
				comment as column_comment
			from system.columns
			where database = currentDatabase() and table = ?
		`
		args = []interface{}{table}
	case SQLITE:
		sql = `
			select
				name as column_name,
				type as column_type,
				not (notnull = 1) as is_nullable,
				pk as column_key,
				dflt_value as column_default,
				'' as extra,
				'' as column_comment
			from pragma_table_info(?)
		`
		args = []interface{}{table}
	default:
		return nil, fmt.Errorf("unsupported database type: %s", c.dbType)
	}
	return c.Query(sql, args...)
}
// ListTables returns the names of the tables in the connected database.
//
// The catalog query depends on the configured database type. An error is
// returned for an unsupported type, when the client currently has no database
// connection, when the query fails, or when a row cannot be scanned.
func (c *DBClient) ListTables() ([]string, error) {
	var sql string
	switch c.dbType {
	case MYSQL:
		sql = "show tables"
	case POSTGRES:
		sql = "select tablename from pg_tables where schemaname = 'public'"
	case CLICKHOUSE:
		sql = "select name from system.tables where database = currentDatabase()"
	case SQLITE:
		sql = "select name from sqlite_master where type='table'"
	default:
		return nil, fmt.Errorf("unsupported database type: %s", c.dbType)
	}

	// Same guard as Execute and Query: without it a missing connection would
	// dereference a nil c.db below instead of returning an error.
	if err := c.reconnectIfDbEmpty(); err != nil {
		return nil, err
	}

	rows, err := c.db.Raw(sql).Rows()
	if err := c.handleSQLError(err); err != nil {
		return nil, err
	}
	defer rows.Close()

	var tables []string
	for rows.Next() {
		var table string
		if err := rows.Scan(&table); err != nil {
			return nil, fmt.Errorf("failed to scan table name: %w", err)
		}
		tables = append(tables, table)
	}
	// Next returning false can also mean the iteration was aborted by an
	// error; surface it instead of returning a silently truncated list.
	if err := rows.Err(); err != nil {
		return nil, fmt.Errorf("failed to iterate table names: %w", err)
	}
	return tables, nil
}
// Execute runs a raw INSERT, UPDATE, or DELETE statement and returns the
// number of rows affected.
//
// An error is returned when the client currently has no database connection
// or when the statement fails; handleSQLError also signals the reconnect loop
// in the failure case.
func (c *DBClient) Execute(sql string, args ...interface{}) (int64, error) {
	if err := c.reconnectIfDbEmpty(); err != nil {
		return 0, err
	}

	// No Begin is issued in this method, so there is no transaction to commit:
	// the statement is executed directly and Commit must not be called on the
	// result.
	result := c.db.Exec(sql, args...)
	if err := c.handleSQLError(result.Error); err != nil {
		return 0, err
	}
	return result.RowsAffected, nil
}
// Query executes a raw SQL query and returns the result as a slice of maps
func (c *DBClient) Query(sql string, args ...interface{}) ([]map[string]interface{}, error) {
if err := c.reconnectIfDbEmpty(); err != nil {
return nil, err
}
rows, err := c.db.Raw(sql, args...).Rows()
if err := c.handleSQLError(err); err != nil {
return nil, err
}
defer rows.Close()

View File

@@ -49,11 +49,24 @@ func (c *DBConfig) NewServer(serverName string) (*common.MCPServer, error) {
)
dbClient := NewDBClient(c.dsn, c.dbType, mcpServer.GetDestoryChannel())
descriptionSuffix := fmt.Sprintf("in database %s. Database description: %s", c.dbType, c.description)
// Add query tool
mcpServer.AddTool(
mcp.NewToolWithRawSchema("query", fmt.Sprintf("Run a read-only SQL query in database %s. Database description: %s", c.dbType, c.description), GetQueryToolSchema()),
mcp.NewToolWithRawSchema("query", fmt.Sprintf("Run a read-only SQL query %s", descriptionSuffix), GetQueryToolSchema()),
HandleQueryTool(dbClient),
)
mcpServer.AddTool(
mcp.NewToolWithRawSchema("execute", fmt.Sprintf("Execute an insert, update, or delete SQL %s", descriptionSuffix), GetExecuteToolSchema()),
HandleExecuteTool(dbClient),
)
mcpServer.AddTool(
mcp.NewToolWithRawSchema("list tables", fmt.Sprintf("List all tables %s", descriptionSuffix), GetListTablesToolSchema()),
HandleListTablesTool(dbClient),
)
mcpServer.AddTool(
mcp.NewToolWithRawSchema("describe table", fmt.Sprintf("Get the structure of a specific table %s", descriptionSuffix), GetDescribeTableToolSchema()),
HandleDescribeTableTool(dbClient),
)
return mcpServer, nil
}

View File

@@ -18,27 +18,80 @@ func HandleQueryTool(dbClient *DBClient) common.ToolHandlerFunc {
return nil, fmt.Errorf("invalid message argument")
}
results, err := dbClient.ExecuteSQL(message)
results, err := dbClient.Query(message)
if err != nil {
return nil, fmt.Errorf("failed to execute SQL query: %w", err)
}
jsonData, err := json.Marshal(results)
if err != nil {
return nil, fmt.Errorf("failed to marshal SQL results: %w", err)
return buildCallToolResult(results)
}
}
// HandleExecuteTool handles SQL INSERT, UPDATE, or DELETE execution
func HandleExecuteTool(dbClient *DBClient) common.ToolHandlerFunc {
return func(ctx context.Context, request mcp.CallToolRequest) (*mcp.CallToolResult, error) {
arguments := request.Params.Arguments
message, ok := arguments["sql"].(string)
if !ok {
return nil, fmt.Errorf("invalid message argument")
}
return &mcp.CallToolResult{
Content: []mcp.Content{
mcp.TextContent{
Type: "text",
Text: string(jsonData),
},
},
}, nil
results, err := dbClient.Execute(message)
if err != nil {
return nil, fmt.Errorf("failed to execute SQL query: %w", err)
}
return buildCallToolResult(results)
}
}
// HandleListTablesTool builds the tool handler that lists every table known to
// dbClient and returns the names as a JSON text result. The tool takes no
// arguments, so the incoming request is not inspected.
func HandleListTablesTool(dbClient *DBClient) common.ToolHandlerFunc {
	listTables := func(_ context.Context, _ mcp.CallToolRequest) (*mcp.CallToolResult, error) {
		tables, listErr := dbClient.ListTables()
		if listErr != nil {
			return nil, fmt.Errorf("failed to execute SQL query: %w", listErr)
		}
		return buildCallToolResult(tables)
	}
	return listTables
}
// HandleDescribeTableTool builds the tool handler that returns the structure
// of a single table as a JSON text result.
//
// The handler reads the table name from the "table" request argument and
// fails when the argument is missing, is not a string, or is empty.
func HandleDescribeTableTool(dbClient *DBClient) common.ToolHandlerFunc {
	return func(ctx context.Context, request mcp.CallToolRequest) (*mcp.CallToolResult, error) {
		table, ok := request.Params.Arguments["table"].(string)
		if !ok || table == "" {
			// Name the argument that is actually wrong; this tool has no
			// "message" argument.
			return nil, fmt.Errorf("invalid table argument")
		}

		columns, err := dbClient.DescribeTable(table)
		if err != nil {
			return nil, fmt.Errorf("failed to describe table %q: %w", table, err)
		}

		return buildCallToolResult(columns)
	}
}
// buildCallToolResult serializes results to JSON and wraps the JSON text in a
// CallToolResult holding a single text content item. It fails only when the
// value cannot be marshaled.
func buildCallToolResult(results any) (*mcp.CallToolResult, error) {
	payload, marshalErr := json.Marshal(results)
	if marshalErr != nil {
		return nil, fmt.Errorf("failed to marshal SQL results: %w", marshalErr)
	}

	text := mcp.TextContent{
		Type: "text",
		Text: string(payload),
	}
	toolResult := &mcp.CallToolResult{Content: []mcp.Content{text}}
	return toolResult, nil
}
// GetQueryToolSchema returns the schema for query tool
func GetQueryToolSchema() json.RawMessage {
return json.RawMessage(`
@@ -53,3 +106,44 @@ func GetQueryToolSchema() json.RawMessage {
}
`)
}
// GetExecuteToolSchema returns the JSON schema of the execute tool's input.
//
// The schema declares one string property, "sql", and marks it as required so
// that the schema matches the handler, which rejects calls without it.
func GetExecuteToolSchema() json.RawMessage {
	return json.RawMessage(`
{
	"type": "object",
	"properties": {
		"sql": {
			"type": "string",
			"description": "The sql to execute"
		}
	},
	"required": ["sql"]
}
`)
}
// GetDescribeTableToolSchema returns the JSON schema of the describe-table
// tool's input.
//
// The schema declares one string property, "table", and marks it as required
// so that the schema matches the handler, which rejects calls without it.
func GetDescribeTableToolSchema() json.RawMessage {
	return json.RawMessage(`
{
	"type": "object",
	"properties": {
		"table": {
			"type": "string",
			"description": "table name"
		}
	},
	"required": ["table"]
}
`)
}
// GetListTablesToolSchema returns the JSON schema of the list-tables tool's
// input: an object with no properties, since the tool takes no arguments.
func GetListTablesToolSchema() json.RawMessage {
	const schema = `
{
"type": "object",
"properties": {
}
}
`
	return json.RawMessage(schema)
}

View File

@@ -4,6 +4,7 @@ import (
"encoding/json"
"fmt"
"net/http"
"net/url"
"sync"
"time"
@@ -94,13 +95,15 @@ func (s *SSEServer) HandleSSE(cb api.FilterCallbackHandler, stopChan chan struct
defer s.sessions.Delete(sessionID)
channel := GetSSEChannelName(sessionID)
u, err := url.Parse(s.baseURL + s.messageEndpoint)
if err != nil {
api.LogErrorf("Failed to parse base URL: %v", err)
}
messageEndpoint := fmt.Sprintf(
"%s%s?sessionId=%s",
s.baseURL,
s.messageEndpoint,
sessionID,
)
q := u.Query()
q.Set("sessionId", sessionID)
u.RawQuery = q.Encode()
messageEndpoint := u.String()
// go func() {
// for {
@@ -126,7 +129,7 @@ func (s *SSEServer) HandleSSE(cb api.FilterCallbackHandler, stopChan chan struct
// }
// }()
err := s.redisClient.Subscribe(channel, stopChan, func(message string) {
err = s.redisClient.Subscribe(channel, stopChan, func(message string) {
defer cb.EncoderFilterCallbacks().RecoverPanic()
api.LogDebugf("SSE Send message: %s", message)
cb.EncoderFilterCallbacks().InjectData([]byte(message))
@@ -210,7 +213,7 @@ func (s *SSEServer) HandleMessage(w http.ResponseWriter, r *http.Request, body j
var status int
// Only send response if there is one (not for notifications)
if response != nil {
if sessionID != "" {
if sessionID != "" {
w.WriteHeader(http.StatusAccepted)
status = http.StatusAccepted
} else {

View File

@@ -129,9 +129,15 @@ func (f *filter) processMcpRequestHeadersForRestUpstream(header api.RequestHeade
if method != http.MethodGet {
f.callbacks.DecoderFilterCallbacks().SendLocalReply(http.StatusMethodNotAllowed, "Method not allowed", nil, 0, "")
} else {
// to support the query param in Message Endpoint
trimmed := strings.TrimSuffix(requestUrl.Path, GlobalSSEPathSuffix)
if rq := requestUrl.RawQuery; rq != "" {
trimmed += "?" + rq
}
f.config.defaultServer = common.NewSSEServer(common.NewMCPServer(DefaultServerName, Version),
common.WithSSEEndpoint(GlobalSSEPathSuffix),
common.WithMessageEndpoint(strings.TrimSuffix(requestUrl.Path, GlobalSSEPathSuffix)),
common.WithMessageEndpoint(trimmed),
common.WithRedisClient(f.config.redisClient))
f.serverName = f.config.defaultServer.GetServerName()
body := "SSE connection create"

View File

@@ -0,0 +1,98 @@
---
title: AI IMAGE READER
keywords: [ AI网关, AI IMAGE READER ]
description: AI IMAGE READER 插件配置参考
---
## 功能说明
通过对接 OCR 服务实现 AI-IMAGE-READER，目前支持阿里云模型服务灵积（dashscope）的 qwen-vl-ocr 模型提供 OCR 服务，流程如图所示：
<img src="./ai-image-reader.png">
## 运行属性
插件执行阶段:`默认阶段`
插件执行优先级:`400`
## 配置说明
| 名称 | 数据类型 | 填写要求 | 默认值 | 描述 |
| ------------- | -------- | -------- | ------ | -------------------------------------- |
| `apiKey` | string | 必填 | - | 用于在访问OCR服务时进行认证的令牌。 |
| `type` | string | 必填 | - | 后端OCR服务提供商类型（例如 dashscope） |
| `serviceHost` | string | 必填 | - | 后端OCR服务域名 |
| `serviceName` | string | 必填 | - | 后端OCR服务名 |
| `servicePort` | int | 必填 | - | 后端OCR服务端口 |
| `model` | string | 必填 | - | 后端OCR服务模型名称（例如 qwen-vl-ocr） |
| `timeout` | int | 选填 | 10000 | API调用超时时间（毫秒） |
## 示例
```yaml
apiKey: "YOUR_API_KEY"
type: "dashscope"
model: "qwen-vl-ocr"
timeout: 10000
serviceHost: "dashscope.aliyuncs.com"
serviceName: "dashscope"
servicePort: 443
```
请求遵循openai api协议规范:
URL传递图片
```
messages=[{
"role": "user",
"content": [
{"type": "text", "text": "What's in this image?"},
{
"type": "image_url",
"image_url": {
"url": "https://help-static-aliyun-doc.aliyuncs.com/file-manage-files/zh-CN/20241108/ctdzex/biaozhun.jpg",
},
},
],
}],
```
Base64编码传递图片
```
messages=[
{
"role": "user",
"content": [
{ "type": "text", "text": "what's in this image?" },
{
"type": "image_url",
"image_url": {
"url": f"data:image/jpeg;base64,{base64_image}",
},
},
],
}
],
```
以下为使用ai-image-reader进行增强的例子，原始请求为：
```
图片内容是什么?
```
未经过ai-image-reader插件处理，LLM返回的结果为：
```
对不起作为一个文本AI助手我无法查看图片内容。您可以描述一下图片的内容我可以尽力帮助您识别。
```
经过ai-image-reader插件处理后，LLM返回的结果为：
```
非常感谢您分享的图片内容根据您提供的文字信息学习编写shell脚本对Linux系统管理员来说是非常有益的。通过自动化系统管理任务可以提高效率并减少手动操作的时间。对于家用Linux爱好者来说了解如何在命令行下操作也是很重要的因为在某些情况下命令行操作可能更为便捷和高效。在本书中您将学习如何运用shell脚本处理系统管理任务以及如何在Linux命令行下进行操作。希望这本书能够帮助您更好地理解和应用Linux系统管理和操作的知识如果您有任何其他问题或需要进一步帮助请随时告诉我。
```

View File

@@ -0,0 +1,94 @@
---
title: AI IMAGE READER
keywords: [ AI GATEWAY, AI IMAGE READER ]
description: AI IMAGE READER Plugin Configuration Reference
---
## Function Description
AI-IMAGE-READER is implemented by integrating with an OCR service. Currently it supports the qwen-vl-ocr model provided by Alibaba Cloud DashScope. The process is shown in the figure below:

<img src="./ai-image-reader-en.png">
## Running Attributes
Plugin execution phase: `Default Phase`
Plugin execution priority: `400`
## Configuration Description
| Name | Data Type | Requirement | Default Value | Description |
| ------------- | --------- | ----------- | ------------- | ------------------------------------------------------------ |
| `apiKey` | string | Required | - | Token for authenticating access to OCR services. |
| `type` | string | Required | - | Provider type of the backend OCR service (e.g., dashscope). |
| `serviceHost` | string | Required | - | Host of the backend OCR service. |
| `serviceName` | string | Required | - | Name of the backend OCR service. |
| `servicePort` | int | Required | - | Port of the backend OCR service. |
| `model` | string | Required | - | Model name of the backend OCR service (e.g., qwen-vl-ocr). |
| `timeout` | int | Optional | 10000 | API call timeout duration (milliseconds). |
## Example
```yaml
apiKey: "YOUR_API_KEY"
type: "dashscope"
model: "qwen-vl-ocr"
timeout: 10000
serviceHost: "dashscope.aliyuncs.com"
serviceName: "dashscope"
servicePort: 443
```
Requests follow the OpenAI API protocol specification:
Pass images via URL:
```
messages=[{
"role": "user",
"content": [
{"type": "text", "text": "What's in this image?"},
{
"type": "image_url",
"image_url": {
"url": "https://help-static-aliyun-doc.aliyuncs.com/file-manage-files/zh-CN/20241108/ctdzex/biaozhun.jpg",
},
},
],
}],
```
Pass images via Base64:
```
messages=[
{
"role": "user",
"content": [
{ "type": "text", "text": "what's in this image?" },
{
"type": "image_url",
"image_url": {
"url": f"data:image/jpeg;base64,{base64_image}",
},
},
],
}
],
```
The following is an example of using ai-image-reader for enhancement. The original request was:
```
What is the content of the image?
```
The result returned by the LLM without processing from the ai-image-reader plugin is:
```
Sorry, as a text-based AI assistant, I cannot view image content. You can describe the content of the image, and I will do my best to help you identify it.
```
The result returned by the LLM after processing by the ai-image-reader plugin is:
```
Thank you for sharing the image! Mastering shell scripting is highly beneficial for Linux system administrators as it automates tasks, boosts efficiency, and cuts down manual work. For home Linux users, command-line skills are equally important for quick and efficient operations. This book will teach you to handle system management tasks with shell scripts and operate in the Linux command line. Hope it aids your Linux system management learning! Feel free to ask if you have more questions.
```

View File

Binary file not shown.

After

Width:  |  Height:  |  Size: 30 KiB

View File

Binary file not shown.

After

Width:  |  Height:  |  Size: 24 KiB

View File

@@ -0,0 +1,177 @@
package main
import (
"encoding/json"
"errors"
"fmt"
"github.com/alibaba/higress/plugins/wasm-go/pkg/log"
"github.com/alibaba/higress/plugins/wasm-go/pkg/wrapper"
"github.com/tidwall/gjson"
"net/http"
"strconv"
)
// Defaults and fixed request parameters used by the DashScope OCR provider.
const (
// DashscopeDomain is the service host applied when the provider config
// leaves serviceHost empty.
DashscopeDomain = "dashscope.aliyuncs.com"
// DashscopePort is the service port applied when the provider config
// leaves servicePort at zero.
DashscopePort = 443
// DashscopeDefaultModelName is the model sent when the provider config
// does not name one.
DashscopeDefaultModelName = "qwen-vl-ocr"
// DashscopeEndpoint is the request path of the OCR call.
DashscopeEndpoint = "/compatible-mode/v1/chat/completions"
// MinPixels is sent as min_pixels with every image content item.
MinPixels = 3136
// MaxPixels is sent as max_pixels with every image content item.
MaxPixels = 1003520
)
// OcrReq is the JSON request body sent to the OCR endpoint.
type OcrReq struct {
Model string `json:"model,omitempty"`
Messages []chatMessage `json:"messages,omitempty"`
}
// OcrResp is the part of the OCR response body that this plugin decodes.
type OcrResp struct {
Choices []chatCompletionChoice `json:"choices"`
}
// chatCompletionChoice is one entry of OcrResp.Choices. Message is a pointer
// and stays nil when the response entry carries no "message" field.
type chatCompletionChoice struct {
Message *chatMessageContent `json:"message,omitempty"`
}
// chatMessageContent is a response message whose content is a plain string.
type chatMessageContent struct {
Role string `json:"role,omitempty"`
Content string `json:"content,omitempty"`
}
// chatMessage is a request message whose content is a list of typed parts.
type chatMessage struct {
Role string `json:"role"`
Content []content `json:"content"`
}
// imageURL wraps the URL of the image referenced by a content part.
type imageURL struct {
URL string `json:"url"`
}
// content is one part of a request message. Type is always serialized; the
// integer and text fields are omitted from the JSON when they are zero/empty.
type content struct {
Type string `json:"type"`
ImageUrl imageURL `json:"image_url,omitempty"`
MinPixels int `json:"min_pixels,omitempty"`
MaxPixels int `json:"max_pixels,omitempty"`
Text string `json:"text,omitempty"`
}
// dashScopeConfig holds the DashScope settings read by InitConfig. It is a
// single package-level value shared by every DSProvider in this package.
var dashScopeConfig dashScopeProviderConfig
// dashScopeProviderInitializer parses and validates the DashScope settings and
// creates DSProvider instances.
type dashScopeProviderInitializer struct {
}
// InitConfig reads the "apiKey" field of the plugin configuration into the
// package-level dashScopeConfig.
// Note: the parameter is named json and therefore shadows the encoding/json
// package inside this method.
func (d *dashScopeProviderInitializer) InitConfig(json gjson.Result) {
dashScopeConfig.apiKey = json.Get("apiKey").String()
}
// ValidateConfig reports an error when no API key has been loaded into the
// package-level dashScopeConfig; otherwise it returns nil.
func (d *dashScopeProviderInitializer) ValidateConfig() error {
	if dashScopeConfig.apiKey != "" {
		return nil
	}
	return errors.New("[DashScope] apiKey is required")
}
// CreateProvider builds a DSProvider from c. An empty service host and a zero
// service port are replaced by the DashScope defaults before the cluster
// client is created. The returned error is always nil.
func (d *dashScopeProviderInitializer) CreateProvider(c ProviderConfig) (Provider, error) {
	if c.serviceHost == "" {
		c.serviceHost = DashscopeDomain
	}
	if c.servicePort == 0 {
		c.servicePort = DashscopePort
	}

	cluster := wrapper.FQDNCluster{
		FQDN: c.serviceName,
		Host: c.serviceHost,
		Port: int64(c.servicePort),
	}
	provider := &DSProvider{
		config: c,
		client: wrapper.NewClusterClient(cluster),
	}
	return provider, nil
}
// dashScopeProviderConfig holds the DashScope-specific settings.
type dashScopeProviderConfig struct {
// apiKey is sent as the bearer token of every OCR request.
// @Title zh-CN 文字识别服务 API Key
// @Description zh-CN 文字识别服务 API Key
apiKey string
}
// DSProvider is the DashScope OCR provider: the provider configuration plus
// the HTTP client used to reach the OCR service.
type DSProvider struct {
config ProviderConfig
client wrapper.HttpClient
}
// GetProviderType returns the provider type identifier ProviderTypeDashscope.
func (d *DSProvider) GetProviderType() string {
return ProviderTypeDashscope
}
// CallArgs assembles the HTTP call for recognizing the image at imageUrl: a
// POST to DashscopeEndpoint with a JSON body containing a single user message
// that holds one image_url part, authenticated with the configured API key.
// The configured model is used, falling back to DashscopeDefaultModelName.
func (d *DSProvider) CallArgs(imageUrl string) CallArgs {
	modelName := DashscopeDefaultModelName
	if d.config.model != "" {
		modelName = d.config.model
	}

	imagePart := content{
		Type:      "image_url",
		ImageUrl:  imageURL{URL: imageUrl},
		MinPixels: MinPixels,
		MaxPixels: MaxPixels,
	}
	userMessage := chatMessage{
		Role:    "user",
		Content: []content{imagePart},
	}

	// The marshal error is discarded, exactly as before: the request is built
	// only from strings and ints.
	payload, _ := json.Marshal(OcrReq{
		Model:    modelName,
		Messages: []chatMessage{userMessage},
	})

	headers := [][2]string{
		{"Content-Type", "application/json"},
		{"Authorization", fmt.Sprintf("Bearer %s", dashScopeConfig.apiKey)},
	}

	return CallArgs{
		Method:             http.MethodPost,
		Url:                DashscopeEndpoint,
		Headers:            headers,
		Body:               payload,
		TimeoutMillisecond: d.config.timeout,
	}
}
// parseOcrResponse decodes a JSON response body into an OcrResp. It returns
// the decoding error unchanged when the body is not valid for that type.
func (d *DSProvider) parseOcrResponse(responseBody []byte) (*OcrResp, error) {
	parsed := new(OcrResp)
	if decodeErr := json.Unmarshal(responseBody, parsed); decodeErr != nil {
		return nil, decodeErr
	}
	return parsed, nil
}
// DoOCR sends the image at imageUrl to the OCR service and reports the outcome
// through callback.
//
// callback receives the content of the first choice's message on success. It
// receives an empty string and an error when the status code is not 200, when
// the body cannot be parsed, or when the response has no usable choice.
//
// The returned error is whatever the HTTP client's Call returns.
func (d *DSProvider) DoOCR(
	imageUrl string,
	callback func(imageContent string, err error)) error {
	args := d.CallArgs(imageUrl)
	return d.client.Call(args.Method, args.Url, args.Headers, args.Body,
		func(statusCode int, responseHeaders http.Header, responseBody []byte) {
			if statusCode != http.StatusOK {
				callback("", errors.New("failed to do ocr due to status code: "+strconv.Itoa(statusCode)))
				return
			}
			log.Debugf("do ocr response: %d, %s", statusCode, responseBody)

			resp, err := d.parseOcrResponse(responseBody)
			if err != nil {
				callback("", fmt.Errorf("failed to parse response: %v", err))
				return
			}
			// Message is a pointer and is nil when a choice has no "message"
			// field; treat that like a missing choice instead of dereferencing
			// nil.
			if len(resp.Choices) == 0 || resp.Choices[0].Message == nil {
				callback("", errors.New("no ocr response found"))
				return
			}
			callback(resp.Choices[0].Message.Content, nil)
		}, args.TimeoutMillisecond)
}

View File

@@ -0,0 +1,19 @@
module ai-image-reader
go 1.19
require (
github.com/alibaba/higress/plugins/wasm-go v1.4.4-0.20250621002302-e94ac43dd15c
github.com/tidwall/gjson v1.18.0
)
require (
github.com/google/uuid v1.3.0 // indirect
github.com/higress-group/nottinygc v0.0.0-20231101025119-e93c4c2f8520 // indirect
github.com/higress-group/proxy-wasm-go-sdk v1.0.1 // indirect
github.com/magefile/mage v1.14.0 // indirect
github.com/tidwall/match v1.1.1 // indirect
github.com/tidwall/pretty v1.2.0 // indirect
github.com/tidwall/resp v0.1.1 // indirect
github.com/tidwall/sjson v1.2.5 // indirect
)

View File

@@ -0,0 +1,25 @@
github.com/alibaba/higress/plugins/wasm-go v1.4.4-0.20250621002302-e94ac43dd15c h1:YGKECMrlahN6dyEaM/S5NEU4IJoFzWKsHQyawov6ep8=
github.com/alibaba/higress/plugins/wasm-go v1.4.4-0.20250621002302-e94ac43dd15c/go.mod h1:E2xVWrIovU3rZi4HGlMfcYf+c/UVh3aCtpcJlNjpxYc=
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
github.com/google/uuid v1.3.0 h1:t6JiXgmwXMjEs8VusXIJk2BXHsn+wx8BZdTaoZ5fu7I=
github.com/google/uuid v1.3.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
github.com/higress-group/nottinygc v0.0.0-20231101025119-e93c4c2f8520 h1:IHDghbGQ2DTIXHBHxWfqCYQW1fKjyJ/I7W1pMyUDeEA=
github.com/higress-group/nottinygc v0.0.0-20231101025119-e93c4c2f8520/go.mod h1:Nz8ORLaFiLWotg6GeKlJMhv8cci8mM43uEnLA5t8iew=
github.com/higress-group/proxy-wasm-go-sdk v1.0.1 h1:f9X4I5Y6jK3GrdsWn/lCTI1z5Lu5GOMazqQohAC3Vzk=
github.com/higress-group/proxy-wasm-go-sdk v1.0.1/go.mod h1:iiSyFbo+rAtbtGt/bsefv8GU57h9CCLYGJA74/tF5/0=
github.com/magefile/mage v1.14.0 h1:6QDX3g6z1YvJ4olPhT1wksUcSa/V0a1B+pJb73fBjyo=
github.com/magefile/mage v1.14.0/go.mod h1:z5UZb/iS3GoOSn0JgWuiw7dxlurVYTu+/jHXqQg881A=
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
github.com/stretchr/testify v1.8.4 h1:CcVxjf3Q8PM0mHUKJCdn+eZZtm5yQwehR5yeSVQQcUk=
github.com/tidwall/gjson v1.14.2/go.mod h1:/wbyibRr2FHMks5tjHJ5F8dMZh3AcwJEMf5vlfC0lxk=
github.com/tidwall/gjson v1.18.0 h1:FIDeeyB800efLX89e5a8Y0BNH+LOngJyGrIWxG2FKQY=
github.com/tidwall/gjson v1.18.0/go.mod h1:/wbyibRr2FHMks5tjHJ5F8dMZh3AcwJEMf5vlfC0lxk=
github.com/tidwall/match v1.1.1 h1:+Ho715JplO36QYgwN9PGYNhgZvoUSc9X2c80KVTi+GA=
github.com/tidwall/match v1.1.1/go.mod h1:eRSPERbgtNPcGhD8UCthc6PmLEQXEWd3PRB5JTxsfmM=
github.com/tidwall/pretty v1.2.0 h1:RWIZEg2iJ8/g6fDDYzMpobmaoGh5OLl4AXtGUGPcqCs=
github.com/tidwall/pretty v1.2.0/go.mod h1:ITEVvHYasfjBbM0u2Pg8T2nJnzm8xPwvNhhsoaGGjNU=
github.com/tidwall/resp v0.1.1 h1:Ly20wkhqKTmDUPlyM1S7pWo5kk0tDu8OoC/vFArXmwE=
github.com/tidwall/resp v0.1.1/go.mod h1:3/FrruOBAxPTPtundW0VXgmsQ4ZBA0Aw714lVYgwFa0=
github.com/tidwall/sjson v1.2.5 h1:kLy8mja+1c9jlljvWTlSazM7cKDRfJuR/bOJhcY5NcY=
github.com/tidwall/sjson v1.2.5/go.mod h1:Fvgq9kS/6ociJEDnK0Fk1cpYF4FIW6ZF7LAe+6jwd28=
gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=

View File

@@ -0,0 +1,139 @@
package main
import (
"errors"
"fmt"
"github.com/alibaba/higress/plugins/wasm-go/pkg/log"
"github.com/alibaba/higress/plugins/wasm-go/pkg/wrapper"
"github.com/higress-group/proxy-wasm-go-sdk/proxywasm"
"github.com/higress-group/proxy-wasm-go-sdk/proxywasm/types"
"github.com/tidwall/gjson"
"github.com/tidwall/sjson"
"strings"
)
const (
	// DefaultMaxBodyBytes is the request body buffer limit (100 MiB) that
	// onHttpRequestHeaders applies to JSON requests.
	DefaultMaxBodyBytes uint32 = 100 * 1024 * 1024
)
// Config is the parsed plugin configuration.
type Config struct {
	// promptTemplate is the prompt with the {image_content} and {question}
	// placeholders that replaces the user message once OCR has finished.
	promptTemplate string
	// ocrProvider performs the OCR calls; it is created from ocrProviderConfig.
	ocrProvider Provider
	// ocrProviderConfig holds the provider settings read from the plugin JSON.
	ocrProviderConfig *ProviderConfig
}
// main registers the "ai-image-reader" plugin together with its config parser
// and its request header/body handlers.
func main() {
	wrapper.SetCtx(
		"ai-image-reader",
		wrapper.ParseConfig(parseConfig),
		wrapper.ProcessRequestHeaders(onHttpRequestHeaders),
		wrapper.ProcessRequestBody(onHttpRequestBody),
	)
}
// parseConfig fills config from the plugin's JSON configuration.
//
// It installs the built-in prompt template, reads and validates the OCR
// provider settings, and creates the provider. A validation error is returned
// as is; a provider creation error is wrapped so the underlying cause is not
// lost.
func parseConfig(json gjson.Result, config *Config) error {
	config.promptTemplate = `# 用户发送的图片解析得到的文字内容如下:
{image_content}
在回答时,请注意以下几点:
- 请你回答问题时结合用户图片的文字内容回答。
- 除非用户要求,否则你回答的语言需要和用户提问的语言保持一致。
# 用户消息为:
{question}`
	config.ocrProviderConfig = &ProviderConfig{}
	config.ocrProviderConfig.FromJson(json)
	if err := config.ocrProviderConfig.Validate(); err != nil {
		return err
	}
	var err error
	config.ocrProvider, err = CreateProvider(*config.ocrProviderConfig)
	if err != nil {
		// Keep the cause in the chain instead of replacing it with a generic message.
		return fmt.Errorf("create ocr provider failed: %w", err)
	}
	return nil
}
// onHttpRequestHeaders prepares body processing based on the content-type
// request header and always lets the request continue.
//
//   - no content-type: nothing is changed;
//   - a content-type containing "application/json": the body buffer limit is
//     raised to DefaultMaxBodyBytes and the Accept-Encoding header is removed;
//   - anything else: a warning is logged and the body is not read.
func onHttpRequestHeaders(ctx wrapper.HttpContext, config Config) types.Action {
	// A failed header lookup is handled like an absent header.
	contentType, _ := proxywasm.GetHttpRequestHeader("content-type")
	switch {
	case contentType == "":
		// Nothing to prepare.
	case strings.Contains(contentType, "application/json"):
		ctx.SetRequestBodyBufferLimit(DefaultMaxBodyBytes)
		_ = proxywasm.RemoveHttpRequestHeader("Accept-Encoding")
	default:
		log.Warnf("content is not json, can't process: %s", contentType)
		ctx.DontReadRequestBody()
	}
	return types.ActionContinue
}
// onHttpRequestBody inspects the last message with role "user" in the
// "messages" array of body. Its "image_url" parts are collected (walking the
// content array from the end, so the URLs are in reverse document order) and
// the text of its "text" part is taken as the question; when several text
// parts exist the one closest to the start wins. Requests without any image
// URL continue untouched, otherwise processing is handed to executeReadImage.
func onHttpRequestBody(ctx wrapper.HttpContext, config Config, body []byte) types.Action {
	messages := gjson.GetBytes(body, "messages").Array()

	// Locate the most recent user message.
	queryIndex := -1
	for i := len(messages) - 1; i >= 0; i-- {
		if messages[i].Get("role").String() == "user" {
			queryIndex = i
			break
		}
	}
	if queryIndex < 0 {
		return types.ActionContinue
	}

	var (
		query     string
		imageUrls []string
	)
	parts := messages[queryIndex].Get("content").Array()
	for j := len(parts) - 1; j >= 0; j-- {
		part := parts[j]
		switch part.Get("type").String() {
		case "image_url":
			imageUrls = append(imageUrls, part.Get("image_url.url").String())
		case "text":
			query = part.Get("text").String()
		}
	}
	if len(imageUrls) == 0 {
		return types.ActionContinue
	}
	return executeReadImage(imageUrls, config, query, queryIndex, body)
}
// executeReadImage runs OCR for every URL in imageUrls and, once every
// dispatched call has reported back, replaces the content of
// messages[queryIndex] in body with the prompt template filled with the OCR
// results and query, then resumes the request.
//
// imageUrls is expected in reverse document order (last image first), which is
// how onHttpRequestBody builds it; image numbers in the prompt therefore count
// from the end of the slice. Each result is stored in the slot of the URL it
// belongs to, so the numbering stays correct when responses arrive out of
// order or when some OCR calls fail (failed images are simply left out).
//
// It returns ActionPause when at least one OCR call was dispatched and
// ActionContinue otherwise.
func executeReadImage(imageUrls []string, config Config, query string, queryIndex int, body []byte) types.Action {
	total := len(imageUrls)
	contents := make([]string, total)
	succeeded := make([]bool, total)
	var dispatched, finished int

	// finalize builds the prompt from the collected results and resumes the request.
	finalize := func() {
		processedContents := make([]string, 0, total)
		for i := total - 1; i >= 0; i-- {
			if !succeeded[i] {
				continue
			}
			processedContents = append(processedContents, fmt.Sprintf("第%d张图片内容为 %s", total-i, contents[i]))
		}
		imageSummary := fmt.Sprintf("总共有 %d 张图片。\n", total)
		// A single-pass replacer never re-scans inserted text, so OCR output
		// containing "{question}" cannot capture the question placeholder.
		prompt := strings.NewReplacer(
			"{image_content}", imageSummary+strings.Join(processedContents, "\n"),
			"{question}", query,
		).Replace(config.promptTemplate)
		modifiedBody, err := sjson.SetBytes(body, fmt.Sprintf("messages.%d.content", queryIndex), prompt)
		if err != nil {
			log.Errorf("modify request message content failed, err:%v, body:%s", err, body)
		} else {
			log.Debugf("modified body:%s", modifiedBody)
			if err := proxywasm.ReplaceHttpRequestBody(modifiedBody); err != nil {
				log.Errorf("replace request body failed, err:%v", err)
			}
		}
		proxywasm.ResumeHttpRequest()
	}

	for i, imageUrl := range imageUrls {
		slot := i // per-iteration copy: the module targets go 1.19, where the loop variable is shared
		err := config.ocrProvider.DoOCR(imageUrl, func(imageContent string, err error) {
			if err != nil {
				log.Errorf("do ocr failed, err:%v", err)
			} else {
				contents[slot] = imageContent
				succeeded[slot] = true
			}
			// NOTE(review): assumes callbacks only fire after this function has
			// returned, so dispatched is final here — confirm against the host.
			finished++
			if finished == dispatched {
				finalize()
			}
		})
		if err != nil {
			log.Errorf("ocr call failed, err:%v", err)
			continue
		}
		dispatched++
	}
	if dispatched > 0 {
		return types.ActionPause
	}
	return types.ActionContinue
}

View File

@@ -0,0 +1,109 @@
package main
import (
"errors"
"github.com/tidwall/gjson"
)
const (
	// ProviderTypeDashscope is the "type" value that selects the DashScope
	// provider initializer.
	ProviderTypeDashscope = "dashscope"
)
// providerInitializer is implemented once per provider type. ProviderConfig
// uses it to read and validate provider-specific settings, and CreateProvider
// uses it to build the Provider.
type providerInitializer interface {
	// InitConfig reads provider-specific settings from the plugin JSON.
	InitConfig(json gjson.Result)
	// ValidateConfig reports whether the provider-specific settings are usable.
	ValidateConfig() error
	// CreateProvider builds the Provider for the given configuration.
	CreateProvider(ProviderConfig) (Provider, error)
}
var (
	// providerInitializers maps each supported provider type name to its
	// initializer.
	providerInitializers = map[string]providerInitializer{
		ProviderTypeDashscope: &dashScopeProviderInitializer{},
	}
)
// ProviderConfig holds the OCR provider settings read from the plugin JSON by
// FromJson. The "@Title"/"@Description" lines are locale-tagged (zh-CN)
// annotations and are kept verbatim.
type ProviderConfig struct {
	// typ is the OCR provider type (JSON key "type"), e.g. DashScope.
	// @Title zh-CN 文字识别服务提供者类型
	// @Description zh-CN 文字识别服务提供者类型,例如 DashScope
	typ string
	// serviceName is the name of the OCR service (JSON key "serviceName").
	// @Title zh-CN DashScope 文字识别服务名称
	// @Description zh-CN 文字识别服务名称
	serviceName string
	// serviceHost is the host name of the OCR service (JSON key "serviceHost").
	// @Title zh-CN 文字识别服务域名
	// @Description zh-CN 文字识别服务域名
	serviceHost string
	// servicePort is the port of the OCR service (JSON key "servicePort").
	// @Title zh-CN 文字识别服务端口
	// @Description zh-CN 文字识别服务端口
	servicePort int64
	// timeout is the OCR request timeout (JSON key "timeout"); FromJson
	// defaults it to 10000 when it is unset.
	// @Title zh-CN 文字识别服务超时时间
	// @Description zh-CN 文字识别服务超时时间
	timeout uint32
	// model is the model used for OCR (JSON key "model"); per the annotation
	// below, DashScope defaults to "qwen-vl-ocr".
	// @Title zh-CN 文字识别服务使用的模型
	// @Description zh-CN 用于文字识别的模型名称, 在 DashScope 中默认为 "qwen-vl-ocr"
	model string
	// initializer is the provider-specific initializer matching typ; it stays
	// nil when typ is unknown.
	initializer providerInitializer
}
// FromJson populates c from the plugin JSON.
//
// When "type" names a registered provider, that provider's initializer reads
// its own settings first and is remembered for Validate. "timeout" falls back
// to 10000 when it is missing, zero, negative, or too large for uint32, so an
// out-of-range value can no longer wrap into an arbitrary timeout.
func (c *ProviderConfig) FromJson(json gjson.Result) {
	c.typ = json.Get("type").String()
	if i, has := providerInitializers[c.typ]; has {
		i.InitConfig(json)
		c.initializer = i
	}
	c.serviceName = json.Get("serviceName").String()
	c.serviceHost = json.Get("serviceHost").String()
	c.servicePort = json.Get("servicePort").Int()
	c.model = json.Get("model").String()

	const defaultTimeout uint32 = 10000
	c.timeout = defaultTimeout
	// Int() yields an int64; only accept values that fit into uint32 unchanged.
	if t := json.Get("timeout").Int(); t > 0 && t <= int64(^uint32(0)) {
		c.timeout = uint32(t)
	}
}
// Validate checks the settings read by FromJson.
//
// It requires a provider type and a service name, rejects provider types that
// have no registered initializer, and finally delegates to the initializer's
// own validation, whose error is returned unchanged.
func (c *ProviderConfig) Validate() error {
	if c.typ == "" {
		return errors.New("ocr service provider type is required")
	}
	if c.serviceName == "" {
		return errors.New("ocr service name is required")
	}
	if c.initializer == nil {
		return errors.New("unknown ocr service provider type: " + c.typ)
	}
	return c.initializer.ValidateConfig()
}
// GetProviderType returns the configured provider type name.
func (c *ProviderConfig) GetProviderType() string {
	return c.typ
}
// CreateProvider builds the Provider for pc through the initializer registered
// for pc's type. An unregistered type yields an error.
func CreateProvider(pc ProviderConfig) (Provider, error) {
	if initializer, has := providerInitializers[pc.typ]; has {
		return initializer.CreateProvider(pc)
	}
	return nil, errors.New("unknown provider type: " + pc.typ)
}
// CallArgs bundles everything needed to issue one HTTP call to an OCR service.
type CallArgs struct {
	// Method is the HTTP method.
	Method string
	// Url is the request URL.
	Url string
	// Headers lists the request headers as name/value pairs.
	Headers [][2]string
	// Body is the raw request body.
	Body []byte
	// TimeoutMillisecond is the request timeout in milliseconds.
	TimeoutMillisecond uint32
}
// Provider is an OCR backend.
type Provider interface {
	// GetProviderType returns the provider type name.
	GetProviderType() string
	// CallArgs builds the HTTP call for running OCR on imageUrl.
	CallArgs(imageUrl string) CallArgs
	// DoOCR runs OCR on imageUrl and hands the recognised content, or an
	// error, to callback. The returned error reports a failure to issue the
	// call.
	DoOCR(
		imageUrl string,
		callback func(imageContent string, err error)) error
}

View File

@@ -0,0 +1 @@
test/

View File

@@ -0,0 +1,174 @@
---
title: AI负载均衡
keywords: [higress, llm, load balance]
description: 针对LLM服务的负载均衡策略
---
# 功能说明
**注意**
- Higress网关版本需要>=v2.1.5
对LLM服务提供热插拔的负载均衡策略,如果关闭插件,负载均衡策略会退化为服务本身的负载均衡策略(轮询、本地最小请求数、随机、一致性hash等)。
配置如下:
| 名称 | 数据类型 | 填写要求 | 默认值 | 描述 |
|--------------------|-----------------|------------------|-------------|-------------------------------------|
| `lb_policy` | string | 必填 | | 负载均衡策略类型 |
| `lb_config` | object | 必填 | | 当前负载均衡策略类型的配置 |
目前支持的负载均衡策略包括:
- `global_least_request`: 基于redis实现的全局最小请求数负载均衡
- `prefix_cache`: 基于 prompt 前缀匹配选择后端节点,如果通过前缀匹配无法匹配到节点,则通过全局最小请求数进行服务节点的选择
- `least_busy`: [gateway-api-inference-extension](https://github.com/kubernetes-sigs/gateway-api-inference-extension/blob/main/README.md) 的 wasm 实现
# 全局最小请求数
## 功能说明
```mermaid
sequenceDiagram
participant C as Client
participant H as Higress
participant R as Redis
participant H1 as Host1
participant H2 as Host2
C ->> H: 发起请求
H ->> R: 获取 host ongoing 请求数
R ->> H: 返回结果
H ->> R: 根据结果选择当前请求数最小的host,计数+1
R ->> H: 返回结果
H ->> H1: 绕过service原本的负载均衡策略,转发请求到对应host
H1 ->> H: 返回响应
H ->> R: host计数-1
H ->> C: 返回响应
```
## 配置说明
| 名称 | 数据类型 | 填写要求 | 默认值 | 描述 |
|--------------------|-----------------|------------------|-------------|-------------------------------------|
| `serviceFQDN` | string | 必填 | | redis 服务的FQDN例如: `redis.dns` |
| `servicePort` | int | 必填 | | redis 服务的port |
| `username` | string | 必填 | | redis 用户名 |
| `password` | string | 选填 | 空 | redis 密码 |
| `timeout` | int | 选填 | 3000ms | redis 请求超时时间 |
| `database` | int | 选填 | 0 | redis 数据库序号 |
## 配置示例
```yaml
lb_policy: global_least_request
lb_config:
serviceFQDN: redis.static
servicePort: 6379
username: default
password: '123456'
```
# 前缀匹配
## 功能说明
根据 prompt 前缀匹配选择 pod,以复用 KV Cache;如果通过前缀匹配无法匹配到节点,则通过全局最小请求数进行服务节点的选择。
例如以下请求被路由到了pod 1
```json
{
"model": "qwen-turbo",
"messages": [
{
"role": "user",
"content": "hi"
}
]
}
```
那么后续具有相同前缀的请求也会被路由到 pod 1
```json
{
"model": "qwen-turbo",
"messages": [
{
"role": "user",
"content": "hi"
},
{
"role": "assistant",
"content": "Hi! How can I assist you today? 😊"
},
{
"role": "user",
"content": "write a short story about 100 words"
}
]
}
```
## 配置说明
| 名称 | 数据类型 | 填写要求 | 默认值 | 描述 |
|--------------------|-----------------|------------------|-------------|-------------------------------------|
| `serviceFQDN` | string | 必填 | | redis 服务的FQDN例如: `redis.dns` |
| `servicePort` | int | 必填 | | redis 服务的port |
| `username` | string | 必填 | | redis 用户名 |
| `password` | string | 选填 | 空 | redis 密码 |
| `timeout` | int | 选填 | 3000ms | redis 请求超时时间 |
| `database` | int | 选填 | 0 | redis 数据库序号 |
| `redisKeyTTL` | int | 选填 | 1800ms | prompt 前缀对应的key的ttl |
## 配置示例
```yaml
lb_policy: prefix_cache
lb_config:
serviceFQDN: redis.static
servicePort: 6379
username: default
password: '123456'
```
# 最小负载
## 功能说明
[gateway-api-inference-extension](https://github.com/kubernetes-sigs/gateway-api-inference-extension/blob/main/README.md) 的 wasm 实现
```mermaid
sequenceDiagram
participant C as Client
participant H as Higress
participant H1 as Host1
participant H2 as Host2
loop 定期拉取metrics
H ->> H1: /metrics
H1 ->> H: vllm metrics
H ->> H2: /metrics
H2 ->> H: vllm metrics
end
C ->> H: 发起请求
H ->> H1: 根据vllm metrics选择合适的pod,绕过服务原始的lb policy直接转发
H1 ->> H: 返回响应
H ->> C: 返回响应
```
<!-- pod选取流程图如下
![](https://github.com/kubernetes-sigs/gateway-api-inference-extension/blob/main/docs/scheduler-flowchart.png) -->
## 配置说明
| 名称 | 数据类型 | 填写要求 | 默认值 | 描述 |
|--------------------|-----------------|------------------|-------------|-------------------------------------|
| `criticalModels` | []string | 选填 | | critical的模型列表 |
## 配置示例
```yaml
lb_policy: least_busy
lb_config:
criticalModels:
- meta-llama/Llama-2-7b-hf
- sql-lora
```

View File

@@ -0,0 +1,177 @@
---
title: AI Load Balance
keywords: [higress, llm, load balance]
description: LLM-oriented load balance policies
---
# Introduction
**Attention**:
- The Higress version must be >= v2.1.5
This plugin provides LLM-oriented load balancing in a hot-swappable manner. If the plugin is disabled, load balancing falls back to the strategy of the service itself (round robin, local least request, random, consistent hash, etc.).
The configuration is:
| Name | Type | Required | default | description |
|--------------------|-----------------|------------------|-------------|-------------------------------------|
| `lb_policy` | string | required | | load balance type |
| `lb_config` | object | required | | configuration for the current load balance type |
Current supported load balance policies are:
- `global_least_request`: global least request based on redis
- `prefix_cache`: Select the backend node based on the prompt prefix match. If the node cannot be matched by prefix matching, the service node is selected based on the global minimum number of requests.
- `least_busy`: implementation for [gateway-api-inference-extension](https://github.com/kubernetes-sigs/gateway-api-inference-extension/blob/main/README.md)
# Global Least Request
## Introduction
```mermaid
sequenceDiagram
participant C as Client
participant H as Higress
participant R as Redis
participant H1 as Host1
participant H2 as Host2
C ->> H: Send request
H ->> R: Get host ongoing request number
R ->> H: Return result
H ->> R: According to the result, select the host with the smallest number of current requests, host rq count +1.
R ->> H: Return result
H ->> H1: Bypass the service's original load balancing strategy and forward the request to the corresponding host
H1 ->> H: Return result
H ->> R: host rq count -1
H ->> C: Return response
```
## Configuration
| Name | Type | required | default | description |
|--------------------|-----------------|------------------|-------------|-------------------------------------|
| `serviceFQDN` | string | required | | redis FQDN, e.g. `redis.dns` |
| `servicePort` | int | required | | redis port |
| `username` | string | required | | redis username |
| `password` | string | optional | `` | redis password |
| `timeout` | int | optional | 3000ms | redis request timeout |
| `database` | int | optional | 0 | redis database number |
## Configuration Example
```yaml
lb_policy: global_least_request
lb_config:
serviceFQDN: redis.static
servicePort: 6379
username: default
password: '123456'
```
# Prefix Cache
## Introduction
Select pods based on the prompt prefix match to reuse KV Cache. If no node can be matched by prefix match, select the service node based on the global minimum number of requests.
For example, the following request is routed to pod 1:
```json
{
"model": "qwen-turbo",
"messages": [
{
"role": "user",
"content": "hi"
}
]
}
```
Then subsequent requests with the same prefix will also be routed to pod 1:
```json
{
"model": "qwen-turbo",
"messages": [
{
"role": "user",
"content": "hi"
},
{
"role": "assistant",
"content": "Hi! How can I assist you today? 😊"
},
{
"role": "user",
"content": "write a short story about 100 words"
}
]
}
```
## Configuration
| Name | Type | required | default | description |
|--------------------|-----------------|-----------------------|-------------|---------------------------------|
| `serviceFQDN` | string | required | | redis FQDN, e.g. `redis.dns` |
| `servicePort` | int | required | | redis port |
| `username` | string | required | | redis username |
| `password` | string | optional | `` | redis password |
| `timeout` | int | optional | 3000ms | redis request timeout |
| `database` | int | optional | 0 | redis database number |
| `redisKeyTTL` | int | optional | 1800ms | prompt prefix key's ttl |
## Configuration Example
```yaml
lb_policy: prefix_cache
lb_config:
serviceFQDN: redis.static
servicePort: 6379
username: default
password: '123456'
```
# Least Busy
## Introduction
wasm implementation for [gateway-api-inference-extension](https://github.com/kubernetes-sigs/gateway-api-inference-extension/blob/main/README.md)
```mermaid
sequenceDiagram
participant C as Client
participant H as Higress
participant H1 as Host1
participant H2 as Host2
loop fetch metrics periodically
H ->> H1: /metrics
H1 ->> H: vllm metrics
H ->> H2: /metrics
H2 ->> H: vllm metrics
end
C ->> H: request
H ->> H1: select pod according to vllm metrics, bypassing original service load balance policy
H1 ->> H: response
H ->> C: response
```
<!-- flowchart for pod selection:
![](https://github.com/kubernetes-sigs/gateway-api-inference-extension/blob/main/docs/scheduler-flowchart.png) -->
## Configuration
| Name | Type | Required | default | description |
|--------------------|-----------------|------------------|-------------|-------------------------------------|
| `criticalModels` | []string | required | | critical model names |
## Configuration Example
```yaml
lb_policy: least_busy
lb_config:
criticalModels:
- meta-llama/Llama-2-7b-hf
- sql-lora
```

View File

@@ -0,0 +1,178 @@
package global_least_request
import (
"errors"
"fmt"
"math/rand"
"time"
"github.com/alibaba/higress/plugins/wasm-go/extensions/ai-load-balancer/utils"
"github.com/higress-group/proxy-wasm-go-sdk/proxywasm"
"github.com/higress-group/proxy-wasm-go-sdk/proxywasm/types"
"github.com/higress-group/wasm-go/pkg/log"
"github.com/higress-group/wasm-go/pkg/wrapper"
"github.com/tidwall/gjson"
"github.com/tidwall/resp"
)
const (
	// RedisKeyFormat is the format of the Redis hash key holding the per-host
	// ongoing request counters; its arguments are the route name and the
	// cluster name.
	RedisKeyFormat = "higress:global_least_request_table:%s:%s"
	// RedisLua picks the host with the fewest ongoing requests and increments
	// its counter in one script.
	//
	// KEYS[1] is the random seed, KEYS[2] the hash key, KEYS[3] the initially
	// chosen host and KEYS[4..] the healthy hosts. When the initial host
	// already has a counter, every healthy host in the hash is compared
	// against the current best; ties are broken randomly. The chosen host's
	// counter is incremented and the host is returned.
	//
	// Hash values reach Lua as strings, so the counters are converted with
	// tonumber before they are compared; comparing the raw strings would order
	// them lexicographically (e.g. "10" < "9").
	RedisLua = `local seed = KEYS[1]
local hset_key = KEYS[2]
local current_target = KEYS[3]
local current_count = 0
math.randomseed(seed)
local function randomBool()
return math.random() >= 0.5
end
local function is_healthy(addr)
for i = 4, #KEYS do
if addr == KEYS[i] then
return true
end
end
return false
end
if redis.call('HEXISTS', hset_key, current_target) ~= 0 then
current_count = tonumber(redis.call('HGET', hset_key, current_target))
local hash = redis.call('HGETALL', hset_key)
for i = 1, #hash, 2 do
local addr = hash[i]
local count = tonumber(hash[i+1])
if is_healthy(addr) then
if count < current_count then
current_target = addr
current_count = count
elseif count == current_count and randomBool() then
current_target = addr
current_count = count
end
end
end
end
redis.call("HINCRBY", hset_key, current_target, 1)
return current_target`
)
// GlobalLeastRequestLoadBalancer selects upstream hosts by the ongoing request
// counters it keeps in Redis.
type GlobalLeastRequestLoadBalancer struct {
	// redisClient is the client used to run the selection script and to
	// update the counters.
	redisClient wrapper.RedisClient
}
// NewGlobalLeastRequestLoadBalancer builds a load balancer from its JSON
// configuration.
//
// "serviceFQDN" and "servicePort" are mandatory; when either is missing an
// error is logged and returned. "timeout" defaults to 3000 and "database" to
// 0. The returned error is otherwise the result of initializing the Redis
// client with "username" and "password".
func NewGlobalLeastRequestLoadBalancer(json gjson.Result) (GlobalLeastRequestLoadBalancer, error) {
	var lb GlobalLeastRequestLoadBalancer

	fqdn := json.Get("serviceFQDN").String()
	port := json.Get("servicePort").Int()
	if fqdn == "" || port == 0 {
		log.Errorf("invalid redis service, serviceFQDN: %s, servicePort: %d", fqdn, port)
		return lb, errors.New("invalid redis service config")
	}
	lb.redisClient = wrapper.NewRedisClusterClient(wrapper.FQDNCluster{
		FQDN: fqdn,
		Port: port,
	})

	timeout := json.Get("timeout").Int()
	if timeout == 0 {
		timeout = 3000
	}
	// An absent "database" reads as 0, which is the default database.
	err := lb.redisClient.Init(
		json.Get("username").String(),
		json.Get("password").String(),
		timeout,
		wrapper.WithDataBase(int(json.Get("database").Int())),
	)
	return lb, err
}
// HandleHttpRequestHeaders holds the request at the header phase; the host is
// chosen later in HandleHttpRequestBody.
func (lb GlobalLeastRequestLoadBalancer) HandleHttpRequestHeaders(ctx wrapper.HttpContext) types.Action {
	// Returning types.ActionContinue here would keep SetUpstreamOverrideHost
	// from taking effect.
	return types.HeaderStopIteration
}
// HandleHttpRequestBody chooses the upstream host for the request.
//
// It collects the healthy upstream hosts, picks one at random as the starting
// candidate and runs RedisLua, which returns the healthy host with the fewest
// ongoing requests and increments its counter. The request is paused until the
// script answers; the answer is applied as upstream override host and stored
// in the context under "host_selected".
//
// Whenever selection cannot be performed the context key "error" is set to
// true and the request continues with the service's own load balancing. If the
// host override itself fails, the counter the script has just incremented is
// decremented again, because the "error" flag prevents the decrement that
// normally happens at the end of the response.
func (lb GlobalLeastRequestLoadBalancer) HandleHttpRequestBody(ctx wrapper.HttpContext, body []byte) types.Action {
	routeName, err := utils.GetRouteName()
	if err != nil || routeName == "" {
		ctx.SetContext("error", true)
		return types.ActionContinue
	}
	ctx.SetContext("routeName", routeName)

	clusterName, err := utils.GetClusterName()
	if err != nil || clusterName == "" {
		ctx.SetContext("error", true)
		return types.ActionContinue
	}
	ctx.SetContext("clusterName", clusterName)

	hostInfos, err := proxywasm.GetUpstreamHosts()
	if err != nil {
		ctx.SetContext("error", true)
		return types.ActionContinue
	}
	// Only healthy host can be selected
	healthyHostArray := []string{}
	for _, hostInfo := range hostInfos {
		if gjson.Get(hostInfo[1], "health_status").String() == "Healthy" {
			healthyHostArray = append(healthyHostArray, hostInfo[0])
		}
	}
	if len(healthyHostArray) == 0 {
		ctx.SetContext("error", true)
		return types.ActionContinue
	}

	hostSelected := healthyHostArray[rand.Intn(len(healthyHostArray))]
	redisKey := fmt.Sprintf(RedisKeyFormat, routeName, clusterName)
	// Script keys: seed, hash key, initial candidate, then every healthy host.
	keys := make([]interface{}, 0, 3+len(healthyHostArray))
	keys = append(keys, time.Now().Unix(), redisKey, hostSelected)
	for _, v := range healthyHostArray {
		keys = append(keys, v)
	}
	err = lb.redisClient.Eval(RedisLua, len(keys), keys, []interface{}{}, func(response resp.Value) {
		if err := response.Error(); err != nil {
			log.Errorf("eval least request lua script failed: %+v", err)
			ctx.SetContext("error", true)
			proxywasm.ResumeHttpRequest()
			return
		}
		hostSelected = response.String()
		if err := proxywasm.SetUpstreamOverrideHost([]byte(hostSelected)); err != nil {
			ctx.SetContext("error", true)
			log.Errorf("override upstream host failed, fallback to default lb policy, error informations: %+v", err)
			// The script already counted this request for hostSelected; undo
			// it, since the request will not be pinned to that host.
			lb.redisClient.HIncrBy(redisKey, hostSelected, -1, nil)
		}
		log.Debugf("host_selected: %s", hostSelected)
		ctx.SetContext("host_selected", hostSelected)
		proxywasm.ResumeHttpRequest()
	})
	if err != nil {
		ctx.SetContext("error", true)
		return types.ActionContinue
	}
	return types.ActionPause
}
// HandleHttpResponseHeaders does nothing and lets the response continue.
func (lb GlobalLeastRequestLoadBalancer) HandleHttpResponseHeaders(ctx wrapper.HttpContext) types.Action {
	return types.ActionContinue
}
// HandleHttpStreamingResponseBody passes data through unchanged. On the final
// chunk of a request whose context carries no "error" flag, it decrements the
// Redis counter of the host stored under "host_selected"; a missing host is
// logged instead.
func (lb GlobalLeastRequestLoadBalancer) HandleHttpStreamingResponseBody(ctx wrapper.HttpContext, data []byte, endOfStream bool) []byte {
	if !endOfStream {
		return data
	}
	if failed, _ := ctx.GetContext("error").(bool); failed {
		return data
	}
	routeName, _ := ctx.GetContext("routeName").(string)
	clusterName, _ := ctx.GetContext("clusterName").(string)
	hostSelected, _ := ctx.GetContext("host_selected").(string)
	if hostSelected == "" {
		log.Errorf("get host_selected failed")
		return data
	}
	lb.redisClient.HIncrBy(fmt.Sprintf(RedisKeyFormat, routeName, clusterName), hostSelected, -1, nil)
	return data
}
// HandleHttpResponseBody does nothing and lets the response continue.
func (lb GlobalLeastRequestLoadBalancer) HandleHttpResponseBody(ctx wrapper.HttpContext, body []byte) types.Action {
	return types.ActionContinue
}

View File

@@ -0,0 +1,23 @@
module github.com/alibaba/higress/plugins/wasm-go/extensions/ai-load-balancer
go 1.24.1
toolchain go1.24.3
require (
github.com/higress-group/proxy-wasm-go-sdk v0.0.0-20250611100342-5654e89a7a80
github.com/higress-group/wasm-go v1.0.1-0.20250628101008-bea7da01a545
github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect
github.com/prometheus/client_model v0.6.2
github.com/tidwall/gjson v1.18.0
github.com/tidwall/resp v0.1.1
go.uber.org/multierr v1.11.0
)
require (
github.com/google/uuid v1.6.0 // indirect
github.com/prometheus/common v0.64.0
github.com/tidwall/match v1.1.1 // indirect
github.com/tidwall/pretty v1.2.1 // indirect
google.golang.org/protobuf v1.36.6 // indirect
)

View File

@@ -0,0 +1,35 @@
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/google/go-cmp v0.7.0 h1:wk8382ETsv4JYUZwIsn6YpYiWiBsYLSJiTsyBybVuN8=
github.com/google/go-cmp v0.7.0/go.mod h1:pXiqmnSA92OHEEa9HXL2W4E7lf9JzCmGVUdgjX3N/iU=
github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0=
github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
github.com/higress-group/proxy-wasm-go-sdk v0.0.0-20250611100342-5654e89a7a80 h1:xqmtTZI0JQ2O+Lg9/CE6c+Tw9KD6FnvWw8EpLVuuvfg=
github.com/higress-group/proxy-wasm-go-sdk v0.0.0-20250611100342-5654e89a7a80/go.mod h1:tRI2LfMudSkKHhyv1uex3BWzcice2s/l8Ah8axporfA=
github.com/higress-group/wasm-go v1.0.1-0.20250628101008-bea7da01a545 h1:zPXEonKCAeLvXI1IpwGpIeVSvLY5AZ9h9uTJnOuiA3Q=
github.com/higress-group/wasm-go v1.0.1-0.20250628101008-bea7da01a545/go.mod h1:ODBV27sjmhIW8Cqv3R74EUcTnbdkE69bmXBQFuRkY1M=
github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 h1:C3w9PqII01/Oq1c1nUAm88MOHcQC9l5mIlSMApZMrHA=
github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822/go.mod h1:+n7T8mK8HuQTcFwEeznm/DIxMOiR9yIdICNftLE1DvQ=
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/prometheus/client_model v0.6.2 h1:oBsgwpGs7iVziMvrGhE53c/GrLUsZdHnqNwqPLxwZyk=
github.com/prometheus/client_model v0.6.2/go.mod h1:y3m2F6Gdpfy6Ut/GBsUqTWZqCUvMVzSfMLjcu6wAwpE=
github.com/prometheus/common v0.64.0 h1:pdZeA+g617P7oGv1CzdTzyeShxAGrTBsolKNOLQPGO4=
github.com/prometheus/common v0.64.0/go.mod h1:0gZns+BLRQ3V6NdaerOhMbwwRbNh9hkGINtQAsP5GS8=
github.com/stretchr/testify v1.10.0 h1:Xv5erBjTwe/5IxqUQTdXv5kgmIvbHo3QQyRwhJsOfJA=
github.com/stretchr/testify v1.10.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY=
github.com/tidwall/gjson v1.18.0 h1:FIDeeyB800efLX89e5a8Y0BNH+LOngJyGrIWxG2FKQY=
github.com/tidwall/gjson v1.18.0/go.mod h1:/wbyibRr2FHMks5tjHJ5F8dMZh3AcwJEMf5vlfC0lxk=
github.com/tidwall/match v1.1.1 h1:+Ho715JplO36QYgwN9PGYNhgZvoUSc9X2c80KVTi+GA=
github.com/tidwall/match v1.1.1/go.mod h1:eRSPERbgtNPcGhD8UCthc6PmLEQXEWd3PRB5JTxsfmM=
github.com/tidwall/pretty v1.2.0/go.mod h1:ITEVvHYasfjBbM0u2Pg8T2nJnzm8xPwvNhhsoaGGjNU=
github.com/tidwall/pretty v1.2.1 h1:qjsOFOWWQl+N3RsoF5/ssm1pHmJJwhjlSbZ51I6wMl4=
github.com/tidwall/pretty v1.2.1/go.mod h1:ITEVvHYasfjBbM0u2Pg8T2nJnzm8xPwvNhhsoaGGjNU=
github.com/tidwall/resp v0.1.1 h1:Ly20wkhqKTmDUPlyM1S7pWo5kk0tDu8OoC/vFArXmwE=
github.com/tidwall/resp v0.1.1/go.mod h1:3/FrruOBAxPTPtundW0VXgmsQ4ZBA0Aw714lVYgwFa0=
go.uber.org/multierr v1.11.0 h1:blXXJkSxSSfBVBlC76pxqeO+LN3aDfLQo+309xJstO0=
go.uber.org/multierr v1.11.0/go.mod h1:20+QtiLqy0Nd6FdQB9TLXag12DsQkrbs3htMFfDN80Y=
google.golang.org/protobuf v1.36.6 h1:z1NpPI8ku2WgiWnf+t9wTPsn6eP1L7ksHUlkfLvd9xY=
google.golang.org/protobuf v1.36.6/go.mod h1:jduwjTPXsFjZGTmRluh+L6NjiWu7pchiJ2/5YcXBHnY=
gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=

View File

@@ -0,0 +1,68 @@
/*
Copyright 2025 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package backend
import "fmt"
// PodSet is a set of pods.
type PodSet map[Pod]bool

// Pod identifies a backend pod by name and address.
type Pod struct {
	Name    string
	Address string
}

// String returns the pod as "Name:Address".
func (p Pod) String() string {
	return p.Name + ":" + p.Address
}

// Metrics is the load information tracked for one pod.
type Metrics struct {
	// ActiveModels is a set of models(including LoRA adapters) that are currently cached to GPU.
	ActiveModels map[string]int
	// MaxActiveModels is the maximum number of models that can be loaded to GPU.
	MaxActiveModels         int
	RunningQueueSize        int
	WaitingQueueSize        int
	KVCacheUsagePercent     float64
	KvCacheMaxTokenCapacity int
}

// PodMetrics couples a pod with its metrics.
type PodMetrics struct {
	Pod
	Metrics
}

// String renders the pod and its metrics for logging.
func (pm *PodMetrics) String() string {
	return fmt.Sprintf("Pod: %+v; Metrics: %+v", pm.Pod, pm.Metrics)
}

// Clone returns a copy of pm that shares no mutable state with it.
//
// Every field is copied, including MaxActiveModels, and ActiveModels is
// duplicated so that changes to the clone's map do not reach the original.
func (pm *PodMetrics) Clone() *PodMetrics {
	cm := make(map[string]int, len(pm.ActiveModels))
	for k, v := range pm.ActiveModels {
		cm[k] = v
	}
	// Copy the whole struct so that no scalar field can be forgotten, then
	// swap in the private map.
	clone := *pm
	clone.ActiveModels = cm
	return &clone
}

View File

@@ -0,0 +1,150 @@
/*
Copyright 2025 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
// Package vllm provides vllm specific pod metrics implementation.
package vllm
import (
"fmt"
"strconv"
"strings"
"time"
"github.com/alibaba/higress/plugins/wasm-go/extensions/ai-load-balancer/least_busy/backend"
dto "github.com/prometheus/client_model/go"
"go.uber.org/multierr"
)
// Names of the vLLM metric families and labels read by PromToPodMetrics.
const (
	// LoraRequestInfoMetricName is the gauge family carrying LoRA adapter
	// information in its labels.
	LoraRequestInfoMetricName = "vllm:lora_requests_info"
	// LoraRequestInfoRunningAdaptersMetricName is the label holding the
	// comma-separated list of running adapters.
	LoraRequestInfoRunningAdaptersMetricName = "running_lora_adapters"
	// LoraRequestInfoMaxAdaptersMetricName is the label holding the maximum
	// number of adapters.
	LoraRequestInfoMaxAdaptersMetricName = "max_lora"
	// TODO: Replace these with the num_tokens_running/waiting below once we add those to the fork.
	RunningQueueSizeMetricName = "vllm:num_requests_running"
	WaitingQueueSizeMetricName = "vllm:num_requests_waiting"
	/* TODO: Uncomment this once the following are added to the fork.
	RunningQueueSizeMetricName = "vllm:num_tokens_running"
	WaitingQueueSizeMetricName = "vllm:num_tokens_waiting"
	*/
	// KVCacheUsagePercentMetricName is the gauge family read into
	// KVCacheUsagePercent.
	KVCacheUsagePercentMetricName = "vllm:gpu_cache_usage_perc"
	// KvCacheMaxTokenCapacityMetricName is only referenced by code that is
	// currently commented out in PromToPodMetrics.
	KvCacheMaxTokenCapacityMetricName = "vllm:gpu_cache_max_token_capacity"
)
// PromToPodMetrics derives updated pod metrics from scraped prometheus metric
// families.
//
// It works on a clone of existing, so the caller can swap the returned pointer
// into its pod metrics map in one step. Each metric that cannot be read leaves
// the cloned value in place and contributes to the combined error that is
// returned alongside the (possibly partially) updated metrics.
func PromToPodMetrics(
	metricFamilies map[string]*dto.MetricFamily,
	existing *backend.PodMetrics,
) (*backend.PodMetrics, error) {
	var errs error
	updated := existing.Clone()

	if running, err := getLatestMetric(metricFamilies, RunningQueueSizeMetricName); err != nil {
		errs = multierr.Append(errs, err)
	} else {
		updated.RunningQueueSize = int(running.GetGauge().GetValue())
	}

	if waiting, err := getLatestMetric(metricFamilies, WaitingQueueSizeMetricName); err != nil {
		errs = multierr.Append(errs, err)
	} else {
		updated.WaitingQueueSize = int(waiting.GetGauge().GetValue())
	}

	if cacheUsage, err := getLatestMetric(metricFamilies, KVCacheUsagePercentMetricName); err != nil {
		errs = multierr.Append(errs, err)
	} else {
		updated.KVCacheUsagePercent = cacheUsage.GetGauge().GetValue()
	}

	loraMetrics, _, err := getLatestLoraMetric(metricFamilies)
	errs = multierr.Append(errs, err)
	/* TODO: uncomment once this is available in vllm.
	kvCap, _, err := getGaugeLatestValue(metricFamilies, KvCacheMaxTokenCapacityMetricName)
	errs = multierr.Append(errs, err)
	if err != nil {
		updated.KvCacheMaxTokenCapacity = int(kvCap)
	}
	*/
	if loraMetrics == nil {
		return updated, errs
	}

	// The active model set is rebuilt from the labels of the latest series.
	updated.ActiveModels = make(map[string]int)
	for _, label := range loraMetrics.GetLabel() {
		value := label.GetValue()
		if value == "" {
			continue
		}
		switch label.GetName() {
		case LoraRequestInfoRunningAdaptersMetricName:
			for _, adapter := range strings.Split(value, ",") {
				updated.ActiveModels[adapter] = 0
			}
		case LoraRequestInfoMaxAdaptersMetricName:
			updated.MaxActiveModels, err = strconv.Atoi(value)
			if err != nil {
				errs = multierr.Append(errs, err)
			}
		}
	}
	return updated, errs
}
// getLatestLoraMetric returns the most recent series of the gauge family
// `vllm:lora_requests_info`.
//
// Every distinct combination of label values creates a new series and only the
// newest one is of interest. The gauge value of a series is its creation
// timestamp, so the series with the largest value is selected. The result is
// nil, without an error, when no series has a value greater than zero; a
// missing family is an error.
//
// NOTE(review): the returned time is built by treating value*1000 as
// nanoseconds; if the gauge value is in seconds this should presumably be
// value*1e9 — confirm the unit before relying on the time.
func getLatestLoraMetric(metricFamilies map[string]*dto.MetricFamily) (*dto.Metric, time.Time, error) {
	family, found := metricFamilies[LoraRequestInfoMetricName]
	if !found {
		// klog.Warningf("metric family %q not found", LoraRequestInfoMetricName)
		return nil, time.Time{}, fmt.Errorf("metric family %q not found", LoraRequestInfoMetricName)
	}
	var (
		newest   *dto.Metric
		newestTs float64
	)
	for _, series := range family.GetMetric() {
		if ts := series.GetGauge().GetValue(); ts > newestTs {
			newestTs, newest = ts, series
		}
	}
	return newest, time.Unix(0, int64(newestTs*1000)), nil
}
// getLatestMetric returns the metric with the largest timestamp in the named family. It is meant
// for gauge metrics.
// Ties are resolved in favour of the later entry, so when no timestamps are set (all zero) this
// returns the last metric in the family.
// It returns an error when the family is missing or has no metrics.
func getLatestMetric(metricFamilies map[string]*dto.MetricFamily, metricName string) (*dto.Metric, error) {
	family, found := metricFamilies[metricName]
	if !found {
		return nil, fmt.Errorf("metric family %q not found", metricName)
	}
	series := family.GetMetric()
	if len(series) == 0 {
		return nil, fmt.Errorf("no metrics available for %q", metricName)
	}

	var (
		newest   *dto.Metric
		newestTs int64
	)
	for _, candidate := range series {
		if ts := candidate.GetTimestampMs(); ts >= newestTs {
			newestTs, newest = ts, candidate
		}
	}
	return newest, nil
}

View File

@@ -0,0 +1,79 @@
package least_busy
import (
"github.com/alibaba/higress/plugins/wasm-go/extensions/ai-load-balancer/least_busy/scheduling"
"github.com/higress-group/proxy-wasm-go-sdk/proxywasm"
"github.com/higress-group/proxy-wasm-go-sdk/proxywasm/types"
"github.com/higress-group/wasm-go/pkg/log"
"github.com/higress-group/wasm-go/pkg/wrapper"
"github.com/tidwall/gjson"
)
// LeastBusyLoadBalancer selects an upstream host per request using the scheduling package.
type LeastBusyLoadBalancer struct {
// criticalModels is the set of model names whose requests are scheduled as critical.
criticalModels map[string]struct{}
}
// NewLeastBusyLoadBalancer builds a LeastBusyLoadBalancer from the plugin's lb_config.
// The optional "criticalModels" array lists the model names to treat as critical.
// The returned error is always nil.
func NewLeastBusyLoadBalancer(json gjson.Result) (LeastBusyLoadBalancer, error) {
	models := json.Get("criticalModels").Array()
	critical := make(map[string]struct{}, len(models))
	for _, entry := range models {
		critical[entry.String()] = struct{}{}
	}
	return LeastBusyLoadBalancer{criticalModels: critical}, nil
}
// Callbacks which are called in request path

// HandleHttpRequestHeaders stops header iteration so that the host override performed in
// HandleHttpRequestBody can still take effect.
func (lb LeastBusyLoadBalancer) HandleHttpRequestHeaders(ctx wrapper.HttpContext) types.Action {
// If return types.ActionContinue, SetUpstreamOverrideHost will not take effect
return types.HeaderStopIteration
}
// HandleHttpRequestBody schedules the request onto one of the healthy upstream hosts.
//
// It reads the "model" field from the body, collects the metrics of every host whose
// health_status is "Healthy", and asks the scheduler for a target pod. On success the upstream
// host is overridden with the pod address. When no pod can be selected a 429 response is sent.
// When the body has no model, the host list cannot be read, or the scheduler cannot be built,
// the request continues without an override.
func (lb LeastBusyLoadBalancer) HandleHttpRequestBody(ctx wrapper.HttpContext, body []byte) types.Action {
	requestModel := gjson.GetBytes(body, "model")
	if !requestModel.Exists() {
		return types.ActionContinue
	}
	modelName := requestModel.String()
	_, isCritical := lb.criticalModels[modelName]
	llmReq := &scheduling.LLMRequest{
		Model:    modelName,
		Critical: isCritical,
	}

	hostInfos, err := proxywasm.GetUpstreamHosts()
	if err != nil {
		return types.ActionContinue
	}
	// hostInfo[0] is the host address, hostInfo[1] a JSON document describing the host.
	hostMetrics := make(map[string]string)
	for _, hostInfo := range hostInfos {
		if gjson.Get(hostInfo[1], "health_status").String() == "Healthy" {
			hostMetrics[hostInfo[0]] = gjson.Get(hostInfo[1], "metrics").String()
		}
	}

	scheduler, err := scheduling.GetScheduler(hostMetrics)
	if err != nil {
		log.Debugf("initial scheduler failed: %v", err)
		return types.ActionContinue
	}

	targetPod, err := scheduler.Schedule(llmReq)
	if err != nil {
		log.Debugf("pod select failed: %v", err)
		proxywasm.SendHttpResponseWithDetail(429, "limited resources", nil, []byte("limited resources"), 0)
		return types.ActionContinue
	}

	// Only log the target once we know scheduling succeeded; on failure the address is empty.
	log.Debugf("targetPod: %+v", targetPod.Address)
	if err := proxywasm.SetUpstreamOverrideHost([]byte(targetPod.Address)); err != nil {
		// The request still goes through, just without the scheduling decision applied.
		log.Errorf("override upstream host failed, fallback to default lb policy, error informations: %+v", err)
	}
	return types.ActionContinue
}
// HandleHttpResponseHeaders opts out of response body processing; this policy does not use the
// response body.
func (lb LeastBusyLoadBalancer) HandleHttpResponseHeaders(ctx wrapper.HttpContext) types.Action {
ctx.DontReadResponseBody()
return types.ActionContinue
}
// HandleHttpStreamingResponseBody returns each response chunk unchanged.
func (lb LeastBusyLoadBalancer) HandleHttpStreamingResponseBody(ctx wrapper.HttpContext, data []byte, endOfStream bool) []byte {
return data
}
// HandleHttpResponseBody is a no-op for this policy.
func (lb LeastBusyLoadBalancer) HandleHttpResponseBody(ctx wrapper.HttpContext, body []byte) types.Action {
return types.ActionContinue
}

View File

@@ -0,0 +1,203 @@
/*
Copyright 2025 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package scheduling
import (
"errors"
"math"
"github.com/alibaba/higress/plugins/wasm-go/extensions/ai-load-balancer/least_busy/backend"
"github.com/higress-group/proxy-wasm-go-sdk/proxywasm"
)
// Filter narrows a set of candidate pods for a request.
type Filter interface {
// Name returns the filter's name.
Name() string
// Filter returns the subset of pods that remain candidates for req, or an error.
Filter(req *LLMRequest, pods []*backend.PodMetrics) ([]*backend.PodMetrics, error)
}
// filter applies the current filterFunc, and then recursively applies the next filters depending
// on the success or failure of the current filterFunc.
// It can be used to construct a flow chart algorithm.
type filter struct {
// name identifies the filter in debug logs.
name string
// filter is the function applied by this node.
filter filterFunc
// nextOnSuccess filter will be applied after successfully applying the current filter.
// The filtered results will be passed to the next filter.
nextOnSuccess *filter
// nextOnFailure filter will be applied if current filter fails.
// The original input will be passed to the next filter.
nextOnFailure *filter
// nextOnSuccessOrFailure is a convenience field to configure the next filter regardless of the
// success or failure of the current filter.
// NOTE: When using nextOnSuccessOrFailure, both nextOnSuccess and nextOnFailure SHOULD be nil.
// However if that's not the case, nextOnSuccess and nextOnFailure will be used, instead of
// nextOnSuccessOrFailure, in the success and failure scenarios, respectively.
nextOnSuccessOrFailure *filter
// callbacks api.FilterCallbackHandler
}
// Name returns the filter's name, or the literal "nil" when called on a nil filter.
func (f *filter) Name() string {
	if f != nil {
		return f.name
	}
	return "nil"
}
// Filter runs this node's filterFunc and then walks to the next node of the decision tree.
//
// The run counts as a success when the filterFunc returns no error and at least one pod. On
// success the next node is nextOnSuccess and receives the filtered pods; on failure it is
// nextOnFailure and receives the original pods. nextOnSuccessOrFailure is used when the
// branch-specific node is nil. When there is no next node the filterFunc result is returned as is.
func (f *filter) Filter(req *LLMRequest, pods []*backend.PodMetrics) ([]*backend.PodMetrics, error) {
	proxywasm.LogDebugf("Running filter %q on request %v with %v pods", f.name, req, len(pods))
	filtered, err := f.filter(req, pods)

	// Choose the branch-specific successor and the pod set it should receive.
	next, input := f.nextOnFailure, pods
	if err == nil && len(filtered) > 0 {
		next, input = f.nextOnSuccess, filtered
	}
	if next == nil {
		next = f.nextOnSuccessOrFailure
	}
	if next == nil {
		// No succeeding filters to run, return.
		return filtered, err
	}
	return next.Filter(req, input)
}
// filterFunc filters a set of input pods to a subset for the given request.
// A non-nil error, or an empty result, is treated as a failure by filter.Filter.
type filterFunc func(req *LLMRequest, pods []*backend.PodMetrics) ([]*backend.PodMetrics, error)
// toFilterFunc lifts a per-pod predicate into a filterFunc that keeps every pod the predicate
// accepts. The resulting filterFunc returns an error when no pod is accepted.
func toFilterFunc(pp podPredicate) filterFunc {
	return func(req *LLMRequest, pods []*backend.PodMetrics) ([]*backend.PodMetrics, error) {
		var kept []*backend.PodMetrics
		for _, candidate := range pods {
			if pp(req, candidate) {
				kept = append(kept, candidate)
			}
		}
		if len(kept) == 0 {
			return nil, errors.New("no pods left")
		}
		return kept, nil
	}
}
// leastQueuingFilterFunc keeps the pods whose waiting queue falls into the lowest band.
//
// It finds the smallest and largest waiting queue size across all pods, divides that range by the
// number of pods, and keeps the pods within the first slice of the range. Keeping every pod with
// a similarly short queue, rather than only the absolute minimum, leaves more choices for the
// next filter, which gave better results in aggregate.
// TODO: Compare this strategy with other strategies such as top K.
func leastQueuingFilterFunc(req *LLMRequest, pods []*backend.PodMetrics) ([]*backend.PodMetrics, error) {
	selected := []*backend.PodMetrics{}
	if len(pods) == 0 {
		return selected, nil
	}

	shortest, longest := math.MaxInt, 0
	for _, p := range pods {
		if p.WaitingQueueSize <= shortest {
			shortest = p.WaitingQueueSize
		}
		if p.WaitingQueueSize >= longest {
			longest = p.WaitingQueueSize
		}
	}

	// Upper bound of the first band of the [shortest, longest] range.
	cutoff := shortest + (longest-shortest)/len(pods)
	for _, p := range pods {
		if size := p.WaitingQueueSize; size >= shortest && size <= cutoff {
			selected = append(selected, p)
		}
	}
	return selected, nil
}
// lowQueueingPodPredicate reports whether the pod's waiting queue is below queueingThresholdLoRA.
func lowQueueingPodPredicate(_ *LLMRequest, pod *backend.PodMetrics) bool {
return pod.WaitingQueueSize < queueingThresholdLoRA
}
// leastKVCacheFilterFunc keeps the pods whose KV cache usage falls into the lowest band.
//
// It finds the smallest and largest KV cache usage across all pods, divides that range by the
// number of pods, and keeps the pods within the first slice of the range. Keeping every pod with
// similarly low usage, rather than only the absolute minimum, leaves more choices for the next
// filter, which gave better results in aggregate.
// TODO: Compare this strategy with other strategies such as top K.
func leastKVCacheFilterFunc(req *LLMRequest, pods []*backend.PodMetrics) ([]*backend.PodMetrics, error) {
	selected := []*backend.PodMetrics{}
	if len(pods) == 0 {
		return selected, nil
	}

	lowest, highest := math.MaxFloat64, float64(0)
	for _, p := range pods {
		if p.KVCacheUsagePercent <= lowest {
			lowest = p.KVCacheUsagePercent
		}
		if p.KVCacheUsagePercent >= highest {
			highest = p.KVCacheUsagePercent
		}
	}

	// Upper bound of the first band of the [lowest, highest] range.
	cutoff := lowest + (highest-lowest)/float64(len(pods))
	for _, p := range pods {
		if usage := p.KVCacheUsagePercent; usage >= lowest && usage <= cutoff {
			selected = append(selected, p)
		}
	}
	return selected, nil
}
// podPredicate reports whether a single pod is a desired candidate for the request.
type podPredicate func(req *LLMRequest, pod *backend.PodMetrics) bool
// lowLoRACostPredicate reports whether serving the requested adapter on the pod is low cost:
// either the adapter is already active in the model server, or the model server has room to load
// it. This gives weak affinity by spreading the load of a LoRA adapter across multiple pods,
// avoiding "pinning" all requests to a single pod. It gave good performance in our initial
// benchmarking results in the scenario where # of lora slots > # of lora adapters.
func lowLoRACostPredicate(req *LLMRequest, pod *backend.PodMetrics) bool {
	if _, active := pod.ActiveModels[req.Model]; active {
		return true
	}
	return len(pod.ActiveModels) < pod.MaxActiveModels
}
// loRAAffinityPredicate reports whether the requested model is already among the pod's active
// models.
func loRAAffinityPredicate(req *LLMRequest, pod *backend.PodMetrics) bool {
	_, alreadyActive := pod.ActiveModels[req.Model]
	return alreadyActive
}
// canAcceptNewLoraPredicate reports whether the pod has room to load another adapter, i.e. it
// has fewer active models than MaxActiveModels.
func canAcceptNewLoraPredicate(req *LLMRequest, pod *backend.PodMetrics) bool {
return len(pod.ActiveModels) < pod.MaxActiveModels
}
// criticalRequestPredicate reports whether the request is critical. The result depends only on
// the request, so it is the same for every pod.
func criticalRequestPredicate(req *LLMRequest, pod *backend.PodMetrics) bool {
return req.Critical
}
// noQueueAndLessThanKVCacheThresholdPredicate returns a predicate that accepts a pod when its
// waiting queue is at most queueThreshold and its KV cache usage is at most kvCacheThreshold.
func noQueueAndLessThanKVCacheThresholdPredicate(queueThreshold int, kvCacheThreshold float64) podPredicate {
	return func(_ *LLMRequest, pod *backend.PodMetrics) bool {
		if pod.WaitingQueueSize > queueThreshold {
			return false
		}
		return pod.KVCacheUsagePercent <= kvCacheThreshold
	}
}

View File

@@ -0,0 +1,158 @@
/*
Copyright 2025 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
// Package scheduling implements request scheduling algorithms.
package scheduling
import (
"errors"
"fmt"
"math/rand"
"strings"
"github.com/alibaba/higress/plugins/wasm-go/extensions/ai-load-balancer/least_busy/backend"
"github.com/alibaba/higress/plugins/wasm-go/extensions/ai-load-balancer/least_busy/backend/vllm"
"github.com/prometheus/common/expfmt"
)
const (
// kvCacheThreshold is the highest KV cache usage at which a pod is still considered to have
// capacity for a sheddable request.
// TODO(https://github.com/kubernetes-sigs/gateway-api-inference-extension/issues/16) Make this configurable.
kvCacheThreshold = 0.8
// queueThresholdCritical is the largest waiting queue size at which a pod is still considered
// to have capacity for a sheddable request.
// TODO(https://github.com/kubernetes-sigs/gateway-api-inference-extension/issues/16) Make this configurable.
queueThresholdCritical = 5
// TODO(https://github.com/kubernetes-sigs/gateway-api-inference-extension/issues/16) Make this configurable.
// queueingThresholdLoRA is the number of queued requests below which queueing is considered
// low, so that LoRA affinity can be prioritized.
// The value of 50 was arrived at heuristically based on experiments.
queueingThresholdLoRA = 50
)
// The variables below form the scheduling decision tree. Each node is a *filter; see
// filter.Filter for how nextOnSuccess, nextOnFailure and nextOnSuccessOrFailure are followed.
var (
// defaultFilter is the root of the tree: critical requests go through lowLatencyFilter, all
// other requests go through sheddableRequestFilter.
defaultFilter = &filter{
name: "critical request",
filter: toFilterFunc(criticalRequestPredicate),
nextOnSuccess: lowLatencyFilter,
nextOnFailure: sheddableRequestFilter,
}
// queueLoRAAndKVCacheFilter applied least queue -> low cost lora -> least KV Cache filter
queueLoRAAndKVCacheFilter = &filter{
name: "least queuing",
filter: leastQueuingFilterFunc,
nextOnSuccessOrFailure: &filter{
name: "low cost LoRA",
filter: toFilterFunc(lowLoRACostPredicate),
nextOnSuccessOrFailure: &filter{
name: "least KV cache percent",
filter: leastKVCacheFilterFunc,
},
},
}
// queueAndKVCacheFilter applies least queue followed by least KV Cache filter
queueAndKVCacheFilter = &filter{
name: "least queuing",
filter: leastQueuingFilterFunc,
nextOnSuccessOrFailure: &filter{
name: "least KV cache percent",
filter: leastKVCacheFilterFunc,
},
}
// lowLatencyFilter handles critical requests. When some pods have a low waiting queue it
// prefers pods that already have the requested adapter active, then pods that can accept a new
// adapter, and finishes with queueAndKVCacheFilter. When no pod has a low waiting queue it
// falls back to queueLoRAAndKVCacheFilter.
lowLatencyFilter = &filter{
name: "low queueing filter",
filter: toFilterFunc((lowQueueingPodPredicate)),
nextOnSuccess: &filter{
name: "affinity LoRA",
filter: toFilterFunc(loRAAffinityPredicate),
nextOnSuccess: queueAndKVCacheFilter,
nextOnFailure: &filter{
name: "can accept LoRA Adapter",
filter: toFilterFunc(canAcceptNewLoraPredicate),
nextOnSuccessOrFailure: queueAndKVCacheFilter,
},
},
nextOnFailure: queueLoRAAndKVCacheFilter,
}
// sheddableRequestFilter handles non-critical requests: they are scheduled only when some pod
// has spare capacity, and dropped otherwise.
sheddableRequestFilter = &filter{
// When there is at least one model server that's not queuing requests, and still has KV
// cache below a certain threshold, we consider this model server has capacity to handle
// a sheddable request without impacting critical requests.
name: "has capacity for sheddable requests",
filter: toFilterFunc(noQueueAndLessThanKVCacheThresholdPredicate(queueThresholdCritical, kvCacheThreshold)),
nextOnSuccess: queueLoRAAndKVCacheFilter,
// If all pods are queuing or running above the KVCache threshold, we drop the sheddable
// request to make room for critical requests.
nextOnFailure: &filter{
name: "drop request",
filter: func(req *LLMRequest, pods []*backend.PodMetrics) ([]*backend.PodMetrics, error) {
// api.LogDebugf("Dropping request %v", req)
return []*backend.PodMetrics{}, errors.New("dropping request due to limited backend resources")
},
},
}
)
// NewScheduler returns a Scheduler that schedules over the given pod metrics using defaultFilter.
func NewScheduler(pm []*backend.PodMetrics) *Scheduler {
return &Scheduler{
podMetrics: pm,
filter: defaultFilter,
}
}
// Scheduler picks a target pod for a request from a fixed snapshot of pod metrics.
type Scheduler struct {
// podMetrics is the candidate set passed to the filter on every Schedule call.
podMetrics []*backend.PodMetrics
// filter narrows podMetrics down to the acceptable pods for a request.
filter Filter
}
// Schedule finds the target pod based on metrics and the requested lora adapter.
//
// The configured filter narrows the candidate pods and one of the remaining pods is chosen at
// random. An error is returned when the filter fails or leaves no pods; in that case the
// returned pod is the zero value.
func (s *Scheduler) Schedule(req *LLMRequest) (targetPod backend.Pod, err error) {
	pods, err := s.filter.Filter(req, s.podMetrics)
	if err != nil {
		return backend.Pod{}, fmt.Errorf("failed to apply filter, resulted %v pods: %w", len(pods), err)
	}
	if len(pods) == 0 {
		// There is no underlying error to wrap here; passing nil to %w would render as
		// "%!w(<nil>)" in the message.
		return backend.Pod{}, errors.New("failed to apply filter, resulted 0 pods")
	}
	return pods[rand.Intn(len(pods))].Pod, nil
}
// GetScheduler builds a Scheduler from raw per-host metrics.
//
// hostMetrics maps a host address to its metrics in the prometheus text exposition format. Each
// entry is parsed and converted into pod metrics; the host address is used as both the pod name
// and the pod address. An error is returned when hostMetrics is empty, or as soon as the metrics
// of any host cannot be parsed or converted.
func GetScheduler(hostMetrics map[string]string) (*Scheduler, error) {
	if len(hostMetrics) == 0 {
		return nil, errors.New("backend is not support llm scheduling")
	}

	var podMetrics []*backend.PodMetrics
	for addr, rawMetrics := range hostMetrics {
		var parser expfmt.TextParser
		families, err := parser.TextToMetricFamilies(strings.NewReader(rawMetrics))
		if err != nil {
			return nil, err
		}

		base := &backend.PodMetrics{
			Pod:     backend.Pod{Name: addr, Address: addr},
			Metrics: backend.Metrics{},
		}
		parsed, err := vllm.PromToPodMetrics(families, base)
		if err != nil {
			return nil, err
		}
		podMetrics = append(podMetrics, parsed)
	}
	return NewScheduler(podMetrics), nil
}

View File

@@ -0,0 +1,7 @@
package scheduling
// LLMRequest is a structured representation of the fields we parse out of the LLMRequest body.
type LLMRequest struct {
// Model is the model name taken from the request.
Model string
// Critical marks the request as critical; non-critical requests may be dropped by the
// scheduler when backends lack capacity.
Critical bool
}

View File

@@ -0,0 +1,82 @@
package main
import (
"fmt"
"github.com/higress-group/proxy-wasm-go-sdk/proxywasm/types"
"github.com/higress-group/wasm-go/pkg/wrapper"
"github.com/tidwall/gjson"
global_least_request "github.com/alibaba/higress/plugins/wasm-go/extensions/ai-load-balancer/global_least_request"
least_busy "github.com/alibaba/higress/plugins/wasm-go/extensions/ai-load-balancer/least_busy"
prefix_cache "github.com/alibaba/higress/plugins/wasm-go/extensions/ai-load-balancer/prefix_cache"
)
// main is intentionally empty; the plugin is registered in init.
func main() {}
// init registers the "ai-load-balancer" plugin with its config parser and its request and
// response callbacks.
func init() {
wrapper.SetCtx(
"ai-load-balancer",
wrapper.ParseConfig(parseConfig),
wrapper.ProcessRequestHeaders(onHttpRequestHeaders),
wrapper.ProcessRequestBody(onHttpRequestBody),
wrapper.ProcessResponseHeaders(onHttpResponseHeaders),
wrapper.ProcessStreamingResponseBody(onHttpStreamingResponseBody),
wrapper.ProcessResponseBody(onHttpResponseBody),
)
}
// LoadBalancer is the set of HTTP callbacks every load balancing policy implements. The plugin
// callbacks forward to the policy selected in the configuration.
type LoadBalancer interface {
HandleHttpRequestHeaders(ctx wrapper.HttpContext) types.Action
HandleHttpRequestBody(ctx wrapper.HttpContext, body []byte) types.Action
HandleHttpResponseHeaders(ctx wrapper.HttpContext) types.Action
HandleHttpStreamingResponseBody(ctx wrapper.HttpContext, data []byte, endOfStream bool) []byte
HandleHttpResponseBody(ctx wrapper.HttpContext, body []byte) types.Action
}
// Config is the parsed plugin configuration.
type Config struct {
// policy is the value of the "lb_policy" config field.
policy string
// lb is the load balancer built for policy from the "lb_config" config field.
lb LoadBalancer
}
// Supported values of the "lb_policy" config field.
const (
LeastBusyLoadBalancerPolicy = "least_busy"
GlobalLeastRequestLoadBalancerPolicy = "global_least_request"
PrefixCache = "prefix_cache"
)
// parseConfig reads "lb_policy" and builds the matching load balancer from "lb_config".
// It returns the constructor's error, or an error naming the policy when it is not supported.
func parseConfig(json gjson.Result, config *Config) error {
	config.policy = json.Get("lb_policy").String()
	lbConfig := json.Get("lb_config")

	var err error
	switch config.policy {
	case LeastBusyLoadBalancerPolicy:
		config.lb, err = least_busy.NewLeastBusyLoadBalancer(lbConfig)
	case GlobalLeastRequestLoadBalancerPolicy:
		config.lb, err = global_least_request.NewGlobalLeastRequestLoadBalancer(lbConfig)
	case PrefixCache:
		config.lb, err = prefix_cache.NewPrefixCacheLoadBalancer(lbConfig)
	default:
		return fmt.Errorf("lb_policy %s is not supported", config.policy)
	}
	return err
}
// onHttpRequestHeaders forwards the request headers callback to the configured load balancer.
func onHttpRequestHeaders(ctx wrapper.HttpContext, config Config) types.Action {
return config.lb.HandleHttpRequestHeaders(ctx)
}
// onHttpRequestBody forwards the request body callback to the configured load balancer.
func onHttpRequestBody(ctx wrapper.HttpContext, config Config, body []byte) types.Action {
return config.lb.HandleHttpRequestBody(ctx, body)
}
// onHttpResponseHeaders forwards the response headers callback to the configured load balancer.
func onHttpResponseHeaders(ctx wrapper.HttpContext, config Config) types.Action {
return config.lb.HandleHttpResponseHeaders(ctx)
}
// onHttpStreamingResponseBody forwards each streamed response chunk to the configured load
// balancer and returns the chunk it produces.
func onHttpStreamingResponseBody(ctx wrapper.HttpContext, config Config, data []byte, endOfStream bool) []byte {
return config.lb.HandleHttpStreamingResponseBody(ctx, data, endOfStream)
}
// onHttpResponseBody forwards the response body callback to the configured load balancer.
func onHttpResponseBody(ctx wrapper.HttpContext, config Config, body []byte) types.Action {
return config.lb.HandleHttpResponseBody(ctx, body)
}

View File

@@ -0,0 +1,302 @@
package prefix_cache
import (
"crypto/sha1"
"encoding/hex"
"errors"
"fmt"
"math/rand"
"strings"
"github.com/alibaba/higress/plugins/wasm-go/extensions/ai-load-balancer/utils"
"github.com/higress-group/proxy-wasm-go-sdk/proxywasm"
"github.com/higress-group/proxy-wasm-go-sdk/proxywasm/types"
"github.com/higress-group/wasm-go/pkg/log"
"github.com/higress-group/wasm-go/pkg/wrapper"
"github.com/tidwall/gjson"
"github.com/tidwall/resp"
)
const (
	// RedisKeyFormat is the format of the Redis hash key that holds the per-host in-flight
	// request counts. Its two arguments are the route name and the cluster name.
	RedisKeyFormat = "higress:global_least_request_table:%s:%s"

	// RedisLua selects the upstream host for a request.
	//
	// KEYS[1] is the TTL applied to prefix keys, KEYS[2] the request-count hash key, KEYS[3] the
	// default target, and KEYS[4..] the healthy host addresses. ARGV holds one hex-encoded hash
	// per conversation segment; the key of a prefix is the XOR of the hashes of its segments.
	//
	// The script follows the longest chain of existing prefix keys that point at a healthy host
	// and reuses that host. When no prefix matches it falls back to the healthy host with the
	// fewest in-flight requests. It then increments the chosen host's counter, records the
	// remaining prefix keys for that host, and returns the host.
	RedisLua = `-- hex string => bytes
local function hex_to_bytes(hex)
    local bytes = {}
    for i = 1, #hex, 2 do
        local byte_str = hex:sub(i, i+1)
        local byte_val = tonumber(byte_str, 16)
        table.insert(bytes, byte_val)
    end
    return bytes
end
-- bytes => hex string
local function bytes_to_hex(bytes)
    local result = ""
    for _, byte in ipairs(bytes) do
        result = result .. string.format("%02X", byte)
    end
    return result
end
-- byte XOR
local function byte_xor(a, b)
    local result = 0
    for i = 0, 7 do
        local bit_val = 2^i
        if ((a % (bit_val * 2)) >= bit_val) ~= ((b % (bit_val * 2)) >= bit_val) then
            result = result + bit_val
        end
    end
    return result
end
-- hex string XOR
local function hex_xor(a, b)
    if #a ~= #b then
        error("Hex strings must be of equal length, first is " .. a .. " second is " .. b)
    end
    local a_bytes = hex_to_bytes(a)
    local b_bytes = hex_to_bytes(b)
    local result_bytes = {}
    for i = 1, #a_bytes do
        table.insert(result_bytes, byte_xor(a_bytes[i], b_bytes[i]))
    end
    return bytes_to_hex(result_bytes)
end
-- check host whether healthy
local function is_healthy(addr)
    for i = 4, #KEYS do
        if addr == KEYS[i] then
            return true
        end
    end
    return false
end
local target = ""
local key = ""
local current_key = ""
local count = #ARGV
local ttl = KEYS[1]
local hset_key = KEYS[2]
local default_target = KEYS[3]
if count == 0 then
    return target
end
-- find longest prefix
local index = 1
while index <= count do
    if current_key == "" then
        current_key = ARGV[index]
    else
        current_key = hex_xor(current_key, ARGV[index])
    end
    if redis.call("EXISTS", current_key) == 1 then
        local tmp_target = redis.call("GET", current_key)
        if not is_healthy(tmp_target) then
            break
        end
        -- only advance key once the prefix is accepted, so that the tree-path loop below
        -- resumes from the last accepted prefix and rewrites the unhealthy entry
        key = current_key
        target = tmp_target
        -- update ttl for exist keys
        redis.call("EXPIRE", key, ttl)
        index = index + 1
    else
        break
    end
end
-- global least request
if target == "" then
    index = 1
    local current_count = 0
    target = default_target
    if redis.call('HEXISTS', hset_key, target) ~= 0 then
        -- hash values come back as strings; convert so the comparison is numeric
        current_count = tonumber(redis.call('HGET', hset_key, target))
        local hash = redis.call('HGETALL', hset_key)
        for i = 1, #hash, 2 do
            local addr = hash[i]
            local addr_count = tonumber(hash[i+1])
            if addr_count < current_count and is_healthy(addr) then
                target = addr
                current_count = addr_count
            end
        end
    end
end
-- update request count
redis.call("HINCRBY", hset_key, target, 1)
-- add tree-path
while index <= count do
    if key == "" then
        key = ARGV[index]
    else
        key = hex_xor(key, ARGV[index])
    end
    redis.call("SET", key, target)
    redis.call("EXPIRE", key, ttl)
    index = index + 1
end
return target`
)
// PrefixCacheLoadBalancer selects upstream hosts with the RedisLua script, which keeps its
// state in Redis.
type PrefixCacheLoadBalancer struct {
// redisClient runs the Lua script and the request-count updates.
redisClient wrapper.RedisClient
// redisKeyTTL is passed to the script as the TTL for the prefix keys it writes.
redisKeyTTL int
}
// NewPrefixCacheLoadBalancer builds a PrefixCacheLoadBalancer from the plugin's lb_config.
//
// Config fields read:
//   - serviceFQDN, servicePort: the Redis service; both are required.
//   - username, password: Redis credentials.
//   - timeout: passed to the Redis client, defaults to 3000.
//   - database: Redis database index, defaults to 0.
//   - redisKeyTTL: TTL for prefix keys, defaults to 1800.
//
// It returns an error when the Redis service is not configured or the client fails to initialise.
func NewPrefixCacheLoadBalancer(json gjson.Result) (PrefixCacheLoadBalancer, error) {
	lb := PrefixCacheLoadBalancer{}
	serviceFQDN := json.Get("serviceFQDN").String()
	servicePort := json.Get("servicePort").Int()
	if serviceFQDN == "" || servicePort == 0 {
		log.Errorf("invalid redis service, serviceFQDN: %s, servicePort: %d", serviceFQDN, servicePort)
		return lb, errors.New("invalid redis service config")
	}
	lb.redisClient = wrapper.NewRedisClusterClient(wrapper.FQDNCluster{
		FQDN: serviceFQDN,
		Port: servicePort,
	})
	username := json.Get("username").String()
	password := json.Get("password").String()
	timeout := json.Get("timeout").Int()
	if timeout == 0 {
		timeout = 3000
	}
	// database default is 0
	database := json.Get("database").Int()
	// Use the configured TTL when one is given; otherwise keep the default. The check used to
	// be inverted, which ignored a configured value and produced a TTL of 0 when it was unset.
	lb.redisKeyTTL = 1800
	if ttl := json.Get("redisKeyTTL").Int(); ttl > 0 {
		lb.redisKeyTTL = int(ttl)
	}
	return lb, lb.redisClient.Init(username, password, int64(timeout), wrapper.WithDataBase(int(database)))
}
// HandleHttpRequestHeaders stops header iteration so that the host override performed in
// HandleHttpRequestBody can still take effect.
func (lb PrefixCacheLoadBalancer) HandleHttpRequestHeaders(ctx wrapper.HttpContext) types.Action {
// If return types.ActionContinue, SetUpstreamOverrideHost will not take effect
return types.HeaderStopIteration
}
// HandleHttpRequestBody picks the upstream host for a chat request using the RedisLua script.
//
// The "messages" array of the body is split into segments, each ending at a "user" message or at
// the last message, and every segment is hashed with SHA-1. The hashes, the healthy hosts and a
// randomly chosen default host are passed to the script. The request is paused until Redis
// answers; the callback then overrides the upstream host and resumes the request.
//
// Whenever load balancing is bypassed, the "error" context flag is set so that
// HandleHttpStreamingResponseBody does not try to decrement a request counter that was never
// incremented.
func (lb PrefixCacheLoadBalancer) HandleHttpRequestBody(ctx wrapper.HttpContext, body []byte) types.Action {
	routeName, err := utils.GetRouteName()
	if err != nil || routeName == "" {
		ctx.SetContext("error", true)
		return types.ActionContinue
	}
	ctx.SetContext("routeName", routeName)

	clusterName, err := utils.GetClusterName()
	if err != nil || clusterName == "" {
		ctx.SetContext("error", true)
		return types.ActionContinue
	}
	ctx.SetContext("clusterName", clusterName)

	hostInfos, err := proxywasm.GetUpstreamHosts()
	if err != nil {
		ctx.SetContext("error", true)
		log.Error("get upstream cluster endpoints failed")
		return types.ActionContinue
	}
	// hostInfo[0] is the host address, hostInfo[1] a JSON document describing the host.
	healthyHosts := []string{}
	for _, hostInfo := range hostInfos {
		if gjson.Get(hostInfo[1], "health_status").String() == "Healthy" {
			healthyHosts = append(healthyHosts, hostInfo[0])
		}
	}
	if len(healthyHosts) == 0 {
		// No host was selected, so there is nothing to decrement at the end of the response.
		ctx.SetContext("error", true)
		log.Info("upstream cluster has no healthy endpoints")
		return types.ActionContinue
	}
	defaultHost := healthyHosts[rand.Intn(len(healthyHosts))]

	// Build one hash per conversation segment.
	params := []interface{}{}
	rawStr := ""
	messages := gjson.GetBytes(body, "messages").Array()
	for index, obj := range messages {
		if !obj.Get("role").Exists() || !obj.Get("content").Exists() {
			ctx.SetContext("error", true)
			log.Info("cannot extract role or content from request body, skip llm load balancing")
			return types.ActionContinue
		}
		role := obj.Get("role").String()
		content := obj.Get("content").String()
		rawStr += role + ":" + content
		if role == "user" || index == len(messages)-1 {
			sha1Str := computeSHA1(rawStr)
			params = append(params, sha1Str)
			rawStr = ""
		}
	}
	if len(params) == 0 {
		// The body carries no messages; nothing was selected or counted.
		ctx.SetContext("error", true)
		return types.ActionContinue
	}

	keys := []interface{}{lb.redisKeyTTL, fmt.Sprintf(RedisKeyFormat, routeName, clusterName), defaultHost}
	for _, v := range healthyHosts {
		keys = append(keys, v)
	}
	err = lb.redisClient.Eval(RedisLua, len(keys), keys, params, func(response resp.Value) {
		defer proxywasm.ResumeHttpRequest()
		if err := response.Error(); err != nil {
			ctx.SetContext("error", true)
			log.Errorf("Redis eval failed: %+v", err)
			return
		}
		hostSelected := response.String()
		if err := proxywasm.SetUpstreamOverrideHost([]byte(hostSelected)); err != nil {
			// NOTE(review): the script has already incremented the counter of hostSelected at
			// this point; with the "error" flag set it is never decremented — confirm whether
			// that is intended.
			ctx.SetContext("error", true)
			log.Errorf("override upstream host failed, fallback to default lb policy, error informations: %+v", err)
		}
		log.Debugf("host_selected: %s", hostSelected)
		ctx.SetContext("host_selected", hostSelected)
	})
	if err != nil {
		ctx.SetContext("error", true)
		return types.ActionContinue
	}
	return types.ActionPause
}
// HandleHttpResponseHeaders is a no-op for this policy.
func (lb PrefixCacheLoadBalancer) HandleHttpResponseHeaders(ctx wrapper.HttpContext) types.Action {
return types.ActionContinue
}
// HandleHttpStreamingResponseBody passes every chunk through unchanged. On the final chunk, and
// unless the "error" context flag is set, it decrements the request counter of the host that was
// selected for this request.
func (lb PrefixCacheLoadBalancer) HandleHttpStreamingResponseBody(ctx wrapper.HttpContext, data []byte, endOfStream bool) []byte {
	if !endOfStream {
		return data
	}
	if failed, _ := ctx.GetContext("error").(bool); failed {
		return data
	}

	routeName, _ := ctx.GetContext("routeName").(string)
	clusterName, _ := ctx.GetContext("clusterName").(string)
	hostSelected, _ := ctx.GetContext("host_selected").(string)
	if hostSelected == "" {
		log.Errorf("get host_selected failed")
		return data
	}
	lb.redisClient.HIncrBy(fmt.Sprintf(RedisKeyFormat, routeName, clusterName), hostSelected, -1, nil)
	return data
}
// HandleHttpResponseBody is a no-op for this policy.
func (lb PrefixCacheLoadBalancer) HandleHttpResponseBody(ctx wrapper.HttpContext, body []byte) types.Action {
return types.ActionContinue
}
// computeSHA1 returns the SHA-1 digest of data as an upper-case hex string.
func computeSHA1(data string) string {
	digest := sha1.Sum([]byte(data))
	return strings.ToUpper(hex.EncodeToString(digest[:]))
}

View File

@@ -0,0 +1,19 @@
package utils
import "github.com/higress-group/proxy-wasm-go-sdk/proxywasm"
// GetRouteName returns the value of the "route_name" property of the current request.
func GetRouteName() (string, error) {
	raw, err := proxywasm.GetProperty([]string{"route_name"})
	if err != nil {
		return "", err
	}
	return string(raw), nil
}
// GetClusterName returns the value of the "cluster_name" property of the current request.
func GetClusterName() (string, error) {
	raw, err := proxywasm.GetProperty([]string{"cluster_name"})
	if err != nil {
		return "", err
	}
	return string(raw), nil
}

View File

@@ -30,19 +30,20 @@ description: AI 代理插件配置参考
`provider`的配置字段说明如下:
| 名称 | 数据类型 | 填写要求 | 默认值 | 描述 |
| ---------------------- | ---------------------- | -------- | ------ | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
| `type` | string | 必填 | - | AI 服务提供商名称 |
| `apiTokens` | array of string | 非必填 | - | 用于在访问 AI 服务时进行认证的令牌。如果配置了多个 token插件会在请求时随机进行选择。部分服务提供商只支持配置一个 token。 |
| `timeout` | number | 非必填 | - | 访问 AI 服务的超时时间。单位为毫秒。默认值为 120000即 2 分钟。此项配置目前仅用于获取上下文信息,并不影响实际转发大模型请求。 |
| 名称 | 数据类型 | 填写要求 | 默认值 | 描述 |
| ---------------------- | ---------------------- | -------- | ------ | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
| `type` | string | 必填 | - | AI 服务提供商名称 |
| `apiTokens` | array of string | 非必填 | - | 用于在访问 AI 服务时进行认证的令牌。如果配置了多个 token，插件会在请求时随机进行选择。部分服务提供商只支持配置一个 token。 |
| `timeout` | number | 非必填 | - | 访问 AI 服务的超时时间。单位为毫秒。默认值为 120000，即 2 分钟。此项配置目前仅用于获取上下文信息，并不影响实际转发大模型请求。 |
| `modelMapping` | map of string | 非必填 | - | AI 模型映射表,用于将请求中的模型名称映射为服务提供商支持模型名称。<br/>1. 支持前缀匹配。例如用 "gpt-3-\*" 匹配所有名称以“gpt-3-”开头的模型;<br/>2. 支持使用 "\*" 为键来配置通用兜底映射关系;<br/>3. 如果映射的目标名称为空字符串 "",则表示保留原模型名称。<br/>4. 支持以 `~` 前缀使用正则匹配。例如用 "~gpt(.\*)" 匹配所有以 "gpt" 开头的模型并支持在目标模型中使用 capture group 引用匹配到的内容。示例: "~gpt(.\*): openai/gpt\$1" |
| `protocol` | string | 非必填 | - | 插件对外提供的 API 接口契约。目前支持以下取值openai默认值使用 OpenAI 的接口契约、original使用目标服务提供商的原始接口契约 |
| `context` | object | 非必填 | - | 配置 AI 对话上下文信息 |
| `customSettings` | array of customSetting | 非必填 | - | 为 AI 请求指定覆盖或者填充参数 |
| `failover` | object | 非必填 | - | 配置 apiToken 的 failover 策略,当 apiToken 不可用时,将其移出 apiToken 列表,待健康检测通过后重新添加回 apiToken 列表 |
| `retryOnFailure` | object | 非必填 | - | 当请求失败时立即进行重试 |
| `reasoningContentMode` | string | 非必填 | - | 如何处理大模型服务返回的推理内容。目前支持以下取值passthrough正常输出推理内容、ignore不输出推理内容、concat将推理内容拼接在常规输出内容之前。默认为 passthrough。仅支持通义千问服务。 |
| `capabilities` | map of string | 非必填 | - | 部分 provider 的部分 ai 能力原生兼容 openai/v1 格式,不需要重写,可以直接转发,通过此配置项指定来开启转发, key 表示的是采用的厂商协议能力values 表示的真实的厂商该能力的 api path, 厂商协议能力当前支持: openai/v1/chatcompletions, openai/v1/embeddings, openai/v1/imagegeneration, openai/v1/audiospeech, cohere/v1/rerank |
| `protocol` | string | 非必填 | - | 插件对外提供的 API 接口契约。目前支持以下取值：openai（默认值，使用 OpenAI 的接口契约）、original（使用目标服务提供商的原始接口契约） |
| `context` | object | 非必填 | - | 配置 AI 对话上下文信息 |
| `customSettings` | array of customSetting | 非必填 | - | 为 AI 请求指定覆盖或者填充参数 |
| `failover` | object | 非必填 | - | 配置 apiToken 的 failover 策略,当 apiToken 不可用时,将其移出 apiToken 列表,待健康检测通过后重新添加回 apiToken 列表 |
| `retryOnFailure` | object | 非必填 | - | 当请求失败时立即进行重试 |
| `reasoningContentMode` | string | 非必填 | - | 如何处理大模型服务返回的推理内容。目前支持以下取值：passthrough（正常输出推理内容）、ignore（不输出推理内容）、concat（将推理内容拼接在常规输出内容之前）。默认为 passthrough。仅支持通义千问服务。 |
| `capabilities` | map of string | 非必填 | - | 部分 provider 的部分 ai 能力原生兼容 openai/v1 格式,不需要重写,可以直接转发,通过此配置项指定来开启转发, key 表示的是采用的厂商协议能力values 表示的真实的厂商该能力的 api path, 厂商协议能力当前支持: openai/v1/chatcompletions, openai/v1/embeddings, openai/v1/imagegeneration, openai/v1/audiospeech, cohere/v1/rerank |
| `subPath` | string | 非必填 | - | 如果配置了 subPath，将会先移除请求 path 中的该前缀，再进行后续处理 |
`context`的配置字段说明如下:
@@ -272,18 +273,19 @@ Google Vertex AI 所对应的 type 为 vertex。它特有的配置字段如下
| `vertexRegion` | string | 必填 | - | Google Cloud 区域(如 us-central1, europe-west4 等),用于构建 Vertex API 地址 |
| `vertexProjectId` | string | 必填 | - | Google Cloud 项目 ID用于标识目标 GCP 项目 |
| `vertexAuthServiceName` | string | 必填 | - | 用于 OAuth2 认证的服务名称该服务为了访问oauth2.googleapis.com |
| `vertexGeminiSafetySetting` | map of string | 非必填 | - | Gemini 模型的内容安全过滤设置。 |
| `vertexTokenRefreshAhead` | number | 非必填 | - | Vertex access token刷新提前时间(单位秒) |
| `geminiSafetySetting` | map of string | 非必填 | - | Gemini AI 内容过滤和安全级别设定。参考[Safety settings](https://ai.google.dev/gemini-api/docs/safety-settings) |
| `vertexTokenRefreshAhead` | number | 非必填 | - | Vertex access token刷新提前时间(单位秒) |
#### AWS Bedrock
AWS Bedrock 所对应的 type 为 bedrock。它特有的配置字段如下:
| 名称 | 数据类型 | 填写要求 | 默认值 | 描述 |
|----------------|--------|------|-----|------------------------------|
| `awsAccessKey` | string | 必填 | - | AWS Access Key用于身份认证 |
| `awsSecretKey` | string | 必填 | - | AWS Secret Access Key用于身份认证 |
| `awsRegion` | string | 必填 | - | AWS 区域例如us-east-1 |
| 名称 | 数据类型 | 填写要求 | 默认值 | 描述 |
|---------------------------|--------|------|-----|------------------------------|
| `awsAccessKey` | string | 必填 | - | AWS Access Key用于身份认证 |
| `awsSecretKey` | string | 必填 | - | AWS Secret Access Key用于身份认证 |
| `awsRegion` | string | 必填 | - | AWS 区域例如us-east-1 |
| `bedrockAdditionalFields` | map | 非必填 | - | Bedrock 额外模型请求参数 |
## 用法示例
@@ -1428,7 +1430,7 @@ provider:
}
```
### 使用 OpenAI 协议代理 gemini 服务
### 使用 OpenAI 协议代理 Gemini 服务
**配置信息**
@@ -1724,6 +1726,8 @@ provider:
awsAccessKey: "YOUR_AWS_ACCESS_KEY_ID"
awsSecretKey: "YOUR_AWS_SECRET_ACCESS_KEY"
awsRegion: "YOUR_AWS_REGION"
bedrockAdditionalFields:
top_k: 200
```
**请求示例**

View File

@@ -29,15 +29,16 @@ Plugin execution priority: `100`
**Details for the `provider` configuration fields:**
| Name | Data Type | Requirement | Default | Description |
| -------------- | --------------- | -------- | ------ | ------------------------------------------------------------------------------------------------------------------------------------------------------------- |
| `type` | string | Required | - | Name of the AI service provider |
| `apiTokens` | array of string | Optional | - | Tokens used for authentication when accessing AI services. If multiple tokens are configured, the plugin randomly selects one for each request. Some service providers only support configuring a single token. |
| `timeout` | number | Optional | - | Timeout for accessing AI services, in milliseconds. The default value is 120000, which equals 2 minutes. Only used when retrieving context data. Won't affect the request forwarded to the LLM upstream. |
| `modelMapping` | map of string | Optional | - | Mapping table for AI models, used to map model names in requests to names supported by the service provider.<br/>1. Supports prefix matching. For example, "gpt-3-\*" matches all model names starting with “gpt-3-”;<br/>2. Supports using "\*" as a key for a general fallback mapping;<br/>3. If the mapped target name is an empty string "", the original model name is preserved. |
| `protocol` | string | Optional | - | API contract provided by the plugin. Currently supports the following values: openai (default, uses OpenAI's interface contract), original (uses the raw interface contract of the target service provider) |
| `context` | object | Optional | - | Configuration for AI conversation context information |
| `customSettings` | array of customSetting | Optional | - | Specifies overrides or fills parameters for AI requests |
| Name | Data Type | Requirement | Default | Description |
| -------------- | --------------- | -------- | ------ | ------------------------------------------------------------------------------------------------------------------------------------------------------------- |
| `type` | string | Required | - | Name of the AI service provider |
| `apiTokens` | array of string | Optional | - | Tokens used for authentication when accessing AI services. If multiple tokens are configured, the plugin randomly selects one for each request. Some service providers only support configuring a single token. |
| `timeout` | number | Optional | - | Timeout for accessing AI services, in milliseconds. The default value is 120000, which equals 2 minutes. Only used when retrieving context data. Won't affect the request forwarded to the LLM upstream. |
| `modelMapping` | map of string | Optional | - | Mapping table for AI models, used to map model names in requests to names supported by the service provider.<br/>1. Supports prefix matching. For example, "gpt-3-\*" matches all model names starting with “gpt-3-”;<br/>2. Supports using "\*" as a key for a general fallback mapping;<br/>3. If the mapped target name is an empty string "", the original model name is preserved. |
| `protocol` | string | Optional | - | API contract provided by the plugin. Currently supports the following values: openai (default, uses OpenAI's interface contract), original (uses the raw interface contract of the target service provider) |
| `context` | object | Optional | - | Configuration for AI conversation context information |
| `customSettings` | array of customSetting | Optional | - | Specifies overrides or fills parameters for AI requests |
| `subPath` | string | Optional | - | If subPath is configured, the prefix will be removed from the request path before further processing. |
**Details for the `context` configuration fields:**
@@ -224,11 +225,12 @@ For Vertex, the corresponding `type` is `vertex`. Its unique configuration field
For AWS Bedrock, the corresponding `type` is `bedrock`. Its unique configuration fields are:
| Name | Data Type | Requirement | Default | Description |
|----------------|-----------|-------------|---------|-----------------------------------------------|
| `awsAccessKey` | string | Required | - | AWS Access Key used for authentication |
| `awsSecretKey` | string | Required | - | AWS Secret Access Key used for authentication |
| `awsRegion` | string | Required | - | AWS region, e.g., us-east-1 |
| Name | Data Type | Requirement | Default | Description |
|---------------------------|-----------|-------------|---------|---------------------------------------------------------|
| `awsAccessKey` | string | Required | - | AWS Access Key used for authentication |
| `awsSecretKey` | string | Required | - | AWS Secret Access Key used for authentication |
| `awsRegion` | string | Required | - | AWS region, e.g., us-east-1 |
| `bedrockAdditionalFields` | map | Optional | - | Additional inference parameters that the model supports |
## Usage Examples
@@ -1499,6 +1501,8 @@ provider:
awsAccessKey: "YOUR_AWS_ACCESS_KEY_ID"
awsSecretKey: "YOUR_AWS_SECRET_ACCESS_KEY"
awsRegion: "YOUR_AWS_REGION"
bedrockAdditionalFields:
top_k: 200
```
**Request Example**

View File

@@ -161,7 +161,8 @@ func onHttpRequestBody(ctx wrapper.HttpContext, pluginConfig config.PluginConfig
if settingErr != nil {
log.Errorf("failed to replace request body by custom settings: %v", settingErr)
}
if providerConfig.IsOpenAIProtocol() {
// 仅 /v1/chat/completions 和 /v1/completions 接口支持 stream_options 参数
if providerConfig.IsOpenAIProtocol() && (apiName == provider.ApiNameChatCompletion || apiName == provider.ApiNameCompletion) {
newBody = normalizeOpenAiRequestBody(newBody)
}
log.Debugf("[onHttpRequestBody] newBody=%s", newBody)
@@ -315,7 +316,7 @@ func onHttpResponseBody(ctx wrapper.HttpContext, pluginConfig config.PluginConfi
func normalizeOpenAiRequestBody(body []byte) []byte {
var err error
// Default setting include_usage.
if gjson.GetBytes(body, "stream").Bool() {
if gjson.GetBytes(body, "stream").Bool() && (!gjson.GetBytes(body, "stream_options").Exists() || !gjson.GetBytes(body, "stream_options.include_usage").Exists()) {
body, err = sjson.SetBytes(body, "stream_options.include_usage", true)
if err != nil {
log.Errorf("set include_usage failed, err:%s", err)
@@ -383,28 +384,31 @@ func getApiName(path string) provider.ApiName {
return provider.ApiNameFineTuningJobs
}
if util.RegRetrieveFineTuningJobPath.MatchString(path) {
return provider.ApiNameFineTuningRetrieveJob
return provider.ApiNameRetrieveFineTuningJob
}
if util.RegRetrieveFineTuningJobEventsPath.MatchString(path) {
return provider.PathOpenAIFineTuningJobEvents
return provider.ApiNameFineTuningJobEvents
}
if util.RegRetrieveFineTuningJobCheckpointsPath.MatchString(path) {
return provider.PathOpenAIFineTuningJobCheckpoints
return provider.ApiNameFineTuningJobCheckpoints
}
if util.RegCancelFineTuningJobPath.MatchString(path) {
return provider.ApiNameFineTuningCancelJob
return provider.ApiNameCancelFineTuningJob
}
if util.RegResumeFineTuningJobPath.MatchString(path) {
return provider.ApiNameFineTuningResumeJob
return provider.ApiNameResumeFineTuningJob
}
if util.RegPauseFineTuningJobPath.MatchString(path) {
return provider.ApiNameFineTuningPauseJob
return provider.ApiNamePauseFineTuningJob
}
if util.RegFineTuningCheckpointPermissionPath.MatchString(path) {
return provider.ApiNameFineTuningCheckpointPermissions
}
if util.RegDeleteFineTuningCheckpointPermissionPath.MatchString(path) {
return provider.PathOpenAIFineDeleteTuningCheckpointPermission
return provider.ApiNameDeleteFineTuningCheckpointPermission
}
if strings.HasSuffix(path, "/v1/responses") {
return provider.ApiNameResponses
}
// cohere style
if strings.HasSuffix(path, "/v1/rerank") {

View File

@@ -723,21 +723,34 @@ func (b *bedrockProvider) onChatCompletionRequestBody(ctx wrapper.HttpContext, b
func (b *bedrockProvider) buildBedrockTextGenerationRequest(origRequest *chatCompletionRequest, headers http.Header) ([]byte, error) {
messages := make([]bedrockMessage, 0, len(origRequest.Messages))
for i := range origRequest.Messages {
messages = append(messages, chatMessage2BedrockMessage(origRequest.Messages[i]))
systemMessages := make([]systemContentBlock, 0)
for _, msg := range origRequest.Messages {
if msg.Role == roleSystem {
systemMessages = append(systemMessages, systemContentBlock{Text: msg.StringContent()})
} else {
messages = append(messages, chatMessage2BedrockMessage(msg))
}
}
request := &bedrockTextGenRequest{
System: systemMessages,
Messages: messages,
InferenceConfig: bedrockInferenceConfig{
MaxTokens: origRequest.MaxTokens,
Temperature: origRequest.Temperature,
TopP: origRequest.TopP,
},
AdditionalModelRequestFields: map[string]interface{}{},
AdditionalModelRequestFields: make(map[string]interface{}),
PerformanceConfig: PerformanceConfiguration{
Latency: "standard",
},
}
for key, value := range b.config.bedrockAdditionalFields {
request.AdditionalModelRequestFields[key] = value
}
requestBytes, err := json.Marshal(request)
b.setAuthHeaders(requestBytes, headers)
return requestBytes, err

View File

@@ -6,8 +6,11 @@ import (
"strings"
"github.com/alibaba/higress/plugins/wasm-go/extensions/ai-proxy/util"
"github.com/alibaba/higress/plugins/wasm-go/pkg/log"
"github.com/alibaba/higress/plugins/wasm-go/pkg/wrapper"
"github.com/higress-group/proxy-wasm-go-sdk/proxywasm/types"
"github.com/tidwall/gjson"
"github.com/tidwall/sjson"
)
const (
@@ -15,6 +18,7 @@ const (
doubaoChatCompletionPath = "/api/v3/chat/completions"
doubaoEmbeddingsPath = "/api/v3/embeddings"
doubaoImageGenerationPath = "/api/v3/images/generations"
doubaoResponsesPath = "/api/v3/responses"
)
type doubaoProviderInitializer struct{}
@@ -31,6 +35,7 @@ func (m *doubaoProviderInitializer) DefaultCapabilities() map[string]string {
string(ApiNameChatCompletion): doubaoChatCompletionPath,
string(ApiNameEmbeddings): doubaoEmbeddingsPath,
string(ApiNameImageGeneration): doubaoImageGenerationPath,
string(ApiNameResponses): doubaoResponsesPath,
}
}
@@ -70,6 +75,32 @@ func (m *doubaoProvider) TransformRequestHeaders(ctx wrapper.HttpContext, apiNam
headers.Del("Content-Length")
}
// TransformRequestBody adapts an OpenAI-style request body to what the Doubao
// (Volcengine) endpoints accept and then delegates to the provider config's
// default request-body transformation.
//
//   - ApiNameResponses: removes "parallel_tool_calls" and "tool_choice".
//   - ApiNameImageGeneration: sets "watermark" to false when the caller did
//     not specify it.
//
// Each edit is best-effort: when one fails, a warning is logged and the body
// as it was before that edit is kept. The value sjson returns alongside an
// error is never used, so a failed edit cannot discard the request body.
func (m *doubaoProvider) TransformRequestBody(ctx wrapper.HttpContext, apiName ApiName, body []byte) ([]byte, error) {
	switch apiName {
	case ApiNameResponses:
		// Strip parameters that the Volcengine responses API does not support yet.
		// Reference: https://www.volcengine.com/docs/82379/1569618
		// TODO: handle this with a DTO instead of raw JSON edits.
		for _, param := range []string{"parallel_tool_calls", "tool_choice"} {
			stripped, err := sjson.DeleteBytes(body, param)
			if err != nil {
				log.Warnf("[doubao] failed to delete %s in request body, err: %v", param, err)
				continue
			}
			body = stripped
		}
	case ApiNameImageGeneration:
		// Volcengine image generation adds a watermark by default, while the
		// OpenAI API has no such parameter; default it to off unless the
		// caller set it explicitly.
		// Reference: https://www.volcengine.com/docs/82379/1541523
		if res := gjson.GetBytes(body, "watermark"); !res.Exists() {
			patched, err := sjson.SetBytes(body, "watermark", false)
			if err != nil {
				log.Warnf("[doubao] failed to set watermark in request body, err: %v", err)
			} else {
				body = patched
			}
		}
	}
	return m.config.defaultTransformRequestBody(ctx, apiName, body)
}
func (m *doubaoProvider) GetApiName(path string) ApiName {
if strings.Contains(path, doubaoChatCompletionPath) {
return ApiNameChatCompletion
@@ -80,5 +111,8 @@ func (m *doubaoProvider) GetApiName(path string) ApiName {
if strings.Contains(path, doubaoImageGenerationPath) {
return ApiNameImageGeneration
}
if strings.Contains(path, doubaoResponsesPath) {
return ApiNameResponses
}
return ""
}

View File

@@ -42,12 +42,12 @@ func (m *openaiProviderInitializer) DefaultCapabilities() map[string]string {
string(ApiNameCancelBatch): PathOpenAICancelBatch,
string(ApiNameResponses): PathOpenAIResponses,
string(ApiNameFineTuningJobs): PathOpenAIFineTuningJobs,
string(ApiNameFineTuningRetrieveJob): PathOpenAIFineTuningRetrieveJob,
string(ApiNameRetrieveFineTuningJob): PathOpenAIRetrieveFineTuningJob,
string(ApiNameFineTuningJobEvents): PathOpenAIFineTuningJobEvents,
string(ApiNameFineTuningJobCheckpoints): PathOpenAIFineTuningJobCheckpoints,
string(ApiNameFineTuningCancelJob): PathOpenAIFineTuningCancelJob,
string(ApiNameFineTuningResumeJob): PathOpenAIFineTuningResumeJob,
string(ApiNameFineTuningPauseJob): PathOpenAIFineTuningPauseJob,
string(ApiNameCancelFineTuningJob): PathOpenAICancelFineTuningJob,
string(ApiNameResumeFineTuningJob): PathOpenAIResumeFineTuningJob,
string(ApiNamePauseFineTuningJob): PathOpenAIPauseFineTuningJob,
string(ApiNameFineTuningCheckpointPermissions): PathOpenAIFineTuningCheckpointPermissions,
string(ApiNameDeleteFineTuningCheckpointPermission): PathOpenAIFineDeleteTuningCheckpointPermission,
}
@@ -120,9 +120,7 @@ func (m *openaiProvider) OnRequestHeaders(ctx wrapper.HttpContext, apiName ApiNa
func (m *openaiProvider) TransformRequestHeaders(ctx wrapper.HttpContext, apiName ApiName, headers http.Header) {
if m.isDirectCustomPath {
util.OverwriteRequestPathHeader(headers, m.customPath)
}
if apiName != "" {
} else if apiName != "" {
util.OverwriteRequestPathHeaderByCapability(headers, string(apiName), m.config.capabilities)
}

View File

@@ -43,12 +43,12 @@ const (
ApiNameModels ApiName = "openai/v1/models"
ApiNameResponses ApiName = "openai/v1/responses"
ApiNameFineTuningJobs ApiName = "openai/v1/fine-tuningjobs"
ApiNameFineTuningRetrieveJob ApiName = "openai/v1/retrievefine-tuningjob"
ApiNameRetrieveFineTuningJob ApiName = "openai/v1/retrievefine-tuningjob"
ApiNameFineTuningJobEvents ApiName = "openai/v1/fine-tuningjobsevents"
ApiNameFineTuningJobCheckpoints ApiName = "openai/v1/fine-tuningjobcheckpoints"
ApiNameFineTuningCancelJob ApiName = "openai/v1/cancelfine-tuningjob"
ApiNameFineTuningResumeJob ApiName = "openai/v1/resumefine-tuningjob"
ApiNameFineTuningPauseJob ApiName = "openai/v1/pausefine-tuningjob"
ApiNameCancelFineTuningJob ApiName = "openai/v1/cancelfine-tuningjob"
ApiNameResumeFineTuningJob ApiName = "openai/v1/resumefine-tuningjob"
ApiNamePauseFineTuningJob ApiName = "openai/v1/pausefine-tuningjob"
ApiNameFineTuningCheckpointPermissions ApiName = "openai/v1/fine-tuningjobcheckpointpermissions"
ApiNameDeleteFineTuningCheckpointPermission ApiName = "openai/v1/deletefine-tuningjobcheckpointpermission"
@@ -68,12 +68,12 @@ const (
PathOpenAIAudioSpeech = "/v1/audio/speech"
PathOpenAIResponses = "/v1/responses"
PathOpenAIFineTuningJobs = "/v1/fine_tuning/jobs"
PathOpenAIFineTuningRetrieveJob = "/v1/fine_tuning/jobs/{fine_tuning_job_id}"
PathOpenAIRetrieveFineTuningJob = "/v1/fine_tuning/jobs/{fine_tuning_job_id}"
PathOpenAIFineTuningJobEvents = "/v1/fine_tuning/jobs/{fine_tuning_job_id}/events"
PathOpenAIFineTuningJobCheckpoints = "/v1/fine_tuning/jobs/{fine_tuning_job_id}/checkpoints"
PathOpenAIFineTuningCancelJob = "/v1/fine_tuning/jobs/{fine_tuning_job_id}/cancel"
PathOpenAIFineTuningResumeJob = "/v1/fine_tuning/jobs/{fine_tuning_job_id}/resume"
PathOpenAIFineTuningPauseJob = "/v1/fine_tuning/jobs/{fine_tuning_job_id}/pause"
PathOpenAICancelFineTuningJob = "/v1/fine_tuning/jobs/{fine_tuning_job_id}/cancel"
PathOpenAIResumeFineTuningJob = "/v1/fine_tuning/jobs/{fine_tuning_job_id}/resume"
PathOpenAIPauseFineTuningJob = "/v1/fine_tuning/jobs/{fine_tuning_job_id}/pause"
PathOpenAIFineTuningCheckpointPermissions = "/v1/fine_tuning/checkpoints/{fine_tuned_model_checkpoint}/permissions"
PathOpenAIFineDeleteTuningCheckpointPermission = "/v1/fine_tuning/checkpoints/{fine_tuned_model_checkpoint}/permissions/{permission_id}"
@@ -296,6 +296,9 @@ type ProviderConfig struct {
// @Title zh-CN Amazon Bedrock Region
// @Description zh-CN 仅适用于Amazon Bedrock服务访问
awsRegion string `required:"false" yaml:"awsRegion" json:"awsRegion"`
// @Title zh-CN Amazon Bedrock 额外模型请求参数
// @Description zh-CN 仅适用于Amazon Bedrock服务用于设置模型特定的推理参数
bedrockAdditionalFields map[string]interface{} `required:"false" yaml:"bedrockAdditionalFields" json:"bedrockAdditionalFields"`
// @Title zh-CN minimax API type
// @Description zh-CN 仅适用于 minimax 服务。minimax API 类型v2 和 pro 中选填一项,默认值为 v2
minimaxApiType string `required:"false" yaml:"minimaxApiType" json:"minimaxApiType"`
@@ -355,6 +358,8 @@ type ProviderConfig struct {
// @Title zh-CN 额外支持的ai能力
// @Description zh-CN 开放的ai能力和urlpath映射例如 {"openai/v1/chatcompletions": "/v1/chat/completions"}
capabilities map[string]string
// @Title zh-CN 如果配置了subPath将会先移除请求path中该前缀再进行后续处理
subPath string `required:"false" yaml:"subPath" json:"subPath"`
}
func (c *ProviderConfig) GetId() string {
@@ -424,6 +429,12 @@ func (c *ProviderConfig) FromJson(json gjson.Result) {
c.awsAccessKey = json.Get("awsAccessKey").String()
c.awsSecretKey = json.Get("awsSecretKey").String()
c.awsRegion = json.Get("awsRegion").String()
if c.typ == providerTypeBedrock {
c.bedrockAdditionalFields = make(map[string]interface{})
for k, v := range json.Get("bedrockAdditionalFields").Map() {
c.bedrockAdditionalFields[k] = v.Value()
}
}
c.minimaxApiType = json.Get("minimaxApiType").String()
c.minimaxGroupId = json.Get("minimaxGroupId").String()
c.cloudflareAccountId = json.Get("cloudflareAccountId").String()
@@ -516,6 +527,7 @@ func (c *ProviderConfig) FromJson(json gjson.Result) {
c.capabilities[capability] = pathJson.String()
}
}
c.subPath = json.Get("subPath").String()
}
func (c *ProviderConfig) Validate() error {
@@ -826,10 +838,17 @@ func (c *ProviderConfig) handleRequestBody(
// handleRequestHeaders rewrites the original request headers and writes them
// back exactly once.
//
// Steps, in order:
//  1. If subPath is configured, strip it as a prefix from the ":path" header.
//  2. If the provider implements TransformRequestHeadersHandler, let it
//     transform the headers for the given apiName.
//  3. If ":path" ended up different from the incoming value, record the
//     incoming value in "X-ENVOY-ORIGINAL-PATH".
//  4. Replace the request headers with the resulting set.
func (c *ProviderConfig) handleRequestHeaders(provider Provider, ctx wrapper.HttpContext, apiName ApiName) {
	headers := util.GetOriginalRequestHeaders()
	originPath := headers.Get(":path")
	if c.subPath != "" {
		headers.Set(":path", strings.TrimPrefix(originPath, c.subPath))
	}
	if handler, ok := provider.(TransformRequestHeadersHandler); ok {
		handler.TransformRequestHeaders(ctx, apiName, headers)
	}
	if headers.Get(":path") != originPath {
		headers.Set("X-ENVOY-ORIGINAL-PATH", originPath)
	}
	// Write back once, after every modification above. An earlier write-back
	// inside the handler branch would be superseded by this call and would
	// not yet contain X-ENVOY-ORIGINAL-PATH.
	util.ReplaceRequestHeaders(headers)
}
// defaultTransformRequestBody 默认的请求体转换方法只做模型映射用slog替换模型名称不用序列化和反序列化提高性能

View File

@@ -29,22 +29,24 @@ Attribute 配置说明:
| 名称 | 数据类型 | 填写要求 | 默认值 | 描述 |
|----------------|-------|-----|-----|------------------------|
| `key` | string | 必填 | - | attrribute 名称 |
| `value_source` | string | 必填 | - | attrribute 取值来源,可选值为 `fixed_value`, `request_header`, `request_body`, `response_header`, `response_body`, `response_streaming_body` |
| `value` | string | 必填 | - | attrribute 取值 key value/path |
| `default_value` | string | 非必填 | - | attrribute 默认值 |
| `rule` | string | 非必填 | - | 从流式响应中提取 attrribute 的规则,可选值为 `first`, `replace`, `append`|
| `key` | string | 必填 | - | attribute 名称 |
| `value_source` | string | 必填 | - | attribute 取值来源,可选值为 `fixed_value`, `request_header`, `request_body`, `response_header`, `response_body`, `response_streaming_body` |
| `value` | string | 必填 | - | attribute 取值 key value/path |
| `default_value` | string | 非必填 | - | attribute 默认值 |
| `rule` | string | 非必填 | - | 从流式响应中提取 attribute 的规则,可选值为 `first`, `replace`, `append`|
| `apply_to_log` | bool | 非必填 | false | 是否将提取的信息记录在日志中 |
| `apply_to_span` | bool | 非必填 | false | 是否将提取的信息记录在链路追踪span中 |
| `trace_span_key` | string | 非必填 | - | 链路追踪attribute key默认会使用`key`的设置 |
| `as_separate_log_field` | bool | 非必填 | false | 记录日志时是否作为单独的字段,日志字段名使用`key`的设置 |
`value_source` 的各种取值含义如下:
- `fixed_value`:固定值
- `request_header` attrribute 值通过 http 请求头获取value 配置为 header key
- `request_body` attrribute 值通过请求 body 获取value 配置格式为 gjson 的 jsonpath
- `response_header` attrribute 值通过 http 响应头获取value 配置为header key
- `response_body` attrribute 值通过响应 body 获取value 配置格式为 gjson 的 jsonpath
- `response_streaming_body` attrribute 值通过流式响应 body 获取value 配置格式为 gjson 的 jsonpath
- `request_header` attribute 值通过 http 请求头获取value 配置为 header key
- `request_body` attribute 值通过请求 body 获取value 配置格式为 gjson 的 jsonpath
- `response_header` attribute 值通过 http 响应头获取value 配置为header key
- `response_body` attribute 值通过响应 body 获取value 配置格式为 gjson 的 jsonpath
- `response_streaming_body` attribute 值通过流式响应 body 获取value 配置格式为 gjson 的 jsonpath
当 `value_source` 为 `response_streaming_body` 时,应当配置 `rule`,用于指定如何从流式 body 中获取指定值,取值含义如下:
@@ -60,6 +62,21 @@ Attribute 配置说明:
'{"ai_log":"%FILTER_STATE(wasm.ai_log:PLAIN)%"}'
```
如果字段设置了 `as_separate_log_field`,例如:
```yaml
attributes:
- key: consumer
value_source: request_header
value: x-mse-consumer
apply_to_log: true
as_separate_log_field: true
```
那么要在日志中打印,需要额外设置 log_format:
```
'{"consumer":"%FILTER_STATE(wasm.consumer:PLAIN)%"}'
```
### 空配置
#### 监控

View File

@@ -29,22 +29,24 @@ Attribute Configuration instructions:
| Name | Type | Required | Default | Description |
|----------------|-------|-----|-----|------------------------|
| `key` | string | required | - | attrribute key |
| `value_source` | string | required | - | attrribute value source, optional values are `fixed_value`, `request_header`, `request_body`, `response_header`, `response_body`, `response_streaming_body` |
| `value` | string | required | - | how to get attrribute value |
| `key` | string | required | - | attribute key |
| `value_source` | string | required | - | attribute value source, optional values are `fixed_value`, `request_header`, `request_body`, `response_header`, `response_body`, `response_streaming_body` |
| `value` | string | required | - | how to get attribute value |
| `default_value` | string | optional | - | default value for attribute |
| `rule` | string | optional | - | Rule to extract attribute from streaming response, optional values are `first`, `replace`, `append`|
| `apply_to_log` | bool | optional | false | Whether to record the extracted information in the log |
| `apply_to_span` | bool | optional | false | Whether to record the extracted information in the tracing span |
| `trace_span_key` | string | optional | - | span attribute key, default is the value of `key` |
| `as_separate_log_field` | bool | optional | false | Whether to use a separate log field, the field name is equal to the value of `key` |
The meanings of various values for `value_source` are as follows:
- `fixed_value`: fixed value
- `request_header`: The attrribute is obtained through the http request header
- `request_body`: The attrribute is obtained through the http request body
- `response_header`: The attrribute is obtained through the http response header
- `response_body`: The attrribute is obtained through the http response body
- `response_streaming_body`: The attrribute is obtained through the http streaming response body
- `request_header`: The attribute is obtained through the http request header
- `request_body`: The attribute is obtained through the http request body
- `response_header`: The attribute is obtained through the http response header
- `response_body`: The attribute is obtained through the http response body
- `response_streaming_body`: The attribute is obtained through the http streaming response body
When `value_source` is `response_streaming_body`, `rule` should be configured to specify how to obtain the specified value from the streaming body. The meaning of the value is as follows:
@@ -60,6 +62,21 @@ If you want to record ai-statistic related statistical values in the gateway acc
'{"ai_log":"%FILTER_STATE(wasm.ai_log:PLAIN)%"}'
```
If the field is set with `as_separate_log_field`, for example:
```yaml
attributes:
- key: consumer
value_source: request_header
value: x-mse-consumer
apply_to_log: true
as_separate_log_field: true
```
Then to print in the log, you need to set log_format additionally:
```
'{"consumer":"%FILTER_STATE(wasm.consumer:PLAIN)%"}'
```
### Empty
#### Metric

View File

@@ -75,13 +75,15 @@ const (
// Attribute is the configuration of a single attribute: where its value comes from and whether it is recorded in the log and/or the tracing span.
type Attribute struct {
Key string `json:"key"`
ValueSource string `json:"value_source"`
Value string `json:"value"`
DefaultValue string `json:"default_value,omitempty"`
Rule string `json:"rule,omitempty"`
ApplyToLog bool `json:"apply_to_log,omitempty"`
ApplyToSpan bool `json:"apply_to_span,omitempty"`
Key string `json:"key"`
ValueSource string `json:"value_source"`
Value string `json:"value"`
TraceSpanKey string `json:"trace_span_key,omitempty"`
DefaultValue string `json:"default_value,omitempty"`
Rule string `json:"rule,omitempty"`
ApplyToLog bool `json:"apply_to_log,omitempty"`
ApplyToSpan bool `json:"apply_to_span,omitempty"`
AsSeparateLogField bool `json:"as_separate_log_field,omitempty"`
}
type AIStatisticsConfig struct {
@@ -406,13 +408,23 @@ func setAttributeBySource(ctx wrapper.HttpContext, config AIStatisticsConfig, so
}
log.Debugf("[attribute] source type: %s, key: %s, value: %+v", source, key, value)
if attribute.ApplyToLog {
ctx.SetUserAttribute(key, value)
if attribute.AsSeparateLogField {
marshalledJsonStr := wrapper.MarshalStr(fmt.Sprint(value))
if err := proxywasm.SetProperty([]string{key}, []byte(marshalledJsonStr)); err != nil {
log.Warnf("failed to set %s in filter state, raw is %s, err is %v", key, marshalledJsonStr, err)
}
} else {
ctx.SetUserAttribute(key, value)
}
}
// for metrics
if key == Model || key == InputToken || key == OutputToken {
ctx.SetContext(key, value)
}
if attribute.ApplyToSpan {
if attribute.TraceSpanKey != "" {
key = attribute.TraceSpanKey
}
setSpanAttribute(key, value, log)
}
}
@@ -481,10 +493,10 @@ func writeMetric(ctx wrapper.HttpContext, config AIStatisticsConfig, log wrapper
log.Warnf("ClusterName typd assert failed, skip metric record")
return
}
if config.disableOpenaiUsage {
return
}
}
if ctx.GetUserAttribute(Model) == nil || ctx.GetUserAttribute(InputToken) == nil || ctx.GetUserAttribute(OutputToken) == nil {
log.Warnf("get usage information failed, skip metric record")

View File

@@ -8,7 +8,7 @@ replace amap-tools => ../amap-tools
require (
amap-tools v0.0.0-00010101000000-000000000000
github.com/alibaba/higress/plugins/wasm-go v1.4.4-0.20250617125129-8731ba4ea3db
github.com/higress-group/wasm-go v1.0.1-0.20250626074812-b8840174d876
quark-search v0.0.0-00010101000000-000000000000
)

View File

@@ -6,8 +6,6 @@ github.com/Masterminds/semver/v3 v3.3.0 h1:B8LGeaivUe71a5qox1ICM/JLl0NqZSW5CHyL+
github.com/Masterminds/semver/v3 v3.3.0/go.mod h1:4V+yj/TJE1HU9XfppCwVMZq3I84lprf4nC11bSS5beM=
github.com/Masterminds/sprig/v3 v3.3.0 h1:mQh0Yrg1XPo6vjYXgtf5OtijNAKJRNcTdOOGZe3tPhs=
github.com/Masterminds/sprig/v3 v3.3.0/go.mod h1:Zy1iXRYNqNLUolqCpL4uhk6SHUMAOSCzdgBfDb35Lz0=
github.com/alibaba/higress/plugins/wasm-go v1.4.4-0.20250617125129-8731ba4ea3db h1:hubkTsadmBj/FNfh9gI0glOWI7NEDQeF+UwX0EmO0Es=
github.com/alibaba/higress/plugins/wasm-go v1.4.4-0.20250617125129-8731ba4ea3db/go.mod h1:ixggLUTsFfFogWS6p95AzTfey/XbPccCWln1gyvkY0M=
github.com/bahlo/generic-list-go v0.2.0 h1:5sz/EEAK+ls5wF+NeqDpk5+iNdMDXrh3z3nPnH1Wvgk=
github.com/bahlo/generic-list-go v0.2.0/go.mod h1:2KvAjgMlE5NNynlg/5iLrrCCZ2+5xWbdbCW3pNTGyYg=
github.com/buger/jsonparser v1.1.1 h1:2PnMjfWD7wBILjqQbt530v576A/cAbQvEW9gGIpYMUs=
@@ -24,6 +22,8 @@ github.com/higress-group/gjson_template v0.0.0-20250413075336-4c4161ed428b h1:rR
github.com/higress-group/gjson_template v0.0.0-20250413075336-4c4161ed428b/go.mod h1:rU3M+Tq5VrQOo0dxpKHGb03Ty0sdWIZfAH+YCOACx/Y=
github.com/higress-group/proxy-wasm-go-sdk v0.0.0-20250611100342-5654e89a7a80 h1:xqmtTZI0JQ2O+Lg9/CE6c+Tw9KD6FnvWw8EpLVuuvfg=
github.com/higress-group/proxy-wasm-go-sdk v0.0.0-20250611100342-5654e89a7a80/go.mod h1:tRI2LfMudSkKHhyv1uex3BWzcice2s/l8Ah8axporfA=
github.com/higress-group/wasm-go v1.0.1-0.20250626074812-b8840174d876 h1:dJEP8h+FN9Q5PUs5zttXl2bS6lGf/bOsX9GMBTHaL3I=
github.com/higress-group/wasm-go v1.0.1-0.20250626074812-b8840174d876/go.mod h1:ODBV27sjmhIW8Cqv3R74EUcTnbdkE69bmXBQFuRkY1M=
github.com/huandu/xstrings v1.5.0 h1:2ag3IFq9ZDANvthTwTiqSSZLjDc+BedvHPAp5tJy2TI=
github.com/huandu/xstrings v1.5.0/go.mod h1:y5/lhBue+AyNmUVz9RLU9xbLR0o4KIIExikq4ovT0aE=
github.com/invopop/jsonschema v0.13.0 h1:KvpoAJWEjR3uD9Kbm2HWJmqsEaHt8lBUpd0qHcIi21E=

View File

@@ -18,7 +18,7 @@ import (
amap "amap-tools/tools"
quark "quark-search/tools"
"github.com/alibaba/higress/plugins/wasm-go/pkg/mcp"
"github.com/higress-group/wasm-go/pkg/mcp"
)
func main() {}

View File

@@ -3,8 +3,8 @@ module amap-tools
go 1.24.1
require (
github.com/alibaba/higress/plugins/wasm-go v1.4.4-0.20250617125129-8731ba4ea3db
github.com/higress-group/proxy-wasm-go-sdk v0.0.0-20250611100342-5654e89a7a80
github.com/higress-group/wasm-go v1.0.0
)
require (

View File

@@ -6,8 +6,6 @@ github.com/Masterminds/semver/v3 v3.3.0 h1:B8LGeaivUe71a5qox1ICM/JLl0NqZSW5CHyL+
github.com/Masterminds/semver/v3 v3.3.0/go.mod h1:4V+yj/TJE1HU9XfppCwVMZq3I84lprf4nC11bSS5beM=
github.com/Masterminds/sprig/v3 v3.3.0 h1:mQh0Yrg1XPo6vjYXgtf5OtijNAKJRNcTdOOGZe3tPhs=
github.com/Masterminds/sprig/v3 v3.3.0/go.mod h1:Zy1iXRYNqNLUolqCpL4uhk6SHUMAOSCzdgBfDb35Lz0=
github.com/alibaba/higress/plugins/wasm-go v1.4.4-0.20250617125129-8731ba4ea3db h1:hubkTsadmBj/FNfh9gI0glOWI7NEDQeF+UwX0EmO0Es=
github.com/alibaba/higress/plugins/wasm-go v1.4.4-0.20250617125129-8731ba4ea3db/go.mod h1:ixggLUTsFfFogWS6p95AzTfey/XbPccCWln1gyvkY0M=
github.com/bahlo/generic-list-go v0.2.0 h1:5sz/EEAK+ls5wF+NeqDpk5+iNdMDXrh3z3nPnH1Wvgk=
github.com/bahlo/generic-list-go v0.2.0/go.mod h1:2KvAjgMlE5NNynlg/5iLrrCCZ2+5xWbdbCW3pNTGyYg=
github.com/buger/jsonparser v1.1.1 h1:2PnMjfWD7wBILjqQbt530v576A/cAbQvEW9gGIpYMUs=
@@ -24,6 +22,8 @@ github.com/higress-group/gjson_template v0.0.0-20250413075336-4c4161ed428b h1:rR
github.com/higress-group/gjson_template v0.0.0-20250413075336-4c4161ed428b/go.mod h1:rU3M+Tq5VrQOo0dxpKHGb03Ty0sdWIZfAH+YCOACx/Y=
github.com/higress-group/proxy-wasm-go-sdk v0.0.0-20250611100342-5654e89a7a80 h1:xqmtTZI0JQ2O+Lg9/CE6c+Tw9KD6FnvWw8EpLVuuvfg=
github.com/higress-group/proxy-wasm-go-sdk v0.0.0-20250611100342-5654e89a7a80/go.mod h1:tRI2LfMudSkKHhyv1uex3BWzcice2s/l8Ah8axporfA=
github.com/higress-group/wasm-go v1.0.0 h1:4Ik5n3FsJ5+r13KLQl2ky+8NuAE8dfWQwoKxXYD2KAw=
github.com/higress-group/wasm-go v1.0.0/go.mod h1:ODBV27sjmhIW8Cqv3R74EUcTnbdkE69bmXBQFuRkY1M=
github.com/huandu/xstrings v1.5.0 h1:2ag3IFq9ZDANvthTwTiqSSZLjDc+BedvHPAp5tJy2TI=
github.com/huandu/xstrings v1.5.0/go.mod h1:y5/lhBue+AyNmUVz9RLU9xbLR0o4KIIExikq4ovT0aE=
github.com/invopop/jsonschema v0.13.0 h1:KvpoAJWEjR3uD9Kbm2HWJmqsEaHt8lBUpd0qHcIi21E=

View File

@@ -17,7 +17,7 @@ package main
import (
"amap-tools/tools"
"github.com/alibaba/higress/plugins/wasm-go/pkg/mcp"
"github.com/higress-group/wasm-go/pkg/mcp"
)
func main() {}

View File

@@ -15,8 +15,8 @@
package tools
import (
"github.com/alibaba/higress/plugins/wasm-go/pkg/mcp"
"github.com/alibaba/higress/plugins/wasm-go/pkg/mcp/server"
"github.com/higress-group/wasm-go/pkg/mcp"
"github.com/higress-group/wasm-go/pkg/mcp/server"
)
func LoadTools(server *mcp.MCPServer) server.Server {

View File

@@ -23,8 +23,8 @@ import (
"amap-tools/config"
"github.com/alibaba/higress/plugins/wasm-go/pkg/mcp/server"
"github.com/alibaba/higress/plugins/wasm-go/pkg/mcp/utils"
"github.com/higress-group/wasm-go/pkg/mcp/server"
"github.com/higress-group/wasm-go/pkg/mcp/utils"
)
var _ server.Tool = AroundSearchRequest{}

View File

@@ -23,8 +23,8 @@ import (
"amap-tools/config"
"github.com/alibaba/higress/plugins/wasm-go/pkg/mcp/server"
"github.com/alibaba/higress/plugins/wasm-go/pkg/mcp/utils"
"github.com/higress-group/wasm-go/pkg/mcp/server"
"github.com/higress-group/wasm-go/pkg/mcp/utils"
)
var _ server.Tool = BicyclingRequest{}

View File

@@ -23,8 +23,8 @@ import (
"amap-tools/config"
"github.com/alibaba/higress/plugins/wasm-go/pkg/mcp/server"
"github.com/alibaba/higress/plugins/wasm-go/pkg/mcp/utils"
"github.com/higress-group/wasm-go/pkg/mcp/server"
"github.com/higress-group/wasm-go/pkg/mcp/utils"
)
var _ server.Tool = DrivingRequest{}

View File

@@ -23,8 +23,8 @@ import (
"amap-tools/config"
"github.com/alibaba/higress/plugins/wasm-go/pkg/mcp/server"
"github.com/alibaba/higress/plugins/wasm-go/pkg/mcp/utils"
"github.com/higress-group/wasm-go/pkg/mcp/server"
"github.com/higress-group/wasm-go/pkg/mcp/utils"
)
var _ server.Tool = TransitIntegratedRequest{}

View File

@@ -23,8 +23,8 @@ import (
"amap-tools/config"
"github.com/alibaba/higress/plugins/wasm-go/pkg/mcp/server"
"github.com/alibaba/higress/plugins/wasm-go/pkg/mcp/utils"
"github.com/higress-group/wasm-go/pkg/mcp/server"
"github.com/higress-group/wasm-go/pkg/mcp/utils"
)
var _ server.Tool = WalkingRequest{}

View File

@@ -23,8 +23,8 @@ import (
"amap-tools/config"
"github.com/alibaba/higress/plugins/wasm-go/pkg/mcp/server"
"github.com/alibaba/higress/plugins/wasm-go/pkg/mcp/utils"
"github.com/higress-group/wasm-go/pkg/mcp/server"
"github.com/higress-group/wasm-go/pkg/mcp/utils"
)
var _ server.Tool = DistanceRequest{}

View File

@@ -23,8 +23,8 @@ import (
"amap-tools/config"
"github.com/alibaba/higress/plugins/wasm-go/pkg/mcp/server"
"github.com/alibaba/higress/plugins/wasm-go/pkg/mcp/utils"
"github.com/higress-group/wasm-go/pkg/mcp/server"
"github.com/higress-group/wasm-go/pkg/mcp/utils"
)
var _ server.Tool = GeoRequest{}

View File

@@ -24,8 +24,8 @@ import (
"amap-tools/config"
"github.com/alibaba/higress/plugins/wasm-go/pkg/mcp/server"
"github.com/alibaba/higress/plugins/wasm-go/pkg/mcp/utils"
"github.com/higress-group/wasm-go/pkg/mcp/server"
"github.com/higress-group/wasm-go/pkg/mcp/utils"
"github.com/higress-group/proxy-wasm-go-sdk/proxywasm"
)

View File

@@ -23,8 +23,8 @@ import (
"amap-tools/config"
"github.com/alibaba/higress/plugins/wasm-go/pkg/mcp/server"
"github.com/alibaba/higress/plugins/wasm-go/pkg/mcp/utils"
"github.com/higress-group/wasm-go/pkg/mcp/server"
"github.com/higress-group/wasm-go/pkg/mcp/utils"
)
var _ server.Tool = ReGeocodeRequest{}

View File

@@ -23,8 +23,8 @@ import (
"amap-tools/config"
"github.com/alibaba/higress/plugins/wasm-go/pkg/mcp/server"
"github.com/alibaba/higress/plugins/wasm-go/pkg/mcp/utils"
"github.com/higress-group/wasm-go/pkg/mcp/server"
"github.com/higress-group/wasm-go/pkg/mcp/utils"
)
var _ server.Tool = SearchDetailRequest{}

View File

@@ -23,8 +23,8 @@ import (
"amap-tools/config"
"github.com/alibaba/higress/plugins/wasm-go/pkg/mcp/server"
"github.com/alibaba/higress/plugins/wasm-go/pkg/mcp/utils"
"github.com/higress-group/wasm-go/pkg/mcp/server"
"github.com/higress-group/wasm-go/pkg/mcp/utils"
)
var _ server.Tool = TextSearchRequest{}

View File

@@ -23,8 +23,8 @@ import (
"amap-tools/config"
"github.com/alibaba/higress/plugins/wasm-go/pkg/mcp/server"
"github.com/alibaba/higress/plugins/wasm-go/pkg/mcp/utils"
"github.com/higress-group/wasm-go/pkg/mcp/server"
"github.com/higress-group/wasm-go/pkg/mcp/utils"
)
var _ server.Tool = WeatherRequest{}

View File

@@ -0,0 +1,26 @@
# Sector Market Overview
Integrates the latest real-time market data for industry and concept sectors, including detailed information on constituent stocks. It covers key market indicators such as index level, price change percentage, trading volume, total market capitalization, sector rankings, and leading stocks. Designed for intelligent investment research and market trend tracking, it provides comprehensive insights into sector dynamics and constituent stock performance.
## Tool Overview
### Real-Time Daily Market Data for an Industry get_industry_realtime_quote
Enter an industry code to obtain the latest real-time data for that industry, including index level, price change percentage, trading volume, total market capitalization, number of constituent stocks, counts of stocks that hit limit up / rose / fell / remained flat / total stocks, sector ranking by performance, and leading stock information. This is used for real-time tracking of overall industry performance.
### Real-Time Daily Market Data for an Industry and Its Constituent Stocks get_industry_stock_realtime_quote
Enter an industry code to obtain the latest overall real-time market data for that industry—including index level, price change percentage, trading volume, total market capitalization, number of constituent stocks, performance ranking, and leading stock—along with detailed real-time data for all related constituent stocks, such as stock code, name, opening price, current price, price change percentage, and highest/lowest prices. This enables comprehensive analysis of the industry's and its constituents' latest market performance.
### Real-Time Daily Market Data for a Concept get_concept_realtime_quote
Enter a concept type (e.g., Juyuan, CLS) and concept code to retrieve the latest real-time market data for the concept sector, including sector name, price change percentage, one-week change, total market capitalization, number of constituent stocks, counts of stocks that hit limit up / rose / fell / remained flat, performance ranking, and leading stock information. This enables efficient tracking of trending market concept sectors.
### Real-Time Daily Market Data for a Concept and Its Constituent Stocks get_concept_stock_realtime_quote
Retrieve the latest real-time market data for a specified concept sector, including concept name, price change percentage, one-week change, total market capitalization, performance ranking, number of constituent stocks, counts of stocks that hit limit up / rose / fell / remained flat, and leading stock information. It also provides real-time data for all constituent stocks under the concept, such as stock code, name, market, opening price, current price, price change percentage, and highest/lowest prices—enabling quick insight into both the overall concept's and its constituent stocks' latest market performance.
## Usage Guide
### Apply for an AppCode
Visit the [Investoday Data Marketplace](https://data-api.investoday.net/mcp) to apply for your AppCode.
### Configuration Examples
Use the AppCode you obtained to generate the configuration in Higress.

View File

@@ -0,0 +1,27 @@
# 板块行情
整合了行业和概念板块的最新实时行情及其成分股明细数据,覆盖指数、涨跌幅、成交量、总市值、涨跌幅排名、领涨股等关键市场指标,面向智能投研与市场热点追踪,助力全方位洞察行业与概念板块的最新动态及成分股表现。
## 工具简介
### 行业的最新实时日行情 get_industry_realtime_quote
输入行业代码,获取该行业的最新实时指数、涨跌幅、成交量、总市值、成分股数量、涨停/上涨/下跌/平盘/总股数、涨跌幅排名、领涨股等关键行情数据,用于实时跟踪行业整体表现。
### 行业及关联成分股的最新实时日行情 get_industry_stock_realtime_quote
输入行业代码,获取该行业的最新整体行情(包括指数、涨跌幅、成交量、总市值、成分股数量、涨跌幅排名、领涨股等)以及所有关联成分股的实时行情明细(包括股票代码、名称、开盘价、当前价、涨跌幅、最高/最低价等),用于全面分析行业及其成分股的最新市场表现。
### 概念的最新实时日行情 get_concept_realtime_quote
输入概念类型(如聚源、财联社)和概念代码,获取该概念板块的最新实时行情数据,包括板块名称、涨跌幅、一周涨跌幅、总市值、成分股数量、涨停/上涨/下跌/平盘股数、涨跌幅排名、领涨股信息等关键指标,便于追踪特定市场热点板块表现。
### 概念及关联成分股的最新实时日行情 get_concept_stock_realtime_quote
获取指定概念板块的最新实时行情,包括概念名称、涨跌幅、一周涨跌幅、总市值、涨跌幅排名、成分股数量、涨停/上涨/下跌/平盘股数、领涨股信息等,以及该概念下所有成分股的实时行情(如股票代码、名称、市场、开盘价、当前价、涨跌幅、最高/最低价等),用于快速洞察概念整体及其成分股的最新市场表现。
## 使用教程
### 申请appCode
访问[今日投资数据市场](https://data-api.investoday.net/mcp)申请appCode。
### 配置地址
使用申请的appCode在higress生成即可。

View File

@@ -0,0 +1,580 @@
{
"openapi": "3.0.0",
"info": {
"title": "板块行情",
"description": "整合了行业和概念板块的最新实时行情及其成分股明细数据,覆盖指数、涨跌幅、成交量、总市值、涨跌幅排名、领涨股等关键市场指标,面向智能投研与市场热点追踪,助力全方位洞察行业与概念板块的最新动态及成分股表现。",
"version": "1.0.0"
},
"servers": [
{
"url": "https://data-api.investoday.net/data"
}
],
"paths": {
"/industry-quote/realtime": {
"get": {
"summary": "行业的最新实时日行情",
"description": "输入行业代码,获取该行业的最新实时指数、涨跌幅、成交量、总市值、成分股数量、涨停/上涨/下跌/平盘/总股数、涨跌幅排名、领涨股等关键行情数据,用于实时跟踪行业整体表现。",
"operationId": "get_industry_realtime_quote",
"parameters": [
{
"name": "industryCode",
"in": "query",
"description": "行业代码",
"required": true,
"schema": {
"type": "string",
"example": "330000"
}
}
],
"responses": {
"200": {
"description": "成功响应行业实时行情",
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/IndustryRealtimeQuote"
}
}
}
}
}
}
},
"/industry-quote/stock-realtime": {
"get": {
"summary": "行业及关联成分股的最新实时日行情",
"description": "输入行业代码,获取该行业的最新整体行情以及所有关联成分股的实时行情明细,用于全面分析行业及其成分股的最新市场表现。",
"operationId": "get_industry_stock_realtime_quote",
"parameters": [
{
"name": "industryCode",
"in": "query",
"description": "行业代码",
"required": true,
"schema": {
"type": "string",
"example": "330000"
}
}
],
"responses": {
"200": {
"description": "成功响应行业及成分股实时行情",
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/IndustryStockRealtimeQuote"
}
}
}
}
}
}
},
"/concept-quote/realtime": {
"get": {
"summary": "概念的最新实时日行情",
"description": "输入概念类型(如聚源、财联社)和概念代码,获取该概念板块的最新实时行情数据,包括涨跌幅、一周涨跌幅、总市值、成分股数量、涨停/上涨/下跌/平盘股数、涨跌幅排名、领涨股信息等关键指标。",
"operationId": "get_concept_realtime_quote",
"parameters": [
{
"name": "conceptType",
"in": "query",
"description": "概念类型 (jy-聚源、cls-财联社)",
"required": true,
"schema": {
"type": "string",
"example": "jy"
}
},
{
"name": "conceptCode",
"in": "query",
"description": "概念代码",
"required": true,
"schema": {
"type": "string",
"example": "14060061"
}
}
],
"responses": {
"200": {
"description": "成功响应概念实时行情",
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/ConceptRealtimeQuote"
}
}
}
}
}
}
},
"/concept-quote/stock-realtime": {
"get": {
"summary": "概念及关联成分股的最新实时日行情",
"description": "获取指定概念板块的最新实时行情及其所有成分股的实时行情,用于快速洞察概念整体及其成分股的最新市场表现。",
"operationId": "get_concept_stock_realtime_quote",
"parameters": [
{
"name": "conceptType",
"in": "query",
"description": "概念类型 (jy-聚源、cls-财联社)",
"required": true,
"schema": {
"type": "string",
"example": "jy"
}
},
{
"name": "conceptCode",
"in": "query",
"description": "概念代码",
"required": true,
"schema": {
"type": "string",
"example": "14060061"
}
}
],
"responses": {
"200": {
"description": "成功响应概念及成分股实时行情",
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/ConceptStockRealtimeQuote"
}
}
}
}
}
}
},
"/stocks/concept-classifications": {
"get": {
"summary": "股票所属概念",
"description": "支持根据股票代码、概念代码、日期及存续状态,查询股票所属的全部概念,包括概念代码、详细说明、入选和剔除日期等信息,便于追溯股票的历史及当前概念归属关系和变动。",
"operationId": "get_stock_concept_classifications",
"parameters": [
{
"name": "removalDate",
"in": "query",
"description": "剔除日期",
"required": false,
"schema": {
"type": "string",
"format": "date",
"example": "2021-01-01"
}
},
{
"name": "existenceStatus",
"in": "query",
"description": "当前概念存续状态 1、存续 0 、终止",
"required": false,
"schema": {
"type": "integer",
"example": 1
}
},
{
"name": "selectionDate",
"in": "query",
"description": "入选日期",
"required": false,
"schema": {
"type": "string",
"format": "date",
"example": "2021-01-01"
}
},
{
"name": "conceptClass",
"in": "query",
"description": "概念类型 1是财联社概念、0是聚源概念",
"required": true,
"schema": {
"type": "integer",
"example": 1
}
},
{
"name": "conceptCode",
"in": "query",
"description": "概念代码(可输入多个,用逗号分隔)",
"required": true,
"schema": {
"type": "array",
"items": { "type": "string" },
"example": ["15030008"]
},
"style": "form",
"explode": false
},
{
"name": "stockCode",
"in": "query",
"description": "股票代码(可输入多个,用逗号分隔)",
"required": true,
"schema": {
"type": "array",
"items": { "type": "string" },
"example": ["000001"]
},
"style": "form",
"explode": false
}
],
"responses": {
"200": {
"description": "成功响应股票所属概念",
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/StockConceptClassificationsResponse"
}
}
}
}
}
}
},
"/concept/basic": {
"get": {
"summary": "概念列表",
"description": "支持用户通过中文自然语言输入概念关键词、板块类别,快速检索A股市场的全部概念板块信息。可模糊搜索概念名称,也可指定来源分类(如“财联社”、“聚源”等),用于查询市场热点、主题板块归属,为行业分析、主题投资等应用提供底层数据支持。",
"operationId": "get_concept_basic",
"parameters": [
{
"name": "conceptCode",
"in": "query",
"description": "概念代码",
"required": true,
"schema": {
"type": "string",
"items": { "type": "string" },
"example": "000001"
},
"style": "form",
"explode": false
},
{
"name": "conceptName",
"in": "query",
"description": "概念名称(可模糊匹配)",
"required": true,
"schema": {
"type": "string",
"example": "腾讯云概念"
}
},
{
"name": "conceptClass",
"in": "query",
"description": "概念分类(可选: 财联社-C01[默认值],聚源-99)",
"required": true,
"schema": {
"type": "string",
"example": "C01"
}
}
],
"responses": {
"200": {
"description": "成功响应概念科目",
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/StockCategoryMappingsResponse"
}
}
}
}
}
}
},
"/industry/basic": {
"get": {
"summary": "行业列表",
"description": "用户可输入感兴趣的行业名称、行业类别或行业层级,快速查询当前支持的全部行业信息。可按行业中文名(支持模糊搜索),指定行业体系(如“申万行业”),或根据层级(如一级、二级行业)筛选。返回结果包含行业名称、所属行业体系、行业指数、行业层级等基础信息,可作为后续行业行情、板块分析等业务的基础数据。",
"operationId": "get_industry_basic",
"parameters": [
{
"name": "industryName",
"in": "query",
"description": "行业名称(模糊匹配)",
"required": false,
"schema": {
"type": "string",
"example": "农林牧渔"
}
},
{
"name": "industryType",
"in": "query",
"description": "行业分类体系(可选:申万行业体系-INDUS4_CL[默认])",
"required": false,
"schema": {
"type": "string",
"example": "INDUS4_CL"
}
},
{
"name": "industryLevel",
"in": "query",
"description": "行业等级",
"required": false,
"schema": {
"type": "string",
"example": "1"
}
},
{
"name": "industryCode",
"in": "query",
"description": "行业代码",
"required": false,
"schema": {
"type": "string",
"example": "110000"
}
}
],
"responses": {
"200": {
"description": "成功响应行业列表",
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/IndustryBasicResponse"
}
}
}
}
}
}
}
},
"components": {
"schemas": {
"IndustryRealtimeQuote": {
"type": "object",
"properties": {
"industryCode": { "type": "string", "description": "行业代码" },
"industryName": { "type": "string", "description": "行业名称" },
"price": { "type": "number", "description": "行业指数" },
"changeRatio": { "type": "number", "description": "行业涨跌幅" },
"volume": { "type": "integer", "description": "行业成交量" },
"changeRatio1W": { "type": "number", "description": "行业一周涨跌幅" },
"totalValue": { "type": "number", "description": "行业总市值" },
"ratioRank": { "type": "integer", "description": "行业涨跌幅排名" },
"industryAmount": { "type": "integer", "description": "行业成分股数量" },
"limitUpAmount": { "type": "integer", "description": "行业涨停股数量" },
"stockUpAmount": { "type": "integer", "description": "行业上涨股数量" },
"stockDownAmount": { "type": "integer", "description": "行业下跌股数量" },
"stockBxAmount": { "type": "integer", "description": "行业平盘股数量" },
"stockAmount": { "type": "integer", "description": "行业股票总数" },
"leadUpStockCode": { "type": "string", "description": "行业领涨股代码" },
"leadUpStockName": { "type": "string", "description": "行业领涨股名称" },
"dataTime": { "type": "string", "format": "date-time", "description": "数据时间" }
},
"required": [
"industryCode",
"industryName",
"price",
"changeRatio",
"volume",
"changeRatio1W",
"totalValue",
"ratioRank",
"industryAmount",
"limitUpAmount",
"stockUpAmount",
"stockDownAmount",
"stockBxAmount",
"stockAmount",
"leadUpStockCode",
"leadUpStockName",
"dataTime"
]
},
"StockRealQuote": {
"type": "object",
"properties": {
"stockCode": { "type": "string", "description": "股票代码" },
"stockName": { "type": "string", "description": "股票名称" },
"marketType": { "type": "string", "description": "市场类型" },
"openPrice": { "type": "number", "description": "开盘价" },
"closePriceYDay": { "type": "number", "description": "昨日收盘价" },
"currentPrice": { "type": "number", "description": "当前价格" },
"changeRatio": { "type": "number", "description": "涨跌幅" },
"highPrice": { "type": "number", "description": "最高价" },
"lowPrice": { "type": "number", "description": "最低价" },
"dataTime": { "type": "string", "format": "date-time", "description": "数据时间" },
"sysTime": { "type": "string", "format": "date-time", "description": "系统时间" },
"status": { "type": "string", "description": "状态" }
},
"required": [
"stockCode",
"stockName",
"marketType",
"openPrice",
"closePriceYDay",
"currentPrice",
"changeRatio",
"highPrice",
"lowPrice",
"dataTime",
"sysTime",
"status"
]
},
"IndustryStockRealtimeQuote": {
"allOf": [
{ "$ref": "#/components/schemas/IndustryRealtimeQuote" },
{
"type": "object",
"properties": {
"stockRealQuotes": {
"type": "array",
"description": "股票实时行情数据列表",
"items": { "$ref": "#/components/schemas/StockRealQuote" }
}
},
"required": ["stockRealQuotes"]
}
]
},
"ConceptRealtimeQuote": {
"type": "object",
"properties": {
"conceptCode": { "type": "string", "description": "概念板块代码" },
"conceptName": { "type": "string", "description": "概念板块名称" },
"changeRatio": { "type": "number", "description": "概念板块涨跌幅" },
"changeRatio1W": { "type": "number", "description": "概念板块一周涨跌幅" },
"ratioRank": { "type": "integer", "description": "概念板块涨跌幅排名(排名值越小涨幅越大)" },
"conceptAmount": { "type": "integer", "description": "概念板块成分股数量" },
"limitUpAmount": { "type": "integer", "description": "概念板块涨停股数量" },
"stockUpAmount": { "type": "integer", "description": "概念板块上涨股数量" },
"stockDownAmount": { "type": "integer", "description": "概念板块下跌股数量" },
"stockBxAmount": { "type": "integer", "description": "概念板块平盘股数量" },
"leadUpStockCode": { "type": "string", "description": "概念板块领涨股代码" },
"leadUpStockName": { "type": "string", "description": "概念板块领涨股名称" },
"totalValue": { "type": "number", "description": "概念板块总市值" },
"dataTime": { "type": "string", "format": "date-time", "description": "数据时间" }
},
"required": [
"conceptCode",
"conceptName",
"changeRatio",
"changeRatio1W",
"ratioRank",
"conceptAmount",
"limitUpAmount",
"stockUpAmount",
"stockDownAmount",
"stockBxAmount",
"leadUpStockCode",
"leadUpStockName",
"totalValue",
"dataTime"
]
},
"ConceptStockRealtimeQuote": {
"allOf": [
{ "$ref": "#/components/schemas/ConceptRealtimeQuote" },
{
"type": "object",
"properties": {
"stockRealQuotes": {
"type": "array",
"description": "股票实时行情数据",
"items": { "$ref": "#/components/schemas/StockRealQuote" }
}
},
"required": ["stockRealQuotes"]
}
]
},
"StockConceptClassification": {
"type": "object",
"properties": {
"conceptName": { "type": "string", "description": "概念名称" },
"conceptCode": { "type": "string", "description": "概念代码" },
"stockCode": { "type": "string", "description": "股票代码" },
"description": { "type": "string", "description": "说明" },
"selectionDate": { "type": "string", "format": "date", "description": "入选日期" },
"removalDate": { "type": "string", "format": "date", "description": "剔除日期" }
},
"required": ["conceptName","conceptCode","stockCode","description","selectionDate","removalDate"]
},
"StockConceptClassificationsResponse": {
"type": "object",
"properties": {
"code": { "type": "integer", "description": "状态码0 成功" },
"msg": { "type": "string", "description": "错误信息" },
"data": {
"type": "array",
"description": "数据",
"items": { "$ref": "#/components/schemas/StockConceptClassification" }
}
},
"required": ["code","msg","data"]
},
"StockCategoryMapping": {
"type": "object",
"properties": {
"conceptName": { "type": "string", "description": "概念名称" },
"conceptCategoryName": { "type": "string", "description": "概念类别名称" },
"parentConceptCode": { "type": "string", "description": "父概念代码" },
"conceptCategoryCode": { "type": "string", "description": "概念类别代码1是财联社概念、0是聚源概念" },
"conceptCode": { "type": "string", "description": "概念代码" },
"conceptLevel": { "type": "integer", "description": "概念级别" }
},
"required": ["conceptName","conceptCategoryName","parentConceptCode","conceptCategoryCode","conceptCode","conceptLevel"]
},
"StockCategoryMappingsResponse": {
"type": "object",
"properties": {
"code": { "type": "integer", "description": "状态码0 成功" },
"msg": { "type": "string", "description": "错误信息" },
"data": {
"type": "array",
"description": "数据",
"items": { "$ref": "#/components/schemas/StockCategoryMapping" }
}
},
"required": ["code","msg","data"]
},
"IndustryBasic": {
"type": "object",
"properties": {
"industryType": { "type": "string", "description": "行业类型(申万行业体系-INDUS4_CL)" },
"industryName": { "type": "string", "description": "行业名称" },
"indexCode": { "type": "string", "description": "行业的指数代码" },
"industryCode": { "type": "string", "description": "行业代码" },
"industryLevel": { "type": "string", "description": "行业等级" }
},
"required": ["industryType","industryName","indexCode","industryCode","industryLevel"]
},
"IndustryBasicResponse": {
"type": "object",
"properties": {
"code": { "type": "integer", "description": "状态码0 成功" },
"msg": { "type": "string", "description": "错误信息" },
"data": {
"type": "array",
"description": "数据",
"items": { "$ref": "#/components/schemas/IndustryBasic" }
}
},
"required": ["code","msg","data"]
}
}
}
}

View File

@@ -0,0 +1,346 @@
server:
name: plate-quote
description: 板块行情
config:
appCode: ""
tools:
- name: get_concept_realtime_quote
description: 概念的最新实时日行情 - 输入概念类型(如聚源、财联社)和概念代码,获取该概念板块的最新实时行情数据,包括涨跌幅、一周涨跌幅、总市值、成分股数量、涨停/上涨/下跌/平盘股数、涨跌幅排名、领涨股信息等关键指标。
args:
- name: conceptCode
description: 概念代码
type: string
required: true
position: query
- name: conceptType
description: 概念类型 (jy-聚源、cls-财联社)
type: string
required: true
position: query
requestTemplate:
url: https://data-api.investoday.net/data/concept-quote/realtime
method: GET
headers:
- key: "Content-Type"
value: "application/json"
- key: "appCode"
value: "{{.config.appCode}}"
responseTemplate:
prependBody: |+
# API Response Information
Below is the response from an API call. To help you understand the data, I've provided:
1. A detailed description of all fields in the response structure
2. The complete API response
## Response Structure
> Content-Type: application/json
- **changeRatio**: 概念板块涨跌幅 (Type: number)
- **changeRatio1W**: 概念板块一周涨跌幅 (Type: number)
- **conceptAmount**: 概念板块成分股数量 (Type: integer)
- **conceptCode**: 概念板块代码 (Type: string)
- **conceptName**: 概念板块名称 (Type: string)
- **dataTime**: 数据时间 (Type: string)
- **leadUpStockCode**: 概念板块领涨股代码 (Type: string)
- **leadUpStockName**: 概念板块领涨股名称 (Type: string)
- **limitUpAmount**: 概念板块涨停股数量 (Type: integer)
- **ratioRank**: 概念板块涨跌幅排名(排名值越小涨幅越大) (Type: integer)
- **stockBxAmount**: 概念板块平盘股数量 (Type: integer)
- **stockDownAmount**: 概念板块下跌股数量 (Type: integer)
- **stockUpAmount**: 概念板块上涨股数量 (Type: integer)
- **totalValue**: 概念板块总市值 (Type: number)
## Original Response
- name: get_concept_stock_realtime_quote
description: 概念及关联成分股的最新实时日行情 - 获取指定概念板块的最新实时行情及其所有成分股的实时行情,用于快速洞察概念整体及其成分股的最新市场表现。
args:
- name: conceptCode
description: 概念代码
type: string
required: true
position: query
- name: conceptType
description: 概念类型 (jy-聚源、cls-财联社)
type: string
required: true
position: query
requestTemplate:
url: https://data-api.investoday.net/data/concept-quote/stock-realtime
method: GET
headers:
- key: "Content-Type"
value: "application/json"
- key: "appCode"
value: "{{.config.appCode}}"
responseTemplate:
prependBody: |+
# API Response Information
Below is the response from an API call. To help you understand the data, I've provided:
1. A detailed description of all fields in the response structure
2. The complete API response
## Response Structure
> Content-Type: application/json
## Original Response
- name: get_industry_basic
description: 行业列表 - 用户可输入感兴趣的行业名称、行业类别或行业层级,快速查询当前支持的全部行业信息。可按行业中文名(支持模糊搜索),指定行业体系(如“申万行业”),或根据层级(如一级、二级行业)筛选。返回结果包含行业名称、所属行业体系、行业指数、行业层级等基础信息,可作为后续行业行情、板块分析等业务的基础数据。
args:
- name: industryCode
description: 行业代码
type: string
position: query
- name: industryLevel
description: 行业等级
type: string
position: query
- name: industryName
description: 行业名称(模糊匹配)
type: string
position: query
- name: industryType
description: 行业分类体系(可选:申万行业体系-INDUS4_CL[默认])
type: string
position: query
requestTemplate:
url: https://data-api.investoday.net/data/industry/basic
method: GET
headers:
- key: "Content-Type"
value: "application/json"
- key: "appCode"
value: "{{.config.appCode}}"
responseTemplate:
prependBody: |+
# API Response Information
Below is the response from an API call. To help you understand the data, I've provided:
1. A detailed description of all fields in the response structure
2. The complete API response
## Response Structure
> Content-Type: application/json
- **code**: 状态码0 成功 (Type: integer)
- **data**: 数据 (Type: array)
- **data[].indexCode**: 行业的指数代码 (Type: string)
- **data[].industryCode**: 行业代码 (Type: string)
- **data[].industryLevel**: 行业等级 (Type: string)
- **data[].industryName**: 行业名称 (Type: string)
- **data[].industryType**: 行业类型(申万行业体系-INDUS4_CL) (Type: string)
- **msg**: 错误信息 (Type: string)
## Original Response
- name: get_industry_realtime_quote
description: 行业的最新实时日行情 - 输入行业代码,获取该行业的最新实时指数、涨跌幅、成交量、总市值、成分股数量、涨停/上涨/下跌/平盘/总股数、涨跌幅排名、领涨股等关键行情数据,用于实时跟踪行业整体表现。
args:
- name: industryCode
description: 行业代码
type: string
required: true
position: query
requestTemplate:
url: https://data-api.investoday.net/data/industry-quote/realtime
method: GET
headers:
- key: "Content-Type"
value: "application/json"
- key: "appCode"
value: "{{.config.appCode}}"
responseTemplate:
prependBody: |+
# API Response Information
Below is the response from an API call. To help you understand the data, I've provided:
1. A detailed description of all fields in the response structure
2. The complete API response
## Response Structure
> Content-Type: application/json
- **changeRatio**: 行业涨跌幅 (Type: number)
- **changeRatio1W**: 行业一周涨跌幅 (Type: number)
- **dataTime**: 数据时间 (Type: string)
- **industryAmount**: 行业成分股数量 (Type: integer)
- **industryCode**: 行业代码 (Type: string)
- **industryName**: 行业名称 (Type: string)
- **leadUpStockCode**: 行业领涨股代码 (Type: string)
- **leadUpStockName**: 行业领涨股名称 (Type: string)
- **limitUpAmount**: 行业涨停股数量 (Type: integer)
- **price**: 行业指数 (Type: number)
- **ratioRank**: 行业涨跌幅排名 (Type: integer)
- **stockAmount**: 行业股票总数 (Type: integer)
- **stockBxAmount**: 行业平盘股数量 (Type: integer)
- **stockDownAmount**: 行业下跌股数量 (Type: integer)
- **stockUpAmount**: 行业上涨股数量 (Type: integer)
- **totalValue**: 行业总市值 (Type: number)
- **volume**: 行业成交量 (Type: integer)
## Original Response
- name: get_industry_stock_realtime_quote
description: 行业及关联成分股的最新实时日行情 - 输入行业代码,获取该行业的最新整体行情以及所有关联成分股的实时行情明细,用于全面分析行业及其成分股的最新市场表现。
args:
- name: industryCode
description: 行业代码
type: string
required: true
position: query
requestTemplate:
url: https://data-api.investoday.net/data/industry-quote/stock-realtime
method: GET
headers:
- key: "Content-Type"
value: "application/json"
- key: "appCode"
value: "{{.config.appCode}}"
responseTemplate:
prependBody: |+
# API Response Information
Below is the response from an API call. To help you understand the data, I've provided:
1. A detailed description of all fields in the response structure
2. The complete API response
## Response Structure
> Content-Type: application/json
## Original Response
- name: get_concept_basic
description: 概念列表 - 支持用户通过中文自然语言输入概念关键词、板块类别,快速检索A股市场的全部概念板块信息。可模糊搜索概念名称,也可指定来源分类(如“财联社”、“聚源”等),用于查询市场热点、主题板块归属,为行业分析、主题投资等应用提供底层数据支持。
args:
- name: conceptCode
description: 概念代码
type: string
required: false
items:
type: string
position: query
- name: conceptName
description: 概念名称(可模糊匹配)
type: string
required: false
position: query
- name: conceptClass
description: "概念分类(可选: 财联社-C01[默认值],聚源-99)"
type: string
required: false
position: query
requestTemplate:
url: https://data-api.investoday.net/data/concept/basic
method: GET
headers:
- key: "Content-Type"
value: "application/json"
- key: "appCode"
value: "{{.config.appCode}}"
responseTemplate:
prependBody: |+
# API Response Information
Below is the response from an API call. To help you understand the data, I've provided:
1. A detailed description of all fields in the response structure
2. The complete API response
## Response Structure
> Content-Type: application/json
- **code**: 状态码0 成功 (Type: integer)
- **data**: 数据 (Type: array)
- **data[].conceptCategoryCode**: 概念类别代码1是财联社概念、0是聚源概念 (Type: string)
- **data[].conceptCategoryName**: 概念类别名称 (Type: string)
- **data[].conceptCode**: 概念代码 (Type: string)
- **data[].conceptLevel**: 概念级别 (Type: integer)
- **data[].conceptName**: 概念名称 (Type: string)
- **data[].parentConceptCode**: 父概念代码 (Type: string)
- **msg**: 错误信息 (Type: string)
## Original Response
- name: get_stock_concept_classifications
description: 股票所属概念 - 支持根据股票代码、概念代码、日期及存续状态,查询股票所属的全部概念,包括概念代码、详细说明、入选和剔除日期等信息,便于追溯股票的历史及当前概念归属关系和变动。
args:
- name: conceptClass
description: 概念类型 1是财联社概念、0是聚源概念
type: integer
required: true
position: body
- name: conceptCode
description: 概念代码(可输入多个,用逗号分隔)
type: string
required: false
items:
type: string
position: body
- name: existenceStatus
description: 当前概念存续状态 1、存续 0 、终止
type: integer
position: body
- name: removalDate
description: 剔除日期
type: string
position: body
- name: selectionDate
description: 入选日期
type: string
position: body
- name: stockCode
description: 股票代码(可输入多个,用逗号分隔)
type: array
required: true
items:
type: string
position: body
requestTemplate:
url: https://data-api.investoday.net/data/stocks/concept-classifications
method: POST
headers:
- key: "Content-Type"
value: "application/json"
- key: "appCode"
value: "{{.config.appCode}}"
responseTemplate:
prependBody: |+
# API Response Information
Below is the response from an API call. To help you understand the data, I've provided:
1. A detailed description of all fields in the response structure
2. The complete API response
## Response Structure
> Content-Type: application/json
- **code**: 状态码0 成功 (Type: integer)
- **data**: 数据 (Type: array)
- **data[].conceptCode**: 概念代码 (Type: string)
- **data[].conceptName**: 概念名称 (Type: string)
- **data[].description**: 说明 (Type: string)
- **data[].removalDate**: 剔除日期 (Type: string)
- **data[].selectionDate**: 入选日期 (Type: string)
- **data[].stockCode**: 股票代码 (Type: string)
- **msg**: 错误信息 (Type: string)
## Original Response

View File

@@ -3,7 +3,7 @@ module quark-search
go 1.24.1
require (
github.com/alibaba/higress/plugins/wasm-go v1.4.4-0.20250617125129-8731ba4ea3db
github.com/higress-group/wasm-go v1.0.0
github.com/tidwall/gjson v1.18.0
)

View File

@@ -6,8 +6,6 @@ github.com/Masterminds/semver/v3 v3.3.0 h1:B8LGeaivUe71a5qox1ICM/JLl0NqZSW5CHyL+
github.com/Masterminds/semver/v3 v3.3.0/go.mod h1:4V+yj/TJE1HU9XfppCwVMZq3I84lprf4nC11bSS5beM=
github.com/Masterminds/sprig/v3 v3.3.0 h1:mQh0Yrg1XPo6vjYXgtf5OtijNAKJRNcTdOOGZe3tPhs=
github.com/Masterminds/sprig/v3 v3.3.0/go.mod h1:Zy1iXRYNqNLUolqCpL4uhk6SHUMAOSCzdgBfDb35Lz0=
github.com/alibaba/higress/plugins/wasm-go v1.4.4-0.20250617125129-8731ba4ea3db h1:hubkTsadmBj/FNfh9gI0glOWI7NEDQeF+UwX0EmO0Es=
github.com/alibaba/higress/plugins/wasm-go v1.4.4-0.20250617125129-8731ba4ea3db/go.mod h1:ixggLUTsFfFogWS6p95AzTfey/XbPccCWln1gyvkY0M=
github.com/bahlo/generic-list-go v0.2.0 h1:5sz/EEAK+ls5wF+NeqDpk5+iNdMDXrh3z3nPnH1Wvgk=
github.com/bahlo/generic-list-go v0.2.0/go.mod h1:2KvAjgMlE5NNynlg/5iLrrCCZ2+5xWbdbCW3pNTGyYg=
github.com/buger/jsonparser v1.1.1 h1:2PnMjfWD7wBILjqQbt530v576A/cAbQvEW9gGIpYMUs=
@@ -24,6 +22,8 @@ github.com/higress-group/gjson_template v0.0.0-20250413075336-4c4161ed428b h1:rR
github.com/higress-group/gjson_template v0.0.0-20250413075336-4c4161ed428b/go.mod h1:rU3M+Tq5VrQOo0dxpKHGb03Ty0sdWIZfAH+YCOACx/Y=
github.com/higress-group/proxy-wasm-go-sdk v0.0.0-20250611100342-5654e89a7a80 h1:xqmtTZI0JQ2O+Lg9/CE6c+Tw9KD6FnvWw8EpLVuuvfg=
github.com/higress-group/proxy-wasm-go-sdk v0.0.0-20250611100342-5654e89a7a80/go.mod h1:tRI2LfMudSkKHhyv1uex3BWzcice2s/l8Ah8axporfA=
github.com/higress-group/wasm-go v1.0.0 h1:4Ik5n3FsJ5+r13KLQl2ky+8NuAE8dfWQwoKxXYD2KAw=
github.com/higress-group/wasm-go v1.0.0/go.mod h1:ODBV27sjmhIW8Cqv3R74EUcTnbdkE69bmXBQFuRkY1M=
github.com/huandu/xstrings v1.5.0 h1:2ag3IFq9ZDANvthTwTiqSSZLjDc+BedvHPAp5tJy2TI=
github.com/huandu/xstrings v1.5.0/go.mod h1:y5/lhBue+AyNmUVz9RLU9xbLR0o4KIIExikq4ovT0aE=
github.com/invopop/jsonschema v0.13.0 h1:KvpoAJWEjR3uD9Kbm2HWJmqsEaHt8lBUpd0qHcIi21E=

View File

@@ -17,7 +17,7 @@ package main
import (
"quark-search/tools"
"github.com/alibaba/higress/plugins/wasm-go/pkg/mcp"
"github.com/higress-group/wasm-go/pkg/mcp"
)
func main() {}

View File

@@ -15,8 +15,8 @@
package tools
import (
"github.com/alibaba/higress/plugins/wasm-go/pkg/mcp"
"github.com/alibaba/higress/plugins/wasm-go/pkg/mcp/server"
"github.com/higress-group/wasm-go/pkg/mcp"
"github.com/higress-group/wasm-go/pkg/mcp/server"
)
func LoadTools(server *mcp.MCPServer) server.Server {

View File

@@ -24,8 +24,8 @@ import (
"quark-search/config"
"github.com/alibaba/higress/plugins/wasm-go/pkg/mcp/server"
"github.com/alibaba/higress/plugins/wasm-go/pkg/mcp/utils"
"github.com/higress-group/wasm-go/pkg/mcp/server"
"github.com/higress-group/wasm-go/pkg/mcp/utils"
"github.com/tidwall/gjson"
)

View File

@@ -566,7 +566,7 @@ func (ctx *CommonHttpCtx[PluginConfig]) WriteUserAttributeToLogWithKey(key strin
newAttributeMap := map[string]interface{}{}
if string(preMarshalledJsonLogStr) != "" {
// e.g. {"field1":"value1","field2":"value2"}
preJsonLogStr := unmarshalStr(fmt.Sprintf(`"%s"`, string(preMarshalledJsonLogStr)))
preJsonLogStr := UnmarshalStr(fmt.Sprintf(`"%s"`, string(preMarshalledJsonLogStr)))
err := json.Unmarshal([]byte(preJsonLogStr), &newAttributeMap)
if err != nil {
ctx.plugin.vm.log.Warnf("Unmarshal failed, will overwrite %s, pre value is: %s", key, string(preMarshalledJsonLogStr))
@@ -580,7 +580,7 @@ func (ctx *CommonHttpCtx[PluginConfig]) WriteUserAttributeToLogWithKey(key strin
// e.g. {"field1":"value1","field2":2,"field3":"value3"}
jsonStr, _ := json.Marshal(newAttributeMap)
// e.g. {\"field1\":\"value1\",\"field2\":2,\"field3\":\"value3\"}
marshalledJsonStr := marshalStr(string(jsonStr))
marshalledJsonStr := MarshalStr(string(jsonStr))
if err := proxywasm.SetProperty([]string{key}, []byte(marshalledJsonStr)); err != nil {
ctx.plugin.vm.log.Warnf("failed to set %s in filter state, raw is %s, err is %v", key, marshalledJsonStr, err)
return err

View File

@@ -7,7 +7,7 @@ import (
"github.com/tidwall/gjson"
)
func unmarshalStr(marshalledJsonStr string) string {
func UnmarshalStr(marshalledJsonStr string) string {
// e.g. "{\"field1\":\"value1\",\"field2\":\"value2\"}"
var jsonStr string
err := json.Unmarshal([]byte(marshalledJsonStr), &jsonStr)
@@ -19,7 +19,7 @@ func unmarshalStr(marshalledJsonStr string) string {
return jsonStr
}
func marshalStr(raw string) string {
func MarshalStr(raw string) string {
// e.g. {"field1":"value1","field2":"value2"}
helper := map[string]string{
"placeholder": raw,

View File

@@ -8,7 +8,8 @@ COPY . .
WORKDIR /workspace/extensions/$PLUGIN_NAME
RUN if [ -f $PREBUILD ]; then sh $PREBUILD; fi
RUN cargo build --target wasm32-wasip1 $BUILD_OPTS \
&& cp target/wasm32-wasip1/release/*.wasm /main.wasm
&& cp target/wasm32-wasip1/release/*.wasm /main.wasm \
&& cargo clean
FROM scratch
COPY --from=builder /main.wasm plugin.wasm

View File

@@ -23,15 +23,19 @@ build:
lint-base:
cargo fmt --all --check
cargo clippy --workspace --all-features --all-targets
cargo clean
lint:
cargo fmt --all --check --manifest-path extensions/${PLUGIN_NAME}/Cargo.toml
cargo clippy --workspace --all-features --all-targets --manifest-path extensions/${PLUGIN_NAME}/Cargo.toml
cargo clean --manifest-path extensions/${PLUGIN_NAME}/Cargo.toml
test-base:
cargo test --lib
cargo clean
test:
cargo test --manifest-path extensions/${PLUGIN_NAME}/Cargo.toml
cargo clean --manifest-path extensions/${PLUGIN_NAME}/Cargo.toml
builder:
DOCKER_BUILDKIT=1 docker build \

View File

@@ -36,10 +36,10 @@ dependencies = [
"fancy-regex",
"grok",
"higress-wasm-rust",
"hmac-sha256",
"jieba-rs",
"jsonpath-rust",
"lazy_static",
"md5",
"proxy-wasm",
"rust-embed",
"serde",
@@ -295,6 +295,12 @@ dependencies = [
"serde_json",
]
[[package]]
name = "hmac-sha256"
version = "1.1.12"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ad6880c8d4a9ebf39c6e8b77007ce223f646a4d21ce29d99f70cb16420545425"
[[package]]
name = "http"
version = "1.2.0"
@@ -559,12 +565,6 @@ version = "0.4.25"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "04cbf5b083de1c7e0222a7a51dbfdba1cbe1c6ab0b15e29fff3f6c077fd9cd9f"
[[package]]
name = "md5"
version = "0.7.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "490cc448043f947bae3cbee9c203358d62dbee0db12107a74be5c30ccfd09771"
[[package]]
name = "memchr"
version = "2.7.4"

View File

@@ -14,7 +14,7 @@ proxy-wasm = { git="https://github.com/higress-group/proxy-wasm-rust-sdk", branc
serde = { version = "1.0", features = ["derive"] }
serde_json = "1.0"
fancy-regex = "0"
md5 = "0"
hmac-sha256 = "1"
grok = "2"
lazy_static = "1"
jieba-rs = "0"

View File

@@ -0,0 +1,736 @@
// Copyright (c) 2025 Alibaba Group Holding Ltd.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use crate::deny_word::DenyWord;
use crate::msg_win_openai::MsgWindow;
use fancy_regex::Regex;
use grok::patterns;
use higress_wasm_rust::log::Log;
use higress_wasm_rust::plugin_wrapper::{HttpContextWrapper, RootContextWrapper};
use higress_wasm_rust::request_wrapper::has_request_body;
use higress_wasm_rust::rule_matcher::{on_configure, RuleMatcher, SharedRuleMatcher};
use jsonpath_rust::{JsonPath, JsonPathValue};
use lazy_static::lazy_static;
use proxy_wasm::traits::{Context, HttpContext, RootContext};
use proxy_wasm::types::{Bytes, ContextType, DataAction, HeaderAction, LogLevel};
use serde::de::Error;
use serde::Deserialize;
use serde::Deserializer;
use serde_json::{json, Value};
use std::cell::RefCell;
use std::collections::{BTreeMap, HashMap, VecDeque};
use std::fmt::Write;
use std::ops::DerefMut;
use std::rc::{Rc, Weak};
use std::str::FromStr;
use std::vec;
// WASM entry point: sets the proxy-wasm log level to Trace and registers a
// factory that builds a fresh `AiDataMaskingRoot` as the root context.
proxy_wasm::main! {{
proxy_wasm::set_log_level(LogLevel::Trace);
proxy_wasm::set_root_context(|_|Box::new(AiDataMaskingRoot::new()));
}}
/// Name handed to `Log::new` for every logger this plugin creates.
const PLUGIN_NAME: &str = "ai-data-masking";
/// Regex recognising grok references of the form `%{NAME}` or `%{NAME:alias}`.
/// Named groups: `name` (everything between the braces), `pattern` (the
/// referenced pattern name) and the optional `alias`.
/// NOTE(review): the class `[A-z0-9]` covers the whole ASCII range `A`..`z`, so
/// besides letters it also accepts `[`, `\`, `]`, `^`, `_` and the backtick --
/// confirm this is intended before tightening it.
const GROK_PATTERN: &str = r"%\{(?<name>(?<pattern>[A-z0-9]+)(?::(?<alias>[A-z0-9_:;\/\s\.]+))?)\}";
/// Process-wide state shared by every request: the built-in deny-word list and
/// the table used to expand grok references into plain regular expressions.
struct System {
/// Built-in deny words, created with `DenyWord::system()`.
deny_word: DenyWord,
/// Compiled `GROK_PATTERN`, used to find `%{...}` references.
grok_regex: Regex,
/// Pattern name -> regex source, filled by `System::init`.
grok_patterns: BTreeMap<String, String>,
}
// Lazily constructed singleton `System`; built on first access through
// `System::new()`.
lazy_static! {
static ref SYSTEM: System = System::new();
}
/// Root context of the plugin: owns the logger and the rule matcher that is
/// handed to `on_configure` when plugin configuration arrives.
struct AiDataMaskingRoot {
/// Logger created with `PLUGIN_NAME`.
log: Log,
/// Shared matcher holding the parsed `AiDataMaskingConfig` rules.
rule_matcher: SharedRuleMatcher<AiDataMaskingConfig>,
}
/// Per-request HTTP context that performs deny-word checks and data masking.
struct AiDataMasking {
/// Weak self-reference stored by `init_self_weak`.
weak: Weak<RefCell<Box<dyn HttpContextWrapper<AiDataMaskingConfig>>>>,
/// Configuration matched for this request; set by `on_config`.
config: Option<Rc<AiDataMaskingConfig>>,
/// Masked value -> original value recorded while masking the request.
/// `None` marks a masked value that must not be restored.
mask_map: HashMap<String, Option<String>>,
/// Set to `true` once the request body deserialises as `Req`.
is_openai: bool,
/// Decided on the first non-empty response chunk: whether it starts with `data:`.
is_openai_stream: Option<bool>,
/// Copy of the request's `stream` flag.
stream: bool,
/// Logger created with `PLUGIN_NAME`.
log: Log,
/// Buffer for streamed response chunks.
msg_window: MsgWindow,
/// Largest replacement length seen so far, in chars.
char_window_size: usize,
/// Largest replacement length seen so far, in bytes.
byte_window_size: usize,
}
fn deserialize_regexp<'de, D>(deserializer: D) -> Result<Regex, D::Error>
where
D: Deserializer<'de>,
{
let value: Value = Deserialize::deserialize(deserializer)?;
if let Some(pattern) = value.as_str() {
let (p, _) = SYSTEM.grok_to_pattern(pattern);
if let Ok(reg) = Regex::new(&p) {
Ok(reg)
} else if let Ok(reg) = Regex::new(pattern) {
Ok(reg)
} else {
Err(Error::custom(format!("regexp error field {}", pattern)))
}
} else {
Err(Error::custom("regexp error not string".to_string()))
}
}
fn deserialize_type<'de, D>(deserializer: D) -> Result<Type, D::Error>
where
D: Deserializer<'de>,
{
let value: Value = Deserialize::deserialize(deserializer)?;
if let Some(t) = value.as_str() {
if t == "replace" {
Ok(Type::Replace)
} else if t == "hash" {
Ok(Type::Hash)
} else {
Err(Error::custom(format!("regexp error value {}", t)))
}
} else {
Err(Error::custom("type error not string".to_string()))
}
}
/// Serde helper for `AiDataMaskingConfig::deny_words`: reads a list of strings
/// and builds a `DenyWord` from it.
fn deserialize_denyword<'de, D>(deserializer: D) -> Result<DenyWord, D::Error>
where
    D: Deserializer<'de>,
{
    let words: Vec<String> = match Deserialize::deserialize(deserializer) {
        Ok(list) => list,
        Err(err) => return Err(err),
    };
    Ok(DenyWord::from_iter(words))
}
fn deserialize_jsonpath<'de, D>(deserializer: D) -> Result<Vec<JsonPath>, D::Error>
where
D: Deserializer<'de>,
{
let value: Vec<String> = Deserialize::deserialize(deserializer)?;
let mut ret = Vec::new();
for v in value {
if v.is_empty() {
continue;
}
match JsonPath::from_str(&v) {
Ok(jp) => ret.push(jp),
Err(_) => return Err(Error::custom(format!("jsonpath error value {}", v))),
}
}
Ok(ret)
}
/// How a matched value is masked; parsed from the strings `"replace"` and
/// `"hash"` by `deserialize_type`.
#[derive(Debug, Clone, PartialEq, Eq)]
enum Type {
/// Substitute the match using the rule's `value` template.
Replace,
/// Substitute the match with a hex digest of the matched text.
Hash,
}
/// One masking rule from the plugin configuration.
#[derive(Debug, Deserialize, Clone)]
struct Rule {
/// Pattern to search for; grok references are expanded by `deserialize_regexp`.
#[serde(deserialize_with = "deserialize_regexp")]
regex: Regex,
/// Masking strategy; also accepted under the key `type`.
#[serde(deserialize_with = "deserialize_type", alias = "type")]
type_: Type,
/// When `true`, masked values are recorded so responses can be restored.
/// Defaults to `false`.
#[serde(default)]
restore: bool,
/// Replacement text used by `Type::Replace`. Defaults to the empty string.
#[serde(default)]
value: String,
}
/// Serde default for `deny_openai`: enabled.
fn default_deny_openai() -> bool {
true
}
/// Serde default for `deny_raw`: disabled.
fn default_deny_raw() -> bool {
false
}
/// Serde default for `system_deny`: disabled.
fn default_system_deny() -> bool {
false
}
/// Serde default for `deny_code`: HTTP status 200.
fn default_deny_code() -> u16 {
200
}
/// Serde default for `deny_content_type`.
fn default_deny_content_type() -> String {
"application/json".to_string()
}
/// Serde default for `deny_raw_message`: a JSON object whose `errmsg` says
/// (in Chinese) that the question or answer contains sensitive words and was blocked.
fn default_deny_raw_message() -> String {
"{\"errmsg\":\"提问或回答中包含敏感词,已被屏蔽\"}".to_string()
}
/// Serde default for `deny_message`: the same notice as plain text.
fn default_deny_message() -> String {
"提问或回答中包含敏感词,已被屏蔽".to_string()
}
/// Plugin configuration. Every field is optional in the configuration source;
/// missing fields take the serde defaults named in the attributes below.
/// NOTE(review): the derived `Default` uses plain type defaults (for example
/// `deny_openai == false`, `deny_code == 0`), which differ from the serde defaults.
#[derive(Default, Debug, Deserialize, Clone)]
pub struct AiDataMaskingConfig {
/// Inspect request bodies that deserialise as `Req` (a `messages` list).
#[serde(default = "default_deny_openai")]
deny_openai: bool,
/// Inspect the whole body as raw text.
#[serde(default = "default_deny_raw")]
deny_raw: bool,
/// JSON paths selecting string values to inspect in the request body.
#[serde(default, deserialize_with = "deserialize_jsonpath")]
deny_jsonpath: Vec<JsonPath>,
/// Also check against the built-in deny words in `SYSTEM`.
#[serde(default = "default_system_deny")]
system_deny: bool,
/// HTTP status code sent when a request is denied.
#[serde(default = "default_deny_code")]
deny_code: u16,
/// Message placed in the chat-completion-shaped deny response.
#[serde(default = "default_deny_message")]
deny_message: String,
/// Body sent verbatim when the request was not recognised as `Req`.
#[serde(default = "default_deny_raw_message")]
deny_raw_message: String,
/// Content type paired with `deny_raw_message`.
#[serde(default = "default_deny_content_type")]
deny_content_type: String,
/// Masking rules applied to request text, in order.
#[serde(default)]
replace_roles: Vec<Rule>,
/// User-supplied deny words; empty when not configured.
#[serde(deserialize_with = "deserialize_denyword", default = "DenyWord::empty")]
deny_words: DenyWord,
}
impl AiDataMaskingConfig {
    /// Returns `true` when `message` contains a deny word.
    ///
    /// The custom list is consulted first; the built-in list is consulted only
    /// when `system_deny` is enabled and the custom list did not match. Each
    /// hit is logged as a warning through `log`.
    fn check_message(&self, message: &str, log: &Log) -> bool {
        if let Some(word) = self.deny_words.check(message) {
            log.warn(&format!(
                "custom deny word {} matched from {}",
                word, message
            ));
            return true;
        }
        if !self.system_deny {
            return false;
        }
        match SYSTEM.deny_word.check(message) {
            Some(word) => {
                log.warn(&format!(
                    "system deny word {} matched from {}",
                    word, message
                ));
                true
            }
            None => false,
        }
    }
}
/// One chat message; only the text fields inspected by the plugin are read.
#[derive(Debug, Deserialize, Clone)]
struct Message {
/// Message text; empty string when absent.
#[serde(default)]
content: String,
/// Reasoning text; empty string when absent.
#[serde(default)]
reasoning_content: String,
}
/// Minimal view of a request body. `messages` has no default, so a body
/// without it does not deserialise as `Req`.
#[derive(Debug, Deserialize, Clone)]
struct Req {
/// Streaming flag; `false` when absent.
#[serde(default)]
stream: bool,
/// Messages to inspect and mask.
messages: Vec<Message>,
}
/// One entry of a response's `choices` list.
#[derive(Default, Debug, Deserialize)]
struct ResMessage {
/// The choice's message, if present.
#[serde(default)]
message: Option<Message>,
}
/// Minimal view of a non-streaming response body.
#[derive(Default, Debug, Deserialize)]
struct Res {
/// Choices to inspect; empty when absent.
#[serde(default)]
choices: Vec<ResMessage>,
}
/// Extra named patterns registered next to the ones returned by
/// `grok::patterns()`.
/// `MOBILE` matches 8 to 11 consecutive digits; `IDCARD` matches 17 digits
/// followed by a digit or `x`/`X`, or 15 digits.
static SYSTEM_PATTERNS: &[(&str, &str)] = &[
("MOBILE", r#"\d{8,11}"#),
("IDCARD", r#"\d{17}[0-9xX]|\d{15}"#),
];
impl System {
// Builds the shared state: compiles `GROK_PATTERN` (panics if it is invalid),
// loads the built-in deny words and fills the grok table via `init`.
fn new() -> Self {
let grok_regex = Regex::new(GROK_PATTERN).unwrap();
let grok_patterns = BTreeMap::new();
let mut system = System {
deny_word: DenyWord::system(),
grok_regex,
grok_patterns,
};
system.init();
system
}
// Fills `grok_patterns` from `grok::patterns()` and `SYSTEM_PATTERNS`,
// expanding `%{...}` references between patterns.
fn init(&mut self) {
// Pass 1: patterns without any grok reference go straight into the table;
// patterns that reference others are queued for expansion.
let mut grok_temp_patterns = VecDeque::new();
for patterns in [patterns(), SYSTEM_PATTERNS] {
for &(key, value) in patterns {
if self.grok_regex.is_match(value).is_ok_and(|r| r) {
grok_temp_patterns.push_back((String::from(key), String::from(value)));
} else {
self.grok_patterns
.insert(String::from(key), String::from(value));
}
}
}
// Pass 2: keep cycling through the queue. `last_ok` holds the first key that
// failed since the most recent success; meeting that key again means a whole
// round made no progress, so the loop stops and the remaining entries stay
// out of the table.
let mut last_ok: Option<String> = None;
while let Some((key, value)) = grok_temp_patterns.pop_front() {
if let Some(k) = &last_ok {
if k == &key {
break;
}
}
let (v, ok) = self.grok_to_pattern(&value);
if ok {
self.grok_patterns.insert(key, v);
last_ok = None;
} else {
if last_ok.is_none() {
last_ok = Some(key.clone());
}
// Re-queue with the partially expanded value.
grok_temp_patterns.push_back((key, v));
}
}
}
// Expands every `%{NAME}` / `%{NAME:alias}` reference in `pattern` whose name
// is present in `grok_patterns`. A reference with an alias becomes a named
// group `(?P<alias>...)`. Returns the rewritten pattern and a flag that is
// `false` when a match attempt errored or a referenced name was unknown.
fn grok_to_pattern(&self, pattern: &str) -> (String, bool) {
let mut ok = true;
let mut ret = pattern.to_string();
for capture in self.grok_regex.captures_iter(pattern) {
if capture.is_err() {
ok = false;
continue;
}
let c = capture.unwrap();
if let (Some(full), Some(name)) = (c.get(0), c.name("pattern")) {
if let Some(p) = self.grok_patterns.get(name.as_str()) {
// `replace` substitutes every occurrence of the matched reference text.
if let Some(alias) = c.name("alias") {
ret = ret.replace(full.as_str(), &format!("(?P<{}>{})", alias.as_str(), p));
} else {
ret = ret.replace(full.as_str(), p);
}
} else {
ok = false;
}
}
}
(ret, ok)
}
}
impl AiDataMaskingRoot {
    /// Creates a root context with a logger named after the plugin and an
    /// empty rule matcher.
    fn new() -> Self {
        let log = Log::new(PLUGIN_NAME.to_string());
        let rule_matcher = Rc::new(RefCell::new(RuleMatcher::default()));
        Self { log, rule_matcher }
    }
}
// No `Context` callbacks are overridden for the root context.
impl Context for AiDataMaskingRoot {}
impl RootContext for AiDataMaskingRoot {
// Forwards configuration handling to the shared `on_configure` helper, giving
// it mutable access to this root's rule matcher and its logger.
fn on_configure(&mut self, plugin_configuration_size: usize) -> bool {
on_configure(
self,
plugin_configuration_size,
self.rule_matcher.borrow_mut().deref_mut(),
&self.log,
)
}
// Creates the per-request context through the wrapper machinery.
fn create_http_context(&self, context_id: u32) -> Option<Box<dyn HttpContext>> {
self.create_http_context_use_wrapper(context_id)
}
// Declares that this root produces HTTP contexts.
fn get_type(&self) -> Option<ContextType> {
Some(ContextType::HttpContext)
}
}
impl RootContextWrapper<AiDataMaskingConfig> for AiDataMaskingRoot {
    /// Exposes the root's shared rule matcher.
    fn rule_matcher(&self) -> &SharedRuleMatcher<AiDataMaskingConfig> {
        &self.rule_matcher
    }

    /// Builds a fresh per-request context with no configuration, an empty
    /// mask map, an empty message window and zeroed window sizes.
    fn create_http_context_wrapper(
        &self,
        _context_id: u32,
    ) -> Option<Box<dyn HttpContextWrapper<AiDataMaskingConfig>>> {
        let context = AiDataMasking {
            weak: Weak::default(),
            mask_map: HashMap::new(),
            config: None,
            is_openai: false,
            is_openai_stream: None,
            stream: false,
            msg_window: MsgWindow::default(),
            log: Log::new(PLUGIN_NAME.to_string()),
            char_window_size: 0,
            byte_window_size: 0,
        };
        Some(Box::new(context))
    }
}
impl AiDataMasking {
fn check_message(&self, message: &str) -> bool {
if let Some(config) = &self.config {
config.check_message(message, self.log())
} else {
false
}
}
fn msg_to_response(&self, msg: &str, raw_msg: &str, content_type: &str) -> (String, String) {
if !self.is_openai {
(raw_msg.to_string(), content_type.to_string())
} else if self.stream {
(
format!(
"data:{}\n\n",
json!({"choices": [{"index": 0, "delta": {"role": "assistant", "content": msg}}], "usage": {}})
),
"text/event-stream;charset=UTF-8".to_string(),
)
} else {
(
json!({"choices": [{"index": 0, "message": {"role": "assistant", "content": msg}}], "usage": {}}).to_string(),
"application/json".to_string()
)
}
}
fn deny(&mut self, in_response: bool) -> DataAction {
if in_response && self.stream {
self.replace_http_response_body(&[]);
return DataAction::Continue;
}
let (deny_code, (deny_message, content_type)) = if let Some(config) = &self.config {
(
config.deny_code,
self.msg_to_response(
&config.deny_message,
&config.deny_raw_message,
&config.deny_content_type,
),
)
} else {
(
default_deny_code(),
self.msg_to_response(
&default_deny_message(),
&default_deny_raw_message(),
&default_deny_content_type(),
),
)
};
if in_response {
self.replace_http_response_body(deny_message.as_bytes());
return DataAction::Continue;
}
self.send_http_response(
deny_code as u32,
vec![("Content-Type", &content_type)],
Some(deny_message.as_bytes()),
);
DataAction::StopIterationAndBuffer
}
fn replace_request_msg(&mut self, message: &str) -> String {
let config = self.config.as_ref().unwrap();
let mut msg = message.to_string();
for rule in &config.replace_roles {
let mut replace_pair = Vec::new();
if rule.type_ == Type::Replace && !rule.restore {
msg = rule.regex.replace_all(&msg, &rule.value).to_string();
} else {
for mc in rule.regex.find_iter(&msg) {
if mc.is_err() {
continue;
}
let m = mc.unwrap();
let from_word = m.as_str();
let to_word = match rule.type_ {
Type::Hash => {
let digest = hmac_sha256::Hash::hash(from_word.as_bytes());
digest.iter().fold(String::new(), |mut output, b| {
let _ = write!(output, "{b:02x}");
output
})
}
Type::Replace => rule.regex.replace(from_word, &rule.value).to_string(),
};
if to_word.len() > self.byte_window_size {
self.byte_window_size = to_word.len();
}
if to_word.chars().count() > self.char_window_size {
self.char_window_size = to_word.chars().count();
}
replace_pair.push((from_word.to_string(), to_word.clone()));
if rule.restore && !to_word.is_empty() {
match self.mask_map.entry(to_word) {
std::collections::hash_map::Entry::Occupied(mut e) => {
e.insert(None);
}
std::collections::hash_map::Entry::Vacant(e) => {
e.insert(Some(from_word.to_string()));
}
}
}
}
for (from_word, to_word) in replace_pair {
msg = msg.replace(&from_word, &to_word);
}
}
}
if msg != message {
self.log()
.debug(&format!("replace_request_msg from {} to {}", message, msg));
}
msg
}
}
// No `Context` callbacks are overridden for the per-request context.
impl Context for AiDataMasking {}
impl HttpContext for AiDataMasking {
    /// Drops `Content-Length` and pauses header processing when the request
    /// has a body, because the body may be rewritten; otherwise continues.
    fn on_http_request_headers(
        &mut self,
        _num_headers: usize,
        _end_of_stream: bool,
    ) -> HeaderAction {
        if has_request_body() {
            self.set_http_request_header("Content-Length", None);
            HeaderAction::StopIteration
        } else {
            HeaderAction::Continue
        }
    }

    /// Drops `Content-Length` from the response, because the body may be rewritten.
    fn on_http_response_headers(
        &mut self,
        _num_headers: usize,
        _end_of_stream: bool,
    ) -> HeaderAction {
        self.set_http_response_header("Content-Length", None);
        HeaderAction::Continue
    }

    /// Handles streamed response chunks; non-streaming responses pass through
    /// here untouched.
    ///
    /// Each chunk is pushed into the message window, every buffered message is
    /// checked for deny words and has masked values restored, and then either
    /// the whole window (at end of stream) or the part outside the look-back
    /// window is emitted as the new chunk body.
    fn on_http_response_body(&mut self, body_size: usize, end_of_stream: bool) -> DataAction {
        if !self.stream {
            return DataAction::Continue;
        }
        if body_size > 0 {
            if let Some(body) = self.get_http_response_body(0, body_size) {
                // Decide once, on the first chunk, whether the upstream speaks SSE.
                if self.is_openai && self.is_openai_stream.is_none() {
                    self.is_openai_stream = Some(body.starts_with(b"data:"));
                }
                self.msg_window
                    .push(&body, self.is_openai_stream.unwrap_or_default());
                let mut deny = false;
                // A separate logger is used because `self.msg_window` is mutably
                // borrowed for the duration of the loop.
                let log = Log::new(PLUGIN_NAME.to_string());
                for message in self.msg_window.messages_iter_mut() {
                    if let Ok(mut msg) = String::from_utf8(message.clone()) {
                        if let Some(config) = &self.config {
                            if config.check_message(&msg, &log) {
                                deny = true;
                                break;
                            }
                        }
                        if !self.mask_map.is_empty() {
                            // Keys are masked values, values are the originals.
                            for (from_word, to_word) in self.mask_map.iter() {
                                if let Some(to) = to_word {
                                    msg = msg.replace(from_word, to);
                                }
                            }
                        }
                        message.clear();
                        message.extend_from_slice(msg.as_bytes());
                    }
                }
                if deny {
                    return self.deny(true);
                }
            }
        }
        // `is_openai_stream` is still `None` when no non-empty chunk has been
        // read yet (for example an empty final chunk); fall back to `false`
        // instead of panicking, as the `push` call above already does.
        let is_openai_stream = self.is_openai_stream.unwrap_or_default();
        let new_body = if end_of_stream {
            self.msg_window.finish(is_openai_stream)
        } else {
            self.msg_window.pop(
                self.char_window_size * 2,
                self.byte_window_size * 2,
                is_openai_stream,
            )
        };
        self.replace_http_response_body(&new_body);
        DataAction::Continue
    }
}
// Wrapper callbacks: receive the matched configuration and the complete
// request/response bodies.
impl HttpContextWrapper<AiDataMaskingConfig> for AiDataMasking {
// Stores the weak self-reference supplied by the wrapper.
fn init_self_weak(
&mut self,
self_weak: std::rc::Weak<RefCell<Box<dyn HttpContextWrapper<AiDataMaskingConfig>>>>,
) {
self.weak = self_weak;
}
// Logger used by this context.
fn log(&self) -> &Log {
&self.log
}
// Remembers the configuration matched for this request.
fn on_config(&mut self, config: Rc<AiDataMaskingConfig>) {
self.config = Some(config.clone());
}
// The request body is always cached so it can be inspected as a whole.
fn cache_request_body(&self) -> bool {
true
}
// The response body is cached only for non-streaming exchanges; streaming
// responses are processed chunk by chunk in `on_http_response_body`.
fn cache_response_body(&self) -> bool {
!self.stream
}
// Checks the complete request body for deny words and masks sensitive data.
// Exactly one mode handles the body, tried in this order: chat-style body
// (`deny_openai`), JSON-path selected strings (`deny_jsonpath`), raw text
// (`deny_raw`). Without a configuration the body passes through.
fn on_http_request_complete_body(&mut self, req_body: &Bytes) -> DataAction {
if self.config.is_none() {
return DataAction::Continue;
}
let config = self.config.as_ref().unwrap();
// Valid JSON is re-serialised so that the textual replacements below operate
// on serde_json's own encoding; anything else is used as UTF-8 text, and
// non-UTF-8 bodies pass through untouched.
let mut req_body = match serde_json::from_slice::<Value>(req_body) {
Ok(r) => r.to_string(),
Err(_) => {
if let Ok(r) = String::from_utf8(req_body.clone()) {
r
} else {
return DataAction::Continue;
}
}
};
if config.deny_openai {
if let Ok(req) = serde_json::from_str::<Req>(req_body.as_str()) {
self.is_openai = true;
self.stream = req.stream;
for msg in req.messages {
if self.check_message(&msg.content)
|| self.check_message(&msg.reasoning_content)
{
return self.deny(false);
}
let new_content = self.replace_request_msg(&msg.content);
let new_reasoning_content = self.replace_request_msg(&msg.reasoning_content);
// Replace the JSON-encoded old string with the JSON-encoded masked
// string inside the serialised body.
if new_content != msg.content {
req_body = req_body.replace(
&Value::String(msg.content).to_string(),
&Value::String(new_content).to_string(),
);
}
if new_reasoning_content != msg.reasoning_content {
req_body = req_body.replace(
&Value::String(msg.reasoning_content).to_string(),
&Value::String(new_reasoning_content).to_string(),
);
}
}
self.replace_http_request_body(req_body.as_bytes());
return DataAction::Continue;
}
}
if !config.deny_jsonpath.is_empty() {
if let Ok(json) = serde_json::from_str::<Value>(req_body.as_str()) {
for jsonpath in config.deny_jsonpath.clone() {
for v in jsonpath.find_slice(&json) {
// Only string values selected by the path are inspected.
if let JsonPathValue::Slice(d, _) = v {
if let Some(s) = d.as_str() {
if self.check_message(s) {
return self.deny(false);
}
let content = s.to_string();
let new_content = self.replace_request_msg(&content);
if new_content != content {
req_body = req_body.replace(
&Value::String(content).to_string(),
&Value::String(new_content).to_string(),
);
}
}
}
}
}
self.replace_http_request_body(req_body.as_bytes());
return DataAction::Continue;
}
}
if config.deny_raw {
if self.check_message(&req_body) {
return self.deny(false);
}
let new_body = self.replace_request_msg(&req_body);
if new_body != req_body {
self.replace_http_request_body(new_body.as_bytes())
}
return DataAction::Continue;
}
DataAction::Continue
}
// Checks the complete (non-streaming) response body for deny words and
// restores values that were masked in the request. Handles chat-style
// responses when the request was recognised as such, otherwise raw text when
// `deny_raw` is enabled.
fn on_http_response_complete_body(&mut self, res_body: &Bytes) -> DataAction {
if self.config.is_none() {
return DataAction::Continue;
}
let config = self.config.as_ref().unwrap();
// Same normalisation as for the request body.
let mut res_body = match serde_json::from_slice::<Value>(res_body) {
Ok(r) => r.to_string(),
Err(_) => {
if let Ok(r) = String::from_utf8(res_body.clone()) {
r
} else {
return DataAction::Continue;
}
}
};
if config.deny_openai && self.is_openai {
if let Ok(res) = serde_json::from_str::<Res>(res_body.as_str()) {
for msg in res.choices {
if let Some(message) = msg.message {
if self.check_message(&message.content)
|| self.check_message(&message.reasoning_content)
{
return self.deny(true);
}
if self.mask_map.is_empty() {
continue;
}
let mut new_content = message.content.clone();
let mut new_reasoning_content = message.reasoning_content.clone();
// `mask_map` keys are masked values and its values are the originals, so
// here `from_word` is the masked text and `to` the text to restore.
for (from_word, to_word) in self.mask_map.iter() {
if let Some(to) = to_word {
new_content = new_content.replace(from_word, to);
new_reasoning_content =
new_reasoning_content.replace(from_word, to);
}
}
if new_content != message.content {
res_body = res_body.replace(
&Value::String(message.content).to_string(),
&Value::String(new_content).to_string(),
);
}
if new_reasoning_content != message.reasoning_content {
res_body = res_body.replace(
&Value::String(message.reasoning_content).to_string(),
&Value::String(new_reasoning_content).to_string(),
);
}
}
}
self.replace_http_response_body(res_body.as_bytes());
return DataAction::Continue;
}
}
if config.deny_raw {
if self.check_message(&res_body) {
return self.deny(true);
}
if !self.mask_map.is_empty() {
for (from_word, to_word) in self.mask_map.iter() {
if let Some(to) = to_word {
res_body = res_body.replace(from_word, to);
}
}
}
self.replace_http_response_body(res_body.as_bytes());
return DataAction::Continue;
}
DataAction::Continue
}
}

View File

@@ -1,8 +1,25 @@
// Copyright (c) 2025 Alibaba Group Holding Ltd.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use std::collections::HashSet;
use jieba_rs::Jieba;
use rust_embed::Embed;
use crate::Asset;
#[derive(Embed)]
#[folder = "res/"]
struct Asset;
#[derive(Default, Debug, Clone)]
pub(crate) struct DenyWord {

View File

@@ -1,4 +1,4 @@
// Copyright (c) 2023 Alibaba Group Holding Ltd.
// Copyright (c) 2025 Alibaba Group Holding Ltd.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
@@ -12,705 +12,8 @@
// See the License for the specific language governing permissions and
// limitations under the License.
mod ai_data_masking;
mod deny_word;
mod msg_win_openai;
mod msg_window;
use crate::deny_word::DenyWord;
use crate::msg_window::MsgWindow;
use fancy_regex::Regex;
use grok::patterns;
use higress_wasm_rust::log::Log;
use higress_wasm_rust::plugin_wrapper::{HttpContextWrapper, RootContextWrapper};
use higress_wasm_rust::request_wrapper::has_request_body;
use higress_wasm_rust::rule_matcher::{on_configure, RuleMatcher, SharedRuleMatcher};
use jsonpath_rust::{JsonPath, JsonPathValue};
use lazy_static::lazy_static;
use proxy_wasm::traits::{Context, HttpContext, RootContext};
use proxy_wasm::types::{Bytes, ContextType, DataAction, HeaderAction, LogLevel};
use rust_embed::Embed;
use serde::de::Error;
use serde::Deserializer;
use serde::{Deserialize, Serialize};
use serde_json::{json, Value};
use std::cell::RefCell;
use std::collections::{BTreeMap, HashMap, VecDeque};
use std::ops::DerefMut;
use std::rc::Rc;
use std::str::FromStr;
use std::vec;
// WASM entry point: sets the proxy-wasm log level to Trace and registers a
// factory that builds a fresh `AiDataMaskingRoot` as the root context.
proxy_wasm::main! {{
proxy_wasm::set_log_level(LogLevel::Trace);
proxy_wasm::set_root_context(|_|Box::new(AiDataMaskingRoot::new()));
}}
/// Name handed to `Log::new` for every logger this plugin creates.
const PLUGIN_NAME: &str = "ai-data-masking";
/// Regex recognising grok references of the form `%{NAME}` or `%{NAME:alias}`.
/// Named groups: `name`, `pattern` and the optional `alias`.
/// NOTE(review): `[A-z0-9]` covers the whole ASCII range `A`..`z`, which also
/// includes `[`, `\`, `]`, `^`, `_` and the backtick -- confirm this is intended.
const GROK_PATTERN: &str = r"%\{(?<name>(?<pattern>[A-z0-9]+)(?::(?<alias>[A-z0-9_:;\/\s\.]+))?)\}";
/// Files embedded at build time from the `res/` folder via `rust_embed`.
#[derive(Embed)]
#[folder = "res/"]
struct Asset;
/// Process-wide state shared by every request: the built-in deny-word list and
/// the table used to expand grok references into plain regular expressions.
struct System {
/// Built-in deny words, created with `DenyWord::system()`.
deny_word: DenyWord,
/// Compiled `GROK_PATTERN`, used to find `%{...}` references.
grok_regex: Regex,
/// Pattern name -> regex source, filled by `System::init`.
grok_patterns: BTreeMap<String, String>,
}
// Lazily constructed singleton `System`; built on first access.
lazy_static! {
static ref SYSTEM: System = System::new();
}
/// Root context of the plugin: owns the logger and the shared rule matcher.
struct AiDataMaskingRoot {
/// Logger created with `PLUGIN_NAME`.
log: Log,
/// Shared matcher holding the parsed `AiDataMaskingConfig` rules.
rule_matcher: SharedRuleMatcher<AiDataMaskingConfig>,
}
/// Per-request HTTP context that performs deny-word checks and data masking.
struct AiDataMasking {
/// Configuration matched for this request, if any.
config: Option<Rc<AiDataMaskingConfig>>,
/// Masked value -> original value; `None` marks a value that must not be restored.
mask_map: HashMap<String, Option<String>>,
/// Whether the request body was recognised as a chat-style request.
is_openai: bool,
/// Whether the streamed response uses `data:` events; unknown until a chunk is seen.
is_openai_stream: Option<bool>,
/// Copy of the request's `stream` flag.
stream: bool,
/// Logger created with `PLUGIN_NAME`.
log: Log,
/// Buffer for streamed response chunks.
msg_window: MsgWindow,
/// Look-back size tracked in chars.
char_window_size: usize,
/// Look-back size tracked in bytes.
byte_window_size: usize,
}
fn deserialize_regexp<'de, D>(deserializer: D) -> Result<Regex, D::Error>
where
D: Deserializer<'de>,
{
let value: Value = Deserialize::deserialize(deserializer)?;
if let Some(pattern) = value.as_str() {
let (p, _) = SYSTEM.grok_to_pattern(pattern);
if let Ok(reg) = Regex::new(&p) {
Ok(reg)
} else if let Ok(reg) = Regex::new(pattern) {
Ok(reg)
} else {
Err(Error::custom(format!("regexp error field {}", pattern)))
}
} else {
Err(Error::custom("regexp error not string".to_string()))
}
}
fn deserialize_type<'de, D>(deserializer: D) -> Result<Type, D::Error>
where
D: Deserializer<'de>,
{
let value: Value = Deserialize::deserialize(deserializer)?;
if let Some(t) = value.as_str() {
if t == "replace" {
Ok(Type::Replace)
} else if t == "hash" {
Ok(Type::Hash)
} else {
Err(Error::custom(format!("regexp error value {}", t)))
}
} else {
Err(Error::custom("type error not string".to_string()))
}
}
/// Serde helper for `AiDataMaskingConfig::deny_words`: reads a list of strings
/// and builds a `DenyWord` from it.
fn deserialize_denyword<'de, D>(deserializer: D) -> Result<DenyWord, D::Error>
where
    D: Deserializer<'de>,
{
    let words: Vec<String> = match Deserialize::deserialize(deserializer) {
        Ok(list) => list,
        Err(err) => return Err(err),
    };
    Ok(DenyWord::from_iter(words))
}
fn deserialize_jsonpath<'de, D>(deserializer: D) -> Result<Vec<JsonPath>, D::Error>
where
D: Deserializer<'de>,
{
let value: Vec<String> = Deserialize::deserialize(deserializer)?;
let mut ret = Vec::new();
for v in value {
if v.is_empty() {
continue;
}
match JsonPath::from_str(&v) {
Ok(jp) => ret.push(jp),
Err(_) => return Err(Error::custom(format!("jsonpath error value {}", v))),
}
}
Ok(ret)
}
/// How a matched value is masked; parsed from the strings `"replace"` and
/// `"hash"` by `deserialize_type`.
#[derive(Debug, Clone, PartialEq, Eq)]
enum Type {
Replace,
Hash,
}
/// One masking rule from the plugin configuration.
#[derive(Debug, Deserialize, Clone)]
struct Rule {
/// Pattern to search for; grok references are expanded by `deserialize_regexp`.
#[serde(deserialize_with = "deserialize_regexp")]
regex: Regex,
/// Masking strategy; also accepted under the key `type`.
#[serde(deserialize_with = "deserialize_type", alias = "type")]
type_: Type,
/// Restore flag; defaults to `false`.
#[serde(default)]
restore: bool,
/// Replacement text; defaults to the empty string.
#[serde(default)]
value: String,
}
/// Serde default for `deny_openai`: enabled.
fn default_deny_openai() -> bool {
true
}
/// Serde default for `deny_raw`: disabled.
fn default_deny_raw() -> bool {
false
}
/// Serde default for `system_deny`: disabled.
fn default_system_deny() -> bool {
false
}
/// Serde default for `deny_code`: HTTP status 200.
fn default_deny_code() -> u16 {
200
}
/// Serde default for `deny_content_type`.
fn default_deny_content_type() -> String {
"application/json".to_string()
}
/// Serde default for `deny_raw_message`: a JSON object whose `errmsg` says
/// (in Chinese) that the question or answer contains sensitive words and was blocked.
fn default_deny_raw_message() -> String {
"{\"errmsg\":\"提问或回答中包含敏感词,已被屏蔽\"}".to_string()
}
/// Serde default for `deny_message`: the same notice as plain text.
fn default_deny_message() -> String {
"提问或回答中包含敏感词,已被屏蔽".to_string()
}
/// Plugin configuration. Every field is optional in the configuration source;
/// missing fields take the serde defaults named in the attributes below.
/// NOTE(review): the derived `Default` uses plain type defaults, which differ
/// from the serde defaults (for example `deny_openai`, `deny_code`).
#[derive(Default, Debug, Deserialize, Clone)]
pub struct AiDataMaskingConfig {
/// Inspect request bodies that deserialise as `Req`.
#[serde(default = "default_deny_openai")]
deny_openai: bool,
/// Inspect the whole body as raw text.
#[serde(default = "default_deny_raw")]
deny_raw: bool,
/// JSON paths parsed by `deserialize_jsonpath`.
#[serde(default, deserialize_with = "deserialize_jsonpath")]
deny_jsonpath: Vec<JsonPath>,
/// Also check against the built-in deny words in `SYSTEM`.
#[serde(default = "default_system_deny")]
system_deny: bool,
/// HTTP status code used for deny responses.
#[serde(default = "default_deny_code")]
deny_code: u16,
/// Message placed in the chat-completion-shaped deny response.
#[serde(default = "default_deny_message")]
deny_message: String,
/// Body sent verbatim for requests not recognised as `Req`.
#[serde(default = "default_deny_raw_message")]
deny_raw_message: String,
/// Content type paired with `deny_raw_message`.
#[serde(default = "default_deny_content_type")]
deny_content_type: String,
/// Masking rules.
#[serde(default)]
replace_roles: Vec<Rule>,
/// User-supplied deny words; empty when not configured.
#[serde(deserialize_with = "deserialize_denyword", default = "DenyWord::empty")]
deny_words: DenyWord,
}
/// One chat message; only `content` is read.
#[derive(Debug, Deserialize, Clone)]
struct Message {
/// Message text; empty string when absent.
#[serde(default)]
content: String,
}
/// Minimal view of a request body. `messages` has no default, so a body
/// without it does not deserialise as `Req`.
#[derive(Debug, Deserialize, Clone)]
struct Req {
/// Streaming flag; `false` when absent.
#[serde(default)]
stream: bool,
messages: Vec<Message>,
}
/// One entry of a response's `choices` list, carrying either a `message` or
/// a `delta`; both are optional.
#[derive(Default, Debug, Deserialize)]
struct ResMessage {
#[serde(default)]
message: Option<Message>,
#[serde(default)]
delta: Option<Message>,
}
/// Token counters read from a response's `usage` object; each counter is 0
/// when absent.
#[derive(Default, Debug, Deserialize, Serialize, Clone)]
struct Usage {
#[serde(default)]
completion_tokens: i32,
#[serde(default)]
prompt_tokens: i32,
#[serde(default)]
total_tokens: i32,
}
impl Usage {
pub fn add(&mut self, usage: &Usage) {
self.completion_tokens += usage.completion_tokens;
self.prompt_tokens += usage.prompt_tokens;
self.total_tokens += usage.total_tokens;
}
pub fn reset(&mut self) {
self.completion_tokens = 0;
self.prompt_tokens = 0;
self.total_tokens = 0;
}
}
/// Minimal view of a response body: its choices and token usage, both
/// defaulted when absent.
#[derive(Default, Debug, Deserialize)]
struct Res {
#[serde(default)]
choices: Vec<ResMessage>,
#[serde(default)]
usage: Usage,
}
/// Extra named patterns registered next to the ones returned by
/// `grok::patterns()`.
/// `MOBILE` matches 8 to 11 consecutive digits; `IDCARD` matches 17 digits
/// followed by a digit or `x`/`X`, or 15 digits.
static SYSTEM_PATTERNS: &[(&str, &str)] = &[
("MOBILE", r#"\d{8,11}"#),
("IDCARD", r#"\d{17}[0-9xX]|\d{15}"#),
];
impl System {
// Builds the shared state: compiles `GROK_PATTERN` (panics if it is invalid),
// loads the built-in deny words and fills the grok table via `init`.
fn new() -> Self {
let grok_regex = Regex::new(GROK_PATTERN).unwrap();
let grok_patterns = BTreeMap::new();
let mut system = System {
deny_word: DenyWord::system(),
grok_regex,
grok_patterns,
};
system.init();
system
}
// Fills `grok_patterns` from `grok::patterns()` and `SYSTEM_PATTERNS`,
// expanding `%{...}` references between patterns.
fn init(&mut self) {
// Pass 1: patterns without any grok reference go straight into the table;
// patterns that reference others are queued for expansion.
let mut grok_temp_patterns = VecDeque::new();
for patterns in [patterns(), SYSTEM_PATTERNS] {
for &(key, value) in patterns {
if self.grok_regex.is_match(value).is_ok_and(|r| r) {
grok_temp_patterns.push_back((String::from(key), String::from(value)));
} else {
self.grok_patterns
.insert(String::from(key), String::from(value));
}
}
}
// Pass 2: keep cycling through the queue. `last_ok` holds the first key that
// failed since the most recent success; meeting that key again means a whole
// round made no progress, so the loop stops and the remaining entries stay
// out of the table.
let mut last_ok: Option<String> = None;
while let Some((key, value)) = grok_temp_patterns.pop_front() {
if let Some(k) = &last_ok {
if k == &key {
break;
}
}
let (v, ok) = self.grok_to_pattern(&value);
if ok {
self.grok_patterns.insert(key, v);
last_ok = None;
} else {
if last_ok.is_none() {
last_ok = Some(key.clone());
}
// Re-queue with the partially expanded value.
grok_temp_patterns.push_back((key, v));
}
}
}
// Expands every `%{NAME}` / `%{NAME:alias}` reference in `pattern` whose name
// is present in `grok_patterns`. A reference with an alias becomes a named
// group `(?P<alias>...)`. Returns the rewritten pattern and a flag that is
// `false` when a match attempt errored or a referenced name was unknown.
fn grok_to_pattern(&self, pattern: &str) -> (String, bool) {
let mut ok = true;
let mut ret = pattern.to_string();
for capture in self.grok_regex.captures_iter(pattern) {
if capture.is_err() {
ok = false;
continue;
}
let c = capture.unwrap();
if let (Some(full), Some(name)) = (c.get(0), c.name("pattern")) {
if let Some(p) = self.grok_patterns.get(name.as_str()) {
// `replace` substitutes every occurrence of the matched reference text.
if let Some(alias) = c.name("alias") {
ret = ret.replace(full.as_str(), &format!("(?P<{}>{})", alias.as_str(), p));
} else {
ret = ret.replace(full.as_str(), p);
}
} else {
ok = false;
}
}
}
(ret, ok)
}
}
impl AiDataMaskingRoot {
    /// Creates a root context with a logger named after the plugin and an
    /// empty rule matcher.
    fn new() -> Self {
        let log = Log::new(PLUGIN_NAME.to_string());
        let rule_matcher = Rc::new(RefCell::new(RuleMatcher::default()));
        Self { log, rule_matcher }
    }
}
// No `Context` callbacks are overridden for the root context.
impl Context for AiDataMaskingRoot {}
impl RootContext for AiDataMaskingRoot {
// Forwards configuration handling to the shared `on_configure` helper, giving
// it mutable access to this root's rule matcher and its logger.
fn on_configure(&mut self, plugin_configuration_size: usize) -> bool {
on_configure(
self,
plugin_configuration_size,
self.rule_matcher.borrow_mut().deref_mut(),
&self.log,
)
}
// Creates the per-request context through the wrapper machinery.
fn create_http_context(&self, context_id: u32) -> Option<Box<dyn HttpContext>> {
self.create_http_context_use_wrapper(context_id)
}
// Declares that this root produces HTTP contexts.
fn get_type(&self) -> Option<ContextType> {
Some(ContextType::HttpContext)
}
}
impl RootContextWrapper<AiDataMaskingConfig> for AiDataMaskingRoot {
    /// Exposes the shared rule matcher held by this root.
    fn rule_matcher(&self) -> &SharedRuleMatcher<AiDataMaskingConfig> {
        &self.rule_matcher
    }

    /// Builds a fresh per-request masking context: no config yet, empty mask
    /// map and message window, all flags off and both window sizes at zero.
    fn create_http_context_wrapper(
        &self,
        _context_id: u32,
    ) -> Option<Box<dyn HttpContextWrapper<AiDataMaskingConfig>>> {
        let context = AiDataMasking {
            mask_map: HashMap::new(),
            config: None,
            is_openai: false,
            is_openai_stream: None,
            stream: false,
            msg_window: MsgWindow::new(),
            log: Log::new(PLUGIN_NAME.to_string()),
            char_window_size: 0,
            byte_window_size: 0,
        };
        Some(Box::new(context))
    }
}
impl AiDataMasking {
fn check_message(&self, message: &str) -> bool {
if let Some(config) = &self.config {
if let Some(word) = config.deny_words.check(message) {
self.log().warn(&format!(
"custom deny word {} matched from {}",
word, message
));
return true;
} else if config.system_deny {
if let Some(word) = SYSTEM.deny_word.check(message) {
self.log().warn(&format!(
"system deny word {} matched from {}",
word, message
));
return true;
}
}
}
false
}
fn msg_to_response(&self, msg: &str, raw_msg: &str, content_type: &str) -> (String, String) {
if !self.is_openai {
(raw_msg.to_string(), content_type.to_string())
} else if self.stream {
(
format!(
"data:{}\n\n",
json!({"choices": [{"index": 0, "delta": {"role": "assistant", "content": msg}}], "usage": {}})
),
"text/event-stream;charset=UTF-8".to_string(),
)
} else {
(
json!({"choices": [{"index": 0, "message": {"role": "assistant", "content": msg}}], "usage": {}}).to_string(),
"application/json".to_string()
)
}
}
fn deny(&mut self, in_response: bool) -> DataAction {
if in_response && self.stream {
self.replace_http_response_body(&[]);
return DataAction::Continue;
}
let (deny_code, (deny_message, content_type)) = if let Some(config) = &self.config {
(
config.deny_code,
self.msg_to_response(
&config.deny_message,
&config.deny_raw_message,
&config.deny_content_type,
),
)
} else {
(
default_deny_code(),
self.msg_to_response(
&default_deny_message(),
&default_deny_raw_message(),
&default_deny_content_type(),
),
)
};
if in_response {
self.replace_http_response_body(deny_message.as_bytes());
return DataAction::Continue;
}
self.send_http_response(
deny_code as u32,
vec![("Content-Type", &content_type)],
Some(deny_message.as_bytes()),
);
DataAction::StopIterationAndBuffer
}
fn replace_request_msg(&mut self, message: &str) -> String {
let config = self.config.as_ref().unwrap();
let mut msg = message.to_string();
for rule in &config.replace_roles {
let mut replace_pair = Vec::new();
if rule.type_ == Type::Replace && !rule.restore {
msg = rule.regex.replace_all(&msg, &rule.value).to_string();
} else {
for mc in rule.regex.find_iter(&msg) {
if mc.is_err() {
continue;
}
let m = mc.unwrap();
let from_word = m.as_str();
let to_word = match rule.type_ {
Type::Hash => {
let digest = md5::compute(from_word.as_bytes());
format!("{:x}", digest)
}
Type::Replace => rule.regex.replace(from_word, &rule.value).to_string(),
};
if to_word.len() > self.byte_window_size {
self.byte_window_size = to_word.len();
}
if to_word.chars().count() > self.char_window_size {
self.char_window_size = to_word.chars().count();
}
replace_pair.push((from_word.to_string(), to_word.clone()));
if rule.restore && !to_word.is_empty() {
match self.mask_map.entry(to_word) {
std::collections::hash_map::Entry::Occupied(mut e) => {
e.insert(None);
}
std::collections::hash_map::Entry::Vacant(e) => {
e.insert(Some(from_word.to_string()));
}
}
}
}
for (from_word, to_word) in replace_pair {
msg = msg.replace(&from_word, &to_word);
}
}
}
if msg != message {
self.log()
.debug(&format!("replace_request_msg from {} to {}", message, msg));
}
msg
}
}
// The per-request context overrides none of the `Context` methods; the empty
// impl only marks the type as a `Context`.
impl Context for AiDataMasking {}
impl HttpContext for AiDataMasking {
    /// Pauses header processing when a request body follows, after removing
    /// `Content-Length` (the body may be rewritten to a different size).
    fn on_http_request_headers(
        &mut self,
        _num_headers: usize,
        _end_of_stream: bool,
    ) -> HeaderAction {
        if has_request_body() {
            self.set_http_request_header("Content-Length", None);
            HeaderAction::StopIteration
        } else {
            HeaderAction::Continue
        }
    }

    /// Removes `Content-Length` from every response, since the body may be
    /// rewritten.
    fn on_http_response_headers(
        &mut self,
        _num_headers: usize,
        _end_of_stream: bool,
    ) -> HeaderAction {
        self.set_http_response_header("Content-Length", None);
        HeaderAction::Continue
    }

    /// Handles one chunk of a streaming response.
    ///
    /// Does nothing unless the request asked for streaming. Otherwise the
    /// chunk is pushed into the message window, the buffered text is checked
    /// against the deny words and masked values from `mask_map` are restored.
    /// The chunk is then replaced with whatever the window releases: all of
    /// it at end of stream, otherwise everything except a tail of twice the
    /// largest masked value.
    ///
    /// Whether the response is an event stream is decided from the first
    /// non-empty chunk. Until that has happened (empty first chunk, or the
    /// body could not be read) the window is treated as plain text instead of
    /// panicking on the undecided flag.
    fn on_http_response_body(&mut self, body_size: usize, end_of_stream: bool) -> DataAction {
        if !self.stream {
            return DataAction::Continue;
        }
        if body_size > 0 {
            if let Some(body) = self.get_http_response_body(0, body_size) {
                if self.is_openai && self.is_openai_stream.is_none() {
                    self.is_openai_stream = Some(body.starts_with(b"data:"));
                }
                let as_event_stream = self.is_openai_stream.unwrap_or(false);
                self.msg_window.push(&body, as_event_stream);
                if let Ok(mut msg) = String::from_utf8(self.msg_window.message.clone()) {
                    if self.check_message(&msg) {
                        return self.deny(true);
                    }
                    // Keys are masked values, values the originals to restore.
                    for (masked, original) in self.mask_map.iter() {
                        if let Some(original) = original {
                            msg = msg.replace(masked, original);
                        }
                    }
                    self.msg_window.message = msg.into_bytes();
                }
            }
        }
        let as_event_stream = self.is_openai_stream.unwrap_or(false);
        let new_body = if end_of_stream {
            self.msg_window.finish(as_event_stream)
        } else {
            self.msg_window.pop(
                self.char_window_size * 2,
                self.byte_window_size * 2,
                as_event_stream,
            )
        };
        self.replace_http_response_body(&new_body);
        DataAction::Continue
    }
}
impl HttpContextWrapper<AiDataMaskingConfig> for AiDataMasking {
    /// Logger used by this context.
    fn log(&self) -> &Log {
        &self.log
    }

    /// Stores the configuration matched for this request.
    fn on_config(&mut self, config: Rc<AiDataMaskingConfig>) {
        self.config = Some(config);
    }

    /// The request body is always buffered in full.
    fn cache_request_body(&self) -> bool {
        true
    }

    /// The response body is buffered in full unless the request asked for
    /// streaming.
    fn cache_response_body(&self) -> bool {
        !self.stream
    }

    /// Checks and masks the complete request body.
    ///
    /// The first applicable mode handles the body and returns:
    /// 1. `deny_openai`, when the body parses as `Req`: each message content
    ///    is checked and masked, and `is_openai` / `stream` are recorded.
    /// 2. `deny_jsonpath`, when the body parses as JSON: every string
    ///    selected by a configured path is checked and masked.
    /// 3. `deny_raw`: the whole body is treated as one message.
    ///
    /// Masked content is written back by replacing its JSON-encoded form in
    /// the body text. Bodies that are not valid UTF-8 pass through untouched.
    fn on_http_request_complete_body(&mut self, req_body: &Bytes) -> DataAction {
        let Some(config) = self.config.clone() else {
            return DataAction::Continue;
        };
        let Ok(mut body) = String::from_utf8(req_body.clone()) else {
            return DataAction::Continue;
        };

        if config.deny_openai {
            if let Ok(req) = serde_json::from_str::<Req>(body.as_str()) {
                self.is_openai = true;
                self.stream = req.stream;
                for msg in req.messages {
                    if self.check_message(&msg.content) {
                        return self.deny(false);
                    }
                    let masked = self.replace_request_msg(&msg.content);
                    if masked != msg.content {
                        if let (Ok(from), Ok(to)) = (
                            serde_json::to_string(&msg.content),
                            serde_json::to_string(&masked),
                        ) {
                            body = body.replace(&from, &to);
                        }
                    }
                }
                self.replace_http_request_body(body.as_bytes());
                return DataAction::Continue;
            }
        }

        if !config.deny_jsonpath.is_empty() {
            if let Ok(json) = serde_json::from_str::<Value>(body.as_str()) {
                for jsonpath in config.deny_jsonpath.clone() {
                    for v in jsonpath.find_slice(&json) {
                        let JsonPathValue::Slice(d, _) = v else {
                            continue;
                        };
                        let Some(s) = d.as_str() else {
                            continue;
                        };
                        if self.check_message(s) {
                            return self.deny(false);
                        }
                        let content = s.to_string();
                        let masked = self.replace_request_msg(&content);
                        if masked != content {
                            if let (Ok(from), Ok(to)) = (
                                serde_json::to_string(&content),
                                serde_json::to_string(&masked),
                            ) {
                                body = body.replace(&from, &to);
                            }
                        }
                    }
                }
                self.replace_http_request_body(body.as_bytes());
                return DataAction::Continue;
            }
        }

        if config.deny_raw {
            if self.check_message(&body) {
                return self.deny(false);
            }
            let masked = self.replace_request_msg(&body);
            if masked != body {
                self.replace_http_request_body(masked.as_bytes());
            }
        }
        DataAction::Continue
    }

    /// Checks the complete (non-streaming) response body and restores masked
    /// values.
    ///
    /// For OpenAI traffic whose body parses as `Res`, each choice's message
    /// content is checked against the deny words and, when `mask_map` is not
    /// empty, has its masked values replaced by the originals. Otherwise,
    /// with `deny_raw`, the whole body is checked and restored as one string.
    /// Bodies that are not valid UTF-8 pass through untouched.
    fn on_http_response_complete_body(&mut self, res_body: &Bytes) -> DataAction {
        let Some(config) = self.config.clone() else {
            return DataAction::Continue;
        };
        let Ok(mut body) = String::from_utf8(res_body.clone()) else {
            return DataAction::Continue;
        };

        if config.deny_openai && self.is_openai {
            if let Ok(res) = serde_json::from_str::<Res>(body.as_str()) {
                for choice in res.choices {
                    let Some(message) = choice.message else {
                        continue;
                    };
                    if self.check_message(&message.content) {
                        return self.deny(true);
                    }
                    if self.mask_map.is_empty() {
                        continue;
                    }
                    let mut restored = message.content.clone();
                    for (masked, original) in self.mask_map.iter() {
                        if let Some(original) = original {
                            restored = restored.replace(masked, original);
                        }
                    }
                    if restored != message.content {
                        if let (Ok(from), Ok(to)) = (
                            serde_json::to_string(&message.content),
                            serde_json::to_string(&restored),
                        ) {
                            body = body.replace(&from, &to);
                        }
                    }
                }
                self.replace_http_response_body(body.as_bytes());
                return DataAction::Continue;
            }
        }

        if config.deny_raw {
            if self.check_message(&body) {
                return self.deny(true);
            }
            for (masked, original) in self.mask_map.iter() {
                if let Some(original) = original {
                    body = body.replace(masked, original);
                }
            }
            self.replace_http_response_body(body.as_bytes());
        }
        DataAction::Continue
    }
}
mod number_merge;

View File

@@ -0,0 +1,356 @@
// Copyright (c) 2025 Alibaba Group Holding Ltd.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use std::collections::HashMap;
use higress_wasm_rust::event_stream::EventStream;
use serde::Deserialize;
use serde_json::Value;
use crate::msg_window::MessageWindow;
use crate::number_merge::NumberMerge;
/// Which field of a streamed `delta` a piece of text belongs to.
///
/// `None` is the default and means no text has been seen yet.
#[derive(Default, PartialEq, Eq, Clone, Copy)]
enum MsgFlag {
    /// No content field has been observed so far.
    #[default]
    None,
    /// Text taken from `delta.content`.
    Content,
    /// Text taken from `delta.reasoning_content`.
    ReasoningContent,
}
/// The part of a streamed choice's `delta` object that this module reads.
#[derive(Deserialize)]
struct Delta {
/// Value of `delta.content`; stays `None` when the key is missing.
#[serde(default)]
content: Option<String>,
/// Value of `delta.reasoning_content`; stays `None` when the key is missing.
#[serde(default)]
reasoning_content: Option<String>,
}
/// One element of a stream chunk's `choices` array, reduced to the fields
/// this module uses.
#[derive(Deserialize)]
struct Choices {
/// Choice index, used as the key of the per-choice window; `0` when missing.
#[serde(default)]
index: i64,
/// Incremental text for this choice, if the chunk carries any.
#[serde(default)]
delta: Option<Delta>,
/// Value of `finish_reason`, when present.
#[serde(default)]
finish_reason: Option<String>,
}
impl Delta {
    /// Picks the text carried by this delta and says which field it came from.
    ///
    /// A non-empty `content` wins over a non-empty `reasoning_content`. When
    /// neither field holds text, `default_flag` is returned together with an
    /// empty slice.
    fn get_flag_msg(&self, default_flag: &MsgFlag) -> (MsgFlag, &[u8]) {
        let candidates = [
            (MsgFlag::Content, self.content.as_deref()),
            (MsgFlag::ReasoningContent, self.reasoning_content.as_deref()),
        ];
        for (flag, text) in candidates {
            if let Some(text) = text.filter(|t| !t.is_empty()) {
                return (flag, text.as_bytes());
            }
        }
        (*default_flag, &[])
    }
}
/// Top-level key of a stream chunk under which usage data is looked up.
const USAGE_PATH: &str = "usage";
/// Top-level key of a stream chunk under which the choices array is looked up.
const CHOICES_PATH: &str = "choices";
/// Finished text segments, each paired with the field kind it belongs to.
type MessageLine = Vec<(MsgFlag, Vec<u8>)>;
/// Buffering state for a single choice of a streamed response.
#[derive(Default)]
struct MessageWindowOpenAi {
/// Text accumulated for the field kind currently being received.
message_window: MessageWindow,
/// Segments closed off when the field kind changed, waiting to be emitted.
ret_messages: MessageLine,
/// Field kind of the text currently held in `message_window`.
flag: MsgFlag,
/// Most recent choice item passed to `update`; used as the template for
/// emitted items.
last_value: Value,
/// Finish reason remembered from an update, written into the final item.
finish_reason: Option<String>,
}
impl MessageWindowOpenAi {
    /// Feeds one choice item of a chunk into the window.
    ///
    /// `value` always becomes the new template for emitted items. A
    /// `finish_reason`, when present, is remembered even if `data` is empty:
    /// the terminating chunk of a stream typically carries the finish reason
    /// together with an empty delta, and dropping it there would make the
    /// final emitted item report `null`.
    ///
    /// Non-empty `data` is appended to the current segment; when `flag`
    /// differs from the current one, the segment collected so far is closed
    /// and queued under the previous flag first.
    fn update(
        &mut self,
        data: &[u8],
        flag: MsgFlag,
        value: &Value,
        finish_reason: &Option<String>,
    ) {
        self.last_value = value.clone();
        if let Some(fr) = finish_reason {
            self.finish_reason = Some(fr.clone());
        }
        if data.is_empty() {
            return;
        }
        if self.flag == MsgFlag::None {
            self.flag = flag;
        }
        if self.flag != flag {
            let last_flag = core::mem::replace(&mut self.flag, flag);
            let msg = self.message_window.finish();
            self.ret_messages.push((last_flag, msg));
        }
        self.message_window.update(data);
    }

    /// Builds a choice item from the template, carrying `msg` in the field
    /// selected by `flag`.
    ///
    /// `Content` writes `delta.content` and removes `delta.reasoning_content`;
    /// `ReasoningContent` writes `delta.reasoning_content` and sets
    /// `delta.content` to an empty string; `None` leaves the delta as it is.
    /// `finish_reason` is the remembered reason when `finish` is set and
    /// `null` otherwise.
    fn gen_value(&self, flag: &MsgFlag, msg: &[u8], finish: bool) -> Value {
        let mut ret = self.last_value.clone();
        match flag {
            MsgFlag::Content => {
                ret["delta"]["content"] = Value::String(String::from_utf8_lossy(msg).to_string());
                if let Some(m) = ret["delta"].as_object_mut() {
                    m.remove("reasoning_content");
                }
            }
            MsgFlag::ReasoningContent => {
                ret["delta"]["reasoning_content"] =
                    Value::String(String::from_utf8_lossy(msg).to_string());
                ret["delta"]["content"] = Value::String(String::new());
            }
            MsgFlag::None => {}
        }
        ret["finish_reason"] = match (&self.finish_reason, finish) {
            (Some(reason), true) => Value::String(reason.to_string()),
            _ => Value::Null,
        };
        ret
    }

    /// Drains the queued closed segments into choice items (never final).
    fn messages_to_value(&mut self) -> Vec<Value> {
        core::mem::take(&mut self.ret_messages)
            .into_iter()
            .map(|(flag, msg)| self.gen_value(&flag, &msg, false))
            .collect()
    }

    /// Emits the queued segments plus whatever the current segment releases
    /// for the given window sizes.
    fn pop(&mut self, char_window_size: usize, byte_window_size: usize) -> Vec<Value> {
        let mut ret = self.messages_to_value();
        let msg = self.message_window.pop(char_window_size, byte_window_size);
        if !msg.is_empty() {
            ret.push(self.gen_value(&self.flag, &msg, false));
        }
        ret
    }

    /// Emits everything that is left and resets the flag to `None`.
    ///
    /// A final item is always produced, even when no text remains, so that
    /// the remembered finish reason is delivered.
    fn finish(&mut self) -> Vec<Value> {
        let mut ret = self.messages_to_value();
        let msg = self.message_window.finish();
        let flag = core::mem::replace(&mut self.flag, MsgFlag::None);
        ret.push(self.gen_value(&flag, &msg, true));
        ret
    }

    /// Mutable access to every buffered segment: the queued ones first, then
    /// the current one.
    fn iter_mut(&mut self) -> impl Iterator<Item = &mut Vec<u8>> {
        self.ret_messages
            .iter_mut()
            .map(|(_, msg)| msg)
            .chain(self.message_window.iter_mut())
    }
}
/// Sliding window over a response stream, with separate handling for
/// OpenAI-style event streams and for plain bodies.
#[derive(Default)]
pub(crate) struct MsgWindow {
/// Splits pushed bytes into events when handling an event stream.
stream_parser: EventStream,
/// Buffer for plain bodies and for events that `update_event` hands back.
base_message_window: MessageWindow,
/// Per-choice windows, keyed by the choice `index`.
message_windows: HashMap<i64, MessageWindowOpenAi>,
/// Most recently parsed chunk; used as the template for emitted events.
last_value: Value,
/// Usage values collected from parsed chunks until the next emitted event.
usage: NumberMerge,
}
impl MsgWindow {
    /// Consumes one event of the stream.
    ///
    /// Events that are empty or do not start with `data:` are handed back
    /// unchanged. A `data:` event with a JSON payload is absorbed: the
    /// payload becomes the new template, its usage is merged and each choice
    /// that has a delta is fed into the window for its index. A payload that
    /// is not JSON is dropped when the event starts with `data: [DONE]` and
    /// handed back otherwise.
    fn update_event(&mut self, event: Vec<u8>) -> Option<Vec<u8>> {
        if event.is_empty() || !event.starts_with(b"data:") {
            return Some(event);
        }
        let parsed = serde_json::from_slice::<Value>(&event[b"data:".len()..]);
        let Ok(payload) = parsed else {
            return if event.starts_with(b"data: [DONE]") {
                None
            } else {
                Some(event)
            };
        };
        self.last_value = payload;
        if let Some(root) = self.last_value.as_object() {
            if let Some(usage) = root.get(USAGE_PATH) {
                self.usage.add(usage);
            }
            let choices = root.get(CHOICES_PATH).and_then(Value::as_array);
            for item in choices.into_iter().flatten() {
                let Ok(choice) = serde_json::from_value::<Choices>(item.clone()) else {
                    continue;
                };
                let Some(delta) = &choice.delta else {
                    continue;
                };
                let window = self.message_windows.entry(choice.index).or_default();
                let (flag, msg) = delta.get_flag_msg(&window.flag);
                window.update(msg, flag, item, &choice.finish_reason);
            }
        }
        None
    }

    /// Appends raw bytes to the plain buffer.
    fn push_base(&mut self, data: &[u8]) {
        self.base_message_window.update(data);
    }

    /// Feeds response bytes into the window.
    ///
    /// With `is_openai` the bytes go through the event parser and every
    /// complete event is processed; events that are handed back end up in
    /// the plain buffer. Without it the bytes go to the plain buffer as-is.
    pub(crate) fn push(&mut self, data: &[u8], is_openai: bool) {
        if !is_openai {
            self.push_base(data);
            return;
        }
        self.stream_parser.update(data.to_vec());
        while let Some(event) = self.stream_parser.next() {
            if let Some(leftover) = self.update_event(event) {
                self.push_base(&leftover);
            }
        }
    }

    /// Serializes one choice item as a `data:` event based on `template`,
    /// with `choices` set to that single item and `usage` attached.
    fn render_event(template: &Value, choice: Value, usage: Value) -> String {
        let mut event = template.clone();
        event[CHOICES_PATH] = Value::Array(vec![choice]);
        event[USAGE_PATH] = usage;
        format!("data: {}\n\n", event)
    }

    /// Releases the bytes that no longer need to be held back.
    ///
    /// For plain bodies this is what the plain buffer releases for the given
    /// window sizes. For event streams each per-choice window is asked in
    /// turn and every released item becomes one `data:` event carrying the
    /// usage collected since the previous event.
    pub(crate) fn pop(
        &mut self,
        char_window_size: usize,
        byte_window_size: usize,
        is_openai: bool,
    ) -> Vec<u8> {
        if !is_openai {
            return self
                .base_message_window
                .pop(char_window_size, byte_window_size);
        }
        let mut out = Vec::new();
        for window in self.message_windows.values_mut() {
            for choice in window.pop(char_window_size, byte_window_size) {
                let usage = self.usage.finish();
                out.extend(Self::render_event(&self.last_value, choice, usage).as_bytes());
            }
        }
        out
    }

    /// Releases everything that is still buffered.
    ///
    /// For event streams any partial event left in the parser is processed
    /// first, then every per-choice window is finished and its items are
    /// emitted as `data:` events.
    pub(crate) fn finish(&mut self, is_openai: bool) -> Vec<u8> {
        if !is_openai {
            return self.base_message_window.finish();
        }
        if let Some(event) = self.stream_parser.flush() {
            self.update_event(event);
        }
        let mut out = Vec::new();
        for window in self.message_windows.values_mut() {
            for choice in window.finish() {
                let usage = self.usage.finish();
                out.extend(Self::render_event(&self.last_value, choice, usage).as_bytes());
            }
        }
        out
    }

    /// Mutable access to every buffered text segment: the plain buffer
    /// first, then the segments of each per-choice window.
    pub(crate) fn messages_iter_mut(&mut self) -> impl Iterator<Item = &mut Vec<u8>> {
        let per_choice = self
            .message_windows
            .values_mut()
            .flat_map(|window| window.iter_mut());
        self.base_message_window.iter_mut().chain(per_choice)
    }
}
// Tests for the event-stream window, driven by a fixture embedded from `test/`.
#[cfg(test)]
mod tests {
use rust_embed::Embed;
use super::*;
// Embeds the files under `test/` so fixtures can be loaded by name.
#[derive(Embed)]
#[folder = "test/"]
struct Asset;
// Minimal view of an emitted chunk: only the choices are inspected.
#[derive(Deserialize)]
struct Res {
choices: Vec<Choices>,
}
impl Res {
// Concatenates, over all choices, the `content` and `reasoning_content`
// texts and returns them as `(content, reasoning_content)`.
fn get_text(&self) -> (String, String) {
let mut content = String::new();
let mut reasoning_content = String::new();
for choice in self.choices.iter() {
if let Some(delta) = &choice.delta {
if let Some(c) = &delta.content {
content += c;
}
if let Some(rc) = &delta.reasoning_content {
reasoning_content += rc;
}
}
}
(content, reasoning_content)
}
}
// Feeds the fixture line by line, rewrites "Higress" inside the buffered
// segments before each pop, and checks that the text reassembled from the
// emitted events contains the rewritten word for both content kinds.
#[test]
fn test_msg() {
let mut msg_win = MsgWindow::default();
let data = raw_message("raw_message.txt");
let mut buffer = Vec::new();
for line in data.split("\n") {
// Each fixture line is pushed followed by a blank line.
msg_win.push(line.as_bytes(), true);
msg_win.push(b"\n\n", true);
for message in msg_win.messages_iter_mut() {
if let Ok(mut msg) = String::from_utf8(message.clone()) {
msg = msg.replace("Higress", "***higress***");
message.clear();
message.extend_from_slice(msg.as_bytes());
}
}
// Hold back 7 chars / 7 bytes, the length of the word being replaced.
buffer.extend(msg_win.pop(7, 7, true));
}
buffer.extend(msg_win.finish(true));
let mut message = String::new();
let mut reasoning_message = String::new();
// Every non-empty output line must be a `data:` event with a parsable payload.
for line in buffer.split(|&x| x == b'\n') {
if line.is_empty() {
continue;
}
assert!(line.starts_with(b"data:"));
if line.starts_with(b"data: [DONE]") {
continue;
}
let des = serde_json::from_slice::<Res>(&line[b"data:".len()..]);
assert!(des.is_ok());
let res = des.unwrap();
let (c, rc) = res.get_text();
message.push_str(&c);
reasoning_message.push_str(&rc);
}
let res = "***higress*** 是一个基于 Istio 的高性能服务网格数据平面项目,旨在提供高吞吐量、低延迟和可扩展的服务通信管理。它为企业级应用提供了丰富的流量治理功能,如负载均衡、熔断、限流等,并支持多协议代理(包括 HTTP/1.1, HTTP/2, gRPC。***higress*** 的设计目标是优化 Istio 在大规模集群中的性能表现,满足高并发场景下的需求。";
assert_eq!(message, res);
assert_eq!(reasoning_message, res);
}
// Loads an embedded fixture as UTF-8 text; returns an empty string when the
// file is missing or not valid UTF-8.
fn raw_message(file_name: &str) -> String {
if let Some(file) = Asset::get(file_name) {
if let Ok(data) = std::str::from_utf8(file.data.as_ref()) {
return data.to_string();
}
}
String::new()
}
}

View File

@@ -1,56 +1,28 @@
use higress_wasm_rust::event_stream::EventStream;
use serde_json::json;
use crate::{Res, Usage};
// Copyright (c) 2025 Alibaba Group Holding Ltd.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#[derive(Default)]
pub(crate) struct MsgWindow {
stream_parser: EventStream,
pub(crate) message: Vec<u8>,
usage: Usage,
pub(crate) struct MessageWindow {
message: Vec<u8>,
}
impl MsgWindow {
pub fn new() -> Self {
MsgWindow::default()
impl MessageWindow {
pub(crate) fn update(&mut self, data: &[u8]) {
self.message.extend(data);
}
fn update_event(&mut self, event: Vec<u8>) -> Option<Vec<u8>> {
if event.is_empty() || !event.starts_with(b"data:") {
Some(event)
} else if let Ok(res) = serde_json::from_slice::<Res>(&event[b"data:".len()..]) {
for choice in &res.choices {
if let Some(delta) = &choice.delta {
self.message.extend(delta.content.as_bytes());
}
}
self.usage.add(&res.usage);
None
} else if event.starts_with(b"data: [DONE]") {
None
} else {
Some(event)
}
}
pub fn push(&mut self, data: &[u8], is_openai: bool) {
if is_openai {
self.stream_parser.update(data.to_vec());
while let Some(event) = self.stream_parser.next() {
if let Some(msg) = self.update_event(event) {
self.message.extend(msg);
}
}
} else {
self.message.extend(data);
}
}
pub fn pop(
&mut self,
char_window_size: usize,
byte_window_size: usize,
is_openai: bool,
) -> Vec<u8> {
pub(crate) fn pop(&mut self, char_window_size: usize, byte_window_size: usize) -> Vec<u8> {
if let Ok(message) = String::from_utf8(self.message.clone()) {
let chars = message.chars().collect::<Vec<char>>();
if chars.len() <= char_window_size {
@@ -64,277 +36,31 @@ impl MsgWindow {
.collect::<String>()
.as_bytes()
.to_vec();
if is_openai {
let usage = self.usage.clone();
self.usage.reset();
format!(
"data: {}\n\n",
json!({"choices": [{"index": 0, "delta": {"role": "assistant", "content": ret}}], "usage": usage})
).as_bytes().to_vec()
} else {
ret.as_bytes().to_vec()
}
ret.as_bytes().to_vec()
} else {
let ret = self.message[..self.message.len() - byte_window_size].to_vec();
self.message = self.message[self.message.len() - byte_window_size..].to_vec();
ret
}
}
pub fn finish(&mut self, is_openai: bool) -> Vec<u8> {
if let Some(event) = self.stream_parser.flush() {
self.update_event(event);
}
if self.message.is_empty() {
Vec::new()
} else if is_openai {
format!(
"data: {}\n\ndata: [DONE]\n\n",
json!({"choices": [{"index": 0, "delta": {"role": "assistant", "content": String::from_utf8_lossy(&self.message)}}], "usage": self.usage})
).as_bytes().to_vec()
} else {
self.message.clone()
}
pub(crate) fn finish(&mut self) -> Vec<u8> {
core::mem::take(&mut self.message)
}
pub(crate) fn iter_mut(&mut self) -> impl Iterator<Item = &mut Vec<u8>> {
std::iter::once(&mut self.message)
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_msg() {
let mut msg_win = MsgWindow::default();
let data = r#"data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872009,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"H"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872009,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"ig"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872009,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"ress"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872009,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":" 是"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872009,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"一个"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872009,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"基于"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872009,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":" Ist"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872009,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"io"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872009,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":" 的"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872009,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"高性能"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872009,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"服务"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872009,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"网格"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872009,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"数据"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872009,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"平面"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872009,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"项目"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872009,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":""},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872009,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"旨在"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872009,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"提供"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872009,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"高"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872009,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"吞"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872010,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"吐"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872010,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"量"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872010,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"、"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872010,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"低"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872010,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"延迟"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872010,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"和"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872010,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"可"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872010,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"扩展"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872010,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"的服务"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872010,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"通信"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872010,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"管理"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872010,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"。"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872010,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"它"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872010,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"为企业"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872010,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"级"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872010,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"应用"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872010,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"提供了"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872010,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"丰富的"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872010,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"流量"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872010,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"治理"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872010,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"功能"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872010,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":""},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872010,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"如"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872010,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"负载"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872010,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"均衡"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872010,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"、"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872010,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"熔"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872010,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"断"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872010,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"、"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872010,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"限"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872010,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"流"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872010,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"等"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872011,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":",并"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872011,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"支持"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872011,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"多"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872011,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"协议"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872011,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"代理"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872011,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":""},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872011,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"包括"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872011,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":" HTTP"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872011,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"/"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872011,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"1"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872011,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"."},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872011,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"1"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872011,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":","},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872011,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":" HTTP"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872011,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"/"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872011,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"2"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872011,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":","},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872011,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":" g"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872011,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"RPC"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872011,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":")。"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872011,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"H"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872011,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"ig"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872011,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"ress"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872011,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":" 的"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872011,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"设计"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872011,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"目标"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872011,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"是"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872011,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"优化"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872011,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":" Ist"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872011,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"io"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872011,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":" 在"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872011,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"大规模"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872011,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"集群"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872011,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"中的"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872012,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"性能"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872012,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"表现"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872012,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":""},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872012,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"满足"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872012,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"高"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872012,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"并发"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872012,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"场景"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872012,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"下的"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872012,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"需求"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872012,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"。"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872012,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{},"finish_reason":"stop"}],"usage":{}}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872012,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{}}],"usage":{"prompt_tokens":372,"completion_tokens":9,"total_tokens":381}}
data: [DONE]
"#;
let mut buffer = Vec::new();
for line in data.split("\n\n") {
msg_win.push(line.as_bytes(), true);
msg_win.push(b"\n\n", true);
if let Ok(mut msg) = String::from_utf8(msg_win.message.clone()) {
msg = msg.replace("Higress", "***higress***");
msg_win.message = msg.as_bytes().to_vec();
}
buffer.extend(msg_win.pop(7, 7, true));
}
buffer.extend(msg_win.finish(true));
let mut message = String::new();
for line in buffer.split(|&x| x == b'\n') {
if line.is_empty() {
continue;
}
assert!(line.starts_with(b"data:"));
if line.starts_with(b"data: [DONE]") {
continue;
}
let des = serde_json::from_slice(&line[b"data:".len()..]);
assert!(des.is_ok());
let res: Res = des.unwrap();
for choice in &res.choices {
if let Some(delta) = &choice.delta {
message.push_str(&delta.content);
}
}
}
assert_eq!(message, "***higress*** 是一个基于 Istio 的高性能服务网格数据平面项目,旨在提供高吞吐量、低延迟和可扩展的服务通信管理。它为企业级应用提供了丰富的流量治理功能,如负载均衡、熔断、限流等,并支持多协议代理(包括 HTTP/1.1, HTTP/2, gRPC。***higress*** 的设计目标是优化 Istio 在大规模集群中的性能表现,满足高并发场景下的需求。");
fn test_msg_window() {
let mut msg_window = super::MessageWindow::default();
msg_window.update(b"hello world");
assert_eq!(msg_window.pop(5, 5), b"hello ");
assert_eq!(msg_window.pop(5, 5), b"");
msg_window.update(b"hello world");
assert_eq!(msg_window.pop(5, 5), b"worldhello ");
assert_eq!(msg_window.finish(), b"world");
}
}

View File

@@ -0,0 +1,59 @@
// Copyright (c) 2025 Alibaba Group Holding Ltd.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use serde_json::{json, Number, Value};
fn merge_number(target: &mut Value, add: &Value) {
if target.is_null() {
if add.is_object() {
*target = json!({});
} else if add.is_number() {
*target = Value::from(0i64);
} else {
return;
}
}
match (target, add) {
(Value::Object(t), Value::Object(a)) => {
for (key, value) in a.iter() {
if let Some(v) = t.get_mut(key) {
merge_number(v, value);
} else {
t.insert(key.clone(), value.clone());
}
}
}
(Value::Number(t), Value::Number(a)) => {
*t = Number::from(
t.as_i64()
.unwrap_or_default()
.saturating_add(a.as_i64().unwrap_or_default()),
);
}
_ => {}
}
}
/// Accumulator that folds successive JSON values into one running total
/// by passing them to `merge_number`.
#[derive(Default, Clone)]
pub(crate) struct NumberMerge {
// Running total. With the derived `Default` this starts as serde_json's
// default `Value` (`Null`), which `merge_number` initialises on first use.
value: Value,
}
impl NumberMerge {
    /// Folds `number` into the running total via `merge_number`.
    pub(crate) fn add(&mut self, number: &Value) {
        let total = &mut self.value;
        merge_number(total, number);
    }

    /// Returns the accumulated total and resets the accumulator to
    /// `Value::Null`, ready for a fresh round of `add` calls.
    pub(crate) fn finish(&mut self) -> Value {
        self.value.take()
    }
}

View File

@@ -0,0 +1,196 @@
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872009,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","reasoning_content":"H"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872009,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","reasoning_content":"ig"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872009,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","reasoning_content":"ress"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872009,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","reasoning_content":" 是"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872009,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","reasoning_content":"一个"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872009,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","reasoning_content":"基于"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872009,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","reasoning_content":" Ist"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872009,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","reasoning_content":"io"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872009,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","reasoning_content":" 的"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872009,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","reasoning_content":"高性能"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872009,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","reasoning_content":"服务"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872009,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","reasoning_content":"网格"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872009,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","reasoning_content":"数据"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872009,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","reasoning_content":"平面"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872009,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","reasoning_content":"项目"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872009,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","reasoning_content":""},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872009,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","reasoning_content":"旨在"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872009,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","reasoning_content":"提供"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872009,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","reasoning_content":"高"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872009,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","reasoning_content":"吞"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872010,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","reasoning_content":"吐"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872010,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","reasoning_content":"量"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872010,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","reasoning_content":"、"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872010,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","reasoning_content":"低"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872010,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","reasoning_content":"延迟"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872010,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","reasoning_content":"和"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872010,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","reasoning_content":"可"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872010,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","reasoning_content":"扩展"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872010,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","reasoning_content":"的服务"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872010,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","reasoning_content":"通信"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872010,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","reasoning_content":"管理"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872010,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","reasoning_content":"。"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872010,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","reasoning_content":"它"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872010,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","reasoning_content":"为企业"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872010,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","reasoning_content":"级"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872010,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","reasoning_content":"应用"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872010,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","reasoning_content":"提供了"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872010,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","reasoning_content":"丰富的"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872010,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","reasoning_content":"流量"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872010,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","reasoning_content":"治理"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872010,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","reasoning_content":"功能"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872010,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","reasoning_content":""},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872010,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","reasoning_content":"如"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872010,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","reasoning_content":"负载"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872010,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","reasoning_content":"均衡"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872010,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","reasoning_content":"、"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872010,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","reasoning_content":"熔"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872010,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","reasoning_content":"断"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872010,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","reasoning_content":"、"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872010,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","reasoning_content":"限"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872010,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","reasoning_content":"流"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872010,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","reasoning_content":"等"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872011,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","reasoning_content":",并"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872011,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","reasoning_content":"支持"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872011,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","reasoning_content":"多"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872011,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","reasoning_content":"协议"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872011,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","reasoning_content":"代理"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872011,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","reasoning_content":""},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872011,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","reasoning_content":"包括"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872011,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","reasoning_content":" HTTP"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872011,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","reasoning_content":"/"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872011,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","reasoning_content":"1"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872011,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","reasoning_content":"."},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872011,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","reasoning_content":"1"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872011,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","reasoning_content":","},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872011,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","reasoning_content":" HTTP"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872011,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","reasoning_content":"/"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872011,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","reasoning_content":"2"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872011,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","reasoning_content":","},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872011,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","reasoning_content":" g"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872011,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","reasoning_content":"RPC"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872011,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","reasoning_content":")。"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872011,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","reasoning_content":"H"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872011,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","reasoning_content":"ig"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872011,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","reasoning_content":"ress"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872011,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","reasoning_content":" 的"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872011,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","reasoning_content":"设计"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872011,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","reasoning_content":"目标"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872011,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","reasoning_content":"是"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872011,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","reasoning_content":"优化"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872011,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","reasoning_content":" Ist"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872011,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","reasoning_content":"io"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872011,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","reasoning_content":" 在"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872011,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","reasoning_content":"大规模"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872011,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","reasoning_content":"集群"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872011,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","reasoning_content":"中的"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872012,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","reasoning_content":"性能"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872012,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","reasoning_content":"表现"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872012,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","reasoning_content":""},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872012,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","reasoning_content":"满足"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872012,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","reasoning_content":"高"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872012,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","reasoning_content":"并发"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872012,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","reasoning_content":"场景"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872012,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","reasoning_content":"下的"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872012,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","reasoning_content":"需求"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872012,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","reasoning_content":"。"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872009,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"H"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872009,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"ig"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872009,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"ress"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872009,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":" 是"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872009,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"一个"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872009,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"基于"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872009,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":" Ist"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872009,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"io"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872009,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":" 的"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872009,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"高性能"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872009,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"服务"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872009,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"网格"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872009,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"数据"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872009,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"平面"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872009,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"项目"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872009,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":""},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872009,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"旨在"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872009,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"提供"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872009,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"高"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872009,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"吞"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872010,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"吐"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872010,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"量"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872010,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"、"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872010,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"低"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872010,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"延迟"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872010,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"和"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872010,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"可"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872010,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"扩展"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872010,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"的服务"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872010,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"通信"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872010,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"管理"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872010,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"。"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872010,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"它"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872010,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"为企业"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872010,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"级"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872010,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"应用"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872010,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"提供了"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872010,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"丰富的"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872010,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"流量"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872010,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"治理"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872010,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"功能"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872010,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":""},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872010,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"如"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872010,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"负载"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872010,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"均衡"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872010,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"、"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872010,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"熔"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872010,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"断"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872010,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"、"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872010,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"限"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872010,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"流"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872010,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"等"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872011,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":",并"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872011,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"支持"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872011,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"多"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872011,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"协议"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872011,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"代理"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872011,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":""},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872011,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"包括"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872011,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":" HTTP"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872011,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"/"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872011,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"1"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872011,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"."},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872011,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"1"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872011,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":","},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872011,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":" HTTP"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872011,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"/"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872011,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"2"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872011,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":","},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872011,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":" g"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872011,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"RPC"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872011,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":")。"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872011,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"H"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872011,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"ig"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872011,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"ress"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872011,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":" 的"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872011,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"设计"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872011,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"目标"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872011,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"是"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872011,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"优化"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872011,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":" Ist"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872011,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"io"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872011,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":" 在"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872011,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"大规模"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872011,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"集群"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872011,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"中的"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872012,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"性能"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872012,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"表现"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872012,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":""},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872012,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"满足"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872012,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"高"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872012,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"并发"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872012,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"场景"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872012,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"下的"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872012,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"需求"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872012,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"。"},"finish_reason":null}]}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872012,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":null,"finish_reason":"stop"}],"usage":null}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872012,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":1,"delta":{},"finish_reason":"stop"}],"usage":{}}
data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872012,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{}}],"usage":{"prompt_tokens":372,"completion_tokens":9,"total_tokens":381}}
data: [DONE]

View File

@@ -141,12 +141,13 @@ func (n *NacosRegistryClient) listMcpServerConfigs() ([]model.ConfigItem, error)
})
if err != nil {
mcpServerLog.Errorf("List mcp server configs for page size %d, page number %d error %v", currentPageNum, DefaultNacosListConfigPageSize)
mcpServerLog.Errorf("List mcp server configs for page size %d, page number %d error %v", currentPageNum, DefaultNacosListConfigPageSize, err)
break
}
if configPage == nil {
mcpServerLog.Errorf("List mcp server configs for page size %d, page number %d null %v", currentPageNum, DefaultNacosListConfigPageSize)
continue
mcpServerLog.Errorf("List mcp server configs for page size %d, page number %d null", currentPageNum, DefaultNacosListConfigPageSize)
break
}
result = append(result, configPage.PageItems...)

View File

@@ -50,6 +50,8 @@ const (
DefaultNacosCacheDir = "/var/log/nacos/log/mcp/cache"
DefaultNacosNotLoadCache = true
DefaultNacosLogMaxAge = 3
DefaultNacosLogMaxSize = 64
DefaultNacosLogMaxBackups = 3
DefaultRefreshInterval = time.Second * 30
DefaultRefreshIntervalLimit = time.Second * 10
)
@@ -128,6 +130,8 @@ func NewWatcher(cache memory.Cache, opts ...WatcherOption) (provider.Watcher, er
constant.WithNotLoadCacheAtStart(DefaultNacosNotLoadCache),
constant.WithLogRollingConfig(&constant.ClientLogRollingConfig{
MaxAge: DefaultNacosLogMaxAge,
MaxSize: DefaultNacosLogMaxSize,
MaxBackups: DefaultNacosLogMaxBackups,
}),
constant.WithUpdateCacheWhenEmpty(w.updateCacheWhenEmpty),
constant.WithNamespaceId(w.NacosNamespaceId),

View File

@@ -183,7 +183,7 @@ var RustWasmPluginsAiDataMasking = suite.ConformanceTest{
"replace.raw.com",
true,
[]byte("{\"test\":[{\"test\":\"test\", \"test1\":\"127.0.0.1 admin@gmail.com sk-12345\"}]}"),
[]byte("{\"res\":\"***.***.***.*** 48a7e98a91d93896d8dac522c5853948 ****@gmail.com\"}"),
[]byte("{\"res\":\"***.***.***.*** c11e7177eb60c80cf983ddf8ca98f2dc1272d4c612204ce9bedd2460b18939cc ****@gmail.com\"}"),
))
t.Run("WasmPlugins ai-data-masking", func(t *testing.T) {

View File

@@ -62,7 +62,7 @@ spec:
config:
headers:
- Content-Type=application/json
"body": "{\"choices\":[{\"index\":0,\"message\":{\"role\":\"assistant\",\"content\":\"***.***.***.*** 48a7e98a91d93896d8dac522c5853948 ****@gmail.com\"}}],\"usage\":{}}"
"body": "{\"choices\":[{\"index\":0,\"message\":{\"role\":\"assistant\",\"content\":\"***.***.***.*** c11e7177eb60c80cf983ddf8ca98f2dc1272d4c612204ce9bedd2460b18939cc ****@gmail.com\"}}],\"usage\":{}}"
- domain:
- system_deny.openai.com
@@ -93,7 +93,7 @@ spec:
config:
headers:
- Content-Type=application/json
"body": "{\"res\":\"***.***.***.*** 48a7e98a91d93896d8dac522c5853948 ****@gmail.com\"}"
"body": "{\"res\":\"***.***.***.*** c11e7177eb60c80cf983ddf8ca98f2dc1272d4c612204ce9bedd2460b18939cc ****@gmail.com\"}"
- domain:
- system_deny.raw.com
config:

View File

@@ -88,6 +88,11 @@ checkDesiredVersion() {
elif [ "${HAS_WGET}" == "true" ]; then
VERSION=$(wget $latest_release_url -O - 2>&1 | grep 'href="/alibaba/higress/releases/tag/v[0-9]*.[0-9]*.[0-9]*\"' | sed -E 's/.*\/alibaba\/higress\/releases\/tag\/(v[0-9\.]+)".*/\1/g' | head -1)
fi
if [ "$VERSION" == "" ]; then
echo "Failed to determine latest version. Please check network or set VERSION manually."
exit 1
fi
fi
}