feat(ai-security-guard): structured x_higress deny response, error-path metrics, and AI logging (#3894)

Co-authored-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com>
Co-authored-by: Claude Opus 4.7 <noreply@anthropic.com>
Co-authored-by: rinfx <yucheng.lxr@alibaba-inc.com>
This commit is contained in:
JianweiWang
2026-05-29 10:45:10 +08:00
committed by GitHub
parent 385f8d8b4e
commit c21a38e783
14 changed files with 2181 additions and 195 deletions

View File

@@ -6,12 +6,16 @@ import (
"fmt"
"regexp"
"strings"
"time"
"github.com/alibaba/higress/plugins/wasm-go/extensions/ai-security-guard/utils"
"github.com/higress-group/proxy-wasm-go-sdk/proxywasm"
"github.com/higress-group/wasm-go/pkg/wrapper"
"github.com/tidwall/gjson"
)
type OpenAIDenyResponseFormat string
const (
MaxRisk = "max"
HighRisk = "high"
@@ -35,10 +39,31 @@ const (
WaterMarkType = "waterMark"
// Default configurations
OpenAIResponseFormat = `{"id": "%s","object":"chat.completion","model":"from-security-guard","choices":[{"index":0,"message":{"role":"assistant","content":"%s"},"logprobs":null,"finish_reason":"stop"}],"usage":{"prompt_tokens":0,"completion_tokens":0,"total_tokens":0}}`
OpenAIStreamResponseChunk = `data:{"id":"%s","object":"chat.completion.chunk","model":"from-security-guard","choices":[{"index":0,"delta":{"role":"assistant","content":"%s"},"logprobs":null,"finish_reason":null}]}`
OpenAIStreamResponseEnd = `data:{"id":"%s","object":"chat.completion.chunk","model":"from-security-guard","choices":[{"index":0,"delta":{},"logprobs":null,"finish_reason":"stop"}],"usage":{"prompt_tokens":0,"completion_tokens":0,"total_tokens":0}}`
OpenAIStreamResponseFormat = OpenAIStreamResponseChunk + "\n\n" + OpenAIStreamResponseEnd + "\n\n" + `data: [DONE]`
// Template parameter order:
// OpenAIResponseFormatLegacy: id, created (unix sec), content
// OpenAIResponseFormatStructured: id, created (unix sec), content, x_higress_guardrail JSON
// OpenAIStreamResponseChunk: id, created, content
// OpenAIStreamResponseEndLegacy: id, created
// OpenAIStreamResponseEndStructured: id, created, x_higress_guardrail JSON
// OpenAIStreamResponseFormatLegacy: id, created, content, id, created
// OpenAIStreamResponseFormatStructured: id, created, content, id, created, x_higress_guardrail JSON
// `created` is required by openai-python (ChatCompletion.created is non-Optional).
// `finish_reason: "stop"` preserves wire-level compatibility with downstream
// consumers (LangChain / LiteLLM / SDKs / BI dashboards) that treat `stop` as
// "valid completion"; the moderation-event signal lives in the nested
// `choices[0].x_higress_guardrail` block (denyCode / blockedDetails) instead.
OpenAIResponseFormatLegacy = `{"id":"%s","object":"chat.completion","created":%d,"model":"from-security-guard","choices":[{"index":0,"message":{"role":"assistant","content":"%s"},"logprobs":null,"finish_reason":"stop"}],"usage":{"prompt_tokens":0,"completion_tokens":0,"total_tokens":0}}`
OpenAIResponseFormatStructured = `{"id":"%s","object":"chat.completion","created":%d,"model":"from-security-guard","choices":[{"index":0,"message":{"role":"assistant","content":"%s"},"logprobs":null,"finish_reason":"stop","x_higress_guardrail":%s}],"usage":{"prompt_tokens":0,"completion_tokens":0,"total_tokens":0}}`
OpenAIStreamResponseChunk = `data:{"id":"%s","object":"chat.completion.chunk","created":%d,"model":"from-security-guard","choices":[{"index":0,"delta":{"role":"assistant","content":"%s"},"logprobs":null,"finish_reason":null}]}`
OpenAIStreamResponseEndLegacy = `data:{"id":"%s","object":"chat.completion.chunk","created":%d,"model":"from-security-guard","choices":[{"index":0,"delta":{},"logprobs":null,"finish_reason":"stop"}],"usage":{"prompt_tokens":0,"completion_tokens":0,"total_tokens":0}}`
OpenAIStreamResponseEndStructured = `data:{"id":"%s","object":"chat.completion.chunk","created":%d,"model":"from-security-guard","choices":[{"index":0,"delta":{},"logprobs":null,"finish_reason":"stop","x_higress_guardrail":%s}],"usage":{"prompt_tokens":0,"completion_tokens":0,"total_tokens":0}}`
OpenAIStreamResponseFormatLegacy = OpenAIStreamResponseChunk + "\n\n" + OpenAIStreamResponseEndLegacy + "\n\n" + `data: [DONE]`
OpenAIStreamResponseFormatStructured = OpenAIStreamResponseChunk + "\n\n" + OpenAIStreamResponseEndStructured + "\n\n" + `data: [DONE]`
OpenAIDenyResponseFormatLegacy OpenAIDenyResponseFormat = "legacy"
OpenAIDenyResponseFormatStructured OpenAIDenyResponseFormat = "structured"
OpenAIDenyResponseFormatConsumerScopeError = "openAIDenyResponseFormat must be configured at plugin global scope, not under consumerRiskLevel"
DefaultDenyCode = 200
DefaultDenyMessage = "很抱歉,我无法回答您的问题"
@@ -184,6 +209,7 @@ type AISecurityConfig struct {
DenyCode int64
DenyMessage string
ProtocolOriginal bool
OpenAIDenyResponseFormat OpenAIDenyResponseFormat
RiskLevelBar string
ContentModerationLevelBar string
PromptAttackLevelBar string
@@ -296,6 +322,16 @@ func (config *AISecurityConfig) Parse(json gjson.Result) error {
config.CheckRequestImage = json.Get("checkRequestImage").Bool()
config.CheckResponse = json.Get("checkResponse").Bool()
config.ProtocolOriginal = json.Get("protocol").String() == "original"
if obj := json.Get("openAIDenyResponseFormat"); obj.Exists() {
switch OpenAIDenyResponseFormat(obj.String()) {
case OpenAIDenyResponseFormatLegacy:
config.OpenAIDenyResponseFormat = OpenAIDenyResponseFormatLegacy
case OpenAIDenyResponseFormatStructured:
config.OpenAIDenyResponseFormat = OpenAIDenyResponseFormatStructured
default:
return errors.New("invalid openAIDenyResponseFormat, value must be one of [legacy, structured]")
}
}
config.DenyMessage = json.Get("denyMessage").String()
if obj := json.Get("denyCode"); obj.Exists() {
config.DenyCode = obj.Int()
@@ -411,6 +447,9 @@ func (config *AISecurityConfig) Parse(json gjson.Result) error {
for k, v := range item.Map() {
m[k] = v.Value()
}
if _, ok := m["openAIDenyResponseFormat"]; ok {
return errors.New(OpenAIDenyResponseFormatConsumerScopeError)
}
consumerName, ok1 := m["name"]
matchType, ok2 := m["matchType"]
if !ok1 || !ok2 {
@@ -531,6 +570,7 @@ func (config *AISecurityConfig) SetDefaultValues() {
config.ApiType = ApiTextGeneration
config.ProviderType = ProviderOpenAI
config.RiskAction = "block"
config.OpenAIDenyResponseFormat = OpenAIDenyResponseFormatLegacy
}
func (config *AISecurityConfig) IncrementCounter(metricName string, inc uint64) {
@@ -951,6 +991,151 @@ func BuildDenyResponseBody(response Response, config AISecurityConfig, consumer
return json.Marshal(body)
}
// ResolveDenyMessage returns the human-readable deny text used both for
// non-original OpenAI wrappers (message.content / delta.content) and for the
// x_higress_guardrail.denyMessage field, ensuring the two stay aligned.
func ResolveDenyMessage(config AISecurityConfig) string {
if config.DenyMessage != "" {
return config.DenyMessage
}
return DefaultDenyMessage
}
// BuildOpenAIDenyResponseBody builds the guardrail JSON object embedded by the
// outer OpenAI structured template as choices[0].x_higress_guardrail. Its shape
// mirrors DenyResponseBody, but DenyMessage is filled via ResolveDenyMessage so
// the field is always present and consistent with the rendered content.
// Code is sourced from config.DenyCode so that x_higress_guardrail.code
// consistently represents "the HTTP status this gateway returns to the client",
// aligned with BuildOpenAIFallbackDenyResponseBody.
func BuildOpenAIDenyResponseBody(response Response, config AISecurityConfig, consumer string) ([]byte, error) {
details := GetUnacceptableDetail(response.Data, config, consumer)
blocked := make([]BlockedDetail, 0, len(details))
for _, d := range details {
blocked = append(blocked, BlockedDetail{
Type: d.Type,
Level: d.Level,
})
}
body := DenyResponseBody{
Code: int(config.DenyCode),
DenyMessage: ResolveDenyMessage(config),
BlockedDetails: blocked,
}
return json.Marshal(body)
}
// BuildOpenAIFallbackDenyResponseBody builds the guardrail JSON object embedded
// by the outer OpenAI structured template as choices[0].x_higress_guardrail for
// mask→block fallback paths. The fallback is triggered by
// ReplaceJsonFieldTextContent failure or empty desensitization, so there is no
// upstream Response object to derive blockedDetails from.
func BuildOpenAIFallbackDenyResponseBody(config AISecurityConfig) ([]byte, error) {
body := DenyResponseBody{
Code: int(config.DenyCode),
DenyMessage: ResolveDenyMessage(config),
BlockedDetails: []BlockedDetail{},
}
return json.Marshal(body)
}
func openAIDenyContentType(isStream bool) [][2]string {
if isStream {
return [][2]string{{"content-type", "text/event-stream;charset=UTF-8"}}
}
return [][2]string{{"content-type", "application/json"}}
}
// BuildOpenAIDenyData builds the complete OpenAI-formatted deny response bytes
// (structured or legacy). Callers that need raw bytes (e.g. streaming response
// handlers using InjectEncodedDataToFilterChain) use this directly; callers that
// want a full SendHttpResponse dispatch should use SendDenyResponse instead.
func BuildOpenAIDenyData(config AISecurityConfig, response Response, consumer string, isStream bool) ([]byte, error) {
if config.OpenAIDenyResponseFormat == OpenAIDenyResponseFormatStructured {
guardrailBody, err := BuildOpenAIDenyResponseBody(response, config, consumer)
if err != nil {
return nil, err
}
marshalledDenyMessage := wrapper.MarshalStr(ResolveDenyMessage(config))
randomID := utils.GenerateRandomChatID()
createdTs := time.Now().Unix()
if isStream {
return []byte(fmt.Sprintf(OpenAIStreamResponseFormatStructured, randomID, createdTs, marshalledDenyMessage, randomID, createdTs, string(guardrailBody))), nil
}
return []byte(fmt.Sprintf(OpenAIResponseFormatStructured, randomID, createdTs, marshalledDenyMessage, string(guardrailBody))), nil
}
denyBody, err := BuildDenyResponseBody(response, config, consumer)
if err != nil {
return nil, err
}
marshalledDenyBody := wrapper.MarshalStr(string(denyBody))
randomID := utils.GenerateRandomChatID()
createdTs := time.Now().Unix()
if isStream {
return []byte(fmt.Sprintf(OpenAIStreamResponseFormatLegacy, randomID, createdTs, marshalledDenyBody, randomID, createdTs)), nil
}
return []byte(fmt.Sprintf(OpenAIResponseFormatLegacy, randomID, createdTs, marshalledDenyBody)), nil
}
// SendDenyResponse dispatches a deny HTTP response in the appropriate format
// (ProtocolOriginal, Structured, or Legacy). It returns an error only if
// building the response body fails; the caller should handle the error
// (e.g. log, mark guardrail error, resume request/response).
func SendDenyResponse(config AISecurityConfig, response Response, consumer string, isStream bool) error {
if config.ProtocolOriginal {
denyBody, err := BuildDenyResponseBody(response, config, consumer)
if err != nil {
return err
}
proxywasm.SendHttpResponse(uint32(config.DenyCode), [][2]string{{"content-type", "application/json"}}, denyBody, -1)
return nil
}
data, err := BuildOpenAIDenyData(config, response, consumer, isStream)
if err != nil {
return err
}
proxywasm.SendHttpResponse(uint32(config.DenyCode), openAIDenyContentType(isStream), data, -1)
return nil
}
// SendFallbackDenyResponse dispatches a fallback deny HTTP response when no
// upstream Response object is available (e.g. mask-to-block on replace error
// or empty desensitization). For ProtocolOriginal it sends the plain deny
// message; for OpenAI formats it wraps it in the appropriate template with
// an empty-blockedDetails guardrail object (structured) or the deny message
// as content (legacy).
func SendFallbackDenyResponse(config AISecurityConfig, isStream bool) error {
marshalledDenyMessage := wrapper.MarshalStr(ResolveDenyMessage(config))
if config.ProtocolOriginal {
proxywasm.SendHttpResponse(uint32(config.DenyCode), [][2]string{{"content-type", "application/json"}}, []byte(marshalledDenyMessage), -1)
return nil
}
randomID := utils.GenerateRandomChatID()
createdTs := time.Now().Unix()
if config.OpenAIDenyResponseFormat == OpenAIDenyResponseFormatStructured {
guardrailBody, err := BuildOpenAIFallbackDenyResponseBody(config)
if err != nil {
return err
}
var data []byte
if isStream {
data = []byte(fmt.Sprintf(OpenAIStreamResponseFormatStructured, randomID, createdTs, marshalledDenyMessage, randomID, createdTs, string(guardrailBody)))
} else {
data = []byte(fmt.Sprintf(OpenAIResponseFormatStructured, randomID, createdTs, marshalledDenyMessage, string(guardrailBody)))
}
proxywasm.SendHttpResponse(uint32(config.DenyCode), openAIDenyContentType(isStream), data, -1)
return nil
}
var data []byte
if isStream {
data = []byte(fmt.Sprintf(OpenAIStreamResponseFormatLegacy, randomID, createdTs, marshalledDenyMessage, randomID, createdTs))
} else {
data = []byte(fmt.Sprintf(OpenAIResponseFormatLegacy, randomID, createdTs, marshalledDenyMessage))
}
proxywasm.SendHttpResponse(uint32(config.DenyCode), openAIDenyContentType(isStream), data, -1)
return nil
}
func GetUnacceptableDetail(data Data, config AISecurityConfig, consumer string) []Detail {
result := []Detail{}
for _, detail := range data.Detail {