feat(ai-security-guard): replace denyMessage with structured DenyResponseBody (#3642)

Co-authored-by: rinfx <yucheng.lxr@alibaba-inc.com>
2026-05-26 05:37:25 +08:00 · 2026-04-01 19:38:01 +08:00
parent 89587c1c9b
commit 1c9e981bf2
10 changed files with 820 additions and 83 deletions
--- a/plugins/wasm-go/extensions/ai-security-guard/config/config.go
+++ b/plugins/wasm-go/extensions/ai-security-guard/config/config.go
@@ -1,6 +1,7 @@
 package config

 import (
+	"encoding/json"
 	"errors"
 	"fmt"
 	"regexp"
@@ -584,3 +585,68 @@ func IsRiskLevelAcceptable(action string, data Data, config AISecurityConfig, co
 		return LevelToInt(data.RiskLevel) < LevelToInt(config.GetRiskLevelBar(consumer))
 	}
 }
+
+// DenyResponseBody is the structured payload that BuildDenyResponseBody serialises to JSON.
+// BlockedDetails holds the detail entries selected by GetUnacceptableDetail, and RequestId
+// is copied from the security service response.
+type DenyResponseBody struct {
+	BlockedDetails []Detail `json:"blockedDetails"`
+	RequestId      string   `json:"requestId"`
+	// GuardCode is the business code returned by the security service (typically 200 when the check
+	// succeeded and a risk was detected). It is NOT an HTTP status code.
+	GuardCode int `json:"guardCode"`
+}
+
+// BuildDenyResponseBody assembles a DenyResponseBody from the security service response and
+// returns its JSON encoding.
+//
+// The blocked details are computed by GetUnacceptableDetail for the given config and consumer;
+// the request id and business code are copied from response unchanged. The returned error is
+// whatever json.Marshal reports.
+func BuildDenyResponseBody(response Response, config AISecurityConfig, consumer string) ([]byte, error) {
+	var payload DenyResponseBody
+	payload.BlockedDetails = GetUnacceptableDetail(response.Data, config, consumer)
+	payload.RequestId = response.RequestId
+	payload.GuardCode = response.Code
+	return json.Marshal(payload)
+}
+
+// GetUnacceptableDetail returns the entries of data.Detail whose level reaches the level bar
+// configured for their type and the given consumer. Entries of any other type are ignored.
+//
+// If no entry is selected, the result is synthesised from the top-level data.RiskLevel and
+// data.AttackLevel instead, each compared against the content-moderation and prompt-attack
+// bars respectively. The returned slice is always non-nil, so it marshals as a JSON array.
+func GetUnacceptableDetail(data Data, config AISecurityConfig, consumer string) []Detail {
+	blocked := []Detail{}
+	for i := range data.Detail {
+		item := data.Detail[i]
+		// Each known type is compared against its own per-consumer bar; unknown types
+		// leave overBar false and are skipped.
+		overBar := false
+		switch item.Type {
+		case ContentModerationType:
+			overBar = LevelToInt(item.Level) >= LevelToInt(config.GetContentModerationLevelBar(consumer))
+		case PromptAttackType:
+			overBar = LevelToInt(item.Level) >= LevelToInt(config.GetPromptAttackLevelBar(consumer))
+		case SensitiveDataType:
+			overBar = LevelToInt(item.Level) >= LevelToInt(config.GetSensitiveDataLevelBar(consumer))
+		case MaliciousUrlDataType:
+			overBar = LevelToInt(item.Level) >= LevelToInt(config.GetMaliciousUrlLevelBar(consumer))
+		case ModelHallucinationDataType:
+			overBar = LevelToInt(item.Level) >= LevelToInt(config.GetModelHallucinationLevelBar(consumer))
+		}
+		if overBar {
+			blocked = append(blocked, item)
+		}
+	}
+	if len(blocked) > 0 {
+		return blocked
+	}
+
+	// Fallback: nothing was selected from data.Detail, so derive entries from the top-level
+	// RiskLevel/AttackLevel signals to avoid reporting an empty list on a real block event.
+	if LevelToInt(data.RiskLevel) >= LevelToInt(config.GetContentModerationLevelBar(consumer)) {
+		blocked = append(blocked, Detail{
+			Type:       ContentModerationType,
+			Level:      data.RiskLevel,
+			Suggestion: "block",
+		})
+	}
+	if LevelToInt(data.AttackLevel) >= LevelToInt(config.GetPromptAttackLevelBar(consumer)) {
+		blocked = append(blocked, Detail{
+			Type:       PromptAttackType,
+			Level:      data.AttackLevel,
+			Suggestion: "block",
+		})
+	}
+	return blocked
+}