fix(ai-security-guard): 移除Suggestion=block的兜底逻辑，改为基于阈值判断 || fix(ai-security-guard): Remove the Suggestion=block fallback logic and decide based on thresholds instead (#3731)

Co-authored-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com>
2026-05-08 12:57:26 +08:00 · 2026-04-17 14:42:22 +08:00
parent 94b40aab9a
commit 9128cbf729
5 changed files with 85 additions and 69 deletions
--- a/plugins/wasm-go/extensions/ai-security-guard/config/config.go
+++ b/plugins/wasm-go/extensions/ai-security-guard/config/config.go
@@ -807,11 +807,6 @@ func evaluateRiskMultiModal(data Data, config AISecurityConfig, consumer string)
 		}
 	}

-	// 3. Data.Suggestion=block fallback
-	if data.Suggestion == "block" {
-		return RiskBlock
-	}
-
 	if hasMask {
 		return RiskMask
 	}
@@ -821,9 +816,6 @@ func evaluateRiskMultiModal(data Data, config AISecurityConfig, consumer string)
 // detailTriggersBlock returns whether this single detail should trigger blocking,
 // given the resolved dimension action and threshold evaluation result.
 func detailTriggersBlock(detail Detail, dimAction string, exceeds bool) bool {
-	if detail.Suggestion == "block" {
-		return true
-	}
 	if dimAction == "block" {
 		return exceeds
 	}
--- a/plugins/wasm-go/extensions/ai-security-guard/config/evaluate_risk_property_test.go
+++ b/plugins/wasm-go/extensions/ai-security-guard/config/evaluate_risk_property_test.go
@@ -258,14 +258,12 @@ var knownDetailTypes = []string{
 //
 // Sub-property 4b: For any Detail where the resolved dimAction is "block" and the
 // detail's level exceeds the configured threshold, evaluateRiskMultiModal SHALL return RiskBlock.
-func TestProperty4a_SuggestionBlockAlwaysProducesRiskBlock(t *testing.T) {
+func TestProperty4a_SuggestionBlockRespectsThreshold(t *testing.T) {
 	f := func(seed uint64) bool {
 		r := rand.New(rand.NewSource(int64(seed)))

-		// Pick a random detail type
 		detailType := knownDetailTypes[r.Intn(len(knownDetailTypes))]

-		// Pick a random level based on type
 		var level string
 		if detailType == SensitiveDataType {
 			level = validSensitiveLevels[r.Intn(len(validSensitiveLevels))]
@@ -273,40 +271,34 @@ func TestProperty4a_SuggestionBlockAlwaysProducesRiskBlock(t *testing.T) {
 			level = validGeneralRiskLevels[r.Intn(len(validGeneralRiskLevels))]
 		}

-		// Random config: pick random dimAction (block or mask) and random thresholds
 		config := baseConfig()
-
-		// Randomly assign dimension actions
-		actions := []string{"block", "mask"}
-		config.ContentModerationAction = actions[r.Intn(2)]
-		config.PromptAttackAction = actions[r.Intn(2)]
-		config.SensitiveDataAction = actions[r.Intn(2)]
-		config.MaliciousUrlAction = actions[r.Intn(2)]
-		config.ModelHallucinationAction = actions[r.Intn(2)]
-		config.CustomLabelAction = actions[r.Intn(2)]
-
-		// Random thresholds
-		config.ContentModerationLevelBar = validGeneralRiskLevels[1+r.Intn(len(validGeneralRiskLevels)-1)]
-		config.PromptAttackLevelBar = validGeneralRiskLevels[1+r.Intn(len(validGeneralRiskLevels)-1)]
-		config.SensitiveDataLevelBar = validSensitiveLevels[r.Intn(len(validSensitiveLevels))]
-		config.MaliciousUrlLevelBar = validGeneralRiskLevels[1+r.Intn(len(validGeneralRiskLevels)-1)]
-		config.ModelHallucinationLevelBar = validGeneralRiskLevels[1+r.Intn(len(validGeneralRiskLevels)-1)]
-		config.CustomLabelLevelBar = validGeneralRiskLevels[1+r.Intn(len(validGeneralRiskLevels)-1)]
+		// Set all thresholds to max so no detail exceeds threshold
+		config.ContentModerationLevelBar = MaxRisk
+		config.PromptAttackLevelBar = MaxRisk
+		config.SensitiveDataLevelBar = S4Sensitive
+		config.MaliciousUrlLevelBar = MaxRisk
+		config.ModelHallucinationLevelBar = MaxRisk
+		config.CustomLabelLevelBar = MaxRisk

 		data := Data{
-			RiskLevel: "none", // Avoid top-level gate interference
+			RiskLevel: "none",
 			Detail: []Detail{
 				{
 					Type:       detailType,
-					Suggestion: "block", // Always block suggestion
+					Suggestion: "block",
 					Level:      level,
 				},
 			},
 		}

 		result := EvaluateRisk(MultiModalGuard, data, config, "")
-		if result != RiskBlock {
-			t.Errorf("expected RiskBlock for Suggestion=block, type=%s, level=%s, got %d", detailType, level, result)
+		exceeds := detailExceedsThreshold(data.Detail[0], config, "")
+		if exceeds && result != RiskBlock {
+			t.Errorf("expected RiskBlock when threshold exceeded for type=%s, level=%s", detailType, level)
+			return false
+		}
+		if !exceeds && result == RiskBlock {
+			t.Errorf("expected non-block when threshold not exceeded for type=%s, level=%s", detailType, level)
 			return false
 		}
 		return true
@@ -315,7 +307,6 @@ func TestProperty4a_SuggestionBlockAlwaysProducesRiskBlock(t *testing.T) {
 	cfg := &quick.Config{MaxCount: 200}
 	if err := quick.Check(f, cfg); err != nil {
 		t.Errorf("Property 4a failed: %v", err)
-		fmt.Printf("Property 4a counterexample: %v\n", err)
 	}
 }

@@ -583,14 +574,11 @@ func TestProperty5b_TopLevelAttackLevelGateProducesRiskBlock(t *testing.T) {
 //
 // For any set of Details that do not individually trigger block, when Data.Suggestion=block,
 // evaluateRiskMultiModal SHALL return RiskBlock.
-func TestProperty6_DataSuggestionBlockFallbackProducesRiskBlock(t *testing.T) {
+func TestProperty6_DataSuggestionBlockIgnoredWhenThresholdNotExceeded(t *testing.T) {
 	f := func(seed uint64) bool {
 		r := rand.New(rand.NewSource(int64(seed)))

-		// Generate 0-4 random non-blocking details.
-		// Strategy: use Suggestion="pass" or "watch" with levels below their thresholds
-		// so that no detail individually triggers block.
-		numDetails := r.Intn(5) // 0-4 details
+		numDetails := r.Intn(5)
 		nonBlockSuggestions := []string{"pass", "watch"}
 		details := make([]Detail, numDetails)

@@ -598,8 +586,6 @@ func TestProperty6_DataSuggestionBlockFallbackProducesRiskBlock(t *testing.T) {
 			detailType := knownDetailTypes[r.Intn(len(knownDetailTypes))]
 			suggestion := nonBlockSuggestions[r.Intn(len(nonBlockSuggestions))]

-			// Use "none" level (0) which is always below any meaningful threshold
-			// since all thresholds are set to max.
 			var level string
 			if detailType == SensitiveDataType {
 				level = "S0"
@@ -615,7 +601,6 @@ func TestProperty6_DataSuggestionBlockFallbackProducesRiskBlock(t *testing.T) {
 		}

 		config := baseConfig()
-		// Set all thresholds to max so no detail exceeds threshold
 		config.ContentModerationLevelBar = MaxRisk
 		config.PromptAttackLevelBar = MaxRisk
 		config.SensitiveDataLevelBar = S4Sensitive
@@ -625,16 +610,15 @@ func TestProperty6_DataSuggestionBlockFallbackProducesRiskBlock(t *testing.T) {
 		config.RiskAction = "block"

 		data := Data{
-			RiskLevel:   "none",  // Avoid top-level RiskLevel gate
-			AttackLevel: "",      // Avoid top-level AttackLevel gate
-			Suggestion:  "block", // The fallback that should trigger RiskBlock
+			RiskLevel:   "none",
+			AttackLevel: "",
+			Suggestion:  "block",
 			Detail:      details,
 		}

 		result := EvaluateRisk(MultiModalGuard, data, config, "")
-		if result != RiskBlock {
-			t.Errorf("expected RiskBlock for Data.Suggestion=block with %d non-blocking details, got %d",
-				numDetails, result)
+		if result != RiskPass {
+			t.Errorf("expected RiskPass when no detail exceeds threshold (data.Suggestion=block should be ignored), got %d", result)
 			return false
 		}
 		return true
--- a/plugins/wasm-go/extensions/ai-security-guard/config/evaluate_risk_test.go
+++ b/plugins/wasm-go/extensions/ai-security-guard/config/evaluate_risk_test.go
@@ -60,7 +60,7 @@ func TestTC_EVAL_001(t *testing.T) {
 	require.Equal(t, RiskMask, result)
 }

-// TestTC_EVAL_002 同上但 Suggestion=block => RiskBlock
+// TestTC_EVAL_002 Suggestion=block but level below threshold => RiskPass
 func TestTC_EVAL_002(t *testing.T) {
 	config := baseConfig()
 	config.SensitiveDataAction = "mask"
@@ -77,7 +77,7 @@ func TestTC_EVAL_002(t *testing.T) {
 	}

 	result := EvaluateRisk(MultiModalGuard, data, config, "")
-	require.Equal(t, RiskBlock, result)
+	require.Equal(t, RiskPass, result)
 }

 // TestTC_EVAL_003 promptAttackAction=block 且该维度超阈值 => RiskBlock
@@ -323,7 +323,7 @@ func TestTC_EVAL_013(t *testing.T) {
 	require.Equal(t, RiskMask, result)
 }

-// TestTC_EVAL_014 未知维度 Detail.Type=maliciousFile 且 Suggestion=block => RiskBlock
+// TestTC_EVAL_014 未知维度 Detail.Type=maliciousFile 无阈值配置 => RiskPass
 func TestTC_EVAL_014(t *testing.T) {
 	config := baseConfig()

@@ -339,16 +339,16 @@ func TestTC_EVAL_014(t *testing.T) {
 	}

 	result := EvaluateRisk(MultiModalGuard, data, config, "")
-	require.Equal(t, RiskBlock, result)
+	require.Equal(t, RiskPass, result)
 }

-// TestTC_EVAL_015 Detail 不触发拦截，但 Data.Suggestion=block => RiskBlock
+// TestTC_EVAL_015 Detail level below threshold, data.Suggestion=block ignored => RiskPass
 func TestTC_EVAL_015(t *testing.T) {
 	config := baseConfig()

 	data := Data{
 		RiskLevel:  "none",
-		Suggestion: "block", // 兜底
+		Suggestion: "block",
 		Detail: []Detail{
 			{
 				Suggestion: "pass",
@@ -359,7 +359,7 @@ func TestTC_EVAL_015(t *testing.T) {
 	}

 	result := EvaluateRisk(MultiModalGuard, data, config, "")
-	require.Equal(t, RiskBlock, result)
+	require.Equal(t, RiskPass, result)
 }

 // TestTC_EVAL_016 Data.Suggestion=mask 但无 sensitiveData 脱敏明细 => 不返回 RiskMask
@@ -531,7 +531,7 @@ func TestTC_EVAL_018(t *testing.T) {
 	result := EvaluateRisk(MultiModalGuardForBase64, data, config, "")
 	require.Equal(t, RiskMask, result)

-	// block 场景
+	// block scenario: level=high but threshold=max => RiskPass
 	data2 := Data{
 		RiskLevel: "none",
 		Detail: []Detail{
@@ -543,21 +543,21 @@ func TestTC_EVAL_018(t *testing.T) {
 		},
 	}
 	result2 := EvaluateRisk(MultiModalGuardForBase64, data2, config, "")
-	require.Equal(t, RiskBlock, result2)
+	require.Equal(t, RiskPass, result2)
 }

-// TestTC_EVAL_019 空 Detail 列表 + Data.Suggestion=block => RiskBlock
+// TestTC_EVAL_019 空 Detail 列表 + Data.Suggestion=block => RiskPass (threshold not exceeded)
 func TestTC_EVAL_019(t *testing.T) {
 	config := baseConfig()

 	data := Data{
 		RiskLevel:  "none",
 		Suggestion: "block",
-		Detail:     []Detail{}, // 空 Detail 列表
+		Detail:     []Detail{},
 	}

 	result := EvaluateRisk(MultiModalGuard, data, config, "")
-	require.Equal(t, RiskBlock, result)
+	require.Equal(t, RiskPass, result)
 }

 // TestTC_EVAL_020 空 Detail 列表 + 无 Data.Suggestion => RiskPass
@@ -787,15 +787,16 @@ func TestTC_EVAL_027(t *testing.T) {
 	require.Equal(t, RiskBlock, result2)
 }

-// TestTC_EVAL_028 Data.Suggestion=block 兜底 + 有 mask 候选 => RiskBlock
-// block 兜底优先于 mask 候选
+// TestTC_EVAL_028 Data.Suggestion=block does NOT override threshold checks.
+// Detail level S1 < threshold S4, so the request should pass even though
+// the top-level Suggestion is "block".
 func TestTC_EVAL_028(t *testing.T) {
 	config := baseConfig()
 	config.SensitiveDataAction = "mask"

 	data := Data{
 		RiskLevel:  "none",
-		Suggestion: "block", // 兜底 block
+		Suggestion: "block",
 		Detail: []Detail{
 			{
 				Suggestion: "mask",
@@ -807,7 +808,7 @@ func TestTC_EVAL_028(t *testing.T) {
 	}

 	result := EvaluateRisk(MultiModalGuard, data, config, "")
-	require.Equal(t, RiskBlock, result)
+	require.Equal(t, RiskPass, result)
 }

 // TestTC_DESENS_005 Detail.Result 为空数组 => 返回空字符串
--- a/plugins/wasm-go/extensions/ai-security-guard/main_test.go
+++ b/plugins/wasm-go/extensions/ai-security-guard/main_test.go
@@ -1467,7 +1467,7 @@ func TestIsRiskLevelAcceptable(t *testing.T) {
 				{Suggestion: "block", Type: "contentModeration", Level: "high"},
 			},
 		}
-		require.False(t, cfg.IsRiskLevelAcceptable(cfg.MultiModalGuard, data, config, ""))
+		require.True(t, cfg.IsRiskLevelAcceptable(cfg.MultiModalGuard, data, config, ""))
 	})

 	// 用例 3: riskAction=mask, 无风险 → 应返回 true
@@ -2549,7 +2549,7 @@ func TestBuildDenyResponseBody(t *testing.T) {
 		require.Equal(t, "high", result.BlockedDetails[0].Level)
 	})

-	t.Run("blockedDetails includes explicit block suggestion below threshold", func(t *testing.T) {
+	t.Run("blockedDetails empty when suggestion=block but below threshold", func(t *testing.T) {
 		resp := cfg.Response{
 			Code:      200,
 			RequestId: "req-suggestion-block",
@@ -2566,9 +2566,7 @@ func TestBuildDenyResponseBody(t *testing.T) {

 		var result cfg.DenyResponseBody
 		require.NoError(t, json.Unmarshal(body, &result))
-		require.Len(t, result.BlockedDetails, 1)
-		require.Equal(t, cfg.SensitiveDataType, result.BlockedDetails[0].Type)
-		require.Equal(t, "S3", result.BlockedDetails[0].Level)
+		require.Len(t, result.BlockedDetails, 0)
 	})

 	t.Run("blockedDetails includes customLabel when threshold exceeded", func(t *testing.T) {