fix(ai-security-guard): 移除Suggestion=block的兜底逻辑,改为基于阈值判断 || fix(ai-security-guard): Remove the Suggestion=block fallback logic and decide based on thresholds instead (#3731)

Co-authored-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com>
This commit is contained in:
JianweiWang
2026-04-17 14:42:22 +08:00
committed by GitHub
parent 94b40aab9a
commit 9128cbf729
5 changed files with 85 additions and 69 deletions

View File

@@ -807,11 +807,6 @@ func evaluateRiskMultiModal(data Data, config AISecurityConfig, consumer string)
}
}
// 3. Data.Suggestion=block fallback
if data.Suggestion == "block" {
return RiskBlock
}
if hasMask {
return RiskMask
}
@@ -821,9 +816,6 @@ func evaluateRiskMultiModal(data Data, config AISecurityConfig, consumer string)
// detailTriggersBlock returns whether this single detail should trigger blocking,
// given the resolved dimension action and threshold evaluation result.
func detailTriggersBlock(detail Detail, dimAction string, exceeds bool) bool {
if detail.Suggestion == "block" {
return true
}
if dimAction == "block" {
return exceeds
}

View File

@@ -258,14 +258,12 @@ var knownDetailTypes = []string{
//
// Sub-property 4b: For any Detail where the resolved dimAction is "block" and the
// detail's level exceeds the configured threshold, evaluateRiskMultiModal SHALL return RiskBlock.
func TestProperty4a_SuggestionBlockAlwaysProducesRiskBlock(t *testing.T) {
func TestProperty4a_SuggestionBlockRespectsThreshold(t *testing.T) {
f := func(seed uint64) bool {
r := rand.New(rand.NewSource(int64(seed)))
// Pick a random detail type
detailType := knownDetailTypes[r.Intn(len(knownDetailTypes))]
// Pick a random level based on type
var level string
if detailType == SensitiveDataType {
level = validSensitiveLevels[r.Intn(len(validSensitiveLevels))]
@@ -273,40 +271,34 @@ func TestProperty4a_SuggestionBlockAlwaysProducesRiskBlock(t *testing.T) {
level = validGeneralRiskLevels[r.Intn(len(validGeneralRiskLevels))]
}
// Random config: pick random dimAction (block or mask) and random thresholds
config := baseConfig()
// Randomly assign dimension actions
actions := []string{"block", "mask"}
config.ContentModerationAction = actions[r.Intn(2)]
config.PromptAttackAction = actions[r.Intn(2)]
config.SensitiveDataAction = actions[r.Intn(2)]
config.MaliciousUrlAction = actions[r.Intn(2)]
config.ModelHallucinationAction = actions[r.Intn(2)]
config.CustomLabelAction = actions[r.Intn(2)]
// Random thresholds
config.ContentModerationLevelBar = validGeneralRiskLevels[1+r.Intn(len(validGeneralRiskLevels)-1)]
config.PromptAttackLevelBar = validGeneralRiskLevels[1+r.Intn(len(validGeneralRiskLevels)-1)]
config.SensitiveDataLevelBar = validSensitiveLevels[r.Intn(len(validSensitiveLevels))]
config.MaliciousUrlLevelBar = validGeneralRiskLevels[1+r.Intn(len(validGeneralRiskLevels)-1)]
config.ModelHallucinationLevelBar = validGeneralRiskLevels[1+r.Intn(len(validGeneralRiskLevels)-1)]
config.CustomLabelLevelBar = validGeneralRiskLevels[1+r.Intn(len(validGeneralRiskLevels)-1)]
// Set all thresholds to max so no detail exceeds threshold
config.ContentModerationLevelBar = MaxRisk
config.PromptAttackLevelBar = MaxRisk
config.SensitiveDataLevelBar = S4Sensitive
config.MaliciousUrlLevelBar = MaxRisk
config.ModelHallucinationLevelBar = MaxRisk
config.CustomLabelLevelBar = MaxRisk
data := Data{
RiskLevel: "none", // Avoid top-level gate interference
RiskLevel: "none",
Detail: []Detail{
{
Type: detailType,
Suggestion: "block", // Always block suggestion
Suggestion: "block",
Level: level,
},
},
}
result := EvaluateRisk(MultiModalGuard, data, config, "")
if result != RiskBlock {
t.Errorf("expected RiskBlock for Suggestion=block, type=%s, level=%s, got %d", detailType, level, result)
exceeds := detailExceedsThreshold(data.Detail[0], config, "")
if exceeds && result != RiskBlock {
t.Errorf("expected RiskBlock when threshold exceeded for type=%s, level=%s", detailType, level)
return false
}
if !exceeds && result == RiskBlock {
t.Errorf("expected non-block when threshold not exceeded for type=%s, level=%s", detailType, level)
return false
}
return true
@@ -315,7 +307,6 @@ func TestProperty4a_SuggestionBlockAlwaysProducesRiskBlock(t *testing.T) {
cfg := &quick.Config{MaxCount: 200}
if err := quick.Check(f, cfg); err != nil {
t.Errorf("Property 4a failed: %v", err)
fmt.Printf("Property 4a counterexample: %v\n", err)
}
}
@@ -583,14 +574,11 @@ func TestProperty5b_TopLevelAttackLevelGateProducesRiskBlock(t *testing.T) {
//
// For any set of Details that stay below their thresholds (and so do not individually
// trigger block), Data.Suggestion=block is ignored and evaluateRiskMultiModal SHALL return RiskPass.
func TestProperty6_DataSuggestionBlockFallbackProducesRiskBlock(t *testing.T) {
func TestProperty6_DataSuggestionBlockIgnoredWhenThresholdNotExceeded(t *testing.T) {
f := func(seed uint64) bool {
r := rand.New(rand.NewSource(int64(seed)))
// Generate 0-4 random non-blocking details.
// Strategy: use Suggestion="pass" or "watch" with levels below their thresholds
// so that no detail individually triggers block.
numDetails := r.Intn(5) // 0-4 details
numDetails := r.Intn(5)
nonBlockSuggestions := []string{"pass", "watch"}
details := make([]Detail, numDetails)
@@ -598,8 +586,6 @@ func TestProperty6_DataSuggestionBlockFallbackProducesRiskBlock(t *testing.T) {
detailType := knownDetailTypes[r.Intn(len(knownDetailTypes))]
suggestion := nonBlockSuggestions[r.Intn(len(nonBlockSuggestions))]
// Use "none" level (0) which is always below any meaningful threshold
// since all thresholds are set to max.
var level string
if detailType == SensitiveDataType {
level = "S0"
@@ -615,7 +601,6 @@ func TestProperty6_DataSuggestionBlockFallbackProducesRiskBlock(t *testing.T) {
}
config := baseConfig()
// Set all thresholds to max so no detail exceeds threshold
config.ContentModerationLevelBar = MaxRisk
config.PromptAttackLevelBar = MaxRisk
config.SensitiveDataLevelBar = S4Sensitive
@@ -625,16 +610,15 @@ func TestProperty6_DataSuggestionBlockFallbackProducesRiskBlock(t *testing.T) {
config.RiskAction = "block"
data := Data{
RiskLevel: "none", // Avoid top-level RiskLevel gate
AttackLevel: "", // Avoid top-level AttackLevel gate
Suggestion: "block", // The fallback that should trigger RiskBlock
RiskLevel: "none",
AttackLevel: "",
Suggestion: "block",
Detail: details,
}
result := EvaluateRisk(MultiModalGuard, data, config, "")
if result != RiskBlock {
t.Errorf("expected RiskBlock for Data.Suggestion=block with %d non-blocking details, got %d",
numDetails, result)
if result != RiskPass {
t.Errorf("expected RiskPass when no detail exceeds threshold (data.Suggestion=block should be ignored), got %d", result)
return false
}
return true

View File

@@ -60,7 +60,7 @@ func TestTC_EVAL_001(t *testing.T) {
require.Equal(t, RiskMask, result)
}
// TestTC_EVAL_002 同上但 Suggestion=block => RiskBlock
// TestTC_EVAL_002 Suggestion=block but level below threshold => RiskPass
func TestTC_EVAL_002(t *testing.T) {
config := baseConfig()
config.SensitiveDataAction = "mask"
@@ -77,7 +77,7 @@ func TestTC_EVAL_002(t *testing.T) {
}
result := EvaluateRisk(MultiModalGuard, data, config, "")
require.Equal(t, RiskBlock, result)
require.Equal(t, RiskPass, result)
}
// TestTC_EVAL_003 promptAttackAction=block 且该维度超阈值 => RiskBlock
@@ -323,7 +323,7 @@ func TestTC_EVAL_013(t *testing.T) {
require.Equal(t, RiskMask, result)
}
// TestTC_EVAL_014 未知维度 Detail.Type=maliciousFile 且 Suggestion=block => RiskBlock
// TestTC_EVAL_014 未知维度 Detail.Type=maliciousFile 无阈值配置 => RiskPass
func TestTC_EVAL_014(t *testing.T) {
config := baseConfig()
@@ -339,16 +339,16 @@ func TestTC_EVAL_014(t *testing.T) {
}
result := EvaluateRisk(MultiModalGuard, data, config, "")
require.Equal(t, RiskBlock, result)
require.Equal(t, RiskPass, result)
}
// TestTC_EVAL_015 Detail 不触发拦截,但 Data.Suggestion=block => RiskBlock
// TestTC_EVAL_015 Detail level below threshold, data.Suggestion=block ignored => RiskPass
func TestTC_EVAL_015(t *testing.T) {
config := baseConfig()
data := Data{
RiskLevel: "none",
Suggestion: "block", // 兜底
Suggestion: "block",
Detail: []Detail{
{
Suggestion: "pass",
@@ -359,7 +359,7 @@ func TestTC_EVAL_015(t *testing.T) {
}
result := EvaluateRisk(MultiModalGuard, data, config, "")
require.Equal(t, RiskBlock, result)
require.Equal(t, RiskPass, result)
}
// TestTC_EVAL_016 Data.Suggestion=mask 但无 sensitiveData 脱敏明细 => 不返回 RiskMask
@@ -531,7 +531,7 @@ func TestTC_EVAL_018(t *testing.T) {
result := EvaluateRisk(MultiModalGuardForBase64, data, config, "")
require.Equal(t, RiskMask, result)
// block 场景
// block scenario: level=high but threshold=max => RiskPass
data2 := Data{
RiskLevel: "none",
Detail: []Detail{
@@ -543,21 +543,21 @@ func TestTC_EVAL_018(t *testing.T) {
},
}
result2 := EvaluateRisk(MultiModalGuardForBase64, data2, config, "")
require.Equal(t, RiskBlock, result2)
require.Equal(t, RiskPass, result2)
}
// TestTC_EVAL_019 空 Detail 列表 + Data.Suggestion=block => RiskBlock
// TestTC_EVAL_019 空 Detail 列表 + Data.Suggestion=block => RiskPass (threshold not exceeded)
func TestTC_EVAL_019(t *testing.T) {
config := baseConfig()
data := Data{
RiskLevel: "none",
Suggestion: "block",
Detail: []Detail{}, // 空 Detail 列表
Detail: []Detail{},
}
result := EvaluateRisk(MultiModalGuard, data, config, "")
require.Equal(t, RiskBlock, result)
require.Equal(t, RiskPass, result)
}
// TestTC_EVAL_020 空 Detail 列表 + 无 Data.Suggestion => RiskPass
@@ -787,15 +787,16 @@ func TestTC_EVAL_027(t *testing.T) {
require.Equal(t, RiskBlock, result2)
}
// TestTC_EVAL_028 Data.Suggestion=block 兜底 + 有 mask 候选 => RiskBlock
// block 兜底优先于 mask 候选
// TestTC_EVAL_028 Data.Suggestion=block does NOT override threshold checks.
// Detail level S1 < threshold S4, so the request should pass even though
// the top-level Suggestion is "block".
func TestTC_EVAL_028(t *testing.T) {
config := baseConfig()
config.SensitiveDataAction = "mask"
data := Data{
RiskLevel: "none",
Suggestion: "block", // 兜底 block
Suggestion: "block",
Detail: []Detail{
{
Suggestion: "mask",
@@ -807,7 +808,7 @@ func TestTC_EVAL_028(t *testing.T) {
}
result := EvaluateRisk(MultiModalGuard, data, config, "")
require.Equal(t, RiskBlock, result)
require.Equal(t, RiskPass, result)
}
// TestTC_DESENS_005 Detail.Result 为空数组 => 返回空字符串

View File

@@ -1467,7 +1467,7 @@ func TestIsRiskLevelAcceptable(t *testing.T) {
{Suggestion: "block", Type: "contentModeration", Level: "high"},
},
}
require.False(t, cfg.IsRiskLevelAcceptable(cfg.MultiModalGuard, data, config, ""))
require.True(t, cfg.IsRiskLevelAcceptable(cfg.MultiModalGuard, data, config, ""))
})
// 用例 3: riskAction=mask, 无风险 → 应返回 true
@@ -2549,7 +2549,7 @@ func TestBuildDenyResponseBody(t *testing.T) {
require.Equal(t, "high", result.BlockedDetails[0].Level)
})
t.Run("blockedDetails includes explicit block suggestion below threshold", func(t *testing.T) {
t.Run("blockedDetails empty when suggestion=block but below threshold", func(t *testing.T) {
resp := cfg.Response{
Code: 200,
RequestId: "req-suggestion-block",
@@ -2566,9 +2566,7 @@ func TestBuildDenyResponseBody(t *testing.T) {
var result cfg.DenyResponseBody
require.NoError(t, json.Unmarshal(body, &result))
require.Len(t, result.BlockedDetails, 1)
require.Equal(t, cfg.SensitiveDataType, result.BlockedDetails[0].Type)
require.Equal(t, "S3", result.BlockedDetails[0].Level)
require.Len(t, result.BlockedDetails, 0)
})
t.Run("blockedDetails includes customLabel when threshold exceeded", func(t *testing.T) {