fix: add AttackLevel field support for MultiModalGuard prompt attack detection (#2938)
@@ -21,6 +21,7 @@ description: 阿里云内容安全检测
| `accessKey` | string | required | - | Alibaba Cloud AK |
| `secretKey` | string | required | - | Alibaba Cloud SK |
| `action` | string | required | - | Alibaba Cloud AI safety service API |
| `securityToken` | string | optional | - | Alibaba Cloud security token (for temporary credentials) |
| `checkRequest` | bool | optional | false | Check whether the request (prompt) content is compliant |
| `checkResponse` | bool | optional | false | Check whether the LLM's response content is compliant; when enabled, streaming responses become non-streaming |
| `requestCheckService` | string | optional | llm_query_moderation | The Alibaba Cloud content-safety service used to check request content |
@@ -36,6 +37,8 @@ description: 阿里云内容安全检测
| `sensitiveDataLevelBar` | string | optional | S4 | Risk level at which sensitive-content detection blocks a request, one of `S4`, `S3`, `S2` or `S1` |
| `timeout` | int | optional | 2000 | Timeout for calls to the content-safety service |
| `bufferLimit` | int | optional | 1000 | Length limit for each text segment sent to the content-safety service |
| `consumerSpecificRequestCheckService` | map | optional | - | Specifies request check services for particular consumers |
| `consumerSpecificResponseCheckService` | map | optional | - | Specifies response check services for particular consumers |
A further note on `denyMessage`: non-compliant requests are handled as follows:

- If `denyMessage` is configured, the configured content is returned, formatted as an OpenAI-style streaming/non-streaming response (see the sketch below)
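For illustration only, a minimal sketch of that behavior, assuming `denyMessage` sits alongside the other top-level options above (the message text is made up):

```yaml
# Hypothetical snippet: non-compliant requests get this fixed answer
# instead of the model output.
checkRequest: true
denyMessage: "Your request was blocked because it may violate the content policy."
```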
@@ -90,6 +93,35 @@ checkRequest: true
checkResponse: true
```

### Using temporary security credentials

```yaml
serviceName: safecheck.dns
servicePort: 443
serviceHost: "green-cip.cn-shanghai.aliyuncs.com"
accessKey: "XXXXXXXXX"
secretKey: "XXXXXXXXXXXXXXX"
securityToken: "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX"
checkRequest: true
```

### Specifying different check services for different consumers

```yaml
serviceName: safecheck.dns
servicePort: 443
serviceHost: "green-cip.cn-shanghai.aliyuncs.com"
accessKey: "XXXXXXXXX"
secretKey: "XXXXXXXXXXXXXXX"
checkRequest: true
consumerSpecificRequestCheckService:
  consumerA: llm_query_moderation_strict
  consumerB: llm_query_moderation_relaxed
consumerSpecificResponseCheckService:
  consumerA: llm_response_moderation_strict
  consumerB: llm_response_moderation_relaxed
```

### Specifying a custom content-safety check service

Users may need different detection rules for different scenarios. This can be achieved by configuring different content-safety check services for different domains, routes, or services. As shown in the figure below, we created a check service named llm_query_moderation_01 whose detection rules modify those of llm_query_moderation:

(figure: custom check service llm_query_moderation_01, derived from llm_query_moderation)
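As a hedged sketch (not shown in this diff), pointing the plugin at that custom service would reuse the `requestCheckService` field from the table above, roughly:

```yaml
serviceName: safecheck.dns
servicePort: 443
serviceHost: "green-cip.cn-shanghai.aliyuncs.com"
accessKey: "XXXXXXXXX"
secretKey: "XXXXXXXXXXXXXXX"
checkRequest: true
# Use the custom service created above instead of the default llm_query_moderation.
requestCheckService: llm_query_moderation_01
```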
@@ -81,10 +81,11 @@ type Response struct {
}

type Data struct {
-	RiskLevel string   `json:"RiskLevel"`
-	Result    []Result `json:"Result,omitempty"`
-	Advice    []Advice `json:"Advice,omitempty"`
-	Detail    []Detail `json:"Detail,omitempty"`
+	RiskLevel   string   `json:"RiskLevel"`
+	AttackLevel string   `json:"AttackLevel,omitempty"`
+	Result      []Result `json:"Result,omitempty"`
+	Advice      []Advice `json:"Advice,omitempty"`
+	Detail      []Detail `json:"Detail,omitempty"`
}

type Result struct {
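To make the new field concrete, here is a small self-contained sketch (not part of the diff) of how `AttackLevel` is populated when a service response is decoded; the struct is trimmed to two fields and the payload is invented:

```go
package main

import (
	"encoding/json"
	"fmt"
)

// Trimmed copy of the Data struct above, keeping only the fields needed
// to show the new AttackLevel field being decoded.
type Data struct {
	RiskLevel   string `json:"RiskLevel"`
	AttackLevel string `json:"AttackLevel,omitempty"`
}

func main() {
	// Invented payload; real responses carry more fields (Result, Advice, Detail).
	raw := []byte(`{"RiskLevel":"low","AttackLevel":"high"}`)

	var d Data
	if err := json.Unmarshal(raw, &d); err != nil {
		panic(err)
	}
	fmt.Println(d.RiskLevel, d.AttackLevel) // low high
}
```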
@@ -142,16 +143,51 @@ func (config *AISecurityConfig) incrementCounter(metricName string, inc uint64)
}

func levelToInt(riskLevel string) int {
+	// First check against our defined constants
	switch riskLevel {
-	case MaxRisk, S4Sensitive:
+	case MaxRisk:
		return 4
-	case HighRisk, S3Sensitive:
+	case HighRisk:
		return 3
-	case MediumRisk, S2Sensitive:
+	case MediumRisk:
		return 2
-	case LowRisk, S1Sensitive:
+	case LowRisk:
		return 1
-	case NoRisk, NoSensitive:
+	case NoRisk:
		return 0
+	case S4Sensitive:
+		return 4
+	case S3Sensitive:
+		return 3
+	case S2Sensitive:
+		return 2
+	case S1Sensitive:
+		return 1
+	case NoSensitive:
+		return 0
+	}
+
+	// Then check against raw string values
+	switch riskLevel {
+	case "max", "MAX":
+		return 4
+	case "high", "HIGH":
+		return 3
+	case "medium", "MEDIUM":
+		return 2
+	case "low", "LOW":
+		return 1
+	case "none", "NONE":
+		return 0
+	case "S4", "s4":
+		return 4
+	case "S3", "s3":
+		return 3
+	case "S2", "s2":
+		return 2
+	case "S1", "s1":
+		return 1
+	case "S0", "s0":
+		return 0
	default:
		return -1
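A standalone sketch of what this mapping is for (simplified: the named constants are folded into their string values): a detected level is blocked once it reaches the configured bar, while unrecognized strings map to -1:

```go
package main

import "fmt"

// Simplified, self-contained version of levelToInt above: named levels and
// S-grades share one integer scale so a detected level can be compared
// against a configured bar.
func levelToInt(riskLevel string) int {
	switch riskLevel {
	case "max", "MAX", "S4", "s4":
		return 4
	case "high", "HIGH", "S3", "s3":
		return 3
	case "medium", "MEDIUM", "S2", "s2":
		return 2
	case "low", "LOW", "S1", "s1":
		return 1
	case "none", "NONE", "S0", "s0":
		return 0
	default:
		return -1
	}
}

func main() {
	bar := "high" // e.g. a promptAttackLevelBar setting
	for _, level := range []string{"low", "high", "S4", "unknown"} {
		fmt.Printf("%s blocked=%v\n", level, levelToInt(level) >= levelToInt(bar))
	}
	// Prints: low blocked=false, high blocked=true, S4 blocked=true, unknown blocked=false
}
```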
@@ -160,6 +196,16 @@ func levelToInt(riskLevel string) int {

func isRiskLevelAcceptable(action string, data Data, config AISecurityConfig) bool {
	if action == "MultiModalGuard" {
		// Check top-level risk levels for MultiModalGuard
		if levelToInt(data.RiskLevel) >= levelToInt(config.contentModerationLevelBar) {
			return false
		}
		// Also check AttackLevel for prompt attack detection
		if levelToInt(data.AttackLevel) >= levelToInt(config.promptAttackLevelBar) {
			return false
		}

		// Check detailed results for backward compatibility
		for _, detail := range data.Detail {
			switch detail.Type {
			case ContentModerationType: