feat: ai敏感词拦截插件 (#1190)

This commit is contained in:
007gzs
2024-08-16 17:24:32 +08:00
committed by GitHub
parent dcea483c61
commit 25b085cb5e
20 changed files with 66898 additions and 166 deletions

View File

@@ -0,0 +1,189 @@
// Copyright (c) 2022 Alibaba Group Holding Ltd.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package tests
import (
"testing"
"github.com/alibaba/higress/test/e2e/conformance/utils/http"
"github.com/alibaba/higress/test/e2e/conformance/utils/suite"
)
// init passes RustWasmPluginsAiDataMasking to Register when the package is
// initialized, so the test is registered without any explicit call.
func init() {
Register(RustWasmPluginsAiDataMasking)
}
// gen_assertion builds the http.Assertion describing a single POST to "/" on
// the given host.
//
// req_is_json selects the request Content-Type: http.ContentTypeApplicationJson
// when true, http.ContentTypeTextPlain otherwise. req_body is used as the
// request body. The expected response carries res_body with Content-Type
// http.ContentTypeApplicationJson, and the assertion's compare target is
// http.CompareTargetResponse.
func gen_assertion(host string, req_is_json bool, req_body []byte, res_body []byte) http.Assertion {
	// Start from plain text and switch to JSON only when the caller asks for it.
	content_type := http.ContentTypeTextPlain
	if req_is_json {
		content_type = http.ContentTypeApplicationJson
	}

	// The request that is actually sent.
	actual := http.Request{
		Host:             host,
		Path:             "/",
		Method:           "POST",
		ContentType:      content_type,
		Body:             req_body,
		UnfollowRedirect: true,
	}

	// The response the caller expects to come back.
	expected := http.Response{
		ContentType: http.ContentTypeApplicationJson,
		Body:        res_body,
	}

	return http.Assertion{
		Meta:     http.AssertionMeta{CompareTarget: http.CompareTargetResponse},
		Request:  http.AssertionRequest{ActualRequest: actual},
		Response: http.AssertionResponse{ExpectedResponse: expected},
	}
}
// RustWasmPluginsAiDataMasking is the conformance test for the Rust
// ai-data-masking wasm plugin. It applies the manifest
// tests/rust-wasm-ai-data-masking.yaml and is gated on
// suite.WASMRustConformanceFeature.
//
// The test body is a table of request/response pairs grouped by request
// shape (OpenAI-style JSON, raw text, and JSON matched through a JSONPath
// rule). Each pair is turned into an assertion with gen_assertion and handed,
// in table order, to http.MakeRequestAndExpectEventuallyConsistentResponse.
var RustWasmPluginsAiDataMasking = suite.ConformanceTest{
	ShortName:   "RustWasmPluginsAiDataMasking",
	Description: "The Ingress in the higress-conformance-infra namespace test the rust ai-data-masking wasmplugins.",
	Manifests:   []string{"tests/rust-wasm-ai-data-masking.yaml"},
	Features:    []suite.SupportedFeature{suite.WASMRustConformanceFeature},
	Test: func(t *testing.T, suite *suite.ConformanceTestSuite) {
		// exchange is one row of the table: the host to call, whether the
		// request is sent as JSON, the request body and the expected
		// response body.
		type exchange struct {
			host   string
			asJSON bool
			req    string
			res    string
		}

		exchanges := []exchange{
			// OpenAI-style chat payloads.
			{
				host:   "replace.openai.com",
				asJSON: true,
				req:    "{\"messages\":[{\"role\":\"user\",\"content\":\"127.0.0.1 admin@gmail.com sk-12345\"}]}",
				res:    "{\"choices\":[{\"index\":0,\"message\":{\"role\":\"assistant\",\"content\":\"127.0.0.1 sk-12345 admin@gmail.com\"}}],\"usage\":{}}",
			},
			{
				host:   "replace.openai.com",
				asJSON: true,
				req:    "{\"messages\":[{\"role\":\"user\",\"content\":\"192.168.0.1 root@gmail.com sk-12345\"}]}",
				res:    "{\"choices\":[{\"index\":0,\"message\":{\"role\":\"assistant\",\"content\":\"192.168.0.1 sk-12345 root@gmail.com\"}}],\"usage\":{}}",
			},
			{
				host:   "ok.openai.com",
				asJSON: true,
				req:    "{\"messages\":[{\"role\":\"user\",\"content\":\"fuck\"}]}",
				res:    "{\"choices\":[{\"index\":0,\"message\":{\"role\":\"assistant\",\"content\":\"提问或回答中包含敏感词,已被屏蔽\"}}],\"usage\":{}}",
			},
			{
				host:   "ok.openai.com",
				asJSON: true,
				req:    "{\"messages\":[{\"role\":\"user\",\"content\":\"costom_word1\"}]}",
				res:    "{\"choices\":[{\"index\":0,\"message\":{\"role\":\"assistant\",\"content\":\"提问或回答中包含敏感词,已被屏蔽\"}}],\"usage\":{}}",
			},
			{
				host:   "ok.openai.com",
				asJSON: true,
				req:    "{\"messages\":[{\"role\":\"user\",\"content\":\"costom_word\"}]}",
				res:    "{\"choices\":[{\"index\":0,\"message\":{\"role\":\"assistant\",\"content\":\"ok\"}}],\"usage\":{}}",
			},
			{
				host:   "system_deny.openai.com",
				asJSON: true,
				req:    "{\"messages\":[{\"role\":\"user\",\"content\":\"test\"}]}",
				res:    "{\"choices\":[{\"index\":0,\"message\":{\"role\":\"assistant\",\"content\":\"提问或回答中包含敏感词,已被屏蔽\"}}],\"usage\":{}}",
			},
			{
				host:   "costom_word1.openai.com",
				asJSON: true,
				req:    "{\"messages\":[{\"role\":\"user\",\"content\":\"test\"}]}",
				res:    "{\"choices\":[{\"index\":0,\"message\":{\"role\":\"assistant\",\"content\":\"提问或回答中包含敏感词,已被屏蔽\"}}],\"usage\":{}}",
			},
			{
				host:   "costom_word.openai.com",
				asJSON: true,
				req:    "{\"messages\":[{\"role\":\"user\",\"content\":\"test\"}]}",
				res:    "{\"choices\":[{\"index\":0,\"message\":{\"role\":\"assistant\",\"content\":\"costom_word\"}}],\"usage\":{}}",
			},

			// Raw text request bodies.
			{
				host:   "replace.raw.com",
				asJSON: false,
				req:    "127.0.0.1 admin@gmail.com sk-12345",
				res:    "{\"res\":\"127.0.0.1 sk-12345 admin@gmail.com\"}",
			},
			{
				host:   "replace.raw.com",
				asJSON: false,
				req:    "192.168.0.1 root@gmail.com sk-12345",
				res:    "{\"res\":\"192.168.0.1 sk-12345 root@gmail.com\"}",
			},
			{
				host:   "ok.raw.com",
				asJSON: false,
				req:    "fuck",
				res:    "{\"errmsg\":\"提问或回答中包含敏感词,已被屏蔽\"}",
			},
			{
				host:   "ok.raw.com",
				asJSON: false,
				req:    "costom_word1",
				res:    "{\"errmsg\":\"提问或回答中包含敏感词,已被屏蔽\"}",
			},
			{
				host:   "ok.raw.com",
				asJSON: false,
				req:    "costom_word",
				res:    "{\"res\":\"ok\"}",
			},
			{
				host:   "system_deny.raw.com",
				asJSON: false,
				req:    "test",
				res:    "{\"errmsg\":\"提问或回答中包含敏感词,已被屏蔽\"}",
			},
			{
				host:   "costom_word1.raw.com",
				asJSON: false,
				req:    "test",
				res:    "{\"errmsg\":\"提问或回答中包含敏感词,已被屏蔽\"}",
			},
			{
				host:   "costom_word.raw.com",
				asJSON: false,
				req:    "test",
				res:    "{\"res\":\"costom_word\"}",
			},

			// JSON request bodies addressed through a JSONPath rule.
			{
				host:   "replace.raw.com",
				asJSON: true,
				req:    "{\"test\":[{\"test\":\"127.0.0.1 admin@gmail.com sk-12345\"}]}",
				res:    "{\"res\":\"127.0.0.1 sk-12345 admin@gmail.com\"}",
			},
			{
				host:   "replace.raw.com",
				asJSON: true,
				req:    "{\"test\":[{\"test\":\"test\", \"test1\":\"127.0.0.1 admin@gmail.com sk-12345\"}]}",
				res:    "{\"res\":\"***.***.***.*** 48a7e98a91d93896d8dac522c5853948 ****@gmail.com\"}",
			},
		}

		// Build every assertion up front, preserving table order.
		testcases := make([]http.Assertion, 0, len(exchanges))
		for _, e := range exchanges {
			testcases = append(testcases, gen_assertion(e.host, e.asJSON, []byte(e.req), []byte(e.res)))
		}

		t.Run("WasmPlugins ai-data-masking", func(t *testing.T) {
			for i := range testcases {
				http.MakeRequestAndExpectEventuallyConsistentResponse(t, suite.RoundTripper, suite.TimeoutConfig, suite.GatewayAddress, testcases[i])
			}
		})
	},
}

View File

@@ -0,0 +1,150 @@
# Copyright (c) 2022 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Ingress for the ai-data-masking conformance test.
# Two wildcard host rules (*.openai.com and *.raw.com) each send the "/"
# prefix to the infra-backend-v1 service on port 8080.
apiVersion: networking.k8s.io/v1
kind: Ingress
metadata:
name: wasmplugin-ai-data-masking
namespace: higress-conformance-infra
spec:
ingressClassName: higress
rules:
# Hosts used by the OpenAI-style test cases.
- host: "*.openai.com"
http:
paths:
- pathType: Prefix
path: "/"
backend:
service:
name: infra-backend-v1
port:
number: 8080
# Hosts used by the raw-text and JSONPath test cases.
- host: "*.raw.com"
http:
paths:
- pathType: Prefix
path: "/"
backend:
service:
name: infra-backend-v1
port:
number: 8080
---
# WasmPlugin that loads the custom-response plugin (see `url` at the end).
# Each matchRules entry pairs one test host with a fixed body and a
# Content-Type=application/json header.
# NOTE(review): presumably these fixed bodies stand in for the upstream answer
# that the ai-data-masking plugin then inspects — confirm against the
# custom-response plugin documentation.
apiVersion: extensions.higress.io/v1alpha1
kind: WasmPlugin
metadata:
name: custom-response
namespace: higress-system
spec:
# NOTE(review): ai-data-masking in this manifest uses priority 300; how the
# two priorities order the plugins should be confirmed against Higress docs.
priority: 200
defaultConfig:
"body": "ok"
matchRules:
# --- OpenAI-style response bodies ---
- domain:
- ok.openai.com
config:
headers:
- Content-Type=application/json
"body": "{\"choices\":[{\"index\":0,\"message\":{\"role\":\"assistant\",\"content\":\"ok\"}}],\"usage\":{}}"
# This body carries values that are already in replaced form (starred IP,
# a hash-like token, starred e-mail local part).
- domain:
- replace.openai.com
config:
headers:
- Content-Type=application/json
"body": "{\"choices\":[{\"index\":0,\"message\":{\"role\":\"assistant\",\"content\":\"***.***.***.*** 48a7e98a91d93896d8dac522c5853948 ****@gmail.com\"}}],\"usage\":{}}"
# The word in this body is not listed under deny_words in this manifest;
# presumably it is covered by the plugin's built-in list — TODO confirm.
- domain:
- system_deny.openai.com
config:
headers:
- Content-Type=application/json
"body": "{\"choices\":[{\"index\":0,\"message\":{\"role\":\"assistant\",\"content\":\"fuck\"}}],\"usage\":{}}"
# "costom_word1" is listed under deny_words in the ai-data-masking config.
- domain:
- costom_word1.openai.com
config:
headers:
- Content-Type=application/json
"body": "{\"choices\":[{\"index\":0,\"message\":{\"role\":\"assistant\",\"content\":\"costom_word1\"}}],\"usage\":{}}"
# "costom_word" (no trailing 1) is not listed under deny_words.
- domain:
- costom_word.openai.com
config:
headers:
- Content-Type=application/json
"body": "{\"choices\":[{\"index\":0,\"message\":{\"role\":\"assistant\",\"content\":\"costom_word\"}}],\"usage\":{}}"
# --- Raw response bodies: same scenarios, using a flat {"res": ...} object ---
- domain:
- ok.raw.com
config:
headers:
- Content-Type=application/json
"body": "{\"res\":\"ok\"}"
- domain:
- replace.raw.com
config:
headers:
- Content-Type=application/json
"body": "{\"res\":\"***.***.***.*** 48a7e98a91d93896d8dac522c5853948 ****@gmail.com\"}"
- domain:
- system_deny.raw.com
config:
headers:
- Content-Type=application/json
"body": "{\"res\":\"fuck\"}"
- domain:
- costom_word1.raw.com
config:
headers:
- Content-Type=application/json
"body": "{\"res\":\"costom_word1\"}"
- domain:
- costom_word.raw.com
config:
headers:
- Content-Type=application/json
"body": "{\"res\":\"costom_word\"}"
url: file:///opt/plugins/wasm-go/extensions/custom-response/plugin.wasm
---
# WasmPlugin that loads the Rust ai-data-masking plugin under test (see `url`
# at the end). Only a defaultConfig is given; there are no per-domain rules.
# NOTE(review): the per-field notes below describe what the values are and how
# the conformance test uses them; exact field semantics should be confirmed
# against the ai-data-masking plugin documentation.
apiVersion: extensions.higress.io/v1alpha1
kind: WasmPlugin
metadata:
name: ai-data-masking
namespace: higress-system
spec:
priority: 300
defaultConfig:
# Presumably enables the plugin's built-in word list — TODO confirm.
system_deny: true
# The three deny_* switches below correspond to the three request shapes
# exercised by the test: OpenAI-style JSON, JSONPath-selected JSON, raw text.
deny_openai: true
deny_jsonpath:
- "$.test[*].test"
deny_raw: true
# Status code, message text, raw body and content type used for the denial
# replies the test expects.
deny_code: 200
deny_message: "提问或回答中包含敏感词,已被屏蔽"
deny_raw_message: "{\"errmsg\":\"提问或回答中包含敏感词,已被屏蔽\"}"
deny_content_type: "application/json"
# Extra denied word; the test expects "costom_word1" to be denied while
# "costom_word" passes.
deny_words:
- "costom_word1"
# Replacement rules. The spellings "replace_roles" and "costom_word1" are
# runtime keys/values and are left exactly as written.
replace_roles:
# E-mail: local part becomes "****", captured domain is kept.
- regex: "%{EMAILLOCALPART}@%{HOSTNAME:domain}"
type: "replace"
restore: true
value: "****@$domain"
# IP address: replaced with a fixed starred pattern.
- regex: "%{IP}"
type: "replace"
restore: true
value: "***.***.***.***"
# "sk-" prefixed token: type "hash" with no replacement value given.
- regex: "sk-[0-9a-zA-Z]*"
restore: true
type: "hash"
url: file:///opt/plugins/wasm-rust/extensions/ai-data-masking/plugin.wasm