feat: ai敏感词拦截插件 (#1190)

2026-04-22 20:47:36 +08:00 · 2024-08-16 17:24:32 +08:00
parent dcea483c61
commit 25b085cb5e
20 changed files with 66898 additions and 166 deletions
--- a/test/e2e/conformance/tests/rust-wasm-ai-data-masking.go
+++ b/test/e2e/conformance/tests/rust-wasm-ai-data-masking.go
@@ -0,0 +1,189 @@
+// Copyright (c) 2022 Alibaba Group Holding Ltd.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package tests
+
+import (
+	"testing"
+
+	"github.com/alibaba/higress/test/e2e/conformance/utils/http"
+	"github.com/alibaba/higress/test/e2e/conformance/utils/suite"
+)
+
+// init passes RustWasmPluginsAiDataMasking to Register (declared elsewhere in
+// this package) when the package is loaded.
+// NOTE(review): presumably Register adds the test to the set the conformance
+// runner executes — confirm against its definition.
+func init() {
+	Register(RustWasmPluginsAiDataMasking)
+}
+
+func gen_assertion(host string, req_is_json bool, req_body []byte, res_body []byte) http.Assertion {
+	var content_type string
+	if req_is_json {
+		content_type = http.ContentTypeApplicationJson
+	} else {
+		content_type = http.ContentTypeTextPlain
+	}
+	return http.Assertion{
+		Meta: http.AssertionMeta{
+			CompareTarget: http.CompareTargetResponse,
+		},
+		Request: http.AssertionRequest{
+			ActualRequest: http.Request{
+				Host:             host,
+				Path:             "/",
+				Method:           "POST",
+				ContentType:      content_type,
+				Body:             req_body,
+				UnfollowRedirect: true,
+			},
+		},
+		Response: http.AssertionResponse{
+			ExpectedResponse: http.Response{
+				ContentType: http.ContentTypeApplicationJson,
+				Body:        res_body,
+			},
+		},
+	}
+}
+
+// RustWasmPluginsAiDataMasking is the conformance test for the Rust
+// ai-data-masking WasmPlugin. It lists tests/rust-wasm-ai-data-masking.yaml as
+// its manifest and sends a fixed series of POST requests, each paired with the
+// exact response body it expects. The hosts used below, and the canned bodies
+// the backend answers with, are configured in that manifest.
+var RustWasmPluginsAiDataMasking = suite.ConformanceTest{
+	ShortName:   "RustWasmPluginsAiDataMasking",
+	Description: "The Ingress in the higress-conformance-infra namespace test the rust ai-data-masking wasmplugins.",
+	Manifests:   []string{"tests/rust-wasm-ai-data-masking.yaml"},
+	Features:    []suite.SupportedFeature{suite.WASMRustConformanceFeature},
+	Test: func(t *testing.T, suite *suite.ConformanceTestSuite) {
+		testcases := []http.Assertion{
+			// OpenAI-style payloads: a "messages" array in the request and a
+			// "choices" array in the response.
+			//
+			// replace.*: the manifest's canned reply holds masked values, while
+			// the expected body holds the original IP, key and e-mail again.
+			gen_assertion(
+				"replace.openai.com",
+				true,
+				[]byte("{\"messages\":[{\"role\":\"user\",\"content\":\"127.0.0.1 admin@gmail.com sk-12345\"}]}"),
+				[]byte("{\"choices\":[{\"index\":0,\"message\":{\"role\":\"assistant\",\"content\":\"127.0.0.1 sk-12345 admin@gmail.com\"}}],\"usage\":{}}"),
+			),
+			gen_assertion(
+				"replace.openai.com",
+				true,
+				[]byte("{\"messages\":[{\"role\":\"user\",\"content\":\"192.168.0.1 root@gmail.com sk-12345\"}]}"),
+				[]byte("{\"choices\":[{\"index\":0,\"message\":{\"role\":\"assistant\",\"content\":\"192.168.0.1 sk-12345 root@gmail.com\"}}],\"usage\":{}}"),
+			),
+			// ok.*: the denied word is in the request. "costom_word1" is the
+			// word configured under deny_words; "costom_word" is not, so that
+			// request is expected to pass through and receive "ok".
+			gen_assertion(
+				"ok.openai.com",
+				true,
+				[]byte("{\"messages\":[{\"role\":\"user\",\"content\":\"fuck\"}]}"),
+				[]byte("{\"choices\":[{\"index\":0,\"message\":{\"role\":\"assistant\",\"content\":\"提问或回答中包含敏感词，已被屏蔽\"}}],\"usage\":{}}"),
+			),
+			gen_assertion(
+				"ok.openai.com",
+				true,
+				[]byte("{\"messages\":[{\"role\":\"user\",\"content\":\"costom_word1\"}]}"),
+				[]byte("{\"choices\":[{\"index\":0,\"message\":{\"role\":\"assistant\",\"content\":\"提问或回答中包含敏感词，已被屏蔽\"}}],\"usage\":{}}"),
+			),
+			gen_assertion(
+				"ok.openai.com",
+				true,
+				[]byte("{\"messages\":[{\"role\":\"user\",\"content\":\"costom_word\"}]}"),
+				[]byte("{\"choices\":[{\"index\":0,\"message\":{\"role\":\"assistant\",\"content\":\"ok\"}}],\"usage\":{}}"),
+			),
+			// The request is harmless here; the denied word sits in the canned
+			// reply configured for the host.
+			gen_assertion(
+				"system_deny.openai.com",
+				true,
+				[]byte("{\"messages\":[{\"role\":\"user\",\"content\":\"test\"}]}"),
+				[]byte("{\"choices\":[{\"index\":0,\"message\":{\"role\":\"assistant\",\"content\":\"提问或回答中包含敏感词，已被屏蔽\"}}],\"usage\":{}}"),
+			),
+			gen_assertion(
+				"costom_word1.openai.com",
+				true,
+				[]byte("{\"messages\":[{\"role\":\"user\",\"content\":\"test\"}]}"),
+				[]byte("{\"choices\":[{\"index\":0,\"message\":{\"role\":\"assistant\",\"content\":\"提问或回答中包含敏感词，已被屏蔽\"}}],\"usage\":{}}"),
+			),
+			gen_assertion(
+				"costom_word.openai.com",
+				true,
+				[]byte("{\"messages\":[{\"role\":\"user\",\"content\":\"test\"}]}"),
+				[]byte("{\"choices\":[{\"index\":0,\"message\":{\"role\":\"assistant\",\"content\":\"costom_word\"}}],\"usage\":{}}"),
+			),
+
+			// Raw payloads: the same scenarios sent as text/plain bodies. A
+			// blocked exchange is expected to return the manifest's
+			// deny_raw_message JSON.
+			gen_assertion(
+				"replace.raw.com",
+				false,
+				[]byte("127.0.0.1 admin@gmail.com sk-12345"),
+				[]byte("{\"res\":\"127.0.0.1 sk-12345 admin@gmail.com\"}"),
+			),
+			gen_assertion(
+				"replace.raw.com",
+				false,
+				[]byte("192.168.0.1 root@gmail.com sk-12345"),
+				[]byte("{\"res\":\"192.168.0.1 sk-12345 root@gmail.com\"}"),
+			),
+			gen_assertion(
+				"ok.raw.com",
+				false,
+				[]byte("fuck"),
+				[]byte("{\"errmsg\":\"提问或回答中包含敏感词，已被屏蔽\"}"),
+			),
+			gen_assertion(
+				"ok.raw.com",
+				false,
+				[]byte("costom_word1"),
+				[]byte("{\"errmsg\":\"提问或回答中包含敏感词，已被屏蔽\"}"),
+			),
+			gen_assertion(
+				"ok.raw.com",
+				false,
+				[]byte("costom_word"),
+				[]byte("{\"res\":\"ok\"}"),
+			),
+			gen_assertion(
+				"system_deny.raw.com",
+				false,
+				[]byte("test"),
+				[]byte("{\"errmsg\":\"提问或回答中包含敏感词，已被屏蔽\"}"),
+			),
+			gen_assertion(
+				"costom_word1.raw.com",
+				false,
+				[]byte("test"),
+				[]byte("{\"errmsg\":\"提问或回答中包含敏感词，已被屏蔽\"}"),
+			),
+			gen_assertion(
+				"costom_word.raw.com",
+				false,
+				[]byte("test"),
+				[]byte("{\"res\":\"costom_word\"}"),
+			),
+
+			// JSONPath payloads: JSON bodies sent to the raw host; the manifest
+			// sets deny_jsonpath to "$.test[*].test". In the first case the
+			// sensitive text is under "test" and the original values are
+			// expected back. In the second it is under "test1" and the masked
+			// canned reply is expected unchanged.
+			// NOTE(review): presumably "test1" is simply not selected by the
+			// configured path, so nothing is recorded to restore — confirm.
+			gen_assertion(
+				"replace.raw.com",
+				true,
+				[]byte("{\"test\":[{\"test\":\"127.0.0.1 admin@gmail.com sk-12345\"}]}"),
+				[]byte("{\"res\":\"127.0.0.1 sk-12345 admin@gmail.com\"}"),
+			),
+			gen_assertion(
+				"replace.raw.com",
+				true,
+				[]byte("{\"test\":[{\"test\":\"test\", \"test1\":\"127.0.0.1 admin@gmail.com sk-12345\"}]}"),
+				[]byte("{\"res\":\"***.***.***.*** 48a7e98a91d93896d8dac522c5853948 ****@gmail.com\"}"),
+			),
+		}
+
+		// All cases run in declaration order inside a single subtest.
+		t.Run("WasmPlugins ai-data-masking", func(t *testing.T) {
+			for i := range testcases {
+				http.MakeRequestAndExpectEventuallyConsistentResponse(t, suite.RoundTripper, suite.TimeoutConfig, suite.GatewayAddress, testcases[i])
+			}
+		})
+	},
+}
--- a/test/e2e/conformance/tests/rust-wasm-ai-data-masking.yaml
+++ b/test/e2e/conformance/tests/rust-wasm-ai-data-masking.yaml
@@ -0,0 +1,150 @@
+# Copyright (c) 2022 Alibaba Group Holding Ltd.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Ingress for the ai-data-masking conformance test: every host under
+# *.openai.com and *.raw.com is routed, for all paths, to the
+# infra-backend-v1 service on port 8080.
+apiVersion: networking.k8s.io/v1
+kind: Ingress
+metadata:
+  name: wasmplugin-ai-data-masking
+  namespace: higress-conformance-infra
+spec:
+  ingressClassName: higress
+  rules:
+    # Hosts used by the OpenAI-style test cases.
+    - host: "*.openai.com"
+      http:
+        paths:
+          - pathType: Prefix
+            path: "/"
+            backend:
+              service:
+                name: infra-backend-v1
+                port:
+                  number: 8080
+    # Hosts used by the raw-body and JSONPath test cases.
+    - host: "*.raw.com"
+      http:
+        paths:
+          - pathType: Prefix
+            path: "/"
+            backend:
+              service:
+                name: infra-backend-v1
+                port:
+                  number: 8080
+---
+# custom-response plugin configuration: one fixed JSON body per test host.
+# NOTE(review): presumably the plugin answers each request with the configured
+# body instead of forwarding it, giving ai-data-masking a deterministic
+# "upstream" reply to inspect — confirm against the custom-response plugin docs.
+apiVersion: extensions.higress.io/v1alpha1
+kind: WasmPlugin
+metadata:
+  name: custom-response
+  namespace: higress-system
+spec:
+  # NOTE(review): ai-data-masking is declared with priority 300; which of the
+  # two plugins runs first depends on how Higress orders priorities — confirm.
+  priority: 200
+  defaultConfig:
+    "body": "ok"
+  matchRules:
+    # OpenAI-style replies ("choices" array).
+    - domain:
+        - ok.openai.com
+      config:
+        headers:
+          - Content-Type=application/json
+        "body": "{\"choices\":[{\"index\":0,\"message\":{\"role\":\"assistant\",\"content\":\"ok\"}}],\"usage\":{}}"
+    # Reply that already contains masked values (masked IP, hashed key, masked
+    # e-mail); the test expects the original values in the final response.
+    - domain:
+        - replace.openai.com
+      config:
+        headers:
+          - Content-Type=application/json
+        "body": "{\"choices\":[{\"index\":0,\"message\":{\"role\":\"assistant\",\"content\":\"***.***.***.*** 48a7e98a91d93896d8dac522c5853948 ****@gmail.com\"}}],\"usage\":{}}"
+        
+    # Replies whose content is itself a word under test, so the check applies
+    # to the response rather than the request.
+    - domain:
+        - system_deny.openai.com
+      config:
+        headers:
+          - Content-Type=application/json
+        "body": "{\"choices\":[{\"index\":0,\"message\":{\"role\":\"assistant\",\"content\":\"fuck\"}}],\"usage\":{}}"
+    - domain:
+        - costom_word1.openai.com
+      config:
+        headers:
+          - Content-Type=application/json
+        "body": "{\"choices\":[{\"index\":0,\"message\":{\"role\":\"assistant\",\"content\":\"costom_word1\"}}],\"usage\":{}}"
+    - domain:
+        - costom_word.openai.com
+      config:
+        headers:
+          - Content-Type=application/json
+        "body": "{\"choices\":[{\"index\":0,\"message\":{\"role\":\"assistant\",\"content\":\"costom_word\"}}],\"usage\":{}}"
+    # Raw-host replies: the same scenarios with a flat {"res": ...} body.
+    - domain:
+        - ok.raw.com
+      config:
+        headers:
+          - Content-Type=application/json
+        "body": "{\"res\":\"ok\"}"
+    - domain:
+        - replace.raw.com
+      config:
+        headers:
+          - Content-Type=application/json
+        "body": "{\"res\":\"***.***.***.*** 48a7e98a91d93896d8dac522c5853948 ****@gmail.com\"}"
+    - domain:
+        - system_deny.raw.com
+      config:
+        headers:
+          - Content-Type=application/json
+        "body": "{\"res\":\"fuck\"}"
+    - domain:
+        - costom_word1.raw.com
+      config:
+        headers:
+          - Content-Type=application/json
+        "body": "{\"res\":\"costom_word1\"}"
+    - domain:
+        - costom_word.raw.com
+      config:
+        headers:
+          - Content-Type=application/json
+        "body": "{\"res\":\"costom_word\"}"
+    
+  url: file:///opt/plugins/wasm-go/extensions/custom-response/plugin.wasm
+---
+# ai-data-masking plugin under test, loaded from the Rust wasm build. The
+# settings below apply to every host (defaultConfig, no matchRules).
+apiVersion: extensions.higress.io/v1alpha1
+kind: WasmPlugin
+metadata:
+  name: ai-data-masking
+  namespace: higress-system
+spec:
+  priority: 300
+  defaultConfig:
+    # NOTE(review): presumably enables the plugin's built-in deny-word list in
+    # addition to deny_words below — confirm against the plugin docs.
+    system_deny: true
+    # Inspection modes exercised by the test: OpenAI-style bodies, the listed
+    # JSONPath, and raw bodies.
+    deny_openai: true
+    deny_jsonpath:
+      - "$.test[*].test"
+    deny_raw: true
+    # Blocked exchanges are answered with HTTP 200 and the messages below; the
+    # test expects deny_message inside an OpenAI-style reply and
+    # deny_raw_message verbatim for raw hosts.
+    deny_code: 200
+    deny_message: "提问或回答中包含敏感词，已被屏蔽"
+    deny_raw_message: "{\"errmsg\":\"提问或回答中包含敏感词，已被屏蔽\"}"
+    deny_content_type: "application/json"
+    # Custom denied word. The test also sends "costom_word" (no trailing 1) and
+    # expects it NOT to be blocked, so the spelling here must stay in sync with
+    # the test file.
+    deny_words: 
+      - "costom_word1"
+    # Replacement rules for e-mail addresses, IP addresses and "sk-" keys, all
+    # with restore enabled.
+    # NOTE(review): the %{...} names look like grok-style patterns, and "hash"
+    # presumably produces the 48a7e98a... value seen in the canned replies for
+    # sk-12345 — confirm against the plugin docs.
+    replace_roles:
+      - regex: "%{EMAILLOCALPART}@%{HOSTNAME:domain}"
+        type: "replace"
+        restore: true
+        value: "****@$domain"
+      - regex: "%{IP}"
+        type: "replace"
+        restore: true
+        value: "***.***.***.***"
+      - regex: "sk-[0-9a-zA-Z]*"
+        restore: true
+        type: "hash"
+
+  url: file:///opt/plugins/wasm-rust/extensions/ai-data-masking/plugin.wasm