feat: ai敏感词拦截插件 (#1190)

This commit is contained in:
007gzs
2024-08-16 17:24:32 +08:00
committed by GitHub
parent dcea483c61
commit 25b085cb5e
20 changed files with 66898 additions and 166 deletions

View File

@@ -49,6 +49,11 @@ jobs:
with:
go-version: 1.19
- name: Setup Rust
uses: actions-rs/toolchain@v1
with:
toolchain: stable
if: matrix.wasmPluginType == 'RUST'
- name: Setup Golang Caches
uses: actions/cache@v4
with:

View File

@@ -4,15 +4,22 @@ version = 3
[[package]]
name = "ahash"
version = "0.8.3"
version = "0.8.11"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2c99f64d1e06488f620f932677e24bc6e2897582980441ae90a671415bd7ec2f"
checksum = "e89da841a80418a9b391ebaea17f5c112ffaaa96f621d2c285b5174da76b9011"
dependencies = [
"cfg-if",
"once_cell",
"version_check",
"zerocopy",
]
[[package]]
name = "allocator-api2"
version = "0.2.18"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5c6cb57a04249c6480766f7f7cef5467412af1490f8d1e243141daddada3264f"
[[package]]
name = "cfg-if"
version = "1.0.0"
@@ -21,9 +28,9 @@ checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
[[package]]
name = "getrandom"
version = "0.2.9"
version = "0.2.15"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c85e1d9ab2eadba7e5040d4e09cbd6d072b76a557ad64e797c2cb9d4da21d7e4"
checksum = "c4567c8db10ae91089c99af84c68c38da3ec2f087c3f82960bcdbf3656b6f4d7"
dependencies = [
"cfg-if",
"libc",
@@ -32,11 +39,12 @@ dependencies = [
[[package]]
name = "hashbrown"
version = "0.13.2"
version = "0.14.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "43a3c133739dddd0d2990f9a4bdf8eb4b21ef50e4851ca85ab661199821d510e"
checksum = "e5274423e17b7c9fc20b6e7e208532f9b19825d82dfd615708b70edd83df41f1"
dependencies = [
"ahash",
"allocator-api2",
]
[[package]]
@@ -52,24 +60,27 @@ dependencies = [
[[package]]
name = "itoa"
version = "1.0.6"
version = "1.0.11"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "453ad9f582a441959e5f0d088b02ce04cfe8d51a8eaf077f12ac6d3e94164ca6"
checksum = "49f1f14873335454500d59611f1cf4a4b0f786f9ac11f4312a78e4cf2566695b"
[[package]]
name = "libc"
version = "0.2.144"
version = "0.2.155"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2b00cc1c228a6782d0f076e7b232802e0c5689d41bb5df366f2a6b6621cfdfe1"
checksum = "97b3888a4aecf77e811145cadf6eef5901f4782c53886191b2f693f24761847c"
[[package]]
name = "log"
version = "0.4.17"
version = "0.4.22"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "abb12e687cfb44aa40f41fc3978ef76448f9b6038cad6aef4259d3c095a2382e"
dependencies = [
"cfg-if",
]
checksum = "a7a70ba024b9dc04c27ea2f0c0548feb474ec5c54bba33a7f72f873a39d07b24"
[[package]]
name = "memchr"
version = "2.7.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "78ca9ab1a0babb1e7d5695e3530886289c18cf2f87ec19a575a0abdce112e3a3"
[[package]]
name = "multimap"
@@ -82,60 +93,129 @@ dependencies = [
[[package]]
name = "once_cell"
version = "1.17.1"
version = "1.19.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b7e5500299e16ebb147ae15a00a942af264cf3688f47923b8fc2cd5858f23ad3"
checksum = "3fdb12b2476b595f9358c5161aa467c2438859caa136dec86c26fdd2efe17b92"
[[package]]
name = "proc-macro2"
version = "1.0.86"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5e719e8df665df0d1c8fbfd238015744736151d4445ec0836b8e628aae103b77"
dependencies = [
"unicode-ident",
]
[[package]]
name = "proxy-wasm"
version = "0.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "823b744520cd4a54ba7ebacbffe4562e839d6dcd8f89209f96a1ace4f5229cd4"
version = "0.2.2"
source = "git+https://github.com/higress-group/proxy-wasm-rust-sdk?branch=main#73833051f57d483570cf5aaa9d62bd7402fae63b"
dependencies = [
"hashbrown",
"log",
]
[[package]]
name = "ryu"
version = "1.0.13"
name = "quote"
version = "1.0.36"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f91339c0467de62360649f8d3e185ca8de4224ff281f66000de5eb2a77a79041"
checksum = "0fa76aaf39101c457836aec0ce2316dbdc3ab723cdda1c6bd4e6ad4208acaca7"
dependencies = [
"proc-macro2",
]
[[package]]
name = "ryu"
version = "1.0.18"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f3cb5ba0dc43242ce17de99c180e96db90b235b8a9fdc9543c96d2209116bd9f"
[[package]]
name = "serde"
version = "1.0.163"
version = "1.0.207"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2113ab51b87a539ae008b5c6c02dc020ffa39afd2d83cffcb3f4eb2722cebec2"
checksum = "5665e14a49a4ea1b91029ba7d3bca9f299e1f7cfa194388ccc20f14743e784f2"
dependencies = [
"serde_derive",
]
[[package]]
name = "serde_derive"
version = "1.0.207"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6aea2634c86b0e8ef2cfdc0c340baede54ec27b1e46febd7f80dffb2aa44a00e"
dependencies = [
"proc-macro2",
"quote",
"syn",
]
[[package]]
name = "serde_json"
version = "1.0.96"
version = "1.0.125"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "057d394a50403bcac12672b2b18fb387ab6d289d957dab67dd201875391e52f1"
checksum = "83c8e735a073ccf5be70aa8066aa984eaf2fa000db6c8d0100ae605b366d31ed"
dependencies = [
"itoa",
"memchr",
"ryu",
"serde",
]
[[package]]
name = "uuid"
version = "1.3.3"
name = "syn"
version = "2.0.74"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "345444e32442451b267fc254ae85a209c64be56d2890e601a0c37ff0c3c5ecd2"
checksum = "1fceb41e3d546d0bd83421d3409b1460cc7444cd389341a4c880fe7a042cb3d7"
dependencies = [
"proc-macro2",
"quote",
"unicode-ident",
]
[[package]]
name = "unicode-ident"
version = "1.0.12"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b"
[[package]]
name = "uuid"
version = "1.10.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "81dfa00651efa65069b0b6b651f4aaa31ba9e3c3ce0137aaad053604ee7e0314"
dependencies = [
"getrandom",
]
[[package]]
name = "version_check"
version = "0.9.4"
version = "0.9.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f"
checksum = "0b928f33d975fc6ad9f86c8f283853ad26bdd5b10b7f1542aa2fa15e2289105a"
[[package]]
name = "wasi"
version = "0.11.0+wasi-snapshot-preview1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423"
[[package]]
name = "zerocopy"
version = "0.7.35"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1b9b4fd18abc82b8136838da5d50bae7bdea537c574d8dc1a34ed098d6c166f0"
dependencies = [
"zerocopy-derive",
]
[[package]]
name = "zerocopy-derive"
version = "0.7.35"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fa4f8080344d4671fb4e831a13ad1e68092748387dfc4f55e356242fae12ce3e"
dependencies = [
"proc-macro2",
"quote",
"syn",
]

View File

@@ -6,7 +6,7 @@ edition = "2021"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]
proxy-wasm = "0.2.1"
proxy-wasm = { git="https://github.com/higress-group/proxy-wasm-rust-sdk", branch="main", version="0.2.2" }
serde = "1.0"
serde_json = "1.0"
uuid = { version = "1.3.3", features = ["v4"] }

View File

@@ -1,5 +1,6 @@
FROM rust:1.69 as builder
FROM rust:1.80 as builder
WORKDIR /workspace
RUN apt update && apt-get install gcc gcc-multilib llvm clang -y && apt clean
RUN rustup target add wasm32-wasi
ARG PLUGIN_NAME="say-hello"
ARG BUILD_OPTS="--release"
@@ -9,4 +10,4 @@ RUN cargo build --target wasm32-wasi $BUILD_OPTS \
&& cp target/wasm32-wasi/release/*.wasm /main.wasm
FROM scratch
COPY --from=builder /main.wasm plugin.wasm
COPY --from=builder /main.wasm plugin.wasm

View File

@@ -6,6 +6,12 @@ IMAGE_TAG = $(if $(strip $(PLUGIN_VERSION)),${PLUGIN_VERSION},${BUILD_TIME}-${CO
IMG ?= ${REGISTRY}${PLUGIN_NAME}:${IMAGE_TAG}
.DEFAULT:
lint-base:
cargo fmt --all --check
cargo clippy --workspace --all-features --all-targets
lint:
cargo fmt --all --check --manifest-path extensions/${PLUGIN_NAME}/Cargo.toml
cargo clippy --workspace --all-features --all-targets --manifest-path extensions/${PLUGIN_NAME}/Cargo.toml
build:
DOCKER_BUILDKIT=1 docker build \
--build-arg PLUGIN_NAME=${PLUGIN_NAME} \
@@ -13,4 +19,4 @@ build:
--output extensions/${PLUGIN_NAME} \
.
@echo ""
@echo "output wasm file: extensions/${PLUGIN_NAME}/plugin.wasm"
@echo "output wasm file: extensions/${PLUGIN_NAME}/plugin.wasm"

View File

@@ -0,0 +1,851 @@
# This file is automatically @generated by Cargo.
# It is not intended for manual editing.
version = 3
[[package]]
name = "ahash"
version = "0.8.11"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e89da841a80418a9b391ebaea17f5c112ffaaa96f621d2c285b5174da76b9011"
dependencies = [
"cfg-if",
"once_cell",
"version_check",
"zerocopy",
]
[[package]]
name = "aho-corasick"
version = "1.1.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8e60d3430d3a69478ad0993f19238d2df97c507009a52b3c10addcd7f6bcb916"
dependencies = [
"memchr",
]
[[package]]
name = "ai-data-masking"
version = "0.1.0"
dependencies = [
"fancy-regex",
"grok",
"higress-wasm-rust",
"jieba-rs",
"jsonpath-rust",
"lazy_static",
"md5",
"proxy-wasm",
"rust-embed",
"serde",
"serde_json",
]
[[package]]
name = "allocator-api2"
version = "0.2.18"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5c6cb57a04249c6480766f7f7cef5467412af1490f8d1e243141daddada3264f"
[[package]]
name = "bit-set"
version = "0.5.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0700ddab506f33b20a03b13996eccd309a48e5ff77d0d95926aa0210fb4e95f1"
dependencies = [
"bit-vec",
]
[[package]]
name = "bit-vec"
version = "0.6.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "349f9b6a179ed607305526ca489b34ad0a41aed5f7980fa90eb03160b69598fb"
[[package]]
name = "bitflags"
version = "1.3.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a"
[[package]]
name = "block-buffer"
version = "0.10.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3078c7629b62d3f0439517fa394996acacc5cbc91c5a20d8c658e77abd503a71"
dependencies = [
"generic-array",
]
[[package]]
name = "byteorder"
version = "1.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b"
[[package]]
name = "cc"
version = "1.1.11"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5fb8dd288a69fc53a1996d7ecfbf4a20d59065bff137ce7e56bbd620de191189"
dependencies = [
"shlex",
]
[[package]]
name = "cedarwood"
version = "0.4.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6d910bedd62c24733263d0bed247460853c9d22e8956bd4cd964302095e04e90"
dependencies = [
"smallvec",
]
[[package]]
name = "cfg-if"
version = "1.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
[[package]]
name = "cpufeatures"
version = "0.2.13"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "51e852e6dc9a5bed1fae92dd2375037bf2b768725bf3be87811edee3249d09ad"
dependencies = [
"libc",
]
[[package]]
name = "crypto-common"
version = "0.1.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1bfb12502f3fc46cca1bb51ac28df9d618d813cdc3d2f25b9fe775a34af26bb3"
dependencies = [
"generic-array",
"typenum",
]
[[package]]
name = "darling"
version = "0.20.10"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6f63b86c8a8826a49b8c21f08a2d07338eec8d900540f8630dc76284be802989"
dependencies = [
"darling_core",
"darling_macro",
]
[[package]]
name = "darling_core"
version = "0.20.10"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "95133861a8032aaea082871032f5815eb9e98cef03fa916ab4500513994df9e5"
dependencies = [
"fnv",
"ident_case",
"proc-macro2",
"quote",
"strsim",
"syn",
]
[[package]]
name = "darling_macro"
version = "0.20.10"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d336a2a514f6ccccaa3e09b02d41d35330c07ddf03a62165fcec10bb561c7806"
dependencies = [
"darling_core",
"quote",
"syn",
]
[[package]]
name = "derive_builder"
version = "0.20.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0350b5cb0331628a5916d6c5c0b72e97393b8b6b03b47a9284f4e7f5a405ffd7"
dependencies = [
"derive_builder_macro",
]
[[package]]
name = "derive_builder_core"
version = "0.20.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d48cda787f839151732d396ac69e3473923d54312c070ee21e9effcaa8ca0b1d"
dependencies = [
"darling",
"proc-macro2",
"quote",
"syn",
]
[[package]]
name = "derive_builder_macro"
version = "0.20.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "206868b8242f27cecce124c19fd88157fbd0dd334df2587f36417bafbc85097b"
dependencies = [
"derive_builder_core",
"syn",
]
[[package]]
name = "digest"
version = "0.10.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9ed9a281f7bc9b7576e61468ba615a66a5c8cfdff42420a70aa82701a3b1e292"
dependencies = [
"block-buffer",
"crypto-common",
]
[[package]]
name = "fancy-regex"
version = "0.13.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "531e46835a22af56d1e3b66f04844bed63158bc094a628bec1d321d9b4c44bf2"
dependencies = [
"bit-set",
"regex-automata",
"regex-syntax",
]
[[package]]
name = "fnv"
version = "1.0.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1"
[[package]]
name = "fxhash"
version = "0.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c31b6d751ae2c7f11320402d34e41349dd1016f8d5d45e48c4312bc8625af50c"
dependencies = [
"byteorder",
]
[[package]]
name = "generic-array"
version = "0.14.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "85649ca51fd72272d7821adaf274ad91c288277713d9c18820d8499a7ff69e9a"
dependencies = [
"typenum",
"version_check",
]
[[package]]
name = "getrandom"
version = "0.2.15"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c4567c8db10ae91089c99af84c68c38da3ec2f087c3f82960bcdbf3656b6f4d7"
dependencies = [
"cfg-if",
"libc",
"wasi",
]
[[package]]
name = "glob"
version = "0.3.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d2fabcfbdc87f4758337ca535fb41a6d701b65693ce38287d856d1674551ec9b"
[[package]]
name = "grok"
version = "2.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "273797968160270573071022613fc4aa28b91fe68f3eef6c96a1b2a1947ddfbd"
dependencies = [
"glob",
"onig",
]
[[package]]
name = "hashbrown"
version = "0.14.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e5274423e17b7c9fc20b6e7e208532f9b19825d82dfd615708b70edd83df41f1"
dependencies = [
"ahash",
"allocator-api2",
]
[[package]]
name = "higress-wasm-rust"
version = "0.1.0"
dependencies = [
"multimap",
"proxy-wasm",
"serde",
"serde_json",
"uuid",
]
[[package]]
name = "ident_case"
version = "1.0.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b9e0384b61958566e926dc50660321d12159025e767c18e043daf26b70104c39"
[[package]]
name = "itoa"
version = "1.0.11"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "49f1f14873335454500d59611f1cf4a4b0f786f9ac11f4312a78e4cf2566695b"
[[package]]
name = "jieba-rs"
version = "0.7.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c1e2b0210dc78b49337af9e49d7ae41a39dceac6e5985613f1cf7763e2f76a25"
dependencies = [
"cedarwood",
"derive_builder",
"fxhash",
"lazy_static",
"phf",
"phf_codegen",
"regex",
]
[[package]]
name = "jsonpath-rust"
version = "0.7.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d64f9886fc067a709ab27faf63b7d3f4d1ec570a700705408b0b0683e2f43897"
dependencies = [
"pest",
"pest_derive",
"regex",
"serde_json",
"thiserror",
]
[[package]]
name = "lazy_static"
version = "1.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe"
[[package]]
name = "libc"
version = "0.2.155"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "97b3888a4aecf77e811145cadf6eef5901f4782c53886191b2f693f24761847c"
[[package]]
name = "log"
version = "0.4.22"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a7a70ba024b9dc04c27ea2f0c0548feb474ec5c54bba33a7f72f873a39d07b24"
[[package]]
name = "md5"
version = "0.7.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "490cc448043f947bae3cbee9c203358d62dbee0db12107a74be5c30ccfd09771"
[[package]]
name = "memchr"
version = "2.7.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "78ca9ab1a0babb1e7d5695e3530886289c18cf2f87ec19a575a0abdce112e3a3"
[[package]]
name = "multimap"
version = "0.10.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "defc4c55412d89136f966bbb339008b474350e5e6e78d2714439c386b3137a03"
dependencies = [
"serde",
]
[[package]]
name = "once_cell"
version = "1.19.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3fdb12b2476b595f9358c5161aa467c2438859caa136dec86c26fdd2efe17b92"
[[package]]
name = "onig"
version = "6.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8c4b31c8722ad9171c6d77d3557db078cab2bd50afcc9d09c8b315c59df8ca4f"
dependencies = [
"bitflags",
"libc",
"once_cell",
"onig_sys",
]
[[package]]
name = "onig_sys"
version = "69.8.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7b829e3d7e9cc74c7e315ee8edb185bf4190da5acde74afd7fc59c35b1f086e7"
dependencies = [
"cc",
"pkg-config",
]
[[package]]
name = "pest"
version = "2.7.11"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cd53dff83f26735fdc1ca837098ccf133605d794cdae66acfc2bfac3ec809d95"
dependencies = [
"memchr",
"thiserror",
"ucd-trie",
]
[[package]]
name = "pest_derive"
version = "2.7.11"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2a548d2beca6773b1c244554d36fcf8548a8a58e74156968211567250e48e49a"
dependencies = [
"pest",
"pest_generator",
]
[[package]]
name = "pest_generator"
version = "2.7.11"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3c93a82e8d145725dcbaf44e5ea887c8a869efdcc28706df2d08c69e17077183"
dependencies = [
"pest",
"pest_meta",
"proc-macro2",
"quote",
"syn",
]
[[package]]
name = "pest_meta"
version = "2.7.11"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a941429fea7e08bedec25e4f6785b6ffaacc6b755da98df5ef3e7dcf4a124c4f"
dependencies = [
"once_cell",
"pest",
"sha2",
]
[[package]]
name = "phf"
version = "0.11.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ade2d8b8f33c7333b51bcf0428d37e217e9f32192ae4772156f65063b8ce03dc"
dependencies = [
"phf_shared",
]
[[package]]
name = "phf_codegen"
version = "0.11.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e8d39688d359e6b34654d328e262234662d16cc0f60ec8dcbe5e718709342a5a"
dependencies = [
"phf_generator",
"phf_shared",
]
[[package]]
name = "phf_generator"
version = "0.11.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "48e4cc64c2ad9ebe670cb8fd69dd50ae301650392e81c05f9bfcb2d5bdbc24b0"
dependencies = [
"phf_shared",
"rand",
]
[[package]]
name = "phf_shared"
version = "0.11.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "90fcb95eef784c2ac79119d1dd819e162b5da872ce6f3c3abe1e8ca1c082f72b"
dependencies = [
"siphasher",
]
[[package]]
name = "pkg-config"
version = "0.3.30"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d231b230927b5e4ad203db57bbcbee2802f6bce620b1e4a9024a07d94e2907ec"
[[package]]
name = "proc-macro2"
version = "1.0.86"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5e719e8df665df0d1c8fbfd238015744736151d4445ec0836b8e628aae103b77"
dependencies = [
"unicode-ident",
]
[[package]]
name = "proxy-wasm"
version = "0.2.2"
source = "git+https://github.com/higress-group/proxy-wasm-rust-sdk?branch=main#73833051f57d483570cf5aaa9d62bd7402fae63b"
dependencies = [
"hashbrown",
"log",
]
[[package]]
name = "quote"
version = "1.0.36"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0fa76aaf39101c457836aec0ce2316dbdc3ab723cdda1c6bd4e6ad4208acaca7"
dependencies = [
"proc-macro2",
]
[[package]]
name = "rand"
version = "0.8.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404"
dependencies = [
"rand_core",
]
[[package]]
name = "rand_core"
version = "0.6.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c"
[[package]]
name = "regex"
version = "1.10.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4219d74c6b67a3654a9fbebc4b419e22126d13d2f3c4a07ee0cb61ff79a79619"
dependencies = [
"aho-corasick",
"memchr",
"regex-automata",
"regex-syntax",
]
[[package]]
name = "regex-automata"
version = "0.4.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "38caf58cc5ef2fed281f89292ef23f6365465ed9a41b7a7754eb4e26496c92df"
dependencies = [
"aho-corasick",
"memchr",
"regex-syntax",
]
[[package]]
name = "regex-syntax"
version = "0.8.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7a66a03ae7c801facd77a29370b4faec201768915ac14a721ba36f20bc9c209b"
[[package]]
name = "rust-embed"
version = "8.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fa66af4a4fdd5e7ebc276f115e895611a34739a9c1c01028383d612d550953c0"
dependencies = [
"rust-embed-impl",
"rust-embed-utils",
"walkdir",
]
[[package]]
name = "rust-embed-impl"
version = "8.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6125dbc8867951125eec87294137f4e9c2c96566e61bf72c45095a7c77761478"
dependencies = [
"proc-macro2",
"quote",
"rust-embed-utils",
"syn",
"walkdir",
]
[[package]]
name = "rust-embed-utils"
version = "8.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2e5347777e9aacb56039b0e1f28785929a8a3b709e87482e7442c72e7c12529d"
dependencies = [
"sha2",
"walkdir",
]
[[package]]
name = "ryu"
version = "1.0.18"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f3cb5ba0dc43242ce17de99c180e96db90b235b8a9fdc9543c96d2209116bd9f"
[[package]]
name = "same-file"
version = "1.0.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "93fc1dc3aaa9bfed95e02e6eadabb4baf7e3078b0bd1b4d7b6b0b68378900502"
dependencies = [
"winapi-util",
]
[[package]]
name = "serde"
version = "1.0.207"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5665e14a49a4ea1b91029ba7d3bca9f299e1f7cfa194388ccc20f14743e784f2"
dependencies = [
"serde_derive",
]
[[package]]
name = "serde_derive"
version = "1.0.207"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6aea2634c86b0e8ef2cfdc0c340baede54ec27b1e46febd7f80dffb2aa44a00e"
dependencies = [
"proc-macro2",
"quote",
"syn",
]
[[package]]
name = "serde_json"
version = "1.0.125"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "83c8e735a073ccf5be70aa8066aa984eaf2fa000db6c8d0100ae605b366d31ed"
dependencies = [
"itoa",
"memchr",
"ryu",
"serde",
]
[[package]]
name = "sha2"
version = "0.10.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "793db75ad2bcafc3ffa7c68b215fee268f537982cd901d132f89c6343f3a3dc8"
dependencies = [
"cfg-if",
"cpufeatures",
"digest",
]
[[package]]
name = "shlex"
version = "1.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64"
[[package]]
name = "siphasher"
version = "0.3.11"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "38b58827f4464d87d377d175e90bf58eb00fd8716ff0a62f80356b5e61555d0d"
[[package]]
name = "smallvec"
version = "1.13.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3c5e1a9a646d36c3599cd173a41282daf47c44583ad367b8e6837255952e5c67"
[[package]]
name = "strsim"
version = "0.11.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f"
[[package]]
name = "syn"
version = "2.0.74"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1fceb41e3d546d0bd83421d3409b1460cc7444cd389341a4c880fe7a042cb3d7"
dependencies = [
"proc-macro2",
"quote",
"unicode-ident",
]
[[package]]
name = "thiserror"
version = "1.0.63"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c0342370b38b6a11b6cc11d6a805569958d54cfa061a29969c3b5ce2ea405724"
dependencies = [
"thiserror-impl",
]
[[package]]
name = "thiserror-impl"
version = "1.0.63"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a4558b58466b9ad7ca0f102865eccc95938dca1a74a856f2b57b6629050da261"
dependencies = [
"proc-macro2",
"quote",
"syn",
]
[[package]]
name = "typenum"
version = "1.17.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "42ff0bf0c66b8238c6f3b578df37d0b7848e55df8577b3f74f92a69acceeb825"
[[package]]
name = "ucd-trie"
version = "0.1.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ed646292ffc8188ef8ea4d1e0e0150fb15a5c2e12ad9b8fc191ae7a8a7f3c4b9"
[[package]]
name = "unicode-ident"
version = "1.0.12"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b"
[[package]]
name = "uuid"
version = "1.10.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "81dfa00651efa65069b0b6b651f4aaa31ba9e3c3ce0137aaad053604ee7e0314"
dependencies = [
"getrandom",
]
[[package]]
name = "version_check"
version = "0.9.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0b928f33d975fc6ad9f86c8f283853ad26bdd5b10b7f1542aa2fa15e2289105a"
[[package]]
name = "walkdir"
version = "2.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "29790946404f91d9c5d06f9874efddea1dc06c5efe94541a7d6863108e3a5e4b"
dependencies = [
"same-file",
"winapi-util",
]
[[package]]
name = "wasi"
version = "0.11.0+wasi-snapshot-preview1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423"
[[package]]
name = "winapi-util"
version = "0.1.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cf221c93e13a30d793f7645a0e7762c55d169dbb0a49671918a2319d289b10bb"
dependencies = [
"windows-sys",
]
[[package]]
name = "windows-sys"
version = "0.59.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1e38bc4d79ed67fd075bcc251a1c39b32a1776bbe92e5bef1f0bf1f8c531853b"
dependencies = [
"windows-targets",
]
[[package]]
name = "windows-targets"
version = "0.52.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9b724f72796e036ab90c1021d4780d4d3d648aca59e491e6b98e725b84e99973"
dependencies = [
"windows_aarch64_gnullvm",
"windows_aarch64_msvc",
"windows_i686_gnu",
"windows_i686_gnullvm",
"windows_i686_msvc",
"windows_x86_64_gnu",
"windows_x86_64_gnullvm",
"windows_x86_64_msvc",
]
[[package]]
name = "windows_aarch64_gnullvm"
version = "0.52.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3"
[[package]]
name = "windows_aarch64_msvc"
version = "0.52.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469"
[[package]]
name = "windows_i686_gnu"
version = "0.52.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b"
[[package]]
name = "windows_i686_gnullvm"
version = "0.52.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66"
[[package]]
name = "windows_i686_msvc"
version = "0.52.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66"
[[package]]
name = "windows_x86_64_gnu"
version = "0.52.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78"
[[package]]
name = "windows_x86_64_gnullvm"
version = "0.52.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d"
[[package]]
name = "windows_x86_64_msvc"
version = "0.52.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec"
[[package]]
name = "zerocopy"
version = "0.7.35"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1b9b4fd18abc82b8136838da5d50bae7bdea537c574d8dc1a34ed098d6c166f0"
dependencies = [
"zerocopy-derive",
]
[[package]]
name = "zerocopy-derive"
version = "0.7.35"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fa4f8080344d4671fb4e831a13ad1e68092748387dfc4f55e356242fae12ce3e"
dependencies = [
"proc-macro2",
"quote",
"syn",
]

View File

@@ -0,0 +1,22 @@
[package]
name = "ai-data-masking"
version = "0.1.0"
edition = "2021"
publish = false
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[lib]
crate-type = ["cdylib"]
[dependencies]
higress-wasm-rust = { path = "../../", version = "0.1.0" }
proxy-wasm = { git="https://github.com/higress-group/proxy-wasm-rust-sdk", branch="main", version="0.2.2" }
serde = { version = "1.0", features = ["derive"] }
serde_json = "1.0"
fancy-regex = "0"
md5 = "0"
grok = "2"
lazy_static = "1"
jieba-rs = "0"
rust-embed="8.5.0"
jsonpath-rust = "0"

View File

@@ -0,0 +1,155 @@
# 功能说明
对请求/返回中的敏感词拦截、替换
```mermaid
sequenceDiagram
participant 用户
participant 敏感词插件
participant 后端服务
用户->>敏感词插件: 请求数据(如:包含admin@gmail.com)
敏感词插件->>敏感词插件: 数据解析
opt 如果包含拦截词
敏感词插件-->>用户: 返回预设错误消息 (拦截)
end
opt 替换敏感词
敏感词插件->>后端服务: 关键词替换后的请求数据 (将admin@gmail.com替换为****@gmail.com)
后端服务->>敏感词插件: 原始返回响应(包含 ****@gmail.com)
敏感词插件->>用户: 数据恢复后的响应数据(将****@gmail.com恢复为admin@gmail.com)
end
```
## 处理数据范围
- openai协议请求/返回对话内容
- jsonpath只处理指定字段
- raw整个请求/返回body
## 敏感词拦截
- 处理数据范围中出现敏感词直接拦截,返回预设错误信息
- 支持系统内置敏感词库和自定义敏感词
## 敏感词替换
- 将请求数据中出现的敏感词替换为脱敏字符串,传递给后端服务。可保证敏感数据不出域
- 部分脱敏数据在后端服务返回后可进行还原
- 自定义规则支持标准正则和grok规则,替换字符串支持变量替换
# 配置字段
| 名称 | 数据类型 | 默认值 | 描述 |
| -------- | -------- | -------- | -------- |
| deny_openai | bool | true | 对openai协议进行拦截 |
| deny_jsonpath | array of string | [] | 对指定jsonpath拦截 |
| deny_raw | bool | false | 对原始body拦截 |
| system_deny | bool | true | 开启内置拦截规则 |
| deny_code | int | 200 | 拦截时http状态码 |
| deny_message | string | 提问或回答中包含敏感词,已被屏蔽 | 拦截时ai返回消息 |
| deny_raw_message | string | {"errmsg":"提问或回答中包含敏感词,已被屏蔽"} | 非openai拦截时返回内容 |
| deny_content_type | string | application/json | 非openai拦截时返回content_type头 |
| deny_words | array of string | [] | 自定义敏感词列表 |
| replace_roles | array | - | 自定义敏感词正则替换 |
| replace_roles.regex | string | - | 规则正则(内置GROK规则) |
| replace_roles.type | [replace, hash] | - | 替换类型 |
| replace_roles.restore | bool | false | 是否恢复 |
| replace_roles.value | string | - | 替换值(支持正则变量) |
# 配置示例
```yaml
system_deny: true
deny_openai: true
deny_jsonpath:
- "$.messages[*].content"
deny_raw: true
deny_code: 200
deny_message: "提问或回答中包含敏感词,已被屏蔽"
deny_raw_message: "{\"errmsg\":\"提问或回答中包含敏感词,已被屏蔽\"}"
deny_content_type: "application/json"
deny_words:
- "自定义敏感词1"
- "自定义敏感词2"
replace_roles:
- regex: "%{MOBILE}"
type: "replace"
value: "****"
# 手机号 13800138000 -> ****
- regex: "%{EMAILLOCALPART}@%{HOSTNAME:domain}"
type: "replace"
restore: true
value: "****@$domain"
# 电子邮箱 admin@gmail.com -> ****@gmail.com
- regex: "%{IP}"
type: "replace"
restore: true
value: "***.***.***.***"
# ip 192.168.0.1 -> ***.***.***.***
- regex: "%{IDCARD}"
type: "replace"
value: "****"
# 身份证号 110000000000000000 -> ****
- regex: "sk-[0-9a-zA-Z]*"
restore: true
type: "hash"
# hash sk-12345 -> 9cb495455da32f41567dab1d07f1973d
# hash后的值提供给大模型,从大模型返回的数据中会将hash值还原为原始值
```
# 敏感词替换样例
## 用户请求内容
请将 `curl http://172.20.5.14/api/openai/v1/chat/completions -H "Authorization: sk-12345" -H "Auth: test@gmail.com"` 改成post方式
## 处理后请求大模型内容
`curl http://***.***.***.***/api/openai/v1/chat/completions -H "Authorization: 48a7e98a91d93896d8dac522c5853948" -H "Auth: ****@gmail.com"` 改成post方式
## 大模型返回内容
您想要将一个 `curl` 的 GET 请求转换为 POST 请求,并且这个请求是向一个特定的 API 发送数据。下面是修改后的 `curl` 命令,以 POST 方式发送:
```sh
curl -X POST \
-H "Authorization: 48a7e98a91d93896d8dac522c5853948" \
-H "Auth: ****@gmail.com" \
-H "Content-Type: application/json" \
-d '{"key":"value"}' \
http://***.***.***.***/api/openai/v1/chat/completions
```
这里做了如下几个修改:
- `-X POST` 设置请求方式为 POST。
- `-H "Content-Type: application/json"` 设置请求头中的 `Content-Type` 为 `application/json`,这通常用来告诉服务器您发送的数据格式是 JSON。
- `-d '{"key":"value"}'` 这里设置了要发送的数据,`'{"key":"value"}'` 是一个简单的 JSON 对象示例。您需要将其替换为您实际想要发送的数据。
请注意,您需要将 `"key":"value"` 替换为您实际要发送的数据内容。如果您的 API 接受不同的数据结构或者需要特定的字段,请根据实际情况调整这部分内容。
## 处理后返回用户内容
您想要将一个 `curl` 的 GET 请求转换为 POST 请求,并且这个请求是向一个特定的 API 发送数据。下面是修改后的 `curl` 命令,以 POST 方式发送:
```sh
curl -X POST \
-H "Authorization: sk-12345" \
-H "Auth: test@gmail.com" \
-H "Content-Type: application/json" \
-d '{"key":"value"}' \
http://172.20.5.14/api/openai/v1/chat/completions
```
这里做了如下几个修改:
- `-X POST` 设置请求方式为 POST。
- `-H "Content-Type: application/json"` 设置请求头中的 `Content-Type` 为 `application/json`,这通常用来告诉服务器您发送的数据格式是 JSON。
- `-d '{"key":"value"}'` 这里设置了要发送的数据,`'{"key":"value"}'` 是一个简单的 JSON 对象示例。您需要将其替换为您实际想要发送的数据。
请注意,您需要将 `"key":"value"` 替换为您实际要发送的数据内容。如果您的 API 接受不同的数据结构或者需要特定的字段,请根据实际情况调整这部分内容。
# 相关说明
- 流模式中,如果脱敏后的词被多个chunk拆分,可能无法进行还原
- 流模式中,如果敏感词语被多个chunk拆分,可能会有敏感词的一部分返回给用户的情况
- grok 内置规则列表 https://help.aliyun.com/zh/sls/user-guide/grok-patterns
- 内置拦截规则数据来源 https://github.com/houbb/sensitive-word/tree/master/src/main/resources

View File

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,732 @@
// Copyright (c) 2023 Alibaba Group Holding Ltd.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use fancy_regex::Regex;
use grok::patterns;
use higress_wasm_rust::log::Log;
use higress_wasm_rust::plugin_wrapper::{HttpContextWrapper, RootContextWrapper};
use higress_wasm_rust::rule_matcher::{on_configure, RuleMatcher, SharedRuleMatcher};
use jieba_rs::Jieba;
use jsonpath_rust::{JsonPath, JsonPathValue};
use lazy_static::lazy_static;
use proxy_wasm::traits::{Context, HttpContext, RootContext};
use proxy_wasm::types::{Bytes, ContextType, DataAction, HeaderAction, LogLevel};
use rust_embed::Embed;
use serde::de::Error;
use serde::Deserialize;
use serde::Deserializer;
use serde_json::{json, Value};
use std::cell::RefCell;
use std::collections::{BTreeMap, HashMap, HashSet, VecDeque};
use std::ops::DerefMut;
use std::rc::Rc;
use std::str::FromStr;
use std::vec;
// Plugin entry point: sets the proxy-wasm log level to Trace and registers a
// factory that creates an `AiDataMaskingRoot` as the root context.
proxy_wasm::main! {{
proxy_wasm::set_log_level(LogLevel::Trace);
proxy_wasm::set_root_context(|_|Box::new(AiDataMaskingRoot::new()));
}}
/// Name handed to `Log::new` when the root context is created.
const PLUGIN_NAME: &str = "ai-data-masking";
/// Regex that recognises a grok reference `%{PATTERN}` or `%{PATTERN:alias}`.
/// Named groups: `name` (everything between the braces), `pattern` (the
/// referenced pattern name) and the optional `alias`.
// NOTE(review): in ASCII the range `A-z` also spans `[`, `\`, `]`, `^`, `_`
// and the backtick, not only letters — confirm this is intended before
// tightening it.
const GROK_PATTERN: &str = r"%\{(?<name>(?<pattern>[A-z0-9]+)(?::(?<alias>[A-z0-9_:;\/\s\.]+))?)\}";
/// Access to the files under `res/`, exposed through rust-embed's `Embed`
/// derive. `DenyWord::default` reads `sensitive_word_dict.txt` from it.
#[derive(Embed)]
#[folder = "res/"]
struct Asset;
/// A deny-word dictionary.
#[derive(Default, Debug, Clone)]
struct DenyWord {
// Tokenizer used by `check` to cut a message into candidate words.
jieba: Jieba,
// The denied words; a message is rejected when any token is in this set.
words: HashSet<String>,
}
/// Process-wide shared state: the built-in deny words plus the grok machinery.
struct System {
// Built-in deny-word dictionary (consulted when `system_deny` is enabled).
deny_word: DenyWord,
// Compiled `GROK_PATTERN`, used to find `%{...}` references.
grok_regex: Regex,
// Pattern name -> regex source with nested grok references already expanded.
grok_patterns: BTreeMap<String, String>,
}
// Lazily initialised global `System`, built on first access.
lazy_static! {
static ref SYSTEM: System = System::new();
}
/// Root context of the plugin: owns the logger and the shared rule matcher
/// that holds the parsed `AiDataMaskingConfig`.
struct AiDataMaskingRoot {
log: Log,
rule_matcher: SharedRuleMatcher<AiDataMaskingConfig>,
}
/// Per-HTTP-stream context.
struct AiDataMasking {
// Configuration for this stream; filled in by `on_config`.
config: Option<AiDataMaskingConfig>,
// Masked text -> original text, recorded while masking the request.
// `None` marks a masked text that must not be restored (see `replace_request_msg`).
mask_map: HashMap<String, Option<String>>,
// Set to true once the request body parsed as an OpenAI-style `Req`.
is_openai: bool,
// Copy of the request's `stream` flag.
stream: bool,
// Response bytes accumulated across chunks in `on_http_response_body`.
res_body: Bytes,
}
fn deserialize_regexp<'de, D>(deserializer: D) -> Result<Regex, D::Error>
where
D: Deserializer<'de>,
{
let value: Value = Deserialize::deserialize(deserializer)?;
if let Some(pattern) = value.as_str() {
let (p, _) = SYSTEM.grok_to_pattern(pattern);
if let Ok(reg) = Regex::new(&p) {
Ok(reg)
} else if let Ok(reg) = Regex::new(pattern) {
Ok(reg)
} else {
Err(Error::custom(format!("regexp error field {}", pattern)))
}
} else {
Err(Error::custom("regexp error not string".to_string()))
}
}
fn deserialize_type<'de, D>(deserializer: D) -> Result<Type, D::Error>
where
D: Deserializer<'de>,
{
let value: Value = Deserialize::deserialize(deserializer)?;
if let Some(_type) = value.as_str() {
if _type == "replace" {
Ok(Type::Replace)
} else if _type == "hash" {
Ok(Type::Hash)
} else {
Err(Error::custom(format!("regexp error value {}", _type)))
}
} else {
Err(Error::custom("type error not string".to_string()))
}
}
/// Serde helper for `AiDataMaskingConfig::deny_words`: reads a list of
/// strings and turns it into a `DenyWord` via `DenyWord::from_iter`.
fn deserialize_denyword<'de, D>(deserializer: D) -> Result<DenyWord, D::Error>
where
    D: Deserializer<'de>,
{
    let configured_words = Vec::<String>::deserialize(deserializer)?;
    let dictionary = DenyWord::from_iter(configured_words);
    Ok(dictionary)
}
fn deserialize_jsonpath<'de, D>(deserializer: D) -> Result<Vec<JsonPath>, D::Error>
where
D: Deserializer<'de>,
{
let value: Vec<String> = Deserialize::deserialize(deserializer)?;
let mut ret = Vec::new();
for v in value {
if v.is_empty() {
continue;
}
match JsonPath::from_str(&v) {
Ok(jp) => ret.push(jp),
Err(_) => return Err(Error::custom(format!("jsonpath error value {}", v))),
}
}
Ok(ret)
}
/// How a matched sensitive value is rewritten.
#[derive(Debug, Clone, PartialEq, Eq)]
enum Type {
// Substitute the match using the rule's `value` template.
Replace,
// Substitute the match with the hex MD5 digest of the matched text.
Hash,
}
/// One entry of the `replace_roles` configuration list.
#[derive(Debug, Deserialize, Clone)]
struct Rule {
// Compiled matcher; grok references are expanded by `deserialize_regexp`.
#[serde(deserialize_with = "deserialize_regexp")]
regex: Regex,
// Rewrite strategy; the config key `type` is accepted through the alias.
#[serde(deserialize_with = "deserialize_type", alias = "type")]
type_: Type,
// Whether masked values should be recorded for restoring in the response
// (defaults to false).
#[serde(default)]
restore: bool,
// Replacement template used by `Type::Replace` (defaults to an empty string).
#[serde(default)]
value: String,
}
// Serde default providers for `AiDataMaskingConfig`; `deny` also uses several
// of them when no configuration is attached to the stream.

/// Default for `deny_openai`: enabled.
fn default_deny_openai() -> bool {
true
}
/// Default for `deny_raw`: disabled.
fn default_deny_raw() -> bool {
false
}
/// Default for `system_deny`: the built-in deny words are enabled.
fn default_system_deny() -> bool {
true
}
/// Default HTTP status code of a deny response.
fn default_deny_code() -> u16 {
200
}
/// Default Content-Type of a non-OpenAI deny response.
fn default_deny_content_type() -> String {
"application/json".to_string()
}
/// Default body of a non-OpenAI deny response.
fn default_deny_raw_message() -> String {
"{\"errmsg\":\"提问或回答中包含敏感词,已被屏蔽\"}".to_string()
}
/// Default assistant message of an OpenAI-style deny response.
fn default_deny_message() -> String {
"提问或回答中包含敏感词,已被屏蔽".to_string()
}
/// Plugin configuration as deserialized from the plugin config.
#[derive(Default, Debug, Deserialize, Clone)]
pub struct AiDataMaskingConfig {
// Try to treat the request body as an OpenAI-style chat request.
#[serde(default = "default_deny_openai")]
deny_openai: bool,
// Inspect the whole body as raw text.
#[serde(default = "default_deny_raw")]
deny_raw: bool,
// JSONPath expressions selecting the string fields to inspect.
#[serde(default, deserialize_with = "deserialize_jsonpath")]
deny_jsonpath: Vec<JsonPath>,
// Also check messages against the built-in deny words in `SYSTEM`.
#[serde(default = "default_system_deny")]
system_deny: bool,
// HTTP status code used when a request is denied.
#[serde(default = "default_deny_code")]
deny_code: u16,
// Assistant message returned in OpenAI-style deny responses.
#[serde(default = "default_deny_message")]
deny_message: String,
// Body returned in non-OpenAI deny responses.
#[serde(default = "default_deny_raw_message")]
deny_raw_message: String,
// Content-Type returned with non-OpenAI deny responses.
#[serde(default = "default_deny_content_type")]
deny_content_type: String,
// Masking rules applied to request text, in order.
#[serde(default)]
replace_roles: Vec<Rule>,
// User-defined deny words; an empty dictionary when the key is absent.
#[serde(deserialize_with = "deserialize_denyword", default = "DenyWord::empty")]
deny_words: DenyWord,
}
/// A chat message; only the `content` field is read.
#[derive(Debug, Deserialize, Clone)]
struct Message {
content: String,
}
/// The fields of an OpenAI-style request body that this plugin reads.
#[derive(Debug, Deserialize, Clone)]
struct Req {
// Streaming flag; false when the key is absent.
#[serde(default)]
stream: bool,
// Required: a body without `messages` does not parse as `Req`.
messages: Vec<Message>,
}
/// One entry of a response's `choices` array.
#[derive(Default, Debug, Deserialize)]
struct ResMessage {
// Read by `on_http_response_complete_body` (buffered responses).
#[serde(default)]
message: Option<Message>,
// Read by `process_sse_message` (streamed `data:` lines).
#[serde(default)]
delta: Option<Message>,
}
/// The fields of an OpenAI-style response body that this plugin reads.
#[derive(Default, Debug, Deserialize)]
struct Res {
#[serde(default)]
choices: Vec<ResMessage>,
}
/// Extra grok patterns registered by `System::init` in addition to the ones
/// returned by `grok::patterns()`, as `(name, regex source)` pairs.
static SYSTEM_PATTERNS: &[(&str, &str)] = &[
// 8 to 11 consecutive digits.
("MOBILE", r#"\d{8,11}"#),
// 17 digits followed by a digit or x/X, or 15 digits.
("IDCARD", r#"\d{17}[0-9xX]|\d{15}"#),
];
impl DenyWord {
    /// Creates a dictionary with an empty tokenizer and no denied words.
    fn empty() -> Self {
        Self {
            jieba: Jieba::empty(),
            words: HashSet::new(),
        }
    }

    /// Builds a dictionary from `words`. Each entry is trimmed; entries that
    /// are empty after trimming are skipped. Every kept word is registered
    /// with the tokenizer and stored in the lookup set.
    fn from_iter<T: IntoIterator<Item = impl Into<String>>>(words: T) -> Self {
        let mut dictionary = Self::empty();
        for candidate in words {
            let owned: String = candidate.into();
            let trimmed = owned.trim();
            if !trimmed.is_empty() {
                dictionary.jieba.add_word(trimmed, None, None);
                dictionary.words.insert(trimmed.to_string());
            }
        }
        dictionary
    }

    /// Loads the embedded `sensitive_word_dict.txt` (one word per line).
    /// Falls back to an empty dictionary when the asset is missing or is not
    /// valid UTF-8.
    fn default() -> Self {
        let embedded = match Asset::get("sensitive_word_dict.txt") {
            Some(file) => file,
            None => return Self::empty(),
        };
        match std::str::from_utf8(embedded.data.as_ref()) {
            Ok(text) => Self::from_iter(text.split('\n')),
            Err(_) => Self::empty(),
        }
    }

    /// Returns true when any token produced by cutting `message` is a denied word.
    fn check(&self, message: &str) -> bool {
        self.jieba
            .cut(message, true)
            .into_iter()
            .any(|token| self.words.contains(token))
    }
}
impl System {
/// Builds the global state: compiles `GROK_PATTERN` (panics if it does not
/// compile), loads the built-in deny words and fills the grok pattern table.
fn new() -> Self {
let grok_regex = Regex::new(GROK_PATTERN).unwrap();
let grok_patterns = BTreeMap::new();
let mut system = System {
deny_word: DenyWord::default(),
grok_regex,
grok_patterns,
};
system.init();
system
}
/// Populates `grok_patterns` from `grok::patterns()` and `SYSTEM_PATTERNS`,
/// expanding nested `%{...}` references into plain regex source.
fn init(&mut self) {
// Patterns that still contain grok references and need expansion.
let mut grok_temp_patterns = VecDeque::new();
for patterns in [patterns(), SYSTEM_PATTERNS] {
for &(key, value) in patterns {
if self.grok_regex.is_match(value).is_ok_and(|r| r) {
grok_temp_patterns.push_back((String::from(key), String::from(value)));
} else {
// No references inside: usable as-is.
self.grok_patterns
.insert(String::from(key), String::from(value));
}
}
}
// Key of the first pattern that failed since the last success. Popping
// that key again means every queued pattern was retried without any
// progress, so the loop stops and the still-unresolved patterns
// (including the one just popped) are dropped.
let mut last_ok: Option<String> = None;
while let Some((key, value)) = grok_temp_patterns.pop_front() {
if let Some(k) = &last_ok {
if k == &key {
break;
}
}
let (v, ok) = self.grok_to_pattern(&value);
if ok {
self.grok_patterns.insert(key, v);
last_ok = None;
} else {
if last_ok.is_none() {
last_ok = Some(key.clone());
}
// Re-queue the partially expanded text for another attempt.
grok_temp_patterns.push_back((key, v));
}
}
}
/// Expands the grok references in `pattern` using `grok_patterns`.
///
/// `%{NAME}` is replaced by the stored pattern and `%{NAME:alias}` by
/// `(?P<alias>pattern)`. Returns the resulting text together with a flag
/// that is false when a capture attempt errored or a referenced name was
/// not found (such references are left in the text unchanged).
fn grok_to_pattern(&self, pattern: &str) -> (String, bool) {
let mut ok = true;
let mut ret = pattern.to_string();
for _c in self.grok_regex.captures_iter(pattern) {
if _c.is_err() {
ok = false;
continue;
}
let c = _c.unwrap();
if let (Some(full), Some(name)) = (c.get(0), c.name("pattern")) {
if let Some(p) = self.grok_patterns.get(name.as_str()) {
if let Some(alias) = c.name("alias") {
ret = ret.replace(full.as_str(), &format!("(?P<{}>{})", alias.as_str(), p));
} else {
ret = ret.replace(full.as_str(), p);
}
} else {
ok = false;
}
}
}
(ret, ok)
}
}
impl AiDataMaskingRoot {
    /// Creates the root context with a logger named after the plugin and a
    /// fresh, default rule matcher.
    fn new() -> Self {
        let log = Log::new(PLUGIN_NAME.to_string());
        let rule_matcher = Rc::new(RefCell::new(RuleMatcher::default()));
        Self { log, rule_matcher }
    }
}
// No `Context` callbacks are overridden for the root context.
impl Context for AiDataMaskingRoot {}
impl RootContext for AiDataMaskingRoot {
// Delegates configuration handling to the shared `on_configure` helper,
// giving it mutable access to the rule matcher and the plugin logger.
fn on_configure(&mut self, _plugin_configuration_size: usize) -> bool {
on_configure(
self,
_plugin_configuration_size,
self.rule_matcher.borrow_mut().deref_mut(),
&self.log,
)
}
// HTTP contexts are created through the `RootContextWrapper` helper.
fn create_http_context(&self, _context_id: u32) -> Option<Box<dyn HttpContext>> {
self.create_http_context_use_wrapper(_context_id)
}
// This root context produces HTTP contexts.
fn get_type(&self) -> Option<ContextType> {
Some(ContextType::HttpContext)
}
}
impl RootContextWrapper<AiDataMaskingConfig> for AiDataMaskingRoot {
// Exposes the shared rule matcher holding the parsed configuration.
fn rule_matcher(&self) -> &SharedRuleMatcher<AiDataMaskingConfig> {
&self.rule_matcher
}
// Creates a fresh per-stream context: no config yet, empty mask map,
// not OpenAI, not streaming, empty response buffer.
fn create_http_context_wrapper(
&self,
_context_id: u32,
) -> Option<Box<dyn HttpContextWrapper<AiDataMaskingConfig>>> {
Some(Box::new(AiDataMasking {
mask_map: HashMap::new(),
config: None,
is_openai: false,
stream: false,
res_body: Bytes::new(),
}))
}
}
impl AiDataMasking {
/// Returns true when `message` contains a denied word: either one of the
/// configured `deny_words`, or (only when `system_deny` is on) one of the
/// built-in words. Always false when no configuration is attached.
fn check_message(&self, message: &str) -> bool {
    match self.config.as_ref() {
        None => false,
        Some(cfg) => {
            if cfg.deny_words.check(message) {
                return true;
            }
            cfg.system_deny && SYSTEM.deny_word.check(message)
        }
    }
}
/// Builds the deny payload as `(body, content type)`.
///
/// * Not an OpenAI request: `raw_msg` with `content_type`.
/// * OpenAI request with streaming: one SSE `data:` event whose delta carries `msg`.
/// * OpenAI request without streaming: a JSON completion whose message carries `msg`.
fn msg_to_response(&self, msg: &str, raw_msg: &str, content_type: &str) -> (String, String) {
    if !self.is_openai {
        return (raw_msg.to_string(), content_type.to_string());
    }
    if self.stream {
        let event = json!({"choices": [{"index": 0, "delta": {"role": "assistant", "content": msg}}], "usage": {}});
        return (
            format!("data:{}\n\n", event),
            "text/event-stream;charset=UTF-8".to_string(),
        );
    }
    let completion = json!({"choices": [{"index": 0, "message": {"role": "assistant", "content": msg}}], "usage": {}});
    (completion.to_string(), "application/json".to_string())
}
/// Applies the "denied" outcome. `in_response` is true when the sensitive
/// content was found while processing a response body, false for a request.
fn deny(&mut self, in_response: bool) -> DataAction {
// Streaming response: blank out the current body chunk and continue.
if in_response && self.stream {
self.replace_http_response_body(&[]);
return DataAction::Continue;
}
// Build the deny payload from the config, or from the built-in defaults
// when no config is attached.
let (deny_code, (deny_message, content_type)) = if let Some(config) = &self.config {
(
config.deny_code,
self.msg_to_response(
&config.deny_message,
&config.deny_raw_message,
&config.deny_content_type,
),
)
} else {
(
default_deny_code(),
self.msg_to_response(
&default_deny_message(),
&default_deny_raw_message(),
&default_deny_content_type(),
),
)
};
// Buffered response: swap the response body for the deny payload.
if in_response {
self.replace_http_response_body(deny_message.as_bytes());
return DataAction::Continue;
}
// Request phase: answer locally with the deny payload instead of
// continuing with the request body.
self.send_http_response(
deny_code as u32,
vec![("Content-Type", &content_type)],
Some(deny_message.as_bytes()),
);
DataAction::StopIterationAndBuffer
}
/// Extracts the delta contents from a block of server-sent-event text.
///
/// Only lines starting with `data:` are considered. Lines whose payload does
/// not parse as `Res` are skipped, and choices without a `delta` contribute
/// nothing. The contents are returned in order of appearance.
fn process_sse_message(&mut self, sse_message: &str) -> Vec<String> {
    sse_message
        .split('\n')
        .filter_map(|line| line.strip_prefix("data:"))
        .filter_map(|payload| serde_json::from_str::<Res>(payload).ok())
        .flat_map(|parsed| parsed.choices)
        .filter_map(|choice| choice.delta)
        .map(|delta| delta.content)
        .collect()
}
fn replace_request_msg(&mut self, message: &str) -> String {
let config = self.config.as_ref().unwrap();
let mut msg = message.to_string();
for rule in &config.replace_roles {
let mut replace_pair = Vec::new();
if rule.type_ == Type::Replace && !rule.restore {
msg = rule.regex.replace_all(&msg, &rule.value).to_string();
} else {
for _m in rule.regex.find_iter(&msg) {
if _m.is_err() {
continue;
}
let m = _m.unwrap();
let from_word = m.as_str();
let to_word = match rule.type_ {
Type::Hash => {
let digest = md5::compute(from_word.as_bytes());
format!("{:x}", digest)
}
Type::Replace => rule.regex.replace(from_word, &rule.value).to_string(),
};
replace_pair.push((from_word.to_string(), to_word.clone()));
if rule.restore && !to_word.is_empty() {
match self.mask_map.entry(to_word) {
std::collections::hash_map::Entry::Occupied(mut e) => {
e.insert(None);
}
std::collections::hash_map::Entry::Vacant(e) => {
e.insert(Some(from_word.to_string()));
}
}
}
}
for (from_word, to_word) in replace_pair {
msg = msg.replace(&from_word, &to_word);
}
}
}
msg
}
}
// No `Context` callbacks are overridden for the stream context.
impl Context for AiDataMasking {}
impl HttpContext for AiDataMasking {
// Always stops header iteration on the request path.
// NOTE(review): presumably so the headers are held until the request body
// has been inspected — confirm against the wrapper's body handling.
fn on_http_request_headers(
&mut self,
_num_headers: usize,
_end_of_stream: bool,
) -> HeaderAction {
HeaderAction::StopIteration
}
// Removes `Content-Length` from the response headers; the body may be
// rewritten later in this context.
fn on_http_response_headers(
&mut self,
_num_headers: usize,
_end_of_stream: bool,
) -> HeaderAction {
self.set_http_response_header("Content-Length", None);
HeaderAction::Continue
}
// Per-chunk processing of streamed responses. Does nothing unless the
// request asked for streaming.
fn on_http_response_body(&mut self, body_size: usize, _end_of_stream: bool) -> DataAction {
if !self.stream {
return DataAction::Continue;
}
if let Some(body) = self.get_http_response_body(0, body_size) {
// Deny check runs on everything received so far, so the accumulated
// buffer is re-scanned on every chunk.
self.res_body.extend(&body);
if let Ok(body_str) = String::from_utf8(self.res_body.clone()) {
if self.is_openai {
let messages = self.process_sse_message(&body_str);
if self.check_message(&messages.join("")) {
return self.deny(true);
}
} else if self.check_message(&body_str) {
return self.deny(true);
}
}
// Nothing was masked in the request, so nothing to restore.
if self.mask_map.is_empty() {
return DataAction::Continue;
}
// Restoration works on the current chunk only (not the accumulated buffer).
if let Ok(body_str) = std::str::from_utf8(&body) {
let mut new_str = body_str.to_string();
if self.is_openai {
let messages = self.process_sse_message(body_str);
for message in messages {
let mut new_message = message.clone();
for (from_word, to_word) in self.mask_map.iter() {
if let Some(to) = to_word {
new_message = new_message.replace(from_word, to);
}
}
if new_message != message {
// Replace the JSON-encoded form of the content so escaping stays valid.
new_str = new_str.replace(
&json!(message).to_string(),
&json!(new_message).to_string(),
);
}
}
} else {
for (from_word, to_word) in self.mask_map.iter() {
if let Some(to) = to_word {
new_str = new_str.replace(from_word, to);
}
}
}
if new_str != body_str {
self.replace_http_response_body(new_str.as_bytes());
}
}
}
DataAction::Continue
}
}
impl HttpContextWrapper<AiDataMaskingConfig> for AiDataMasking {
// Stores a copy of the configuration for this stream.
fn on_config(&mut self, config: &AiDataMaskingConfig) {
self.config = Some(config.clone());
}
// The request body is always requested as a buffered whole.
fn cache_request_body(&self) -> bool {
true
}
// The response body is buffered only for non-streaming requests.
fn cache_response_body(&self) -> bool {
!self.stream
}
// Inspects the complete request body. The three modes are tried in order
// (OpenAI, JSONPath, raw) and the first one that applies returns.
// Each mode denies the request on a deny-word hit, otherwise masks
// sensitive values via `replace_request_msg`.
fn on_http_request_complete_body(&mut self, req_body: &Bytes) -> DataAction {
if self.config.is_none() {
return DataAction::Continue;
}
let config = self.config.as_ref().unwrap();
// Non-UTF-8 bodies are passed through untouched.
let mut req_body = match String::from_utf8(req_body.clone()) {
Ok(r) => r,
Err(_) => return DataAction::Continue,
};
if config.deny_openai {
// Applies only when the body parses as an OpenAI-style `Req`.
if let Ok(r) = serde_json::from_str(req_body.as_str()) {
let req: Req = r;
self.is_openai = true;
self.stream = req.stream;
for msg in req.messages {
if self.check_message(&msg.content) {
return self.deny(false);
}
let new_content = self.replace_request_msg(&msg.content);
if new_content != msg.content {
// Swap the JSON-encoded content inside the raw body text.
if let (Ok(from), Ok(to)) = (
serde_json::to_string(&msg.content),
serde_json::to_string(&new_content),
) {
req_body = req_body.replace(&from, &to);
}
}
}
self.replace_http_request_body(req_body.as_bytes());
return DataAction::Continue;
}
}
if !config.deny_jsonpath.is_empty() {
// Applies only when the body is valid JSON.
if let Ok(r) = serde_json::from_str(req_body.as_str()) {
let json: Value = r;
for jsonpath in config.deny_jsonpath.clone() {
for v in jsonpath.find_slice(&json) {
if let JsonPathValue::Slice(d, _) = v {
// Only string values are inspected.
if let Some(s) = d.as_str() {
if self.check_message(s) {
return self.deny(false);
}
let content = s.to_string();
let new_content = self.replace_request_msg(&content);
if new_content != content {
if let (Ok(from), Ok(to)) = (
serde_json::to_string(&content),
serde_json::to_string(&new_content),
) {
req_body = req_body.replace(&from, &to);
}
}
}
}
}
}
self.replace_http_request_body(req_body.as_bytes());
return DataAction::Continue;
}
}
if config.deny_raw {
// Treat the whole body as one piece of text.
if self.check_message(&req_body) {
return self.deny(false);
}
let new_body = self.replace_request_msg(&req_body);
if new_body != req_body {
self.replace_http_request_body(new_body.as_bytes())
}
return DataAction::Continue;
}
DataAction::Continue
}
// Inspects the complete (buffered) response body: denies it on a deny-word
// hit, otherwise restores masked values recorded in `mask_map`.
fn on_http_response_complete_body(&mut self, res_body: &Bytes) -> DataAction {
if self.config.is_none() {
self.reset_http_response();
return DataAction::Continue;
}
let config = self.config.as_ref().unwrap();
let mut res_body = match String::from_utf8(res_body.clone()) {
Ok(r) => r,
Err(_) => {
self.reset_http_response();
return DataAction::Continue;
}
};
if config.deny_openai && self.is_openai {
// Applies only when the body parses as an OpenAI-style `Res`.
if let Ok(r) = serde_json::from_str(res_body.as_str()) {
let res: Res = r;
for msg in res.choices {
if let Some(meesage) = msg.message {
if self.check_message(&meesage.content) {
return self.deny(true);
}
if self.mask_map.is_empty() {
continue;
}
// Restore every masked value that has a recorded original.
let mut m = meesage.content.clone();
for (from_word, to_word) in self.mask_map.iter() {
if let Some(to) = to_word {
m = m.replace(from_word, to);
}
}
if m != meesage.content {
// Swap the JSON-encoded content inside the raw body text.
if let (Ok(from), Ok(to)) = (
serde_json::to_string(&meesage.content),
serde_json::to_string(&m),
) {
res_body = res_body.replace(&from, &to);
}
}
}
}
self.replace_http_response_body(res_body.as_bytes());
return DataAction::Continue;
}
}
if config.deny_raw {
// Treat the whole body as one piece of text.
if self.check_message(&res_body) {
return self.deny(true);
}
if !self.mask_map.is_empty() {
for (from_word, to_word) in self.mask_map.iter() {
if let Some(to) = to_word {
res_body = res_body.replace(from_word, to);
}
}
}
self.replace_http_response_body(res_body.as_bytes());
return DataAction::Continue;
}
DataAction::Continue
}
}

View File

@@ -4,13 +4,14 @@ version = 3
[[package]]
name = "ahash"
version = "0.8.3"
version = "0.8.11"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2c99f64d1e06488f620f932677e24bc6e2897582980441ae90a671415bd7ec2f"
checksum = "e89da841a80418a9b391ebaea17f5c112ffaaa96f621d2c285b5174da76b9011"
dependencies = [
"cfg-if",
"once_cell",
"version_check",
"zerocopy",
]
[[package]]
@@ -22,6 +23,12 @@ dependencies = [
"memchr",
]
[[package]]
name = "allocator-api2"
version = "0.2.18"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5c6cb57a04249c6480766f7f7cef5467412af1490f8d1e243141daddada3264f"
[[package]]
name = "cfg-if"
version = "1.0.0"
@@ -30,9 +37,9 @@ checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
[[package]]
name = "getrandom"
version = "0.2.9"
version = "0.2.15"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c85e1d9ab2eadba7e5040d4e09cbd6d072b76a557ad64e797c2cb9d4da21d7e4"
checksum = "c4567c8db10ae91089c99af84c68c38da3ec2f087c3f82960bcdbf3656b6f4d7"
dependencies = [
"cfg-if",
"libc",
@@ -41,11 +48,12 @@ dependencies = [
[[package]]
name = "hashbrown"
version = "0.13.2"
version = "0.14.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "43a3c133739dddd0d2990f9a4bdf8eb4b21ef50e4851ca85ab661199821d510e"
checksum = "e5274423e17b7c9fc20b6e7e208532f9b19825d82dfd615708b70edd83df41f1"
dependencies = [
"ahash",
"allocator-api2",
]
[[package]]
@@ -61,21 +69,21 @@ dependencies = [
[[package]]
name = "itoa"
version = "1.0.6"
version = "1.0.11"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "453ad9f582a441959e5f0d088b02ce04cfe8d51a8eaf077f12ac6d3e94164ca6"
checksum = "49f1f14873335454500d59611f1cf4a4b0f786f9ac11f4312a78e4cf2566695b"
[[package]]
name = "libc"
version = "0.2.144"
version = "0.2.155"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2b00cc1c228a6782d0f076e7b232802e0c5689d41bb5df366f2a6b6621cfdfe1"
checksum = "97b3888a4aecf77e811145cadf6eef5901f4782c53886191b2f693f24761847c"
[[package]]
name = "log"
version = "0.4.18"
version = "0.4.22"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "518ef76f2f87365916b142844c16d8fefd85039bc5699050210a7778ee1cd1de"
checksum = "a7a70ba024b9dc04c27ea2f0c0548feb474ec5c54bba33a7f72f873a39d07b24"
[[package]]
name = "memchr"
@@ -94,24 +102,23 @@ dependencies = [
[[package]]
name = "once_cell"
version = "1.17.2"
version = "1.19.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9670a07f94779e00908f3e686eab508878ebb390ba6e604d3a284c00e8d0487b"
checksum = "3fdb12b2476b595f9358c5161aa467c2438859caa136dec86c26fdd2efe17b92"
[[package]]
name = "proc-macro2"
version = "1.0.59"
version = "1.0.86"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6aeca18b86b413c660b781aa319e4e2648a3e6f9eadc9b47e9038e6fe9f3451b"
checksum = "5e719e8df665df0d1c8fbfd238015744736151d4445ec0836b8e628aae103b77"
dependencies = [
"unicode-ident",
]
[[package]]
name = "proxy-wasm"
version = "0.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "823b744520cd4a54ba7ebacbffe4562e839d6dcd8f89209f96a1ace4f5229cd4"
version = "0.2.2"
source = "git+https://github.com/higress-group/proxy-wasm-rust-sdk?branch=main#73833051f57d483570cf5aaa9d62bd7402fae63b"
dependencies = [
"hashbrown",
"log",
@@ -119,18 +126,18 @@ dependencies = [
[[package]]
name = "quote"
version = "1.0.28"
version = "1.0.36"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1b9ab9c7eadfd8df19006f1cf1a4aed13540ed5cbc047010ece5826e10825488"
checksum = "0fa76aaf39101c457836aec0ce2316dbdc3ab723cdda1c6bd4e6ad4208acaca7"
dependencies = [
"proc-macro2",
]
[[package]]
name = "regex"
version = "1.10.5"
version = "1.10.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b91213439dad192326a0d7c6ee3955910425f441d7038e0d6933b0aec5c4517f"
checksum = "4219d74c6b67a3654a9fbebc4b419e22126d13d2f3c4a07ee0cb61ff79a79619"
dependencies = [
"aho-corasick",
"memchr",
@@ -169,24 +176,24 @@ dependencies = [
[[package]]
name = "ryu"
version = "1.0.13"
version = "1.0.18"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f91339c0467de62360649f8d3e185ca8de4224ff281f66000de5eb2a77a79041"
checksum = "f3cb5ba0dc43242ce17de99c180e96db90b235b8a9fdc9543c96d2209116bd9f"
[[package]]
name = "serde"
version = "1.0.163"
version = "1.0.207"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2113ab51b87a539ae008b5c6c02dc020ffa39afd2d83cffcb3f4eb2722cebec2"
checksum = "5665e14a49a4ea1b91029ba7d3bca9f299e1f7cfa194388ccc20f14743e784f2"
dependencies = [
"serde_derive",
]
[[package]]
name = "serde_derive"
version = "1.0.163"
version = "1.0.207"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8c805777e3930c8883389c602315a24224bcc738b63905ef87cd1420353ea93e"
checksum = "6aea2634c86b0e8ef2cfdc0c340baede54ec27b1e46febd7f80dffb2aa44a00e"
dependencies = [
"proc-macro2",
"quote",
@@ -195,20 +202,21 @@ dependencies = [
[[package]]
name = "serde_json"
version = "1.0.96"
version = "1.0.125"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "057d394a50403bcac12672b2b18fb387ab6d289d957dab67dd201875391e52f1"
checksum = "83c8e735a073ccf5be70aa8066aa984eaf2fa000db6c8d0100ae605b366d31ed"
dependencies = [
"itoa",
"memchr",
"ryu",
"serde",
]
[[package]]
name = "syn"
version = "2.0.18"
version = "2.0.74"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "32d41677bcbe24c20c52e7c70b0d8db04134c5d1066bf98662e2871ad200ea3e"
checksum = "1fceb41e3d546d0bd83421d3409b1460cc7444cd389341a4c880fe7a042cb3d7"
dependencies = [
"proc-macro2",
"quote",
@@ -217,27 +225,47 @@ dependencies = [
[[package]]
name = "unicode-ident"
version = "1.0.9"
version = "1.0.12"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b15811caf2415fb889178633e7724bad2509101cde276048e013b9def5e51fa0"
checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b"
[[package]]
name = "uuid"
version = "1.3.3"
version = "1.10.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "345444e32442451b267fc254ae85a209c64be56d2890e601a0c37ff0c3c5ecd2"
checksum = "81dfa00651efa65069b0b6b651f4aaa31ba9e3c3ce0137aaad053604ee7e0314"
dependencies = [
"getrandom",
]
[[package]]
name = "version_check"
version = "0.9.4"
version = "0.9.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f"
checksum = "0b928f33d975fc6ad9f86c8f283853ad26bdd5b10b7f1542aa2fa15e2289105a"
[[package]]
name = "wasi"
version = "0.11.0+wasi-snapshot-preview1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423"
[[package]]
name = "zerocopy"
version = "0.7.35"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1b9b4fd18abc82b8136838da5d50bae7bdea537c574d8dc1a34ed098d6c166f0"
dependencies = [
"zerocopy-derive",
]
[[package]]
name = "zerocopy-derive"
version = "0.7.35"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fa4f8080344d4671fb4e831a13ad1e68092748387dfc4f55e356242fae12ce3e"
dependencies = [
"proc-macro2",
"quote",
"syn",
]

View File

@@ -10,8 +10,8 @@ crate-type = ["cdylib"]
[dependencies]
higress-wasm-rust = { path = "../../", version = "0.1.0" }
proxy-wasm = "0.2.1"
proxy-wasm = { git="https://github.com/higress-group/proxy-wasm-rust-sdk", branch="main", version="0.2.2" }
serde = { version = "1.0", features = ["derive"] }
serde_json = "1.0"
regex = "1"
multimap = "0"
multimap = "0"

View File

@@ -17,7 +17,7 @@ use higress_wasm_rust::plugin_wrapper::{HttpContextWrapper, RootContextWrapper};
use higress_wasm_rust::rule_matcher::{on_configure, RuleMatcher, SharedRuleMatcher};
use multimap::MultiMap;
use proxy_wasm::traits::{Context, HttpContext, RootContext};
use proxy_wasm::types::{Action, Bytes, ContextType, LogLevel};
use proxy_wasm::types::{Bytes, ContextType, DataAction, HeaderAction, LogLevel};
use regex::Regex;
use serde::de::Error;
use serde::Deserialize;
@@ -148,9 +148,12 @@ impl HttpContextWrapper<RquestBlockConfig> for RquestBlock {
fn cache_request_body(&self) -> bool {
self.cache_request
}
fn on_http_request_headers_ok(&mut self, headers: &MultiMap<String, String>) -> Action {
fn on_http_request_complete_headers(
&mut self,
headers: &MultiMap<String, String>,
) -> HeaderAction {
if self.config.is_none() {
return Action::Continue;
return HeaderAction::Continue;
}
let config = self.config.as_ref().unwrap();
if !config.block_urls.is_empty()
@@ -161,7 +164,7 @@ impl HttpContextWrapper<RquestBlockConfig> for RquestBlock {
if value.is_none() {
self.log.warn("get path failed");
return Action::Continue;
return HeaderAction::Continue;
}
let mut request_url = value.unwrap().clone();
@@ -175,7 +178,7 @@ impl HttpContextWrapper<RquestBlockConfig> for RquestBlock {
Vec::new(),
Some(config.blocked_message.as_bytes()),
);
return Action::Pause;
return HeaderAction::StopIteration;
}
}
for block_url in &config.block_urls {
@@ -185,7 +188,7 @@ impl HttpContextWrapper<RquestBlockConfig> for RquestBlock {
Vec::new(),
Some(config.blocked_message.as_bytes()),
);
return Action::Pause;
return HeaderAction::StopIteration;
}
}
@@ -196,7 +199,7 @@ impl HttpContextWrapper<RquestBlockConfig> for RquestBlock {
Vec::new(),
Some(config.blocked_message.as_bytes()),
);
return Action::Pause;
return HeaderAction::StopIteration;
}
}
}
@@ -214,19 +217,19 @@ impl HttpContextWrapper<RquestBlockConfig> for RquestBlock {
Vec::new(),
Some(config.blocked_message.as_bytes()),
);
return Action::Pause;
return HeaderAction::StopIteration;
}
}
}
Action::Continue
HeaderAction::Continue
}
fn on_http_request_body_ok(&mut self, req_body: &Bytes) -> Action {
fn on_http_request_complete_body(&mut self, req_body: &Bytes) -> DataAction {
if self.config.is_none() {
return Action::Continue;
return DataAction::Continue;
}
let config = self.config.as_ref().unwrap();
if config.block_bodies.is_empty() {
return Action::Continue;
return DataAction::Continue;
}
let mut body = req_body.clone();
if !config.case_sensitive {
@@ -240,9 +243,9 @@ impl HttpContextWrapper<RquestBlockConfig> for RquestBlock {
Vec::new(),
Some(config.blocked_message.as_bytes()),
);
return Action::Pause;
return DataAction::StopIterationAndBuffer;
}
}
Action::Continue
DataAction::Continue
}
}

View File

@@ -4,15 +4,22 @@ version = 3
[[package]]
name = "ahash"
version = "0.8.3"
version = "0.8.11"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2c99f64d1e06488f620f932677e24bc6e2897582980441ae90a671415bd7ec2f"
checksum = "e89da841a80418a9b391ebaea17f5c112ffaaa96f621d2c285b5174da76b9011"
dependencies = [
"cfg-if",
"once_cell",
"version_check",
"zerocopy",
]
[[package]]
name = "allocator-api2"
version = "0.2.18"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5c6cb57a04249c6480766f7f7cef5467412af1490f8d1e243141daddada3264f"
[[package]]
name = "cfg-if"
version = "1.0.0"
@@ -21,9 +28,9 @@ checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
[[package]]
name = "getrandom"
version = "0.2.9"
version = "0.2.15"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c85e1d9ab2eadba7e5040d4e09cbd6d072b76a557ad64e797c2cb9d4da21d7e4"
checksum = "c4567c8db10ae91089c99af84c68c38da3ec2f087c3f82960bcdbf3656b6f4d7"
dependencies = [
"cfg-if",
"libc",
@@ -32,17 +39,19 @@ dependencies = [
[[package]]
name = "hashbrown"
version = "0.13.2"
version = "0.14.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "43a3c133739dddd0d2990f9a4bdf8eb4b21ef50e4851ca85ab661199821d510e"
checksum = "e5274423e17b7c9fc20b6e7e208532f9b19825d82dfd615708b70edd83df41f1"
dependencies = [
"ahash",
"allocator-api2",
]
[[package]]
name = "higress-wasm-rust"
version = "0.1.0"
dependencies = [
"multimap",
"proxy-wasm",
"serde",
"serde_json",
@@ -51,42 +60,56 @@ dependencies = [
[[package]]
name = "itoa"
version = "1.0.6"
version = "1.0.11"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "453ad9f582a441959e5f0d088b02ce04cfe8d51a8eaf077f12ac6d3e94164ca6"
checksum = "49f1f14873335454500d59611f1cf4a4b0f786f9ac11f4312a78e4cf2566695b"
[[package]]
name = "libc"
version = "0.2.144"
version = "0.2.155"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2b00cc1c228a6782d0f076e7b232802e0c5689d41bb5df366f2a6b6621cfdfe1"
checksum = "97b3888a4aecf77e811145cadf6eef5901f4782c53886191b2f693f24761847c"
[[package]]
name = "log"
version = "0.4.18"
version = "0.4.22"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "518ef76f2f87365916b142844c16d8fefd85039bc5699050210a7778ee1cd1de"
checksum = "a7a70ba024b9dc04c27ea2f0c0548feb474ec5c54bba33a7f72f873a39d07b24"
[[package]]
name = "memchr"
version = "2.7.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "78ca9ab1a0babb1e7d5695e3530886289c18cf2f87ec19a575a0abdce112e3a3"
[[package]]
name = "multimap"
version = "0.10.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "defc4c55412d89136f966bbb339008b474350e5e6e78d2714439c386b3137a03"
dependencies = [
"serde",
]
[[package]]
name = "once_cell"
version = "1.17.2"
version = "1.19.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9670a07f94779e00908f3e686eab508878ebb390ba6e604d3a284c00e8d0487b"
checksum = "3fdb12b2476b595f9358c5161aa467c2438859caa136dec86c26fdd2efe17b92"
[[package]]
name = "proc-macro2"
version = "1.0.59"
version = "1.0.86"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6aeca18b86b413c660b781aa319e4e2648a3e6f9eadc9b47e9038e6fe9f3451b"
checksum = "5e719e8df665df0d1c8fbfd238015744736151d4445ec0836b8e628aae103b77"
dependencies = [
"unicode-ident",
]
[[package]]
name = "proxy-wasm"
version = "0.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "823b744520cd4a54ba7ebacbffe4562e839d6dcd8f89209f96a1ace4f5229cd4"
version = "0.2.2"
source = "git+https://github.com/higress-group/proxy-wasm-rust-sdk?branch=main#73833051f57d483570cf5aaa9d62bd7402fae63b"
dependencies = [
"hashbrown",
"log",
@@ -94,18 +117,18 @@ dependencies = [
[[package]]
name = "quote"
version = "1.0.28"
version = "1.0.36"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1b9ab9c7eadfd8df19006f1cf1a4aed13540ed5cbc047010ece5826e10825488"
checksum = "0fa76aaf39101c457836aec0ce2316dbdc3ab723cdda1c6bd4e6ad4208acaca7"
dependencies = [
"proc-macro2",
]
[[package]]
name = "ryu"
version = "1.0.13"
version = "1.0.18"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f91339c0467de62360649f8d3e185ca8de4224ff281f66000de5eb2a77a79041"
checksum = "f3cb5ba0dc43242ce17de99c180e96db90b235b8a9fdc9543c96d2209116bd9f"
[[package]]
name = "say-hello"
@@ -119,18 +142,18 @@ dependencies = [
[[package]]
name = "serde"
version = "1.0.163"
version = "1.0.207"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2113ab51b87a539ae008b5c6c02dc020ffa39afd2d83cffcb3f4eb2722cebec2"
checksum = "5665e14a49a4ea1b91029ba7d3bca9f299e1f7cfa194388ccc20f14743e784f2"
dependencies = [
"serde_derive",
]
[[package]]
name = "serde_derive"
version = "1.0.163"
version = "1.0.207"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8c805777e3930c8883389c602315a24224bcc738b63905ef87cd1420353ea93e"
checksum = "6aea2634c86b0e8ef2cfdc0c340baede54ec27b1e46febd7f80dffb2aa44a00e"
dependencies = [
"proc-macro2",
"quote",
@@ -139,20 +162,21 @@ dependencies = [
[[package]]
name = "serde_json"
version = "1.0.96"
version = "1.0.125"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "057d394a50403bcac12672b2b18fb387ab6d289d957dab67dd201875391e52f1"
checksum = "83c8e735a073ccf5be70aa8066aa984eaf2fa000db6c8d0100ae605b366d31ed"
dependencies = [
"itoa",
"memchr",
"ryu",
"serde",
]
[[package]]
name = "syn"
version = "2.0.18"
version = "2.0.74"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "32d41677bcbe24c20c52e7c70b0d8db04134c5d1066bf98662e2871ad200ea3e"
checksum = "1fceb41e3d546d0bd83421d3409b1460cc7444cd389341a4c880fe7a042cb3d7"
dependencies = [
"proc-macro2",
"quote",
@@ -161,27 +185,47 @@ dependencies = [
[[package]]
name = "unicode-ident"
version = "1.0.9"
version = "1.0.12"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b15811caf2415fb889178633e7724bad2509101cde276048e013b9def5e51fa0"
checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b"
[[package]]
name = "uuid"
version = "1.3.3"
version = "1.10.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "345444e32442451b267fc254ae85a209c64be56d2890e601a0c37ff0c3c5ecd2"
checksum = "81dfa00651efa65069b0b6b651f4aaa31ba9e3c3ce0137aaad053604ee7e0314"
dependencies = [
"getrandom",
]
[[package]]
name = "version_check"
version = "0.9.4"
version = "0.9.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f"
checksum = "0b928f33d975fc6ad9f86c8f283853ad26bdd5b10b7f1542aa2fa15e2289105a"
[[package]]
name = "wasi"
version = "0.11.0+wasi-snapshot-preview1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423"
[[package]]
name = "zerocopy"
version = "0.7.35"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1b9b4fd18abc82b8136838da5d50bae7bdea537c574d8dc1a34ed098d6c166f0"
dependencies = [
"zerocopy-derive",
]
[[package]]
name = "zerocopy-derive"
version = "0.7.35"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fa4f8080344d4671fb4e831a13ad1e68092748387dfc4f55e356242fae12ce3e"
dependencies = [
"proc-macro2",
"quote",
"syn",
]

View File

@@ -10,6 +10,6 @@ crate-type = ["cdylib"]
[dependencies]
higress-wasm-rust = { path = "../../", version = "0.1.0" }
proxy-wasm = "0.2.1"
proxy-wasm = { git="https://github.com/higress-group/proxy-wasm-rust-sdk", branch="main", version="0.2.2" }
serde = { version = "1.0", features = ["derive"] }
serde_json = "1.0"
serde_json = "1.0"

View File

@@ -15,7 +15,7 @@
use higress_wasm_rust::log::Log;
use higress_wasm_rust::rule_matcher::{on_configure, RuleMatcher, SharedRuleMatcher};
use proxy_wasm::traits::{Context, HttpContext, RootContext};
use proxy_wasm::types::{Action, ContextType, LogLevel};
use proxy_wasm::types::{ContextType, HeaderAction, LogLevel};
use serde::Deserialize;
use std::cell::RefCell;
use std::ops::DerefMut;
@@ -75,12 +75,16 @@ impl RootContext for SayHelloRoot {
impl Context for SayHello {}
impl HttpContext for SayHello {
fn on_http_request_headers(&mut self, _num_headers: usize, _end_of_stream: bool) -> Action {
fn on_http_request_headers(
&mut self,
_num_headers: usize,
_end_of_stream: bool,
) -> HeaderAction {
let binding = self.rule_matcher.borrow();
let config = match binding.get_match_config() {
None => {
self.send_http_response(200, vec![], Some("Hello, World!".as_bytes()));
return Action::Continue;
return HeaderAction::Continue;
}
Some(config) => config.1,
};
@@ -90,6 +94,6 @@ impl HttpContext for SayHello {
vec![],
Some(format!("Hello, {}!", config.name).as_bytes()),
);
Action::Continue
HeaderAction::Continue
}
}

View File

@@ -15,7 +15,7 @@
use crate::rule_matcher::SharedRuleMatcher;
use multimap::MultiMap;
use proxy_wasm::traits::{Context, HttpContext, RootContext};
use proxy_wasm::types::{Action, Bytes};
use proxy_wasm::types::{Action, Bytes, DataAction, HeaderAction};
use serde::de::DeserializeOwned;
pub trait RootContextWrapper<PluginConfig>: RootContext
@@ -43,8 +43,17 @@ where
}
pub trait HttpContextWrapper<PluginConfig>: HttpContext {
fn on_config(&mut self, _config: &PluginConfig) {}
fn on_http_request_headers_ok(&mut self, _headers: &MultiMap<String, String>) -> Action {
Action::Continue
fn on_http_request_complete_headers(
&mut self,
_headers: &MultiMap<String, String>,
) -> HeaderAction {
HeaderAction::Continue
}
fn on_http_response_complete_headers(
&mut self,
_headers: &MultiMap<String, String>,
) -> HeaderAction {
HeaderAction::Continue
}
fn cache_request_body(&self) -> bool {
false
@@ -52,11 +61,11 @@ pub trait HttpContextWrapper<PluginConfig>: HttpContext {
fn cache_response_body(&self) -> bool {
false
}
fn on_http_request_body_ok(&mut self, _req_body: &Bytes) -> Action {
Action::Continue
fn on_http_request_complete_body(&mut self, _req_body: &Bytes) -> DataAction {
DataAction::Continue
}
fn on_http_response_body_ok(&mut self, _res_body: &Bytes) -> Action {
Action::Continue
fn on_http_response_complete_body(&mut self, _res_body: &Bytes) -> DataAction {
DataAction::Continue
}
fn replace_http_request_body(&mut self, body: &[u8]) {
self.set_http_request_body(0, i32::MAX as usize, body)
@@ -67,6 +76,7 @@ pub trait HttpContextWrapper<PluginConfig>: HttpContext {
}
pub struct PluginHttpWrapper<PluginConfig> {
req_headers: MultiMap<String, String>,
res_headers: MultiMap<String, String>,
req_body_len: usize,
res_body_len: usize,
config: Option<PluginConfig>,
@@ -80,6 +90,7 @@ impl<PluginConfig> PluginHttpWrapper<PluginConfig> {
) -> Self {
PluginHttpWrapper {
req_headers: MultiMap::new(),
res_headers: MultiMap::new(),
req_body_len: 0,
res_body_len: 0,
config: None,
@@ -129,7 +140,7 @@ impl<PluginConfig> HttpContext for PluginHttpWrapper<PluginConfig>
where
PluginConfig: Default + DeserializeOwned + Clone,
{
fn on_http_request_headers(&mut self, num_headers: usize, end_of_stream: bool) -> Action {
fn on_http_request_headers(&mut self, num_headers: usize, end_of_stream: bool) -> HeaderAction {
let binding = self.rule_matcher.borrow();
self.config = binding.get_match_config().map(|config| config.1.clone());
for (k, v) in self.get_http_request_headers() {
@@ -141,23 +152,22 @@ where
let ret = self
.http_content
.on_http_request_headers(num_headers, end_of_stream);
if ret != Action::Continue {
if ret != HeaderAction::Continue {
return ret;
}
self.http_content
.on_http_request_headers_ok(&self.req_headers)
.on_http_request_complete_headers(&self.req_headers)
}
fn on_http_request_body(&mut self, body_size: usize, end_of_stream: bool) -> Action {
fn on_http_request_body(&mut self, body_size: usize, end_of_stream: bool) -> DataAction {
if !self.http_content.cache_request_body() {
return self
.http_content
.on_http_request_body(body_size, end_of_stream);
}
self.req_body_len += body_size;
if !end_of_stream {
return Action::Pause;
}
let ret = self
.http_content
.on_http_request_body(self.req_body_len, end_of_stream);
if ret != Action::Continue || !self.http_content.cache_request_body() {
return ret;
return DataAction::StopIterationAndBuffer;
}
let mut req_body = Bytes::new();
if self.req_body_len > 0 {
@@ -165,30 +175,42 @@ where
req_body = body;
}
}
self.http_content.on_http_request_body_ok(&req_body)
self.http_content.on_http_request_complete_body(&req_body)
}
fn on_http_request_trailers(&mut self, num_trailers: usize) -> Action {
self.http_content.on_http_request_trailers(num_trailers)
}
fn on_http_response_headers(&mut self, num_headers: usize, end_of_stream: bool) -> Action {
self.http_content
.on_http_response_headers(num_headers, end_of_stream)
}
fn on_http_response_body(&mut self, body_size: usize, end_of_stream: bool) -> Action {
self.res_body_len += body_size;
if !end_of_stream {
return Action::Pause;
fn on_http_response_headers(
&mut self,
num_headers: usize,
end_of_stream: bool,
) -> HeaderAction {
for (k, v) in self.get_http_response_headers() {
self.res_headers.insert(k, v);
}
let ret = self
.http_content
.on_http_response_body(self.res_body_len, end_of_stream);
if ret != Action::Continue || !self.http_content.cache_response_body() {
.on_http_response_headers(num_headers, end_of_stream);
if ret != HeaderAction::Continue {
return ret;
}
self.http_content
.on_http_response_complete_headers(&self.res_headers)
}
fn on_http_response_body(&mut self, body_size: usize, end_of_stream: bool) -> DataAction {
if !self.http_content.cache_response_body() {
return self
.http_content
.on_http_response_body(body_size, end_of_stream);
}
self.res_body_len += body_size;
if !end_of_stream {
return DataAction::StopIterationAndBuffer;
}
let mut res_body = Bytes::new();
if self.res_body_len > 0 {
@@ -196,7 +218,7 @@ where
res_body = body;
}
}
self.http_content.on_http_response_body_ok(&res_body)
self.http_content.on_http_response_complete_body(&res_body)
}
fn on_http_response_trailers(&mut self, num_trailers: usize) -> Action {

View File

@@ -0,0 +1,189 @@
// Copyright (c) 2022 Alibaba Group Holding Ltd.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package tests
import (
"testing"
"github.com/alibaba/higress/test/e2e/conformance/utils/http"
"github.com/alibaba/higress/test/e2e/conformance/utils/suite"
)
// init adds the ai-data-masking conformance test to the suite by passing it
// to Register when this package is loaded.
func init() {
Register(RustWasmPluginsAiDataMasking)
}
// gen_assertion builds the assertion for a single POST to "/" on host.
//
// req_is_json selects the request Content-Type (application/json when true,
// text/plain otherwise), req_body is sent as the request body, and res_body
// is the body the response is expected to carry. The expected response
// Content-Type is always application/json, and the comparison target is the
// response.
func gen_assertion(host string, req_is_json bool, req_body []byte, res_body []byte) http.Assertion {
	// Start from the plain-text default and switch to JSON only on request.
	contentType := http.ContentTypeTextPlain
	if req_is_json {
		contentType = http.ContentTypeApplicationJson
	}

	request := http.Request{
		Host:             host,
		Path:             "/",
		Method:           "POST",
		ContentType:      contentType,
		Body:             req_body,
		UnfollowRedirect: true,
	}
	expected := http.Response{
		ContentType: http.ContentTypeApplicationJson,
		Body:        res_body,
	}

	return http.Assertion{
		Meta:     http.AssertionMeta{CompareTarget: http.CompareTargetResponse},
		Request:  http.AssertionRequest{ActualRequest: request},
		Response: http.AssertionResponse{ExpectedResponse: expected},
	}
}
// RustWasmPluginsAiDataMasking is the conformance test for the Rust
// ai-data-masking wasm plugin. It applies tests/rust-wasm-ai-data-masking.yaml
// and then sends a fixed list of POST requests, checking the response body of
// each one. The cases are grouped by request shape: OpenAI-style chat JSON,
// raw text bodies, and JSON bodies matched through a jsonpath.
var RustWasmPluginsAiDataMasking = suite.ConformanceTest{
	ShortName:   "RustWasmPluginsAiDataMasking",
	Description: "The Ingress in the higress-conformance-infra namespace test the rust ai-data-masking wasmplugins.",
	Manifests:   []string{"tests/rust-wasm-ai-data-masking.yaml"},
	Features:    []suite.SupportedFeature{suite.WASMRustConformanceFeature},
	Test: func(t *testing.T, suite *suite.ConformanceTestSuite) {
		// Bodies expected whenever a request or response is rejected.
		openaiDenied := "{\"choices\":[{\"index\":0,\"message\":{\"role\":\"assistant\",\"content\":\"提问或回答中包含敏感词,已被屏蔽\"}}],\"usage\":{}}"
		rawDenied := "{\"errmsg\":\"提问或回答中包含敏感词,已被屏蔽\"}"

		// One row per request, kept in the order they are sent.
		cases := []struct {
			host    string
			isJSON  bool
			reqBody string
			resBody string
		}{
			// openai: chat-completion style request and response bodies.
			{"replace.openai.com", true, "{\"messages\":[{\"role\":\"user\",\"content\":\"127.0.0.1 admin@gmail.com sk-12345\"}]}", "{\"choices\":[{\"index\":0,\"message\":{\"role\":\"assistant\",\"content\":\"127.0.0.1 sk-12345 admin@gmail.com\"}}],\"usage\":{}}"},
			{"replace.openai.com", true, "{\"messages\":[{\"role\":\"user\",\"content\":\"192.168.0.1 root@gmail.com sk-12345\"}]}", "{\"choices\":[{\"index\":0,\"message\":{\"role\":\"assistant\",\"content\":\"192.168.0.1 sk-12345 root@gmail.com\"}}],\"usage\":{}}"},
			{"ok.openai.com", true, "{\"messages\":[{\"role\":\"user\",\"content\":\"fuck\"}]}", openaiDenied},
			{"ok.openai.com", true, "{\"messages\":[{\"role\":\"user\",\"content\":\"costom_word1\"}]}", openaiDenied},
			{"ok.openai.com", true, "{\"messages\":[{\"role\":\"user\",\"content\":\"costom_word\"}]}", "{\"choices\":[{\"index\":0,\"message\":{\"role\":\"assistant\",\"content\":\"ok\"}}],\"usage\":{}}"},
			{"system_deny.openai.com", true, "{\"messages\":[{\"role\":\"user\",\"content\":\"test\"}]}", openaiDenied},
			{"costom_word1.openai.com", true, "{\"messages\":[{\"role\":\"user\",\"content\":\"test\"}]}", openaiDenied},
			{"costom_word.openai.com", true, "{\"messages\":[{\"role\":\"user\",\"content\":\"test\"}]}", "{\"choices\":[{\"index\":0,\"message\":{\"role\":\"assistant\",\"content\":\"costom_word\"}}],\"usage\":{}}"},

			// raw: plain-text request bodies.
			{"replace.raw.com", false, "127.0.0.1 admin@gmail.com sk-12345", "{\"res\":\"127.0.0.1 sk-12345 admin@gmail.com\"}"},
			{"replace.raw.com", false, "192.168.0.1 root@gmail.com sk-12345", "{\"res\":\"192.168.0.1 sk-12345 root@gmail.com\"}"},
			{"ok.raw.com", false, "fuck", rawDenied},
			{"ok.raw.com", false, "costom_word1", rawDenied},
			{"ok.raw.com", false, "costom_word", "{\"res\":\"ok\"}"},
			{"system_deny.raw.com", false, "test", rawDenied},
			{"costom_word1.raw.com", false, "test", rawDenied},
			{"costom_word.raw.com", false, "test", "{\"res\":\"costom_word\"}"},

			// jsonpath: JSON request bodies that are not in the chat format.
			{"replace.raw.com", true, "{\"test\":[{\"test\":\"127.0.0.1 admin@gmail.com sk-12345\"}]}", "{\"res\":\"127.0.0.1 sk-12345 admin@gmail.com\"}"},
			// NOTE(review): the sensitive values sit under "test1" here and the
			// expected body keeps the masked placeholders — presumably because
			// "test1" is outside the configured jsonpath; confirm.
			{"replace.raw.com", true, "{\"test\":[{\"test\":\"test\", \"test1\":\"127.0.0.1 admin@gmail.com sk-12345\"}]}", "{\"res\":\"***.***.***.*** 48a7e98a91d93896d8dac522c5853948 ****@gmail.com\"}"},
		}

		testcases := make([]http.Assertion, 0, len(cases))
		for _, c := range cases {
			testcases = append(testcases, gen_assertion(c.host, c.isJSON, []byte(c.reqBody), []byte(c.resBody)))
		}

		t.Run("WasmPlugins ai-data-masking", func(t *testing.T) {
			for _, testcase := range testcases {
				http.MakeRequestAndExpectEventuallyConsistentResponse(t, suite.RoundTripper, suite.TimeoutConfig, suite.GatewayAddress, testcase)
			}
		})
	},
}

View File

@@ -0,0 +1,150 @@
# Copyright (c) 2022 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Ingress for the ai-data-masking e2e test: every host under *.openai.com and
# *.raw.com is routed, for any path under "/", to infra-backend-v1 on port 8080.
apiVersion: networking.k8s.io/v1
kind: Ingress
metadata:
name: wasmplugin-ai-data-masking
namespace: higress-conformance-infra
spec:
ingressClassName: higress
rules:
# Hosts used by the OpenAI-style (chat JSON) test cases.
- host: "*.openai.com"
http:
paths:
- pathType: Prefix
path: "/"
backend:
service:
name: infra-backend-v1
port:
number: 8080
# Hosts used by the raw-body and jsonpath test cases.
- host: "*.raw.com"
http:
paths:
- pathType: Prefix
path: "/"
backend:
service:
name: infra-backend-v1
port:
number: 8080
---
# custom-response plugin configured with one canned JSON body per test domain.
# In the e2e test these bodies play the role of the upstream answer that the
# ai-data-masking plugin then inspects.
# NOTE(review): priority 200 is lower than ai-data-masking's 300 — presumably
# this controls plugin ordering; confirm.
apiVersion: extensions.higress.io/v1alpha1
kind: WasmPlugin
metadata:
name: custom-response
namespace: higress-system
spec:
priority: 200
# Body used for any domain not listed under matchRules.
defaultConfig:
"body": "ok"
matchRules:
# *.openai.com domains answer with an OpenAI-style chat completion object.
- domain:
- ok.openai.com
config:
headers:
- Content-Type=application/json
"body": "{\"choices\":[{\"index\":0,\"message\":{\"role\":\"assistant\",\"content\":\"ok\"}}],\"usage\":{}}"
# Body carries already-masked placeholders for IP, API key and e-mail.
- domain:
- replace.openai.com
config:
headers:
- Content-Type=application/json
"body": "{\"choices\":[{\"index\":0,\"message\":{\"role\":\"assistant\",\"content\":\"***.***.***.*** 48a7e98a91d93896d8dac522c5853948 ****@gmail.com\"}}],\"usage\":{}}"
# Response content that the e2e test expects to be rejected.
- domain:
- system_deny.openai.com
config:
headers:
- Content-Type=application/json
"body": "{\"choices\":[{\"index\":0,\"message\":{\"role\":\"assistant\",\"content\":\"fuck\"}}],\"usage\":{}}"
# Response content equal to the configured custom deny word.
- domain:
- costom_word1.openai.com
config:
headers:
- Content-Type=application/json
"body": "{\"choices\":[{\"index\":0,\"message\":{\"role\":\"assistant\",\"content\":\"costom_word1\"}}],\"usage\":{}}"
# Near-miss of the deny word; the e2e test expects it to pass through.
- domain:
- costom_word.openai.com
config:
headers:
- Content-Type=application/json
"body": "{\"choices\":[{\"index\":0,\"message\":{\"role\":\"assistant\",\"content\":\"costom_word\"}}],\"usage\":{}}"
# *.raw.com domains answer with a flat {"res": ...} object.
- domain:
- ok.raw.com
config:
headers:
- Content-Type=application/json
"body": "{\"res\":\"ok\"}"
- domain:
- replace.raw.com
config:
headers:
- Content-Type=application/json
"body": "{\"res\":\"***.***.***.*** 48a7e98a91d93896d8dac522c5853948 ****@gmail.com\"}"
- domain:
- system_deny.raw.com
config:
headers:
- Content-Type=application/json
"body": "{\"res\":\"fuck\"}"
- domain:
- costom_word1.raw.com
config:
headers:
- Content-Type=application/json
"body": "{\"res\":\"costom_word1\"}"
- domain:
- costom_word.raw.com
config:
headers:
- Content-Type=application/json
"body": "{\"res\":\"costom_word\"}"
url: file:///opt/plugins/wasm-go/extensions/custom-response/plugin.wasm
---
# The plugin under test: the Rust ai-data-masking build, loaded from the local
# plugin path given in `url` below.
# NOTE(review): the per-field notes here are inferred from the key names and
# the companion e2e test expectations — confirm against the plugin's README.
apiVersion: extensions.higress.io/v1alpha1
kind: WasmPlugin
metadata:
name: ai-data-masking
namespace: higress-system
spec:
priority: 300
defaultConfig:
# Presumably enables the plugin's built-in deny dictionary.
system_deny: true
# The three deny_* switches below line up with the test's three case groups
# (openai, jsonpath, raw).
deny_openai: true
deny_jsonpath:
- "$.test[*].test"
deny_raw: true
# Status code, messages and content type used when something is rejected; the
# e2e test expects exactly these message strings in rejected responses.
deny_code: 200
deny_message: "提问或回答中包含敏感词,已被屏蔽"
deny_raw_message: "{\"errmsg\":\"提问或回答中包含敏感词,已被屏蔽\"}"
deny_content_type: "application/json"
# Custom deny list. The test expects "costom_word1" to be rejected while
# "costom_word" passes; the spelling matches the test and must stay in sync.
deny_words:
- "costom_word1"
# Masking rules, one per kind of sensitive value (e-mail, IP, "sk-" key).
# Each rule sets restore: true; the e2e test expects the original values to
# appear again in the final response for these patterns.
replace_roles:
- regex: "%{EMAILLOCALPART}@%{HOSTNAME:domain}"
type: "replace"
restore: true
value: "****@$domain"
- regex: "%{IP}"
type: "replace"
restore: true
value: "***.***.***.***"
- regex: "sk-[0-9a-zA-Z]*"
restore: true
type: "hash"
url: file:///opt/plugins/wasm-rust/extensions/ai-data-masking/plugin.wasm

View File

@@ -32,6 +32,7 @@ then
elif [ "$TYPE" == "RUST" ]
then
cd ./plugins/wasm-rust/
make lint-base
if [ ! -n "$INNER_PLUGIN_NAME" ]; then
EXTENSIONS_DIR=$(pwd)"/extensions/"
echo "🚀 Build all Rust WasmPlugins under folder of $EXTENSIONS_DIR"
@@ -40,12 +41,21 @@ then
if [ -d $EXTENSIONS_DIR$file ]; then
name=${file##*/}
echo "🚀 Build Rust WasmPlugin: $name"
PLUGIN_NAME=${name} make lint
PLUGIN_NAME=${name} BUILDER_REGISTRY="docker.io/alihigress/plugins-rust-" make build
fi
done
cd ../wasm-go/
PLUGIN_NAME=custom-response make build
else
echo "🚀 Build Rust WasmPlugin: $INNER_PLUGIN_NAME"
PLUGIN_NAME=${INNER_PLUGIN_NAME} make lint
PLUGIN_NAME=${INNER_PLUGIN_NAME} make build
if [ "$INNER_PLUGIN_NAME" == "ai-data-masking" ]; then
cd ../wasm-go/
PLUGIN_NAME=custom-response make build
fi
fi
else
echo "Not specify plugin language, so just compile wasm-go as default"