mirror of
https://github.com/alibaba/higress.git
synced 2026-04-22 04:27:26 +08:00
Ai data mask deny word match optimize (#1453)
This commit is contained in:
@@ -0,0 +1,54 @@
|
||||
use std::collections::HashSet;
|
||||
|
||||
use jieba_rs::Jieba;
|
||||
|
||||
use crate::Asset;
|
||||
|
||||
#[derive(Default, Debug, Clone)]
|
||||
pub(crate) struct DenyWord {
|
||||
jieba: Jieba,
|
||||
words: HashSet<String>,
|
||||
}
|
||||
|
||||
impl DenyWord {
|
||||
pub(crate) fn from_iter<T: IntoIterator<Item = impl Into<String>>>(words: T) -> Self {
|
||||
let mut deny_word = DenyWord::default();
|
||||
|
||||
for word in words {
|
||||
let word_s = word.into();
|
||||
let w = word_s.trim();
|
||||
if w.is_empty() {
|
||||
continue;
|
||||
}
|
||||
deny_word.jieba.add_word(w, None, None);
|
||||
deny_word.words.insert(w.to_string());
|
||||
}
|
||||
|
||||
deny_word
|
||||
}
|
||||
|
||||
pub(crate) fn empty() -> Self {
|
||||
DenyWord {
|
||||
jieba: Jieba::empty(),
|
||||
words: HashSet::new(),
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn system() -> Self {
|
||||
if let Some(file) = Asset::get("sensitive_word_dict.txt") {
|
||||
if let Ok(data) = std::str::from_utf8(file.data.as_ref()) {
|
||||
return DenyWord::from_iter(data.split('\n'));
|
||||
}
|
||||
}
|
||||
Self::empty()
|
||||
}
|
||||
|
||||
pub(crate) fn check(&self, message: &str) -> Option<String> {
|
||||
for word in self.jieba.cut(message, true) {
|
||||
if self.words.contains(word) {
|
||||
return Some(word.to_string());
|
||||
}
|
||||
}
|
||||
None
|
||||
}
|
||||
}
|
||||
@@ -12,13 +12,15 @@
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
mod deny_word;
|
||||
|
||||
use crate::deny_word::DenyWord;
|
||||
use fancy_regex::Regex;
|
||||
use grok::patterns;
|
||||
use higress_wasm_rust::log::Log;
|
||||
use higress_wasm_rust::plugin_wrapper::{HttpContextWrapper, RootContextWrapper};
|
||||
use higress_wasm_rust::request_wrapper::has_request_body;
|
||||
use higress_wasm_rust::rule_matcher::{on_configure, RuleMatcher, SharedRuleMatcher};
|
||||
use jieba_rs::Jieba;
|
||||
use jsonpath_rust::{JsonPath, JsonPathValue};
|
||||
use lazy_static::lazy_static;
|
||||
use proxy_wasm::traits::{Context, HttpContext, RootContext};
|
||||
@@ -29,7 +31,7 @@ use serde::Deserialize;
|
||||
use serde::Deserializer;
|
||||
use serde_json::{json, Value};
|
||||
use std::cell::RefCell;
|
||||
use std::collections::{BTreeMap, HashMap, HashSet, VecDeque};
|
||||
use std::collections::{BTreeMap, HashMap, VecDeque};
|
||||
use std::ops::DerefMut;
|
||||
use std::rc::Rc;
|
||||
use std::str::FromStr;
|
||||
@@ -47,11 +49,6 @@ const GROK_PATTERN: &str = r"%\{(?<name>(?<pattern>[A-z0-9]+)(?::(?<alias>[A-z0-
|
||||
#[folder = "res/"]
|
||||
struct Asset;
|
||||
|
||||
#[derive(Default, Debug, Clone)]
|
||||
struct DenyWord {
|
||||
jieba: Jieba,
|
||||
words: HashSet<String>,
|
||||
}
|
||||
struct System {
|
||||
deny_word: DenyWord,
|
||||
grok_regex: Regex,
|
||||
@@ -227,52 +224,12 @@ static SYSTEM_PATTERNS: &[(&str, &str)] = &[
|
||||
("IDCARD", r#"\d{17}[0-9xX]|\d{15}"#),
|
||||
];
|
||||
|
||||
impl DenyWord {
|
||||
fn empty() -> Self {
|
||||
DenyWord {
|
||||
jieba: Jieba::empty(),
|
||||
words: HashSet::new(),
|
||||
}
|
||||
}
|
||||
fn from_iter<T: IntoIterator<Item = impl Into<String>>>(words: T) -> Self {
|
||||
let mut deny_word = DenyWord::empty();
|
||||
|
||||
for word in words {
|
||||
let word_s = word.into();
|
||||
let w = word_s.trim();
|
||||
if w.is_empty() {
|
||||
continue;
|
||||
}
|
||||
deny_word.jieba.add_word(w, None, None);
|
||||
deny_word.words.insert(w.to_string());
|
||||
}
|
||||
|
||||
deny_word
|
||||
}
|
||||
fn default() -> Self {
|
||||
if let Some(file) = Asset::get("sensitive_word_dict.txt") {
|
||||
if let Ok(data) = std::str::from_utf8(file.data.as_ref()) {
|
||||
return DenyWord::from_iter(data.split('\n'));
|
||||
}
|
||||
}
|
||||
DenyWord::empty()
|
||||
}
|
||||
|
||||
fn check(&self, message: &str) -> Option<String> {
|
||||
for word in self.jieba.cut(message, true) {
|
||||
if self.words.contains(word) {
|
||||
return Some(word.to_string());
|
||||
}
|
||||
}
|
||||
None
|
||||
}
|
||||
}
|
||||
impl System {
|
||||
fn new() -> Self {
|
||||
let grok_regex = Regex::new(GROK_PATTERN).unwrap();
|
||||
let grok_patterns = BTreeMap::new();
|
||||
let mut system = System {
|
||||
deny_word: DenyWord::default(),
|
||||
deny_word: DenyWord::system(),
|
||||
grok_regex,
|
||||
grok_patterns,
|
||||
};
|
||||
@@ -335,6 +292,7 @@ impl System {
|
||||
(ret, ok)
|
||||
}
|
||||
}
|
||||
|
||||
impl AiDataMaskingRoot {
|
||||
fn new() -> Self {
|
||||
AiDataMaskingRoot {
|
||||
@@ -382,6 +340,7 @@ impl RootContextWrapper<AiDataMaskingConfig> for AiDataMaskingRoot {
|
||||
}))
|
||||
}
|
||||
}
|
||||
|
||||
impl AiDataMasking {
|
||||
fn check_message(&self, message: &str) -> bool {
|
||||
if let Some(config) = &self.config {
|
||||
@@ -532,6 +491,7 @@ impl AiDataMasking {
|
||||
}
|
||||
|
||||
impl Context for AiDataMasking {}
|
||||
|
||||
impl HttpContext for AiDataMasking {
|
||||
fn on_http_request_headers(
|
||||
&mut self,
|
||||
@@ -607,6 +567,7 @@ impl HttpContext for AiDataMasking {
|
||||
DataAction::Continue
|
||||
}
|
||||
}
|
||||
|
||||
impl HttpContextWrapper<AiDataMaskingConfig> for AiDataMasking {
|
||||
fn log(&self) -> &Log {
|
||||
&self.log
|
||||
|
||||
Reference in New Issue
Block a user