From a0c334a7cbf7b564ad0f527dc6b4ee360ef44637 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=BE=84=E6=BD=AD?= Date: Sun, 9 Mar 2025 23:07:49 +0800 Subject: [PATCH] optimize model router&mapper (#1866) --- .../extensions/model_mapper/plugin.cc | 5 ++- .../extensions/model_router/README.md | 6 +-- .../extensions/model_router/README_EN.md | 42 +++++++++---------- .../extensions/model_router/plugin.cc | 10 ++++- 4 files changed, 35 insertions(+), 28 deletions(-) diff --git a/plugins/wasm-cpp/extensions/model_mapper/plugin.cc b/plugins/wasm-cpp/extensions/model_mapper/plugin.cc index 69c2e7c7c..771a3c563 100644 --- a/plugins/wasm-cpp/extensions/model_mapper/plugin.cc +++ b/plugins/wasm-cpp/extensions/model_mapper/plugin.cc @@ -44,8 +44,8 @@ static RegisterContextFactory register_ModelMapper( namespace { constexpr std::string_view SetDecoderBufferLimitKey = - "SetRequestBodyBufferLimit"; -constexpr std::string_view DefaultMaxBodyBytes = "10485760"; + "set_decoder_buffer_limit"; +constexpr std::string_view DefaultMaxBodyBytes = "104857600"; } // namespace @@ -166,6 +166,7 @@ FilterHeadersStatus PluginRootContext::onHeader( } removeRequestHeader(Wasm::Common::Http::Header::ContentLength); setFilterState(SetDecoderBufferLimitKey, DefaultMaxBodyBytes); + LOG_INFO(absl::StrCat("SetRequestBodyBufferLimit: ", DefaultMaxBodyBytes)); return FilterHeadersStatus::StopIteration; } diff --git a/plugins/wasm-cpp/extensions/model_router/README.md b/plugins/wasm-cpp/extensions/model_router/README.md index 7d7d30805..d414a94a3 100644 --- a/plugins/wasm-cpp/extensions/model_router/README.md +++ b/plugins/wasm-cpp/extensions/model_router/README.md @@ -1,14 +1,14 @@ -# 功能说明 +## 功能说明 `model-router`插件实现了基于LLM协议中的model参数路由的功能 -# 配置字段 +## 配置字段 | 名称 | 数据类型 | 填写要求 | 默认值 | 描述 | | ----------- | --------------- | ----------------------- | ------ | ------------------------------------------- | | `modelKey` | string | 选填 | model | 请求body中model参数的位置 | | `addProviderHeader` | string | 选填 | - | 从model参数中解析出的provider名字放到哪个请求header中 | | `modelToHeader` | string | 选填 | - | 直接将model参数放到哪个请求header中 | -| `enableOnPathSuffix` | array of string | 选填 | ["/v1/chat/completions"] | 只对这些特定路径后缀的请求生效 | +| `enableOnPathSuffix` | array of string | 选填 | ["/v1/chat/completions"] | 只对这些特定路径后缀的请求生效,可以配置为 "*" 以匹配所有路径 | ## 运行属性 diff --git a/plugins/wasm-cpp/extensions/model_router/README_EN.md b/plugins/wasm-cpp/extensions/model_router/README_EN.md index f5866423b..5960fa292 100644 --- a/plugins/wasm-cpp/extensions/model_router/README_EN.md +++ b/plugins/wasm-cpp/extensions/model_router/README_EN.md @@ -1,31 +1,31 @@ -## Function Description -The `model-router` plugin implements the function of routing based on the model parameter in the LLM protocol. +## Feature Description +The `model-router` plugin implements routing functionality based on the model parameter in LLM protocols. ## Configuration Fields -| Name | Data Type | Filling Requirement | Default Value | Description | -| ----------- | --------------- | ----------------------- | ------ | ------------------------------------------- | -| `modelKey` | string | Optional | model | The location of the model parameter in the request body | -| `addProviderHeader` | string | Optional | - | Which request header to place the provider name parsed from the model parameter | -| `modelToHeader` | string | Optional | - | Which request header to directly place the model parameter | -| `enableOnPathSuffix` | array of string | Optional | ["/v1/chat/completions"] | Only effective for requests with these specific path suffixes | +| Name | Data Type | Requirement | Default Value | Description | +| ----------- | --------------- | ----------------------- | ------ | ------------------------------------------- | +| `modelKey` | string | Optional | model | Location of the model parameter in the request body | +| `addProviderHeader` | string | Optional | - | Which request header to add the provider name parsed from the model parameter | +| `modelToHeader` | string | Optional | - | Which request header to directly add the model parameter to | +| `enableOnPathSuffix` | array of string | Optional | ["/v1/chat/completions"] | Only effective for requests with these specific path suffixes, can be configured as "*" to match all paths | -## Runtime Attributes +## Runtime Properties Plugin execution phase: Authentication phase Plugin execution priority: 900 ## Effect Description -### Routing Based on the model Parameter +### Routing Based on Model Parameter -The following configuration is required: +The following configuration is needed: ```yaml modelToHeader: x-higress-llm-model ``` -The plugin will extract the model parameter from the request and set it in the x-higress-llm-model request header, which can be used for subsequent routing. For example, the original LLM request body: +The plugin extracts the model parameter from the request and sets it to the x-higress-llm-model request header for subsequent routing. For example, the original LLM request body is: ```json { @@ -35,7 +35,7 @@ The plugin will extract the model parameter from the request and set it in the x "stream": false, "messages": [{ "role": "user", - "content": "What is the GitHub address of the main repository for the higress project" + "content": "What is the GitHub address of the Higress project's main repository?" }], "presence_penalty": 0, "temperature": 0.7, @@ -43,21 +43,21 @@ The plugin will extract the model parameter from the request and set it in the x } ``` -After processing by this plugin, the following request header (which can be used for route matching) will be added: +After processing by this plugin, the following request header will be added (can be used for route matching): x-higress-llm-model: qwen-long -### Extracting the provider Field from the model Parameter for Routing +### Extracting Provider Field from Model Parameter for Routing -> Note that this mode requires the client to specify the provider using a `/` separator in the model parameter. +> Note that this mode requires the client to specify the provider in the model parameter using the `/` delimiter -The following configuration is required: +The following configuration is needed: ```yaml addProviderHeader: x-higress-llm-provider ``` -The plugin will extract the provider part (if present) from the model parameter in the request and set it in the x-higress-llm-provider request header, which can be used for subsequent routing, and rewrite the model parameter to the model name part. For example, the original LLM request body: +The plugin extracts the provider part (if any) from the model parameter in the request, sets it to the x-higress-llm-provider request header for subsequent routing, and rewrites the model parameter to only contain the model name part. For example, the original LLM request body is: ```json { @@ -67,7 +67,7 @@ The plugin will extract the provider part (if present) from the model parameter "stream": false, "messages": [{ "role": "user", - "content": "What is the GitHub address of the main repository for the higress project" + "content": "What is the GitHub address of the Higress project's main repository?" }], "presence_penalty": 0, "temperature": 0.7, @@ -75,7 +75,7 @@ The plugin will extract the provider part (if present) from the model parameter } ``` -After processing by this plugin, the following request header (which can be used for route matching) will be added: +After processing by this plugin, the following request header will be added (can be used for route matching): x-higress-llm-provider: dashscope @@ -89,7 +89,7 @@ The original LLM request body will be changed to: "stream": false, "messages": [{ "role": "user", - "content": "What is the GitHub address of the main repository for the higress project" + "content": "What is the GitHub address of the Higress project's main repository?" }], "presence_penalty": 0, "temperature": 0.7, diff --git a/plugins/wasm-cpp/extensions/model_router/plugin.cc b/plugins/wasm-cpp/extensions/model_router/plugin.cc index 0e45ec473..23aaeaff7 100644 --- a/plugins/wasm-cpp/extensions/model_router/plugin.cc +++ b/plugins/wasm-cpp/extensions/model_router/plugin.cc @@ -44,8 +44,8 @@ static RegisterContextFactory register_ModelRouter( namespace { constexpr std::string_view SetDecoderBufferLimitKey = - "SetRequestBodyBufferLimit"; -constexpr std::string_view DefaultMaxBodyBytes = "10485760"; + "set_decoder_buffer_limit"; +constexpr std::string_view DefaultMaxBodyBytes = "104857600"; } // namespace @@ -137,6 +137,11 @@ FilterHeadersStatus PluginRootContext::onHeader( } bool enable = false; for (const auto& enable_suffix : rule.enable_on_path_suffix_) { + // Support wildcard "*" to enable for all paths + if (enable_suffix == "*") { + enable = true; + break; + } if (absl::EndsWith({path.c_str(), uri_end}, enable_suffix)) { enable = true; break; @@ -153,6 +158,7 @@ FilterHeadersStatus PluginRootContext::onHeader( } removeRequestHeader(Wasm::Common::Http::Header::ContentLength); setFilterState(SetDecoderBufferLimitKey, DefaultMaxBodyBytes); + LOG_INFO(absl::StrCat("SetRequestBodyBufferLimit: ", DefaultMaxBodyBytes)); return FilterHeadersStatus::StopIteration; }