mirror of
https://github.com/alibaba/higress.git
synced 2026-02-27 06:00:51 +08:00
feat: enhance model mapper and router with rebuild triggers and path extensions (#3218)
This commit is contained in:
@@ -135,8 +135,40 @@ bool PluginRootContext::configure(size_t configuration_size) {
|
||||
return true;
|
||||
}
|
||||
|
||||
void PluginRootContext::incrementRequestCount() {
|
||||
request_count_++;
|
||||
if (request_count_ >= REBUILD_THRESHOLD) {
|
||||
LOG_DEBUG("Request count reached threshold, triggering rebuild");
|
||||
setFilterState("wasm_need_rebuild", "true");
|
||||
request_count_ = 0; // Reset counter after setting rebuild flag
|
||||
}
|
||||
}
|
||||
|
||||
FilterHeadersStatus PluginRootContext::onHeader(
|
||||
const ModelMapperConfigRule& rule) {
|
||||
// Increment request count and check for rebuild
|
||||
incrementRequestCount();
|
||||
|
||||
// Check memory threshold and trigger rebuild if needed
|
||||
std::string value;
|
||||
if (getValue({"plugin_vm_memory"}, &value)) {
|
||||
// The value is stored as binary uint64_t, convert to string for logging
|
||||
if (value.size() == sizeof(uint64_t)) {
|
||||
uint64_t memory_size;
|
||||
memcpy(&memory_size, value.data(), sizeof(uint64_t));
|
||||
LOG_DEBUG(absl::StrCat("vm memory size is ", memory_size));
|
||||
if (memory_size >= MEMORY_THRESHOLD_BYTES) {
|
||||
LOG_INFO(absl::StrCat("Memory threshold reached (", memory_size, " >= ",
|
||||
MEMORY_THRESHOLD_BYTES, "), triggering rebuild"));
|
||||
setFilterState("wasm_need_rebuild", "true");
|
||||
}
|
||||
} else {
|
||||
LOG_ERROR("invalid memory size format");
|
||||
}
|
||||
} else {
|
||||
LOG_ERROR("get vm memory size failed");
|
||||
}
|
||||
|
||||
if (!Wasm::Common::Http::hasRequestBody()) {
|
||||
return FilterHeadersStatus::Continue;
|
||||
}
|
||||
|
||||
@@ -42,9 +42,10 @@ struct ModelMapperConfigRule {
|
||||
std::vector<std::pair<std::string, std::string>> prefix_model_mapping_;
|
||||
std::string default_model_mapping_;
|
||||
std::vector<std::string> enable_on_path_suffix_ = {
|
||||
"/completions", "/embeddings", "/images/generations",
|
||||
"/audio/speech", "/fine_tuning/jobs", "/moderations",
|
||||
"/image-synthesis", "/video-synthesis"};
|
||||
"/completions", "/embeddings", "/images/generations",
|
||||
"/audio/speech", "/fine_tuning/jobs", "/moderations",
|
||||
"/image-synthesis", "/video-synthesis", "/rerank",
|
||||
"/messages"};
|
||||
};
|
||||
|
||||
// PluginRootContext is the root context for all streams processed by the
|
||||
@@ -60,9 +61,13 @@ class PluginRootContext : public RootContext,
|
||||
FilterHeadersStatus onHeader(const ModelMapperConfigRule&);
|
||||
FilterDataStatus onBody(const ModelMapperConfigRule&, std::string_view);
|
||||
bool configure(size_t);
|
||||
void incrementRequestCount();
|
||||
|
||||
private:
|
||||
bool parsePluginConfig(const json&, ModelMapperConfigRule&) override;
|
||||
uint64_t request_count_ = 0;
|
||||
static constexpr uint64_t REBUILD_THRESHOLD = 1000;
|
||||
static constexpr size_t MEMORY_THRESHOLD_BYTES = 200 * 1024 * 1024;
|
||||
};
|
||||
|
||||
// Per-stream context.
|
||||
|
||||
Reference in New Issue
Block a user