feat: Add an AI-Proxy Wasm plugin (#921)

Co-authored-by: 澄潭 <zty98751@alibaba-inc.com>
2026-06-09 12:47:28 +08:00 · 2024-05-14 17:00:12 +08:00
parent 5c7736980c
commit 333f9b48f3
21 changed files with 2124 additions and 1 deletions
--- a/plugins/wasm-go/Dockerfile
+++ b/plugins/wasm-go/Dockerfile
@@ -5,6 +5,9 @@ FROM $BUILDER as builder
 ARG GOPROXY
 ENV GOPROXY=${GOPROXY}

+ARG EXTRA_TAGS=""
+ENV EXTRA_TAGS=${EXTRA_TAGS}
+
 ARG PLUGIN_NAME=hello-world

 WORKDIR /workspace
@@ -14,7 +17,7 @@ COPY . .
 WORKDIR /workspace/extensions/$PLUGIN_NAME

 RUN go mod tidy
-RUN tinygo build -o /main.wasm -scheduler=none -gc=custom -tags='custommalloc nottinygc_finalizer' -target=wasi ./
+RUN tinygo build -o /main.wasm -scheduler=none -gc=custom -tags="custommalloc nottinygc_finalizer $EXTRA_TAGS" -target=wasi ./

 FROM scratch as output

--- a/plugins/wasm-go/extensions/ai-proxy/.gitignore
+++ b/plugins/wasm-go/extensions/ai-proxy/.gitignore
@@ -0,0 +1,19 @@
+# File generated by hgctl. Modify as required.
+
+*
+
+!/.gitignore
+
+!*.go
+!go.sum
+!go.mod
+
+!LICENSE
+!*.md
+!*.yaml
+!*.yml
+
+!*/
+
+/out
+/test
--- a/plugins/wasm-go/extensions/ai-proxy/README.md
+++ b/plugins/wasm-go/extensions/ai-proxy/README.md
@@ -0,0 +1,340 @@
+---
+title: AI 代理
+keywords: [ higress,ai,proxy,rag ]
+description: AI 代理插件配置参考
+---
+
+## 功能说明
+
+`AI 代理`插件实现了基于 OpenAI API 契约的 AI 代理功能。目前支持 OpenAI、Azure OpenAI、月之暗面（Moonshot）和通义千问等 AI
+服务提供商。
+
+## 配置字段
+
+### 基本配置
+
+| 名称         | 数据类型   | 填写要求 | 默认值 | 描述               |
+|------------|--------|------|-----|------------------|
+| `provider` | object | 必填   | -   | 配置目标 AI 服务提供商的信息 |
+
+`provider`的配置字段说明如下：
+
+| 名称             | 数据类型                    | 填写要求 | 默认值 | 描述                                                                    |
+|----------------|-------------------------|------|-----|-----------------------------------------------------------------------|
+| `type`         | string                  | 必填   | -   | AI 服务提供商名称。目前支持以下值：openai, azure, moonshot, qwen                      |
+| `apiTokens`    | array of string         | 必填   | -   | 用于在访问 AI 服务时进行认证的令牌。如果配置了多个 token，插件会在请求时随机进行选择。部分服务提供商只支持配置一个 token。 |
+| `timeout`      | number                  | 非必填  | 120000 | 访问 AI 服务的超时时间。单位为毫秒。默认值为 120000，即 2 分钟                                |
+| `modelMapping` | map of string to string | 非必填  | -   | AI 模型映射表，用于将请求中的模型名称映射为服务提供商支持模型名称。<br/>可以使用 "*" 为键来配置通用兜底映射关系        |
+| `context`      | object                  | 非必填  | -   | 配置 AI 对话上下文信息                                                         |
+
+`context`的配置字段说明如下：
+
+| 名称            | 数据类型   | 填写要求 | 默认值 | 描述                               |
+|---------------|--------|------|-----|----------------------------------|
+| `fileUrl`     | string | 必填   | -   | 保存 AI 对话上下文的文件 URL。仅支持纯文本类型的文件内容 |
+| `serviceName` | string | 必填   | -   | URL 所对应的 Higress 后端服务完整名称        |
+| `servicePort` | number | 必填   | -   | URL 所对应的 Higress 后端服务访问端口        |
+
+### 提供商特有配置
+
+#### OpenAI
+
+OpenAI 所对应的 `type` 为 `openai`。它并无特有的配置字段。
+
+#### Azure OpenAI
+
+Azure OpenAI 所对应的 `type` 为 `azure`。它特有的配置字段如下：
+
+| 名称                | 数据类型   | 填写要求 | 默认值 | 描述                                           |
+|-------------------|--------|------|-----|----------------------------------------------|
+| `azureServiceUrl` | string | 必填   | -   | Azure OpenAI 服务的 URL，须包含 `api-version` 查询参数。 |
+
+**注意：** Azure OpenAI 只支持配置一个 API Token。
+
+#### 月之暗面（Moonshot）
+
+月之暗面所对应的 `type` 为 `moonshot`。它特有的配置字段如下：
+
+| 名称               | 数据类型   | 填写要求 | 默认值 | 描述                                                          |
+|------------------|--------|------|-----|-------------------------------------------------------------|
+| `moonshotFileId` | string | 非必填  | -   | 通过文件接口上传至月之暗面的文件 ID，其内容将被用做 AI 对话的上下文。不可与 `context` 字段同时配置。 |
+
+#### 通义千问（Qwen）
+
+通义千问所对应的 `type` 为 `qwen`。它并无特有的配置字段。
+
+## 用法示例
+
+### 使用 OpenAI 协议代理 Azure OpenAI 服务
+
+使用最基本的 Azure OpenAI 服务，不配置任何上下文。
+
+**配置信息**
+
+```yaml
+provider:
+  type: azure
+  apiTokens:
+    - "YOUR_AZURE_OPENAI_API_TOKEN"
+  azureServiceUrl: "https://YOUR_RESOURCE_NAME.openai.azure.com/openai/deployments/YOUR_DEPLOYMENT_NAME/chat/completions?api-version=2024-02-15-preview"
+```
+
+**请求示例**
+
+```json
+{
+  "model": "gpt-3",
+  "messages": [
+    {
+      "role": "user",
+      "content": "你好，你是谁？"
+    }
+  ],
+  "temperature": 0.3
+}
+```
+
+**响应示例**
+
+```json
+{
+  "choices": [
+    {
+      "content_filter_results": {
+        "hate": {
+          "filtered": false,
+          "severity": "safe"
+        },
+        "self_harm": {
+          "filtered": false,
+          "severity": "safe"
+        },
+        "sexual": {
+          "filtered": false,
+          "severity": "safe"
+        },
+        "violence": {
+          "filtered": false,
+          "severity": "safe"
+        }
+      },
+      "finish_reason": "stop",
+      "index": 0,
+      "logprobs": null,
+      "message": {
+        "content": "你好！我是一个AI助手，可以回答你的问题和提供帮助。有什么我可以帮到你的吗？",
+        "role": "assistant"
+      }
+    }
+  ],
+  "created": 1714807624,
+  "id": "chatcmpl-abcdefg1234567890",
+  "model": "gpt-35-turbo-16k",
+  "object": "chat.completion",
+  "prompt_filter_results": [
+    {
+      "prompt_index": 0,
+      "content_filter_results": {
+        "hate": {
+          "filtered": false,
+          "severity": "safe"
+        },
+        "self_harm": {
+          "filtered": false,
+          "severity": "safe"
+        },
+        "sexual": {
+          "filtered": false,
+          "severity": "safe"
+        },
+        "violence": {
+          "filtered": false,
+          "severity": "safe"
+        }
+      }
+    }
+  ],
+  "system_fingerprint": null,
+  "usage": {
+    "completion_tokens": 40,
+    "prompt_tokens": 15,
+    "total_tokens": 55
+  }
+}
+```
+
+### 使用 OpenAI 协议代理通义千问服务
+
+使用通义千问服务，并配置从 OpenAI 大模型到通义千问的模型映射关系。
+
+**配置信息**
+
+```yaml
+provider:
+  type: qwen
+  apiTokens:
+    - "YOUR_QWEN_API_TOKEN"
+  modelMapping:
+    'gpt-3': "qwen-turbo"
+    'gpt-35-turbo': "qwen-plus"
+    'gpt-4-turbo': "qwen-max"
+    '*': "qwen-turbo"
+```
+
+**请求示例**
+
+```json
+{
+  "model": "gpt-3",
+  "messages": [
+    {
+      "role": "user",
+      "content": "你好，你是谁？"
+    }
+  ],
+  "temperature": 0.3
+}
+```
+
+**响应示例**
+
+```json
+{
+  "id": "c2518bd3-0f46-97d1-be34-bb5777cb3108",
+  "choices": [
+    {
+      "index": 0,
+      "message": {
+        "role": "assistant",
+        "content": "我是通义千问，由阿里云开发的AI助手。我可以回答各种问题、提供信息和与用户进行对话。有什么我可以帮助你的吗？"
+      },
+      "finish_reason": "stop"
+    }
+  ],
+  "created": 1715175072,
+  "model": "qwen-turbo",
+  "object": "chat.completion",
+  "usage": {
+    "prompt_tokens": 24,
+    "completion_tokens": 33,
+    "total_tokens": 57
+  }
+}
+```
+
+### 使用通义千问配合纯文本上下文信息
+
+使用通义千问服务，同时配置纯文本上下文信息。
+
+**配置信息**
+
+```yaml
+provider:
+  type: qwen
+  apiTokens:
+    - "YOUR_QWEN_API_TOKEN"
+  modelMapping:
+    "*": "qwen-turbo"
+  context:
+    fileUrl: "http://file.default.svc.cluster.local/ai/context.txt"
+    serviceName: "file.dns"
+    servicePort: 80
+```
+
+**请求示例**
+
+```json
+{
+  "model": "gpt-3",
+  "messages": [
+    {
+      "role": "user",
+      "content": "请概述文案内容"
+    }
+  ],
+  "temperature": 0.3
+}
+```
+
+**响应示例**
+
+```json
+{
+  "id": "cmpl-77861a17681f4987ab8270dbf8001936",
+  "object": "chat.completion",
+  "created": 9756990,
+  "model": "moonshot-v1-128k",
+  "choices": [
+    {
+      "index": 0,
+      "message": {
+        "role": "assistant",
+        "content": "这份文案是一份关于..."
+      },
+      "finish_reason": "stop"
+    }
+  ],
+  "usage": {
+    "prompt_tokens": 20181,
+    "completion_tokens": 439,
+    "total_tokens": 20620
+  }
+}
+```
+
+### 使用月之暗面配合其原生的文件上下文
+
+提前上传文件至月之暗面，以文件内容作为上下文使用其 AI 服务。
+
+**配置信息**
+
+```yaml
+provider:
+  type: moonshot
+  apiTokens:
+    - "YOUR_MOONSHOT_API_TOKEN"
+  moonshotFileId: "YOUR_MOONSHOT_FILE_ID"
+  modelMapping:
+    '*': "moonshot-v1-32k"
+```
+
+**请求示例**
+
+```json
+{
+  "model": "gpt-4-turbo",
+  "messages": [
+    {
+      "role": "user",
+      "content": "请概述文案内容"
+    }
+  ],
+  "temperature": 0.3
+}
+```
+
+**响应示例**
+
+```json
+{
+  "id": "cmpl-e5ca873642ca4f5d8b178c1742f9a8e8",
+  "object": "chat.completion",
+  "created": 1872961,
+  "model": "moonshot-v1-128k",
+  "choices": [
+    {
+      "index": 0,
+      "message": {
+        "role": "assistant",
+        "content": "文案内容是关于一个名为“xxxx”的支付平台..."
+      },
+      "finish_reason": "stop"
+    }
+  ],
+  "usage": {
+    "prompt_tokens": 11,
+    "completion_tokens": 498,
+    "total_tokens": 509
+  }
+}
+```
--- a/plugins/wasm-go/extensions/ai-proxy/config/config.go
+++ b/plugins/wasm-go/extensions/ai-proxy/config/config.go
@@ -0,0 +1,52 @@
+package config
+
+import (
+	"github.com/tidwall/gjson"
+
+	"github.com/alibaba/higress/plugins/wasm-go/extensions/ai-proxy/provider"
+)
+
+// @Name ai-proxy
+// @Category custom
+// @Phase UNSPECIFIED_PHASE
+// @Priority 0
+// @Title zh-CN AI代理
+// @Description zh-CN 通过AI助手提供智能对话服务
+// @IconUrl https://img.alicdn.com/imgextra/i1/O1CN018iKKih1iVx287RltL_!!6000000004419-2-tps-42-42.png
+// @Version 0.1.0
+//
+// @Contact.name CH3CHO
+// @Contact.url https://github.com/CH3CHO
+// @Contact.email ch3cho@qq.com
+//
+// @Example
+// { "provider": { "type": "qwen", "apiTokens": ["YOUR_DASHSCOPE_API_TOKEN"], "modelMapping": { "*": "qwen-turbo" } } }
+// @End
+type PluginConfig struct {
+	// @Title zh-CN AI服务提供商配置
+	// @Description zh-CN AI服务提供商配置，包含API接口、模型和知识库文件等信息
+	providerConfig provider.ProviderConfig `required:"true" yaml:"provider"`
+
+	// provider is the Provider instance that Complete builds from
+	// providerConfig; the yaml:"-" tag marks it as not user-configurable.
+	provider provider.Provider `yaml:"-"`
+}
+
+// FromJson fills the plugin configuration from the parsed JSON document.
+// Only the "provider" node is read; it is handed to the provider
+// configuration's own FromJson.
+func (c *PluginConfig) FromJson(json gjson.Result) {
+	providerJson := json.Get("provider")
+	c.providerConfig.FromJson(providerJson)
+}
+
+// Validate returns whatever error the provider configuration's Validate
+// reports, or nil when it reports none.
+func (c *PluginConfig) Validate() error {
+	return c.providerConfig.Validate()
+}
+
+func (c *PluginConfig) Complete() error {
+	var err error
+	c.provider, err = provider.CreateProvider(c.providerConfig)
+	return err
+}
+
+// GetProvider returns the provider stored by Complete. It is nil if Complete
+// has not been called or did not produce a provider.
+func (c *PluginConfig) GetProvider() provider.Provider {
+	return c.provider
+}
--- a/plugins/wasm-go/extensions/ai-proxy/envoy.yaml
+++ b/plugins/wasm-go/extensions/ai-proxy/envoy.yaml
@@ -0,0 +1,110 @@
+# File generated by hgctl. Modify as required.
+
+admin:
+  address:
+    socket_address:
+      protocol: TCP
+      address: 0.0.0.0
+      port_value: 9901
+static_resources:
+  listeners:
+    - name: listener_0
+      address:
+        socket_address:
+          protocol: TCP
+          address: 0.0.0.0
+          port_value: 10000
+      filter_chains:
+        - filters:
+            - name: envoy.filters.network.http_connection_manager
+              typed_config:
+                "@type": type.googleapis.com/envoy.extensions.filters.network.http_connection_manager.v3.HttpConnectionManager
+                scheme_header_transformation:
+                  scheme_to_overwrite: https
+                stat_prefix: ingress_http
+                # Output envoy logs to stdout
+                access_log:
+                  - name: envoy.access_loggers.stdout
+                    typed_config:
+                      "@type": type.googleapis.com/envoy.extensions.access_loggers.stream.v3.StdoutAccessLog
+                # Modify as required
+                route_config:
+                  name: local_route
+                  virtual_hosts:
+                    - name: local_service
+                      domains: [ "*" ]
+                      routes:
+                        - match:
+                            prefix: "/"
+                          route:
+                            cluster: moonshot
+                            timeout: 300s
+                http_filters:
+                  - name: wasmtest
+                    typed_config:
+                      "@type": type.googleapis.com/udpa.type.v1.TypedStruct
+                      type_url: type.googleapis.com/envoy.extensions.filters.http.wasm.v3.Wasm
+                      value:
+                        config:
+                          name: wasmtest
+                          vm_config:
+                            runtime: envoy.wasm.runtime.v8
+                            code:
+                              local:
+                                filename: /etc/envoy/plugin.wasm
+                          configuration:
+                            "@type": "type.googleapis.com/google.protobuf.StringValue"
+                            value: |
+                              {
+                                "provider": {
+                                  "type": "moonshot",
+                                  "domain": "api.moonshot.cn",
+                                  "apiTokens": [
+                                    "****",
+                                    "****"
+                                  ],
+                                  "timeout": 1200000,
+                                  "modelMapping": {
+                                    "gpt-3": "moonshot-v1-8k",
+                                    "gpt-35-turbo": "moonshot-v1-32k",
+                                    "gpt-4-turbo": "moonshot-v1-128k",
+                                    "*": "moonshot-v1-8k"
+                                  }
+                                }
+                              }
+                  - name: envoy.filters.http.router
+  clusters:
+    - name: httpbin
+      connect_timeout: 30s
+      type: LOGICAL_DNS
+      # Comment out the following line to test on v6 networks
+      dns_lookup_family: V4_ONLY
+      lb_policy: ROUND_ROBIN
+      load_assignment:
+        cluster_name: httpbin
+        endpoints:
+          - lb_endpoints:
+              - endpoint:
+                  address:
+                    socket_address:
+                      address: httpbin
+                      port_value: 80
+    - name: moonshot
+      connect_timeout: 30s
+      type: LOGICAL_DNS
+      dns_lookup_family: V4_ONLY
+      lb_policy: ROUND_ROBIN
+      load_assignment:
+        cluster_name: moonshot
+        endpoints:
+          - lb_endpoints:
+              - endpoint:
+                  address:
+                    socket_address:
+                      address: api.moonshot.cn
+                      port_value: 443
+      transport_socket:
+        name: envoy.transport_sockets.tls
+        typed_config:
+          "@type": type.googleapis.com/envoy.extensions.transport_sockets.tls.v3.UpstreamTlsContext
+          "sni": "api.moonshot.cn"
--- a/plugins/wasm-go/extensions/ai-proxy/go.mod
+++ b/plugins/wasm-go/extensions/ai-proxy/go.mod
@@ -0,0 +1,26 @@
+// File generated by hgctl. Modify as required.
+
+module github.com/alibaba/higress/plugins/wasm-go/extensions/ai-proxy
+
+go 1.19
+
+replace github.com/alibaba/higress/plugins/wasm-go => ../..
+
+require (
+	github.com/alibaba/higress/plugins/wasm-go v0.0.0
+	github.com/higress-group/proxy-wasm-go-sdk v0.0.0-20240318034951-d5306e367c43
+	github.com/stretchr/testify v1.8.4
+	github.com/tidwall/gjson v1.14.3
+)
+
+require (
+	github.com/davecgh/go-spew v1.1.1 // indirect
+	github.com/google/uuid v1.3.0 // indirect
+	github.com/higress-group/nottinygc v0.0.0-20231101025119-e93c4c2f8520 // indirect
+	github.com/magefile/mage v1.14.0 // indirect
+	github.com/pmezard/go-difflib v1.0.0 // indirect
+	github.com/tidwall/match v1.1.1 // indirect
+	github.com/tidwall/pretty v1.2.0 // indirect
+	github.com/tidwall/resp v0.1.1 // indirect
+	gopkg.in/yaml.v3 v3.0.1 // indirect
+)
--- a/plugins/wasm-go/extensions/ai-proxy/go.sum
+++ b/plugins/wasm-go/extensions/ai-proxy/go.sum
@@ -0,0 +1,26 @@
+github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
+github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
+github.com/google/uuid v1.3.0 h1:t6JiXgmwXMjEs8VusXIJk2BXHsn+wx8BZdTaoZ5fu7I=
+github.com/google/uuid v1.3.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
+github.com/higress-group/nottinygc v0.0.0-20231101025119-e93c4c2f8520 h1:IHDghbGQ2DTIXHBHxWfqCYQW1fKjyJ/I7W1pMyUDeEA=
+github.com/higress-group/nottinygc v0.0.0-20231101025119-e93c4c2f8520/go.mod h1:Nz8ORLaFiLWotg6GeKlJMhv8cci8mM43uEnLA5t8iew=
+github.com/higress-group/proxy-wasm-go-sdk v0.0.0-20240318034951-d5306e367c43 h1:dCw7F/9ciw4NZN7w68wQRaygZ2zGOWMTIEoRvP1tlWs=
+github.com/higress-group/proxy-wasm-go-sdk v0.0.0-20240318034951-d5306e367c43/go.mod h1:hNFjhrLUIq+kJ9bOcs8QtiplSQ61GZXtd2xHKx4BYRo=
+github.com/magefile/mage v1.14.0 h1:6QDX3g6z1YvJ4olPhT1wksUcSa/V0a1B+pJb73fBjyo=
+github.com/magefile/mage v1.14.0/go.mod h1:z5UZb/iS3GoOSn0JgWuiw7dxlurVYTu+/jHXqQg881A=
+github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
+github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
+github.com/stretchr/testify v1.8.4 h1:CcVxjf3Q8PM0mHUKJCdn+eZZtm5yQwehR5yeSVQQcUk=
+github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo=
+github.com/tidwall/gjson v1.14.3 h1:9jvXn7olKEHU1S9vwoMGliaT8jq1vJ7IH/n9zD9Dnlw=
+github.com/tidwall/gjson v1.14.3/go.mod h1:/wbyibRr2FHMks5tjHJ5F8dMZh3AcwJEMf5vlfC0lxk=
+github.com/tidwall/match v1.1.1 h1:+Ho715JplO36QYgwN9PGYNhgZvoUSc9X2c80KVTi+GA=
+github.com/tidwall/match v1.1.1/go.mod h1:eRSPERbgtNPcGhD8UCthc6PmLEQXEWd3PRB5JTxsfmM=
+github.com/tidwall/pretty v1.2.0 h1:RWIZEg2iJ8/g6fDDYzMpobmaoGh5OLl4AXtGUGPcqCs=
+github.com/tidwall/pretty v1.2.0/go.mod h1:ITEVvHYasfjBbM0u2Pg8T2nJnzm8xPwvNhhsoaGGjNU=
+github.com/tidwall/resp v0.1.1 h1:Ly20wkhqKTmDUPlyM1S7pWo5kk0tDu8OoC/vFArXmwE=
+github.com/tidwall/resp v0.1.1/go.mod h1:3/FrruOBAxPTPtundW0VXgmsQ4ZBA0Aw714lVYgwFa0=
+gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM=
+gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
+gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
+gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
--- a/plugins/wasm-go/extensions/ai-proxy/main.go
+++ b/plugins/wasm-go/extensions/ai-proxy/main.go
@@ -0,0 +1,199 @@
+// File generated by hgctl. Modify as required.
+// See: https://higress.io/zh-cn/docs/user/wasm-go#2-%E7%BC%96%E5%86%99-maingo-%E6%96%87%E4%BB%B6
+
+package main
+
+import (
+	"fmt"
+	"github.com/higress-group/proxy-wasm-go-sdk/proxywasm"
+	"net/url"
+	"strings"
+
+	"github.com/alibaba/higress/plugins/wasm-go/extensions/ai-proxy/config"
+	"github.com/alibaba/higress/plugins/wasm-go/extensions/ai-proxy/provider"
+	"github.com/alibaba/higress/plugins/wasm-go/extensions/ai-proxy/util"
+	"github.com/alibaba/higress/plugins/wasm-go/pkg/wrapper"
+	"github.com/higress-group/proxy-wasm-go-sdk/proxywasm/types"
+	"github.com/tidwall/gjson"
+)
+
+const (
+	// pluginName is the name passed to wrapper.SetCtx when the plugin is registered.
+	pluginName = "ai-proxy"
+
+	// ctxKeyApiName is the HttpContext key under which the request's ApiName is stored.
+	// NOTE(review): the key string is "apiKey" although the value is an API
+	// name, not a credential — consider renaming it.
+	ctxKeyApiName = "apiKey"
+)
+
+// main registers the plugin with the wrapper: the config parser plus the
+// handlers for request headers, request body, response headers, streaming
+// response body chunks and the buffered response body.
+func main() {
+	wrapper.SetCtx(
+		pluginName,
+		wrapper.ParseConfigBy(parseConfig),
+		wrapper.ProcessRequestHeadersBy(onHttpRequestHeader),
+		wrapper.ProcessRequestBodyBy(onHttpRequestBody),
+		wrapper.ProcessResponseHeadersBy(onHttpResponseHeaders),
+		wrapper.ProcessStreamingResponseBodyBy(onStreamingResponseBody),
+		wrapper.ProcessResponseBodyBy(onHttpResponseBody),
+	)
+}
+
+// parseConfig loads the plugin configuration from json into pluginConfig,
+// validates it, and then builds the provider. The first error encountered is
+// returned; nil means the configuration is ready for use.
+func parseConfig(json gjson.Result, pluginConfig *config.PluginConfig, log wrapper.Log) error {
+	//log.Debugf("loading config: %s", json.String())
+
+	pluginConfig.FromJson(json)
+
+	err := pluginConfig.Validate()
+	if err != nil {
+		return err
+	}
+	return pluginConfig.Complete()
+}
+
+// onHttpRequestHeader resolves the API being called from the request path,
+// stores it in the HTTP context and lets the active provider process the
+// request headers.
+//
+// Responses sent from here:
+//   - 400 when the request path cannot be parsed;
+//   - 404 when the path does not map to a supported API;
+//   - 500 when the provider fails to process the headers.
+//
+// When there is no active provider, or the provider has no use for the request
+// body, body reading is disabled.
+func onHttpRequestHeader(ctx wrapper.HttpContext, pluginConfig config.PluginConfig, log wrapper.Log) types.Action {
+	activeProvider := pluginConfig.GetProvider()
+
+	if activeProvider == nil {
+		log.Debugf("[onHttpRequestHeader] no active provider, skip processing")
+		ctx.DontReadRequestBody()
+		return types.ActionContinue
+	}
+
+	rawPath := ctx.Path()
+	path, parseErr := url.Parse(rawPath)
+	if parseErr != nil {
+		// url.Parse returns a nil URL on failure, so path must not be dereferenced.
+		log.Debugf("[onHttpRequestHeader] invalid path: %s, err: %v", rawPath, parseErr)
+		_ = util.SendResponse(400, util.MimeTypeTextPlain, "invalid request path: "+rawPath)
+		return types.ActionContinue
+	}
+	apiName := getApiName(path.Path)
+	if apiName == "" {
+		log.Debugf("[onHttpRequestHeader] unsupported path: %s", path.Path)
+		_ = util.SendResponse(404, util.MimeTypeTextPlain, "API not found: "+path.Path)
+		return types.ActionContinue
+	}
+	ctx.SetContext(ctxKeyApiName, apiName)
+
+	if handler, ok := activeProvider.(provider.RequestHeadersHandler); ok {
+		action, err := handler.OnRequestHeaders(ctx, apiName, log)
+		if err == nil {
+			return action
+		}
+		// A processing failure is a server-side problem, not a missing resource.
+		_ = util.SendResponse(500, util.MimeTypeTextPlain, fmt.Sprintf("failed to process request headers: %v", err))
+		return types.ActionContinue
+	}
+
+	// Skip reading the body only when the provider cannot handle it.
+	if _, needHandleBody := activeProvider.(provider.RequestBodyHandler); !needHandleBody {
+		ctx.DontReadRequestBody()
+	}
+
+	return types.ActionContinue
+}
+
+// onHttpRequestBody hands the request body to the active provider when it
+// implements provider.RequestBodyHandler.
+//
+// Processing is skipped (ActionContinue) when there is no active provider, the
+// provider does not handle request bodies, or no API name was stored in the
+// context by the request-header phase. A provider failure is answered with a
+// 500 response.
+func onHttpRequestBody(ctx wrapper.HttpContext, pluginConfig config.PluginConfig, body []byte, log wrapper.Log) types.Action {
+	activeProvider := pluginConfig.GetProvider()
+
+	if activeProvider == nil {
+		log.Debugf("[onHttpRequestBody] no active provider, skip processing")
+		return types.ActionContinue
+	}
+
+	log.Debugf("[onHttpRequestBody] provider=%s", activeProvider.GetProviderType())
+
+	handler, ok := activeProvider.(provider.RequestBodyHandler)
+	if !ok {
+		return types.ActionContinue
+	}
+
+	// Use the comma-ok form: a missing or mistyped context value must not panic.
+	apiName, ok := ctx.GetContext(ctxKeyApiName).(provider.ApiName)
+	if !ok {
+		log.Errorf("[onHttpRequestBody] api name not found in context, skip processing")
+		return types.ActionContinue
+	}
+
+	action, err := handler.OnRequestBody(ctx, apiName, body, log)
+	if err == nil {
+		return action
+	}
+	// A processing failure is a server-side problem, not a missing resource.
+	_ = util.SendResponse(500, util.MimeTypeTextPlain, fmt.Sprintf("failed to process request body: %v", err))
+	return types.ActionContinue
+}
+
+// onHttpResponseHeaders decides how the response is processed.
+//
+// The response body is not read when there is no active provider, when the
+// :status header is unavailable or not "200", when no API name was stored in
+// the context, or when the provider implements neither body handler. If the
+// provider implements provider.ResponseHeadersHandler, its action is returned;
+// a failure there is answered with a 500 response. Otherwise the body is
+// buffered unless the provider supports streaming body handling.
+func onHttpResponseHeaders(ctx wrapper.HttpContext, pluginConfig config.PluginConfig, log wrapper.Log) types.Action {
+	activeProvider := pluginConfig.GetProvider()
+
+	if activeProvider == nil {
+		log.Debugf("[onHttpResponseHeaders] no active provider, skip processing")
+		ctx.DontReadResponseBody()
+		return types.ActionContinue
+	}
+
+	log.Debugf("[onHttpResponseHeaders] provider=%s", activeProvider.GetProviderType())
+
+	status, err := proxywasm.GetHttpResponseHeader(":status")
+	if err != nil || status != "200" {
+		if err != nil {
+			log.Errorf("unable to load :status header from response: %v", err)
+		}
+		ctx.DontReadResponseBody()
+		return types.ActionContinue
+	}
+
+	if handler, ok := activeProvider.(provider.ResponseHeadersHandler); ok {
+		// Use the comma-ok form: a missing or mistyped context value must not panic.
+		apiName, ok := ctx.GetContext(ctxKeyApiName).(provider.ApiName)
+		if !ok {
+			log.Errorf("[onHttpResponseHeaders] api name not found in context, skip processing")
+			ctx.DontReadResponseBody()
+			return types.ActionContinue
+		}
+		action, err := handler.OnResponseHeaders(ctx, apiName, log)
+		if err == nil {
+			return action
+		}
+		// A processing failure is a server-side problem, not a missing resource.
+		_ = util.SendResponse(500, util.MimeTypeTextPlain, fmt.Sprintf("failed to process response headers: %v", err))
+		return types.ActionContinue
+	}
+
+	_, needHandleBody := activeProvider.(provider.ResponseBodyHandler)
+	_, needHandleStreamingBody := activeProvider.(provider.StreamingResponseBodyHandler)
+	if !needHandleBody && !needHandleStreamingBody {
+		ctx.DontReadResponseBody()
+	} else if !needHandleStreamingBody {
+		ctx.BufferResponseBody()
+	}
+
+	return types.ActionContinue
+}
+
+// onStreamingResponseBody passes one response body chunk to the active
+// provider when it implements provider.StreamingResponseBodyHandler and
+// returns the chunk to forward downstream.
+//
+// The original chunk is returned unchanged when there is no active provider,
+// the provider does not handle streaming bodies, no API name is stored in the
+// context, the handler fails (the error is logged), or the handler returns a
+// nil chunk.
+func onStreamingResponseBody(ctx wrapper.HttpContext, pluginConfig config.PluginConfig, chunk []byte, isLastChunk bool, log wrapper.Log) []byte {
+	activeProvider := pluginConfig.GetProvider()
+
+	if activeProvider == nil {
+		log.Debugf("[onStreamingResponseBody] no active provider, skip processing")
+		return chunk
+	}
+
+	log.Debugf("[onStreamingResponseBody] provider=%s", activeProvider.GetProviderType())
+	log.Debugf("isLastChunk=%v chunk: %s", isLastChunk, string(chunk))
+
+	handler, ok := activeProvider.(provider.StreamingResponseBodyHandler)
+	if !ok {
+		return chunk
+	}
+
+	// Use the comma-ok form: a missing or mistyped context value must not panic.
+	apiName, ok := ctx.GetContext(ctxKeyApiName).(provider.ApiName)
+	if !ok {
+		log.Errorf("[onStreamingResponseBody] api name not found in context, skip processing")
+		return chunk
+	}
+
+	modifiedChunk, err := handler.OnStreamingResponseBody(ctx, apiName, chunk, isLastChunk, log)
+	if err != nil {
+		// Fall back to the untouched chunk, but leave a trace of the failure.
+		log.Errorf("[onStreamingResponseBody] failed to process response chunk: %v", err)
+		return chunk
+	}
+	if modifiedChunk == nil {
+		return chunk
+	}
+	return modifiedChunk
+}
+
+// onHttpResponseBody hands the buffered response body to the active provider
+// when it implements provider.ResponseBodyHandler.
+//
+// Processing is skipped (ActionContinue) when there is no active provider, the
+// provider does not handle response bodies, or no API name is stored in the
+// context. A provider failure is answered with a 500 response.
+func onHttpResponseBody(ctx wrapper.HttpContext, pluginConfig config.PluginConfig, body []byte, log wrapper.Log) types.Action {
+	activeProvider := pluginConfig.GetProvider()
+
+	if activeProvider == nil {
+		log.Debugf("[onHttpResponseBody] no active provider, skip processing")
+		return types.ActionContinue
+	}
+
+	log.Debugf("[onHttpResponseBody] provider=%s", activeProvider.GetProviderType())
+	//log.Debugf("response body: %s", string(body))
+
+	handler, ok := activeProvider.(provider.ResponseBodyHandler)
+	if !ok {
+		return types.ActionContinue
+	}
+
+	// Use the comma-ok form: a missing or mistyped context value must not panic.
+	apiName, ok := ctx.GetContext(ctxKeyApiName).(provider.ApiName)
+	if !ok {
+		log.Errorf("[onHttpResponseBody] api name not found in context, skip processing")
+		return types.ActionContinue
+	}
+
+	action, err := handler.OnResponseBody(ctx, apiName, body, log)
+	if err == nil {
+		return action
+	}
+	// A processing failure is a server-side problem, not a missing resource.
+	_ = util.SendResponse(500, util.MimeTypeTextPlain, fmt.Sprintf("failed to process response body: %v", err))
+	return types.ActionContinue
+}
+
+func getApiName(path string) provider.ApiName {
+	if strings.HasSuffix(path, "/v1/chat/completions") {
+		return provider.ApiNameChatCompletion
+	}
+	return ""
+}
--- a/plugins/wasm-go/extensions/ai-proxy/option.yaml
+++ b/plugins/wasm-go/extensions/ai-proxy/option.yaml
@@ -0,0 +1,52 @@
+# File generated by hgctl. Modify as required.
+
+version: 1.0.0
+
+build:
+  # The official builder image version
+  builder:
+    go: 1.19
+    tinygo: 0.28.1
+    oras: 1.0.0
+  # The WASM plugin project directory
+  input: ./
+  # The output of the build products
+  output:
+  # Choose between 'files' and 'image'
+    type: files
+    # Destination address: when type=files, specify the local directory path, e.g., './out' or
+    # type=image, specify the remote docker repository, e.g., 'docker.io/<your_username>/<your_image>'
+    dest: ./out
+  # The authentication configuration for pushing image to the docker repository
+  docker-auth: ~/.docker/config.json
+  # The directory for the WASM plugin configuration structure
+  model-dir: ./
+  # The WASM plugin configuration structure name
+  model: config.PluginConfig
+  # Enable debug mode
+  debug: false
+
+test:
+  # Test environment name, that is a docker compose project name
+  name: wasm-test
+  # The output path to build products, that is the source of test configuration parameters
+  from-path: ./out
+  # The test configuration source
+  test-path: ./test
+  # Docker compose configuration, which is empty, looks for the following files from 'test-path':
+  # compose.yaml, compose.yml, docker-compose.yml, docker-compose.yaml
+  compose-file:
+  # Detached mode: Run containers in the background
+  detach: false
+
+install:
+  # The namespace of the installation
+  namespace: higress-system
+  # Use to validate WASM plugin configuration when install by yaml
+  spec-yaml: ./out/spec.yaml
+  # Installation source. Choose between 'from-yaml' and 'from-go-src'
+  from-yaml: ./test/plugin-conf.yaml
+  # If 'from-go-src' is non-empty, the output type of the build option must be 'image'
+  from-go-src:
+  # Enable debug mode
+  debug: false
--- a/plugins/wasm-go/extensions/ai-proxy/provider/azure.go
+++ b/plugins/wasm-go/extensions/ai-proxy/provider/azure.go
@@ -0,0 +1,99 @@
+package provider
+
+import (
+	"errors"
+	"fmt"
+	"net/url"
+
+	"github.com/alibaba/higress/plugins/wasm-go/extensions/ai-proxy/util"
+	"github.com/alibaba/higress/plugins/wasm-go/pkg/wrapper"
+	"github.com/higress-group/proxy-wasm-go-sdk/proxywasm"
+	"github.com/higress-group/proxy-wasm-go-sdk/proxywasm/types"
+)
+
+// azureProvider is the provider for Azure OpenAI service.
+
+// azureProviderInitializer validates the Azure-specific part of a
+// ProviderConfig and creates azureProvider instances from it.
+type azureProviderInitializer struct {
+}
+
+// ValidateConfig checks that azureServiceUrl is present in config and can be
+// parsed as a URL. It returns nil when both conditions hold.
+func (m *azureProviderInitializer) ValidateConfig(config ProviderConfig) error {
+	rawUrl := config.azureServiceUrl
+	if rawUrl == "" {
+		return errors.New("missing azureServiceUrl in provider config")
+	}
+	_, err := url.Parse(rawUrl)
+	if err != nil {
+		return fmt.Errorf("invalid azureServiceUrl: %w", err)
+	}
+	return nil
+}
+
+func (m *azureProviderInitializer) CreateProvider(config ProviderConfig) (Provider, error) {
+	var serviceUrl *url.URL
+	if u, err := url.Parse(config.azureServiceUrl); err != nil {
+		return nil, fmt.Errorf("invalid azureServiceUrl: %w", err)
+	} else {
+		serviceUrl = u
+	}
+	return &azureProvider{
+		config:       config,
+		serviceUrl:   serviceUrl,
+		contextCache: createContextCache(&config),
+	}, nil
+}
+
+// azureProvider rewrites chat completion requests so that they target the
+// configured Azure OpenAI service URL.
+type azureProvider struct {
+	// config is the provider configuration this instance was created from.
+	config ProviderConfig
+
+	// contextCache supplies the context file content; when it is nil the
+	// request body is left untouched.
+	contextCache *contextCache
+	// serviceUrl is the parsed form of config.azureServiceUrl.
+	serviceUrl   *url.URL
+}
+
+// GetProviderType returns the provider type identifier for Azure OpenAI.
+func (m *azureProvider) GetProviderType() string {
+	return providerTypeAzure
+}
+
+// OnRequestHeaders points the request at the Azure service: the path and host
+// are overwritten from serviceUrl and the "api-key" header is set to the first
+// configured API token. Only the chat completion API is supported; any other
+// apiName yields errUnsupportedApiName.
+//
+// With a context cache configured, the Content-Length header is removed
+// (presumably because the body is rewritten later — confirm against
+// OnRequestBody); without one, the request body is not read at all.
+func (m *azureProvider) OnRequestHeaders(ctx wrapper.HttpContext, apiName ApiName, log wrapper.Log) (types.Action, error) {
+	if apiName != ApiNameChatCompletion {
+		return types.ActionContinue, errUnsupportedApiName
+	}
+
+	target := m.serviceUrl
+	_ = util.OverwriteRequestPath(target.RequestURI())
+	_ = util.OverwriteRequestHost(target.Host)
+	_ = proxywasm.ReplaceHttpRequestHeader("api-key", m.config.apiTokens[0])
+
+	if m.contextCache != nil {
+		_ = proxywasm.RemoveHttpRequestHeader("Content-Length")
+	} else {
+		ctx.DontReadRequestBody()
+	}
+
+	return types.ActionContinue, nil
+}
+
+// OnRequestBody injects the context file content into a chat completion
+// request.
+//
+// It returns errUnsupportedApiName for any other API, and continues without
+// changes when no context cache is configured. Otherwise the body is decoded,
+// the context content is requested from the cache, and the request is paused
+// until the callback has rewritten the body and resumed it. If loading the
+// context fails, a 500 response is sent and the body is left untouched.
+//
+// NOTE(review): when the content is already cached, GetContent invokes the
+// callback synchronously, so ResumeHttpRequest runs before ActionPause is
+// returned from here — confirm the host handles that ordering.
+func (m *azureProvider) OnRequestBody(ctx wrapper.HttpContext, apiName ApiName, body []byte, log wrapper.Log) (types.Action, error) {
+	if apiName != ApiNameChatCompletion {
+		return types.ActionContinue, errUnsupportedApiName
+	}
+	if m.contextCache == nil {
+		return types.ActionContinue, nil
+	}
+	request := &chatCompletionRequest{}
+	if err := decodeChatCompletionRequest(body, request); err != nil {
+		return types.ActionContinue, err
+	}
+	err := m.contextCache.GetContent(func(content string, err error) {
+		defer func() {
+			_ = proxywasm.ResumeHttpRequest()
+		}()
+		if err != nil {
+			log.Errorf("failed to load context file: %v", err)
+			_ = util.SendResponse(500, util.MimeTypeTextPlain, fmt.Sprintf("failed to load context file: %v", err))
+			// The error response has been sent; do not go on to rewrite
+			// the body with empty context content.
+			return
+		}
+		insertContextMessage(request, content)
+		if err := replaceJsonRequestBody(request, log); err != nil {
+			_ = util.SendResponse(500, util.MimeTypeTextPlain, fmt.Sprintf("failed to replace request body: %v", err))
+		}
+	}, log)
+	if err == nil {
+		return types.ActionPause, nil
+	}
+	return types.ActionContinue, err
+}
--- a/plugins/wasm-go/extensions/ai-proxy/provider/cluster.go
+++ b/plugins/wasm-go/extensions/ai-proxy/provider/cluster.go
@@ -0,0 +1,17 @@
+package provider
+
+import "fmt"
+
+// plainCluster identifies an upstream service by name and port, together with
+// the host name to report for it.
+type plainCluster struct {
+	serviceName string
+	servicePort int64
+	hostName    string
+}
+
+// ClusterName renders the cluster name in the form
+// "outbound|<servicePort>||<serviceName>".
+func (c plainCluster) ClusterName() string {
+	port, name := c.servicePort, c.serviceName
+	return fmt.Sprintf("outbound|%d||%s", port, name)
+}
+
+// HostName returns the configured host name as is.
+func (c plainCluster) HostName() string {
+	host := c.hostName
+	return host
+}
--- a/plugins/wasm-go/extensions/ai-proxy/provider/context.go
+++ b/plugins/wasm-go/extensions/ai-proxy/provider/context.go
@@ -0,0 +1,100 @@
+package provider
+
+import (
+	"errors"
+	"fmt"
+	"net/http"
+	"net/url"
+
+	"github.com/alibaba/higress/plugins/wasm-go/pkg/wrapper"
+	"github.com/tidwall/gjson"
+)
+
+// ContextConfig describes the external file that supplies extra conversation
+// context and the gateway upstream service used to fetch it. FromJson reads the
+// values from the "fileUrl", "serviceName" and "servicePort" keys; the struct
+// tags use the same names.
+type ContextConfig struct {
+	// fileUrl is the URL of the context file as written in the configuration.
+	// @Title zh-CN 文件URL
+	// @Description zh-CN 用于获取对话上下文的文件的URL。目前仅支持HTTP和HTTPS协议，纯文本格式文件
+	fileUrl string `required:"true" yaml:"fileUrl" json:"fileUrl"`
+	// serviceName is the name of the gateway upstream service hosting the file.
+	// @Title zh-CN 上游服务名称
+	// @Description zh-CN 文件服务所对应的网关内上游服务名称
+	serviceName string `required:"true" yaml:"serviceName" json:"serviceName"`
+	// servicePort is the port of the gateway upstream service hosting the file.
+	// @Title zh-CN 上游服务端口
+	// @Description zh-CN 文件服务所对应的网关内上游服务端口
+	servicePort int64 `required:"true" yaml:"servicePort" json:"servicePort"`
+
+	// fileUrlObj is the parsed form of fileUrl; it is set by Validate.
+	fileUrlObj *url.URL `yaml:"-"`
+}
+
+// FromJson populates the context configuration from the "fileUrl",
+// "serviceName" and "servicePort" members of the given JSON value.
+func (c *ContextConfig) FromJson(json gjson.Result) {
+	fileUrl := json.Get("fileUrl")
+	serviceName := json.Get("serviceName")
+	servicePort := json.Get("servicePort")
+
+	c.fileUrl, c.serviceName, c.servicePort = fileUrl.String(), serviceName.String(), servicePort.Int()
+}
+
+// Validate checks that fileUrl, serviceName and servicePort are all present and
+// that fileUrl can be parsed. On a successful parse the resulting URL is stored
+// in fileUrlObj. The first failing check is reported as an error.
+func (c *ContextConfig) Validate() error {
+	if c.fileUrl == "" {
+		return errors.New("missing fileUrl in context config")
+	}
+	parsed, err := url.Parse(c.fileUrl)
+	if err != nil {
+		return fmt.Errorf("invalid fileUrl in context config: %v", err)
+	}
+	c.fileUrlObj = parsed
+
+	switch {
+	case c.serviceName == "":
+		return errors.New("missing serviceName in context config")
+	case c.servicePort == 0:
+		return errors.New("missing servicePort in context config")
+	}
+	return nil
+}
+
+// contextCache downloads the context file through an HTTP client and keeps the
+// content in memory after the first successful download.
+//
+// client, fileUrl and timeout are set by createContextCache; timeout is the
+// provider timeout and is passed to the client on each download. loaded becomes
+// true once content holds a successfully downloaded file, after which
+// GetContent serves content without another request.
+type contextCache struct {
+	client  wrapper.HttpClient
+	fileUrl *url.URL
+	timeout uint32
+
+	loaded  bool
+	content string
+}
+
+// GetContent delivers the context file content to callback.
+//
+// If the content has already been loaded, callback is invoked synchronously,
+// before GetContent returns. Otherwise the file is requested through the
+// client and callback is invoked from the response handler: with the body on
+// HTTP 200 (which is also cached), or with an error for any other status (which
+// is not cached, so the next call tries again).
+//
+// The returned error is non-nil when callback is nil or when the client refuses
+// to issue the request; in those cases callback is not invoked.
+func (c *contextCache) GetContent(callback func(string, error), log wrapper.Log) error {
+	if callback == nil {
+		return errors.New("callback is nil")
+	}
+
+	if c.loaded {
+		log.Debugf("context file loaded from cache")
+		callback(c.content, nil)
+		return nil
+	}
+
+	log.Infof("loading context file from %s", c.fileUrl.String())
+	// RequestURI keeps the (escaped) path and the query string; using Path alone
+	// would silently drop query parameters such as signatures or versions.
+	return c.client.Get(c.fileUrl.RequestURI(), nil, func(statusCode int, responseHeaders http.Header, responseBody []byte) {
+		if statusCode != http.StatusOK {
+			callback("", fmt.Errorf("failed to load context file, status: %d", statusCode))
+			return
+		}
+		c.content = string(responseBody)
+		c.loaded = true
+		log.Debugf("content: %s", c.content)
+		callback(c.content, nil)
+	}, c.timeout)
+}
+
+// createContextCache builds the contextCache for a provider, or returns nil
+// when the provider has no context configuration.
+//
+// The URL parsed by ContextConfig.Validate is reused when present. Otherwise
+// the raw fileUrl is parsed here; if that fails an empty URL is used instead of
+// a nil one, so that the cache can be constructed without dereferencing nil.
+func createContextCache(providerConfig *ProviderConfig) *contextCache {
+	contextConfig := providerConfig.context
+	if contextConfig == nil {
+		return nil
+	}
+	fileUrlObj := contextConfig.fileUrlObj
+	if fileUrlObj == nil {
+		parsed, err := url.Parse(contextConfig.fileUrl)
+		if err != nil {
+			// url.Parse returns a nil URL on error; never keep a nil pointer.
+			parsed = &url.URL{}
+		}
+		fileUrlObj = parsed
+	}
+	cluster := plainCluster{
+		serviceName: contextConfig.serviceName,
+		servicePort: contextConfig.servicePort,
+		hostName:    fileUrlObj.Host,
+	}
+	return &contextCache{
+		client:  wrapper.NewClusterClient(cluster),
+		fileUrl: fileUrlObj,
+		timeout: providerConfig.timeout,
+	}
+}
--- a/plugins/wasm-go/extensions/ai-proxy/provider/model.go
+++ b/plugins/wasm-go/extensions/ai-proxy/provider/model.go
@@ -0,0 +1,44 @@
+package provider
+
+// chatCompletionRequest is the JSON body of a chat completion request, as
+// decoded by decodeChatCompletionRequest and re-encoded by
+// replaceJsonRequestBody.
+//
+// NOTE(review): every optional field uses omitempty on a non-pointer type, so
+// an explicit zero (for example temperature 0 or seed 0) is indistinguishable
+// from an absent field and is dropped when the request is marshalled again.
+type chatCompletionRequest struct {
+	Model            string        `json:"model"`
+	Messages         []chatMessage `json:"messages"`
+	MaxTokens        int           `json:"max_tokens,omitempty"`
+	FrequencyPenalty float64       `json:"frequency_penalty,omitempty"`
+	N                int           `json:"n,omitempty"`
+	PresencePenalty  float64       `json:"presence_penalty,omitempty"`
+	Seed             int           `json:"seed,omitempty"`
+	Stream           bool          `json:"stream,omitempty"`
+	Temperature      float64       `json:"temperature,omitempty"`
+	TopP             float64       `json:"top_p,omitempty"`
+	User             string        `json:"user,omitempty"`
+}
+
+// chatCompletionResponse is the JSON shape of a chat completion response or
+// streaming chunk produced by this package; Object carries either
+// objectChatCompletion or objectChatCompletionChunk.
+//
+// NOTE(review): omitempty has no effect on the struct-typed Usage field, so
+// "usage" is always emitted.
+type chatCompletionResponse struct {
+	Id                string                 `json:"id,omitempty"`
+	Choices           []chatCompletionChoice `json:"choices,omitempty"`
+	Created           int64                  `json:"created,omitempty"`
+	Model             string                 `json:"model,omitempty"`
+	SystemFingerprint string                 `json:"system_fingerprint,omitempty"`
+	Object            string                 `json:"object,omitempty"`
+	Usage             chatCompletionUsage    `json:"usage,omitempty"`
+}
+
+// chatCompletionChoice is one entry of chatCompletionResponse.Choices. The
+// Qwen provider fills Message for complete responses and Delta for streaming
+// chunks. Index has no omitempty and is therefore always serialized.
+type chatCompletionChoice struct {
+	Index        int          `json:"index"`
+	Message      *chatMessage `json:"message,omitempty"`
+	Delta        *chatMessage `json:"delta,omitempty"`
+	FinishReason string       `json:"finish_reason,omitempty"`
+}
+
+// chatCompletionUsage carries the token counters of a chat completion
+// response. Counters equal to zero are omitted from the JSON output.
+type chatCompletionUsage struct {
+	PromptTokens     int `json:"prompt_tokens,omitempty"`
+	CompletionTokens int `json:"completion_tokens,omitempty"`
+	TotalTokens      int `json:"total_tokens,omitempty"`
+}
+
+// chatMessage is a single conversation message. It is used both in requests
+// (chatCompletionRequest.Messages) and in responses (choice Message/Delta).
+// Empty fields are omitted from the JSON output.
+type chatMessage struct {
+	Name    string `json:"name,omitempty"`
+	Role    string `json:"role,omitempty"`
+	Content string `json:"content,omitempty"`
+}
--- a/plugins/wasm-go/extensions/ai-proxy/provider/moonshot.go
+++ b/plugins/wasm-go/extensions/ai-proxy/provider/moonshot.go
@@ -0,0 +1,152 @@
+package provider
+
+import (
+	"errors"
+	"fmt"
+	"net/http"
+
+	"github.com/alibaba/higress/plugins/wasm-go/extensions/ai-proxy/util"
+	"github.com/alibaba/higress/plugins/wasm-go/pkg/wrapper"
+	"github.com/higress-group/proxy-wasm-go-sdk/proxywasm"
+	"github.com/higress-group/proxy-wasm-go-sdk/proxywasm/types"
+	"github.com/tidwall/gjson"
+)
+
+// moonshotProvider is the provider for Moonshot AI service.
+
+// moonshotDomain is the host that chat completion requests are rewritten to
+// and that the provider's own HTTP client targets; moonshotChatCompletionPath
+// is the request path set for chat completion calls.
+const (
+	moonshotDomain             = "api.moonshot.cn"
+	moonshotChatCompletionPath = "/v1/chat/completions"
+)
+
+// moonshotProviderInitializer validates Moonshot specific settings and creates
+// moonshotProvider instances.
+type moonshotProviderInitializer struct{}
+
+// ValidateConfig rejects configurations that set both moonshotFileId and
+// context, since only one context source may be used.
+func (m *moonshotProviderInitializer) ValidateConfig(config ProviderConfig) error {
+	hasFileId, hasContext := config.moonshotFileId != "", config.context != nil
+	if !hasFileId || !hasContext {
+		return nil
+	}
+	return errors.New("moonshotFileId and context cannot be configured at the same time")
+}
+
+// CreateProvider builds a moonshotProvider with an HTTP client routed to
+// moonshotDomain and, when configured, a context cache.
+func (m *moonshotProviderInitializer) CreateProvider(config ProviderConfig) (Provider, error) {
+	cluster := wrapper.RouteCluster{Host: moonshotDomain}
+	p := &moonshotProvider{
+		config:       config,
+		client:       wrapper.NewClusterClient(cluster),
+		contextCache: createContextCache(&config),
+	}
+	return p, nil
+}
+
+// moonshotProvider implements the provider for the Moonshot AI service.
+//
+// client is used for requests the provider issues itself (see sendRequest),
+// fileContent caches the content fetched for config.moonshotFileId, and
+// contextCache is non-nil only when a context file is configured.
+type moonshotProvider struct {
+	config ProviderConfig
+
+	client       wrapper.HttpClient
+	fileContent  string
+	contextCache *contextCache
+}
+
+// GetProviderType returns providerTypeMoonshot.
+func (m *moonshotProvider) GetProviderType() string {
+	return providerTypeMoonshot
+}
+
+// OnRequestHeaders redirects a chat completion request to the Moonshot
+// endpoint: it overwrites path and host, sets a Bearer Authorization header
+// with one of the configured tokens and removes Content-Length, since the body
+// is rewritten later. Other APIs are rejected with errUnsupportedApiName.
+func (m *moonshotProvider) OnRequestHeaders(ctx wrapper.HttpContext, apiName ApiName, log wrapper.Log) (types.Action, error) {
+	if apiName != ApiNameChatCompletion {
+		return types.ActionContinue, errUnsupportedApiName
+	}
+
+	authorization := "Bearer " + m.config.GetRandomToken()
+
+	_ = util.OverwriteRequestPath(moonshotChatCompletionPath)
+	_ = util.OverwriteRequestHost(moonshotDomain)
+	_ = proxywasm.ReplaceHttpRequestHeader("Authorization", authorization)
+	_ = proxywasm.RemoveHttpRequestHeader("Content-Length")
+
+	return types.ActionContinue, nil
+}
+
+// OnRequestBody applies the model mapping to a chat completion request and,
+// when a Moonshot file id or a context file is configured, inserts that content
+// as a context message before the body is replaced.
+//
+// Without any context source the rewritten body is installed immediately. With
+// one, the content is requested through getContextContent; if that call is
+// accepted the request is paused and the callback resumes it after either
+// rewriting the body or sending a 500 response.
+func (m *moonshotProvider) OnRequestBody(ctx wrapper.HttpContext, apiName ApiName, body []byte, log wrapper.Log) (types.Action, error) {
+	if apiName != ApiNameChatCompletion {
+		return types.ActionContinue, errUnsupportedApiName
+	}
+
+	request := &chatCompletionRequest{}
+	if decodeErr := decodeChatCompletionRequest(body, request); decodeErr != nil {
+		return types.ActionContinue, decodeErr
+	}
+
+	if request.Model == "" {
+		return types.ActionContinue, errors.New("missing model in chat completion request")
+	}
+	request.Model = getMappedModel(request.Model, m.config.modelMapping, log)
+	if request.Model == "" {
+		return types.ActionContinue, errors.New("model becomes empty after applying the configured mapping")
+	}
+
+	needsContext := m.config.moonshotFileId != "" || m.contextCache != nil
+	if !needsContext {
+		return types.ActionContinue, replaceJsonRequestBody(request, log)
+	}
+
+	onContent := func(content string, loadErr error) {
+		defer func() {
+			_ = proxywasm.ResumeHttpRequest()
+		}()
+		if loadErr != nil {
+			log.Errorf("failed to load context file: %v", loadErr)
+			_ = util.SendResponse(500, util.MimeTypeTextPlain, fmt.Sprintf("failed to load context file: %v", loadErr))
+			return
+		}
+		if chatErr := m.performChatCompletion(ctx, content, request, log); chatErr != nil {
+			_ = util.SendResponse(500, util.MimeTypeTextPlain, fmt.Sprintf("failed to perform chat completion: %v", chatErr))
+		}
+	}
+	if err := m.getContextContent(onContent, log); err != nil {
+		return types.ActionContinue, err
+	}
+	return types.ActionPause, nil
+}
+
+// performChatCompletion inserts fileContent into the request as a context
+// message and replaces the HTTP request body with the resulting JSON.
+func (m *moonshotProvider) performChatCompletion(ctx wrapper.HttpContext, fileContent string, request *chatCompletionRequest, log wrapper.Log) error {
+	insertContextMessage(request, fileContent)
+	if err := replaceJsonRequestBody(request, log); err != nil {
+		return err
+	}
+	return nil
+}
+
+// getContextContent hands the context content to callback, taking it from the
+// first applicable source:
+//
+//  1. the cached content of the configured Moonshot file (callback runs
+//     synchronously),
+//  2. the Moonshot file content endpoint, whose "content" field is cached in
+//     m.fileContent on HTTP 200,
+//  3. the context cache.
+//
+// An error is returned when neither source is configured or when the request
+// could not be issued.
+func (m *moonshotProvider) getContextContent(callback func(string, error), log wrapper.Log) error {
+	fileId := m.config.moonshotFileId
+	switch {
+	case fileId != "" && m.fileContent != "":
+		callback(m.fileContent, nil)
+		return nil
+	case fileId != "":
+		onResponse := func(statusCode int, responseHeaders http.Header, responseBody []byte) {
+			raw := string(responseBody)
+			if statusCode != http.StatusOK {
+				log.Errorf("failed to load knowledge base file from AI service, status: %d body: %s", statusCode, raw)
+				callback("", fmt.Errorf("failed to load knowledge base file from moonshot service, status: %d", statusCode))
+				return
+			}
+			m.fileContent = gjson.Parse(raw).Get("content").String()
+			callback(m.fileContent, nil)
+		}
+		return m.sendRequest(http.MethodGet, "/v1/files/"+fileId+"/content", "", onResponse)
+	case m.contextCache != nil:
+		return m.contextCache.GetContent(callback, log)
+	default:
+		return errors.New("both moonshotFileId and context are not configured")
+	}
+}
+
+// sendRequest issues a GET or POST request through the provider's client,
+// authenticated with one of the configured tokens. POST requests additionally
+// carry a JSON Content-Type and the given body. Any other method yields an
+// error and no request is sent.
+func (m *moonshotProvider) sendRequest(method, path string, body string, callback wrapper.ResponseCallback) error {
+	if method == http.MethodGet {
+		getHeaders := util.CreateHeaders("Authorization", "Bearer "+m.config.GetRandomToken())
+		return m.client.Get(path, getHeaders, callback, m.config.timeout)
+	}
+	if method == http.MethodPost {
+		postHeaders := util.CreateHeaders("Authorization", "Bearer "+m.config.GetRandomToken(), "Content-Type", "application/json")
+		return m.client.Post(path, postHeaders, []byte(body), callback, m.config.timeout)
+	}
+	return errors.New("unsupported method: " + method)
+}
--- a/plugins/wasm-go/extensions/ai-proxy/provider/openai.go
+++ b/plugins/wasm-go/extensions/ai-proxy/provider/openai.go
@@ -0,0 +1,87 @@
+package provider
+
+import (
+	"fmt"
+
+	"github.com/alibaba/higress/plugins/wasm-go/extensions/ai-proxy/util"
+	"github.com/alibaba/higress/plugins/wasm-go/pkg/wrapper"
+	"github.com/higress-group/proxy-wasm-go-sdk/proxywasm"
+	"github.com/higress-group/proxy-wasm-go-sdk/proxywasm/types"
+)
+
+// openaiProvider is the provider for OpenAI service.
+
+// openaiDomain and openaiChatCompletionPath are the host and path that chat
+// completion requests are rewritten to by openaiProvider.OnRequestHeaders.
+const (
+	openaiDomain             = "api.openai.com"
+	openaiChatCompletionPath = "/v1/chat/completions"
+)
+
+// openaiProviderInitializer creates openaiProvider instances.
+type openaiProviderInitializer struct{}
+
+// ValidateConfig accepts every configuration; there are no OpenAI specific
+// checks.
+func (m *openaiProviderInitializer) ValidateConfig(config ProviderConfig) error {
+	return nil
+}
+
+// CreateProvider builds an openaiProvider holding the configuration and, when
+// a context file is configured, its context cache.
+func (m *openaiProviderInitializer) CreateProvider(config ProviderConfig) (Provider, error) {
+	p := &openaiProvider{config: config}
+	p.contextCache = createContextCache(&config)
+	return p, nil
+}
+
+// openaiProvider implements the provider for the OpenAI service. contextCache
+// is non-nil only when a context file is configured.
+type openaiProvider struct {
+	config       ProviderConfig
+	contextCache *contextCache
+}
+
+// GetProviderType returns providerTypeOpenAI.
+func (m *openaiProvider) GetProviderType() string {
+	return providerTypeOpenAI
+}
+
+// OnRequestHeaders redirects a chat completion request to the OpenAI endpoint
+// by overwriting path and host and setting a Bearer Authorization header with
+// one of the configured tokens.
+//
+// When a context cache exists the body will be rewritten later, so
+// Content-Length is removed; otherwise body processing is skipped entirely.
+// Other APIs are rejected with errUnsupportedApiName.
+func (m *openaiProvider) OnRequestHeaders(ctx wrapper.HttpContext, apiName ApiName, log wrapper.Log) (types.Action, error) {
+	if apiName != ApiNameChatCompletion {
+		return types.ActionContinue, errUnsupportedApiName
+	}
+
+	authorization := "Bearer " + m.config.GetRandomToken()
+	_ = util.OverwriteRequestPath(openaiChatCompletionPath)
+	_ = util.OverwriteRequestHost(openaiDomain)
+	_ = proxywasm.ReplaceHttpRequestHeader("Authorization", authorization)
+
+	if m.contextCache != nil {
+		_ = proxywasm.RemoveHttpRequestHeader("Content-Length")
+		return types.ActionContinue, nil
+	}
+	ctx.DontReadRequestBody()
+	return types.ActionContinue, nil
+}
+
+// OnRequestBody injects the configured context file into an OpenAI chat
+// completion request.
+//
+// Any API other than chat completion is rejected with errUnsupportedApiName.
+// Without a context cache the body is left untouched. Otherwise the body is
+// decoded and the context content is requested; when that request is accepted
+// the method returns types.ActionPause and the callback rewrites the body and
+// resumes the request.
+func (m *openaiProvider) OnRequestBody(ctx wrapper.HttpContext, apiName ApiName, body []byte, log wrapper.Log) (types.Action, error) {
+	if apiName != ApiNameChatCompletion {
+		return types.ActionContinue, errUnsupportedApiName
+	}
+	if m.contextCache == nil {
+		return types.ActionContinue, nil
+	}
+	request := &chatCompletionRequest{}
+	if err := decodeChatCompletionRequest(body, request); err != nil {
+		return types.ActionContinue, err
+	}
+	err := m.contextCache.GetContent(func(content string, err error) {
+		// Best effort: nothing else can be done here if resuming fails.
+		defer func() {
+			_ = proxywasm.ResumeHttpRequest()
+		}()
+		if err != nil {
+			log.Errorf("failed to load context file: %v", err)
+			_ = util.SendResponse(500, util.MimeTypeTextPlain, fmt.Sprintf("failed to load context file: %v", err))
+			// An error response has been sent; do not rewrite the request body.
+			return
+		}
+		insertContextMessage(request, content)
+		if err := replaceJsonRequestBody(request, log); err != nil {
+			_ = util.SendResponse(500, util.MimeTypeTextPlain, fmt.Sprintf("failed to replace request body: %v", err))
+		}
+	}, log)
+	if err == nil {
+		return types.ActionPause, nil
+	}
+	return types.ActionContinue, err
+}
--- a/plugins/wasm-go/extensions/ai-proxy/provider/provider.go
+++ b/plugins/wasm-go/extensions/ai-proxy/provider/provider.go
@@ -0,0 +1,199 @@
+package provider
+
+import (
+	"errors"
+	"math/rand"
+
+	"github.com/alibaba/higress/plugins/wasm-go/pkg/wrapper"
+	"github.com/higress-group/proxy-wasm-go-sdk/proxywasm/types"
+	"github.com/tidwall/gjson"
+)
+
+// ApiName identifies the API a request is addressed to.
+type ApiName string
+
+// Pointcut is a named string type.
+// NOTE(review): it is not referenced in the visible part of this package.
+type Pointcut string
+
+const (
+	// ApiNameChatCompletion is the only API name the providers accept.
+	ApiNameChatCompletion ApiName = "chatCompletion"
+
+	// Provider types; these are the keys of providerInitializers.
+	providerTypeMoonshot = "moonshot"
+	providerTypeAzure    = "azure"
+	providerTypeQwen     = "qwen"
+	providerTypeOpenAI   = "openai"
+
+	// Accepted values of ProviderConfig.protocol.
+	protocolOpenAI   = "openai"
+	protocolOriginal = "original"
+
+	// roleSystem is the role given to injected context messages.
+	roleSystem = "system"
+
+	// Keys of values stored in the per-request HttpContext.
+	ctxKeyStreamingBody        = "streamingBody"
+	ctxKeyOriginalRequestModel = "originalRequestModel"
+	ctxKeyFinalRequestModel    = "finalRequestModel"
+
+	// Values of the "object" field of generated responses.
+	objectChatCompletion      = "chat.completion"
+	objectChatCompletionChunk = "chat.completion.chunk"
+
+	finishReasonStop = "stop"
+
+	// wildcard is the modelMapping key that applies to any model name.
+	wildcard = "*"
+
+	defaultTimeout = 2 * 60 * 1000 // ms
+)
+
+// providerInitializer is implemented once per provider type. ValidateConfig
+// performs the provider specific configuration checks and CreateProvider
+// builds the provider from a configuration.
+type providerInitializer interface {
+	ValidateConfig(ProviderConfig) error
+	CreateProvider(ProviderConfig) (Provider, error)
+}
+
+var (
+	// errUnsupportedApiName is returned by handlers for any API they do not
+	// support.
+	errUnsupportedApiName = errors.New("unsupported API name")
+
+	// providerInitializers maps each supported provider type to its
+	// initializer; ProviderConfig.Validate and CreateProvider look types up here.
+	providerInitializers = map[string]providerInitializer{
+		providerTypeMoonshot: &moonshotProviderInitializer{},
+		providerTypeAzure:    &azureProviderInitializer{},
+		providerTypeQwen:     &qwenProviderInitializer{},
+		providerTypeOpenAI:   &openaiProviderInitializer{},
+	}
+)
+
+// Provider is the interface every AI service provider implements. The handler
+// interfaces below are optional capabilities; providers implement only the
+// phases they need.
+// NOTE(review): presumably the caller discovers them via type assertion —
+// confirm against the plugin entry point.
+type Provider interface {
+	GetProviderType() string
+}
+
+// RequestHeadersHandler is implemented by providers that process request
+// headers.
+type RequestHeadersHandler interface {
+	OnRequestHeaders(ctx wrapper.HttpContext, apiName ApiName, log wrapper.Log) (types.Action, error)
+}
+
+// RequestBodyHandler is implemented by providers that process the complete
+// request body.
+type RequestBodyHandler interface {
+	OnRequestBody(ctx wrapper.HttpContext, apiName ApiName, body []byte, log wrapper.Log) (types.Action, error)
+}
+
+// ResponseHeadersHandler is implemented by providers that process response
+// headers.
+type ResponseHeadersHandler interface {
+	OnResponseHeaders(ctx wrapper.HttpContext, apiName ApiName, log wrapper.Log) (types.Action, error)
+}
+
+// StreamingResponseBodyHandler is implemented by providers that transform the
+// response body chunk by chunk; the returned bytes replace the given chunk.
+type StreamingResponseBodyHandler interface {
+	OnStreamingResponseBody(ctx wrapper.HttpContext, name ApiName, chunk []byte, isLastChunk bool, log wrapper.Log) ([]byte, error)
+}
+
+// ResponseBodyHandler is implemented by providers that process the complete
+// response body.
+type ResponseBodyHandler interface {
+	OnResponseBody(ctx wrapper.HttpContext, apiName ApiName, body []byte, log wrapper.Log) (types.Action, error)
+}
+
+// ProviderConfig holds the settings of the target AI service provider. It is
+// populated by FromJson and checked by Validate. The struct tags name the
+// configuration keys read by FromJson.
+type ProviderConfig struct {
+	// typ is the provider type; it must be a key of providerInitializers.
+	// @Title zh-CN AI服务提供商
+	// @Description zh-CN AI服务提供商类型，目前支持的取值为："moonshot"
+	typ string `required:"true" yaml:"type" json:"type"`
+	// apiTokens are the tokens used to authenticate against the AI service.
+	// @Title zh-CN API Tokens
+	// @Description zh-CN 在请求AI服务时用于认证的API Token列表。不同的AI服务提供商可能有不同的名称。部分供应商只支持配置一个API Token（如Azure OpenAI）。
+	apiTokens []string `required:"false" yaml:"apiTokens" json:"apiTokens"`
+	// timeout is the request timeout in milliseconds; FromJson substitutes
+	// defaultTimeout when it is absent or zero.
+	// @Title zh-CN 请求超时
+	// @Description zh-CN 请求AI服务的超时时间，单位为毫秒。默认值为120000，即2分钟
+	timeout uint32 `required:"false" yaml:"timeout" json:"timeout"`
+	// moonshotFileId is the Moonshot file whose content is added as context
+	// (Moonshot only).
+	// @Title zh-CN Moonshot File ID
+	// @Description zh-CN 仅适用于Moonshot AI服务。Moonshot AI服务的文件 ID，其内容用于补充 AI 请求上下文
+	moonshotFileId string `required:"false" yaml:"moonshotFileId" json:"moonshotFileId"`
+	// azureServiceUrl is the full Azure OpenAI service URL (Azure only).
+	// @Title zh-CN Azure OpenAI Service URL
+	// @Description zh-CN 仅适用于Azure OpenAI服务。要请求的OpenAI服务的完整URL，包含api-version等参数
+	azureServiceUrl string `required:"false" yaml:"azureServiceUrl" json:"azureServiceUrl"`
+	// modelMapping maps requested model names to provider model names; the
+	// wildcard key "*" applies to any model without an explicit entry.
+	// @Title zh-CN 模型名称映射表
+	// @Description zh-CN 用于将请求中的模型名称映射为目标AI服务商支持的模型名称。支持通过“*”来配置全局映射
+	modelMapping map[string]string `required:"false" yaml:"modelMapping" json:"modelMapping"`
+	// protocol is the API contract exposed to clients: protocolOpenAI (the
+	// default applied by FromJson) or protocolOriginal.
+	// @Title zh-CN 对外接口协议
+	// @Description zh-CN 通过本插件对外提供的AI服务接口协议。默认值为“openai”，即OpenAI的接口协议。如需保留原有接口协议，可配置为“original"
+	protocol string `required:"false" yaml:"protocol" json:"protocol"`
+	// context optionally configures an external file used as conversation
+	// context; nil when not configured.
+	// @Title zh-CN 模型对话上下文
+	// @Description zh-CN 配置一个外部获取对话上下文的文件来源，用于在AI请求中补充对话上下文
+	context *ContextConfig `required:"false" yaml:"context" json:"context"`
+}
+
+// FromJson populates the provider configuration from the given JSON value.
+// A missing or zero timeout becomes defaultTimeout, an empty protocol becomes
+// protocolOpenAI, and the context configuration is only created when the
+// "context" member exists.
+func (c *ProviderConfig) FromJson(json gjson.Result) {
+	c.typ = json.Get("type").String()
+
+	tokens := json.Get("apiTokens").Array()
+	c.apiTokens = make([]string, 0)
+	for i := range tokens {
+		c.apiTokens = append(c.apiTokens, tokens[i].String())
+	}
+
+	if timeout := uint32(json.Get("timeout").Uint()); timeout != 0 {
+		c.timeout = timeout
+	} else {
+		c.timeout = defaultTimeout
+	}
+
+	c.moonshotFileId = json.Get("moonshotFileId").String()
+	c.azureServiceUrl = json.Get("azureServiceUrl").String()
+
+	c.modelMapping = make(map[string]string)
+	for from, to := range json.Get("modelMapping").Map() {
+		c.modelMapping[from] = to.String()
+	}
+
+	if protocol := json.Get("protocol").String(); protocol != "" {
+		c.protocol = protocol
+	} else {
+		c.protocol = protocolOpenAI
+	}
+
+	if contextJson := json.Get("context"); contextJson.Exists() {
+		contextConfig := &ContextConfig{}
+		contextConfig.FromJson(contextJson)
+		c.context = contextConfig
+	}
+}
+
+// Validate checks the provider configuration and returns the first problem
+// found: no API token, an unknown protocol, an invalid context configuration,
+// a missing or unknown provider type, or a provider specific validation error.
+//
+// timeout is unsigned and therefore needs no range check.
+func (c *ProviderConfig) Validate() error {
+	if len(c.apiTokens) == 0 {
+		return errors.New("no apiToken found in provider config")
+	}
+	if c.protocol != protocolOpenAI && c.protocol != protocolOriginal {
+		return errors.New("invalid protocol in config")
+	}
+	if c.context != nil {
+		if err := c.context.Validate(); err != nil {
+			return err
+		}
+	}
+
+	if c.typ == "" {
+		return errors.New("missing type in provider config")
+	}
+	initializer, has := providerInitializers[c.typ]
+	if !has {
+		return errors.New("unknown provider type: " + c.typ)
+	}
+	return initializer.ValidateConfig(*c)
+}
+
+// GetRandomToken returns one of the configured API tokens: the empty string
+// when none is configured, the single token when there is exactly one, and a
+// randomly chosen token otherwise.
+func (c *ProviderConfig) GetRandomToken() string {
+	tokens := c.apiTokens
+	if len(tokens) == 0 {
+		return ""
+	}
+	if len(tokens) == 1 {
+		return tokens[0]
+	}
+	return tokens[rand.Intn(len(tokens))]
+}
+
+// CreateProvider builds the provider registered for pc's type. It returns an
+// error when the type has no registered initializer.
+func CreateProvider(pc ProviderConfig) (Provider, error) {
+	if initializer, ok := providerInitializers[pc.typ]; ok {
+		return initializer.CreateProvider(pc)
+	}
+	return nil, errors.New("unknown provider type: " + pc.typ)
+}
+
+// getMappedModel resolves the model name to send to the provider.
+//
+// An empty mapping leaves the model unchanged. Otherwise a non-empty explicit
+// entry for the model wins, then the wildcard entry (whatever its value, even
+// empty), and finally the model itself.
+func getMappedModel(model string, modelMapping map[string]string, log wrapper.Log) string {
+	if len(modelMapping) == 0 {
+		return model
+	}
+	// A missing key yields "", which is treated the same as an empty entry.
+	if exact := modelMapping[model]; exact != "" {
+		log.Debugf("model %s is mapped to %s explictly", model, exact)
+		return exact
+	}
+	fallback, hasWildcard := modelMapping[wildcard]
+	if !hasWildcard {
+		return model
+	}
+	log.Debugf("model %s is mapped to %s via wildcard", model, fallback)
+	return fallback
+}
--- a/plugins/wasm-go/extensions/ai-proxy/provider/qwen.go
+++ b/plugins/wasm-go/extensions/ai-proxy/provider/qwen.go
@@ -0,0 +1,447 @@
+package provider
+
+import (
+	"encoding/json"
+	"errors"
+	"fmt"
+	"math"
+	"strings"
+	"time"
+
+	"github.com/alibaba/higress/plugins/wasm-go/extensions/ai-proxy/util"
+	"github.com/alibaba/higress/plugins/wasm-go/pkg/wrapper"
+	"github.com/higress-group/proxy-wasm-go-sdk/proxywasm"
+	"github.com/higress-group/proxy-wasm-go-sdk/proxywasm/types"
+)
+
+// qwenProvider is the provider for Qwen service.
+
+// Constants of the Qwen provider: the result format requested from the
+// service, the host and path requests are rewritten to, the bounds top_p is
+// clamped to, the HttpContext key remembering the message content already
+// pushed to the client, and the pieces of the server-sent-event framing that
+// is parsed from and written to the response stream.
+const (
+	qwenResultFormatMessage = "message"
+
+	qwenDomain             = "dashscope.aliyuncs.com"
+	qwenChatCompletionPath = "/api/v1/services/aigc/text-generation/generation"
+
+	qwenTopPMin = 0.000001
+	qwenTopPMax = 0.999999
+
+	ctxKeyPushedMessageContent = "pushedMessageContent"
+
+	streamIdItemKey    = "id:"
+	streamDataItemKey  = "data:"
+	streamEndDataValue = "[DONE]"
+	streamEventHeader  = "event: result\n:HTTP_STATUS/200\n"
+)
+
+// qwenProviderInitializer creates qwenProvider instances.
+type qwenProviderInitializer struct{}
+
+// ValidateConfig accepts every configuration; there are no Qwen specific
+// checks.
+func (m *qwenProviderInitializer) ValidateConfig(config ProviderConfig) error {
+	return nil
+}
+
+// CreateProvider builds a qwenProvider holding the configuration and, when a
+// context file is configured, its context cache.
+func (m *qwenProviderInitializer) CreateProvider(config ProviderConfig) (Provider, error) {
+	p := &qwenProvider{config: config}
+	p.contextCache = createContextCache(&config)
+	return p, nil
+}
+
+// qwenProvider implements the provider for the Qwen service. contextCache is
+// non-nil only when a context file is configured.
+type qwenProvider struct {
+	config ProviderConfig
+
+	contextCache *contextCache
+}
+
+// GetProviderType returns providerTypeQwen.
+func (m *qwenProvider) GetProviderType() string {
+	return providerTypeQwen
+}
+
+// forceStreaming makes OnRequestHeaders always ask the service for an event
+// stream instead of deferring that decision to OnRequestBody.
+const (
+	forceStreaming = true
+)
+
+// OnRequestHeaders redirects a chat completion request to the Qwen endpoint by
+// overwriting path and host and setting a Bearer Authorization header.
+//
+// With the original protocol and no context file the body is not read at all.
+// Otherwise, when forceStreaming is set, the headers are adjusted to request a
+// server-sent-event stream; when it is not, header processing is held back so
+// OnRequestBody can still choose the streaming mode.
+func (m *qwenProvider) OnRequestHeaders(ctx wrapper.HttpContext, apiName ApiName, log wrapper.Log) (types.Action, error) {
+	if apiName != ApiNameChatCompletion {
+		return types.ActionContinue, errUnsupportedApiName
+	}
+	_ = util.OverwriteRequestPath(qwenChatCompletionPath)
+	_ = util.OverwriteRequestHost(qwenDomain)
+	_ = proxywasm.ReplaceHttpRequestHeader("Authorization", "Bearer "+m.config.GetRandomToken())
+
+	passThrough := m.config.protocol == protocolOriginal && m.config.context == nil
+	if passThrough {
+		ctx.DontReadRequestBody()
+		return types.ActionContinue, nil
+	}
+
+	if !forceStreaming {
+		// Delay the header processing to allow changing streaming mode in OnRequestBody
+		return types.HeaderStopIteration, nil
+	}
+
+	_ = proxywasm.RemoveHttpRequestHeader("Accept-Encoding")
+	_ = proxywasm.RemoveHttpRequestHeader("Content-Length")
+
+	_ = proxywasm.ReplaceHttpRequestHeader("Accept", "text/event-stream")
+	_ = proxywasm.ReplaceHttpRequestHeader("X-DashScope-SSE", "enable")
+	return types.ActionContinue, nil
+}
+
+// OnRequestBody rewrites the request body for the Qwen service.
+//
+// With the original protocol the body is already a Qwen request: it is left
+// untouched unless a context file is configured, in which case the context
+// message is inserted. With the OpenAI protocol the body is decoded as a chat
+// completion request, the model mapping is applied (original and final model
+// are stored in the HttpContext), and the request is converted to the Qwen
+// format, with the context message inserted first when configured.
+//
+// Whenever the context content has to be obtained and that request is
+// accepted, the method returns types.ActionPause and the callback resumes the
+// request after rewriting the body or sending a 500 response.
+func (m *qwenProvider) OnRequestBody(ctx wrapper.HttpContext, apiName ApiName, body []byte, log wrapper.Log) (types.Action, error) {
+	if apiName != ApiNameChatCompletion {
+		return types.ActionContinue, errUnsupportedApiName
+	}
+
+	if m.config.protocol == protocolOriginal {
+		if m.config.context == nil {
+			return types.ActionContinue, nil
+		}
+
+		request := &qwenTextGenRequest{}
+		if err := json.Unmarshal(body, request); err != nil {
+			return types.ActionContinue, fmt.Errorf("unable to unmarshal request: %v", err)
+		}
+
+		err := m.contextCache.GetContent(func(content string, err error) {
+			defer func() {
+				_ = proxywasm.ResumeHttpRequest()
+			}()
+
+			if err != nil {
+				log.Errorf("failed to load context file: %v", err)
+				_ = util.SendResponse(500, util.MimeTypeTextPlain, fmt.Sprintf("failed to load context file: %v", err))
+				// An error response has been sent; do not rewrite the request body.
+				return
+			}
+			m.insertContextMessage(request, content)
+			if err := replaceJsonRequestBody(request, log); err != nil {
+				_ = util.SendResponse(500, util.MimeTypeTextPlain, fmt.Sprintf("failed to replace request body: %v", err))
+			}
+		}, log)
+		if err == nil {
+			return types.ActionPause, nil
+		}
+		return types.ActionContinue, err
+	}
+
+	request := &chatCompletionRequest{}
+	if err := decodeChatCompletionRequest(body, request); err != nil {
+		return types.ActionContinue, err
+	}
+
+	model := request.Model
+	if model == "" {
+		return types.ActionContinue, errors.New("missing model in chat completion request")
+	}
+	ctx.SetContext(ctxKeyOriginalRequestModel, model)
+	mappedModel := getMappedModel(model, m.config.modelMapping, log)
+	if mappedModel == "" {
+		return types.ActionContinue, errors.New("model becomes empty after applying the configured mapping")
+	}
+	request.Model = mappedModel
+	ctx.SetContext(ctxKeyFinalRequestModel, request.Model)
+
+	if !forceStreaming {
+		// Streaming mode follows the client's "stream" flag.
+		if request.Stream {
+			_ = proxywasm.ReplaceHttpRequestHeader("Accept", "text/event-stream")
+			_ = proxywasm.ReplaceHttpRequestHeader("X-DashScope-SSE", "enable")
+		} else {
+			_ = proxywasm.ReplaceHttpRequestHeader("Accept", "*/*")
+			_ = proxywasm.RemoveHttpRequestHeader("X-DashScope-SSE")
+		}
+	}
+
+	if m.config.context == nil {
+		qwenRequest := m.buildQwenTextGenerationRequest(request)
+		return types.ActionContinue, replaceJsonRequestBody(qwenRequest, log)
+	}
+
+	err := m.contextCache.GetContent(func(content string, err error) {
+		defer func() {
+			_ = proxywasm.ResumeHttpRequest()
+		}()
+		if err != nil {
+			log.Errorf("failed to load context file: %v", err)
+			_ = util.SendResponse(500, util.MimeTypeTextPlain, fmt.Sprintf("failed to load context file: %v", err))
+			// An error response has been sent; do not rewrite the request body.
+			return
+		}
+		insertContextMessage(request, content)
+		qwenRequest := m.buildQwenTextGenerationRequest(request)
+		if err := replaceJsonRequestBody(qwenRequest, log); err != nil {
+			_ = util.SendResponse(500, util.MimeTypeTextPlain, fmt.Sprintf("failed to replace request body: %v", err))
+		}
+	}, log)
+	if err == nil {
+		return types.ActionPause, nil
+	}
+	return types.ActionContinue, err
+}
+
+// OnResponseHeaders prepares the response for body conversion. With the
+// original protocol the response body is passed through unread; otherwise
+// Content-Length is removed because the body is going to be rewritten.
+func (m *qwenProvider) OnResponseHeaders(ctx wrapper.HttpContext, apiName ApiName, log wrapper.Log) (types.Action, error) {
+	if m.config.protocol != protocolOriginal {
+		_ = proxywasm.RemoveHttpResponseHeader("Content-Length")
+		return types.ActionContinue, nil
+	}
+	ctx.DontReadResponseBody()
+	return types.ActionContinue, nil
+}
+
+// OnStreamingResponseBody converts a chunk of the Qwen event stream into chat
+// completion chunks and returns the bytes to forward to the client.
+//
+// The chunk is appended to whatever was left over from the previous call
+// (stored under ctxKeyStreamingBody) and scanned line by line. Each line is
+// split at its first ':' into a key and a value; "id:" lines set the current
+// event id and "data:" lines are converted with convertStreamEvent. All other
+// lines, including lines without any ':', are ignored. An empty line separates
+// events.
+//
+// Input that cannot be processed yet is kept for the next call: an event of
+// which no line has been completed is kept from its start, and a trailing
+// incomplete line is kept from the start of that line.
+// NOTE(review): the current event id is a local variable, so a "data:" line
+// that is split across chunks is emitted with an empty id.
+func (m *qwenProvider) OnStreamingResponseBody(ctx wrapper.HttpContext, name ApiName, chunk []byte, isLastChunk bool, log wrapper.Log) ([]byte, error) {
+	receivedBody := chunk
+	if bufferedStreamingBody, has := ctx.GetContext(ctxKeyStreamingBody).([]byte); has {
+		receivedBody = append(bufferedStreamingBody, chunk...)
+	}
+
+	eventStartIndex, lineStartIndex, valueStartIndex := 0, -1, -1
+
+	defer func() {
+		if eventStartIndex != -1 {
+			// Just in case the received chunk is not a complete event.
+			ctx.SetContext(ctxKeyStreamingBody, receivedBody[eventStartIndex:])
+		} else {
+			ctx.SetContext(ctxKeyStreamingBody, nil)
+		}
+	}()
+
+	var responseBuilder strings.Builder
+	currentEventId, currentKey := "", ""
+	length := len(receivedBody)
+	for i := 0; i < length; i++ {
+		ch := receivedBody[i]
+		if ch != '\n' {
+			if lineStartIndex == -1 {
+				lineStartIndex = i
+				valueStartIndex = -1
+			}
+			if valueStartIndex == -1 {
+				if ch == ':' {
+					valueStartIndex = i + 1
+					currentKey = string(receivedBody[lineStartIndex:valueStartIndex])
+				}
+			} else if valueStartIndex == i && ch == ' ' {
+				// Skip leading spaces in data.
+				valueStartIndex = i + 1
+			}
+			continue
+		}
+
+		if lineStartIndex == -1 {
+			// Extra new line, Should be an event separator.
+			eventStartIndex = i + 1
+			continue
+		}
+
+		key := currentKey
+		// A line without ':' has no value; slicing with -1 would panic.
+		var value []byte
+		if valueStartIndex != -1 {
+			value = receivedBody[valueStartIndex:i]
+		}
+
+		// Reset message parsing state.
+		eventStartIndex = -1
+		lineStartIndex = -1
+		valueStartIndex = -1
+		currentKey = ""
+
+		switch key {
+		case streamIdItemKey:
+			currentEventId = string(value)
+		case streamDataItemKey:
+			if err := m.convertStreamEvent(ctx, &responseBuilder, currentEventId, value, log); err != nil {
+				return nil, err
+			}
+		}
+	}
+
+	// Keep a trailing incomplete line for the next chunk instead of dropping it.
+	if eventStartIndex == -1 && lineStartIndex != -1 {
+		eventStartIndex = lineStartIndex
+	}
+
+	modifiedResponseChunk := responseBuilder.String()
+	log.Debugf("=== modified response chunk: %s", modifiedResponseChunk)
+	return []byte(modifiedResponseChunk), nil
+}
+
+func (m *qwenProvider) OnResponseBody(ctx wrapper.HttpContext, apiName ApiName, body []byte, log wrapper.Log) (types.Action, error) {
+	qwenResponse := &qwenTextGenResponse{}
+	if err := json.Unmarshal(body, qwenResponse); err != nil {
+		return types.ActionContinue, fmt.Errorf("unable to unmarshal Qwen response: %v", err)
+	}
+	response := m.buildChatCompletionResponse(ctx, qwenResponse)
+	return types.ActionContinue, replaceJsonResponseBody(response, log)
+}
+
+// buildQwenTextGenerationRequest converts a chat completion request into a
+// Qwen text generation request with the "message" result format.
+//
+// A TopP of zero cannot be told apart from an absent top_p, so it is left
+// unset and omitted from the JSON (presumably letting the service apply its
+// own default) instead of being clamped to qwenTopPMin, which would force
+// near-greedy sampling on every request that does not specify top_p. Non-zero
+// values are clamped into [qwenTopPMin, qwenTopPMax].
+func (m *qwenProvider) buildQwenTextGenerationRequest(origRequest *chatCompletionRequest) *qwenTextGenRequest {
+	parameters := qwenTextGenParameters{
+		ResultFormat: qwenResultFormatMessage,
+		MaxTokens:    origRequest.MaxTokens,
+		N:            origRequest.N,
+		Seed:         origRequest.Seed,
+		Temperature:  origRequest.Temperature,
+	}
+	if origRequest.TopP != 0 {
+		parameters.TopP = math.Max(qwenTopPMin, math.Min(origRequest.TopP, qwenTopPMax))
+	}
+	return &qwenTextGenRequest{
+		Model: origRequest.Model,
+		Input: qwenTextGenInput{
+			Messages: origRequest.Messages,
+		},
+		Parameters: parameters,
+	}
+}
+
+// buildChatCompletionResponse converts a complete Qwen response into a chat
+// completion response. Choices keep their order and receive their position as
+// Index; the model is the final request model stored in the HttpContext, or
+// empty when none was stored.
+func (m *qwenProvider) buildChatCompletionResponse(ctx wrapper.HttpContext, qwenResponse *qwenTextGenResponse) *chatCompletionResponse {
+	qwenChoices := qwenResponse.Output.Choices
+	choices := make([]chatCompletionChoice, 0, len(qwenChoices))
+	for i := range qwenChoices {
+		// Point at the slice element rather than a loop variable: before Go 1.22
+		// the range variable is shared by all iterations, which would make every
+		// choice reference the last message.
+		choices = append(choices, chatCompletionChoice{
+			Index:        i,
+			Message:      &qwenChoices[i].Message,
+			FinishReason: qwenChoices[i].FinishReason,
+		})
+	}
+	// Tolerate a missing or non-string value instead of panicking.
+	model, _ := ctx.GetContext(ctxKeyFinalRequestModel).(string)
+	return &chatCompletionResponse{
+		Id:                qwenResponse.RequestId,
+		Created:           time.Now().Unix(),
+		Model:             model,
+		SystemFingerprint: "",
+		Object:            objectChatCompletion,
+		Choices:           choices,
+		Usage: chatCompletionUsage{
+			PromptTokens:     qwenResponse.Usage.InputTokens,
+			CompletionTokens: qwenResponse.Usage.OutputTokens,
+			TotalTokens:      qwenResponse.Usage.TotalTokens,
+		},
+	}
+}
+
+// buildChatCompletionStreamingResponse converts one Qwen stream event into
+// zero, one or two chat completion chunks.
+//
+// Only the first choice of the event is used. When its content starts with the
+// content already pushed for this request (remembered under
+// ctxKeyPushedMessageContent), only the new suffix is emitted as a delta chunk
+// and the remembered content is updated. A finish reason other than "" or
+// "null" produces a separate finishing chunk. An event without any choice
+// produces no chunk.
+func (m *qwenProvider) buildChatCompletionStreamingResponse(ctx wrapper.HttpContext, qwenResponse *qwenTextGenResponse) []*chatCompletionResponse {
+	responses := make([]*chatCompletionResponse, 0)
+	if len(qwenResponse.Output.Choices) == 0 {
+		// Nothing to convert; indexing the first choice would panic.
+		return responses
+	}
+
+	// Tolerate a missing or non-string value instead of panicking.
+	model, _ := ctx.GetContext(ctxKeyFinalRequestModel).(string)
+	baseMessage := chatCompletionResponse{
+		Id:                qwenResponse.RequestId,
+		Created:           time.Now().Unix(),
+		Model:             model,
+		SystemFingerprint: "",
+		Object:            objectChatCompletionChunk,
+	}
+
+	qwenChoice := qwenResponse.Output.Choices[0]
+	message := qwenChoice.Message
+
+	content := message.Content
+	if pushedContent, ok := ctx.GetContext(ctxKeyPushedMessageContent).(string); ok {
+		if pushedContent != "" && strings.HasPrefix(content, pushedContent) {
+			content = content[len(pushedContent):]
+		}
+	}
+	if content != "" {
+		// baseMessage has no choices, so each copy gets its own Choices slice.
+		deltaResponse := baseMessage
+		deltaResponse.Choices = append(deltaResponse.Choices, chatCompletionChoice{Delta: &chatMessage{Role: message.Role, Content: content}})
+		responses = append(responses, &deltaResponse)
+		ctx.SetContext(ctxKeyPushedMessageContent, message.Content)
+	}
+
+	// Yes, Qwen uses a string "null" as null.
+	if qwenChoice.FinishReason != "" && qwenChoice.FinishReason != "null" {
+		finishResponse := baseMessage
+		finishResponse.Choices = append(finishResponse.Choices, chatCompletionChoice{FinishReason: qwenChoice.FinishReason})
+		responses = append(responses, &finishResponse)
+	}
+
+	return responses
+}
+
+func (m *qwenProvider) convertStreamEvent(ctx wrapper.HttpContext, responseBuilder *strings.Builder, eventId string, eventData []byte, log wrapper.Log) error {
+	if string(eventData) == streamEndDataValue {
+		responseBuilder.WriteString(streamIdItemKey)
+		responseBuilder.WriteString(eventId)
+		responseBuilder.WriteString("\n")
+		responseBuilder.WriteString(streamEventHeader)
+		responseBuilder.WriteString(streamDataItemKey)
+		responseBuilder.WriteString(streamEndDataValue)
+		responseBuilder.WriteString("\n\n")
+		return nil
+	}
+
+	qwenResponse := &qwenTextGenResponse{}
+	if err := json.Unmarshal(eventData, qwenResponse); err != nil {
+		log.Errorf("unable to unmarshal Qwen response: %v", err)
+		return fmt.Errorf("unable to unmarshal Qwen response: %v", err)
+	}
+
+	responses := m.buildChatCompletionStreamingResponse(ctx, qwenResponse)
+	for _, response := range responses {
+		responseBody, err := json.Marshal(response)
+		if err != nil {
+			log.Errorf("unable to marshal response: %v", err)
+			return fmt.Errorf("unable to marshal response: %v", err)
+		}
+		responseBuilder.WriteString(streamIdItemKey)
+		responseBuilder.WriteString(eventId)
+		responseBuilder.WriteString("\n")
+		responseBuilder.WriteString(streamEventHeader)
+		responseBuilder.WriteString(streamDataItemKey)
+		responseBuilder.Write(responseBody)
+		responseBuilder.WriteString("\n\n")
+	}
+
+	return nil
+}
+
+// insertContextMessage adds content as a system message to a Qwen request. The
+// message is placed directly before the first non-system message, or at the
+// end when every existing message is a system message (or there is none).
+func (m *qwenProvider) insertContextMessage(request *qwenTextGenRequest, content string) {
+	contextMessage := chatMessage{Role: roleSystem, Content: content}
+
+	messages := request.Input.Messages
+	insertAt := len(messages)
+	for idx := range messages {
+		if messages[idx].Role != roleSystem {
+			insertAt = idx
+			break
+		}
+	}
+
+	// Copy the tail first so that appending to the head cannot overwrite it.
+	tail := append([]chatMessage{contextMessage}, messages[insertAt:]...)
+	request.Input.Messages = append(messages[:insertAt], tail...)
+}
+
+// qwenTextGenRequest is the JSON body of a Qwen text generation request.
+// NOTE(review): omitempty has no effect on the struct-typed Parameters field,
+// so "parameters" is always emitted.
+type qwenTextGenRequest struct {
+	Model      string                `json:"model"`
+	Input      qwenTextGenInput      `json:"input"`
+	Parameters qwenTextGenParameters `json:"parameters,omitempty"`
+}
+
+// qwenTextGenInput carries the conversation messages of a Qwen request.
+type qwenTextGenInput struct {
+	Messages []chatMessage `json:"messages"`
+}
+
+// qwenTextGenParameters carries the generation parameters of a Qwen request.
+// Fields left at their zero value are omitted from the JSON output.
+type qwenTextGenParameters struct {
+	ResultFormat      string  `json:"result_format,omitempty"`
+	MaxTokens         int     `json:"max_tokens,omitempty"`
+	RepetitionPenalty float64 `json:"repetition_penalty,omitempty"`
+	N                 int     `json:"n,omitempty"`
+	Seed              int     `json:"seed,omitempty"`
+	Temperature       float64 `json:"temperature,omitempty"`
+	TopP              float64 `json:"top_p,omitempty"`
+}
+
+// qwenTextGenResponse is the JSON shape of a Qwen text generation response; it
+// is used both for complete responses and for the data of stream events.
+type qwenTextGenResponse struct {
+	RequestId string            `json:"request_id"`
+	Output    qwenTextGenOutput `json:"output"`
+	Usage     qwenTextGenUsage  `json:"usage"`
+}
+
+// qwenTextGenOutput is the "output" member of a Qwen response.
+type qwenTextGenOutput struct {
+	FinishReason string              `json:"finish_reason"`
+	Choices      []qwenTextGenChoice `json:"choices"`
+}
+
+// qwenTextGenChoice is one generated choice. The service reports an unfinished
+// choice with the literal finish reason "null".
+type qwenTextGenChoice struct {
+	FinishReason string      `json:"finish_reason"`
+	Message      chatMessage `json:"message"`
+}
+
+// qwenTextGenUsage carries the token counters of a Qwen response.
+type qwenTextGenUsage struct {
+	InputTokens  int `json:"input_tokens"`
+	OutputTokens int `json:"output_tokens"`
+	TotalTokens  int `json:"total_tokens"`
+}
--- a/plugins/wasm-go/extensions/ai-proxy/provider/request_helper.go
+++ b/plugins/wasm-go/extensions/ai-proxy/provider/request_helper.go
@@ -0,0 +1,65 @@
+package provider
+
+import (
+	"encoding/json"
+	"errors"
+	"fmt"
+
+	"github.com/alibaba/higress/plugins/wasm-go/pkg/wrapper"
+	"github.com/higress-group/proxy-wasm-go-sdk/proxywasm"
+)
+
+// decodeChatCompletionRequest parses body as JSON into request.
+//
+// It returns an error when body cannot be unmarshalled into the request type
+// or when the decoded request contains no messages. The unmarshalling error
+// is wrapped with %w so callers can inspect it via errors.Is / errors.As.
+func decodeChatCompletionRequest(body []byte, request *chatCompletionRequest) error {
+	if err := json.Unmarshal(body, request); err != nil {
+		return fmt.Errorf("unable to unmarshal request: %w", err)
+	}
+	// len of a nil slice is 0, so one length check covers both a missing and
+	// an empty "messages" list.
+	if len(request.Messages) == 0 {
+		return errors.New("no message found in the request body")
+	}
+	return nil
+}
+
+// replaceJsonRequestBody serializes request to JSON and passes the result to
+// proxywasm.ReplaceHttpRequestBody.
+//
+// The serialized body is written to log at debug level before the
+// replacement. A marshalling failure, or an error reported by
+// proxywasm.ReplaceHttpRequestBody, is returned wrapped (%w) with context;
+// nil is returned on success.
+func replaceJsonRequestBody(request interface{}, log wrapper.Log) error {
+	body, err := json.Marshal(request)
+	if err != nil {
+		return fmt.Errorf("unable to marshal request: %w", err)
+	}
+	log.Debugf("request body: %s", string(body))
+	if err := proxywasm.ReplaceHttpRequestBody(body); err != nil {
+		return fmt.Errorf("unable to replace the original request body: %w", err)
+	}
+	return nil
+}
+
+// insertContextMessage adds a system-role message carrying content to
+// request.Messages.
+//
+// The new message is placed immediately before the first message whose role
+// is not roleSystem, i.e. after any leading system messages. When every
+// existing message is a system message, or there are none, it is appended at
+// the end.
+func insertContextMessage(request *chatCompletionRequest, content string) {
+	contextMessage := chatMessage{Role: roleSystem, Content: content}
+
+	// Default to the end of the slice; an earlier slot is chosen as soon as a
+	// non-system message is encountered.
+	insertAt := len(request.Messages)
+	for idx := range request.Messages {
+		if request.Messages[idx].Role != roleSystem {
+			insertAt = idx
+			break
+		}
+	}
+
+	// Grow by one element, shift the tail one position to the right and drop
+	// the new message into the gap.
+	request.Messages = append(request.Messages, chatMessage{})
+	copy(request.Messages[insertAt+1:], request.Messages[insertAt:])
+	request.Messages[insertAt] = contextMessage
+}
+
+// replaceJsonResponseBody serializes response to JSON and passes the result
+// to proxywasm.ReplaceHttpResponseBody.
+//
+// The serialized body is written to log at debug level before the
+// replacement. A marshalling failure, or an error reported by
+// proxywasm.ReplaceHttpResponseBody, is returned wrapped (%w) with context;
+// nil is returned on success.
+func replaceJsonResponseBody(response interface{}, log wrapper.Log) error {
+	body, err := json.Marshal(response)
+	if err != nil {
+		return fmt.Errorf("unable to marshal response: %w", err)
+	}
+	log.Debugf("response body: %s", string(body))
+	if err := proxywasm.ReplaceHttpResponseBody(body); err != nil {
+		return fmt.Errorf("unable to replace the original response body: %w", err)
+	}
+	return nil
+}
--- a/plugins/wasm-go/extensions/ai-proxy/util/http.go
+++ b/plugins/wasm-go/extensions/ai-proxy/util/http.go
@@ -0,0 +1,36 @@
+package util
+
+import "github.com/higress-group/proxy-wasm-go-sdk/proxywasm"
+
+const (
+	// HeaderContentType is the name of the HTTP Content-Type header.
+	HeaderContentType = "Content-Type"
+
+	// MIME type values usable as a Content-Type header value.
+	MimeTypeTextPlain       = "text/plain"
+	MimeTypeApplicationJson = "application/json"
+)
+
+// SendResponse sends an HTTP response through proxywasm.SendHttpResponse with
+// the given status code and body, and a single Content-Type header set to
+// contentType. It returns whatever error that call reports.
+//
+// NOTE(review): the trailing -1 is presumably the SDK's "no gRPC status"
+// value — confirm against the proxy-wasm Go SDK.
+func SendResponse(statusCode uint32, contentType, body string) error {
+	headers := CreateHeaders(HeaderContentType, contentType)
+	payload := []byte(body)
+	return proxywasm.SendHttpResponse(statusCode, headers, payload, -1)
+}
+
+// CreateHeaders converts a flat list of alternating keys and values into a
+// slice of [key, value] header pairs, preserving argument order.
+//
+// A trailing key without a matching value is ignored instead of triggering an
+// index-out-of-range panic. Calling it without arguments yields an empty,
+// non-nil slice.
+func CreateHeaders(kvs ...string) [][2]string {
+	headers := make([][2]string, 0, len(kvs)/2)
+	// Stop as soon as a complete key/value pair is no longer available.
+	for i := 0; i+1 < len(kvs); i += 2 {
+		headers = append(headers, [2]string{kvs[i], kvs[i+1]})
+	}
+	return headers
+}
+
+// OverwriteRequestHost sets the request's ":authority" header to host.
+//
+// If the current ":authority" value can be read, it is first copied into the
+// "X-ENVOY-ORIGINAL-HOST" request header. The returned error is the one
+// reported when replacing ":authority".
+func OverwriteRequestHost(host string) error {
+	originHost, err := proxywasm.GetHttpRequestHeader(":authority")
+	if err == nil {
+		// The result is deliberately discarded: failing to record the
+		// original value does not stop the rewrite below.
+		_ = proxywasm.ReplaceHttpRequestHeader("X-ENVOY-ORIGINAL-HOST", originHost)
+	}
+	return proxywasm.ReplaceHttpRequestHeader(":authority", host)
+}
+
+// OverwriteRequestPath sets the request's ":path" header to path.
+//
+// If the current ":path" value can be read, it is first copied into the
+// "X-ENVOY-ORIGINAL-PATH" request header. The returned error is the one
+// reported when replacing ":path".
+func OverwriteRequestPath(path string) error {
+	originPath, err := proxywasm.GetHttpRequestHeader(":path")
+	if err == nil {
+		// The result is deliberately discarded: failing to record the
+		// original value does not stop the rewrite below.
+		_ = proxywasm.ReplaceHttpRequestHeader("X-ENVOY-ORIGINAL-PATH", originPath)
+	}
+	return proxywasm.ReplaceHttpRequestHeader(":path", path)
+}
--- a/plugins/wasm-go/extensions/ai-proxy/util/json.go
+++ b/plugins/wasm-go/extensions/ai-proxy/util/json.go
@@ -0,0 +1,22 @@
+package util
+
+import (
+	"strconv"
+	"strings"
+)
+
+// EscapeStringForJson escapes s so that it can be placed between the double
+// quotes of a JSON string literal.
+//
+// Double quotes are backslash-escaped and single quotes are emitted verbatim.
+// Every other rune is rendered the way strconv.QuoteRune renders it, minus
+// the surrounding quotes: printable runes are kept as they are, a backslash
+// is doubled, and \b, \f, \n, \r and \t use their short escapes.
+//
+// NOTE(review): strconv.QuoteRune follows Go syntax rather than JSON's. Runes
+// it writes as \a, \v, \xNN or \UNNNNNNNN are not valid JSON escapes. The
+// package test currently pins "\v" -> `\v`, so that behaviour is left
+// untouched here and should be revisited together with the test.
+func EscapeStringForJson(s string) string {
+	var builder strings.Builder
+	for _, c := range s { // iterate rune by rune, not byte by byte
+		switch c {
+		case '"':
+			builder.WriteString(`\"`)
+		case '\'':
+			// QuoteRune would yield \', which JSON does not accept as an
+			// escape sequence; an apostrophe needs no escaping at all.
+			builder.WriteRune(c)
+		default:
+			quoted := strconv.QuoteRune(c)
+			builder.WriteString(quoted[1 : len(quoted)-1])
+		}
+	}
+	return builder.String()
+}
--- a/plugins/wasm-go/extensions/ai-proxy/util/json_test.go
+++ b/plugins/wasm-go/extensions/ai-proxy/util/json_test.go
@@ -0,0 +1,28 @@
+package util
+
+import (
+	"testing"
+
+	"github.com/stretchr/testify/assert"
+)
+
+// TestEscapeForJsonString checks EscapeStringForJson against a table of raw
+// inputs and their expected escaped forms, running one subtest per entry.
+func TestEscapeForJsonString(t *testing.T) {
+	type testCase struct {
+		raw     string
+		escaped string
+	}
+	cases := []testCase{
+		{raw: "hello", escaped: "hello"},
+		{raw: "hello\"world", escaped: "hello\\\"world"},
+		{raw: "h\be\vl\tlo\rworld\n", escaped: "h\\be\\vl\\tlo\\rworld\\n"},
+	}
+
+	for _, tc := range cases {
+		// Each entry runs as its own subtest, named after the raw input, so
+		// `go test -v` reports the cases separately.
+		t.Run(tc.raw, func(t *testing.T) {
+			assert.Equal(t, tc.escaped, EscapeStringForJson(tc.raw))
+		})
+	}
+}