diff --git a/plugins/wasm-go/extensions/ai-image-reader/README.md b/plugins/wasm-go/extensions/ai-image-reader/README.md new file mode 100644 index 000000000..1c74d5188 --- /dev/null +++ b/plugins/wasm-go/extensions/ai-image-reader/README.md @@ -0,0 +1,98 @@ +--- +title: AI IMAGE READER +keywords: [ AI网关, AI IMAGE READER ] +description: AI IMAGE READER 插件配置参考 + +--- + +## 功能说明 + +通过对接OCR服务实现AI-IMAGE-READER,目前支持阿里云模型服务灵积(dashscope)的qwen-vl-ocr模型提供OCR服务,流程如图所示: + + + +## 运行属性 + +插件执行阶段:`默认阶段` +插件执行优先级:`400` + + +## 配置说明 + +| 名称 | 数据类型 | 填写要求 | 默认值 | 描述 | +| ------------- | -------- | -------- | ------ | -------------------------------------- | +| `apiKey` | string | 必填 | - | 用于在访问OCR服务时进行认证的令牌。 | +| `type` | string | 必填 | - | 后端OCR服务提供商类型(例如dashscope) | +| `serviceHost` | string | 必填 | - | 后端OCR服务域名 | +| `serviceName` | string | 必填 | - | 后端OCR服务名 | +| `servicePort` | int | 必填 | - | 后端OCR服务端口 | +| `model` | string | 必填 | - | 后端OCR服务模型名称(例如qwen-vl-ocr) | +| `timeout` | int | 选填 | 10000 | API调用超时时间(毫秒) | + +## 示例 + +```yaml +"apiKey": "YOUR_API_KEY", +"type": "dashscope", +"model": "qwen-vl-ocr", +"timeout": 10000, +"serviceHost": "dashscope.aliyuncs.com", +"serviceName": "dashscope", +"servicePort": "443" +``` + +请求遵循openai api协议规范: + +URL传递图片: + +``` +messages=[{ + "role": "user", + "content": [ + {"type": "text", "text": "What's in this image?"}, + { + "type": "image_url", + "image_url": { + "url": "https://help-static-aliyun-doc.aliyuncs.com/file-manage-files/zh-CN/20241108/ctdzex/biaozhun.jpg", + }, + }, + ], +}], +``` + +Base64编码传递图片: + +``` +messages=[ + { + "role": "user", + "content": [ + { "type": "text", "text": "what's in this image?" }, + { + "type": "image_url", + "image_url": { + "url": f"data:image/jpeg;base64,{base64_image}", + }, + }, + ], + } +], +``` + +以下为使用ai-image-reader进行增强的例子,原始请求为: + +``` +图片内容是什么? 
+``` + +未经过ai-image-reader插件处理LLM返回的结果为: + +``` +对不起,作为一个文本AI助手,我无法查看图片内容。您可以描述一下图片的内容,我可以尽力帮助您识别。 +``` + +经过ai-image-reader插件处理后LLM返回的结果为: + +``` +非常感谢您分享的图片内容!根据您提供的文字信息,学习编写shell脚本对Linux系统管理员来说是非常有益的。通过自动化系统管理任务,可以提高效率并减少手动操作的时间。对于家用Linux爱好者来说,了解如何在命令行下操作也是很重要的,因为在某些情况下,命令行操作可能更为便捷和高效。在本书中,您将学习如何运用shell脚本处理系统管理任务,以及如何在Linux命令行下进行操作。希望这本书能够帮助您更好地理解和应用Linux系统管理和操作的知识!如果您有任何其他问题或需要进一步帮助,请随时告诉我。 +``` \ No newline at end of file diff --git a/plugins/wasm-go/extensions/ai-image-reader/README_EN.md b/plugins/wasm-go/extensions/ai-image-reader/README_EN.md new file mode 100644 index 000000000..953b73f1c --- /dev/null +++ b/plugins/wasm-go/extensions/ai-image-reader/README_EN.md @@ -0,0 +1,94 @@ +--- +title: AI IMAGE READER +keywords: [ AI GATEWAY, AI IMAGE READER ] +description: AI IMAGE READER Plugin Configuration Reference +--- + +## Function Description + +This plugin implements AI-IMAGE-READER by integrating with OCR services. It currently supports the qwen-vl-ocr model of Alibaba Cloud DashScope as the OCR service. The process is shown in the figure below: + +## Running Attributes + +Plugin execution phase: `Default Phase` +Plugin execution priority: `400` + +## Configuration Description + +| Name | Data Type | Requirement | Default Value | Description | +| ------------- | --------- | ----------- | ------------- | ------------------------------------------------------------ | +| `apiKey` | string | Required | - | Token for authenticating access to OCR services. | +| `type` | string | Required | - | Provider type of the backend OCR service (e.g., dashscope). | +| `serviceHost` | string | Required | - | Host of the backend OCR service. | +| `serviceName` | string | Required | - | Name of the backend OCR service. | +| `servicePort` | int | Required | - | Port of the backend OCR service. | +| `model` | string | Required | - | Model name of the backend OCR service (e.g., qwen-vl-ocr). | +| `timeout` | int | Optional | 10000 | API call timeout duration (milliseconds). 
| + +## Example + +```yaml +"apiKey": "YOUR_API_KEY", +"type": "dashscope", +"model": "qwen-vl-ocr", +"timeout": 10000, +"serviceHost": "dashscope.aliyuncs.com", +"serviceName": "dashscope", +"servicePort": "443" +``` + +Requests follow the OpenAI API protocol specification: + +Pass images via URL: + +``` +messages=[{ + "role": "user", + "content": [ + {"type": "text", "text": "What's in this image?"}, + { + "type": "image_url", + "image_url": { + "url": "https://help-static-aliyun-doc.aliyuncs.com/file-manage-files/zh-CN/20241108/ctdzex/biaozhun.jpg", + }, + }, + ], +}], +``` + +Pass images via Base64: + +``` +messages=[ + { + "role": "user", + "content": [ + { "type": "text", "text": "what's in this image?" }, + { + "type": "image_url", + "image_url": { + "url": f"data:image/jpeg;base64,{base64_image}", + }, + }, + ], + } +], +``` + +The following is an example of using ai-image-reader for enhancement. The original request was: + +``` +What is the content of the image? +``` + +The result returned by the LLM without processing by the ai-image-reader plugin is: + +``` +Sorry, as a text-based AI assistant, I cannot view image content. You can describe the content of the image, and I will do my best to help you identify it. +``` + +The result returned by the LLM after processing by the ai-image-reader plugin is: + +``` +Thank you for sharing the image! Mastering shell scripting is highly beneficial for Linux system administrators as it automates tasks, boosts efficiency, and cuts down manual work. For home Linux users, command-line skills are equally important for quick and efficient operations. This book will teach you to handle system management tasks with shell scripts and operate in the Linux command line. Hope it aids your Linux system management learning! Feel free to ask if you have more questions. 
+``` \ No newline at end of file diff --git a/plugins/wasm-go/extensions/ai-image-reader/ai-image-reader-en.png b/plugins/wasm-go/extensions/ai-image-reader/ai-image-reader-en.png new file mode 100644 index 000000000..495daee80 Binary files /dev/null and b/plugins/wasm-go/extensions/ai-image-reader/ai-image-reader-en.png differ diff --git a/plugins/wasm-go/extensions/ai-image-reader/ai-image-reader.png b/plugins/wasm-go/extensions/ai-image-reader/ai-image-reader.png new file mode 100644 index 000000000..dc234f993 Binary files /dev/null and b/plugins/wasm-go/extensions/ai-image-reader/ai-image-reader.png differ diff --git a/plugins/wasm-go/extensions/ai-image-reader/dashscope.go b/plugins/wasm-go/extensions/ai-image-reader/dashscope.go new file mode 100644 index 000000000..90c4d8070 --- /dev/null +++ b/plugins/wasm-go/extensions/ai-image-reader/dashscope.go @@ -0,0 +1,182 @@ +package main + +import ( + "encoding/json" + "errors" + "fmt" + "github.com/alibaba/higress/plugins/wasm-go/pkg/log" + "github.com/alibaba/higress/plugins/wasm-go/pkg/wrapper" + "github.com/tidwall/gjson" + "net/http" + "strconv" +) + +const ( + DashscopeDomain = "dashscope.aliyuncs.com" + DashscopePort = 443 + DashscopeDefaultModelName = "qwen-vl-ocr" + DashscopeEndpoint = "/compatible-mode/v1/chat/completions" + MinPixels = 3136 + MaxPixels = 1003520 +) + +type OcrReq struct { + Model string `json:"model,omitempty"` + Messages []chatMessage `json:"messages,omitempty"` +} + +type OcrResp struct { + Choices []chatCompletionChoice `json:"choices"` +} + +type chatCompletionChoice struct { + Message *chatMessageContent `json:"message,omitempty"` +} + +type chatMessageContent struct { + Role string `json:"role,omitempty"` + Content string `json:"content,omitempty"` +} + +type chatMessage struct { + Role string `json:"role"` + Content []content `json:"content"` +} + +type imageURL struct { + URL string `json:"url"` +} + +type content struct { + Type string `json:"type"` + ImageUrl imageURL 
`json:"image_url,omitempty"` + MinPixels int `json:"min_pixels,omitempty"` + MaxPixels int `json:"max_pixels,omitempty"` + Text string `json:"text,omitempty"` +} + +var dashScopeConfig dashScopeProviderConfig + +type dashScopeProviderInitializer struct { +} + +func (d *dashScopeProviderInitializer) InitConfig(json gjson.Result) { + dashScopeConfig.apiKey = json.Get("apiKey").String() +} + +func (d *dashScopeProviderInitializer) ValidateConfig() error { + if dashScopeConfig.apiKey == "" { + return errors.New("[DashScope] apiKey is required") + } + return nil +} + +func (d *dashScopeProviderInitializer) CreateProvider(c ProviderConfig) (Provider, error) { + if c.servicePort == 0 { + c.servicePort = DashscopePort + } + if c.serviceHost == "" { + c.serviceHost = DashscopeDomain + } + return &DSProvider{ + config: c, + client: wrapper.NewClusterClient(wrapper.FQDNCluster{ + FQDN: c.serviceName, + Host: c.serviceHost, + Port: int64(c.servicePort), + }), + }, nil +} + +type dashScopeProviderConfig struct { + // @Title zh-CN 文字识别服务 API Key + // @Description zh-CN 文字识别服务 API Key + apiKey string +} + +type DSProvider struct { + config ProviderConfig + client wrapper.HttpClient +} + +func (d *DSProvider) GetProviderType() string { + return ProviderTypeDashscope +} + +// CallArgs builds the chat-completions request (method, path, headers, JSON body and timeout) +// that asks the configured model, or DashscopeDefaultModelName when none is set, to read imageUrl. +func (d *DSProvider) CallArgs(imageUrl string) CallArgs { + model := d.config.model + if model == "" { + model = DashscopeDefaultModelName + } + reqBody := OcrReq{ + Model: model, + Messages: []chatMessage{ + { + Role: "user", + Content: []content{ + { + Type: "image_url", + ImageUrl: imageURL{ + URL: imageUrl, + }, + MinPixels: MinPixels, + MaxPixels: MaxPixels, + }, + }, + }, + }, + } + body, _ := json.Marshal(reqBody) // error ignored on purpose: OcrReq holds only strings, ints and nested structs/slices of them + return CallArgs{ + Method: http.MethodPost, + Url: DashscopeEndpoint, + Headers: [][2]string{ + {"Content-Type", "application/json"}, + {"Authorization", fmt.Sprintf("Bearer %s", dashScopeConfig.apiKey)}, + }, + Body: body, + TimeoutMillisecond: d.config.timeout, + } +} + +func (d *DSProvider) 
parseOcrResponse(responseBody []byte) (*OcrResp, error) { + var resp OcrResp + err := json.Unmarshal(responseBody, &resp) + if err != nil { + return nil, err + } + return &resp, nil +} + +// DoOCR sends imageUrl to the OCR backend and reports the outcome through callback: the +// recognized text on success, or an error for a non-200 status, an unparsable body, or a +// response without a usable message. The returned error is the one from issuing the call. +func (d *DSProvider) DoOCR( + imageUrl string, + callback func(imageContent string, err error)) error { + args := d.CallArgs(imageUrl) + err := d.client.Call(args.Method, args.Url, args.Headers, args.Body, + func(statusCode int, responseHeaders http.Header, responseBody []byte) { + if statusCode != http.StatusOK { + err := errors.New("failed to do ocr due to status code: " + strconv.Itoa(statusCode)) + callback("", err) + return + } + log.Debugf("do ocr response: %d, %s", statusCode, responseBody) + resp, err := d.parseOcrResponse(responseBody) + if err != nil { + err = fmt.Errorf("failed to parse response: %v", err) + callback("", err) + return + } + if len(resp.Choices) == 0 || resp.Choices[0].Message == nil { // Message is a pointer and stays nil when the choice has no "message" field + err = errors.New("no ocr response found") + callback("", err) + return + } + callback(resp.Choices[0].Message.Content, nil) + }, args.TimeoutMillisecond) + return err +} diff --git a/plugins/wasm-go/extensions/ai-image-reader/go.mod b/plugins/wasm-go/extensions/ai-image-reader/go.mod new file mode 100644 index 000000000..7f50ccfcd --- /dev/null +++ b/plugins/wasm-go/extensions/ai-image-reader/go.mod @@ -0,0 +1,19 @@ +module ai-image-reader + +go 1.19 + +require ( + github.com/alibaba/higress/plugins/wasm-go v1.4.4-0.20250621002302-e94ac43dd15c + github.com/tidwall/gjson v1.18.0 +) + +require ( + github.com/google/uuid v1.3.0 // indirect + github.com/higress-group/nottinygc v0.0.0-20231101025119-e93c4c2f8520 // indirect + github.com/higress-group/proxy-wasm-go-sdk v1.0.1 // indirect + github.com/magefile/mage v1.14.0 // indirect + github.com/tidwall/match v1.1.1 // indirect + github.com/tidwall/pretty v1.2.0 // indirect + github.com/tidwall/resp v0.1.1 // indirect + github.com/tidwall/sjson v1.2.5 // indirect +) diff --git a/plugins/wasm-go/extensions/ai-image-reader/go.sum 
b/plugins/wasm-go/extensions/ai-image-reader/go.sum new file mode 100644 index 000000000..54a7eb886 --- /dev/null +++ b/plugins/wasm-go/extensions/ai-image-reader/go.sum @@ -0,0 +1,25 @@ +github.com/alibaba/higress/plugins/wasm-go v1.4.4-0.20250621002302-e94ac43dd15c h1:YGKECMrlahN6dyEaM/S5NEU4IJoFzWKsHQyawov6ep8= +github.com/alibaba/higress/plugins/wasm-go v1.4.4-0.20250621002302-e94ac43dd15c/go.mod h1:E2xVWrIovU3rZi4HGlMfcYf+c/UVh3aCtpcJlNjpxYc= +github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= +github.com/google/uuid v1.3.0 h1:t6JiXgmwXMjEs8VusXIJk2BXHsn+wx8BZdTaoZ5fu7I= +github.com/google/uuid v1.3.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= +github.com/higress-group/nottinygc v0.0.0-20231101025119-e93c4c2f8520 h1:IHDghbGQ2DTIXHBHxWfqCYQW1fKjyJ/I7W1pMyUDeEA= +github.com/higress-group/nottinygc v0.0.0-20231101025119-e93c4c2f8520/go.mod h1:Nz8ORLaFiLWotg6GeKlJMhv8cci8mM43uEnLA5t8iew= +github.com/higress-group/proxy-wasm-go-sdk v1.0.1 h1:f9X4I5Y6jK3GrdsWn/lCTI1z5Lu5GOMazqQohAC3Vzk= +github.com/higress-group/proxy-wasm-go-sdk v1.0.1/go.mod h1:iiSyFbo+rAtbtGt/bsefv8GU57h9CCLYGJA74/tF5/0= +github.com/magefile/mage v1.14.0 h1:6QDX3g6z1YvJ4olPhT1wksUcSa/V0a1B+pJb73fBjyo= +github.com/magefile/mage v1.14.0/go.mod h1:z5UZb/iS3GoOSn0JgWuiw7dxlurVYTu+/jHXqQg881A= +github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= +github.com/stretchr/testify v1.8.4 h1:CcVxjf3Q8PM0mHUKJCdn+eZZtm5yQwehR5yeSVQQcUk= +github.com/tidwall/gjson v1.14.2/go.mod h1:/wbyibRr2FHMks5tjHJ5F8dMZh3AcwJEMf5vlfC0lxk= +github.com/tidwall/gjson v1.18.0 h1:FIDeeyB800efLX89e5a8Y0BNH+LOngJyGrIWxG2FKQY= +github.com/tidwall/gjson v1.18.0/go.mod h1:/wbyibRr2FHMks5tjHJ5F8dMZh3AcwJEMf5vlfC0lxk= +github.com/tidwall/match v1.1.1 h1:+Ho715JplO36QYgwN9PGYNhgZvoUSc9X2c80KVTi+GA= +github.com/tidwall/match v1.1.1/go.mod h1:eRSPERbgtNPcGhD8UCthc6PmLEQXEWd3PRB5JTxsfmM= +github.com/tidwall/pretty v1.2.0 
h1:RWIZEg2iJ8/g6fDDYzMpobmaoGh5OLl4AXtGUGPcqCs= +github.com/tidwall/pretty v1.2.0/go.mod h1:ITEVvHYasfjBbM0u2Pg8T2nJnzm8xPwvNhhsoaGGjNU= +github.com/tidwall/resp v0.1.1 h1:Ly20wkhqKTmDUPlyM1S7pWo5kk0tDu8OoC/vFArXmwE= +github.com/tidwall/resp v0.1.1/go.mod h1:3/FrruOBAxPTPtundW0VXgmsQ4ZBA0Aw714lVYgwFa0= +github.com/tidwall/sjson v1.2.5 h1:kLy8mja+1c9jlljvWTlSazM7cKDRfJuR/bOJhcY5NcY= +github.com/tidwall/sjson v1.2.5/go.mod h1:Fvgq9kS/6ociJEDnK0Fk1cpYF4FIW6ZF7LAe+6jwd28= +gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= diff --git a/plugins/wasm-go/extensions/ai-image-reader/main.go b/plugins/wasm-go/extensions/ai-image-reader/main.go new file mode 100644 index 000000000..13c2d22fd --- /dev/null +++ b/plugins/wasm-go/extensions/ai-image-reader/main.go @@ -0,0 +1,146 @@ +package main + +import ( + "errors" + "fmt" + "github.com/alibaba/higress/plugins/wasm-go/pkg/log" + "github.com/alibaba/higress/plugins/wasm-go/pkg/wrapper" + "github.com/higress-group/proxy-wasm-go-sdk/proxywasm" + "github.com/higress-group/proxy-wasm-go-sdk/proxywasm/types" + "github.com/tidwall/gjson" + "github.com/tidwall/sjson" + "strings" +) + +const ( + DefaultMaxBodyBytes uint32 = 100 * 1024 * 1024 +) + +type Config struct { + promptTemplate string + ocrProvider Provider + ocrProviderConfig *ProviderConfig +} + +func main() { + wrapper.SetCtx( + "ai-image-reader", + wrapper.ParseConfig(parseConfig), + wrapper.ProcessRequestHeaders(onHttpRequestHeaders), + wrapper.ProcessRequestBody(onHttpRequestBody), + ) +} + +// parseConfig installs the built-in prompt template, loads and validates the OCR provider +// settings from json, and creates the provider that is later used to read images. +func parseConfig(json gjson.Result, config *Config) error { + config.promptTemplate = `# 用户发送的图片解析得到的文字内容如下: +{image_content} +在回答时,请注意以下几点: +- 请你回答问题时结合用户图片的文字内容回答。 +- 除非用户要求,否则你回答的语言需要和用户提问的语言保持一致。 + +# 用户消息为: +{question}` + config.ocrProviderConfig = &ProviderConfig{} + config.ocrProviderConfig.FromJson(json) + if err := config.ocrProviderConfig.Validate(); err != nil { + return err + } + var err error + config.ocrProvider, err = 
CreateProvider(*config.ocrProviderConfig) + if err != nil { + return errors.New("create ocr provider failed: " + err.Error()) + } + return nil +} + +func onHttpRequestHeaders(ctx wrapper.HttpContext, config Config) types.Action { + contentType, _ := proxywasm.GetHttpRequestHeader("content-type") + if contentType == "" { + return types.ActionContinue + } + if !strings.Contains(contentType, "application/json") { + log.Warnf("content is not json, can't process: %s", contentType) + ctx.DontReadRequestBody() + return types.ActionContinue + } + ctx.SetRequestBodyBufferLimit(DefaultMaxBodyBytes) + _ = proxywasm.RemoveHttpRequestHeader("Accept-Encoding") + return types.ActionContinue +} + +func onHttpRequestBody(ctx wrapper.HttpContext, config Config, body []byte) types.Action { + var queryIndex int + var query string + messages := gjson.GetBytes(body, "messages").Array() + var imageUrls []string + for i := len(messages) - 1; i >= 0; i-- { + if messages[i].Get("role").String() == "user" { + queryIndex = i + content := messages[i].Get("content").Array() + for j := len(content) - 1; j >= 0; j-- { + contentType := content[j].Get("type").String() + if contentType == "image_url" { + imageUrls = append(imageUrls, content[j].Get("image_url.url").String()) + } else if contentType == "text" { + query = content[j].Get("text").String() + } + } + break + } + } + if len(imageUrls) == 0 { + return types.ActionContinue + } + return executeReadImage(imageUrls, config, query, queryIndex, body) +} + +// executeReadImage runs OCR on every image URL and, once all started calls have reported back, +// replaces the content of message queryIndex in body with a prompt combining the recognized text +// and query. It returns ActionPause while calls are pending; the last callback resumes the request. +func executeReadImage(imageUrls []string, config Config, query string, queryIndex int, body []byte) types.Action { + var imageContents []string + var totalImages int + var finished int + for _, imageUrl := range imageUrls { + err := config.ocrProvider.DoOCR(imageUrl, func(imageContent string, err error) { + defer func() { + finished++ + if totalImages == finished { + var processedContents []string + for idx := len(imageContents) - 1; idx >= 0; idx-- { + processedContents = append(processedContents, 
fmt.Sprintf("第%d张图片内容为 %s", totalImages-idx, imageContents[idx])) + } + imageSummary := fmt.Sprintf("总共有 %d 张图片。\n", totalImages) + // Fill both placeholders in one pass so a placeholder token that appears inside the OCR + // text or the user query is never expanded by a later substitution. + replacer := strings.NewReplacer("{image_content}", imageSummary+strings.Join(processedContents, "\n"), "{question}", query) + prompt := replacer.Replace(config.promptTemplate) + modifiedBody, err := sjson.SetBytes(body, fmt.Sprintf("messages.%d.content", queryIndex), prompt) + if err != nil { + log.Errorf("modify request message content failed, err:%v, body:%s", err, body) + } else { + log.Debugf("modified body:%s", modifiedBody) + proxywasm.ReplaceHttpRequestBody(modifiedBody) + } + proxywasm.ResumeHttpRequest() + } + }() + if err != nil { + log.Errorf("do ocr failed, err:%v", err) + return + } + imageContents = append(imageContents, imageContent) + }) + if err != nil { + log.Errorf("ocr call failed, err:%v", err) + continue + } + totalImages++ + } + if totalImages > 0 { + return types.ActionPause + } + return types.ActionContinue +} diff --git a/plugins/wasm-go/extensions/ai-image-reader/provider.go b/plugins/wasm-go/extensions/ai-image-reader/provider.go new file mode 100644 index 000000000..4d5b787e0 --- /dev/null +++ b/plugins/wasm-go/extensions/ai-image-reader/provider.go @@ -0,0 +1,108 @@ +package main + +import ( + "errors" + "github.com/tidwall/gjson" +) + +const ( + ProviderTypeDashscope = "dashscope" +) + +type providerInitializer interface { + InitConfig(json gjson.Result) + ValidateConfig() error + CreateProvider(ProviderConfig) (Provider, error) +} + +var ( + providerInitializers = map[string]providerInitializer{ + ProviderTypeDashscope: &dashScopeProviderInitializer{}, + } +) + +type ProviderConfig struct { + // @Title zh-CN 文字识别服务提供者类型 + // @Description zh-CN 文字识别服务提供者类型,例如 DashScope + typ string + // @Title zh-CN DashScope 文字识别服务名称 + // @Description zh-CN 文字识别服务名称 + serviceName string + // @Title zh-CN 文字识别服务域名 + // @Description zh-CN 文字识别服务域名 + serviceHost string + // @Title zh-CN 文字识别服务端口 + // @Description 
zh-CN 文字识别服务端口 + servicePort int64 + // @Title zh-CN 文字识别服务超时时间 + // @Description zh-CN 文字识别服务超时时间 + timeout uint32 + // @Title zh-CN 文字识别服务使用的模型 + // @Description zh-CN 用于文字识别的模型名称, 在 DashScope 中默认为 "qwen-vl-ocr" + model string + + initializer providerInitializer +} + +func (c *ProviderConfig) FromJson(json gjson.Result) { + c.typ = json.Get("type").String() + i, has := providerInitializers[c.typ] + if has { + i.InitConfig(json) + c.initializer = i + } + c.serviceName = json.Get("serviceName").String() + c.serviceHost = json.Get("serviceHost").String() + c.servicePort = json.Get("servicePort").Int() + c.timeout = uint32(json.Get("timeout").Int()) + c.model = json.Get("model").String() + if c.timeout == 0 { + c.timeout = 10000 + } +} + +// Validate checks that the provider type and service name are set, that the type maps to a +// registered initializer, and that the initializer accepts its provider-specific settings. +func (c *ProviderConfig) Validate() error { + if c.typ == "" { + return errors.New("ocr service provider type is required") + } + if c.serviceName == "" { + return errors.New("ocr service name is required") + } + if c.initializer == nil { + return errors.New("unknown ocr service provider type: " + c.typ) + } + if err := c.initializer.ValidateConfig(); err != nil { + return err + } + return nil +} + +func (c *ProviderConfig) GetProviderType() string { + return c.typ +} + +func CreateProvider(pc ProviderConfig) (Provider, error) { + initializer, has := providerInitializers[pc.typ] + if !has { + return nil, errors.New("unknown provider type: " + pc.typ) + } + return initializer.CreateProvider(pc) +} + +type CallArgs struct { + Method string + Url string + Headers [][2]string + Body []byte + TimeoutMillisecond uint32 +} + +type Provider interface { + GetProviderType() string + CallArgs(imageUrl string) CallArgs + DoOCR( + imageUrl string, + callback func(imageContent string, err error)) error +}