mirror of
https://github.com/fengxxc/wechatmp2markdown.git
synced 2026-02-25 00:01:01 +08:00
feat: 添加三种文章内图片处理方式(原src、保存到本地、内嵌base64)
This commit is contained in:
127
format/format.go
127
format/format.go
@@ -1,15 +1,22 @@
|
||||
package format
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"encoding/binary"
|
||||
"io/ioutil"
|
||||
"log"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"runtime"
|
||||
"strconv"
|
||||
"strings"
|
||||
|
||||
"github.com/fengxxc/wechatmp2markdown/parse"
|
||||
"github.com/fengxxc/wechatmp2markdown/util"
|
||||
)
|
||||
|
||||
// Format format article
|
||||
func Format(article parse.Article) string {
|
||||
func Format(article parse.Article) (string, map[string][]byte) {
|
||||
var result string
|
||||
var titleMdStr string = formatTitle(article.Title)
|
||||
result += titleMdStr
|
||||
@@ -17,15 +24,72 @@ func Format(article parse.Article) string {
|
||||
result += metaMdStr
|
||||
var tagsMdStr string = formatTags(article.Tags)
|
||||
result += tagsMdStr
|
||||
var content string = formatContent(article.Content, 0)
|
||||
var saveImageBytes map[string][]byte
|
||||
content, saveImageBytes := formatContent(article.Content, 0)
|
||||
result += content
|
||||
return result
|
||||
return result, saveImageBytes
|
||||
}
|
||||
|
||||
// FormatAndSave fomat article and save to local file
|
||||
func FormatAndSave(article parse.Article, filePath string) error {
|
||||
var result string = Format(article)
|
||||
return ioutil.WriteFile(filePath, []byte(result), 0644)
|
||||
// basrPath := filepath.Join(filePath, )
|
||||
var basePath string
|
||||
var fileName string
|
||||
var isWin bool = runtime.GOOS == "windows"
|
||||
var separator string
|
||||
if isWin {
|
||||
separator = "\\"
|
||||
} else {
|
||||
separator = "/"
|
||||
}
|
||||
if filePath == "" {
|
||||
filePath = "." + separator
|
||||
}
|
||||
if strings.HasPrefix(filePath, "./") || strings.HasPrefix(filePath, ".\\") {
|
||||
wd, _ := os.Getwd()
|
||||
filePath = strings.Replace(filePath, ".", wd, 1)
|
||||
}
|
||||
if strings.HasSuffix(filePath, ".md") {
|
||||
// basePath = filePath[:len(filePath)-len(".md")]
|
||||
basePath = filePath[:strings.LastIndex(filePath, separator)]
|
||||
fileName = filePath
|
||||
} else {
|
||||
title := strings.TrimSpace(article.Title.Val.(string))
|
||||
// title := "thisistitle"
|
||||
basePath = filepath.Join(filePath, title)
|
||||
fileName = filepath.Join(basePath, title+".md")
|
||||
}
|
||||
|
||||
// make basePath dir if not exists
|
||||
if _, err := os.Stat(basePath); err != nil {
|
||||
if err := os.MkdirAll(basePath, 0644); err != nil {
|
||||
panic(err)
|
||||
}
|
||||
}
|
||||
|
||||
var saveImageBytes map[string][]byte
|
||||
result, saveImageBytes := Format(article)
|
||||
if len(saveImageBytes) > 0 {
|
||||
for imgTitle := range saveImageBytes {
|
||||
// save to local
|
||||
imgfileName := filepath.Join(basePath, imgTitle)
|
||||
/* if err := ioutil.WriteFile(imgfileName, saveImageBytes[imgTitle], 0644); err != nil {
|
||||
log.Fatalf("can not save image file: %s\n err: %v", imgfileName, err)
|
||||
continue
|
||||
} */
|
||||
f, err := os.Create(imgfileName)
|
||||
if err != nil {
|
||||
// log.Fatalf("can not save image file: %s", imgTitle)
|
||||
log.Fatalf("can not save image file: %s\n err: %v", imgfileName, err)
|
||||
continue
|
||||
}
|
||||
defer f.Close()
|
||||
buf := new(bytes.Buffer)
|
||||
binary.Write(buf, binary.LittleEndian, saveImageBytes[imgTitle])
|
||||
f.Write(buf.Bytes())
|
||||
}
|
||||
}
|
||||
return ioutil.WriteFile(fileName, []byte(result), 0644)
|
||||
}
|
||||
|
||||
func formatTitle(piece parse.Piece) string {
|
||||
@@ -45,11 +109,13 @@ func formatTags(tags string) string {
|
||||
return tags + " \n" // TODO
|
||||
}
|
||||
|
||||
func formatContent(pieces []parse.Piece, depth int) string {
|
||||
func formatContent(pieces []parse.Piece, depth int) (string, map[string][]byte) {
|
||||
var contentMdStr string
|
||||
var base64Imgs []string
|
||||
var saveImageBytes map[string][]byte = make(map[string][]byte)
|
||||
for _, piece := range pieces {
|
||||
var pieceMdStr string
|
||||
var patchSaveImageBytes map[string][]byte
|
||||
switch piece.Type {
|
||||
case parse.HEADER:
|
||||
pieceMdStr = formatTitle(piece)
|
||||
@@ -64,6 +130,17 @@ func formatContent(pieces []parse.Piece, depth int) string {
|
||||
case parse.BOLD_ITALIC_TEXT:
|
||||
pieceMdStr = "***" + piece.Val.(string) + "***"
|
||||
case parse.IMAGE:
|
||||
if piece.Val == nil {
|
||||
pieceMdStr = formatImageInline(piece)
|
||||
} else {
|
||||
// will save to local
|
||||
src := piece.Attrs["src"]
|
||||
imgExt := util.ParseImageExtFromSrc(src)
|
||||
var hashName string = util.MD5(piece.Val.([]byte)) + "." + imgExt
|
||||
saveImageBytes[hashName] = piece.Val.([]byte)
|
||||
pieceMdStr = formatImageFileReferInline(piece.Attrs["alt"], hashName)
|
||||
}
|
||||
case parse.IMAGE_BASE64:
|
||||
pieceMdStr = formatImageRefer(piece, len(base64Imgs))
|
||||
base64Imgs = append(base64Imgs, piece.Val.(string))
|
||||
case parse.TABLE:
|
||||
@@ -73,36 +150,38 @@ func formatContent(pieces []parse.Piece, depth int) string {
|
||||
case parse.CODE_BLOCK:
|
||||
pieceMdStr = formatCodeBlock(piece)
|
||||
case parse.BLOCK_QUOTES:
|
||||
pieceMdStr = formatBlockQuote(piece, depth)
|
||||
pieceMdStr, patchSaveImageBytes = formatBlockQuote(piece, depth)
|
||||
case parse.O_LIST:
|
||||
pieceMdStr = formatList(piece, depth)
|
||||
pieceMdStr, patchSaveImageBytes = formatList(piece, depth)
|
||||
case parse.U_LIST:
|
||||
pieceMdStr = formatList(piece, depth)
|
||||
pieceMdStr, patchSaveImageBytes = formatList(piece, depth)
|
||||
case parse.HR:
|
||||
// TODO
|
||||
case parse.BR:
|
||||
pieceMdStr = " \n"
|
||||
}
|
||||
contentMdStr += pieceMdStr
|
||||
util.MergeMap(saveImageBytes, patchSaveImageBytes)
|
||||
}
|
||||
for i := 0; i < len(base64Imgs); i++ {
|
||||
contentMdStr += "\n[" + strconv.Itoa(i) + "]:" + "data:image/png;base64," + base64Imgs[i]
|
||||
}
|
||||
return contentMdStr
|
||||
return contentMdStr, saveImageBytes
|
||||
}
|
||||
|
||||
func formatBlockQuote(piece parse.Piece, depth int) string {
|
||||
func formatBlockQuote(piece parse.Piece, depth int) (string, map[string][]byte) {
|
||||
var bqMdString string
|
||||
var prefix string = ">"
|
||||
for i := 0; i < depth; i++ {
|
||||
prefix += ">"
|
||||
}
|
||||
prefix += " "
|
||||
bqMdString = prefix + formatContent(piece.Val.([]parse.Piece), depth+1) + " \n"
|
||||
return bqMdString
|
||||
var saveImageBytes map[string][]byte
|
||||
bqMdString, saveImageBytes = formatContent(piece.Val.([]parse.Piece), depth+1)
|
||||
return prefix + bqMdString + " \n", saveImageBytes
|
||||
}
|
||||
|
||||
func formatList(li parse.Piece, depth int) string {
|
||||
func formatList(li parse.Piece, depth int) (string, map[string][]byte) {
|
||||
var listMdString string
|
||||
var prefix string
|
||||
for j := 0; j < depth; j++ {
|
||||
@@ -113,8 +192,9 @@ func formatList(li parse.Piece, depth int) string {
|
||||
} else if li.Type == parse.O_LIST {
|
||||
prefix += strconv.Itoa(1) + ". " // 写死成1也大丈夫,markdown会自动累加序号
|
||||
}
|
||||
listMdString = prefix + formatContent(li.Val.([]parse.Piece), depth+1) + " \n"
|
||||
return listMdString
|
||||
var saveImageBytes map[string][]byte
|
||||
listMdString, saveImageBytes = formatContent(li.Val.([]parse.Piece), depth+1)
|
||||
return prefix + listMdString + " \n", saveImageBytes
|
||||
}
|
||||
|
||||
func formatCodeBlock(piece parse.Piece) string {
|
||||
@@ -128,11 +208,22 @@ func formatCodeBlock(piece parse.Piece) string {
|
||||
return codeMdStr
|
||||
}
|
||||
|
||||
func formatImageInline(piece parse.Piece, index int) string {
|
||||
// return "![" + piece.Attrs["alt"] + "](" + piece.Attrs["src"] + " \"" + piece.Attrs["title"] + "\")"
|
||||
// 图片地址为本身src
|
||||
func formatImageInline(piece parse.Piece) string {
|
||||
return "![" + piece.Attrs["alt"] + "](" + piece.Attrs["src"] + " \"" + piece.Attrs["title"] + "\")"
|
||||
}
|
||||
|
||||
// formatImageFileReferInline renders an inline markdown image whose target is
// a local file reference.
//
// alt is used as the image label and refName as the link target, producing
// "![alt](refName)". Returning an empty string here would drop every locally
// saved image from the generated markdown.
func formatImageFileReferInline(alt string, refName string) string {
	return "![" + alt + "](" + refName + ")"
}
|
||||
|
||||
// 图片转成base64并插在原地
|
||||
func formatImageBase64Inline(piece parse.Piece) string {
|
||||
return "![" + piece.Attrs["alt"] + "](data:image/png;base64," + piece.Val.(string) + ")"
|
||||
}
|
||||
|
||||
// 图片地址为markdown内引用(用于base64)
|
||||
func formatImageRefer(piece parse.Piece, index int) string {
|
||||
return "![" + piece.Attrs["alt"] + "][" + strconv.Itoa(index) + "]"
|
||||
}
|
||||
|
||||
28
main.go
28
main.go
@@ -3,6 +3,7 @@ package main
|
||||
import (
|
||||
"fmt"
|
||||
"os"
|
||||
"strings"
|
||||
|
||||
"github.com/fengxxc/wechatmp2markdown/format"
|
||||
"github.com/fengxxc/wechatmp2markdown/parse"
|
||||
@@ -29,10 +30,35 @@ func main() {
|
||||
return
|
||||
}
|
||||
|
||||
// --image=base64 -ib 保存图片,base64格式,在md文件中(默认为此选项)
|
||||
// --image=url -iu 只保留图片链接
|
||||
// --image=save -is 保存图片,最终输出到文件夹
|
||||
// --save=zip -sz 最终打包输出到zip
|
||||
imageArgValue := "base64"
|
||||
if args[3] != "" {
|
||||
if strings.HasPrefix(args[3], "--image=") {
|
||||
imageArgValue = args[3][len("--image="):]
|
||||
} else if strings.HasPrefix(args[3], "-i") {
|
||||
imageArgVal := args[3][len("-i"):]
|
||||
switch imageArgVal {
|
||||
case "u":
|
||||
imageArgValue = "url"
|
||||
case "s":
|
||||
imageArgValue = "save"
|
||||
case "b":
|
||||
fallthrough
|
||||
default:
|
||||
imageArgValue = "base64"
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
var imagePolicy parse.ImagePolicy = parse.ImageArgValue2ImagePolicy(imageArgValue)
|
||||
|
||||
// cli pattern
|
||||
url := args1
|
||||
filename := args2
|
||||
fmt.Printf("url: %s, filename: %s\n", url, filename)
|
||||
var articleStruct parse.Article = parse.ParseFromURL(url)
|
||||
var articleStruct parse.Article = parse.ParseFromURL(url, imagePolicy)
|
||||
format.FormatAndSave(articleStruct, filename)
|
||||
}
|
||||
|
||||
@@ -31,12 +31,13 @@ const (
|
||||
ITALIC_TEXT // 4 斜体文字
|
||||
BOLD_ITALIC_TEXT // 5 粗斜体
|
||||
IMAGE // 6 图片
|
||||
TABLE // 7 表格
|
||||
CODE_INLINE // 8 代码 内联
|
||||
CODE_BLOCK // 9 代码 块
|
||||
BLOCK_QUOTES // 10 引用
|
||||
O_LIST // 11 有序列表
|
||||
U_LIST // 12 无序列表
|
||||
HR // 13 分隔线
|
||||
BR // 14 换行
|
||||
IMAGE_BASE64 // 7 图片 base64
|
||||
TABLE // 8 表格
|
||||
CODE_INLINE // 9 代码 内联
|
||||
CODE_BLOCK // 10 代码 块
|
||||
BLOCK_QUOTES // 11 引用
|
||||
O_LIST // 12 有序列表
|
||||
U_LIST // 13 无序列表
|
||||
HR // 14 分隔线
|
||||
BR // 15 换行
|
||||
)
|
||||
|
||||
@@ -16,7 +16,7 @@ import (
|
||||
"github.com/PuerkitoBio/goquery"
|
||||
)
|
||||
|
||||
func parseSection(s *goquery.Selection) []Piece {
|
||||
func parseSection(s *goquery.Selection, imagePolicy ImagePolicy) []Piece {
|
||||
var pieces []Piece
|
||||
pieces = append(pieces, Piece{BR, nil, nil})
|
||||
s.Contents().Each(func(i int, sc *goquery.Selection) {
|
||||
@@ -28,21 +28,31 @@ func parseSection(s *goquery.Selection) []Piece {
|
||||
attr["src"], _ = sc.Attr("data-src")
|
||||
attr["alt"], _ = sc.Attr("alt")
|
||||
attr["title"], _ = sc.Attr("title")
|
||||
base64Image := img2base64(fetchImgFile(attr["src"]))
|
||||
pieces = append(pieces, Piece{IMAGE, base64Image, attr}, Piece{BR, nil, nil})
|
||||
switch imagePolicy {
|
||||
case IMAGE_POLICY_URL:
|
||||
pieces = append(pieces, Piece{IMAGE, nil, attr}, Piece{BR, nil, nil})
|
||||
case IMAGE_POLICY_SAVE:
|
||||
image := fetchImgFile(attr["src"])
|
||||
pieces = append(pieces, Piece{IMAGE, image, attr}, Piece{BR, nil, nil})
|
||||
case IMAGE_POLICY_BASE64:
|
||||
fallthrough
|
||||
default:
|
||||
base64Image := img2base64(fetchImgFile(attr["src"]))
|
||||
pieces = append(pieces, Piece{IMAGE_BASE64, base64Image, attr}, Piece{BR, nil, nil})
|
||||
}
|
||||
} else if sc.Is("ol") {
|
||||
pieces = append(pieces, parseList(sc, O_LIST)...)
|
||||
pieces = append(pieces, parseList(sc, O_LIST, imagePolicy)...)
|
||||
} else if sc.Is("ul") {
|
||||
pieces = append(pieces, parseList(sc, U_LIST)...)
|
||||
pieces = append(pieces, parseList(sc, U_LIST, imagePolicy)...)
|
||||
} else if sc.Is("pre") || sc.Is("section.code-snippet__fix") {
|
||||
// 代码块
|
||||
pieces = append(pieces, parsePre(sc)...)
|
||||
} else if sc.Is("p") || sc.Is("section") || sc.Is("span") {
|
||||
pieces = append(pieces, parseSection(sc)...)
|
||||
pieces = append(pieces, parseSection(sc, imagePolicy)...)
|
||||
} else if sc.Is("h1") || sc.Is("h2") || sc.Is("h3") || sc.Is("h4") || sc.Is("h5") || sc.Is("h6") {
|
||||
pieces = append(pieces, parseHeader(sc)...)
|
||||
} else if sc.Is("blockquote") {
|
||||
pieces = append(pieces, parseBlockQuote(sc)...)
|
||||
pieces = append(pieces, parseBlockQuote(sc, imagePolicy)...)
|
||||
} else if sc.Is("strong") {
|
||||
pieces = append(pieces, parseStrong(sc)...)
|
||||
} else {
|
||||
@@ -83,19 +93,19 @@ func parsePre(s *goquery.Selection) []Piece {
|
||||
return []Piece{p, {BR, nil, nil}}
|
||||
}
|
||||
|
||||
func parseList(s *goquery.Selection, ptype PieceType) []Piece {
|
||||
func parseList(s *goquery.Selection, ptype PieceType, imagePolicy ImagePolicy) []Piece {
|
||||
var list []Piece
|
||||
s.Find("li").Each(func(i int, sc *goquery.Selection) {
|
||||
list = append(list, Piece{ptype, parseSection(sc), nil})
|
||||
list = append(list, Piece{ptype, parseSection(sc, imagePolicy), nil})
|
||||
})
|
||||
list = append(list, Piece{BR, nil, nil})
|
||||
return list
|
||||
}
|
||||
|
||||
func parseBlockQuote(s *goquery.Selection) []Piece {
|
||||
func parseBlockQuote(s *goquery.Selection, imagePolicy ImagePolicy) []Piece {
|
||||
var bq []Piece
|
||||
s.Contents().Each(func(i int, sc *goquery.Selection) {
|
||||
bq = append(bq, Piece{BLOCK_QUOTES, parseSection(sc), nil})
|
||||
bq = append(bq, Piece{BLOCK_QUOTES, parseSection(sc, imagePolicy), nil})
|
||||
})
|
||||
bq = append(bq, Piece{BR, nil, nil})
|
||||
return bq
|
||||
@@ -124,7 +134,7 @@ func parseMeta(s *goquery.Selection) []string {
|
||||
return res
|
||||
}
|
||||
|
||||
func ParseFromReader(r io.Reader) Article {
|
||||
func ParseFromReader(r io.Reader, imagePolicy ImagePolicy) Article {
|
||||
var article Article
|
||||
doc, err := goquery.NewDocumentFromReader(r)
|
||||
if err != nil {
|
||||
@@ -144,11 +154,10 @@ func ParseFromReader(r io.Reader) Article {
|
||||
// 从js中找到发布时间
|
||||
re, _ := regexp.Compile("var ct = \"([0-9]+)\"")
|
||||
findstrs := re.FindStringSubmatch(doc.Find("script").Text())
|
||||
if findstrs != nil && len(findstrs) > 1 {
|
||||
if len(findstrs) > 1 {
|
||||
var createTime string = findstrs[1]
|
||||
timestamp, _ := strconv.Atoi(createTime)
|
||||
time := time.Unix(int64(timestamp), 0)
|
||||
// fmt.Println(time)
|
||||
article.Meta = append(article.Meta, time.Format("2006-01-02 15:04"))
|
||||
}
|
||||
|
||||
@@ -162,17 +171,17 @@ func ParseFromReader(r io.Reader) Article {
|
||||
// p[style="line-height: 1.5em;"] => 项目列表(有序/无序)
|
||||
// section[style=".*text-align:center"]>img => 居中段落(图片)
|
||||
content := mainContent.Find("#js_content")
|
||||
pieces := parseSection(content)
|
||||
pieces := parseSection(content, imagePolicy)
|
||||
article.Content = pieces
|
||||
|
||||
return article
|
||||
}
|
||||
|
||||
func ParseFromHTMLString(s string) Article {
|
||||
return ParseFromReader(strings.NewReader(s))
|
||||
func ParseFromHTMLString(s string, imagePolicy ImagePolicy) Article {
|
||||
return ParseFromReader(strings.NewReader(s), imagePolicy)
|
||||
}
|
||||
|
||||
func ParseFromHTMLFile(filepath string) Article {
|
||||
func ParseFromHTMLFile(filepath string, imagePolicy ImagePolicy) Article {
|
||||
file, err := os.Open(filepath)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
@@ -182,10 +191,10 @@ func ParseFromHTMLFile(filepath string) Article {
|
||||
if err2 != nil {
|
||||
panic(err)
|
||||
}
|
||||
return ParseFromReader(bytes.NewReader(content))
|
||||
return ParseFromReader(bytes.NewReader(content), imagePolicy)
|
||||
}
|
||||
|
||||
func ParseFromURL(url string) Article {
|
||||
func ParseFromURL(url string, imagePolicy ImagePolicy) Article {
|
||||
res, err := http.Get(url)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
@@ -194,7 +203,7 @@ func ParseFromURL(url string) Article {
|
||||
if res.StatusCode != 200 {
|
||||
log.Fatalf("get from url %s error: %d %s", url, res.StatusCode, res.Status)
|
||||
}
|
||||
return ParseFromReader(res.Body)
|
||||
return ParseFromReader(res.Body, imagePolicy)
|
||||
}
|
||||
|
||||
func removeBrAndBlank(s string) string {
|
||||
@@ -230,3 +239,26 @@ func fetchImgFile(url string) []byte {
|
||||
func img2base64(content []byte) string {
|
||||
return base64.StdEncoding.EncodeToString(content)
|
||||
}
|
||||
|
||||
// ImagePolicy selects how images found in an article are handled.
type ImagePolicy int32

// Supported image policies.
const (
	// IMAGE_POLICY_URL is selected by the argument value "url".
	IMAGE_POLICY_URL ImagePolicy = iota
	// IMAGE_POLICY_SAVE is selected by the argument value "save".
	IMAGE_POLICY_SAVE
	// IMAGE_POLICY_BASE64 is selected by "base64" and by any unknown value.
	IMAGE_POLICY_BASE64
)

// ImageArgValue2ImagePolicy maps a textual argument value to its ImagePolicy.
// "url" and "save" select the matching policies; "base64" and every other
// value (including the empty string) fall back to IMAGE_POLICY_BASE64.
func ImageArgValue2ImagePolicy(val string) ImagePolicy {
	switch val {
	case "url":
		return IMAGE_POLICY_URL
	case "save":
		return IMAGE_POLICY_SAVE
	}
	return IMAGE_POLICY_BASE64
}
|
||||
|
||||
@@ -4,26 +4,43 @@ import (
|
||||
"fmt"
|
||||
"log"
|
||||
"net/http"
|
||||
"regexp"
|
||||
"strings"
|
||||
|
||||
"github.com/fengxxc/wechatmp2markdown/format"
|
||||
"github.com/fengxxc/wechatmp2markdown/parse"
|
||||
"github.com/fengxxc/wechatmp2markdown/util"
|
||||
)
|
||||
|
||||
func Start(addr string) {
|
||||
http.HandleFunc("/", func(w http.ResponseWriter, r *http.Request) {
|
||||
wechatmpURL := r.FormValue("url")
|
||||
rawQuery := r.URL.RawQuery
|
||||
paramsMap := parseParams(rawQuery)
|
||||
|
||||
// url param
|
||||
wechatmpURL := paramsMap["url"]
|
||||
fmt.Printf("accept url: %s\n", wechatmpURL)
|
||||
imageArgValue := paramsMap["image"]
|
||||
fmt.Printf(" image: %s\n", imageArgValue)
|
||||
imagePolicy := parse.ImageArgValue2ImagePolicy(imageArgValue)
|
||||
|
||||
if wechatmpURL == "" {
|
||||
w.WriteHeader(http.StatusBadRequest)
|
||||
w.Write([]byte("param 'url' must not be empty. please put in a wechatmp URL and try again."))
|
||||
w.Write([]byte(defHTML))
|
||||
return
|
||||
}
|
||||
w.Header().Set("Content-Type", "application/octet-stream")
|
||||
var articleStruct parse.Article = parse.ParseFromURL(wechatmpURL)
|
||||
var articleStruct parse.Article = parse.ParseFromURL(wechatmpURL, imagePolicy)
|
||||
title := articleStruct.Title.Val.(string)
|
||||
w.Header().Set("Content-Disposition", "attachment; filename="+title+".md")
|
||||
var mdString string = format.Format(articleStruct)
|
||||
w.Write([]byte(mdString))
|
||||
return
|
||||
mdString, saveImageBytes := format.Format(articleStruct)
|
||||
if len(saveImageBytes) > 0 {
|
||||
w.Header().Set("Content-Disposition", "attachment; filename="+title+".zip")
|
||||
saveImageBytes[title] = []byte(mdString)
|
||||
util.HttpDownloadZip(w, saveImageBytes)
|
||||
} else {
|
||||
w.Header().Set("Content-Disposition", "attachment; filename="+title+".md")
|
||||
w.Write([]byte(mdString))
|
||||
}
|
||||
})
|
||||
|
||||
fmt.Printf("wechatmp2markdown server listening on %s\n", addr)
|
||||
@@ -31,3 +48,52 @@ func Start(addr string) {
|
||||
log.Fatal(err)
|
||||
}
|
||||
}
|
||||
|
||||
var defHTML string = `
|
||||
<!DOCTYPE html>
|
||||
<html lang="en">
|
||||
<head>
|
||||
<meta charset="UTF-8">
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||||
<title>wechatmp2markdown</title>
|
||||
</head>
|
||||
<body>
|
||||
<h1 style="text-align: center; width: 100%;">wechatmp2markdown</h1>
|
||||
<ul style="margin: 0 auto; width: 89%;">
|
||||
<li>
|
||||
<strong>param 'url' is required.</strong> please put in a wechatmp URL and try again.
|
||||
</li>
|
||||
<li>
|
||||
<strong>param 'image' is optional</strong>, value include: 'url' / 'save' / 'base64'(default)
|
||||
</li>
|
||||
<li>
|
||||
<strong>example:</strong> http://localhost:8964/?url=https://mp.weixin.qq.com/s?__biz=aaaa==&mid=1111&idx=2&sn=bbbb&chksm=cccc&scene=123&image=save
|
||||
</li>
|
||||
</ul>
|
||||
</body>
|
||||
</html>
|
||||
`
|
||||
|
||||
// parseParams extracts the "image" and "url" parameters from a raw query
// string without regular query parsing, so that a target URL carrying its own
// "&"-separated parameters survives in one piece.
//
// The first "image=<lowercase letters>" occurrence (with an optional leading
// "&") is cut out of the query and stored under "image". Whatever follows the
// first "url=" in the remaining text is stored under "url". Keys that are not
// found are absent from the returned map.
func parseParams(rawQuery string) map[string]string {
	params := map[string]string{}
	remaining := rawQuery

	imagePattern := regexp.MustCompile(`(&?image=)([a-z]+)`)
	if groups := imagePattern.FindStringSubmatch(rawQuery); len(groups) > 1 {
		// Drop the whole image parameter so it does not end up in the url value.
		remaining = strings.Replace(rawQuery, groups[0], "", 1)
		if len(groups) > 2 {
			params["image"] = groups[2]
		}
	}

	urlPattern := regexp.MustCompile(`(&?url=)(.+)`)
	if groups := urlPattern.FindStringSubmatch(remaining); len(groups) > 2 {
		params["url"] = groups[2]
	}
	return params
}
|
||||
|
||||
@@ -9,12 +9,12 @@ import (
|
||||
)
|
||||
|
||||
func Test1() {
|
||||
var articleStruct parse.Article = parse.ParseFromHTMLFile("./test/test1.html")
|
||||
var articleStruct parse.Article = parse.ParseFromHTMLFile("./test/test1.html", parse.IMAGE_POLICY_BASE64)
|
||||
fmt.Println("-------------------test1.html parse-------------------")
|
||||
fmt.Printf("%+v\n", articleStruct)
|
||||
|
||||
fmt.Println("-------------------test1.html format-------------------")
|
||||
var mdString string = format.Format(articleStruct)
|
||||
mdString, _ := format.Format(articleStruct)
|
||||
fmt.Print(mdString)
|
||||
ioutil.WriteFile("./test/test1_target.md", []byte(mdString), 0644)
|
||||
}
|
||||
|
||||
@@ -9,12 +9,13 @@ import (
|
||||
)
|
||||
|
||||
func Test2() {
|
||||
var articleStruct parse.Article = parse.ParseFromURL("https://mp.weixin.qq.com/s?__biz=MzIzOTU0NTQ0MA==&mid=2247506315&idx=1&sn=1546be4ecece176f669da4eed7076ee2&chksm=e92ae484de5d6d92d93cd68b927fa91e2935a75c9aafc02f294237653ca8a342e8982cabbc1d&cur_album_id=1391790902901014528&scene=189#wechat_redirect")
|
||||
// var articleStruct parse.Article = parse.ParseFromURL("https://mp.weixin.qq.com/s?__biz=MzIzOTU0NTQ0MA==&mid=2247506315&idx=1&sn=1546be4ecece176f669da4eed7076ee2&chksm=e92ae484de5d6d92d93cd68b927fa91e2935a75c9aafc02f294237653ca8a342e8982cabbc1d&cur_album_id=1391790902901014528&scene=189#wechat_redirect")
|
||||
var articleStruct parse.Article = parse.ParseFromURL("https://mp.weixin.qq.com/s?__biz=MzU0OTE4MzYzMw==&mid=2247525863&idx=2&sn=d759f98b62f61f3a8312da4ee426c287&chksm=fbb1ec19ccc6650f40c0ef67b47163040c33f9dfe3d6f05bf28d4d823b6f847c09fea046b2eb&scene=132#wechat_redirect", parse.IMAGE_POLICY_BASE64)
|
||||
|
||||
byteArry, _ := json.MarshalIndent(articleStruct, "", " ")
|
||||
// fmt.Println(string(byteArry))
|
||||
ioutil.WriteFile("./test/test2_target.json", byteArry, 0644)
|
||||
|
||||
var mdString string = format.Format((articleStruct))
|
||||
mdString, _ := format.Format((articleStruct))
|
||||
ioutil.WriteFile("./test/test2_target.md", []byte(mdString), 0644)
|
||||
}
|
||||
|
||||
77
util/util.go
Normal file
77
util/util.go
Normal file
@@ -0,0 +1,77 @@
|
||||
package util
|
||||
|
||||
import (
|
||||
"archive/zip"
|
||||
"bytes"
|
||||
"crypto/md5"
|
||||
"encoding/hex"
|
||||
"io"
|
||||
"log"
|
||||
"net/http"
|
||||
"os"
|
||||
"regexp"
|
||||
)
|
||||
|
||||
// MergeMap copies every entry of m2 into m1, overwriting entries of m1 that
// share a key. m1 is modified in place; m2 is left untouched. The byte slices
// themselves are shared, not copied.
func MergeMap(m1 map[string][]byte, m2 map[string][]byte) {
	for key := range m2 {
		m1[key] = m2[key]
	}
}
|
||||
|
||||
// Zip writes a zip archive to zipFileName containing one entry per element of
// files: the map key is the entry name and the value is the entry content.
//
// Any failure terminates the program through log.Fatal. This includes a
// failure to finish the archive or to close the file, because the zip central
// directory is only written by Close and an unnoticed error there would leave
// a corrupt archive behind.
func Zip(zipFileName string, files map[string][]byte) {
	f, err := os.Create(zipFileName)
	if err != nil {
		log.Fatal(err)
	}

	zipWriter := zip.NewWriter(f)
	for name, file := range files {
		zw, err := zipWriter.Create(name)
		if err != nil {
			log.Fatal(err)
		}
		if _, err := io.Copy(zw, bytes.NewReader(file)); err != nil {
			log.Fatal(err)
		}
	}

	// Close flushes buffered data and writes the central directory.
	if err := zipWriter.Close(); err != nil {
		log.Fatal(err)
	}
	if err := f.Close(); err != nil {
		log.Fatal(err)
	}
}
|
||||
|
||||
// HttpDownloadZip streams a zip archive to w containing one entry per element
// of files: the map key is the entry name and the value is the entry content.
//
// Errors are logged and end the response early instead of terminating the
// process: a write to an HTTP response fails as soon as the client goes away,
// and that must not take the whole server down.
func HttpDownloadZip(w http.ResponseWriter, files map[string][]byte) {
	zipWriter := zip.NewWriter(w)
	for name, file := range files {
		zw, err := zipWriter.Create(name)
		if err != nil {
			log.Printf("zip download: can not create entry %s: %v", name, err)
			return
		}
		if _, err := io.Copy(zw, bytes.NewReader(file)); err != nil {
			log.Printf("zip download: can not write entry %s: %v", name, err)
			return
		}
	}
	// Close flushes buffered data and writes the central directory.
	if err := zipWriter.Close(); err != nil {
		log.Printf("zip download: can not finish archive: %v", err)
	}
}
|
||||
|
||||
// MD5 returns the MD5 digest of content as a lowercase hexadecimal string.
func MD5(content []byte) string {
	digest := md5.Sum(content)
	return hex.EncodeToString(digest[:])
}
|
||||
|
||||
// ParseImageExtFromSrc extracts the image extension from an image src by
// reading the letters that follow "wx_fmt=". It returns "" when src carries
// no such parameter.
func ParseImageExtFromSrc(src string) string {
	pattern := regexp.MustCompile(`(wx_fmt=)([a-zA-Z]+)(&?)`)
	if groups := pattern.FindStringSubmatch(src); len(groups) >= 3 {
		return groups[2]
	}
	return ""
}
|
||||
|
||||
// PathIsExists reports whether path exists, based on os.Stat. It returns the
// file info from os.Stat together with true when the stat call succeeded or
// when its error satisfies os.IsExist, and false otherwise.
func PathIsExists(path string) (os.FileInfo, bool) {
	info, statErr := os.Stat(path)
	if statErr == nil {
		return info, true
	}
	return info, os.IsExist(statErr)
}
|
||||
Reference in New Issue
Block a user