feat: 有/无序列表的支持,请求公众号url的支持

This commit is contained in:
fengxxc
2021-12-01 16:57:53 +08:00
parent bdd8cb4b10
commit 5727cdd22c
4 changed files with 74 additions and 16 deletions

View File

@@ -14,7 +14,7 @@ func Format(article parse.Article) string {
result += metaMdStr
var tagsMdStr string = formatTags(article.Tags)
result += tagsMdStr
var content string = formatContent(article.Content)
var content string = formatContent(article.Content, 0)
result += content
return result
}
@@ -36,7 +36,7 @@ func formatTags(tags string) string {
return tags + " \n" // TODO
}
func formatContent(pieces []parse.Piece) string {
func formatContent(pieces []parse.Piece, depth int) string {
var contentMdStr string
for _, piece := range pieces {
var pieceMdStr string
@@ -62,7 +62,9 @@ func formatContent(pieces []parse.Piece) string {
pieceMdStr = formatCodeBlock(piece)
case parse.BLOCK_QUOTES:
case parse.O_LIST:
pieceMdStr = formatList(piece, depth)
case parse.U_LIST:
pieceMdStr = formatList(piece, depth)
case parse.HR:
case parse.BR:
pieceMdStr = " \n"
@@ -72,6 +74,21 @@ func formatContent(pieces []parse.Piece) string {
return contentMdStr
}
// formatList renders one list-item piece as a single Markdown line.
//
// li is a piece of type parse.U_LIST or parse.O_LIST whose Val carries the
// item's child pieces. depth is the nesting level: one indent unit is emitted
// per level before the list marker, and the children are rendered through
// formatContent at depth+1. The returned line ends with a Markdown line break.
// A piece of any other type gets the indent but no marker.
func formatList(li parse.Piece, depth int) string {
	var prefix string
	for j := 0; j < depth; j++ {
		prefix += " "
	}

	switch li.Type {
	case parse.U_LIST:
		prefix += "- "
	case parse.O_LIST:
		// Hard-coding 1 is fine: Markdown renderers number ordered items
		// sequentially on their own.
		prefix += strconv.Itoa(1) + ". "
	}

	// Comma-ok assertion: a list piece whose Val is not a piece slice is
	// rendered as an empty item instead of panicking. A failed assertion
	// leaves children nil, which formatContent ranges over zero times.
	children, _ := li.Val.([]parse.Piece)

	return prefix + formatContent(children, depth+1) + " \n"
}
func formatCodeBlock(piece parse.Piece) string {
var codeMdStr string
codeMdStr += "```\n"

View File

@@ -3,5 +3,6 @@ package main
import "github.com/fengxxc/wechatmp2markdown/test"
// main is the program entry point; it drives the sample routines exported by
// package test.
func main() {
test.Test1()
// test.Test1()
test.Test2()
}

View File

@@ -17,28 +17,33 @@ import (
// parseSection converts the direct children of s into a flat slice of Pieces.
// Each child is mapped by tag: span -> NORMAL_TEXT, a -> LINK (with its href),
// img -> IMAGE (src read from data-src, plus alt and title), ol/ul -> the
// pieces returned by parseOl/parseUl, and a nested section -> the result of a
// recursive parseSection call. Other tags contribute nothing in the
// append-based form.
// NOTE(review): this listing carries both the earlier and the later form of
// several statements (two declarations of p, scalar and append assignments);
// the []Piece/append form is the one the final append(piece, p...) relies on.
func parseSection(s *goquery.Selection) []Piece {
var piece []Piece
s.Children().Each(func(i int, s *goquery.Selection) {
var p Piece
var p []Piece
// attr collects the HTML attributes attached to LINK and IMAGE pieces.
attr := make(map[string]string)
if s.Is("span") {
p = Piece{NORMAL_TEXT, s.Text(), nil}
// p = Piece{NORMAL_TEXT, s.Text(), nil}
p = append(p, Piece{NORMAL_TEXT, s.Text(), nil})
} else if s.Is("a") {
attr["href"], _ = s.Attr("href")
p = Piece{LINK, removeBrAndBlank(s.Text()), attr}
// p = Piece{LINK, removeBrAndBlank(s.Text()), attr}
p = append(p, Piece{LINK, removeBrAndBlank(s.Text()), attr})
} else if s.Is("img") {
// The image source is read from data-src rather than src.
attr["src"], _ = s.Attr("data-src")
attr["alt"], _ = s.Attr("alt")
attr["title"], _ = s.Attr("title")
p = Piece{IMAGE, "", attr}
// p = Piece{IMAGE, "", attr}
p = append(p, Piece{IMAGE, "", attr})
} else if s.Is("ol") {
// TODO
p = append(p, parseOl(s)...)
} else if s.Is("ul") {
// TODO
p = append(p, parseUl(s)...)
} else if s.Is("section") {
// Nested sections are flattened into the parent's piece list.
p = append(p, parseSection(s)...)
} else {
p = Piece{NORMAL_TEXT, s.Text(), nil}
// p = Piece{NORMAL_TEXT, s.Text(), nil}
// TODO
}
// fmt.Printf("%+v\n", t)
piece = append(piece, p)
piece = append(piece, p...)
})
return piece
}
@@ -73,6 +78,24 @@ func parsePre(s *goquery.Selection) []Piece {
return []Piece{p}
}
// parseUl converts the items of the unordered list selected by s into pieces.
//
// It returns one U_LIST piece per <li> that is a direct child of s, in
// document order; each piece's Val holds the item's content as parsed by
// parseSection. The result is nil when s has no <li> children.
func parseUl(s *goquery.Selection) []Piece {
	var list []Piece
	// Only direct <li> children belong to this list. Find("li") would also
	// match the items of nested lists, which parseSection already reaches by
	// recursing into the nested <ul>/<ol>, so those items would appear twice.
	s.ChildrenFiltered("li").Each(func(i int, item *goquery.Selection) {
		list = append(list, Piece{U_LIST, parseSection(item), nil})
	})
	return list
}
// parseOl converts the items of the ordered list selected by s into pieces.
//
// It returns one O_LIST piece per <li> that is a direct child of s, in
// document order; each piece's Val holds the item's content as parsed by
// parseSection. The result is nil when s has no <li> children.
func parseOl(s *goquery.Selection) []Piece {
	var list []Piece
	// Only direct <li> children belong to this list. Find("li") would also
	// match the items of nested lists, which parseSection already reaches by
	// recursing into the nested <ul>/<ol>, so those items would appear twice.
	s.ChildrenFiltered("li").Each(func(i int, item *goquery.Selection) {
		list = append(list, Piece{O_LIST, parseSection(item), nil})
	})
	return list
}
func ParseFromReader(r io.Reader) Article {
var article Article
doc, err := goquery.NewDocumentFromReader(r)
@@ -107,18 +130,15 @@ func ParseFromReader(r io.Reader) Article {
// var paragraph Paragraph
if s.Is("pre") || s.Is("section.code-snippet__fix") {
// 代码块
// paragraph = parsePre(s)
pieces = append(pieces, parsePre(s)...)
} else if s.Is("p") || s.Is("section") {
// paragraph = parseSection(s)
pieces = append(pieces, parseSection(s)...)
} else if s.Is("h1") || s.Is("h2") || s.Is("h3") || s.Is("h4") || s.Is("h5") || s.Is("h6") {
// paragraph = parseHeader(s)
pieces = append(pieces, parseHeader(s)...)
} else if s.Is("ol") {
// TODO
pieces = append(pieces, parseOl(s)...)
} else if s.Is("ul") {
// TODO
pieces = append(pieces, parseUl(s)...)
}
// sections = append(sections, paragraph)
pieces = append(pieces, Piece{BR, nil, nil})

20
test/test2.go Normal file
View File

@@ -0,0 +1,20 @@
package test
import (
"encoding/json"
"io/ioutil"
"github.com/fengxxc/wechatmp2markdown/format"
"github.com/fengxxc/wechatmp2markdown/parse"
)
// Test2 passes a fixed mp.weixin.qq.com article URL to parse.ParseFromURL and
// writes two artifacts under ./test/: the parsed article as indented JSON
// (test2_target.json) and its Markdown rendering from format.Format
// (test2_target.md). Marshal and write errors are discarded.
func Test2() {
	const articleURL = "https://mp.weixin.qq.com/s?__biz=MzIzOTU0NTQ0MA==&mid=2247506315&idx=1&sn=1546be4ecece176f669da4eed7076ee2&chksm=e92ae484de5d6d92d93cd68b927fa91e2935a75c9aafc02f294237653ca8a342e8982cabbc1d&cur_album_id=1391790902901014528&scene=189#wechat_redirect"

	var article parse.Article = parse.ParseFromURL(articleURL)

	// Dump the intermediate structure so it can be inspected next to the Markdown.
	jsonDump, _ := json.MarshalIndent(article, "", " ")
	ioutil.WriteFile("./test/test2_target.json", jsonDump, 0644)

	markdown := format.Format(article)
	ioutil.WriteFile("./test/test2_target.md", []byte(markdown), 0644)
}