mirror of
https://github.com/fengxxc/wechatmp2markdown.git
synced 2026-06-02 07:57:28 +08:00
feat: 有/无序列表的支持,请求公众号url的支持
This commit is contained in:
@@ -14,7 +14,7 @@ func Format(article parse.Article) string {
|
|||||||
result += metaMdStr
|
result += metaMdStr
|
||||||
var tagsMdStr string = formatTags(article.Tags)
|
var tagsMdStr string = formatTags(article.Tags)
|
||||||
result += tagsMdStr
|
result += tagsMdStr
|
||||||
var content string = formatContent(article.Content)
|
var content string = formatContent(article.Content, 0)
|
||||||
result += content
|
result += content
|
||||||
return result
|
return result
|
||||||
}
|
}
|
||||||
@@ -36,7 +36,7 @@ func formatTags(tags string) string {
|
|||||||
return tags + " \n" // TODO
|
return tags + " \n" // TODO
|
||||||
}
|
}
|
||||||
|
|
||||||
func formatContent(pieces []parse.Piece) string {
|
func formatContent(pieces []parse.Piece, depth int) string {
|
||||||
var contentMdStr string
|
var contentMdStr string
|
||||||
for _, piece := range pieces {
|
for _, piece := range pieces {
|
||||||
var pieceMdStr string
|
var pieceMdStr string
|
||||||
@@ -62,7 +62,9 @@ func formatContent(pieces []parse.Piece) string {
|
|||||||
pieceMdStr = formatCodeBlock(piece)
|
pieceMdStr = formatCodeBlock(piece)
|
||||||
case parse.BLOCK_QUOTES:
|
case parse.BLOCK_QUOTES:
|
||||||
case parse.O_LIST:
|
case parse.O_LIST:
|
||||||
|
pieceMdStr = formatList(piece, depth)
|
||||||
case parse.U_LIST:
|
case parse.U_LIST:
|
||||||
|
pieceMdStr = formatList(piece, depth)
|
||||||
case parse.HR:
|
case parse.HR:
|
||||||
case parse.BR:
|
case parse.BR:
|
||||||
pieceMdStr = " \n"
|
pieceMdStr = " \n"
|
||||||
@@ -72,6 +74,21 @@ func formatContent(pieces []parse.Piece) string {
|
|||||||
return contentMdStr
|
return contentMdStr
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func formatList(li parse.Piece, depth int) string {
|
||||||
|
var listMdString string
|
||||||
|
var prefix string
|
||||||
|
for j := 0; j < depth; j++ {
|
||||||
|
prefix += " "
|
||||||
|
}
|
||||||
|
if li.Type == parse.U_LIST {
|
||||||
|
prefix += "- "
|
||||||
|
} else if li.Type == parse.O_LIST {
|
||||||
|
prefix += strconv.Itoa(1) + ". " // 写死成1也大丈夫,markdown会自动累加序号
|
||||||
|
}
|
||||||
|
listMdString += prefix + formatContent(li.Val.([]parse.Piece), depth+1) + " \n"
|
||||||
|
return listMdString
|
||||||
|
}
|
||||||
|
|
||||||
func formatCodeBlock(piece parse.Piece) string {
|
func formatCodeBlock(piece parse.Piece) string {
|
||||||
var codeMdStr string
|
var codeMdStr string
|
||||||
codeMdStr += "```\n"
|
codeMdStr += "```\n"
|
||||||
|
|||||||
3
main.go
3
main.go
@@ -3,5 +3,6 @@ package main
|
|||||||
import "github.com/fengxxc/wechatmp2markdown/test"
|
import "github.com/fengxxc/wechatmp2markdown/test"
|
||||||
|
|
||||||
func main() {
|
func main() {
|
||||||
test.Test1()
|
// test.Test1()
|
||||||
|
test.Test2()
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -17,28 +17,33 @@ import (
|
|||||||
func parseSection(s *goquery.Selection) []Piece {
|
func parseSection(s *goquery.Selection) []Piece {
|
||||||
var piece []Piece
|
var piece []Piece
|
||||||
s.Children().Each(func(i int, s *goquery.Selection) {
|
s.Children().Each(func(i int, s *goquery.Selection) {
|
||||||
var p Piece
|
var p []Piece
|
||||||
attr := make(map[string]string)
|
attr := make(map[string]string)
|
||||||
if s.Is("span") {
|
if s.Is("span") {
|
||||||
p = Piece{NORMAL_TEXT, s.Text(), nil}
|
// p = Piece{NORMAL_TEXT, s.Text(), nil}
|
||||||
|
p = append(p, Piece{NORMAL_TEXT, s.Text(), nil})
|
||||||
} else if s.Is("a") {
|
} else if s.Is("a") {
|
||||||
attr["href"], _ = s.Attr("href")
|
attr["href"], _ = s.Attr("href")
|
||||||
p = Piece{LINK, removeBrAndBlank(s.Text()), attr}
|
// p = Piece{LINK, removeBrAndBlank(s.Text()), attr}
|
||||||
|
p = append(p, Piece{LINK, removeBrAndBlank(s.Text()), attr})
|
||||||
} else if s.Is("img") {
|
} else if s.Is("img") {
|
||||||
attr["src"], _ = s.Attr("data-src")
|
attr["src"], _ = s.Attr("data-src")
|
||||||
attr["alt"], _ = s.Attr("alt")
|
attr["alt"], _ = s.Attr("alt")
|
||||||
attr["title"], _ = s.Attr("title")
|
attr["title"], _ = s.Attr("title")
|
||||||
p = Piece{IMAGE, "", attr}
|
// p = Piece{IMAGE, "", attr}
|
||||||
|
p = append(p, Piece{IMAGE, "", attr})
|
||||||
} else if s.Is("ol") {
|
} else if s.Is("ol") {
|
||||||
// TODO
|
p = append(p, parseOl(s)...)
|
||||||
} else if s.Is("ul") {
|
} else if s.Is("ul") {
|
||||||
// TODO
|
p = append(p, parseUl(s)...)
|
||||||
|
} else if s.Is("section") {
|
||||||
|
p = append(p, parseSection(s)...)
|
||||||
} else {
|
} else {
|
||||||
p = Piece{NORMAL_TEXT, s.Text(), nil}
|
// p = Piece{NORMAL_TEXT, s.Text(), nil}
|
||||||
// TODO
|
// TODO
|
||||||
}
|
}
|
||||||
// fmt.Printf("%+v\n", t)
|
// fmt.Printf("%+v\n", t)
|
||||||
piece = append(piece, p)
|
piece = append(piece, p...)
|
||||||
})
|
})
|
||||||
return piece
|
return piece
|
||||||
}
|
}
|
||||||
@@ -73,6 +78,24 @@ func parsePre(s *goquery.Selection) []Piece {
|
|||||||
return []Piece{p}
|
return []Piece{p}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func parseUl(s *goquery.Selection) []Piece {
|
||||||
|
var list []Piece
|
||||||
|
s.Find("li").Each(func(i int, s *goquery.Selection) {
|
||||||
|
li := Piece{U_LIST, parseSection(s), nil}
|
||||||
|
list = append(list, li)
|
||||||
|
})
|
||||||
|
return list
|
||||||
|
}
|
||||||
|
|
||||||
|
func parseOl(s *goquery.Selection) []Piece {
|
||||||
|
var list []Piece
|
||||||
|
s.Find("li").Each(func(i int, s *goquery.Selection) {
|
||||||
|
li := Piece{O_LIST, parseSection(s), nil}
|
||||||
|
list = append(list, li)
|
||||||
|
})
|
||||||
|
return list
|
||||||
|
}
|
||||||
|
|
||||||
func ParseFromReader(r io.Reader) Article {
|
func ParseFromReader(r io.Reader) Article {
|
||||||
var article Article
|
var article Article
|
||||||
doc, err := goquery.NewDocumentFromReader(r)
|
doc, err := goquery.NewDocumentFromReader(r)
|
||||||
@@ -107,18 +130,15 @@ func ParseFromReader(r io.Reader) Article {
|
|||||||
// var paragraph Paragraph
|
// var paragraph Paragraph
|
||||||
if s.Is("pre") || s.Is("section.code-snippet__fix") {
|
if s.Is("pre") || s.Is("section.code-snippet__fix") {
|
||||||
// 代码块
|
// 代码块
|
||||||
// paragraph = parsePre(s)
|
|
||||||
pieces = append(pieces, parsePre(s)...)
|
pieces = append(pieces, parsePre(s)...)
|
||||||
} else if s.Is("p") || s.Is("section") {
|
} else if s.Is("p") || s.Is("section") {
|
||||||
// paragraph = parseSection(s)
|
|
||||||
pieces = append(pieces, parseSection(s)...)
|
pieces = append(pieces, parseSection(s)...)
|
||||||
} else if s.Is("h1") || s.Is("h2") || s.Is("h3") || s.Is("h4") || s.Is("h5") || s.Is("h6") {
|
} else if s.Is("h1") || s.Is("h2") || s.Is("h3") || s.Is("h4") || s.Is("h5") || s.Is("h6") {
|
||||||
// paragraph = parseHeader(s)
|
|
||||||
pieces = append(pieces, parseHeader(s)...)
|
pieces = append(pieces, parseHeader(s)...)
|
||||||
} else if s.Is("ol") {
|
} else if s.Is("ol") {
|
||||||
// TODO
|
pieces = append(pieces, parseOl(s)...)
|
||||||
} else if s.Is("ul") {
|
} else if s.Is("ul") {
|
||||||
// TODO
|
pieces = append(pieces, parseUl(s)...)
|
||||||
}
|
}
|
||||||
// sections = append(sections, paragraph)
|
// sections = append(sections, paragraph)
|
||||||
pieces = append(pieces, Piece{BR, nil, nil})
|
pieces = append(pieces, Piece{BR, nil, nil})
|
||||||
|
|||||||
20
test/test2.go
Normal file
20
test/test2.go
Normal file
@@ -0,0 +1,20 @@
|
|||||||
|
package test
|
||||||
|
|
||||||
|
import (
|
||||||
|
"encoding/json"
|
||||||
|
"io/ioutil"
|
||||||
|
|
||||||
|
"github.com/fengxxc/wechatmp2markdown/format"
|
||||||
|
"github.com/fengxxc/wechatmp2markdown/parse"
|
||||||
|
)
|
||||||
|
|
||||||
|
func Test2() {
|
||||||
|
var articleStruct parse.Article = parse.ParseFromURL("https://mp.weixin.qq.com/s?__biz=MzIzOTU0NTQ0MA==&mid=2247506315&idx=1&sn=1546be4ecece176f669da4eed7076ee2&chksm=e92ae484de5d6d92d93cd68b927fa91e2935a75c9aafc02f294237653ca8a342e8982cabbc1d&cur_album_id=1391790902901014528&scene=189#wechat_redirect")
|
||||||
|
|
||||||
|
byteArry, _ := json.MarshalIndent(articleStruct, "", " ")
|
||||||
|
// fmt.Println(string(byteArry))
|
||||||
|
ioutil.WriteFile("./test/test2_target.json", byteArry, 0644)
|
||||||
|
|
||||||
|
var mdString string = format.Format((articleStruct))
|
||||||
|
ioutil.WriteFile("./test/test2_target.md", []byte(mdString), 0644)
|
||||||
|
}
|
||||||
Reference in New Issue
Block a user