diff --git a/format/format.go b/format/format.go index 13dc55b..a0e8060 100644 --- a/format/format.go +++ b/format/format.go @@ -14,7 +14,7 @@ func Format(article parse.Article) string { result += metaMdStr var tagsMdStr string = formatTags(article.Tags) result += tagsMdStr - var content string = formatContent(article.Content) + var content string = formatContent(article.Content, 0) result += content return result } @@ -36,7 +36,7 @@ func formatTags(tags string) string { return tags + " \n" // TODO } -func formatContent(pieces []parse.Piece) string { +func formatContent(pieces []parse.Piece, depth int) string { var contentMdStr string for _, piece := range pieces { var pieceMdStr string @@ -62,7 +62,9 @@ func formatContent(pieces []parse.Piece) string { pieceMdStr = formatCodeBlock(piece) case parse.BLOCK_QUOTES: case parse.O_LIST: + pieceMdStr = formatList(piece, depth) case parse.U_LIST: + pieceMdStr = formatList(piece, depth) case parse.HR: case parse.BR: pieceMdStr = " \n" @@ -72,6 +74,21 @@ func formatContent(pieces []parse.Piece) string { return contentMdStr } +func formatList(li parse.Piece, depth int) string { + var listMdString string + var prefix string + for j := 0; j < depth; j++ { + prefix += " " + } + if li.Type == parse.U_LIST { + prefix += "- " + } else if li.Type == parse.O_LIST { + prefix += strconv.Itoa(1) + ". " // 写死成1也大丈夫,markdown会自动累加序号 + } + listMdString += prefix + formatContent(li.Val.([]parse.Piece), depth+1) + " \n" + return listMdString +} + func formatCodeBlock(piece parse.Piece) string { var codeMdStr string codeMdStr += "```\n" diff --git a/main.go b/main.go index f516d8d..9a1eb29 100644 --- a/main.go +++ b/main.go @@ -3,5 +3,6 @@ package main import "github.com/fengxxc/wechatmp2markdown/test" func main() { - test.Test1() + // test.Test1() + test.Test2() } diff --git a/parse/parse.go b/parse/parse.go index 81a83cf..f409b8b 100644 --- a/parse/parse.go +++ b/parse/parse.go @@ -17,28 +17,33 @@ import ( func parseSection(s *goquery.Selection) []Piece { var piece []Piece s.Children().Each(func(i int, s *goquery.Selection) { - var p Piece + var p []Piece attr := make(map[string]string) if s.Is("span") { - p = Piece{NORMAL_TEXT, s.Text(), nil} + // p = Piece{NORMAL_TEXT, s.Text(), nil} + p = append(p, Piece{NORMAL_TEXT, s.Text(), nil}) } else if s.Is("a") { attr["href"], _ = s.Attr("href") - p = Piece{LINK, removeBrAndBlank(s.Text()), attr} + // p = Piece{LINK, removeBrAndBlank(s.Text()), attr} + p = append(p, Piece{LINK, removeBrAndBlank(s.Text()), attr}) } else if s.Is("img") { attr["src"], _ = s.Attr("data-src") attr["alt"], _ = s.Attr("alt") attr["title"], _ = s.Attr("title") - p = Piece{IMAGE, "", attr} + // p = Piece{IMAGE, "", attr} + p = append(p, Piece{IMAGE, "", attr}) } else if s.Is("ol") { - // TODO + p = append(p, parseOl(s)...) } else if s.Is("ul") { - // TODO + p = append(p, parseUl(s)...) + } else if s.Is("section") { + p = append(p, parseSection(s)...) } else { - p = Piece{NORMAL_TEXT, s.Text(), nil} + // p = Piece{NORMAL_TEXT, s.Text(), nil} // TODO } // fmt.Printf("%+v\n", t) - piece = append(piece, p) + piece = append(piece, p...) }) return piece } @@ -73,6 +78,24 @@ func parsePre(s *goquery.Selection) []Piece { return []Piece{p} } +func parseUl(s *goquery.Selection) []Piece { + var list []Piece + s.Find("li").Each(func(i int, s *goquery.Selection) { + li := Piece{U_LIST, parseSection(s), nil} + list = append(list, li) + }) + return list +} + +func parseOl(s *goquery.Selection) []Piece { + var list []Piece + s.Find("li").Each(func(i int, s *goquery.Selection) { + li := Piece{O_LIST, parseSection(s), nil} + list = append(list, li) + }) + return list +} + func ParseFromReader(r io.Reader) Article { var article Article doc, err := goquery.NewDocumentFromReader(r) @@ -107,18 +130,15 @@ func ParseFromReader(r io.Reader) Article { // var paragraph Paragraph if s.Is("pre") || s.Is("section.code-snippet__fix") { // 代码块 - // paragraph = parsePre(s) pieces = append(pieces, parsePre(s)...) } else if s.Is("p") || s.Is("section") { - // paragraph = parseSection(s) pieces = append(pieces, parseSection(s)...) } else if s.Is("h1") || s.Is("h2") || s.Is("h3") || s.Is("h4") || s.Is("h5") || s.Is("h6") { - // paragraph = parseHeader(s) pieces = append(pieces, parseHeader(s)...) } else if s.Is("ol") { - // TODO + pieces = append(pieces, parseOl(s)...) } else if s.Is("ul") { - // TODO + pieces = append(pieces, parseUl(s)...) } // sections = append(sections, paragraph) pieces = append(pieces, Piece{BR, nil, nil}) diff --git a/test/test2.go b/test/test2.go new file mode 100644 index 0000000..be7652b --- /dev/null +++ b/test/test2.go @@ -0,0 +1,20 @@ +package test + +import ( + "encoding/json" + "io/ioutil" + + "github.com/fengxxc/wechatmp2markdown/format" + "github.com/fengxxc/wechatmp2markdown/parse" +) + +func Test2() { + var articleStruct parse.Article = parse.ParseFromURL("https://mp.weixin.qq.com/s?__biz=MzIzOTU0NTQ0MA==&mid=2247506315&idx=1&sn=1546be4ecece176f669da4eed7076ee2&chksm=e92ae484de5d6d92d93cd68b927fa91e2935a75c9aafc02f294237653ca8a342e8982cabbc1d&cur_album_id=1391790902901014528&scene=189#wechat_redirect") + + byteArry, _ := json.MarshalIndent(articleStruct, "", " ") + // fmt.Println(string(byteArry)) + ioutil.WriteFile("./test/test2_target.json", byteArry, 0644) + + var mdString string = format.Format((articleStruct)) + ioutil.WriteFile("./test/test2_target.md", []byte(mdString), 0644) +}