From ab6c8488d5f4ff28c0f2a4c8957ad71633e40784 Mon Sep 17 00:00:00 2001 From: fengxxc Date: Sun, 28 Nov 2021 20:23:26 +0800 Subject: [PATCH] =?UTF-8?q?feat:=20=E6=B7=BB=E5=8A=A0=E4=BB=8E=E6=96=87?= =?UTF-8?q?=E4=BB=B6=E8=AF=BB=E5=8F=96=E5=8A=9F=E8=83=BD=EF=BC=8C=E6=B7=BB?= =?UTF-8?q?=E5=8A=A0=E6=B5=8B=E8=AF=95=E7=94=A8=E4=BE=8B=E6=96=87=E4=BB=B6?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- main.go | 4 +-- parse/parse.go | 81 ++++++++++--------------------------------------- test/test1.go | 13 ++++++++ test/test1.html | 69 +++++++++++++++++++++++++++++++++++++++++ 4 files changed, 100 insertions(+), 67 deletions(-) create mode 100644 test/test1.go create mode 100644 test/test1.html diff --git a/main.go b/main.go index b578fa1..f516d8d 100644 --- a/main.go +++ b/main.go @@ -1,7 +1,7 @@ package main -import "github.com/fengxxc/wechatmp2markdown/parse" +import "github.com/fengxxc/wechatmp2markdown/test" func main() { - parse.Test() + test.Test1() } diff --git a/parse/parse.go b/parse/parse.go index 9bb0ee2..3a4b58c 100644 --- a/parse/parse.go +++ b/parse/parse.go @@ -1,9 +1,12 @@ package parse import ( + "bytes" "fmt" "io" + "io/ioutil" "log" + "os" "regexp" "strings" @@ -92,6 +95,19 @@ func ParseFromHTMLString(s string) Article { return ParseFromReader(strings.NewReader(s)) } +func ParseFromHTMLFile(filepath string) Article { + file, err := os.Open(filepath) + if err != nil { + panic(err) + } + defer file.Close() + content, err2 := ioutil.ReadAll(file) + if err2 != nil { + panic(err) + } + return ParseFromReader(bytes.NewReader(content)) +} + func removeBrAndBlank(s string) string { regstr := "\\s{2,}" reg, _ := regexp.Compile(regstr) @@ -104,68 +120,3 @@ func removeBrAndBlank(s string) string { } return strings.Replace(string(sb), "\n", " ", -1) } - -var testHTML = ` -
-
-

终于有人喷我了!

-
- - - 闪客sun - - - - 低并发编程 - - - 2021-11-26 -
- -
-

写公众号一年了,一直盼着能有人喷喷我,今天终于被我碰到了!

-
‍‍
-
还是我的一位读者发现的,在推特上,于是分享给了我。
-
图片
-
- 这是喷我最近的一个新系列, - - 你管这破玩意叫操作系统源码 - - ,正愁找不到借口推广一波呢,这不就给我来素材了。
-
-
-
-
-` - -func Test() { - res := ParseFromHTMLString(testHTML) - fmt.Println("---------------------------------------") - fmt.Printf("%+v\n", res) -} diff --git a/test/test1.go b/test/test1.go new file mode 100644 index 0000000..53331f8 --- /dev/null +++ b/test/test1.go @@ -0,0 +1,13 @@ +package test + +import ( + "fmt" + + "github.com/fengxxc/wechatmp2markdown/parse" +) + +func Test1() { + res := parse.ParseFromHTMLFile("./test/test1.html") + fmt.Println("-------------------test1.html-------------------") + fmt.Printf("%+v\n", res) +} diff --git a/test/test1.html b/test/test1.html new file mode 100644 index 0000000..3bd0faa --- /dev/null +++ b/test/test1.html @@ -0,0 +1,69 @@ + + + + + + + + test1 + + +
+
+

这里是文章标题

+
+ + + 这里是作者 + + + + 这里是公众号名 + + + 2021-01-01 +
+ +
+

正文第一行

+
‍‍
+
正文第二行。
+
图片
+
+ 正文第三行,part1,文本 + + 正文第三行,part2,链接 + + ,正文第三行,part3,文本。
+
+
+
+
+ + + \ No newline at end of file