From 428ee34c3b33d76096c00cdca2345b315a384c54 Mon Sep 17 00:00:00 2001 From: fengxxc <744320491@qq.com> Date: Sun, 30 Oct 2022 20:37:05 +0800 Subject: [PATCH] =?UTF-8?q?fix:=20=E4=BF=AE=E5=A4=8D=E6=96=87=E7=AB=A0meta?= =?UTF-8?q?=E9=83=A8=E5=88=86=E7=9A=84=E8=A7=A3=E6=9E=90?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- parse/parse.go | 24 +++++++++++++++++++----- 1 file changed, 19 insertions(+), 5 deletions(-) diff --git a/parse/parse.go b/parse/parse.go index d29ed70..c69db8e 100644 --- a/parse/parse.go +++ b/parse/parse.go @@ -11,12 +11,14 @@ import ( "regexp" "strconv" "strings" + "time" "github.com/PuerkitoBio/goquery" ) func parseSection(s *goquery.Selection) []Piece { var pieces []Piece + pieces = append(pieces, Piece{BR, nil, nil}) s.Contents().Each(func(i int, sc *goquery.Selection) { attr := make(map[string]string) if sc.Is("a") { @@ -111,10 +113,12 @@ func parseMeta(s *goquery.Selection) []string { if sc.Is("#profileBt") { res = append(res, removeBrAndBlank(sc.Find("#js_name").Text())) } else { - // t := sc.Text() - t := sc.Nodes[0].Data - // t, _ := sc.Html() - res = append(res, t) + style, exists := sc.Attr("style") + if !(exists && strings.Contains(style, "display: none;")) { + // t := sc.Nodes[0].Data + t := strings.TrimSpace(sc.Text()) + res = append(res, t) + } } }) return res @@ -133,10 +137,20 @@ func ParseFromReader(r io.Reader) Article { attr := map[string]string{"level": "1"} article.Title = Piece{HEADER, removeBrAndBlank(title), attr} - // meta 细节待完善 + // meta meta := mainContent.Find("#meta_content") metastring := parseMeta(meta) article.Meta = metastring + // 从js中找到发布时间 + re, _ := regexp.Compile("var ct = \"([0-9]+)\"") + findstrs := re.FindStringSubmatch(doc.Find("script").Text()) + if findstrs != nil && len(findstrs) > 1 { + var createTime string = findstrs[1] + timestamp, _ := strconv.Atoi(createTime) + time := time.Unix(int64(timestamp), 0) + // fmt.Println(time) + article.Meta = append(article.Meta, time.Format("2006-01-02 15:04")) + } // tags 细节待完善 tags := mainContent.Find("#js_tags").Text()