Files
polaris/pkg/importlist/douban/douban.go
Simon Ding f5c977224b fix
2024-09-20 14:27:49 +08:00

101 lines
2.2 KiB
Go

package douban
import (
"fmt"
"net/http"
"polaris/log"
"polaris/pkg/importlist"
"strconv"
"strings"
"github.com/PuerkitoBio/goquery"
)
// ua is the User-Agent header value set on every outgoing request in this
// file. It mimics a desktop Chrome browser on macOS.
// NOTE(review): presumably needed so Douban serves the regular HTML page to
// the scraper — confirm.
const ua = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/128.0.0.0 Safari/537.36"
// ParseDoulist downloads the page at doulistUrl and visits every
// "doulist-item" element on it, reading the entry title, the href of its title
// link and the release year found in the entry's abstract text.
//
// doulistUrl must contain the substring "doulist"; any other value is rejected
// with an error before a request is built. Request construction failures,
// transport errors, a non-200 response and HTML parse failures are returned as
// errors.
//
// NOTE(review): the parsed items are only printed with println and then
// dropped, and the success path returns (nil, nil). Callers must not
// dereference the returned response until the result is actually populated.
func ParseDoulist(doulistUrl string) (*importlist.Response, error) {
	if !strings.Contains(doulistUrl, "doulist") {
		return nil, fmt.Errorf("not doulist")
	}

	request, err := http.NewRequest(http.MethodGet, doulistUrl, nil)
	if err != nil {
		return nil, err
	}
	request.Header.Set("User-Agent", ua)

	resp, err := http.DefaultClient.Do(request)
	if err != nil {
		return nil, err
	}
	defer resp.Body.Close()

	if code := resp.StatusCode; code != http.StatusOK {
		return nil, fmt.Errorf("status code error: %d %s", code, resp.Status)
	}

	page, err := goquery.NewDocumentFromReader(resp.Body)
	if err != nil {
		return nil, err
	}

	page.Find("div[class=doulist-item]").Each(func(_ int, entry *goquery.Selection) {
		anchor := entry.Find("div[class=title]").Find("div>a")
		target, found := anchor.Attr("href")
		if !found {
			// Entries without a title link are skipped entirely.
			return
		}

		releaseYear := doulistItemYear(entry.Find("div[class=abstract]").Text())
		item := importlist.Item{
			Title: strings.TrimSpace(anchor.Text()),
			Year:  releaseYear,
		}
		_ = item
		println(anchor.Text(), target)
	})

	return nil, nil
}

// doulistItemYear scans abstract line by line for lines containing "年份" and
// returns the integer that follows the first ':' on such a line. When several
// lines qualify, the last one that parses wins; 0 is returned when none does.
// Values that are not valid integers are logged and skipped.
func doulistItemYear(abstract string) int {
	year := 0
	for _, line := range strings.Split(abstract, "\n") {
		if !strings.Contains(line, "年份") {
			continue
		}
		fields := strings.Split(line, ":")
		if len(fields) < 2 {
			continue
		}
		raw := fields[1]
		parsed, err := strconv.Atoi(strings.TrimSpace(raw))
		if err != nil {
			log.Errorf("convert year number %s to int error: %v", raw, err)
			continue
		}
		year = parsed
	}
	return year
}
// parseDetailPage issues a GET request for url using the package User-Agent
// and parses the response body as an HTML document.
//
// Request construction failures, transport errors, a non-200 response and HTML
// parse failures are returned as errors.
//
// NOTE(review): nothing is extracted from the parsed document yet; on success
// the function always returns ("", nil).
func parseDetailPage(url string) (string, error) {
	request, err := http.NewRequest(http.MethodGet, url, nil)
	if err != nil {
		return "", err
	}
	request.Header.Set("User-Agent", ua)

	resp, err := http.DefaultClient.Do(request)
	if err != nil {
		return "", err
	}
	defer resp.Body.Close()

	if code := resp.StatusCode; code != http.StatusOK {
		return "", fmt.Errorf("status code error: %d %s", code, resp.Status)
	}

	// The document is parsed only to surface parse errors; its content is unused.
	if _, err := goquery.NewDocumentFromReader(resp.Body); err != nil {
		return "", err
	}
	return "", nil
}