chore: update watchlist

This commit is contained in:
Simon Ding
2024-09-04 15:55:15 +08:00
parent e4c111ac2a
commit 5d4429bf7c
5 changed files with 115 additions and 10 deletions

View File

@@ -3,17 +3,28 @@ package douban
import (
"fmt"
"net/http"
"polaris/log"
"polaris/pkg/importlist"
"strconv"
"strings"
"github.com/PuerkitoBio/goquery"
)
// DoulistItem is a single entry of a Douban doulist.
type DoulistItem struct {
	// Name is the entry's display name.
	Name string
	// ImdbID is the IMDb identifier associated with the entry.
	ImdbID string
}
// ua is the User-Agent header value set on the outgoing HTTP requests built in this file.
const ua = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/128.0.0.0 Safari/537.36"
func ParseDoulist(doulistUrl string) ([]DoulistItem, error) {
res, err := http.Get(doulistUrl)
func ParseDoulist(doulistUrl string) (*importlist.Response, error) {
if !strings.Contains(doulistUrl, "doulist") {
return nil, fmt.Errorf("not doulist")
}
req, err := http.NewRequest("GET", doulistUrl, nil)
if err != nil {
return nil, err
}
req.Header.Set("User-Agent", ua)
res, err := http.DefaultClient.Do(req)
if err != nil {
return nil, err
}
@@ -26,6 +37,64 @@ func ParseDoulist(doulistUrl string) ([]DoulistItem, error) {
if err != nil {
return nil, err
}
doc.Find("")
doc.Find("div[class=doulist-item]").Each(func(i int, selection *goquery.Selection) {
titleDiv := selection.Find("div[class=title]")
link := titleDiv.Find("div>a")
href, ok := link.Attr("href")
if !ok {
return
}
abstract := selection.Find("div[class=abstract]")
lines := strings.Split(abstract.Text(), "\n")
year := 0
for _, l := range lines {
if strings.Contains(l, "年份") {
ppp := strings.Split(l, ":")
if len(ppp) < 2 {
continue
} else {
n := ppp[1]
n1, err := strconv.Atoi(n)
if err != nil {
log.Errorf("convert year number %s to int error: %v", n, err)
continue
}
year = n1
}
}
}
item := importlist.Item{
Title: strings.TrimSpace(link.Text()),
Year: year,
}
_ = item
println(link.Text(), href)
})
return nil, nil
}
// parseDetailPage fetches the page at url, sending the package's ua string as
// the User-Agent header, and parses the response body as HTML.
//
// It returns an error when the request cannot be built or sent, when the
// response status is not 200, or when the body cannot be parsed. The parsed
// document is currently discarded, so a successful call always returns an
// empty string.
func parseDetailPage(url string) (string, error) {
	request, err := http.NewRequest(http.MethodGet, url, nil)
	if err != nil {
		return "", err
	}
	request.Header.Set("User-Agent", ua)

	response, err := http.DefaultClient.Do(request)
	if err != nil {
		return "", err
	}
	defer response.Body.Close()

	if code := response.StatusCode; code != http.StatusOK {
		return "", fmt.Errorf("status code error: %d %s", code, response.Status)
	}

	// Only the parse error matters for now; the document itself is not used yet.
	if _, err := goquery.NewDocumentFromReader(response.Body); err != nil {
		return "", err
	}
	return "", nil
}

View File

@@ -0,0 +1,11 @@
package douban
import (
"polaris/log"
"testing"
)
// TestParseDoulist is a live smoke test: it runs ParseDoulist against a real
// doulist URL and logs the result.
//
// Because it needs network access, it is skipped under `go test -short`.
// A non-nil error from ParseDoulist fails the test instead of being only
// logged, so a broken fetch or parse no longer passes silently.
func TestParseDoulist(t *testing.T) {
	if testing.Short() {
		t.Skip("skipping doulist smoke test in short mode: requires network access")
	}
	r, err := ParseDoulist("https://www.douban.com/doulist/166422/")
	// Log before checking so the raw result is visible even on failure.
	log.Info(r, err)
	if err != nil {
		t.Fatalf("ParseDoulist returned an error: %v", err)
	}
}

View File

@@ -2,6 +2,7 @@ package importlist
type Item struct {
Title string
Year int
ImdbID string
TvdbID string
TmdbID string