mirror of
https://github.com/simon-ding/polaris.git
synced 2026-06-08 19:17:53 +08:00
WIP: douban wish list
This commit is contained in:
@@ -2,29 +2,25 @@ package douban
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"io"
|
||||
"net/http"
|
||||
"polaris/log"
|
||||
"polaris/pkg/importlist"
|
||||
"strconv"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/PuerkitoBio/goquery"
|
||||
"github.com/gocolly/colly"
|
||||
)
|
||||
|
||||
// ua is the browser User-Agent string attached to the HTTP requests this
// package sends. Only one declaration may exist in the package; the older
// Chrome/128 value was superseded by this one.
const ua = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/133.0.0.0 Safari/537.36"
|
||||
|
||||
func ParseDoulist(doulistUrl string) (*importlist.Response, error) {
|
||||
if !strings.Contains(doulistUrl, "doulist") {
|
||||
return nil, fmt.Errorf("not doulist")
|
||||
}
|
||||
|
||||
req, err := http.NewRequest("GET", doulistUrl, nil)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
req.Header.Set("User-Agent", ua)
|
||||
|
||||
res, err := http.DefaultClient.Do(req)
|
||||
res, err := doHttpReq("GET", doulistUrl, nil)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
@@ -85,13 +81,8 @@ func ParseDoulist(doulistUrl string) (*importlist.Response, error) {
|
||||
|
||||
func parseDetailPage(url string) (string, error) {
|
||||
println(url)
|
||||
req, err := http.NewRequest("GET", url, nil)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
req.Header.Set("User-Agent", ua)
|
||||
|
||||
res, err := http.DefaultClient.Do(req)
|
||||
res, err := doHttpReq("GET", url, nil)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
@@ -115,3 +106,80 @@ func parseDetailPage(url string) (string, error) {
|
||||
_ = doc
|
||||
return "", nil
|
||||
}
|
||||
func NewDoubanWishlist(personId string) *DoubanWishlist {
|
||||
return &DoubanWishlist{PersonId: personId}
|
||||
}
|
||||
|
||||
// DoubanWishlist identifies the Douban account whose movie wish list is read.
type DoubanWishlist struct {
	// PersonId is the account id that is substituted into the wish-list URL.
	PersonId string
}
|
||||
|
||||
// wishlistUrl is the fmt template for one page of a user's movie wish list.
// Its verbs are, in order, the person id (%s) and the start offset (%d).
const wishlistUrl = "https://movie.douban.com/people/%s/wish?sort=time&start=%d&mode=grid&tags_sort=count"
|
||||
|
||||
func (d *DoubanWishlist) GetWishlist(page int) (*importlist.Response, error) {
|
||||
c := colly.NewCollector(colly.UserAgent(ua))
|
||||
c.Limit(&colly.LimitRule{
|
||||
DomainRegexp: "*",
|
||||
Delay: 10 * time.Second,
|
||||
RandomDelay: 2 * time.Second,
|
||||
})
|
||||
url := fmt.Sprintf(wishlistUrl, d.PersonId, (page-1)*15)
|
||||
c.OnHTML("div[class='item comment-item']", func(e *colly.HTMLElement) {
|
||||
if !strings.HasPrefix(e.Request.URL.String(), "https://movie.douban.com/people") {
|
||||
return
|
||||
}
|
||||
e.DOM.Find("div[class='pic'] a[title]").Each(func(i int, selection *goquery.Selection) {
|
||||
println(selection.Attr("href"))
|
||||
url, ok := selection.Attr("href")
|
||||
if ok {
|
||||
c.Visit(url)
|
||||
}
|
||||
})
|
||||
})
|
||||
|
||||
c.OnHTML("#content", func(h *colly.HTMLElement) {
|
||||
var item importlist.Item
|
||||
h.DOM.Find("h1").Each(func(i int, selection *goquery.Selection) {
|
||||
selection.Find("span[property]").Each(func(i int, selection *goquery.Selection) {
|
||||
println(selection.Text())
|
||||
item.Title = selection.Text()
|
||||
})
|
||||
selection.Find("span[class='year']").Each(func(i int, selection *goquery.Selection) {
|
||||
n, _ := strconv.Atoi(selection.Text())
|
||||
item.Year = n
|
||||
})
|
||||
|
||||
})
|
||||
h.DOM.Find("#info").Each(func(i int, s *goquery.Selection) {
|
||||
info := strings.TrimSpace(s.Text())
|
||||
lines := strings.Split(info, "\n")
|
||||
if len(lines) == 0 {
|
||||
return
|
||||
}
|
||||
last := lines[len(lines)-1]
|
||||
if !strings.HasPrefix(strings.ToLower(last), "imdb") {
|
||||
return
|
||||
}
|
||||
ss := strings.Split(last, ":")
|
||||
for _, p := range ss {
|
||||
p := strings.TrimSpace(strings.ToLower(p))
|
||||
if strings.HasPrefix(p, "tt") {
|
||||
item.ImdbID = p
|
||||
}
|
||||
}
|
||||
})
|
||||
log.Info(item)
|
||||
})
|
||||
|
||||
return nil, c.Visit(url)
|
||||
}
|
||||
|
||||
func doHttpReq(method, url string, body io.Reader) (*http.Response, error) {
|
||||
|
||||
req, err := http.NewRequest(method, url, body)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
req.Header.Set("User-Agent", ua)
|
||||
return http.DefaultClient.Do(req)
|
||||
}
|
||||
|
||||
@@ -9,3 +9,10 @@ func TestParseDoulist(t *testing.T) {
|
||||
r, err := ParseDoulist("https://www.douban.com/doulist/81580/")
|
||||
log.Info(r, err)
|
||||
}
|
||||
|
||||
|
||||
func Test111(t *testing.T) {
|
||||
d := NewDoubanWishlist("69894889")
|
||||
_, err := d.GetWishlist(1)
|
||||
log.Infof("err: %v", err)
|
||||
}
|
||||
Reference in New Issue
Block a user