mirror of
https://github.com/NanmiCoder/MediaCrawler.git
synced 2026-06-06 18:07:26 +08:00
feat: 增加搜索词来源渠道
This commit is contained in:
@@ -12,7 +12,7 @@ from base.base_crawler import AbstractCrawler
|
||||
from proxy.proxy_ip_pool import IpInfoModel, create_ip_pool
|
||||
from store import xhs as xhs_store
|
||||
from tools import utils
|
||||
from var import crawler_type_var
|
||||
from var import crawler_type_var, source_keyword_var
|
||||
|
||||
from .client import XiaoHongShuClient
|
||||
from .exception import DataFetchError
|
||||
@@ -94,6 +94,7 @@ class XiaoHongShuCrawler(AbstractCrawler):
|
||||
config.CRAWLER_MAX_NOTES_COUNT = xhs_limit_count
|
||||
start_page = config.START_PAGE
|
||||
for keyword in config.KEYWORDS.split(","):
|
||||
source_keyword_var.set(keyword)
|
||||
utils.logger.info(f"[XiaoHongShuCrawler.search] Current search keyword: {keyword}")
|
||||
page = 1
|
||||
while (page - start_page + 1) * xhs_limit_count <= config.CRAWLER_MAX_NOTES_COUNT:
|
||||
|
||||
Reference in New Issue
Block a user