mirror of
https://github.com/NanmiCoder/MediaCrawler.git
synced 2026-06-09 03:17:25 +08:00
improve base config reading command line arg logic
This commit is contained in:
@@ -21,9 +21,6 @@ from .login import KuaishouLogin
|
||||
|
||||
|
||||
class KuaishouCrawler(AbstractCrawler):
|
||||
platform: str
|
||||
login_type: str
|
||||
crawler_type: str
|
||||
context_page: Page
|
||||
ks_client: KuaiShouClient
|
||||
browser_context: BrowserContext
|
||||
@@ -32,13 +29,6 @@ class KuaishouCrawler(AbstractCrawler):
|
||||
self.index_url = "https://www.kuaishou.com"
|
||||
self.user_agent = utils.get_user_agent()
|
||||
|
||||
def init_config(self, platform: str, login_type: str, crawler_type: str, start_page: int, keyword: str):
|
||||
self.platform = platform
|
||||
self.login_type = login_type
|
||||
self.crawler_type = crawler_type
|
||||
self.start_page = start_page
|
||||
self.keyword = keyword
|
||||
|
||||
async def start(self):
|
||||
playwright_proxy_format, httpx_proxy_format = None, None
|
||||
if config.ENABLE_IP_PROXY:
|
||||
@@ -64,7 +54,7 @@ class KuaishouCrawler(AbstractCrawler):
|
||||
self.ks_client = await self.create_ks_client(httpx_proxy_format)
|
||||
if not await self.ks_client.pong():
|
||||
login_obj = KuaishouLogin(
|
||||
login_type=self.login_type,
|
||||
login_type=config.LOGIN_TYPE,
|
||||
login_phone=httpx_proxy_format,
|
||||
browser_context=self.browser_context,
|
||||
context_page=self.context_page,
|
||||
@@ -73,11 +63,11 @@ class KuaishouCrawler(AbstractCrawler):
|
||||
await login_obj.begin()
|
||||
await self.ks_client.update_cookies(browser_context=self.browser_context)
|
||||
|
||||
crawler_type_var.set(self.crawler_type)
|
||||
if self.crawler_type == "search":
|
||||
crawler_type_var.set(config.CRAWLER_TYPE)
|
||||
if config.CRAWLER_TYPE == "search":
|
||||
# Search for notes and retrieve their comment information.
|
||||
await self.search()
|
||||
elif self.crawler_type == "detail":
|
||||
elif config.CRAWLER_TYPE == "detail":
|
||||
# Get the information and comments of the specified post
|
||||
await self.get_specified_videos()
|
||||
else:
|
||||
@@ -90,8 +80,8 @@ class KuaishouCrawler(AbstractCrawler):
|
||||
ks_limit_count = 20 # kuaishou limit page fixed value
|
||||
if config.CRAWLER_MAX_NOTES_COUNT < ks_limit_count:
|
||||
config.CRAWLER_MAX_NOTES_COUNT = ks_limit_count
|
||||
start_page = self.start_page
|
||||
for keyword in self.keyword.split(","):
|
||||
start_page = config.START_PAGE
|
||||
for keyword in config.KEYWORDS.split(","):
|
||||
utils.logger.info(f"[KuaishouCrawler.search] Current search keyword: {keyword}")
|
||||
page = 1
|
||||
while (page - start_page + 1) * ks_limit_count <= config.CRAWLER_MAX_NOTES_COUNT:
|
||||
@@ -238,7 +228,7 @@ class KuaishouCrawler(AbstractCrawler):
|
||||
utils.logger.info("[KuaishouCrawler.launch_browser] Begin create browser context ...")
|
||||
if config.SAVE_LOGIN_STATE:
|
||||
user_data_dir = os.path.join(os.getcwd(), "browser_data",
|
||||
config.USER_DATA_DIR % self.platform) # type: ignore
|
||||
config.USER_DATA_DIR % config.PLATFORM) # type: ignore
|
||||
browser_context = await chromium.launch_persistent_context(
|
||||
user_data_dir=user_data_dir,
|
||||
accept_downloads=True,
|
||||
|
||||
@@ -19,7 +19,7 @@ class KuaishouLogin(AbstractLogin):
|
||||
login_phone: Optional[str] = "",
|
||||
cookie_str: str = ""
|
||||
):
|
||||
self.login_type = login_type
|
||||
config.LOGIN_TYPE = login_type
|
||||
self.browser_context = browser_context
|
||||
self.context_page = context_page
|
||||
self.login_phone = login_phone
|
||||
@@ -28,11 +28,11 @@ class KuaishouLogin(AbstractLogin):
|
||||
async def begin(self):
|
||||
"""Start login xiaohongshu"""
|
||||
utils.logger.info("[KuaishouLogin.begin] Begin login kuaishou ...")
|
||||
if self.login_type == "qrcode":
|
||||
if config.LOGIN_TYPE == "qrcode":
|
||||
await self.login_by_qrcode()
|
||||
elif self.login_type == "phone":
|
||||
elif config.LOGIN_TYPE == "phone":
|
||||
await self.login_by_mobile()
|
||||
elif self.login_type == "cookie":
|
||||
elif config.LOGIN_TYPE == "cookie":
|
||||
await self.login_by_cookies()
|
||||
else:
|
||||
raise ValueError("[KuaishouLogin.begin] Invalid Login Type Currently only supported qrcode or phone or cookie ...")
|
||||
|
||||
Reference in New Issue
Block a user