refactor: 优化抖音Crawler部分代码

fix: 日志初始化错误修复
This commit is contained in:
Relakkes
2023-07-15 21:30:12 +08:00
parent dad8d56ab5
commit 2398a17e21
10 changed files with 186 additions and 152 deletions

View File

@@ -8,13 +8,14 @@ from tenacity import (
retry,
stop_after_attempt,
wait_fixed,
retry_if_result
retry_if_result,
RetryError
)
from playwright.async_api import Page, TimeoutError as PlaywrightTimeoutError
from playwright.async_api import BrowserContext
import config
from tools import utils, easing
from tools import utils
from base.base_crawler import AbstractLogin
@@ -54,21 +55,22 @@ class DouYinLogin(AbstractLogin):
raise ValueError("Invalid Login Type Currently only supported qrcode or phone ...")
# If the page was redirected to the slider-captcha page, the slider has to be dragged again
await asyncio.sleep(3)
await asyncio.sleep(6)
current_page_title = await self.context_page.title()
if "验证码中间页" in current_page_title:
await self.check_page_display_slider(move_step=3, slider_level="hard")
# check login state
logging.info(f"login finished then check login state ...")
login_flag: bool = await self.check_login_state()
if not login_flag:
logging.info("login failed please confirm ...")
utils.logger.info(f"login finished then check login state ...")
try:
await self.check_login_state()
except RetryError:
utils.logger.info("login failed please confirm ...")
sys.exit()
# wait for redirect
wait_redirect_seconds = 5
logging.info(f"Login successful then wait for {wait_redirect_seconds} seconds redirect ...")
utils.logger.info(f"Login successful then wait for {wait_redirect_seconds} seconds redirect ...")
await asyncio.sleep(wait_redirect_seconds)
@retry(stop=stop_after_attempt(20), wait=wait_fixed(1), retry=retry_if_result(lambda value: value is False))
@@ -88,31 +90,31 @@ class DouYinLogin(AbstractLogin):
await self.context_page.wait_for_selector(dialog_selector, timeout=1000 * 10)
except Exception as e:
logging.error(f"login dialog box does not pop up automatically, error: {e}")
logging.info("login dialog box does not pop up automatically, we will manually click the login button")
utils.logger.info("login dialog box does not pop up automatically, we will manually click the login button")
login_button_ele = self.context_page.locator("xpath=//p[text() = '登录']")
await login_button_ele.click()
await asyncio.sleep(0.5)
async def login_by_qrcode(self):
logging.info("Begin login douyin by qrcode...")
utils.logger.info("Begin login douyin by qrcode...")
qrcode_img_selector = "xpath=//article[@class='web-login']//img"
base64_qrcode_img = await utils.find_login_qrcode(
self.context_page,
selector=qrcode_img_selector
)
if not base64_qrcode_img:
logging.info("login qrcode not found please confirm ...")
utils.logger.info("login qrcode not found please confirm ...")
sys.exit()
# show login qrcode
# utils.show_qrcode(base64_qrcode_img)
# utils.show_qrcode(base64_qrcode_img)
partial_show_qrcode = functools.partial(utils.show_qrcode, base64_qrcode_img)
asyncio.get_running_loop().run_in_executor(executor=None, func=partial_show_qrcode)
utils.show_qrcode(base64_qrcode_img)
await asyncio.sleep(2)
async def login_by_mobile(self):
logging.info("Begin login douyin by mobile ...")
utils.logger.info("Begin login douyin by mobile ...")
mobile_tap_ele = self.context_page.locator("xpath=//li[text() = '验证码登录']")
await mobile_tap_ele.click()
await self.context_page.wait_for_selector("xpath=//article[@class='web-login-mobile-code']")
@@ -128,7 +130,7 @@ class DouYinLogin(AbstractLogin):
redis_obj = aioredis.from_url(url=config.REDIS_DB_HOST, password=config.REDIS_DB_PWD, decode_responses=True)
max_get_sms_code_time = 60 * 2 # 最长获取验证码的时间为2分钟
while max_get_sms_code_time > 0:
logging.info(f"get douyin sms code from redis remaining time {max_get_sms_code_time}s ...")
utils.logger.info(f"get douyin sms code from redis remaining time {max_get_sms_code_time}s ...")
await asyncio.sleep(1)
sms_code_key = f"dy_{self.login_phone}"
sms_code_value = await redis_obj.get(sms_code_key)
@@ -170,20 +172,20 @@ class DouYinLogin(AbstractLogin):
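# If the slider was dragged too slowly or verification failed, the page shows an "operation too slow" prompt; click the refresh button here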
page_content = await self.context_page.content()
if "操作过慢" in page_content or "提示重新操作" in page_content:
logging.info("slider verify failed, retry ...")
utils.logger.info("slider verify failed, retry ...")
await self.context_page.click(selector="//a[contains(@class, 'secsdk_captcha_refresh')]")
continue
# After a successful slide, wait for the slider to disappear
await self.context_page.wait_for_selector(selector=back_selector, state="hidden", timeout=1000)
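# If the slider disappeared, verification succeeded and we leave the loop; if it did not, the wait_for_selector call above raises, the exception is caught, and the loop retries the slider captcha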
logging.info("slider verify success ...")
utils.logger.info("slider verify success ...")
slider_verify_success = True
except Exception as e:
logging.error(f"slider verify failed, error: {e}")
await asyncio.sleep(1)
max_slider_try_times -= 1
logging.info(f"remaining slider try times: {max_slider_try_times}")
utils.logger.info(f"remaining slider try times: {max_slider_try_times}")
continue
async def move_slider(self, back_selector: str, gap_selector: str, move_step: int = 10, slider_level="easy"):
@@ -240,7 +242,7 @@ class DouYinLogin(AbstractLogin):
await self.context_page.mouse.up()
async def login_by_cookies(self):
logging.info("Begin login douyin by cookie ...")
utils.logger.info("Begin login douyin by cookie ...")
for key, value in utils.convert_str_cookie_to_dict(self.cookie_str).items():
await self.browser_context.add_cookies([{
'name': key,