From 03e384bbe29633d06f0c713a1c398a978c382724 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E7=A8=8B=E5=BA=8F=E5=91=98=E9=98=BF=E6=B1=9F=28Relakkes?= =?UTF-8?q?=29?= Date: Sun, 19 Oct 2025 15:32:03 +0800 Subject: [PATCH] =?UTF-8?q?refactor:=20cdp=E6=A8=A1=E5=BC=8F=E4=B8=8B?= =?UTF-8?q?=E7=A7=BB=E9=99=A4stealth=E6=B3=A8=E5=85=A5?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- media_platform/bilibili/core.py | 5 +++-- media_platform/douyin/core.py | 5 +++-- media_platform/kuaishou/core.py | 6 ++++-- media_platform/weibo/core.py | 7 +++++-- media_platform/xhs/core.py | 5 +++-- media_platform/zhihu/core.py | 4 ++-- 6 files changed, 20 insertions(+), 12 deletions(-) diff --git a/media_platform/bilibili/core.py b/media_platform/bilibili/core.py index 39af14a..5f1f42c 100644 --- a/media_platform/bilibili/core.py +++ b/media_platform/bilibili/core.py @@ -78,8 +78,9 @@ class BilibiliCrawler(AbstractCrawler): # Launch a browser context. chromium = playwright.chromium self.browser_context = await self.launch_browser(chromium, None, self.user_agent, headless=config.HEADLESS) - # stealth.min.js is a js script to prevent the website from detecting the crawler. - await self.browser_context.add_init_script(path="libs/stealth.min.js") + # stealth.min.js is a js script to prevent the website from detecting the crawler. + await self.browser_context.add_init_script(path="libs/stealth.min.js") + self.context_page = await self.browser_context.new_page() await self.context_page.goto(self.index_url) diff --git a/media_platform/douyin/core.py b/media_platform/douyin/core.py index 92aebb3..c002155 100644 --- a/media_platform/douyin/core.py +++ b/media_platform/douyin/core.py @@ -74,8 +74,9 @@ class DouYinCrawler(AbstractCrawler): user_agent=None, headless=config.HEADLESS, ) - # stealth.min.js is a js script to prevent the website from detecting the crawler. - await self.browser_context.add_init_script(path="libs/stealth.min.js") + # stealth.min.js is a js script to prevent the website from detecting the crawler. + await self.browser_context.add_init_script(path="libs/stealth.min.js") + self.context_page = await self.browser_context.new_page() await self.context_page.goto(self.index_url) diff --git a/media_platform/kuaishou/core.py b/media_platform/kuaishou/core.py index 9e11a7f..4cd2eb8 100644 --- a/media_platform/kuaishou/core.py +++ b/media_platform/kuaishou/core.py @@ -78,8 +78,10 @@ class KuaishouCrawler(AbstractCrawler): self.browser_context = await self.launch_browser( chromium, None, self.user_agent, headless=config.HEADLESS ) - # stealth.min.js is a js script to prevent the website from detecting the crawler. - await self.browser_context.add_init_script(path="libs/stealth.min.js") + # stealth.min.js is a js script to prevent the website from detecting the crawler. + await self.browser_context.add_init_script(path="libs/stealth.min.js") + + self.context_page = await self.browser_context.new_page() await self.context_page.goto(f"{self.index_url}?isHome=1") diff --git a/media_platform/weibo/core.py b/media_platform/weibo/core.py index d502386..e78a212 100644 --- a/media_platform/weibo/core.py +++ b/media_platform/weibo/core.py @@ -77,8 +77,11 @@ class WeiboCrawler(AbstractCrawler): # Launch a browser context. chromium = playwright.chromium self.browser_context = await self.launch_browser(chromium, None, self.mobile_user_agent, headless=config.HEADLESS) - # stealth.min.js is a js script to prevent the website from detecting the crawler. - await self.browser_context.add_init_script(path="libs/stealth.min.js") + + # stealth.min.js is a js script to prevent the website from detecting the crawler. + await self.browser_context.add_init_script(path="libs/stealth.min.js") + + self.context_page = await self.browser_context.new_page() await self.context_page.goto(self.mobile_index_url) diff --git a/media_platform/xhs/core.py b/media_platform/xhs/core.py index 536c1ca..3567c6b 100644 --- a/media_platform/xhs/core.py +++ b/media_platform/xhs/core.py @@ -79,8 +79,9 @@ class XiaoHongShuCrawler(AbstractCrawler): self.user_agent, headless=config.HEADLESS, ) - # stealth.min.js is a js script to prevent the website from detecting the crawler. - await self.browser_context.add_init_script(path="libs/stealth.min.js") + # stealth.min.js is a js script to prevent the website from detecting the crawler. + await self.browser_context.add_init_script(path="libs/stealth.min.js") + self.context_page = await self.browser_context.new_page() await self.context_page.goto(self.index_url) diff --git a/media_platform/zhihu/core.py b/media_platform/zhihu/core.py index ea87e1c..ad1b729 100644 --- a/media_platform/zhihu/core.py +++ b/media_platform/zhihu/core.py @@ -86,8 +86,8 @@ class ZhihuCrawler(AbstractCrawler): self.browser_context = await self.launch_browser( chromium, None, self.user_agent, headless=config.HEADLESS ) - # stealth.min.js is a js script to prevent the website from detecting the crawler. - await self.browser_context.add_init_script(path="libs/stealth.min.js") + # stealth.min.js is a js script to prevent the website from detecting the crawler. + await self.browser_context.add_init_script(path="libs/stealth.min.js") self.context_page = await self.browser_context.new_page() await self.context_page.goto(self.index_url, wait_until="domcontentloaded")