diff --git a/media_platform/bilibili/core.py b/media_platform/bilibili/core.py index 39af14a..5f1f42c 100644 --- a/media_platform/bilibili/core.py +++ b/media_platform/bilibili/core.py @@ -78,8 +78,9 @@ class BilibiliCrawler(AbstractCrawler): # Launch a browser context. chromium = playwright.chromium self.browser_context = await self.launch_browser(chromium, None, self.user_agent, headless=config.HEADLESS) - # stealth.min.js is a js script to prevent the website from detecting the crawler. - await self.browser_context.add_init_script(path="libs/stealth.min.js") + # stealth.min.js is a js script to prevent the website from detecting the crawler. + await self.browser_context.add_init_script(path="libs/stealth.min.js") + self.context_page = await self.browser_context.new_page() await self.context_page.goto(self.index_url) diff --git a/media_platform/douyin/core.py b/media_platform/douyin/core.py index 92aebb3..c002155 100644 --- a/media_platform/douyin/core.py +++ b/media_platform/douyin/core.py @@ -74,8 +74,9 @@ class DouYinCrawler(AbstractCrawler): user_agent=None, headless=config.HEADLESS, ) - # stealth.min.js is a js script to prevent the website from detecting the crawler. - await self.browser_context.add_init_script(path="libs/stealth.min.js") + # stealth.min.js is a js script to prevent the website from detecting the crawler. + await self.browser_context.add_init_script(path="libs/stealth.min.js") + self.context_page = await self.browser_context.new_page() await self.context_page.goto(self.index_url) diff --git a/media_platform/kuaishou/core.py b/media_platform/kuaishou/core.py index 9e11a7f..4cd2eb8 100644 --- a/media_platform/kuaishou/core.py +++ b/media_platform/kuaishou/core.py @@ -78,8 +78,10 @@ class KuaishouCrawler(AbstractCrawler): self.browser_context = await self.launch_browser( chromium, None, self.user_agent, headless=config.HEADLESS ) - # stealth.min.js is a js script to prevent the website from detecting the crawler. - await self.browser_context.add_init_script(path="libs/stealth.min.js") + # stealth.min.js is a js script to prevent the website from detecting the crawler. + await self.browser_context.add_init_script(path="libs/stealth.min.js") + + self.context_page = await self.browser_context.new_page() await self.context_page.goto(f"{self.index_url}?isHome=1") diff --git a/media_platform/weibo/core.py b/media_platform/weibo/core.py index d502386..e78a212 100644 --- a/media_platform/weibo/core.py +++ b/media_platform/weibo/core.py @@ -77,8 +77,11 @@ class WeiboCrawler(AbstractCrawler): # Launch a browser context. chromium = playwright.chromium self.browser_context = await self.launch_browser(chromium, None, self.mobile_user_agent, headless=config.HEADLESS) - # stealth.min.js is a js script to prevent the website from detecting the crawler. - await self.browser_context.add_init_script(path="libs/stealth.min.js") + + # stealth.min.js is a js script to prevent the website from detecting the crawler. + await self.browser_context.add_init_script(path="libs/stealth.min.js") + + self.context_page = await self.browser_context.new_page() await self.context_page.goto(self.mobile_index_url) diff --git a/media_platform/xhs/core.py b/media_platform/xhs/core.py index 536c1ca..3567c6b 100644 --- a/media_platform/xhs/core.py +++ b/media_platform/xhs/core.py @@ -79,8 +79,9 @@ class XiaoHongShuCrawler(AbstractCrawler): self.user_agent, headless=config.HEADLESS, ) - # stealth.min.js is a js script to prevent the website from detecting the crawler. - await self.browser_context.add_init_script(path="libs/stealth.min.js") + # stealth.min.js is a js script to prevent the website from detecting the crawler. + await self.browser_context.add_init_script(path="libs/stealth.min.js") + self.context_page = await self.browser_context.new_page() await self.context_page.goto(self.index_url) diff --git a/media_platform/zhihu/core.py b/media_platform/zhihu/core.py index ea87e1c..ad1b729 100644 --- a/media_platform/zhihu/core.py +++ b/media_platform/zhihu/core.py @@ -86,8 +86,8 @@ class ZhihuCrawler(AbstractCrawler): self.browser_context = await self.launch_browser( chromium, None, self.user_agent, headless=config.HEADLESS ) - # stealth.min.js is a js script to prevent the website from detecting the crawler. - await self.browser_context.add_init_script(path="libs/stealth.min.js") + # stealth.min.js is a js script to prevent the website from detecting the crawler. + await self.browser_context.add_init_script(path="libs/stealth.min.js") self.context_page = await self.browser_context.new_page() await self.context_page.goto(self.index_url, wait_until="domcontentloaded")