mirror of
https://github.com/NanmiCoder/MediaCrawler.git
synced 2026-06-07 02:17:25 +08:00
添加了抖音存储视频以及图片的逻辑,并将config.py中ENABLE_GET_IMAGES参数更名为ENABLE_GET_MEIDAS,在此基础上略微修改存储逻辑
This commit is contained in:
@@ -1,13 +1,12 @@
|
||||
# 声明:本代码仅供学习和研究目的使用。使用者应遵守以下原则:
|
||||
# 1. 不得用于任何商业用途。
|
||||
# 2. 使用时应遵守目标平台的使用条款和robots.txt规则。
|
||||
# 3. 不得进行大规模爬取或对平台造成运营干扰。
|
||||
# 4. 应合理控制请求频率,避免给目标平台带来不必要的负担。
|
||||
# 声明:本代码仅供学习和研究目的使用。使用者应遵守以下原则:
|
||||
# 1. 不得用于任何商业用途。
|
||||
# 2. 使用时应遵守目标平台的使用条款和robots.txt规则。
|
||||
# 3. 不得进行大规模爬取或对平台造成运营干扰。
|
||||
# 4. 应合理控制请求频率,避免给目标平台带来不必要的负担。
|
||||
# 5. 不得用于任何非法或不当的用途。
|
||||
#
|
||||
# 详细许可条款请参阅项目根目录下的LICENSE文件。
|
||||
# 使用本代码即表示您同意遵守上述原则和LICENSE中的所有条款。
|
||||
|
||||
#
|
||||
# 详细许可条款请参阅项目根目录下的LICENSE文件。
|
||||
# 使用本代码即表示您同意遵守上述原则和LICENSE中的所有条款。
|
||||
|
||||
from abc import ABC, abstractmethod
|
||||
from typing import Dict, Optional
|
||||
@@ -16,6 +15,7 @@ from playwright.async_api import BrowserContext, BrowserType, Playwright
|
||||
|
||||
|
||||
class AbstractCrawler(ABC):
|
||||
|
||||
@abstractmethod
|
||||
async def start(self):
|
||||
"""
|
||||
@@ -31,8 +31,7 @@ class AbstractCrawler(ABC):
|
||||
pass
|
||||
|
||||
@abstractmethod
|
||||
async def launch_browser(self, chromium: BrowserType, playwright_proxy: Optional[Dict], user_agent: Optional[str],
|
||||
headless: bool = True) -> BrowserContext:
|
||||
async def launch_browser(self, chromium: BrowserType, playwright_proxy: Optional[Dict], user_agent: Optional[str], headless: bool = True) -> BrowserContext:
|
||||
"""
|
||||
launch browser
|
||||
:param chromium: chromium browser
|
||||
@@ -43,8 +42,7 @@ class AbstractCrawler(ABC):
|
||||
"""
|
||||
pass
|
||||
|
||||
async def launch_browser_with_cdp(self, playwright: Playwright, playwright_proxy: Optional[Dict],
|
||||
user_agent: Optional[str], headless: bool = True) -> BrowserContext:
|
||||
async def launch_browser_with_cdp(self, playwright: Playwright, playwright_proxy: Optional[Dict], user_agent: Optional[str], headless: bool = True) -> BrowserContext:
|
||||
"""
|
||||
使用CDP模式启动浏览器(可选实现)
|
||||
:param playwright: playwright实例
|
||||
@@ -58,6 +56,7 @@ class AbstractCrawler(ABC):
|
||||
|
||||
|
||||
class AbstractLogin(ABC):
|
||||
|
||||
@abstractmethod
|
||||
async def begin(self):
|
||||
pass
|
||||
@@ -76,6 +75,7 @@ class AbstractLogin(ABC):
|
||||
|
||||
|
||||
class AbstractStore(ABC):
|
||||
|
||||
@abstractmethod
|
||||
async def store_content(self, content_item: Dict):
|
||||
pass
|
||||
@@ -99,7 +99,16 @@ class AbstractStoreImage(ABC):
|
||||
pass
|
||||
|
||||
|
||||
class AbstractStoreVideo(ABC):
    """Base class for stores that persist video content.

    NOTE(review): the original inline notes state that only weibo is
    supported and that support for all platforms is still to be done --
    confirm whether that is still accurate.
    """

    # TODO: support all platforms (original note: only weibo is supported).
    # The @abstractmethod decorator is intentionally left off (it is commented
    # out in the original), so this class and subclasses that do not override
    # store_video remain instantiable -- presumably until every platform
    # provides an implementation.
    async def store_video(self, video_content_item: Dict):
        """Store one video item; the base implementation does nothing.

        :param video_content_item: dict describing the video to store (its
            schema is not visible here -- check the concrete implementations)
        :return: None
        """
        return None
|
||||
|
||||
|
||||
class AbstractApiClient(ABC):
|
||||
|
||||
@abstractmethod
|
||||
async def request(self, method, url, **kwargs):
|
||||
pass
|
||||
|
||||
Reference in New Issue
Block a user