mirror of
https://github.com/NanmiCoder/MediaCrawler.git
synced 2026-06-08 19:07:33 +08:00
refactor:优化部分代码
feat: 增加IP代理账号池
This commit is contained in:
33
main.py
33
main.py
@@ -3,6 +3,8 @@ import asyncio
|
||||
import argparse
|
||||
|
||||
import config
|
||||
from tools import utils
|
||||
from base import proxy_account_pool
|
||||
from media_platform.douyin import DouYinCrawler
|
||||
from media_platform.xhs import XiaoHongShuCrawler
|
||||
|
||||
@@ -19,24 +21,37 @@ class CrawlerFactory:
|
||||
|
||||
|
||||
async def main():
|
||||
utils.init_loging_config()
|
||||
# define command line params ...
|
||||
parser = argparse.ArgumentParser(description='Media crawler program.')
|
||||
parser.add_argument('--platform', type=str, help='Media platform select (xhs|dy)...', default=config.platform)
|
||||
parser.add_argument('--keywords', type=str, help='Search note/page keywords...', default=config.keyword)
|
||||
parser.add_argument('--lt', type=str, help='Login type (qrcode | phone | cookie)', default=config.login_type)
|
||||
parser.add_argument('--phone', type=str, help='Login phone', default=config.login_phone)
|
||||
parser.add_argument('--cookies', type=str, help='cookies to keep log in', default=config.cookies)
|
||||
parser.add_argument('--platform', type=str, help='Media platform select (xhs|dy)...', default=config.PLATFORM)
|
||||
parser.add_argument('--lt', type=str, help='Login type (qrcode | phone | cookie)', default=config.LOGIN_TYPE)
|
||||
|
||||
# init account pool
|
||||
account_pool = proxy_account_pool.create_account_pool()
|
||||
|
||||
args = parser.parse_args()
|
||||
crawler = CrawlerFactory().create_crawler(platform=args.platform)
|
||||
crawler.init_config(
|
||||
keywords=args.keywords,
|
||||
login_phone=args.phone,
|
||||
login_type=args.lt,
|
||||
cookie_str=args.cookies
|
||||
command_args=args,
|
||||
account_pool=account_pool
|
||||
)
|
||||
await crawler.start()
|
||||
|
||||
"""
|
||||
# retry when exception ...
|
||||
while True:
|
||||
try:
|
||||
await crawler.start()
|
||||
except Exception as e:
|
||||
logging.info(f"crawler start error: {e} ...")
|
||||
await crawler.close()
|
||||
# If you encounter an exception
|
||||
# sleep for a period of time before retrying
|
||||
# to avoid frequent requests that may result in the account being blocked.
|
||||
await asyncio.sleep(config.RETRY_INTERVAL)
|
||||
"""
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
try:
|
||||
|
||||
Reference in New Issue
Block a user