mirror of
https://github.com/NanmiCoder/MediaCrawler.git
synced 2026-06-06 09:57:25 +08:00
feat: support time deplay for all platform
This commit is contained in:
@@ -15,7 +15,7 @@
|
||||
|
||||
import asyncio
|
||||
import os
|
||||
import random
|
||||
# import random # Removed as we now use fixed config.CRAWLER_MAX_SLEEP_SEC intervals
|
||||
from asyncio import Task
|
||||
from typing import Dict, List, Optional, Tuple, Union
|
||||
from datetime import datetime, timedelta
|
||||
@@ -208,6 +208,11 @@ class BilibiliCrawler(AbstractCrawler):
|
||||
await bilibili_store.update_up_info(video_item)
|
||||
await self.get_bilibili_video(video_item, semaphore)
|
||||
page += 1
|
||||
|
||||
# Sleep after page navigation
|
||||
await asyncio.sleep(config.CRAWLER_MAX_SLEEP_SEC)
|
||||
utils.logger.info(f"[BilibiliCrawler.search_by_keywords] Sleeping for {config.CRAWLER_MAX_SLEEP_SEC} seconds after page {page-1}")
|
||||
|
||||
await self.batch_get_video_comments(video_id_list)
|
||||
|
||||
async def search_by_keywords_in_time_range(self, daily_limit: bool):
|
||||
@@ -284,6 +289,11 @@ class BilibiliCrawler(AbstractCrawler):
|
||||
await self.get_bilibili_video(video_item, semaphore)
|
||||
|
||||
page += 1
|
||||
|
||||
# Sleep after page navigation
|
||||
await asyncio.sleep(config.CRAWLER_MAX_SLEEP_SEC)
|
||||
utils.logger.info(f"[BilibiliCrawler.search_by_keywords_in_time_range] Sleeping for {config.CRAWLER_MAX_SLEEP_SEC} seconds after page {page-1}")
|
||||
|
||||
await self.batch_get_video_comments(video_id_list)
|
||||
|
||||
except Exception as e:
|
||||
@@ -318,10 +328,11 @@ class BilibiliCrawler(AbstractCrawler):
|
||||
async with semaphore:
|
||||
try:
|
||||
utils.logger.info(f"[BilibiliCrawler.get_comments] begin get video_id: {video_id} comments ...")
|
||||
await asyncio.sleep(random.uniform(0.5, 1.5))
|
||||
await asyncio.sleep(config.CRAWLER_MAX_SLEEP_SEC)
|
||||
utils.logger.info(f"[BilibiliCrawler.get_comments] Sleeping for {config.CRAWLER_MAX_SLEEP_SEC} seconds after fetching comments for video {video_id}")
|
||||
await self.bili_client.get_video_all_comments(
|
||||
video_id=video_id,
|
||||
crawl_interval=random.random(),
|
||||
crawl_interval=config.CRAWLER_MAX_SLEEP_SEC,
|
||||
is_fetch_sub_comments=config.ENABLE_GET_SUB_COMMENTS,
|
||||
callback=bilibili_store.batch_update_bilibili_video_comments,
|
||||
max_count=config.CRAWLER_MAX_COMMENTS_COUNT_SINGLENOTES,
|
||||
@@ -347,7 +358,8 @@ class BilibiliCrawler(AbstractCrawler):
|
||||
await self.get_specified_videos(video_bvids_list)
|
||||
if int(result["page"]["count"]) <= pn * ps:
|
||||
break
|
||||
await asyncio.sleep(random.random())
|
||||
await asyncio.sleep(config.CRAWLER_MAX_SLEEP_SEC)
|
||||
utils.logger.info(f"[BilibiliCrawler.get_creator_videos] Sleeping for {config.CRAWLER_MAX_SLEEP_SEC} seconds after page {pn}")
|
||||
pn += 1
|
||||
|
||||
async def get_specified_videos(self, bvids_list: List[str]):
|
||||
@@ -381,6 +393,11 @@ class BilibiliCrawler(AbstractCrawler):
|
||||
async with semaphore:
|
||||
try:
|
||||
result = await self.bili_client.get_video_info(aid=aid, bvid=bvid)
|
||||
|
||||
# Sleep after fetching video details
|
||||
await asyncio.sleep(config.CRAWLER_MAX_SLEEP_SEC)
|
||||
utils.logger.info(f"[BilibiliCrawler.get_video_info_task] Sleeping for {config.CRAWLER_MAX_SLEEP_SEC} seconds after fetching video details {bvid or aid}")
|
||||
|
||||
return result
|
||||
except DataFetchError as ex:
|
||||
utils.logger.error(f"[BilibiliCrawler.get_video_info_task] Get video detail error: {ex}")
|
||||
@@ -544,7 +561,8 @@ class BilibiliCrawler(AbstractCrawler):
|
||||
return
|
||||
|
||||
content = await self.bili_client.get_video_media(video_url)
|
||||
await asyncio.sleep(random.random())
|
||||
await asyncio.sleep(config.CRAWLER_MAX_SLEEP_SEC)
|
||||
utils.logger.info(f"[BilibiliCrawler.get_bilibili_video] Sleeping for {config.CRAWLER_MAX_SLEEP_SEC} seconds after fetching video {aid}")
|
||||
if content is None:
|
||||
return
|
||||
extension_file_name = f"video.mp4"
|
||||
@@ -600,7 +618,7 @@ class BilibiliCrawler(AbstractCrawler):
|
||||
utils.logger.info(f"[BilibiliCrawler.get_fans] begin get creator_id: {creator_id} fans ...")
|
||||
await self.bili_client.get_creator_all_fans(
|
||||
creator_info=creator_info,
|
||||
crawl_interval=random.random(),
|
||||
crawl_interval=config.CRAWLER_MAX_SLEEP_SEC,
|
||||
callback=bilibili_store.batch_update_bilibili_creator_fans,
|
||||
max_count=config.CRAWLER_MAX_CONTACTS_COUNT_SINGLENOTES,
|
||||
)
|
||||
@@ -623,7 +641,7 @@ class BilibiliCrawler(AbstractCrawler):
|
||||
utils.logger.info(f"[BilibiliCrawler.get_followings] begin get creator_id: {creator_id} followings ...")
|
||||
await self.bili_client.get_creator_all_followings(
|
||||
creator_info=creator_info,
|
||||
crawl_interval=random.random(),
|
||||
crawl_interval=config.CRAWLER_MAX_SLEEP_SEC,
|
||||
callback=bilibili_store.batch_update_bilibili_creator_followings,
|
||||
max_count=config.CRAWLER_MAX_CONTACTS_COUNT_SINGLENOTES,
|
||||
)
|
||||
@@ -646,7 +664,7 @@ class BilibiliCrawler(AbstractCrawler):
|
||||
utils.logger.info(f"[BilibiliCrawler.get_dynamics] begin get creator_id: {creator_id} dynamics ...")
|
||||
await self.bili_client.get_creator_all_dynamics(
|
||||
creator_info=creator_info,
|
||||
crawl_interval=random.random(),
|
||||
crawl_interval=config.CRAWLER_MAX_SLEEP_SEC,
|
||||
callback=bilibili_store.batch_update_bilibili_creator_dynamics,
|
||||
max_count=config.CRAWLER_MAX_DYNAMICS_COUNT_SINGLENOTES,
|
||||
)
|
||||
|
||||
Reference in New Issue
Block a user