mirror of
https://github.com/NanmiCoder/MediaCrawler.git
synced 2026-02-06 23:21:33 +08:00
feat: Enhance Bilibili crawler with retry logic and robustness
This commit introduces several improvements to enhance the stability and functionality of the Bilibili crawler. - **Add Retry Logic:** Implement a retry mechanism with exponential backoff when fetching video comments. This makes the crawler more resilient to transient network issues or API errors. - **Improve Error Handling:** Add a `try...except` block to handle potential `JSONDecodeError` in the Bilibili client, preventing crashes when the API returns an invalid response. - **Ensure Clean Shutdown:** Refactor `main.py` to use a `try...finally` block, guaranteeing that the crawler and database connections are properly closed on exit, error, or `KeyboardInterrupt`. - **Update Default Config:** Adjust default configuration values to increase concurrency, enable word cloud generation by default, and refine the Bilibili search mode for more practical usage.
This commit is contained in:
28
main.py
28
main.py
@@ -45,25 +45,29 @@ class CrawlerFactory:
|
||||
return crawler_class()
|
||||
|
||||
async def main():
|
||||
# Init crawler
|
||||
crawler: Optional[AbstractCrawler] = None
|
||||
try:
|
||||
# parse cmd
|
||||
await cmd_arg.parse_cmd()
|
||||
|
||||
# parse cmd
|
||||
await cmd_arg.parse_cmd()
|
||||
# init db
|
||||
if config.SAVE_DATA_OPTION == "db":
|
||||
await db.init_db()
|
||||
|
||||
# init db
|
||||
if config.SAVE_DATA_OPTION == "db":
|
||||
await db.init_db()
|
||||
crawler = CrawlerFactory.create_crawler(platform=config.PLATFORM)
|
||||
await crawler.start()
|
||||
finally:
|
||||
if crawler:
|
||||
await crawler.close()
|
||||
if config.SAVE_DATA_OPTION == "db":
|
||||
await db.close()
|
||||
|
||||
crawler = CrawlerFactory.create_crawler(platform=config.PLATFORM)
|
||||
await crawler.start()
|
||||
|
||||
if config.SAVE_DATA_OPTION == "db":
|
||||
await db.close()
|
||||
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
try:
|
||||
# asyncio.run(main())
|
||||
asyncio.get_event_loop().run_until_complete(main())
|
||||
except KeyboardInterrupt:
|
||||
print("\n[main] Caught keyboard interrupt, exiting.")
|
||||
sys.exit()
|
||||
|
||||
Reference in New Issue
Block a user