chore: add copyright to every file

This commit is contained in:
程序员阿江(Relakkes)
2025-11-18 12:24:02 +08:00
parent 5288bddb42
commit ff8c92daad
137 changed files with 2569 additions and 810 deletions

View File

@@ -1,3 +1,12 @@
# -*- coding: utf-8 -*-
# Copyright (c) 2025 relakkes@gmail.com
#
# This file is part of MediaCrawler project.
# Repository: https://github.com/NanmiCoder/MediaCrawler/blob/main/media_platform/tieba/core.py
# GitHub: https://github.com/NanmiCoder
# Licensed under NON-COMMERCIAL LEARNING LICENSE 1.1
#
# 声明:本代码仅供学习和研究目的使用。使用者应遵守以下原则:
# 1. 不得用于任何商业用途。
# 2. 使用时应遵守目标平台的使用条款和robots.txt规则。
@@ -182,11 +191,11 @@ class TieBaCrawler(AbstractCrawler):
await self.get_specified_notes(
note_id_list=[note_detail.note_id for note_detail in notes_list]
)
# Sleep after page navigation
await asyncio.sleep(config.CRAWLER_MAX_SLEEP_SEC)
utils.logger.info(f"[TieBaCrawler.search] Sleeping for {config.CRAWLER_MAX_SLEEP_SEC} seconds after page {page}")
page += 1
except Exception as ex:
utils.logger.error(
@@ -224,11 +233,11 @@ class TieBaCrawler(AbstractCrawler):
f"[BaiduTieBaCrawler.get_specified_tieba_notes] tieba name: {tieba_name} note list len: {len(note_list)}"
)
await self.get_specified_notes([note.note_id for note in note_list])
# Sleep after processing notes
await asyncio.sleep(config.CRAWLER_MAX_SLEEP_SEC)
utils.logger.info(f"[TieBaCrawler.get_specified_tieba_notes] Sleeping for {config.CRAWLER_MAX_SLEEP_SEC} seconds after processing notes from page {page_number}")
page_number += tieba_limit_count
async def get_specified_notes(
@@ -273,11 +282,11 @@ class TieBaCrawler(AbstractCrawler):
f"[BaiduTieBaCrawler.get_note_detail] Begin get note detail, note_id: {note_id}"
)
note_detail: TiebaNote = await self.tieba_client.get_note_by_id(note_id)
# Sleep after fetching note details
await asyncio.sleep(config.CRAWLER_MAX_SLEEP_SEC)
utils.logger.info(f"[TieBaCrawler.get_note_detail_async_task] Sleeping for {config.CRAWLER_MAX_SLEEP_SEC} seconds after fetching note details {note_id}")
if not note_detail:
utils.logger.error(
f"[BaiduTieBaCrawler.get_note_detail] Get note detail error, note_id: {note_id}"
@@ -333,11 +342,11 @@ class TieBaCrawler(AbstractCrawler):
utils.logger.info(
f"[BaiduTieBaCrawler.get_comments] Begin get note id comments {note_detail.note_id}"
)
# Sleep before fetching comments
await asyncio.sleep(config.CRAWLER_MAX_SLEEP_SEC)
utils.logger.info(f"[TieBaCrawler.get_comments_async_task] Sleeping for {config.CRAWLER_MAX_SLEEP_SEC} seconds before fetching comments for note {note_detail.note_id}")
await self.tieba_client.get_note_all_comments(
note_detail=note_detail,
crawl_interval=config.CRAWLER_MAX_SLEEP_SEC,