mirror of
https://github.com/NanmiCoder/MediaCrawler.git
synced 2026-06-09 03:17:25 +08:00
新增对微博博客内照片获取的支持 文件存放路径data/weibo/images
This commit is contained in:
@@ -121,8 +121,10 @@ class WeiboCrawler(AbstractCrawler):
|
||||
for note_item in note_list:
|
||||
if note_item:
|
||||
mblog: Dict = note_item.get("mblog")
|
||||
note_id_list.append(mblog.get("id"))
|
||||
await weibo_store.update_weibo_note(note_item)
|
||||
if mblog:
|
||||
note_id_list.append(mblog.get("id"))
|
||||
await weibo_store.update_weibo_note(note_item)
|
||||
await self.get_note_images(mblog)
|
||||
|
||||
page += 1
|
||||
await self.batch_get_notes_comments(note_id_list)
|
||||
@@ -200,6 +202,28 @@ class WeiboCrawler(AbstractCrawler):
|
||||
except Exception as e:
|
||||
utils.logger.error(f"[WeiboCrawler.get_note_comments] may be been blocked, err:{e}")
|
||||
|
||||
async def get_note_images(self, mblog: Dict):
    """
    Download the pictures attached to a Weibo note and pass them to the store.

    Returns immediately when ``config.ENABLE_GET_IMAGES`` is falsy or when the
    note carries no ``pics`` entry. Each picture is fetched through
    ``self.wb_client.get_note_image`` and, if content came back, forwarded to
    ``weibo_store.update_weibo_note_image`` together with its ``pid`` and the
    text after the last ``.`` of its URL.
    :param mblog: the note's ``mblog`` payload; only its ``pics`` key is read
    :return: None
    """
    if not config.ENABLE_GET_IMAGES:
        utils.logger.info("[WeiboCrawler.get_note_images] Crawling image mode is not enabled")
        return

    # ``pics`` is iterated as a sequence of per-picture dicts, not used as a dict.
    pics = mblog.get("pics")
    if not pics:
        return

    for pic in pics:
        url = pic.get("url")
        pid = pic.get("pid")
        # Skip malformed entries up front: without a pid there is nothing to
        # store the image under, so do not spend a request on it (and do not
        # let a KeyError abort the caller's crawl loop).
        if not url or not pid:
            continue

        content = await self.wb_client.get_note_image(url)
        if content is None:
            continue

        # NOTE(review): takes everything after the last "." in the URL, so a
        # query string or fragment would end up in the extension — confirm the
        # image URLs never carry one.
        extension_file_name = url.split(".")[-1]
        await weibo_store.update_weibo_note_image(pid, content, extension_file_name)
|
||||
|
||||
async def create_weibo_client(self, httpx_proxy: Optional[str]) -> WeiboClient:
|
||||
"""Create xhs client"""
|
||||
utils.logger.info("[WeiboCrawler.create_weibo_client] Begin create weibo API client ...")
|
||||
|
||||
Reference in New Issue
Block a user