新增对微博博客内照片获取的支持，文件存放路径：data/weibo/images

This commit is contained in:
Er_Meng
2024-04-09 17:21:52 +08:00
parent 5c409c6f0c
commit 16413c3074
6 changed files with 114 additions and 3 deletions

View File

@@ -121,8 +121,10 @@ class WeiboCrawler(AbstractCrawler):
for note_item in note_list:
if note_item:
mblog: Dict = note_item.get("mblog")
note_id_list.append(mblog.get("id"))
await weibo_store.update_weibo_note(note_item)
if mblog:
note_id_list.append(mblog.get("id"))
await weibo_store.update_weibo_note(note_item)
await self.get_note_images(mblog)
page += 1
await self.batch_get_notes_comments(note_id_list)
@@ -200,6 +202,28 @@ class WeiboCrawler(AbstractCrawler):
except Exception as e:
utils.logger.error(f"[WeiboCrawler.get_note_comments] may be been blocked, err:{e}")
async def get_note_images(self, mblog: Dict):
    """
    Fetch every picture attached to a Weibo post and hand it to the store.

    Returns immediately when ``config.ENABLE_GET_IMAGES`` is falsy or when
    the post carries no ``pics`` entry. Picture entries without a ``url`` or
    a ``pid`` are skipped, as are pictures for which the client returns
    ``None``.

    :param mblog: post payload; its ``pics`` value is iterated and each item
        is read through the ``url`` and ``pid`` keys
    :return: None
    """
    if not config.ENABLE_GET_IMAGES:
        utils.logger.info("[WeiboCrawler.get_note_images] Crawling image mode is not enabled")
        return

    # ``pics`` is iterated below, so it is a sequence of dict-like entries
    # rather than a single mapping.
    pics = mblog.get("pics")
    if not pics:
        return

    for pic in pics:
        url = pic.get("url")
        pid = pic.get("pid")
        # Check both keys before fetching so a malformed entry neither costs
        # a request nor raises KeyError and aborts the remaining pictures.
        if not url or pid is None:
            continue

        content = await self.wb_client.get_note_image(url)
        if content is None:
            continue

        # Text after the last "." in the URL is used as the file extension.
        # NOTE(review): a URL with a query string or no extension would yield
        # an odd value here - confirm the URL shapes the API returns.
        extension_file_name = url.split(".")[-1]
        await weibo_store.update_weibo_note_image(pid, content, extension_file_name)
async def create_weibo_client(self, httpx_proxy: Optional[str]) -> WeiboClient:
"""Create xhs client"""
utils.logger.info("[WeiboCrawler.create_weibo_client] Begin create weibo API client ...")