mirror of
https://github.com/NanmiCoder/MediaCrawler.git
synced 2026-06-08 02:47:26 +08:00
feature: 支持小红书图片、视频下载
This commit is contained in:
@@ -8,6 +8,7 @@ import config
|
||||
|
||||
from . import xhs_store_impl
|
||||
from .xhs_store_impl import *
|
||||
from .xhs_store_image import *
|
||||
|
||||
|
||||
class XhsStoreFactory:
|
||||
@@ -25,6 +26,25 @@ class XhsStoreFactory:
|
||||
return store_class()
|
||||
|
||||
|
||||
def get_video_url_arr(note_item: Dict) -> List:
    """
    Collect the downloadable video URLs of a note.

    The origin video key (``origin_video_key``, then ``originVideoKey``) under
    ``video.consumer`` is preferred. When neither is set, the function falls
    back to the ``master_url`` of every entry in ``video.media.stream.h264``.

    Args:
        note_item: note detail dict; only read, never modified.

    Returns:
        A list of URL strings. Empty when the note is not a video note or no
        URL can be derived from it.
    """
    if note_item.get('type') != 'video':
        return []

    # Any level of the nested payload may be missing or None; treat that the
    # same as "empty" instead of raising AttributeError.
    video = note_item.get('video') or {}
    consumer = video.get('consumer') or {}

    # A missing key yields None, not '', so test truthiness rather than
    # comparing with '' (otherwise the URL would end in the literal "None").
    origin_video_key = consumer.get('origin_video_key') or consumer.get('originVideoKey')
    if origin_video_key:
        return [f"http://sns-video-bd.xhscdn.com/{origin_video_key}"]

    # Fallback: the h264 streams (watermarked).
    media = video.get('media') or {}
    stream = media.get('stream') or {}
    videos = stream.get('h264')
    if not isinstance(videos, list):
        return []

    # Skip entries without a usable master_url so callers can safely join the
    # result into a single string.
    return [
        v.get('master_url')
        for v in videos
        if isinstance(v, dict) and v.get('master_url')
    ]
|
||||
|
||||
|
||||
async def update_xhs_note(note_item: Dict):
|
||||
note_id = note_item.get("note_id")
|
||||
user_info = note_item.get("user", {})
|
||||
@@ -36,11 +56,7 @@ async def update_xhs_note(note_item: Dict):
|
||||
if img.get('url_default') != '':
|
||||
img.update({'url': img.get('url_default')})
|
||||
|
||||
video_url = ''
|
||||
if note_item.get('type') == 'video':
|
||||
videos = note_item.get('video').get('media').get('stream').get('h264')
|
||||
if type(videos).__name__ == 'list':
|
||||
video_url = ','.join([v.get('master_url') for v in videos])
|
||||
video_url = ','.join(get_video_url_arr(note_item))
|
||||
|
||||
local_db_item = {
|
||||
"note_id": note_item.get("note_id"),
|
||||
@@ -127,3 +143,8 @@ async def save_creator(user_id: str, creator: Dict):
|
||||
}
|
||||
utils.logger.info(f"[store.xhs.save_creator] creator:{local_db_item}")
|
||||
await XhsStoreFactory.create_store().store_creator(local_db_item)
|
||||
|
||||
|
||||
async def update_xhs_note_image(note_id, pic_content, extension_file_name):
    """
    Hand one downloaded note image to the image store.

    Args:
        note_id: id of the note the image belongs to
        pic_content: image content, passed through unchanged
        extension_file_name: file name component, passed through unchanged

    Returns:
        None
    """
    image_store = XiaoHongShuImage()
    # The store reads the note id under the key "notice_id".
    image_item = {
        "notice_id": note_id,
        "pic_content": pic_content,
        "extension_file_name": extension_file_name,
    }
    await image_store.store_image(image_item)
|
||||
|
||||
55
store/xhs/xhs_store_image.py
Normal file
55
store/xhs/xhs_store_image.py
Normal file
@@ -0,0 +1,55 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
# @Author : helloteemo
|
||||
# @Time : 2024/7/11 22:35
|
||||
# @Desc : 小红书图片保存
|
||||
import pathlib
|
||||
from typing import Dict
|
||||
|
||||
import aiofiles
|
||||
|
||||
from base.base_crawler import AbstractStoreImage
|
||||
from tools import utils
|
||||
|
||||
|
||||
class XiaoHongShuImage(AbstractStoreImage):
    """Save Xiaohongshu note images under ``image_store_path`` on local disk."""

    # Root directory; every note gets its own sub-directory beneath it.
    image_store_path: str = "data/xhs/images"

    async def store_image(self, image_content_item: Dict):
        """
        Store one image described by a dict.

        Args:
            image_content_item: dict read with the keys "notice_id",
                "pic_content" and "extension_file_name"

        Returns:
            None
        """
        await self.save_image(image_content_item.get("notice_id"), image_content_item.get("pic_content"),
                              image_content_item.get("extension_file_name"))

    def make_save_file_name(self, notice_id: str, extension_file_name: str) -> str:
        """
        Build the path an image is saved to.

        Args:
            notice_id: notice id, used as the sub-directory name
            extension_file_name: last path component of the saved file

        Returns:
            "<image_store_path>/<notice_id>/<extension_file_name>"
        """
        return f"{self.image_store_path}/{notice_id}/{extension_file_name}"

    async def save_image(self, notice_id: str, pic_content: bytes, extension_file_name="jpg"):
        """
        Write the image content to local disk and log the saved path.

        Args:
            notice_id: notice id
            pic_content: image content; the file is opened in binary mode,
                so this must be bytes-like
            extension_file_name: last path component of the saved file

        Returns:
            None
        """
        save_file_name = self.make_save_file_name(notice_id, extension_file_name)
        # Derive the directory from the final path so the two can never
        # disagree, and so a non-str notice_id does not break concatenation.
        pathlib.Path(save_file_name).parent.mkdir(parents=True, exist_ok=True)
        async with aiofiles.open(save_file_name, 'wb') as f:
            await f.write(pic_content)
        utils.logger.info(f"[XiaoHongShuImageStoreImplement.save_image] save image {save_file_name} success ...")
|
||||
Reference in New Issue
Block a user