mirror of
https://github.com/NanmiCoder/MediaCrawler.git
synced 2026-05-27 21:17:27 +08:00
feat: Douyin supports obtaining video links and cover images. for issue #620
This commit is contained in:
@@ -147,6 +147,8 @@ CREATE TABLE `douyin_aweme`
|
|||||||
`share_count` varchar(16) DEFAULT NULL COMMENT '视频分享数',
|
`share_count` varchar(16) DEFAULT NULL COMMENT '视频分享数',
|
||||||
`collected_count` varchar(16) DEFAULT NULL COMMENT '视频收藏数',
|
`collected_count` varchar(16) DEFAULT NULL COMMENT '视频收藏数',
|
||||||
`aweme_url` varchar(255) DEFAULT NULL COMMENT '视频详情页URL',
|
`aweme_url` varchar(255) DEFAULT NULL COMMENT '视频详情页URL',
|
||||||
|
`cover_url` varchar(500) DEFAULT NULL COMMENT '视频封面图URL',
|
||||||
|
`video_download_url` varchar(1024) DEFAULT NULL COMMENT '视频下载地址',
|
||||||
PRIMARY KEY (`id`),
|
PRIMARY KEY (`id`),
|
||||||
KEY `idx_douyin_awem_aweme_i_6f7bc6` (`aweme_id`),
|
KEY `idx_douyin_awem_aweme_i_6f7bc6` (`aweme_id`),
|
||||||
KEY `idx_douyin_awem_create__299dfe` (`create_time`)
|
KEY `idx_douyin_awem_create__299dfe` (`create_time`)
|
||||||
|
|||||||
@@ -62,6 +62,48 @@ def _extract_comment_image_list(comment_item: Dict) -> List[str]:
|
|||||||
return images_res
|
return images_res
|
||||||
|
|
||||||
|
|
||||||
|
def _extract_content_cover_url(aweme_detail: Dict) -> str:
|
||||||
|
"""
|
||||||
|
提取视频封面地址
|
||||||
|
|
||||||
|
Args:
|
||||||
|
aweme_detail (Dict): 抖音内容详情
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
str: 视频封面地址
|
||||||
|
"""
|
||||||
|
res_cover_url = ""
|
||||||
|
|
||||||
|
video_item = aweme_detail.get("video", {})
|
||||||
|
raw_cover_url_list = (
|
||||||
|
video_item.get("raw_cover", {}) or video_item.get("origin_cover", {})
|
||||||
|
).get("url_list", [])
|
||||||
|
if raw_cover_url_list and len(raw_cover_url_list) > 1:
|
||||||
|
res_cover_url = raw_cover_url_list[1]
|
||||||
|
|
||||||
|
return res_cover_url
|
||||||
|
|
||||||
|
|
||||||
|
def _extract_video_download_url(aweme_detail: Dict) -> str:
|
||||||
|
"""
|
||||||
|
提取视频下载地址
|
||||||
|
|
||||||
|
Args:
|
||||||
|
aweme_detail (Dict): 抖音视频
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
str: 视频下载地址
|
||||||
|
"""
|
||||||
|
video_item = aweme_detail.get("video", {})
|
||||||
|
url_h264_list = video_item.get("play_addr_h264", {}).get("url_list", [])
|
||||||
|
url_256_list = video_item.get("play_addr_256", {}).get("url_list", [])
|
||||||
|
url_list = video_item.get("play_addr", {}).get("url_list", [])
|
||||||
|
actual_url_list = url_h264_list or url_256_list or url_list
|
||||||
|
if not actual_url_list or len(actual_url_list) < 2:
|
||||||
|
return ""
|
||||||
|
return actual_url_list[-1]
|
||||||
|
|
||||||
|
|
||||||
async def update_douyin_aweme(aweme_item: Dict):
|
async def update_douyin_aweme(aweme_item: Dict):
|
||||||
aweme_id = aweme_item.get("aweme_id")
|
aweme_id = aweme_item.get("aweme_id")
|
||||||
user_info = aweme_item.get("author", {})
|
user_info = aweme_item.get("author", {})
|
||||||
@@ -86,6 +128,8 @@ async def update_douyin_aweme(aweme_item: Dict):
|
|||||||
"ip_location": aweme_item.get("ip_label", ""),
|
"ip_location": aweme_item.get("ip_label", ""),
|
||||||
"last_modify_ts": utils.get_current_timestamp(),
|
"last_modify_ts": utils.get_current_timestamp(),
|
||||||
"aweme_url": f"https://www.douyin.com/video/{aweme_id}",
|
"aweme_url": f"https://www.douyin.com/video/{aweme_id}",
|
||||||
|
"cover_url": _extract_content_cover_url(aweme_item),
|
||||||
|
"video_download_url": _extract_video_download_url(aweme_item),
|
||||||
"source_keyword": source_keyword_var.get(),
|
"source_keyword": source_keyword_var.get(),
|
||||||
}
|
}
|
||||||
utils.logger.info(
|
utils.logger.info(
|
||||||
|
|||||||
Reference in New Issue
Block a user