# Python snippet: resolve the watermark-free play URL and title of a Douyin video.
def get_video_url(douyin_url):
    """Resolve the watermark-free play URL and a file-safe title for one video.

    Fetches the page at ``douyin_url``, decodes the JSON assigned to
    ``window.__INITIAL_STATE__`` and looks for the video record under the
    ``initialVideo`` or ``videoDetail`` key. When neither key is present, the
    ``itemId`` found in the page source is used to query the ``iteminfo``
    web API instead. Requests are sent with the module-level ``headers``.

    Args:
        douyin_url: URL of the Douyin video page.

    Returns:
        A ``(no_watermark_url, video_title)`` tuple on success, or ``None``
        when the video could not be resolved (a message is printed).
    """
    try:
        response = requests.get(douyin_url, headers=headers, timeout=10)
        response.raise_for_status()
        page_content = response.text

        # The page embeds its state as `window.__INITIAL_STATE__ = {...};(function()`.
        # Single backslashes: in a raw string `\\.` would demand a literal backslash.
        pattern = re.compile(r'window\.__INITIAL_STATE__ = (.*?);\(function\(\)')
        match = pattern.search(page_content)
        if not match:
            print("未找到视频信息")
            return None
        json_data = json.loads(match.group(1))

        # Look the item id up once, up front: it is needed both for the API
        # fallback and for the default title, whichever branch runs below.
        item_id_match = re.search(r'"itemId":"(.*?)"', page_content)
        item_id = item_id_match.group(1) if item_id_match else None

        # NOTE(review): the page schema varies between Douyin versions; these
        # keys are the ones the original code probed — adjust as needed.
        video_info = None
        if "initialVideo" in json_data:
            video_info = json_data["initialVideo"]
        elif "videoDetail" in json_data:
            video_info = json_data["videoDetail"]
        else:
            if not item_id:
                print("未找到视频信息")
                return None
            api_url = f"https://www.douyin.com/web/api/v2/aweme/iteminfo/?item_ids={item_id}"
            api_response = requests.get(api_url, headers=headers, timeout=10)
            api_response.raise_for_status()
            item_list = api_response.json().get("item_list") or []
            video_info = item_list[0] if item_list else None

        if not video_info or "video" not in video_info:
            print("未找到视频播放地址")
            return None

        url_list = video_info["video"].get("play_addr", {}).get("url_list") or []
        if not url_list:
            print("未找到视频播放地址")
            return None

        # Key step: the watermarked address differs only by "playwm" vs "play".
        no_watermark_url = url_list[0].replace("playwm", "play")

        # Title doubles as the file name: drop characters that are illegal in
        # file names and fall back to a generated name if nothing is left.
        default_title = f"douyin_video_{item_id}" if item_id else "douyin_video"
        video_title = video_info.get("desc") or default_title
        video_title = re.sub(r'[\\/:*?"<>|]', '', video_title).strip() or default_title
        return no_watermark_url, video_title
    except Exception as e:
        # Best-effort boundary: any network/parse failure is reported and
        # turned into None so a batch run can continue with the next URL.
        print(f"获取视频地址失败:{e}")
        return None
# Python snippet: stream a video into the local douyin_videos/ directory.
def download_video(video_url, video_title):
    """Stream a video to ``douyin_videos/<video_title>.mp4``.

    If the target name is already taken, ``_1``, ``_2``, ... is appended until
    a free name is found. Data is written to a temporary ``.part`` file and
    renamed on success, so an interrupted download never leaves a truncated
    ``.mp4`` behind. Failures are printed, not raised.

    Args:
        video_url: Direct (possibly redirecting) URL of the video stream.
        video_title: File-name-safe title used as the base file name.
    """
    part_path = None
    try:
        # The video address may redirect; follow it and stream the body.
        # Using the response as a context manager releases the connection.
        with requests.get(video_url, headers=headers, timeout=20,
                          stream=True, allow_redirects=True) as video_response:
            video_response.raise_for_status()

            # The output directory does not exist on a first run.
            os.makedirs("douyin_videos", exist_ok=True)

            # Avoid overwriting an existing file: append a running number.
            file_path = f"douyin_videos/{video_title}.mp4"
            count = 1
            while os.path.exists(file_path):
                file_path = f"douyin_videos/{video_title}_{count}.mp4"
                count += 1

            part_path = file_path + ".part"
            with open(part_path, "wb") as f:
                # 1 MB chunks keep memory use flat for large files.
                for chunk in video_response.iter_content(chunk_size=1024 * 1024):
                    if chunk:
                        f.write(chunk)

        os.replace(part_path, file_path)
        part_path = None
        print(f"视频下载完成:{file_path}")
    except Exception as e:
        print(f"视频下载失败:{e}")
        # Remove the incomplete temporary file, if one was started.
        if part_path and os.path.exists(part_path):
            try:
                os.remove(part_path)
            except OSError:
                pass
# Python snippet: batch-download driver and script entry point.
def batch_download(video_urls):
    """Resolve and download every Douyin video in ``video_urls``.

    Each URL is handled independently: a URL that cannot be resolved is
    reported and skipped, and processing continues with the next one.

    Args:
        video_urls: Iterable of Douyin video page URLs.
    """
    for url in video_urls:
        # A real newline separates the log output of consecutive videos
        # (the doubled backslash printed a literal "\n" instead).
        print(f"\n正在处理:{url}")
        video_info = get_video_url(url)
        if not video_info:
            print(f"处理失败:{url}")
            continue
        no_watermark_url, video_title = video_info
        download_video(no_watermark_url, video_title)
# Script entry point.
if __name__ == "__main__":
    # Douyin video links to download in one run (placeholders; the list
    # could equally be read from a file).
    douyin_video_urls = [
        "https://www.douyin.com/video/738xxxxxx",
        "https://www.douyin.com/video/739xxxxxx",
    ]
    batch_download(douyin_video_urls)







待会儿见
K哥馆
mayun
文鼎_应老师
课课家运营团队
liangchsh
启程软考
