爬取视频网站视频并下载
2021-03-20 10:28
标签:write new android sub down pil mob file comm
原文地址:https://blog.51cto.com/12629984/2488756
import requests
import os
import json
import time
import re
from retrying import retry
"""
爬虫*six room
"""
def change_title(file):
    """Return *file* with characters that are unsafe in file names replaced by "_".

    The result is used by the caller as both a directory name and a file
    name, so every character that Windows rejects in a path component is
    substituted: ``<``, ``>``, ``:``, ``"``, ``/``, backslash, ``|``, ``?``,
    ``*`` and the ASCII control characters (code points 0-31, which include
    newline and tab).  Each offending character is replaced one-for-one, so
    the length of the text is preserved.

    Args:
        file: Raw title text (a ``str``).

    Returns:
        The sanitized title as a ``str``.
    """
    # Raw string: ``\\`` is a regex-escaped backslash and ``\x00-\x1f`` is
    # the control-character range, both interpreted by the regex engine.
    pattern = re.compile(r'[<>:"/\\|?*\x00-\x1f]')
    return pattern.sub("_", file)
def _download_video(mp4_url, target_path, headers, timeout):
    """Stream the resource at *mp4_url* into the file *target_path*.

    The body is written to ``target_path + '.part'`` first and renamed only
    after the transfer finished, so an interrupted download never leaves a
    truncated file under the final name.

    Raises:
        requests.RequestException: on connection errors, timeouts or a
            non-2xx HTTP status.
        OSError: if the file cannot be written or renamed.
    """
    part_path = target_path + '.part'
    # stream=True avoids holding the whole video in memory at once.
    with requests.get(mp4_url, headers=headers, stream=True,
                      timeout=timeout) as response:
        response.raise_for_status()
        with open(part_path, 'wb') as f:
            for chunk in response.iter_content(chunk_size=256 * 1024):
                if chunk:
                    f.write(chunk)
    os.replace(part_path, target_path)


def mp4_download_json(start_page=1, end_page=100, page_size=20, timeout=30):
    """Download the recommended mini-videos listed by the v.6.cn JSON API.

    For every page in ``start_page..end_page`` (inclusive) the listing is
    fetched, and each entry's ``playurl`` is saved as
    ``<title>/<title>.mp4`` relative to the current working directory, where
    ``<title>`` is the entry's ``title`` passed through ``change_title``.
    Called with no arguments it crawls pages 1-100 with 20 entries per page.

    Args:
        start_page: First page number to request.
        end_page: Last page number to request (inclusive).
        page_size: Value sent as the ``pagesize`` query parameter.
        timeout: Per-request timeout in seconds, passed to ``requests``.

    Raises:
        requests.RequestException: if a listing page cannot be fetched.
        ValueError: if a listing page is not valid JSON.

    A failure while downloading a single video is reported and skipped so
    the remaining entries are still processed.
    """
    headers = {
        'user-agent': 'Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.108 Mobile Safari/537.36'
    }
    for pages in range(start_page, end_page + 1):
        print("\033[31;1m==========正在抓取第{}页的数据==========\033[0m".format(str(pages)))
        url = "https://v.6.cn/minivideo/getlist.php?act=recommend&page={}&pagesize={}".format(pages, page_size)
        response = requests.get(url, headers=headers, timeout=timeout)
        response.raise_for_status()
        dict_data = json.loads(response.text)
        # NOTE(review): assumes the payload is {"content": {"list": [...]}};
        # a missing or empty section is treated as "no videos on this page".
        data_list = (dict_data.get('content') or {}).get('list') or []
        for data in data_list:
            mp4_url = data.get('playurl')
            new_filename = change_title(data.get('title') or '')
            # An empty or dots-only name cannot serve as a directory name.
            if not mp4_url or not new_filename.strip('. '):
                print("skipped entry without a usable playurl/title")
                continue
            time.sleep(1)  # throttle requests to the video host
            # exist_ok avoids the check-then-create race of exists()+mkdir().
            os.makedirs(new_filename, exist_ok=True)
            print("\033[31;1m正在下载\033[0m %s"%(new_filename))
            target_path = os.path.join(new_filename, new_filename + '.mp4')
            try:
                _download_video(mp4_url, target_path, headers, timeout)
            except (requests.RequestException, OSError) as err:
                print("download failed for %s: %s" % (new_filename, err))
                continue
            print("\033[32;1m%s~~~下载完成。\033[m\r\n"%(new_filename))
if __name__ == "__main__":
    # Start the crawl only when run as a script, not when imported.
    mp4_download_json()