百度云获取全部文件API探索
2021-07-14 15:15
标签:python http .com lang 部分 json sts rem ref 此代码与上一篇文章百度云删除文件API接口探索紧密关联,其中部分代码共用,主要体现在bdstoken和cookie的获取方面,此文不多做赘述 需要注意的是,构造数据请求链接时,文件路径需要做urldecode,但是Python3中对“/”不做处理,需要手动替换才能正确获取 get_all_files则通过宽度优先遍历的方式遍历全部文件 百度云获取全部文件API探索 标签:python http .com lang 部分 json sts rem ref 原文地址:https://www.cnblogs.com/countryKing/p/9533125.html通过抓包发现百度云获取文件列表接口https://pan.baidu.com/api/list,本文主要试图通过该接口来封装一个获取百度网盘文件的api接口
1.抓包分析
2.代码实现
#! /usr/bin/python3
# -*- coding: UTF-8 -*-
import time, requests, json, urllib, sys, os
def get_dir_path (dir_path, bdstoken, cookies, db_file):
flag = True
dir_path = urllib.parse.quote (dir_path)
dir_path = dir_path.replace (‘/‘, ‘%2F‘)
page = 1
args = []
ret_list_all = []
ret_list_dir = []
while True:
url = ‘https://pan.baidu.com/api/list?‘
url += ‘app_id=250528‘
url += ‘&bdstoken=‘ + bdstoken
url += ‘&channel=chunlei‘
url += ‘&clienttype=0‘
url += ‘&desc=1‘
url += ‘&dir=‘ + dir_path
url += ‘&logid=MTUzNDM4NDk3MjYzNDAuNTAyODg4NzM4MTQyNDE0Nw==‘
url += ‘&num=100‘
url += ‘&order=time‘
url += ‘&page=‘ + str (page)
url += ‘&showempty=0‘
url += ‘&web=1‘
Cookie = ‘‘
for item in cookies:
if item [‘name‘] == ‘BDUSS‘ or item [‘name‘] == ‘STOKEN‘:
Cookie += item [‘name‘] + ‘=‘ + item [‘value‘] + ‘;‘
Cookie.rstrip (‘;‘)
headers = {
"Accept": "application/json, text/javascript, */*; q=0.01",
"Accept-Encoding": "gzip, deflate, br",
"Accept-Language": "zh-CN,zh;q=0.9",
"Connection": "keep-alive",
"Content-Type": "application/x-www-form-urlencoded; charset=UTF-8",
"Cookie": Cookie,
"Host": "pan.baidu.com",
"User-Agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.106 Safari/537.36",
}
try:
response = requests.get (url = url, headers = headers)
content = json.loads (response.text)
if content [‘list‘] == []:
break
else:
for file_item in content [‘list‘]:
fs_id = file_item [‘fs_id‘]
path = file_item [‘path‘]
name = file_item [‘server_filename‘]
size = file_item [‘size‘]
try:
md5 = file_item [‘md5‘]
except:
md5 = ‘md5‘
isdir = file_item [‘isdir‘]
arg = (str (fs_id), path, name, str (size), md5, isdir, str (file_item))
args.append (arg)
if isdir == 1:
ret_list_dir.append (path)
ret_list_all.append (path)
except BaseException as e:
print (‘Error Function: get_dir_path (dir_path, bdstoken, cookies, db_file)‘)
print (‘Error Argument:‘, urllib.parse.unquote (dir_path))
print (‘Error :‘, response.text)
print (‘Error Reason :‘, e)
flag = False
break
page += 1
return flag, ret_list_dir, ret_list_all
def get_all_files (username, bdstoken, cookies, db_file):
flag, cur_list, all_list = get_dir_path (‘/‘, bdstoken, cookies, db_file)
retry_list = []
while len (cur_list) != 0:
cur_path = cur_list [0]
cur_list.pop (0)
#print (‘next dir path :‘, cur_path)
flag, ret_list, all_list = get_dir_path (cur_path, bdstoken, cookies, db_file)
if flag:
cur_list.extend (ret_list)
#print (‘len (cur_list) :‘, len (cur_list))
#print (‘depth :‘, cur_path.count (‘/‘))
#print (‘*‘ * 64)
else:
retry_list.append (cur_path)
if len (retry_list) == 0:
print (‘get all files successful‘)
if os.path.exists (‘error_dir_path_list_‘ + username):
os.remove (‘error_dir_path_list_‘ + username)
else:
print (‘retry_list is :‘, retry_list)
print (‘start get retry_list‘)
cur_list = retry_list
retry_list = []
while len (cur_list) != 0:
cur_path = cur_list [0]
cur_list.pop (0)
#print (‘next dir path :‘, cur_path)
flag, ret_list, all_list = get_dir_path (cur_path, bdstoken, cookies, db_file)
if flag:
cur_list.extend (ret_list)
#print (‘len (cur_list) :‘, len (cur_list))
#print (‘depth :‘, cur_path.count (‘/‘))
#print (‘*‘ * 64)
else:
retry_list.append (cur_path)
if len (retry_list) == 0:
print (‘get all files successful‘)
else:
print (‘retry_list is :‘, retry_list)
3.代码分析
下一篇:jQuery 事件处理API