json API通用爬取模块
2021-07-15 21:16
标签:python 爬虫 & json API通用爬取模块 标签:python 爬虫 原文地址:http://blog.51cto.com/9399369/2106859
import requests
def jsonRequest(url, postdata, timeout=None):
    """
    POST ``postdata`` to an API endpoint and return the decoded JSON body.

    Exceptions raised by ``requests`` (for example on a connection problem
    or when the body cannot be decoded as JSON) are not caught here and
    propagate to the caller.

    :param url: address of the API endpoint
    :param postdata: payload handed to ``requests.post`` as ``data``
    :param timeout: optional timeout forwarded to ``requests.post``; the
        default ``None`` leaves the library's default behaviour unchanged
    :return: the decoded JSON body when the status code is 200, otherwise
        the string ``"requests faild"``
    """
    response = requests.post(url, data=postdata, timeout=timeout)
    if response.status_code == 200:
        return response.json()
    # The sentinel text (including its spelling) is kept unchanged so that
    # callers comparing against it keep working.
    return "requests faild"
def extractionData(jsondata, keynote):
    """
    Look up ``keynote`` in ``jsondata``, print what was found and return it.

    Printing depends on the type of the value:

    * dict: one ``key value`` line per entry;
    * list: one ``key value`` line per entry of every dict element
      (non-dict elements are not printed);
    * anything else: the value itself.

    :param jsondata: already-parsed JSON data that can be indexed by
        ``keynote``
    :param keynote: key whose value should be extracted
    :return: the value stored under ``keynote``
    :raises KeyError: if ``jsondata`` is a dict without ``keynote``
    """
    extractionValue = jsondata[keynote]
    # Single-argument print(...) with %-formatting produces the same output
    # on Python 2 and Python 3, unlike the ``print a, b`` statement.
    if isinstance(extractionValue, dict):
        for key in extractionValue:
            print("%s %s" % (key, extractionValue[key]))
    elif isinstance(extractionValue, list):
        for item in extractionValue:
            if isinstance(item, dict):
                for key in item:
                    print("%s %s" % (key, item[key]))
    else:
        print(extractionValue)
    return extractionValue
def jsonRequestPost(url, postdata, timeout=None):
    """
    POST ``postdata`` to an API endpoint and wrap the outcome in a dict.

    Any status code other than 200 is reported as ``status`` 400 with an
    empty-string ``result``. Exceptions raised by ``requests`` (connection
    problems, a body that cannot be decoded as JSON) are not caught here.

    :param url: address of the API endpoint
    :param postdata: payload handed to ``requests.post`` as ``data``
    :param timeout: optional timeout forwarded to ``requests.post``; the
        default ``None`` leaves the library's default behaviour unchanged
    :return: dict with the keys ``'status'`` (0 on success, 400 otherwise),
        ``'msg'`` (human-readable outcome) and ``'result'`` (decoded JSON
        body on success, ``''`` otherwise)
    """
    response = requests.post(url, data=postdata, timeout=timeout)
    if response.status_code == 200:
        status = 0
        msg = "请求url成功"
        result = response.json()
    else:
        status = 400
        msg = "请求url失败"
        result = ''
    return {
        'status': status,
        'msg': msg,
        'result': result,
    }
def jsonRequestGet(url, timeout=None):
    """
    GET an API endpoint and wrap the outcome in a dict.

    The request is sent with a fixed browser-like ``user-agent`` header.
    Any status code other than 200 is reported as ``status`` 400 with an
    empty-string ``result``. Exceptions raised by ``requests`` (connection
    problems, a body that cannot be decoded as JSON) are not caught here.

    :param url: address of the API endpoint
    :param timeout: optional timeout forwarded to ``requests.get``; the
        default ``None`` leaves the library's default behaviour unchanged
    :return: dict with the keys ``'status'`` (0 on success, 400 otherwise),
        ``'msg'`` (human-readable outcome) and ``'result'`` (decoded JSON
        body on success, ``''`` otherwise)
    """
    headers = {
        "user-agent": "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.87 UBrowser/6.2.3964.2 Safari/537.36",
    }
    response = requests.get(url, headers=headers, timeout=timeout)
    if response.status_code == 200:
        status = 0
        msg = "请求url成功"
        result = response.json()
    else:
        status = 400
        msg = "请求url失败"
        result = ''
    return {
        'status': status,
        'msg': msg,
        'result': result,
    }
def makePostData(params):
    """
    Build the payload dict for a POST request.

    :param params: iterable of dicts, each holding a ``'key'`` and a
        ``'value'`` entry
    :return: dict mapping every ``'key'`` to its ``'value'``; when a key
        occurs more than once the last occurrence wins
    :raises KeyError: if an entry lacks ``'key'`` or ``'value'``
    """
    return {item['key']: item['value'] for item in params}
def getTableField(jsonrequestdata, responseListKey, sourceField):
    """
    Collect one field from every record of a list nested in a JSON response.

    :param jsonrequestdata: decoded JSON response data
    :param responseListKey: ``|``-separated keys leading from the top of
        ``jsonrequestdata`` to the records to iterate, e.g. ``"data|list"``
    :param sourceField: ``|``-separated path to read inside each record;
        a step may carry a list index, e.g. ``"networks[1]|ip"`` or
        ``"serverInfo|hostname"``
    :return: list with one entry per record: the value found at
        ``sourceField``, or the string ``"None"`` when the path cannot be
        followed for that record
    :raises KeyError: if a ``responseListKey`` step is missing from a dict
        (lookups along ``responseListKey`` are deliberately not guarded)
    """
    records = jsonrequestdata
    for key in responseListKey.split('|'):
        records = records[key]
    fieldPath = sourceField.split('|')
    return [_extractField(record, fieldPath) for record in records]


def _extractField(record, fieldPath):
    """Follow ``fieldPath`` into ``record``; return ``"None"`` if a step fails."""
    value = record
    for step in fieldPath:
        try:
            if '[' in step:
                # "name[3]" -> look up "name", then take element 3 of it.
                pieces = step.split('[')
                name = pieces[0]
                index = pieces[1].split(']')[0]
                value = value[name][int(index)]
            else:
                value = value[step]
        except (KeyError, IndexError, TypeError, ValueError):
            # Once a step fails the placeholder string can never be indexed
            # by a later step, so the final answer is already known.
            return "None"
    return value
if __name__ == "__main__":
    # Demo: fetch a JSON API and print the "movieName" of every entry found
    # under data -> list in the response.
    url = "https://box.maoyan.com/promovie/api/box/second.json"
    params = [
        {"value": "1", "key": "flag"},
        {"value": "12", "key": "userid"},
        {"value": "1524234956", "key": "expiretime"},
        {"value": "1d5df8ff087815336a8e0f299c9811fe", "key": "token"},
        {"value": "7", "key": "roleid"},
        {"value": "190", "key": "projectid"},
    ]
    # Shows how makePostData is used; the GET request below does not send it.
    postdata = makePostData(params)
    response = jsonRequestGet(url)
    jsonrequestdata = response['result']
    responseListKey = "data|list"
    sourceField = "movieName"
    if response['status'] == 0:
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print(getTableField(jsonrequestdata, responseListKey, sourceField))
    else:
        # On failure 'result' is an empty string, which getTableField cannot
        # traverse, so report the failure message instead.
        print(response['msg'])