Python学习之爬虫
2021-04-23 21:27
标签:dex pyc 壁纸 name arm 勿喷 windows browser row
又被老师要求去搞Python,曰,下午回顾了一下Python的基础知识,写了个爬取图片的程序,在此做个分享吧。不喜勿喷。
Python学习之爬虫
标签:dex pyc 壁纸 name arm 勿喷 windows browser row
原文地址:https://www.cnblogs.com/wfszmg/p/13268477.html
import requests
import time
from bs4 import BeautifulSoup
import uuid
def downLoader(url, page):
    """Download the images listed on a single gallery page.

    Args:
        url: Site root. It is used as the prefix of the listing page address
            and of every image ``src`` found on that page.
        page: Listing page number. Page ``1`` is fetched from ``index.html``;
            any other value is fetched from ``index_<page>.html``.

    For each of the list positions 1 through 21 the first matching ``<img>``
    is looked up, and its ``src`` (prefixed with ``url``) is passed to
    ``down`` together with a fresh ``uuid.uuid1()`` string used as the file
    name. Positions with no matching image, or with no ``src`` attribute,
    are skipped.
    """
    headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/53.0.2785.104 Safari/537.36 Core/1.53.4843.400 QQBrowser/9.7.13021.400'}

    # Decide once whether this is the first page; `page` itself is never
    # rebound, so the check cannot be confused by an int/str mix-up later on.
    is_first_page = page == 1
    if is_first_page:
        page_name = "index.html"  # first page
    else:
        page_name = "index_" + str(page) + ".html"

    # A timeout keeps one stalled connection from hanging the whole run.
    res = requests.get(url + page_name, headers=headers, timeout=30)
    res.encoding = 'gbk'
    soup = BeautifulSoup(res.text, 'lxml')

    # The first page is queried with an extra <span> level between <a> and <img>.
    if is_first_page:
        img_path = ') > a > span > img'
    else:
        img_path = ') > a > img'

    for position in range(1, 22):
        add = soup.select('#main > div.slist > ul > li:nth-child(' + str(position) + img_path)
        if not add:
            # No image at this list position (short page or different
            # markup): skip it instead of raising IndexError.
            continue
        src = add[0].get('src')
        if not src:
            continue
        # Save the image locally under a unique name.
        down(url + src, str(uuid.uuid1()))
def down(url, name):
    """Download ``url`` and write the response body to a local ``.png`` file.

    Args:
        url: Address of the image to fetch.
        name: Text inserted into the file name. It is appended directly to
            the hard-coded path prefix (which ends in ``壁纸`` with no
            trailing separator) and followed by ``.png``.
    """
    # Fetch first, so a failed request does not leave an empty file behind.
    content = requests.get(url, timeout=30).content
    # Raw string: the backslashes are Windows path separators, not escapes.
    with open(r'G:\学习\PyCharm\PyCharm2017\py工作环境\爬虫\壁纸' + name + '.png', 'wb') as f:
        f.write(content)
if __name__ == '__main__':
    # Script entry point: walk listing pages 1 through 10 of the site.
    url = 'http://pic.netbian.com/'
    print('请骚等````')
    for i in range(1, 11):
        # Short pause before each page request.
        time.sleep(0.5)
        downLoader(url, i)
上一篇:C++求树子节点权重最大的和
下一篇:学Java第三天