Python 超简化的 18 行代码爬取一本小说
2021-07-14 19:07
标签:webkit 2.0 decode chap ssi code wan python exp
import re
def getnvvel():
html = urllib.request.urlopen("http://www.quanshuwang.com/book/44/44683").read().decode(‘gbk‘) # download sould code
urls = re.findall(r‘
title = "douluo" # Normoally,you should use request.urlopen
f = open(‘../novel/%s.txt‘ % title, ‘w‘) # create a douluo.txt
for url in urls:
chapter_url = url[0]
chapter_title = url[1]
chapter_content_list = urllib.request.urlopen(chapter_url).read().decode("gbk")
chapter_content_list = re.findall(r‘ .?
(.*?)