Python 写的笔趣阁网站小说下载器
2021-03-06 08:29
标签:code xpath demo 输入 imp nts text chapter 书籍 import requests replace
原文地址:https://www.cnblogs.com/fisherpau/p/14304849.html
import re
import time
from urllib.parse import urljoin

import requests
from lxml import etree
# source = requests.get(‘http://www.paoshu8.com/0_984/746463.html‘).content.decode(‘utf8‘)
# #print(source)
# demo = re.compile(‘
# lists = demo.findall(source)
# demo = re.compile(‘
# contents = demo.findall(source)[0]
# new_contents = contents.replace(‘
# print(new_contents)
#
# title = lists[0].split(‘_‘)[0]
# op = open(‘凡人修仙传.txt‘,‘a+‘)
# op.write(title+‘\n‘+new_contents)
# op.close()
# Interactive downloader: search biquge.info for a book, let the user pick one
# of the results, then append every chapter of that book to "<book title>.txt".

BASE_URL = 'http://www.biquge.info'
SEARCH_URL = BASE_URL + '/modules/article/search.php'
REQUEST_TIMEOUT = 30  # seconds; without a timeout a stalled connection hangs forever
CHAPTER_DELAY = 1  # seconds to pause between chapter requests


def fetch_html(url, params=None):
    """Download *url* and return it parsed as an lxml HTML tree.

    ``params`` is passed to ``requests.get`` so that query values (for
    example a Chinese search keyword) are URL-encoded instead of being
    concatenated into the URL verbatim. The body is decoded as UTF-8.
    """
    response = requests.get(url, params=params, timeout=REQUEST_TIMEOUT)
    return etree.HTML(response.content.decode('utf8'))


def pick_index(raw, count):
    """Convert the 1-based menu entry *raw* into a 0-based list index.

    Raises:
        ValueError: if *raw* is not an integer or lies outside ``1..count``.
            Rejecting 0 and negative numbers matters because they would
            otherwise index the result list from its end.
    """
    number = int(raw)
    if not 1 <= number <= count:
        raise ValueError('selection %d is outside 1..%d' % (number, count))
    return number - 1


def chapter_url(index_url, href):
    """Resolve *href* against *index_url*.

    Works for bare file names, root-relative paths and absolute URLs alike,
    unlike plain string concatenation.
    """
    return urljoin(index_url, href)


def format_chapter(title, paragraphs):
    """Return the text written to the output file for one chapter.

    The title goes on its own line, the paragraphs follow one per line, and
    a blank line separates the chapter from the next one.
    """
    return title + '\n' + '\n'.join(paragraphs) + '\n\n'


def main():
    """Run the search / select / download dialogue on the console."""
    keyword = input('请输入要下载的书籍的关键字:')
    search_page = fetch_html(SEARCH_URL, params={'searchkey': keyword})

    # Read title and link from the same <a> element so the two can never get
    # out of step with each other.
    books = []
    for link in search_page.xpath('//*[@id="wrapper"]/table//tr/td[1]/a'):
        href = link.get('href')
        if href:
            books.append((''.join(link.xpath('text()')), href))
    if not books:
        raise SystemExit('没有找到相关书籍')

    for number, (book_title, _) in enumerate(books, start=1):
        print(str(number) + '\t' + book_title)

    try:
        choice = pick_index(input('请输入要下载的书籍序号'), len(books))
    except ValueError:
        raise SystemExit('无效的书籍序号')
    book_title, book_href = books[choice]

    index_url = urljoin(BASE_URL, book_href)
    index_page = fetch_html(index_url)
    chapter_hrefs = index_page.xpath('//*[@id="list"]/dl/dd/a/@href')

    # Open the output file once for the whole download; append mode keeps
    # whatever an earlier run already wrote.
    with open(book_title + '.txt', 'a', encoding='utf8') as out:
        for href in chapter_hrefs:
            page = fetch_html(chapter_url(index_url, href))
            headings = page.xpath('//h1/text()')
            if not headings:
                # No <h1> on the page: skip it instead of crashing mid-download.
                print('跳过无法解析的章节: ' + href)
                continue
            title = headings[0]
            paragraphs = page.xpath('//*[@id="content"]/text()')
            print(title)
            print('\n'.join(paragraphs))
            out.write(format_chapter(title, paragraphs))
            time.sleep(CHAPTER_DELAY)


if __name__ == '__main__':
    main()
#‘//*[@id="wrapper"]/div[4]/div/div[2]/h1‘
#http://www.biquge.info/10_10240/5018128.html
上一篇:JAVA 1.对象和封装
下一篇:Union-Find算法详解