nodejs 爬图片
2021-02-02 14:14
标签:爬图片 wait require settime code att url begin 爬取
原文地址:https://www.cnblogs.com/qiuxd/p/13168799.html

const path = require('path');
const request = require('request');
const cheerio = require('cheerio');
const fs = require('fs');

// Number of pages requested so far; getImageByUrl increments it on each call.
let page = 0;
const total = 10; // Total number of pages to crawl.
const waitTime = 30000; // Delay between page requests, in milliseconds.
const savePath = './image'; // Directory the downloaded images are written to.

// Kick off the crawl (function declarations are hoisted, so begin is callable here).
begin();
/**
 * Entry point: makes sure the output directory exists, then starts crawling
 * from the first page URL.
 */
function begin() {
  // `recursive: true` makes this a no-op when the directory already exists,
  // so no separate existence check is needed.
  fs.mkdirSync(savePath, { recursive: true });

  const url = 'http://www.图片网址.com';
  getImageByUrl(url);
}
/**
 * Crawls the images on the given page, then schedules the "previous comment
 * page" link after `waitTime` milliseconds, until `total` pages were visited.
 *
 * Reads and updates the module-level `page` counter and writes each image
 * into `savePath` under the base name of its `src` attribute.
 *
 * @param {string} _url - Absolute URL of the page to crawl.
 */
function getImageByUrl(_url) {
  page++;
  console.log(`开始第${page}页`);

  request(_url, function (err, resp, body) {
    if (err) {
      console.log(err);
      return;
    }

    const $ = cheerio.load(body);

    $('.commentlist > li .text img').each(function () {
      const src = $(this).attr('src');
      // An <img> without a src attribute has nothing to download.
      if (!src) {
        return;
      }

      let imageUrl;
      try {
        // Resolve against the page URL so protocol-relative (`//host/a.jpg`),
        // absolute and relative sources are all handled.
        imageUrl = new URL(src, _url).href;
      } catch (urlErr) {
        console.log(urlErr);
        return;
      }

      const baseName = path.win32.basename(src);
      const target = path.join(savePath, baseName);

      // Without 'error' listeners a single failed download or write would
      // raise an unhandled 'error' event and terminate the whole crawl.
      request(imageUrl)
        .on('error', function (downloadErr) {
          console.log(downloadErr);
        })
        .pipe(fs.createWriteStream(target))
        .on('error', function (writeErr) {
          console.log(writeErr);
        });
    });

    const prevUrl = $('.previous-comment-page').attr('href');
    // Stop once the page budget is used up or there is no previous-page link.
    if (page < total && prevUrl) {
      let nextUrl;
      try {
        nextUrl = new URL(prevUrl, _url).href;
      } catch (urlErr) {
        console.log(urlErr);
        return;
      }
      setTimeout(() => {
        getImageByUrl(nextUrl);
      }, waitTime);
    }
  });
}