Node.js http 模块与 cheerio 模块实现小爬虫
2021-06-14 06:03
标签:class load text blank cti rda rar course 课程
代码: 运行:
----------------------------------------------------------------------
参考链接: nodejs .http模块, cheerio模块 实现 小爬虫.
标签:class load text blank cti rda rar course 课程
原文地址:http://www.cnblogs.com/cbza/p/7281367.html
var http = require("http");
2
var cheerio = require("cheerio");

// Course page whose chapter/video listing is scraped.
var url = 'http://www.imooc.com/learn/348';

// Download the page, then parse and print it once the whole body has arrived.
http.get(url, function(res){
    var html = '';

    // Decode as UTF-8 inside the stream. Concatenating raw Buffer chunks onto a
    // string decodes each chunk on its own, which corrupts any multi-byte
    // character that happens to be split across two chunks.
    res.setEncoding('utf8');

    res.on('data', function(data){
        html += data;
    });

    res.on('end', function(){
        var courseData = filterChapters(html);

        printCourseInfo(courseData);
        console.log(courseData);
    });
}).on('error', function(err){
    // Report the underlying cause instead of discarding the error object.
    console.log("获取课程数据出错!", err.message);
});
25
/**
 * Extract the chapter/video structure from a course page.
 *
 * @param {string} html - Markup of the course page, handed to cheerio.load.
 * @returns {Array<{chapterTitle: string, videos: Array<{title: string, id: (string|undefined)}>}>}
 *     One entry per `.chapter` element, in document order. A video's `id` is
 *     the part of its link's href after "video/", or undefined when the list
 *     item has no `.J-media-item` link, the link has no href, or the href does
 *     not contain "video/".
 */
function filterChapters(html)
{
    var $ = cheerio.load(html);
    var courseData = [];

    // Every chapter on the page.
    $('.chapter').each(function() {
        var chapter = $(this);
        var chapterData = {
            chapterTitle: chapter.find('h3 strong').text().replace(/\r|\n/g, "").trim(),
            videos: []
        };

        chapter.find(".video").children('li').each(function() {
            var video = $(this).find('.J-media-item');
            // attr() yields undefined when the link or its href is missing;
            // guard so one malformed list item cannot abort the whole parse.
            var href = video.attr('href');

            chapterData.videos.push({
                title: video.text().replace(/\r|\n/g, "").trim(),
                id: typeof href === 'string' ? href.split('video/')[1] : undefined
            });
        });

        courseData.push(chapterData);
    });

    return courseData;
}
61
62
/**
 * Print the scraped course outline to the console.
 *
 * Each chapter title is logged followed by an extra newline, then one line per
 * video in the form " [id]title".
 *
 * @param {Array<{chapterTitle: string, videos: Array<{id: *, title: string}>}>} courseData
 *     Chapter list in the shape returned by filterChapters.
 */
function printCourseInfo(courseData)
{
    courseData.forEach(function(chapter){
        console.log(chapter.chapterTitle + '\n');

        chapter.videos.forEach(function(video){
            console.log(' [' + video.id + ']' + video.title);
        });
    });
}
文章标题:nodejs .http模块, cheerio模块 实现 小爬虫.
文章链接:http://soscw.com/index.php/essay/93922.html