Jsoup解析和遍历一个HTML文档(二)
2021-07-04 09:04
标签:exce text mat org htm 编辑 pen har 汇总
关于Eclipse编辑器汇总console中字体调整:
1,下载jsoup的jar包:http://jsoup.org/download
2,jsoup的英文开发手册:http://jsoup.org/cookbook/
3,jsoup cookbook中文版:http://www.open-open.com/jsoup/
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
小实例:
Jsoup解析和遍历一个HTML文档(二)
标签:exce text mat org htm 编辑 pen har 汇总
原文地址:http://www.cnblogs.com/kmingspirit/p/7120304.html
1 package cn.cast.test;
2
3
4
5 import org.jsoup.Jsoup;
6 import org.jsoup.nodes.Document;
7 import org.jsoup.nodes.Element;
8 import org.jsoup.select.Elements;
9
10
11 import java.io.IOException;
12
13 import org.jsoup.Jsoup;
14 import org.jsoup.Jsoup;
15 import org.jsoup.nodes.Document;
16 import org.jsoup.nodes.Element;
17 import org.jsoup.select.Elements;
18
19
20 public class test_1 {
21
22 public static void main(String[] args) {
23 // TODO Auto-generated method stub
24 getUrlAndTitle();
25 getTextMes();
26 }
27
28 public static void getUrlAndTitle()
29 {
30 String url="http://finance.sina.com.cn/";
31 try {
32 Document doc=Jsoup.connect(url).timeout(10000).get();//get all infomation from url website
33 //System.out.println(doc);
34 Elements ListDiv = doc.getElementsByAttributeValue("class","fin_tabs0_c0");
35 //System.out.println(ListDiv);
36 for (Element div :ListDiv) {
37 Elements links = div.getElementsByTag("a");
38 // System.out.println(links);
39 for (Element link : links) {
40 String linkHref = link.attr("href").trim();
41 String linkText = link.text().trim();
42 System.out.println(linkHref+"\t"+linkText);
43 }
44 }
45 } catch (IOException e) {
46 // TODO Auto-generated catch block
47 e.printStackTrace();
48 }
49 }
50
51 public static void getTextMes()
52 {
53 String url="http://finance.sina.com.cn/hy/20140823/100220099682.shtml";
54 String textMes="";
55 try {
56 Document doc=Jsoup.connect(url).timeout(10000).get();
57 Elements ListDiv = doc.getElementsByAttributeValue("class","blkContainerSblkCon BSHARE_POP");
58 //System.out.println(ListDiv);
59 for(Element div:ListDiv)
60 {
61 Elements textInfos=div.getElementsByTag("p");
62 //System.out.println(textInfos);
63 for(Element textInfo:textInfos)
64 {
65 String text=textInfo.text().trim();
66 textMes=textMes+text+"\n";
67 }
68 }
69 System.out.println(textMes);
70 } catch (IOException e) {
71 // TODO Auto-generated catch block
72 e.printStackTrace();
73 }
74 }
75 }
上一篇:数据转换成JSON类型
下一篇:PhpStorm快捷键大全