Java 抓取网页,提取所需内容并以列表形式展示
2020-12-13 03:57
标签:create 效果 result nta map city arraylist geturl href

1. 扒取原网页内容:
2. 本地展示效果:
3. 代码:

java扒取网页,获取所需要内容列表展示

标签:create 效果 result nta map city arraylist geturl href

原文地址:https://www.cnblogs.com/irishua/p/11099026.html

3 @ResponseBody
4 public Map
1 private Elements getUrlElements() {
2 System.out.println("=======================================抓取国家政府网内容任务开始了=======================================");
3 String url = "http://www.gov.cn/pushinfo/v150203/index.htm";
4 CloseableHttpClient httpClient = HttpClients.createDefault();
5 Elements elementsByTag = null;
6
7 try {
8 HttpGet httpGet = new HttpGet(url);
9 CloseableHttpResponse response = httpClient.execute(httpGet);
10 try {
11 HttpEntity entity = response.getEntity();
12 //设置编码格式否则乱码
13 String html = new String(EntityUtils.toString(entity).getBytes("iso8859-1"));
14
15 Document document = Jsoup.parse(html);
16 elementsByTag = document.getElementsByTag("li");
17 }finally {
18 response.close();
19 }
20 } catch (ClientProtocolException e1) {
21 e1.printStackTrace();
22 } catch (IOException e1) {
23 e1.printStackTrace();
24 }finally {
25 // 关闭连接,释放资源
26 try {
27 httpClient.close();
28 } catch (IOException e) {
29 e.printStackTrace();
30 }
31 }
32
33 return elementsByTag;
34 }