jsoup实现网络爬虫并且以地图形式展现
2021-04-07 02:26
阅读:599
YPE html>
标签:gets iter cep color try drive post ted dog
本文件用到了jsoup和echarts,接下来展示目录
接下来展示各个文件内容:
Add文件内容:
package test;

import java.sql.Connection;
import java.sql.PreparedStatement;
import java.sql.SQLException;
import java.sql.Statement;

/**
 * Inserts one scraped record into a database table.
 */
public class Add {

    /**
     * Inserts the fields of {@code en} as a new row of {@code table}.
     *
     * <p>All field values are bound as statement parameters, so quotes or other
     * special characters in the scraped text cannot alter the SQL. A table name
     * cannot be bound as a parameter; it is concatenated into the statement and
     * must therefore come from trusted code, never from user input.
     *
     * @param table name of the target table; the statement writes the columns
     *              sheng, xinzeng, leiji, zhiyu, siwang and date
     * @param en    record whose getters supply the six column values
     * @return {@code true} if the insert affected at least one row;
     *         {@code false} if no connection could be obtained, the insert
     *         affected no rows, or a {@link SQLException} occurred
     */
    public boolean add(String table, AddService en) {
        String sql = "insert into " + table
                + "(sheng,xinzeng,leiji,zhiyu,siwang,date) values(?,?,?,?,?,?)";
        System.out.println(sql);

        // try-with-resources closes the statement and the connection even when
        // the insert fails; a null resource is simply skipped on close.
        try (Connection conn = DBUtil.getConn()) {
            if (conn == null) {
                // DBUtil.getConn() returns null when the connection attempt failed.
                System.err.println("Add.add: no database connection, record not inserted");
                return false;
            }
            try (PreparedStatement ps = conn.prepareStatement(sql)) {
                ps.setString(1, en.getSheng());
                ps.setString(2, en.getXinzeng());
                ps.setString(3, en.getLeiji());
                ps.setString(4, en.getZhiyu());
                ps.setString(5, en.getSiwang());
                ps.setString(6, en.getTime());
                return ps.executeUpdate() != 0;
            }
        } catch (SQLException e) {
            // Keep the best-effort contract (report failure through the return
            // value) but do not hide the reason.
            System.err.println("Add.add: insert into " + table + " failed: " + e.getMessage());
            return false;
        }
    }
}
AddService文件内容:
package test;

import java.sql.Time;
import java.text.SimpleDateFormat;
import java.util.Date;

/**
 * Plain data holder for one scraped row: a region name, four counters kept as
 * text, and a timestamp string.
 */
public class AddService {

    String sheng;
    String xinzeng;
    String leiji;
    String zhiyu;
    String siwang;
    String time;

    /** @return the stored {@code sheng} value */
    public String getSheng() {
        return sheng;
    }

    /** @param value new {@code sheng} value */
    public void setSheng(String value) {
        sheng = value;
    }

    /** @return the stored {@code xinzeng} value */
    public String getXinzeng() {
        return xinzeng;
    }

    /** @param value new {@code xinzeng} value */
    public void setXinzeng(String value) {
        xinzeng = value;
    }

    /** @return the stored {@code leiji} value */
    public String getLeiji() {
        return leiji;
    }

    /** @param value new {@code leiji} value */
    public void setLeiji(String value) {
        leiji = value;
    }

    /** @return the stored {@code zhiyu} value */
    public String getZhiyu() {
        return zhiyu;
    }

    /** @param value new {@code zhiyu} value */
    public void setZhiyu(String value) {
        zhiyu = value;
    }

    /** @return the stored {@code siwang} value */
    public String getSiwang() {
        return siwang;
    }

    /** @param value new {@code siwang} value */
    public void setSiwang(String value) {
        siwang = value;
    }

    /** @return the stored {@code time} value */
    public String getTime() {
        return time;
    }

    /** @param value new {@code time} value */
    public void setTime(String value) {
        time = value;
    }

    /**
     * Manual smoke test: builds one sample record stamped with the current
     * time and inserts it into the {@code myinfo} table through {@link Add}.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        AddService sample = new AddService();
        sample.setSheng("湖北");
        sample.setXinzeng("13");
        sample.setLeiji("67773");
        sample.setZhiyu("49056");
        sample.setSiwang("3046");

        String now = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss").format(new Date());
        sample.setTime(now);

        new Add().add("myinfo", sample);
    }
}
DBUtil文件内容:
package test;

import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Statement;

/**
 * Database connection utility.
 *
 * @author Hu
 */
public class DBUtil {

    // NOTE(review): the connection settings, including the password, are
    // hard-coded in source; consider loading them from external configuration.
    public static String db_url = "jdbc:mysql://localhost:3306/yonghucaozuo?useSSL=false&serverTimezone=UTC&characterEncoding=UTF-8";
    public static String db_user = "root";
    public static String db_pass = "20183629";

    /**
     * Opens a new connection using {@link #db_url}, {@link #db_user} and
     * {@link #db_pass}.
     *
     * @return the open connection, or {@code null} if the driver could not be
     *         loaded or the connection attempt failed (the stack trace is
     *         printed in that case)
     */
    public static Connection getConn() {
        Connection conn = null;
        try {
            Class.forName("com.mysql.jdbc.Driver"); // load the driver
            conn = DriverManager.getConnection(db_url, db_user, db_pass);
        } catch (Exception e) {
            e.printStackTrace();
        }
        return conn;
    }

    /**
     * Closes a statement and a connection, ignoring {@code null} arguments.
     *
     * @param state statement to close, may be {@code null}
     * @param conn  connection to close, may be {@code null}
     */
    public static void close(Statement state, Connection conn) {
        close(null, state, conn);
    }

    /**
     * Closes a result set, a statement and a connection in that order,
     * ignoring {@code null} arguments. A failure to close one resource is
     * printed and does not prevent the remaining ones from being closed.
     *
     * @param rs    result set to close, may be {@code null}
     * @param state statement to close, may be {@code null}
     * @param conn  connection to close, may be {@code null}
     */
    public static void close(ResultSet rs, Statement state, Connection conn) {
        closeQuietly(rs);
        closeQuietly(state);
        closeQuietly(conn);
    }

    /** Closes one resource if it is non-null, printing any failure. */
    private static void closeQuietly(AutoCloseable resource) {
        if (resource == null) {
            return;
        }
        try {
            resource.close();
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Manual connectivity check: queries the {@code course} table and prints
     * whether it returned any row.
     *
     * @param args unused
     * @throws SQLException if preparing or executing the query fails
     */
    public static void main(String[] args) throws SQLException {
        Connection conn = getConn();
        if (conn == null) {
            // getConn() has already printed the cause.
            System.err.println("DBUtil.main: no database connection");
            return;
        }
        PreparedStatement pstmt = null;
        ResultSet rs = null;
        try {
            String sql = "select * from course";
            pstmt = conn.prepareStatement(sql);
            rs = pstmt.executeQuery();
            // rs.next() is true when at least one row exists, i.e. the table
            // is NOT empty; the two messages were previously swapped.
            if (rs.next()) {
                System.out.println("不空");
            } else {
                System.out.println("空");
            }
        } finally {
            close(rs, pstmt, conn);
        }
    }
}
Get文件内容:
package test;

import java.sql.Connection;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Statement;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

/**
 * Reads stored rows back from the database as name/value pairs.
 */
public class Get {

    /**
     * Selects every row of {@code table} whose {@code date} column equals
     * {@code id} and converts each row into a map with two entries:
     * {@code "name"} (result column 2) and {@code "value"} (result column 4).
     *
     * <p>The date is bound as a statement parameter. A table name cannot be
     * bound; it is concatenated into the statement and must come from trusted
     * code.
     *
     * @param table name of the table to query
     * @param id    value compared against the {@code date} column
     * @return one map per matching row, in result order; empty if nothing
     *         matched, no connection could be obtained, or the query failed
     */
    public static List<Map<String, Object>> find(String table, String id) {
        List<Map<String, Object>> list = new ArrayList<Map<String, Object>>();
        String sql = "select * from " + table + " where date = ?";

        // try-with-resources closes result set, statement and connection on
        // every path; a null connection resource is skipped on close.
        try (Connection conn = DBUtil.getConn()) {
            if (conn == null) {
                System.err.println("Get.find: no database connection");
                return list;
            }
            try (PreparedStatement ps = conn.prepareStatement(sql)) {
                ps.setString(1, id);
                try (ResultSet rs = ps.executeQuery()) {
                    while (rs.next()) {
                        Map<String, Object> map = new HashMap<String, Object>();
                        // NOTE(review): positional columns 2 and 4 depend on the
                        // table's column order, which is not visible here —
                        // confirm they are the intended name and value columns.
                        map.put("name", rs.getString(2));
                        map.put("value", rs.getString(4));
                        list.add(map);
                    }
                }
            }
            System.out.println("over");
            System.out.println(1);
        } catch (SQLException e) {
            // Still return whatever was collected, but report the failure
            // instead of discarding it.
            System.err.println("Get.find: query on " + table + " failed: " + e.getMessage());
        }
        return list;
    }
}
JsoupTestTitle文件内容:
package test;

import java.io.IOException;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Date;
import java.util.List;

import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

import com.gargoylesoftware.htmlunit.BrowserVersion;
import com.gargoylesoftware.htmlunit.WebClient;
import com.gargoylesoftware.htmlunit.html.HtmlInput;
import com.gargoylesoftware.htmlunit.html.HtmlPage;
import com.gargoylesoftware.htmlunit.html.HtmlSubmitInput;

import bean.ShengBean;

/**
 * Scrapes the per-region table from a JavaScript-rendered page and turns each
 * table row into an {@link AddService} record.
 */
public class JsoupTestTitle {

    /**
     * Manual run: scrapes once with an empty date string.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        JsoupTestTitle.getWuMaoW("");
    }

    /**
     * Loads the page with a headless browser (so its JavaScript can build the
     * table), parses the rendered markup with jsoup and reads every
     * {@code tr} inside the element with id {@code nationTable}.
     *
     * @param date timestamp string stored unchanged on every returned record
     * @return one record per table row; empty if the page could not be loaded
     *         or the table element is not present
     */
    public static List<AddService> getWuMaoW(String date) {
        // NOTE(review): these are declared outside the row loop, so a row that
        // lacks one of the cells keeps the value read from the previous row.
        // Preserved as-is; confirm whether that carry-over is intended.
        String sheng = "";
        String xinzeng = "";
        String leiji = "";
        String zhiyu = "";
        String siwang = "";
        List<AddService> list = new ArrayList<AddService>();
        String url = "https://voice.baidu.com/act/newpneumonia/newpneumonia/";

        // Build a WebClient that emulates Chrome; try-with-resources closes it
        // (and its windows and background JavaScript jobs) on every path.
        try (WebClient webClient = new WebClient(BrowserVersion.CHROME)) {
            // JavaScript must be enabled because the table is rendered client-side.
            webClient.getOptions().setJavaScriptEnabled(true);
            webClient.getOptions().setCssEnabled(false);
            webClient.getOptions().setActiveXNative(false);
            webClient.getOptions().setThrowExceptionOnScriptError(false);
            webClient.getOptions().setThrowExceptionOnFailingStatusCode(false);
            webClient.getOptions().setTimeout(8000);

            HtmlPage rootPage = webClient.getPage(url);
            // Give the page's background JavaScript time to run.
            webClient.waitForBackgroundJavaScript(6000);

            Document doc = Jsoup.parse(rootPage.asXml());
            System.out.println("进去");

            // Container holding the per-region rows.
            Element listDiv = doc.getElementById("nationTable");
            if (listDiv == null) {
                // Page layout changed or the script did not finish in time.
                System.err.println("getWuMaoW: element #nationTable not found in page");
            } else {
                for (Element tbody : listDiv.getElementsByTag("tbody")) {
                    for (Element tr : tbody.getElementsByTag("tr")) {
                        Elements nameCells = tr.getElementsByAttributeValue(
                                "class", "VirusTable_1-1-203_MdE8uT");
                        for (Element cell : nameCells) {
                            sheng = cell.text().trim();
                            System.out.println(sheng);
                        }

                        xinzeng = lastText(tr.getElementsByAttributeValue(
                                "class", "VirusTable_1-1-203_3x1sDV VirusTable_1-1-203_2bK5NN"), xinzeng);
                        leiji = lastText(tr.getElementsByAttributeValue(
                                "class", "VirusTable_1-1-203_3x1sDV"), leiji);

                        // Cells with this class: the first one fills zhiyu,
                        // any later one overwrites siwang.
                        int index = 0;
                        for (Element cell : tr.getElementsByAttributeValue(
                                "class", "VirusTable_1-1-203_EjGi8c")) {
                            String text = cell.text().trim();
                            if (index == 0) {
                                zhiyu = text;
                            } else {
                                siwang = text;
                            }
                            index++;
                        }
                        System.out.println();

                        AddService as = new AddService();
                        as.setSheng(sheng);
                        as.setXinzeng(xinzeng);
                        as.setLeiji(leiji);
                        as.setZhiyu(zhiyu);
                        as.setSiwang(siwang);
                        as.setTime(date);
                        list.add(as);
                    }
                }
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
        System.out.println("我不好");
        return list;
    }

    /** Returns the trimmed text of the last cell, or {@code current} when there are no cells. */
    private static String lastText(Elements cells, String current) {
        String result = current;
        for (Element cell : cells) {
            result = cell.text().trim();
        }
        return result;
    }
}
servlet文件内容:
package test;

import java.io.IOException;
import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.List;

import javax.servlet.ServletException;
import javax.servlet.annotation.WebServlet;
import javax.servlet.http.HttpServlet;
import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse;

import net.sf.json.JSONArray;

/**
 * Servlet mapped to {@code /pachongservlet}: scrapes the current data, stores
 * it, reads it back and forwards to the JSP that renders it.
 */
@WebServlet("/pachongservlet")
public class servlet extends HttpServlet {
    private static final long serialVersionUID = 1L;

    /**
     * @see HttpServlet#HttpServlet()
     */
    public servlet() {
        super();
    }

    /**
     * Handles every request regardless of HTTP method.
     *
     * <p>Steps: format the current time as {@code yyyy-MM-dd HH:mm:ss}; scrape
     * the records stamped with that time; insert each one into {@code myinfo};
     * query {@code myinfo} for that same timestamp; expose the result as the
     * request attribute {@code mapDataJson} (a {@link JSONArray}); forward to
     * {@code NewFile.jsp}.
     *
     * @param arg0 the incoming request; receives the {@code mapDataJson} attribute
     * @param arg1 the response, configured for UTF-8 HTML
     * @throws ServletException if the forward fails
     * @throws IOException      if the forward fails with an I/O error
     */
    @Override
    protected void service(HttpServletRequest arg0, HttpServletResponse arg1)
            throws ServletException, IOException {
        arg1.setContentType("text/html;charset=utf-8");
        arg0.setCharacterEncoding("utf-8");
        arg1.setCharacterEncoding("utf-8");

        // The formatter is created per request because SimpleDateFormat is not
        // thread-safe and a servlet instance serves concurrent requests.
        SimpleDateFormat formatter = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
        String date = formatter.format(new Date());

        System.out.println("11111");
        List<AddService> all = JsoupTestTitle.getWuMaoW(date);

        Add a = new Add();
        System.out.println("22222");
        for (AddService as : all) {
            a.add("myinfo", as);
        }

        System.out.println("33333");
        List<?> list = Get.find("myinfo", date);
        arg0.setAttribute("mapDataJson", JSONArray.fromObject(list));

        System.out.println("44444");
        arg0.getRequestDispatcher("NewFile.jsp").forward(arg0, arg1);
    }

    /**
     * Not reached through normal dispatch: {@link #service} is overridden
     * above without delegating to the superclass, which is what would
     * otherwise route GET requests here.
     *
     * @see HttpServlet#doGet(HttpServletRequest request, HttpServletResponse response)
     */
    @Override
    protected void doGet(HttpServletRequest request, HttpServletResponse response)
            throws ServletException, IOException {
        response.getWriter().append("Served at: ").append(request.getContextPath());
    }

    /**
     * Delegates to {@link #doGet}; like it, not reached through normal
     * dispatch because {@link #service} is overridden.
     *
     * @see HttpServlet#doPost(HttpServletRequest request, HttpServletResponse response)
     */
    @Override
    protected void doPost(HttpServletRequest request, HttpServletResponse response)
            throws ServletException, IOException {
        doGet(request, response);
    }
}
NewFile文件内容:
pageEncoding="UTF-8"%>Insert title here
接下来是所爬取的网站图片:
在完成这个小demo的过程中,我最初的做法是每爬取到一组数据就立刻将其导入数据库,结果会报很多错误。于是我改了一下方式:先将爬取到的数据存到List中,再统一导入数据库,然后再由servlet读取数据库中的内容,以地图形式展示信息。
jsoup实现网络爬虫并且以地图形式展现
标签:gets iter cep color try drive post ted dog
原文地址:https://www.cnblogs.com/yizhixiaozhu/p/12490621.html
评论
亲,登录后才可以留言!