jsoup实现网络爬虫并且以地图形式展现

2021-04-07 02:26

阅读:599

YPE html>

标签:gets   iter   cep   color   try   drive   post   ted   dog   

本项目用到了 jsoup、HtmlUnit 和 ECharts,下面先展示项目的目录结构:

 

技术图片

 

 

接下来展示各个文件内容:

Add文件内容:

package test;

import java.sql.Connection;
import java.sql.SQLException;
import java.sql.Statement;

/**
 * Writes one scraped record into a database table.
 */
public class Add {
    
    /**
     * Inserts the six fields of {@code en} as one row of {@code table}.
     *
     * <p>The values are bound through a prepared statement, so text scraped from a
     * web page cannot change the shape of the SQL. A table name cannot be bound as a
     * parameter and is concatenated as-is; callers must pass a trusted identifier.
     *
     * @param table name of the target table; it must expose the columns
     *              sheng, xinzeng, leiji, zhiyu, siwang and date
     * @param en    record whose getters supply the values to insert
     * @return {@code true} when the insert affected at least one row; {@code false}
     *         when no connection could be opened or the insert failed
     */
    public boolean add(String table,AddService en)
    {
        boolean c=false;
        String sql="insert into "+table+"(sheng,xinzeng,leiji,zhiyu,siwang,date) values(?,?,?,?,?,?)";
        Connection conn=DBUtil.getConn();
        if(conn==null)
        {
            // DBUtil.getConn() returns null when the driver or the connection fails.
            return false;
        }
        java.sql.PreparedStatement state=null;
        try {
            state=conn.prepareStatement(sql);
            state.setString(1, en.getSheng());
            state.setString(2, en.getXinzeng());
            state.setString(3, en.getLeiji());
            state.setString(4, en.getZhiyu());
            state.setString(5, en.getSiwang());
            state.setString(6, en.getTime());
            c=state.executeUpdate()!=0;
        } catch (SQLException e) {
            // Report the failure instead of hiding it; the caller still gets false.
            e.printStackTrace();
        } finally {
            // Release the statement and connection on both the success and failure paths.
            DBUtil.close(state, conn);
        }
        return c;
    }

}

AddService文件内容:

package test;


import java.sql.Time;
import java.text.SimpleDateFormat;
import java.util.Date;



public class AddService {
    String sheng;
    String xinzeng;
    String leiji;
    String zhiyu;
    String siwang;
    String time;
    public String getSheng() {
        return sheng;
    }
    public void setSheng(String sheng) {
        this.sheng = sheng;
    }
    public String getXinzeng() {
        return xinzeng;
    }
    public void setXinzeng(String xinzeng) {
        this.xinzeng = xinzeng;
    }
    public String getLeiji() {
        return leiji;
    }
    public void setLeiji(String leiji) {
        this.leiji = leiji;
    }
    public String getZhiyu() {
        return zhiyu;
    }
    public void setZhiyu(String zhiyu) {
        this.zhiyu = zhiyu;
    }
    public String getSiwang() {
        return siwang;
    }
    public void setSiwang(String siwang) {
        this.siwang = siwang;
    }
    
    public String getTime() {
        return time;
    }
    public void setTime(String time) {
        this.time = time;
    }
    public static void main(String[] args) {
        Add a=new Add();
        AddService as=new AddService();
        as.setSheng("湖北");
        as.setXinzeng("13");
        as.setLeiji("67773");
        as.setZhiyu("49056");
        as.setSiwang("3046");
        Date currentTime=new Date();
        SimpleDateFormat formatter = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
        String dateString = formatter.format(currentTime);
        as.setTime(dateString);
        a.add("myinfo", as);
    }

}

DBUtil文件内容:

package test;

import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Statement;

/**
 * Database connection utility: opens MySQL connections and closes JDBC resources.
 * @author Hu
 *
 */
public class DBUtil {
    
    // NOTE(review): the connection settings, including the password, are hard-coded
    // in source. Consider loading them from external configuration instead.
    public static String db_url = "jdbc:mysql://localhost:3306/yonghucaozuo?useSSL=false&serverTimezone=UTC&characterEncoding=UTF-8";
    public static String db_user = "root";
    public static String db_pass = "20183629";
    
    /**
     * Opens a new connection using {@link #db_url}, {@link #db_user} and {@link #db_pass}.
     *
     * @return the open connection, or {@code null} when the driver class cannot be
     *         loaded or the connection attempt fails (the error is printed)
     */
    public static Connection getConn () {
        Connection conn = null;
        
        try {
            Class.forName("com.mysql.jdbc.Driver");// load the JDBC driver
            conn = DriverManager.getConnection(db_url, db_user, db_pass);
        } catch (Exception e) {
            e.printStackTrace();
        }
        
        return conn;
    }
    
    /**
     * Closes a statement and a connection, in that order.
     * Either argument may be {@code null}; a failure to close one resource is
     * printed and does not prevent the other from being closed.
     *
     * @param state statement to close, may be {@code null}
     * @param conn  connection to close, may be {@code null}
     */
    public static void close (Statement state, Connection conn) {
        if (state != null) {
            try {
                state.close();
            } catch (SQLException e) {
                e.printStackTrace();
            }
        }
        
        if (conn != null) {
            try {
                conn.close();
            } catch (SQLException e) {
                e.printStackTrace();
            }
        }
    }
    
    /**
     * Closes a result set, a statement and a connection, in that order.
     * Any argument may be {@code null}; a failure to close one resource is
     * printed and does not prevent the remaining ones from being closed.
     *
     * @param rs    result set to close, may be {@code null}
     * @param state statement to close, may be {@code null}
     * @param conn  connection to close, may be {@code null}
     */
    public static void close (ResultSet rs, Statement state, Connection conn) {
        if (rs != null) {
            try {
                rs.close();
            } catch (SQLException e) {
                e.printStackTrace();
            }
        }
        
        // The statement and connection are handled exactly as in the two-argument overload.
        close(state, conn);
    }

    /**
     * Manual connectivity check: queries the {@code course} table and prints
     * whether it contains any row.
     *
     * @param args unused
     * @throws SQLException when no connection can be opened or the query fails
     */
    public static void main(String[] args) throws SQLException {
        Connection conn = getConn();
        if (conn == null) {
            throw new SQLException("could not open a connection to " + db_url);
        }
        PreparedStatement pstmt = null;
        ResultSet rs = null;
        try {
            String sql ="select * from course";
            pstmt = conn.prepareStatement(sql);
            rs = pstmt.executeQuery();
            // rs.next() is true when at least one row exists, i.e. the table is NOT empty.
            if(rs.next()){
                System.out.println("不空");
            }else{
                System.out.println("空");
            }
        } finally {
            close(rs, pstmt, conn);
        }
    }
}

Get文件内容:

package test;

import java.sql.Connection;
import java.sql.ResultSet;
import java.sql.Statement;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;



/**
 * Reads stored rows back from the database as name/value pairs.
 */
public class Get {
    
    /**
     * Returns one map per row of {@code table} whose {@code date} column equals {@code id}.
     *
     * <p>Each map has two entries: {@code "name"} holds the row's second column and
     * {@code "value"} holds its fourth column, both read as strings.
     * NOTE(review): the columns are addressed by position; presumably these are the
     * province name and the cumulative count — confirm against the table schema.
     *
     * <p>The date value is bound through a prepared statement. A table name cannot
     * be bound as a parameter and is concatenated as-is; callers must pass a trusted
     * identifier.
     *
     * @param table name of the table to query
     * @param id    value matched against the {@code date} column
     * @return the matching rows as a list of {@code Map<String, String>}; empty when
     *         nothing matches, no connection could be opened, or the query failed
     */
    public static List find(String table,String id)
    {
        List<Map<String, String>> list =new ArrayList<Map<String, String>>();
        Connection conn=DBUtil.getConn();
        if(conn==null)
        {
            // DBUtil.getConn() returns null when the driver or the connection fails.
            return list;
        }
        java.sql.PreparedStatement state=null;
        ResultSet rs=null;
        try
        {
            state=conn.prepareStatement("select * from "+table+" where date = ?");
            state.setString(1, id);
            rs=state.executeQuery();
            while(rs.next())
            {
                Map<String, String> map=new HashMap<String, String>();
                map.put("name", rs.getString(2));
                map.put("value", rs.getString(4));
                list.add(map);
            }
        }
        catch(java.sql.SQLException e)
        {
            // Report the failure instead of hiding it; rows read so far are still returned.
            e.printStackTrace();
        }
        finally
        {
            // Release JDBC resources on both the success and failure paths.
            DBUtil.close(rs, state, conn);
        }
        return list;
    }
}

JsoupTestTitle文件内容:

package test;
import java.io.IOException;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Date;
import java.util.List;

import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

import com.gargoylesoftware.htmlunit.BrowserVersion;
import com.gargoylesoftware.htmlunit.WebClient;
import com.gargoylesoftware.htmlunit.html.HtmlInput;
import com.gargoylesoftware.htmlunit.html.HtmlPage;
import com.gargoylesoftware.htmlunit.html.HtmlSubmitInput;

import bean.ShengBean;
 
/**
 * Scrapes the table with id {@code nationTable} from the Baidu epidemic page and
 * turns its rows into {@link AddService} records.
 */
public class JsoupTestTitle {
    public static void main(String[] args) {
        JsoupTestTitle.getWuMaoW("");
    }
 
    /**
     * Loads https://voice.baidu.com/act/newpneumonia/newpneumonia/ in a headless
     * browser (JavaScript enabled), waits for background scripts, and parses every
     * {@code tr} inside the {@code tbody} elements of the {@code nationTable} element.
     *
     * <p>Each row is parsed independently: a cell that is missing from a row yields
     * an empty string rather than the value of the previous row, and rows without a
     * province-name cell are skipped.
     *
     * @param date timestamp string stored on every returned record
     * @return a list of {@link AddService}, one per parsed row; empty when the page
     *         cannot be loaded or the table is not present
     */
    public static List getWuMaoW(String date) {
        List<AddService> list=new ArrayList<AddService>();
        String url = "https://voice.baidu.com/act/newpneumonia/newpneumonia/";
        
        // Build a WebClient that emulates the Chrome browser.
        WebClient webClient = new WebClient(BrowserVersion.CHROME);
        try {
            // The table is rendered by scripts, so JavaScript must be enabled.
            webClient.getOptions().setJavaScriptEnabled(true);
            webClient.getOptions().setCssEnabled(false);
            webClient.getOptions().setActiveXNative(false);
            webClient.getOptions().setThrowExceptionOnScriptError(false);
            webClient.getOptions().setThrowExceptionOnFailingStatusCode(false);
            webClient.getOptions().setTimeout(8000);
            HtmlPage rootPage = webClient.getPage(url);
            // Give background JavaScript time to run before reading the DOM.
            webClient.waitForBackgroundJavaScript(6000);
            Document doc = Jsoup.parse(rootPage.asXml());
            System.out.println("进去");
            // Locate the table holding the per-province figures.
            Element listDiv = doc.getElementById("nationTable");
            if (listDiv != null) {
                for (Element tbody : listDiv.getElementsByTag("tbody")) {
                    for (Element tr : tbody.getElementsByTag("tr")) {
                        AddService as = parseRow(tr, date);
                        if (as != null) {
                            list.add(as);
                        }
                    }
                }
            }
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            // Release the browser windows and background script threads.
            webClient.close();
        }
        System.out.println("我不好");
        return list;
    }

    /**
     * Builds one record from a table row, or returns {@code null} when the row
     * has no province-name cell.
     */
    private static AddService parseRow(Element tr, String date) {
        Elements nameCells = tr.getElementsByAttributeValue("class", "VirusTable_1-1-203_MdE8uT");
        if (nameCells.isEmpty()) {
            return null;
        }
        String sheng = lastText(nameCells);
        System.out.println(sheng);

        String xinzeng = lastText(tr.getElementsByAttributeValue("class", "VirusTable_1-1-203_3x1sDV VirusTable_1-1-203_2bK5NN"));
        String leiji = lastText(tr.getElementsByAttributeValue("class", "VirusTable_1-1-203_3x1sDV"));

        // Two cells share this class: the first is read as zhiyu, the last as siwang.
        Elements pair = tr.getElementsByAttributeValue("class", "VirusTable_1-1-203_EjGi8c");
        String zhiyu = pair.isEmpty() ? "" : pair.get(0).text().trim();
        String siwang = pair.size() > 1 ? pair.get(pair.size() - 1).text().trim() : "";

        AddService as=new AddService();
        as.setSheng(sheng);
        as.setXinzeng(xinzeng);
        as.setLeiji(leiji);
        as.setZhiyu(zhiyu);
        as.setSiwang(siwang);
        as.setTime(date);
        return as;
    }

    /** Returns the trimmed text of the last matched cell, or an empty string when none matched. */
    private static String lastText(Elements cells) {
        return cells.isEmpty() ? "" : cells.get(cells.size() - 1).text().trim();
    }
 
}

servlet文件内容:

package test;

import java.io.IOException;
import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.List;

import javax.servlet.ServletException;
import javax.servlet.annotation.WebServlet;
import javax.servlet.http.HttpServlet;
import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse;

import net.sf.json.JSONArray;



/**
 * Servlet implementation class servlet
 */
@WebServlet("/pachongservlet")
public class servlet extends HttpServlet {
    private static final long serialVersionUID = 1L;
       
    /**
     * @see HttpServlet#HttpServlet()
     */
    public servlet() {
        super();
        // TODO Auto-generated constructor stub
    }
    protected void service(HttpServletRequest arg0, HttpServletResponse arg1) throws ServletException, IOException
    {
        arg1.setContentType("text/html;charset=utf-8");
        arg0.setCharacterEncoding("utf-8");
        arg1.setCharacterEncoding("utf-8");
        Date currentTime=new Date();
        SimpleDateFormat formatter = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
        String date = formatter.format(currentTime);
        System.out.println("11111");
        Listall=JsoupTestTitle.getWuMaoW(date);
        Add a=new Add();
        System.out.println("22222");
        for(AddService as:all)
        {
            a.add("myinfo", as);
        }
        System.out.println("33333");
        List list=Get.find("myinfo",date);
        arg0.setAttribute("mapDataJson", JSONArray.fromObject(list));
        
        
        /*
         * List all=Get.find1("info", date); Gson gson = new Gson(); String
         * json = gson.toJson(all); arg0.setAttribute("message", json);
         */
        
        System.out.println("44444");
        arg0.getRequestDispatcher("NewFile.jsp").forward(arg0, arg1);
        return;
        /*map = new HashMap();
        map.put("name","河北");
        map.put("value",2400);
        list.add(map);*/

    }

    /**
     * @see HttpServlet#doGet(HttpServletRequest request, HttpServletResponse response)
     */
    protected void doGet(HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException {
        // TODO Auto-generated method stub
        response.getWriter().append("Served at: ").append(request.getContextPath());
    }

    /**
     * @see HttpServlet#doPost(HttpServletRequest request, HttpServletResponse response)
     */
    protected void doPost(HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException {
        // TODO Auto-generated method stub
        doGet(request, response);
    }

}

NewFile文件内容:

    pageEncoding="UTF-8"%>Insert title here

接下来是所爬取的网站图片:

技术图片

 

 

 

在完成这个小 demo 的过程中,我最初的做法是每爬取到一组数据就立即将其写入数据库,结果报了很多错误。于是我换了一种方式:先把爬取到的数据全部存入 List,再统一导入数据库,最后由 servlet 从数据库中读取这些数据,以地图的形式进行展示。

 

jsoup实现网络爬虫并且以地图形式展现

标签:gets   iter   cep   color   try   drive   post   ted   dog   

原文地址:https://www.cnblogs.com/yizhixiaozhu/p/12490621.html


评论


亲,登录后才可以留言!