标签:ESS 数据 oid private file pdo lag ace eai
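/// <summary>
/// Data source: Ministry of Civil Affairs (民政部, MCA) administrative-division code pages.
/// http://www.mca.gov.cn/article/sj/xzqh/1980/
/// https://github.com/zzzprojects/html-agility-pack
/// https://github.com/linezero/HtmlAgilityPack
/// </summary>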
public partial class Form1 : Form
{
// Which table column holds the administrative-division code. The default equals the
// value McaData() stores for most years; a commented-out line in button1_Click used
// to take it from numericUpDown1.
int codecell = 2;
// Which table column holds the administrative-division name (same default as most
// McaData() rows).
int namecell = 3;
// Year currently being processed; button1_Click overwrites it with the parsed text of
// comboBox1. The misspelled identifier is kept because other code refers to it.
int yearnmae = 2019;
// Selectors for the table, its rows and its cells. The defaults equal what McaData()
// stores for most years.
// NOTE(review): presumably XPath fragments consumed by the scraping loop, which is
// outside the visible part of this file — confirm.
string tableNo = "table";
string trNo = "tr";
string tdthNo = "th|td";
/// <summary>
/// Builds the per-year catalogue of source pages: one row for each year from 2019
/// down to 1980, newest first.
/// </summary>
/// <returns>
/// A new <see cref="DataTable"/> with the columns year, website, codecell, namecell,
/// tableNo, trNo and tdthNo. A fresh instance is created on every call.
/// </returns>
DataTable McaData()
{
    DataTable catalog = new DataTable();

    catalog.Columns.AddRange(new DataColumn[]
    {
        new DataColumn("year", typeof(int)),       // year the page describes
        new DataColumn("website", typeof(string)), // URL of the page on the ministry's site
        new DataColumn("codecell", typeof(int)),   // which table column holds the administrative-division code
        new DataColumn("namecell", typeof(int)),   // which table column holds the administrative-division name
        new DataColumn("tableNo", typeof(string)), // table selector
        new DataColumn("trNo", typeof(string)),    // row selector
        new DataColumn("tdthNo", typeof(string))   // cell selector
    });

    // One entry per year, in the column order declared above.
    object[][] entries =
    {
        new object[] { 2019, "http://www.mca.gov.cn/article/sj/xzqh/1980/2019/202002281436.html", 2, 3, "table", "tr", "th|td" },
        new object[] { 2018, "http://www.mca.gov.cn/article/sj/xzqh/1980/201903/201903011447.html", 2, 3, "table", "tr", "th|td" },
        new object[] { 2017, "http://www.mca.gov.cn/article/sj/xzqh/1980/201803/201803131454.html", 2, 3, "table", "tr", "th|td" },
        // Original author's note for 2016: the TBODY tags are all uppercase.
        new object[] { 2016, "http://www.mca.gov.cn/article/sj/xzqh/1980/201705/201705311652.html", 2, 3, "table//tbody", "tr", "th|td" },
        new object[] { 2015, "http://www.mca.gov.cn/article/sj/tjbz/a/2015/201706011127.html", 2, 3, "table", "tr", "th|td" },
        new object[] { 2014, "http://files2.mca.gov.cn/cws/201502/20150225163817214.html", 2, 3, "table", "tr", "th|td" },
        new object[] { 2013, "http://files2.mca.gov.cn/cws/201404/20140404125552372.htm", 2, 3, "table", "tr", "th|td" },
        new object[] { 2012, "http://www.mca.gov.cn/article/sj/tjbz/a/201713/201707271556.html", 2, 3, "table", "tr", "th|td" },
        new object[] { 2011, "http://www.mca.gov.cn/article/sj/tjbz/a/201713/201707271552.html", 2, 3, "table", "tr", "th|td" },
        new object[] { 2010, "http://www.mca.gov.cn/article/sj/tjbz/a/201713/201708220946.html", 2, 3, "table", "tr", "th|td" },
        new object[] { 2009, "http://www.mca.gov.cn/article/sj/tjbz/a/201713/201708220943.html", 2, 3, "table", "tr", "th|td" },
        new object[] { 2008, "http://www.mca.gov.cn/article/sj/tjbz/a/201713/201708220941.html", 2, 3, "table", "tr", "th|td" },
        new object[] { 2007, "http://www.mca.gov.cn/article/sj/tjbz/a/201713/201708220939.html", 2, 3, "table", "tr", "th|td" },
        new object[] { 2006, "http://www.mca.gov.cn/article/sj/tjbz/a/201713/201708220936.html", 2, 3, "table", "tr", "th|td" },
        new object[] { 2005, "http://www.mca.gov.cn/article/sj/tjbz/a/201713/201708220935.html", 2, 3, "table", "tr", "th|td" },
        new object[] { 2004, "http://www.mca.gov.cn/article/sj/tjbz/a/201713/201708220930.html", 2, 3, "table", "tr", "th|td" },
        new object[] { 2003, "http://www.mca.gov.cn/article/sj/tjbz/a/201713/201708220928.html", 2, 3, "table", "tr", "th|td" },
        new object[] { 2002, "http://www.mca.gov.cn/article/sj/tjbz/a/201713/201708220927.html", 2, 3, "table", "tr", "th|td" },
        new object[] { 2001, "http://www.mca.gov.cn/article/sj/tjbz/a/201713/201708220925.html", 2, 3, "table", "tr", "th|td" },
        new object[] { 2000, "http://www.mca.gov.cn/article/sj/tjbz/a/201713/201708220923.html", 2, 3, "table", "tr", "th|td" },
        new object[] { 1999, "http://www.mca.gov.cn/article/sj/tjbz/a/201713/201708220921.html", 2, 3, "table", "tr", "th|td" },
        new object[] { 1998, "http://www.mca.gov.cn/article/sj/tjbz/a/201713/201708220918.html", 2, 3, "table", "tr", "th|td" },
        new object[] { 1997, "http://www.mca.gov.cn/article/sj/tjbz/a/201713/201708220916.html", 2, 3, "table", "tr", "th|td" },
        new object[] { 1996, "http://www.mca.gov.cn/article/sj/tjbz/a/201713/201708220914.html", 2, 3, "table", "tr", "th|td" },
        new object[] { 1995, "http://www.mca.gov.cn/article/sj/tjbz/a/201713/201708220913.html", 2, 3, "table", "tr", "th|td" },
        new object[] { 1994, "http://www.mca.gov.cn/article/sj/tjbz/a/201713/201708220911.html", 2, 3, "table", "tr", "th|td" },
        new object[] { 1993, "http://www.mca.gov.cn/article/sj/tjbz/a/201713/201708041023.html", 2, 3, "table", "tr", "th|td" },
        new object[] { 1992, "http://www.mca.gov.cn/article/sj/tjbz/a/201713/201708220910.html", 2, 3, "table", "tr", "th|td" },
        new object[] { 1991, "http://www.mca.gov.cn/article/sj/tjbz/a/201713/201708041020.html", 2, 3, "table", "tr", "th|td" },
        new object[] { 1990, "http://www.mca.gov.cn/article/sj/tjbz/a/201713/201708041018.html", 2, 3, "table", "tr", "th|td" },
        new object[] { 1989, "http://www.mca.gov.cn/article/sj/tjbz/a/201713/201708041017.html", 2, 3, "table", "tr", "th|td" },
        new object[] { 1988, "http://www.mca.gov.cn/article/sj/tjbz/a/201713/201708220903.html", 2, 3, "table", "tr", "th|td" },
        new object[] { 1987, "http://www.mca.gov.cn/article/sj/xzqh/1980/1980/201911180950.html", 2, 3, "table", "tr", "th|td" },
        new object[] { 1986, "http://www.mca.gov.cn/article/sj/tjbz/a/201713/201708220859.html", 2, 3, "table", "tr", "th|td" },
        new object[] { 1985, "http://www.mca.gov.cn/article/sj/tjbz/a/201713/201708220858.html", 2, 3, "table", "tr", "th|td" },
        // From 1984 back to 1980 the code/name columns are recorded as 1 and 2 instead of 2 and 3.
        new object[] { 1984, "http://www.mca.gov.cn/article/sj/tjbz/a/201713/201708220856.html", 1, 2, "table", "tr", "th|td" },
        new object[] { 1983, "http://www.mca.gov.cn/article/sj/tjbz/a/201713/201708160821.html", 1, 2, "table", "tr", "th|td" },
        new object[] { 1982, "http://www.mca.gov.cn/article/sj/xzqh/1980/1980/201911180942.html", 1, 2, "table", "tr", "th|td" },
        new object[] { 1981, "http://www.mca.gov.cn/article/sj/tjbz/a/201713/201708041004.html", 1, 2, "table", "tr", "th|td" },
        new object[] { 1980, "http://www.mca.gov.cn/article/sj/tjbz/a/201713/201708040959.html", 1, 2, "table", "tr", "th|td" }
    };

    foreach (object[] entry in entries)
    {
        catalog.Rows.Add(entry);
    }

    return catalog;
}
/// <summary>
/// Creates the form and runs <c>InitializeComponent()</c>.
/// </summary>
public Form1()
{
// NOTE(review): InitializeComponent is not defined in the visible part of this file;
// presumably it lives in the designer-generated half of this partial class — confirm.
InitializeComponent();
}
/// <summary>
/// Binds comboBox1 to the catalogue returned by McaData(): each entry displays the
/// "year" column and carries the "website" column as its value.
/// </summary>
/// <param name="sender">Event source (not used).</param>
/// <param name="e">Event arguments (not used).</param>
private void Form1_Load(object sender, EventArgs e)
{
    DataTable yearCatalog = McaData();

    comboBox1.DataSource = yearCatalog;
    comboBox1.DisplayMember = "year";
    comboBox1.ValueMember = "website";
}
/// <summary>
/// Scrapes the data (抓取数据).
/// Geovin Du 涂聚文
/// </summary>
/// <param name="sender"></param>
/// <param name="e"></param>
private void button1_Click(object sender, EventArgs e)
{
try
{
string website = this.comboBox1.SelectedValue.ToString();
//codecell =(int)this.numericUpDown1.Value;
//namecell = (int)this.numericUpDown2.Value;
HtmlAgilityPack.HtmlWeb webClient = new HtmlAgilityPack.HtmlWeb();
HtmlAgilityPack.HtmlDocument doc = webClient.Load(website);
this.richTextBox1.Text = doc.Text.ToLower();
//HtmlNodeCollection nodes = doc.DocumentNode.SelectNodes("/html[1]/body[1]/div[1]/div[2]/div[3]/div[2]/div[1]/div[1]/div[1]/div");
//foreach (HtmlNode node in nodes)
//{
// Console.WriteLine(node.InnerText.Trim());
//}
//nodes = null;
yearnmae = int.Parse(this.comboBox1.Text);
DataRow[] drsselect = McaData().Select("year="+yearnmae+"");
for (int i = 0; i
HtmlAgility 抓取网页上的数据
标签:ESS 数据 oid private file pdo lag ace eai
原文地址:https://www.cnblogs.com/geovindu/p/12427358.html