C# 爬虫总结

2021-02-17 02:16

阅读:639

/// <summary>
/// Downloads the lagou.com homepage as UTF-8 text, prints every regex match,
/// then prints the total match count and waits for a key press.
/// </summary>
/// <param name="args">Command-line arguments (unused).</param>
/// <remarks>
/// NOTE(review): the pattern "(.*)" matches every line of the page, which makes the
/// output just the raw HTML split by lines. The angle-bracket tags were most likely
/// stripped when this snippet was pasted into the blog (e.g. something like a
/// title- or anchor-extracting pattern was intended) — confirm the original pattern
/// before relying on this output. The URL and pattern strings are kept byte-identical
/// here because the intended values cannot be recovered from this source.
/// </remarks>
static void Main(string[] args)
{
    // WebClient owns a network handle; dispose it deterministically instead of
    // leaking it until finalization (original code never disposed it).
    using (WebClient wc = new WebClient())
    {
        // Force UTF-8 decoding; WebClient's default (ANSI) would garble this page.
        wc.Encoding = Encoding.UTF8;
        string html = wc.DownloadString("http://www.lagou.com/");

        // See remarks: "(.*)" matches each line of the HTML. '.' does not match
        // newlines, so this yields one (possibly empty) match per line.
        MatchCollection matches = Regex.Matches(html, "(.*)");
        foreach (Match item in matches)
        {
            // Groups[0] is the whole match (identical to item.Value here).
            Console.WriteLine(item.Groups[0].Value);
        }
        Console.WriteLine(matches.Count);
    }

    // Keep the console window open until the user presses a key.
    Console.ReadKey();
}


评论


亲,登录后才可以留言!