Asp.net 使用正则和网络编程抓取网页数据(实用)
标签:asp.net httpwebrequest 源码 采集
Asp.net 使用正则和网络编程抓取网页数据(实用)
Asp.net 使用正则和网络编程抓取网页数据(实用)
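/// <summary>
/// 抓取网页相应内容
/// </summary>
/// <param name="strUrl">采集地址</param>
/// <param name="Begin">开始字符</param>
/// <param name="End">结束字符</param>
/// <returns></returns>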
private static String GetContent(String strUrl, String Begin, String End)
{
String result = String.Empty;
HttpWebRequest request = (HttpWebRequest)WebRequest.Create(strUrl);
HttpWebResponse response = (HttpWebResponse)request.GetResponse();
using (StreamReader reader = new StreamReader(response.GetResponseStream(), System.Text.Encoding.Default))
{
result = reader.ReadToEnd();
}
//抓取内容
Match table = Regex.Match(result, "(?
/// <summary>
/// 去除HTML标记
/// </summary>
/// <param name="Htmlstring">包括HTML的源码</param>
/// <returns>已经去除后的文字</returns>
private static string NoHTML(string Htmlstring)
{
//删除脚本
Htmlstring = Regex.Replace(Htmlstring, @"", "",
RegexOptions.IgnoreCase);
//删除HTML
Htmlstring = Regex.Replace(Htmlstring, @"]*)>", "",
RegexOptions.IgnoreCase);
Htmlstring = Regex.Replace(Htmlstring, @"([\r\n])[\s]+", "",
RegexOptions.IgnoreCase);
Htmlstring = Regex.Replace(Htmlstring, @"-->", "", RegexOptions.IgnoreCase);
Htmlstring = Regex.Replace(Htmlstring, @"
评论