C# 网页图片采集
标签:agent draw mozilla tty rectangle 要求 archive event 对象
博客原文地址:https://www.cnblogs.com/qq260250932/p/5361043.html
using System;
- using System.Collections.Generic;
- using System.Linq;
- using System.Text;
- using System.Text.RegularExpressions;
- using System.Net;
- using System.IO;
- using System.Windows.Forms;
- namespace ImageCollect
- {
- public class GatherPic
- {
- private string savePath;
- private string getUrl;
- private WebBrowser wb;
- private int iImgCount;
- //初始化参数
- public GatherPic(string sWebUrl, string sSavePath)
- {
- this.getUrl = sWebUrl;
- this.savePath = sSavePath;
- }
- //开始采集
- public bool start()
- {
- if (getUrl.Trim().Equals(""))
- {
- MessageBox.Show("哪来的虾米连网址都没输!");
- return false;
- }
- this.wb = new WebBrowser();
- this.wb.Navigate(getUrl);
- //委托事件
- this.wb.DocumentCompleted += new System.Windows.Forms.WebBrowserDocumentCompletedEventHandler(DocumentCompleted);
- return true;
- }
- //WebBrowser.DocumentCompleted委托事件
- private void DocumentCompleted(object sender, WebBrowserDocumentCompletedEventArgs e)
- {
- //页面里框架iframe加载完成不掉用SearchImgList()
- if (e.Url != wb.Document.Url) return;
- SearchImgList();
- }
- //检查出所有图片并采集到本地
- public void SearchImgList()
- {
- string sImgUrl;
- //取得所有图片地址
- HtmlElementCollection elemColl = this.wb.Document.GetElementsByTagName("img");
- this.iImgCount = elemColl.Count;
- foreach (HtmlElement elem in elemColl)
- {
- sImgUrl = elem.GetAttribute("src");
- //调用保存远程图片函数
- SaveImageFromWeb(sImgUrl, this.savePath);
- }
- }
- //保存远程图片函数
- public int SaveImageFromWeb(string imgUrl, string path)
- {
- string imgName = imgUrl.ToString().Substring(imgUrl.ToString().LastIndexOf("/") + 1);
- path = path + "//" + imgName;
- string defaultType = ".jpg";
- string[] imgTypes = new string[] { ".jpg", ".jpeg", ".png", ".gif", ".bmp" };
- string imgType = imgUrl.ToString().Substring(imgUrl.ToString().LastIndexOf("."));
- foreach (string it in imgTypes)
- {
- if (imgType.ToLower().Equals(it))
- break;
- if (it.Equals(".bmp"))
- imgType = defaultType;
- }
- try
- {
- HttpWebRequest request = (HttpWebRequest)WebRequest.Create(imgUrl);
- request.UserAgent = "Mozilla/6.0 (MSIE 6.0; Windows NT 5.1; Natas.Robot)";
- request.Timeout = 10000;
- WebResponse response = request.GetResponse();
- Stream stream = response.GetResponseStream();
- if (response.ContentType.ToLower().StartsWith("image/"))
- {
- byte[] arrayByte = new byte[1024];
- int imgLong = (int)response.ContentLength;
- int l = 0;
- // CreateDirectory(path);
- FileStream fso = new FileStream(path, FileMode.Create);
- while (l
- {
- int i = stream.Read(arrayByte, 0, 1024);
- fso.Write(arrayByte, 0, i);
- l += i;
- }
- fso.Close();
- stream.Close();
- response.Close();
- return 1;
- }
- else
- {
- return 0;
- }
- }
- catch (WebException)
- {
- return 0;
- }
- catch (UriFormatException)
- {
- return 0;
- }
- }
- }
- }
调用方法
- GatherPic g = new GatherPic(“http://www.baidu.com”,"E:/XXX");
- g.start();
=========================================================
在web项目中使用WebBrowser类-----给网站抓图
最近做一个WEB项目,其中要求有个功能就是程序能网页抓图,举个例子: 在test.aspx页面上放一个TextBox和一个Button,TextBox用来输入要抓取的网页地址,然后按了Button之后,服务器要对前面输入的网址进行抓图,然后显示出来。我把抓图的业务逻辑做成一个类:
using System;
using System.Data;
using System.Windows.Forms;
using System.Drawing;
///
/// WebSnap :网页抓图对象
///
public class WebSnap2
{
public WebSnap2()
{
//
// TODO: 在此处添加构造函数逻辑
//
}
///
/// 开始一个抓图并返回图象
///
/// 要抓取的网页地址
///
public Bitmap StartSnap(string Url)
{
WebBrowser myWB = this.GetPage(Url);
Bitmap returnValue = this.SnapWeb(myWB);
myWB.Dispose();
return returnValue;
}
private WebBrowser GetPage(string Url)
{
WebBrowser myWB = new WebBrowser();
myWB.ScrollBarsEnabled = false;
myWB.Navigate(Url);
while (myWB.ReadyState != WebBrowserReadyState.Complete)
{
System.Windows.Forms.Application.DoEvents();
}
return myWB;
}
private Bitmap SnapWeb(WebBrowser wb)
{
HtmlDocument hd = wb.Document;
int height = Convert.ToInt32(hd.Body.GetAttribute("scrollHeight")) + 10;
int width = Convert.ToInt32(hd.Body.GetAttribute("scrollWidth")) + 10;
wb.Height = height;
wb.Width = width;
Bitmap bmp = new Bitmap(width, height);
Rectangle rec = new Rectangle();
rec.Width = width;
rec.Height = height;
wb.DrawToBitmap(bmp, rec);
return bmp;
}
}
然后在test.asp的button_click事件里面调用:
WebSnap ws = new WebSnap();
Bitmap bmp= ws.StartSnap(TextBox1.Text);
System.IO.MemoryStream ms = new System.IO.MemoryStream();
bmp.Save(ms, System.Drawing.Imaging.ImageFormat.Jpeg);
Response.BinaryWrite(ms.GetBuffer());
C# 网页图片采集
标签:agent draw mozilla tty rectangle 要求 archive event 对象
原文地址:https://www.cnblogs.com/x-zhoulin/p/11737402.html
文章来自:
搜素材网的
编程语言模块,转载请注明文章出处。
文章标题:
C# 网页图片采集
文章链接:http://soscw.com/index.php/essay/87235.html
评论