c# 爬虫(三) 文件上传
2021-05-27 22:03
阅读:545
在上一篇中,我们说了模拟登录,
下面我们说说附件上传。
据说,最早的HTTP协议是不支持附件上传的,后来又添加了一个RFC 2045 协议,才支持附件上传。关于附件上传,请参见
http://www.cnblogs.com/greenerycn/archive/2010/05/15/csharp_http_post.html
好了,其实用C#模拟上传附件,主要的难点就在于如何构建此协议的格式以及编码。
作为备忘,我这里只放一段成品代码了。
参数:URL,上传控件的id,附件的本地全路径,传输的Key-value数据
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
|
/// <summary>
/// Uploads a local file together with extra form fields to <paramref name="url"/>
/// as a single multipart/form-data POST and returns the response body as text.
/// </summary>
/// <param name="url">Address the form is posted to.</param>
/// <param name="fileControlID">Form field name used for the file part.</param>
/// <param name="fileFullPath">Full local path of the file to upload.</param>
/// <param name="dic">Additional key/value form fields; null is treated as "no extra fields".</param>
/// <returns>The response body read to the end as a string.</returns>
/// <remarks>
/// Reads the UserAgent and Cookie members of the enclosing class when they are non-empty.
/// NOTE(review): those members are declared outside this snippet; both are assumed to be strings.
/// </remarks>
public string HttpWebUpload(string url, string fileControlID, string fileFullPath, Dictionary<string, string> dic)
{
    // multipart/form-data requires CRLF line breaks on every platform, so do not
    // rely on StringBuilder.AppendLine (which emits Environment.NewLine).
    const string CrLf = "\r\n";
    string fileContentType = "application/octet-stream"; // alternatives: "text/plain", "image/jpeg"

    // Strip the directory part; accept both '\' and '/' so the result does not depend on the host OS.
    int index = fileFullPath.LastIndexOfAny(new[] { '\\', '/' });
    string filename = fileFullPath.Substring(index + 1);

    var boundary = DateTime.Now.Ticks.ToString("x");
    var beginBoundary = "--" + boundary;
    var reqContentType = "multipart/form-data; boundary=" + boundary;

    // ReadAllBytes opens, fully reads and closes the file; a single Stream.Read call
    // is not guaranteed to fill the buffer and the stream was previously never closed.
    byte[] btfileValue = File.ReadAllBytes(fileFullPath);

    // Header of the file part; the raw file bytes follow immediately after the blank line.
    StringBuilder fileHeader = new StringBuilder();
    fileHeader.Append(beginBoundary).Append(CrLf);
    fileHeader.Append(string.Format("Content-Disposition: form-data; name=\"{0}\"; filename=\"{1}\"", fileControlID, filename)).Append(CrLf);
    fileHeader.Append(string.Format("Content-Type: {0}", fileContentType)).Append(CrLf);
    fileHeader.Append(CrLf);
    var btfileKey = Encoding.UTF8.GetBytes(fileHeader.ToString());

    // Remaining form fields, then the closing boundary. The leading CRLF terminates the file content.
    StringBuilder dicData = new StringBuilder();
    dicData.Append(CrLf);
    if (dic != null)
    {
        foreach (var item in dic)
        {
            dicData.Append(beginBoundary).Append(CrLf);
            dicData.Append(string.Format("Content-Disposition:form-data; name=\"{0}\"", item.Key)).Append(CrLf);
            dicData.Append(CrLf);
            dicData.Append(item.Value).Append(CrLf);
        }
    }
    dicData.Append(beginBoundary + "--").Append(CrLf);
    var btDic = Encoding.UTF8.GetBytes(dicData.ToString());

    HttpWebRequest req = (HttpWebRequest)WebRequest.Create(url);
    req.Method = "POST";
    req.AllowAutoRedirect = false;
    req.ContentType = reqContentType;
    if (!string.IsNullOrEmpty(UserAgent))
    {
        req.UserAgent = this.UserAgent;
    }
    if (!string.IsNullOrEmpty(Cookie))
    {
        req.CookieContainer = new CookieContainer();
        req.CookieContainer.SetCookies(req.RequestUri, this.Cookie);
    }

    // Sum as long so a large file cannot overflow the int addition.
    req.ContentLength = (long)btfileKey.Length + btfileValue.Length + btDic.Length;

    using (Stream postDataStream = req.GetRequestStream())
    {
        postDataStream.Write(btfileKey, 0, btfileKey.Length);
        postDataStream.Write(btfileValue, 0, btfileValue.Length);
        postDataStream.Write(btDic, 0, btDic.Length);
    }

    // Dispose the response and reader so the underlying connection is released.
    using (HttpWebResponse resp = (HttpWebResponse)req.GetResponse())
    using (StreamReader reader = new StreamReader(resp.GetResponseStream()))
    {
        return reader.ReadToEnd();
    }
}
|
注意,这里如果需要cookie,则添加对应的cookie。
调用方法
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
|
// Usage example: upload a local PDF through an ASP.NET WebForms page.
// NOTE(review): `web` is an instance of the crawler class that declares HttpWebUpload; it is created outside this snippet.
string controlid = "fupCert";
string filefullPath = "f:\\3.pdf";
string url = "http://10.35.3.240/NT_HandbookExtend/FrmCertificate.aspx?type=N&zdtype=J&DELEGATENO=SBWJ16100001&ID=SB20161018000001%2c3206960757&Action=Modify&manualRecordID=SL20161018000001&tm=0.17032586549994377";
var hiddenVal = web.GetDoNetHiddenValuesByUrl(url); // Fetch the __VIEWSTATE and __EVENTVALIDATION hidden field values.

// Form fields posted alongside the file.
Dictionary<string, string> dic = new Dictionary<string, string>();
dic.Add("__EVENTARGUMENT", "");
dic.Add("__EVENTTARGET", "btnUpload");
dic.Add("__EVENTVALIDATION", hiddenVal.Eventvalidation);
dic.Add("__PREVIOUSPAGE", "EncayjCF95BJXxMazWGgd9UdPYeLp64GjuJlZ-rvnQ5n34-y7KQYllE35nAdVFvk0");
dic.Add("__VIEWSTATE", hiddenVal.Vievstate);
dic.Add("__VIEWSTATEENCRYPTED", "");
dic.Add("grdNavigatorPRE_EMS3_CUS_IMG$ctl06", "");
dic.Add("grdNavigatorPRE_EMS3_CUS_IMG$ctl07", "15");
dic.Add("grdNavigatorPRE_EMS3_CUS_IMG$labelRowCount", "8");
dic.Add("hfERP", "http://www.nteport.gov.cn/cerp/platform/erp/documentManagement/documentDocking.jsp");
dic.Add("hidFileType", "333");
dic.Add("hidID", "");

// Optional: set a browser-like User-Agent before uploading.
//web.UserAgent = "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; WOW64; Trident/5.0)";
string html = web.HttpWebUpload(url, controlid, filefullPath, dic);
|
其他注意事项:
1.如果不限制文件类型,请使用 ContentType = "application/octet-stream";这个我也没怎么懂,尴尬。。。
2. 编码很重要:由于参数都是以流的形式传输,文件内容按原始字节直接写入(不做编码转换),而其余按 RFC 2045 协议格式构建的部分(分隔符、头信息和表单字段)使用 UTF-8 编码。
下一篇:.Net C#装箱和拆箱
评论
亲,登录后才可以留言!