使用curl 下载HTML
标签:style blog class code java c
简单的一个curl小例子:
#include <iostream>
#include <string>
#include <sstream>
#include <curl/curl.h>
#include <string.h>
// Buffer size in bytes (100 KiB). The expansion is parenthesized so the macro
// behaves as a single value inside larger expressions such as `x / BUF_SIZE`.
#define BUF_SIZE (1024 * 100)
using namespace std;
// Forward declaration; the definition follows main() in this file.
// Fetches `url` with libcurl and returns the response body, or an empty
// string when the transfer does not finish with CURLE_OK.
string DownloadString(char* url);
int main(int argc, const char* argv[]){
curl_global_init(CURL_GLOBAL_ALL);
cout "http://www.baidu.com/");
cin.get();
curl_global_cleanup();
return 0;
}
// libcurl write callback: appends one chunk of received data to `*out`.
//
// in    - pointer to the received bytes; NOT NUL-terminated and may contain
//         embedded '\0' bytes.
// size  - size of one element (libcurl always passes 1).
// nmemb - number of elements, so the chunk length is size * nmemb bytes.
// out   - destination string supplied through CURLOPT_WRITEDATA /
//         CURLOPT_HEADERDATA.
//
// Returns the number of bytes consumed. libcurl treats any value other than
// size * nmemb as a write error and aborts the transfer, which is why 0 is
// returned when a pointer is missing. The return type is size_t because that
// is the type libcurl expects from a write callback.
size_t WriteData(char* in, size_t size, size_t nmemb, std::string* out){
    const size_t total = size * nmemb;
    if (in == nullptr || out == nullptr) {
        return 0;
    }
    // Append exactly `total` bytes; the length-less append() overload would
    // scan for a terminating NUL that libcurl does not provide.
    out->append(in, total);
    return total;
}
string DownloadString(char* url){
string buffer;
string headerData;
CURL* conn;
curl_slist* header = NULL;
header = curl_slist_append(header, "Accept-Encoding: gzip, deflate");
header = curl_slist_append(header, "User-Agent: Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 5.1; CIBA)");
header = curl_slist_append(header, "Connection: Keep-Alive");
conn = curl_easy_init();
curl_easy_setopt(conn, CURLoption::CURLOPT_URL, url);
curl_easy_setopt(conn, CURLoption::CURLOPT_HTTPHEADER, header);
curl_easy_setopt(conn, CURLoption::CURLOPT_ACCEPT_ENCODING, "gzip");
curl_easy_setopt(conn, CURLoption::CURLOPT_WRITEDATA, &buffer);
curl_easy_setopt(conn, CURLoption::CURLOPT_WRITEFUNCTION, WriteData);
curl_easy_setopt(conn, CURLoption::CURLOPT_WRITEHEADER, &headerData);
CURLcode code = curl_easy_perform(conn);
if (code != CURLcode::CURLE_OK)
return "";
curl_slist_free_all(header);
curl_easy_cleanup(conn);
istringstream istream(headerData.c_str());
string out;
bool isgzip = false;
while (istream.good())
{
getline(istream, out, ‘\n‘);
if (!out.empty()){
if (out.find("Content-Encoding") != out.npos && out.find("gzip") != out.npos){
isgzip = true;
}
}
}
///gzip
return buffer;
}
该例子通过 libcurl 下载百度首页的 HTML 与响应头信息,并由 libcurl 自动对 gzip 压缩的响应体进行解码。
使用curl 下载HTML,搜素材,soscw.com
使用curl 下载HTML
标签:style blog class code java c
原文地址:http://www.cnblogs.com/Gool/p/3721312.html
评论