HttpClient+jsoup登录+解析 163邮箱

2020-12-13 03:30

阅读:275

import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.UnsupportedEncodingException;
import java.net.URI;
import java.security.cert.CertificateException;
import java.security.cert.X509Certificate;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.zip.GZIPInputStream;
import java.util.zip.InflaterInputStream;

import javax.net.ssl.SSLContext;
import javax.net.ssl.TrustManager;
import javax.net.ssl.X509TrustManager;

import org.apache.commons.lang.StringUtils;
import org.apache.http.Header;
import org.apache.http.HttpEntity;
import org.apache.http.HttpHost;
import org.apache.http.HttpResponse;
import org.apache.http.NameValuePair;
import org.apache.http.client.HttpClient;
import org.apache.http.client.entity.UrlEncodedFormEntity;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.client.methods.HttpPost;
import org.apache.http.client.methods.HttpUriRequest;
import org.apache.http.conn.ClientConnectionManager;
import org.apache.http.conn.scheme.Scheme;
import org.apache.http.conn.scheme.SchemeRegistry;
import org.apache.http.conn.ssl.SSLSocketFactory;
import org.apache.http.cookie.Cookie;
import org.apache.http.impl.client.BasicCookieStore;
import org.apache.http.impl.client.DefaultHttpClient;
import org.apache.http.message.BasicHeader;
import org.apache.http.message.BasicNameValuePair;
import org.apache.http.protocol.BasicHttpContext;
import org.apache.http.protocol.HttpContext;
import org.apache.http.util.EntityUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
 * 对HttpClient返回的结果进一步封装
 * 
@author bangis.wangdf
 *
 
*/
public class HttpResult {
    
    private static Logger LOG = LoggerFactory.getLogger(HttpResult.class);
    
    private static Pattern headerCharsetPattern = Pattern.compile(
            "charset=((gb2312)|(gbk)|(utf-8))", 2);
    private static Pattern pattern = Pattern
            .compile(
                    "]*content=([‘\"])?[^>]*charset=((gb2312)|(gbk)|(utf-8))\\1[^>]*>",
                    2);
    private String headerCharset;
    private String headerContentType;
    private String headerContentEncoding;
    private List

 headers;
    private String metaCharset;
    private byte[] response;
    private String responseUrl;
    private int statuCode = -1;
    private static final int BUFFER_SIZE = 4096;

    public static HttpResult empty() {
        return new HttpResult();
    }

    public String getHeaderCharset() {
        return this.headerCharset;
    }

    public String getHeaderContentType() {
        return this.headerContentType;
    }

    public final List

 getHeaders() {
        return this.headers;
    }

    public String getHtml() {
        try {
            return getText();
        } catch (UnsupportedEncodingException e) {
            LOG.error("[AGDS-SPIDER]" + e.getMessage(), e);
        }
        return "";
    }
    
    public String getHtml(String encoding) {
        try {
            return getText(encoding);
        } catch (UnsupportedEncodingException e) {
            LOG.error("[AGDS-SPIDER]" + e.getMessage(), e);
        }
        return "";
    }

    public String getMetaCharset() {
        return this.metaCharset;
    }

    public byte[] getResponse() {
        return Arrays.copyOf(this.response, this.response.length);
    }

    public String getResponseUrl() {
        return this.responseUrl;
    }

    public int getStatuCode() {
        return this.statuCode;
    }

    public String getText() throws UnsupportedEncodingException {
        return getText("");
    }

    public String getText(String encoding) throws UnsupportedEncodingException {
        if (this.response == null){
            return "";
        }
        String encodingStr = encoding;
        if (StringUtils.isBlank(encoding)){
            encodingStr = this.metaCharset;
        }

        if (StringUtils.isBlank(encoding)){
            encodingStr = this.headerCharset;
        }

        if (StringUtils.isBlank(encoding)){
            encodingStr = "UTF-8";
        }

        return new String(this.response, encodingStr);
    }

    private String getCharsetFromMeta() {
        StringBuilder builder = new StringBuilder();
        String charset = "";
        for (int i = 0; (i this.response.length) && ("".equals(charset)); ++i) {
            char c = (charthis.response[i];
            switch (c) {
            case ‘                builder.delete(0, builder.length());
                builder.append(c);
                break;
            case ‘>‘:
                if (builder.length() > 0){
                    builder.append(c);
                }
                String meta = builder.toString();

                if (meta.toLowerCase().startsWith("                    charset = getCharsetFromMeta(meta);
                }
                break;
            case ‘=‘:
            default:
                if (builder.length() > 0){
                    builder.append(c);
                }
            }

        }

        return charset;
    }

    private String getCharsetFromMeta(String meta) {
        if (StringUtils.isBlank(meta)){
            return "";
        }
        Matcher m = pattern.matcher(meta);
        if (m.find()){
            return m.group(2);
        }
        return "";
    }

    private void getHttpHeaders(HttpResponse httpResponse) {
        String headerName = "";
        String headerValue = "";
        int index = -1;

        Header[] rspHeaders = httpResponse.getAllHeaders();
        for (int i = 0; i             Header header = rspHeaders[i];
            this.headers.add(header);

            headerName = header.getName();
            if ("Content-Type".equalsIgnoreCase(headerName)) {
                headerValue = header.getValue();
                index = headerValue.indexOf(‘;‘);
                if (index > 0){
                    this.headerContentType = headerValue.substring(0, index);
                }
                Matcher m = headerCharsetPattern.matcher(headerValue);
                if (m.find()){
                    this.headerCharset = m.group(1);
                }
            }

            if ("Content-Encoding".equalsIgnoreCase(headerName)){
                this.headerContentEncoding = header.getValue();
            }
        }
    }

    private void getResponseUrl(HttpContext httpContext) {
        HttpHost target = (HttpHost) httpContext
                .getAttribute("http.target_host");

        HttpUriRequest req = (HttpUriRequest) httpContext
                .getAttribute("http.request");

        this.responseUrl = target.toString() + req.getURI().toString();
    }

    public HttpResult(HttpContext httpContext, HttpResponse httpResponse) {
        this.headers = new ArrayList

();

        this.statuCode = httpResponse.getStatusLine().getStatusCode();

        if (httpContext != null) {
            getResponseUrl(httpContext);
        }

        if (httpResponse != null) {
            getHttpHeaders(httpResponse);
            try {
                if (("gzip".equalsIgnoreCase(this.headerContentEncoding))
                        || ("deflate".equalsIgnoreCase(this.headerContentEncoding))) {
                    GZIPInputStream is = new GZIPInputStream(httpResponse.getEntity().getContent());
                    ByteArrayOutputStream os = new ByteArrayOutputStream();
                    byte[] buffer = new byte[BUFFER_SIZE];
                    int count = 0;
                    while ((count = is.read(buffer)) > 0){
                        os.write(buffer, 0, count);
                    }
                    this.response = os.toByteArray();
                    os.close();
                    is.close();
                }else{
                    this.response = EntityUtils.toByteArray(httpResponse.getEntity());
                }
            } catch (Exception e) {
                LOG.error("[AGDS-SPIDER]" + e.getMessage(), e);
            }
            if (this.response != null){
                this.metaCharset = getCharsetFromMeta();
            }
        }
    }

    private HttpResult() {
    }
}


评论


亲,登录后才可以留言!