HttpClient简介
HttpClient是Apache Jakarta Commons下的子项目,是一个高效的、最新的、功能丰富的支持HTTP协议的客户端编程工具包,并且支持 HTTP 协议的最新版本。它的主要功能有:
(1) 实现了所有 HTTP 的方法(GET,POST,PUT,HEAD 等)
(2) 支持自动转向
(3) 支持 HTTPS 协议
(4) 支持代理服务器等
Jsoup简介
jsoup是一款Java的HTML解析器,可直接解析某个URL地址、HTML文本内容。它提供了一套非常省力的API,可通过DOM,CSS以及类似于jQuery的操作方法来取出和操作数据。它的主要功能有:
(1) 从一个URL,文件或字符串中解析HTML;
(2) 使用DOM或CSS选择器来查找、取出数据;
(3) 可操作HTML元素、属性、文本;
使用步骤
代码
import org.apache.http.HttpEntity; import org.apache.http.client.config.RequestConfig; import org.apache.http.client.methods.CloseableHttpResponse; import org.apache.http.client.methods.HttpGet; import org.apache.http.client.protocol.HttpClientContext; import org.apache.http.impl.client.CloseableHttpClient; import org.apache.http.impl.client.HttpClientBuilder; import org.apache.http.util.EntityUtils; import org.jsoup.Jsoup; import org.jsoup.nodes.Document; import org.jsoup.nodes.Element; import org.junit.Test; import java.util.List; /** * HttpClient & Jsoup libruary test class * * Created by xuyh at 2017/11/6 15:28. */ public classHttpClientJsoupTest{ @Test public void test() { //通过httpClient获取网页响应,将返回的响应解析为纯文本 HttpGet httpGet = new HttpGet("http://sports.sina.com.cn/"); httpGet.setConfig(RequestConfig.custom().setSocketTimeout(30000).setConnectTimeout(30000).build()); CloseableHttpClient httpClient = null; CloseableHttpResponse response = null; String responseStr = ""; try { httpClient = HttpClientBuilder.create().build(); HttpClientContext context = HttpClientContext.create(); response = httpClient.execute(httpGet, context); int state = response.getStatusLine().getStatusCode(); if (state != 200) responseStr = ""; HttpEntity entity = response.getEntity(); if (entity != null) responseStr = EntityUtils.toString(entity, "utf-8"); } catch (Exception e) { e.printStackTrace(); } finally { try { if (response != null) response.close(); if (httpClient != null) httpClient.close(); } catch (Exception ex) { ex.printStackTrace(); } } if (responseStr == null) return; //将解析到的纯文本用Jsoup工具转换成Document文档并进行操作 Document document = Jsoup.parse(responseStr); List<Element> elements = document.getElementsByAttributeValue("class", "phdnews_txt fr").first() .getElementsByAttributeValue("class", "phdnews_hdline"); elements.forEach(element -> { for (Element e : element.getElementsByTag("a")) { System.out.println(e.attr("href")); System.out.println(e.text()); } }); }
详解
新建HttpGet对象,该对象将向 http://sports.sina.com.cn/ 这个URL地址发送GET请求并获取响应,同时将socket超时时间和连接超时时间均设置为30000ms。
将HttpClient和Jsoup进行封装,形成一个工具类,内容如下:
import org.apache.http.HttpEntity; import org.apache.http.NameValuePair; import org.apache.http.client.CookieStore; import org.apache.http.client.config.RequestConfig; import org.apache.http.client.entity.UrlEncodedFormEntity; import org.apache.http.client.methods.CloseableHttpResponse; import org.apache.http.client.methods.HttpGet; import org.apache.http.client.methods.HttpPost; import org.apache.http.client.protocol.HttpClientContext; import org.apache.http.conn.ssl.SSLConnectionSocketFactory; import org.apache.http.cookie.Cookie; import org.apache.http.entity.ContentType; import org.apache.http.entity.StringEntity; import org.apache.http.impl.client.CloseableHttpClient; import org.apache.http.impl.client.HttpClientBuilder; import org.apache.http.impl.client.HttpClients; import org.apache.http.message.BasicNameValuePair; import org.apache.http.ssl.SSLContextBuilder; import org.apache.http.util.EntityUtils; import org.jsoup.Jsoup; import org.jsoup.nodes.Document; import javax.net.ssl.*; import java.io.IOException; import java.security.GeneralSecurityException; import java.util.ArrayList; import java.util.HashMap; import java.util.List; import java.util.Map; /** * * Http工具,包含: * 普通http请求工具(使用httpClient进行http,https请求的发送) * * Created by xuyh at 2017/7/17 19:08. 
*/ public classHttpUtils{ /** * 请求超时时间,默认20000ms */ private int timeout = 20000; /** * cookie表 */ private Map<String, String> cookieMap = new HashMap<>(); /** * 请求编码(处理返回结果),默认UTF-8 */ private String charset = "UTF-8"; private static HttpUtils httpUtils; privateHttpUtils(){ } /** * 获取实例 * *@return */ publicstaticHttpUtilsgetInstance(){ if (httpUtils == null) httpUtils = new HttpUtils(); return httpUtils; } /** * 清空cookieMap */ publicvoidinvalidCookieMap(){ cookieMap.clear(); } publicintgetTimeout(){ return timeout; } /** * 设置请求超时时间 * *@paramtimeout */ publicvoidsetTimeout(inttimeout){ this.timeout = timeout; } publicStringgetCharset(){ return charset; } /** * 设置请求字符编码集 * *@paramcharset */ publicvoidsetCharset(String charset){ this.charset = charset; } /** * 将网页返回为解析后的文档格式 * *@paramhtml *@return *@throwsException */ publicstaticDocumentparseHtmlToDoc(String html)throwsException{ return removeHtmlSpace(html); } privatestaticDocumentremoveHtmlSpace(String str){ Document doc = Jsoup.parse(str); String result = doc.html().replace(" ", ""); return Jsoup.parse(result); } /** * 执行get请求,返回doc * *@paramurl *@return *@throwsException */ publicDocumentexecuteGetAsDocument(String url)throwsException{ return parseHtmlToDoc(executeGet(url)); } /** * 执行get请求 * *@paramurl *@return *@throwsException */ publicStringexecuteGet(String url)throwsException{ HttpGet httpGet = new HttpGet(url); httpGet.setHeader("Cookie", convertCookieMapToString(cookieMap)); httpGet.setConfig(RequestConfig.custom().setSocketTimeout(timeout).setConnectTimeout(timeout).build()); CloseableHttpClient httpClient = null; String str = ""; try { httpClient = HttpClientBuilder.create().build(); HttpClientContext context = HttpClientContext.create(); CloseableHttpResponse response = httpClient.execute(httpGet, context); getCookiesFromCookieStore(context.getCookieStore(), cookieMap); int state = response.getStatusLine().getStatusCode(); if (state == 404) { str = ""; } try { HttpEntity entity = response.getEntity(); 
if (entity != null) { str = EntityUtils.toString(entity, charset); } } finally { response.close(); } } catch (IOException e) { throw e; } finally { try { if (httpClient != null) httpClient.close(); } catch (IOException e) { throw e; } } return str; } /** * 用https执行get请求,返回doc * *@paramurl *@return *@throwsException */ publicDocumentexecuteGetWithSSLAsDocument(String url)throwsException{ return parseHtmlToDoc(executeGetWithSSL(url)); } /** * 用https执行get请求 * *@paramurl *@return *@throwsException */ publicStringexecuteGetWithSSL(String url)throwsException{ HttpGet httpGet = new HttpGet(url); httpGet.setHeader("Cookie", convertCookieMapToString(cookieMap)); httpGet.setConfig(RequestConfig.custom().setSocketTimeout(timeout).setConnectTimeout(timeout).build()); CloseableHttpClient httpClient = null; String str = ""; try { httpClient = createSSLInsecureClient(); HttpClientContext context = HttpClientContext.create(); CloseableHttpResponse response = httpClient.execute(httpGet, context); getCookiesFromCookieStore(context.getCookieStore(), cookieMap); int state = response.getStatusLine().getStatusCode(); if (state == 404) { str = ""; } try { HttpEntity entity = response.getEntity(); if (entity != null) { str = EntityUtils.toString(entity, charset); } } finally { response.close(); } } catch (IOException e) { throw e; } catch (GeneralSecurityException ex) { throw ex; } finally { try { if (httpClient != null) httpClient.close(); } catch (IOException e) { throw e; } } return str; } /** * 执行post请求,返回doc * *@paramurl *@paramparams *@return *@throwsException */ publicDocumentexecutePostAsDocument(String url, Map<String, String> params)throwsException{ return parseHtmlToDoc(executePost(url, params)); } /** * 执行post请求 * *@paramurl *@paramparams *@return *@throwsException */ publicStringexecutePost(String url, Map<String, String> params)throwsException{ String reStr = ""; HttpPost httpPost = new HttpPost(url); 
httpPost.setConfig(RequestConfig.custom().setSocketTimeout(timeout).setConnectTimeout(timeout).build()); httpPost.setHeader("Cookie", convertCookieMapToString(cookieMap)); List<NameValuePair> paramsRe = new ArrayList<>(); for (String key : params.keySet()) { paramsRe.add(new BasicNameValuePair(key, params.get(key))); } CloseableHttpClient httpclient = HttpClientBuilder.create().build(); CloseableHttpResponse response; try { httpPost.setEntity(new UrlEncodedFormEntity(paramsRe)); HttpClientContext context = HttpClientContext.create(); response = httpclient.execute(httpPost, context); getCookiesFromCookieStore(context.getCookieStore(), cookieMap); HttpEntity entity = response.getEntity(); reStr = EntityUtils.toString(entity, charset); } catch (IOException e) { throw e; } finally { httpPost.releaseConnection(); } return reStr; } /** * 用https执行post请求,返回doc * *@paramurl *@paramparams *@return *@throwsException */ publicDocumentexecutePostWithSSLAsDocument(String url, Map<String, String> params)throwsException{ return parseHtmlToDoc(executePostWithSSL(url, params)); } /** * 用https执行post请求 * *@paramurl *@paramparams *@return *@throwsException */ publicStringexecutePostWithSSL(String url, Map<String, String> params)throwsException{ String re = ""; HttpPost post = new HttpPost(url); List<NameValuePair> paramsRe = new ArrayList<>(); for (String key : params.keySet()) { paramsRe.add(new BasicNameValuePair(key, params.get(key))); } post.setHeader("Cookie", convertCookieMapToString(cookieMap)); post.setConfig(RequestConfig.custom().setSocketTimeout(timeout).setConnectTimeout(timeout).build()); CloseableHttpResponse response; try { CloseableHttpClient httpClientRe = createSSLInsecureClient(); HttpClientContext contextRe = HttpClientContext.create(); post.setEntity(new UrlEncodedFormEntity(paramsRe)); response = httpClientRe.execute(post, contextRe); HttpEntity entity = response.getEntity(); if (entity != null) { re = EntityUtils.toString(entity, charset); } 
getCookiesFromCookieStore(contextRe.getCookieStore(), cookieMap); } catch (Exception e) { throw e; } return re; } /** * 发送JSON格式body的POST请求 * *@paramurl 地址 *@paramjsonBody json body *@return *@throwsException */ publicStringexecutePostWithJson(String url, String jsonBody)throwsException{ String reStr = ""; HttpPost httpPost = new HttpPost(url); httpPost.setConfig(RequestConfig.custom().setSocketTimeout(timeout).setConnectTimeout(timeout).build()); httpPost.setHeader("Cookie", convertCookieMapToString(cookieMap)); CloseableHttpClient httpclient = HttpClientBuilder.create().build(); CloseableHttpResponse response; try { httpPost.setEntity(new StringEntity(jsonBody, ContentType.APPLICATION_JSON)); HttpClientContext context = HttpClientContext.create(); response = httpclient.execute(httpPost, context); getCookiesFromCookieStore(context.getCookieStore(), cookieMap); HttpEntity entity = response.getEntity(); reStr = EntityUtils.toString(entity, charset); } catch (IOException e) { throw e; } finally { httpPost.releaseConnection(); } return reStr; } /** * 发送JSON格式body的SSL POST请求 * *@paramurl 地址 *@paramjsonBody json body *@return *@throwsException */ publicStringexecutePostWithJsonAndSSL(String url, String jsonBody)throwsException{ String re = ""; HttpPost post = new HttpPost(url); post.setHeader("Cookie", convertCookieMapToString(cookieMap)); post.setConfig(RequestConfig.custom().setSocketTimeout(timeout).setConnectTimeout(timeout).build()); CloseableHttpResponse response; try { CloseableHttpClient httpClientRe = createSSLInsecureClient(); HttpClientContext contextRe = HttpClientContext.create(); post.setEntity(new StringEntity(jsonBody, ContentType.APPLICATION_JSON)); response = httpClientRe.execute(post, contextRe); HttpEntity entity = response.getEntity(); if (entity != null) { re = EntityUtils.toString(entity, charset); } getCookiesFromCookieStore(contextRe.getCookieStore(), cookieMap); } catch (Exception e) { throw e; } return re; } 
privatevoidgetCookiesFromCookieStore(CookieStore cookieStore, Map<String, String> cookieMap){ List<Cookie> cookies = cookieStore.getCookies(); for (Cookie cookie : cookies) { cookieMap.put(cookie.getName(), cookie.getValue()); } } privateStringconvertCookieMapToString(Map<String, String> map){ String cookie = ""; for (String key : map.keySet()) { cookie += (key + "=" + map.get(key) + "; "); } if (map.size() > 0) { cookie = cookie.substring(0, cookie.length() - 2); } return cookie; } /** * 创建 SSL连接 * *@return *@throwsGeneralSecurityException */ privatestaticCloseableHttpClientcreateSSLInsecureClient()throwsGeneralSecurityException{ try { SSLContext sslContext = new SSLContextBuilder().loadTrustMaterial(null, (chain, authType) -> true).build(); SSLConnectionSocketFactory sslConnectionSocketFactory = new SSLConnectionSocketFactory(sslContext, (s, sslContextL) -> true); return HttpClients.custom().setSSLSocketFactory(sslConnectionSocketFactory).build(); } catch (GeneralSecurityException e) { throw e; } } }
给大家推荐一个程序员学习交流群:863621962。群里有分享的视频,还有思维导图
群公告有视频,都是干货的,你可以下载来看。主要分享分布式架构、高可扩展、高性能、高并发、性能优化、Spring boot、Redis、ActiveMQ、Nginx、Mycat、Netty、Jvm大型分布式项目实战学习架构师视频。
原创声明:本文系作者授权腾讯云开发者社区发表,未经许可,不得转载。
如有侵权,请联系 cloudcommunity@tencent.com 删除。
原创声明:本文系作者授权腾讯云开发者社区发表,未经许可,不得转载。
如有侵权,请联系 cloudcommunity@tencent.com 删除。