httpclient4.3.1 The server failed to respond with a valid HTTP response
2014-02-22 16:16:08 ERROR GetHtmlUtil:217 - 循环批次:1, http请求错误:https://www.google.com.hk/search?lr=lang_zh-CN&newwindow=1&safe=strict&tbas=0&tbm=nws&source=lnt&tbs=sbd:1&sa=X&ei=Dj3eUqP9GuTliAf7l4C4Cw&ved=0CCUQpwUoAQ&biw=640&bih=521&dpr=1&q=site:cn.wsj.com%20%22%E4%B8%9C%E5%8F%B0%E5%B8%82%22 com.weidou.mota.exception.FetcherException: com.weidou.mota.exception.FetcherException: org.apache.http.client.ClientProtocolException at com.weidou.mota.fetcher.DefaultHtmlFetcher.getHtml(DefaultHtmlFetcher.java:209) at com.weidou.mota.fetcher.DefaultHtmlFetcher.getHtml(DefaultHtmlFetcher.java:171) at com.weidou.mota.fetcher.DefaultHtmlFetcher.getHtml(DefaultHtmlFetcher.java:225) at com.weidou.prism.spider.Utils.GetHtmlUtil.getHtml(GetHtmlUtil.java:215) at com.weidou.prism.spider.Utils.GetHtmlUtil.getHtmlByGet(GetHtmlUtil.java:85) at com.weidou.prism.spider.newsList.NewsListCrawler.searchNewss(NewsListCrawler.java:80) at com.weidou.prism.spider.newsList.NewsListCrawler.crawlerNews(NewsListCrawler.java:262) at com.weidou.prism.spider.ClientTask$1.run(ClientTask.java:59) at java.lang.Thread.run(Thread.java:701) Caused by: com.weidou.mota.exception.FetcherException: org.apache.http.client.ClientProtocolException at com.weidou.mota.fetcher.DefaultHtmlFetcher.getHtml(DefaultHtmlFetcher.java:197) ... 8 more Caused by: org.apache.http.client.ClientProtocolException at org.apache.http.impl.client.InternalHttpClient.doExecute(InternalHttpClient.java:188) at org.apache.http.impl.client.CloseableHttpClient.execute(CloseableHttpClient.java:82) at org.apache.http.impl.client.CloseableHttpClient.execute(CloseableHttpClient.java:106) at com.weidou.mota.fetcher.DefaultHtmlFetcher.getHtml(DefaultHtmlFetcher.java:194) ... 
8 more Caused by: org.apache.http.ProtocolException: The server failed to respond with a valid HTTP response at org.apache.http.impl.conn.DefaultHttpResponseParser.parseHead(DefaultHttpResponseParser.java:151) at org.apache.http.impl.conn.DefaultHttpResponseParser.parseHead(DefaultHttpResponseParser.java:57) at org.apache.http.impl.io.AbstractMessageParser.parse(AbstractMessageParser.java:260) at org.apache.http.impl.DefaultBHttpClientConnection.receiveResponseHeader(DefaultBHttpClientConnection.java:161) at sun.reflect.GeneratedMethodAccessor15.invoke(Unknown Source) at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) at java.lang.reflect.Method.invoke(Method.java:622) at org.apache.http.impl.conn.CPoolProxy.invoke(CPoolProxy.java:138) at com.sun.proxy.$Proxy1.receiveResponseHeader(Unknown Source) at org.apache.http.protocol.HttpRequestExecutor.doReceiveResponse(HttpRequestExecutor.java:271) at org.apache.http.protocol.HttpRequestExecutor.execute(HttpRequestExecutor.java:123) at org.apache.http.impl.execchain.MainClientExec.createTunnelToTarget(MainClientExec.java:449) at org.apache.http.impl.execchain.MainClientExec.establishRoute(MainClientExec.java:374) at org.apache.http.impl.execchain.MainClientExec.execute(MainClientExec.java:218) at org.apache.http.impl.execchain.ProtocolExec.execute(ProtocolExec.java:194) at org.apache.http.impl.execchain.RetryExec.execute(RetryExec.java:85) at org.apache.http.impl.execchain.RedirectExec.execute(RedirectExec.java:108) at org.apache.http.impl.client.InternalHttpClient.doExecute(InternalHttpClient.java:186) ... 11 more
2014-02-22 16:21:18 ERROR GetHtmlUtil:217 - 循环批次:1, http请求错误:http://www.baidu.com/link?url=CmtBvUMsEt2Z9uhOLyq4mrw3uu5fiJN4IQ6KLw5oJZJ9XBuZRNPtWMZhM4AB_sIcVAzgZRO2ejM3zL90L4p5CQVIKMtfuUKoWXni061oBZu com.weidou.mota.exception.FetcherException: com.weidou.mota.exception.FetcherException: org.apache.http.client.ClientProtocolException at com.weidou.mota.fetcher.DefaultHtmlFetcher.getHtml(DefaultHtmlFetcher.java:155) at com.weidou.mota.fetcher.DefaultHtmlFetcher.getHtml(DefaultHtmlFetcher.java:173) at com.weidou.mota.fetcher.DefaultHtmlFetcher.getHtml(DefaultHtmlFetcher.java:230) at com.weidou.prism.spider.Utils.GetHtmlUtil.getHtml(GetHtmlUtil.java:215) at com.weidou.prism.spider.Utils.GetHtmlUtil.getHtmlByGet(GetHtmlUtil.java:101) at com.weidou.prism.spider.newsDetail.NewsDetailCrawler.crawlingDetails(NewsDetailCrawler.java:75) at com.weidou.prism.spider.ClientTask$2.run(ClientTask.java:80) at java.lang.Thread.run(Thread.java:662) Caused by: com.weidou.mota.exception.FetcherException: org.apache.http.client.ClientProtocolException at com.weidou.mota.fetcher.DefaultHtmlFetcher.getHtml(DefaultHtmlFetcher.java:143) ... 7 more Caused by: org.apache.http.client.ClientProtocolException at org.apache.http.impl.client.InternalHttpClient.doExecute(InternalHttpClient.java:188) at org.apache.http.impl.client.CloseableHttpClient.execute(CloseableHttpClient.java:82) at org.apache.http.impl.client.CloseableHttpClient.execute(CloseableHttpClient.java:106) at com.weidou.mota.fetcher.DefaultHtmlFetcher.getHtml(DefaultHtmlFetcher.java:139) ... 
7 more Caused by: org.apache.http.ProtocolException: The server failed to respond with a valid HTTP response at org.apache.http.impl.conn.DefaultHttpResponseParser.parseHead(DefaultHttpResponseParser.java:151) at org.apache.http.impl.conn.DefaultHttpResponseParser.parseHead(DefaultHttpResponseParser.java:57) at org.apache.http.impl.io.AbstractMessageParser.parse(AbstractMessageParser.java:260) at org.apache.http.impl.DefaultBHttpClientConnection.receiveResponseHeader(DefaultBHttpClientConnection.java:161) at sun.reflect.GeneratedMethodAccessor12.invoke(Unknown Source) at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:25) at java.lang.reflect.Method.invoke(Method.java:597) at org.apache.http.impl.conn.CPoolProxy.invoke(CPoolProxy.java:138) at $Proxy1.receiveResponseHeader(Unknown Source) at org.apache.http.protocol.HttpRequestExecutor.doReceiveResponse(HttpRequestExecutor.java:271) at org.apache.http.protocol.HttpRequestExecutor.execute(HttpRequestExecutor.java:123) at org.apache.http.impl.execchain.MainClientExec.execute(MainClientExec.java:253) at org.apache.http.impl.execchain.ProtocolExec.execute(ProtocolExec.java:194) at org.apache.http.impl.execchain.RetryExec.execute(RetryExec.java:85) at org.apache.http.impl.execchain.RedirectExec.execute(RedirectExec.java:108) at org.apache.http.impl.client.InternalHttpClient.doExecute(InternalHttpClient.java:186) ... 10 more
最近在使用 HttpClient 4.3.1 发送请求的过程中,频繁出现上面日志里的这个错误(The server failed to respond with a valid HTTP response),不知道大家有没有遇到过类似的情况?
@黄亿华,不知道你遇到类似情况没?
补充一下代码:
/**
 * Fetches {@code url} with an HTTP GET and returns the response body as text.
 *
 * <p>A desktop User-Agent header is always added, followed by every caller-supplied
 * header. Any failure is reported as one {@link FetcherException} around the root cause;
 * an exception that already is a {@code FetcherException} is rethrown unchanged instead
 * of being wrapped a second time.
 *
 * @param url     address to fetch; it is passed through {@code CharsetUtils.encodeChars}
 *                before the request is built
 * @param charset not consulted by this method's body
 * @param headers additional request headers; may be {@code null} or empty
 * @return the value produced by {@code getHtml(HttpEntity)} for the response entity
 * @throws FetcherException if building the headers, executing the request, reading the
 *                          body, or closing the response on the success path fails
 */
@Override
public String getHtml(String url, String charset, Map<String, String> headers) throws FetcherException {
    url = CharsetUtils.encodeChars(url);
    HttpGet get = new HttpGet(url);
    get.setConfig(requestConfigBuilder.build());

    CloseableHttpResponse response = null;
    try {
        get.addHeader("User-Agent", UserAgent.DESKTOP_WINDOWS_);
        if (headers != null) {
            for (Entry<String, String> entry : headers.entrySet()) {
                get.addHeader(entry.getKey(), entry.getValue());
            }
        }

        response = client.execute(get);
        String html = getHtml(response.getEntity());

        // Close on the success path inside the try block, so that a close failure is
        // still surfaced to the caller as a FetcherException (as it was before).
        response.close();
        response = null;
        return html;
    } catch (Exception e) {
        get.abort();
        // Wrap exactly once: the nested try blocks used to produce
        // FetcherException(FetcherException(cause)).
        if (e instanceof FetcherException) {
            throw (FetcherException) e;
        }
        throw new FetcherException(e);
    } finally {
        if (response != null) {
            try {
                response.close();
            } catch (IOException ignored) {
                // A primary failure is already propagating; throwing from finally
                // would replace it with this secondary close error.
            }
        }
        get.releaseConnection();
    }
}
public DefaultHtmlFetcher() { client = HttpClientUtils.getDefaultHttpClient(); requestConfigBuilder.setSocketTimeout(HttpClientUtils.socketTimeout) .setConnectTimeout(HttpClientUtils.connectTimeout) .setConnectionRequestTimeout(HttpClientUtils.connectTimeout) .setRelativeRedirectsAllowed(true).setRedirectsEnabled(true) .setMaxRedirects(10).setCircularRedirectsAllowed(false); }
public class HttpClientUtils { private final static Logger logger = LoggerFactory.getLogger(HttpClientUtils.class); private static PoolingHttpClientConnectionManager connManager = null; private static CloseableHttpClient httpclient = null; public final static int connectTimeout = 10000; public final static int socketTimeout = 10000; static { try { SSLContext sslContext = SSLContexts.custom().useTLS().build(); sslContext.init(null, new TrustManager[] { new X509TrustManager() { public X509Certificate[] getAcceptedIssuers() { return null; } public void checkClientTrusted(X509Certificate[] certs, String authType) { } public void checkServerTrusted(X509Certificate[] certs, String authType) { } } }, null); Registry<ConnectionSocketFactory> socketFactoryRegistry = RegistryBuilder.<ConnectionSocketFactory> create() .register("http", PlainConnectionSocketFactory.INSTANCE) .register("https", new SSLConnectionSocketFactory(sslContext)).build(); connManager = new PoolingHttpClientConnectionManager(socketFactoryRegistry); // Create socket configuration SocketConfig socketConfig = SocketConfig.custom().setSoKeepAlive(true).setTcpNoDelay(true).build(); connManager.setDefaultSocketConfig(socketConfig); // Create message constraints MessageConstraints messageConstraints = MessageConstraints.custom() .setMaxHeaderCount(200) .setMaxLineLength(2000) .build(); // Create connection configuration ConnectionConfig connectionConfig = ConnectionConfig.custom() .setMalformedInputAction(CodingErrorAction.IGNORE) .setUnmappableInputAction(CodingErrorAction.IGNORE) .setCharset(Consts.UTF_8) .setMessageConstraints(messageConstraints) .build(); connManager.setDefaultConnectionConfig(connectionConfig); connManager.setMaxTotal(200); connManager.setDefaultMaxPerRoute(20); connManager.closeExpiredConnections(); connManager.closeIdleConnections(30, TimeUnit.SECONDS); httpclient = HttpClients.custom() .setConnectionManager(connManager) .setRedirectStrategy(new LaxRedirectStrategy()) 
.setKeepAliveStrategy(HttpClientUtils.keepAliveStrategy) .setConnectionReuseStrategy( new DefaultConnectionReuseStrategy()) .addInterceptorFirst(HttpClientUtils.gzipInterceptor) .build(); } catch (KeyManagementException e) { logger.error("KeyManagementException", e); } catch (NoSuchAlgorithmException e) { logger.error("NoSuchAlgorithmException", e); } } /** * gzip 压缩 */ private static HttpRequestInterceptor gzipInterceptor = new HttpRequestInterceptor() { public void process(final HttpRequest request, final HttpContext context) throws HttpException, IOException { if (!request.containsHeader("Accept-Encoding")) { request.addHeader("Accept-Encoding", "gzip"); } } }; /** * KeepAlive */ private static ConnectionKeepAliveStrategy keepAliveStrategy = new ConnectionKeepAliveStrategy() { public long getKeepAliveDuration(HttpResponse response, HttpContext context) { HeaderElementIterator it = new BasicHeaderElementIterator( response.headerIterator(HTTP.CONN_KEEP_ALIVE)); while (it.hasNext()) { HeaderElement he = it.nextElement(); String param = he.getName(); String value = he.getValue(); if (value != null && param.equalsIgnoreCase("timeout")) { try { return Long.parseLong(value) * 1000; } catch(NumberFormatException ignore) { } } } // HttpHost target = (HttpHost) context.getAttribute( HttpClientContext.HTTP_TARGET_HOST); return 30 * 1000; } }; /** * 获取客户端 * @return */ public static CloseableHttpClient getDefaultHttpClient() { return httpclient; } }
如果你对这篇内容有疑问,欢迎到本站社区发帖提问、参与讨论,获取更多帮助,或者扫描二维码加入 Web 技术交流群。
绑定邮箱获取回复消息
由于您还没有绑定您的真实邮箱,如果其他用户或者作者回复了您的评论,将不能在第一时间通知您!
发布评论