httpclient4.3.1 The server failed to respond with a valid HTTP response

发布于 2021-11-25 08:15:32 字数 12530 浏览 895 评论 0

2014-02-22 16:16:08 ERROR GetHtmlUtil:217 - 循环批次:1,  http请求错误:https://www.google.com.hk/search?lr=lang_zh-CN&newwindow=1&safe=strict&tbas=0&tbm=nws&source=lnt&tbs=sbd:1&sa=X&ei=Dj3eUqP9GuTliAf7l4C4Cw&ved=0CCUQpwUoAQ&biw=640&bih=521&dpr=1&q=site:cn.wsj.com%20%22%E4%B8%9C%E5%8F%B0%E5%B8%82%22
com.weidou.mota.exception.FetcherException: com.weidou.mota.exception.FetcherException: org.apache.http.client.ClientProtocolException
    at com.weidou.mota.fetcher.DefaultHtmlFetcher.getHtml(DefaultHtmlFetcher.java:209)
    at com.weidou.mota.fetcher.DefaultHtmlFetcher.getHtml(DefaultHtmlFetcher.java:171)
    at com.weidou.mota.fetcher.DefaultHtmlFetcher.getHtml(DefaultHtmlFetcher.java:225)
    at com.weidou.prism.spider.Utils.GetHtmlUtil.getHtml(GetHtmlUtil.java:215)
    at com.weidou.prism.spider.Utils.GetHtmlUtil.getHtmlByGet(GetHtmlUtil.java:85)
    at com.weidou.prism.spider.newsList.NewsListCrawler.searchNewss(NewsListCrawler.java:80)
    at com.weidou.prism.spider.newsList.NewsListCrawler.crawlerNews(NewsListCrawler.java:262)
    at com.weidou.prism.spider.ClientTask$1.run(ClientTask.java:59)
    at java.lang.Thread.run(Thread.java:701)
Caused by: com.weidou.mota.exception.FetcherException: org.apache.http.client.ClientProtocolException
    at com.weidou.mota.fetcher.DefaultHtmlFetcher.getHtml(DefaultHtmlFetcher.java:197)
    ... 8 more
Caused by: org.apache.http.client.ClientProtocolException
    at org.apache.http.impl.client.InternalHttpClient.doExecute(InternalHttpClient.java:188)
    at org.apache.http.impl.client.CloseableHttpClient.execute(CloseableHttpClient.java:82)
    at org.apache.http.impl.client.CloseableHttpClient.execute(CloseableHttpClient.java:106)
    at com.weidou.mota.fetcher.DefaultHtmlFetcher.getHtml(DefaultHtmlFetcher.java:194)
    ... 8 more
Caused by: org.apache.http.ProtocolException: The server failed to respond with a valid HTTP response
    at org.apache.http.impl.conn.DefaultHttpResponseParser.parseHead(DefaultHttpResponseParser.java:151)
    at org.apache.http.impl.conn.DefaultHttpResponseParser.parseHead(DefaultHttpResponseParser.java:57)
    at org.apache.http.impl.io.AbstractMessageParser.parse(AbstractMessageParser.java:260)
    at org.apache.http.impl.DefaultBHttpClientConnection.receiveResponseHeader(DefaultBHttpClientConnection.java:161)
    at sun.reflect.GeneratedMethodAccessor15.invoke(Unknown Source)
    at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
    at java.lang.reflect.Method.invoke(Method.java:622)
    at org.apache.http.impl.conn.CPoolProxy.invoke(CPoolProxy.java:138)
    at com.sun.proxy.$Proxy1.receiveResponseHeader(Unknown Source)
    at org.apache.http.protocol.HttpRequestExecutor.doReceiveResponse(HttpRequestExecutor.java:271)
    at org.apache.http.protocol.HttpRequestExecutor.execute(HttpRequestExecutor.java:123)
    at org.apache.http.impl.execchain.MainClientExec.createTunnelToTarget(MainClientExec.java:449)
    at org.apache.http.impl.execchain.MainClientExec.establishRoute(MainClientExec.java:374)
    at org.apache.http.impl.execchain.MainClientExec.execute(MainClientExec.java:218)
    at org.apache.http.impl.execchain.ProtocolExec.execute(ProtocolExec.java:194)
    at org.apache.http.impl.execchain.RetryExec.execute(RetryExec.java:85)
    at org.apache.http.impl.execchain.RedirectExec.execute(RedirectExec.java:108)
    at org.apache.http.impl.client.InternalHttpClient.doExecute(InternalHttpClient.java:186)
    ... 11 more

2014-02-22 16:21:18 ERROR GetHtmlUtil:217 - 循环批次:1,  http请求错误:http://www.baidu.com/link?url=CmtBvUMsEt2Z9uhOLyq4mrw3uu5fiJN4IQ6KLw5oJZJ9XBuZRNPtWMZhM4AB_sIcVAzgZRO2ejM3zL90L4p5CQVIKMtfuUKoWXni061oBZu
com.weidou.mota.exception.FetcherException: com.weidou.mota.exception.FetcherException: org.apache.http.client.ClientProtocolException
	at com.weidou.mota.fetcher.DefaultHtmlFetcher.getHtml(DefaultHtmlFetcher.java:155)
	at com.weidou.mota.fetcher.DefaultHtmlFetcher.getHtml(DefaultHtmlFetcher.java:173)
	at com.weidou.mota.fetcher.DefaultHtmlFetcher.getHtml(DefaultHtmlFetcher.java:230)
	at com.weidou.prism.spider.Utils.GetHtmlUtil.getHtml(GetHtmlUtil.java:215)
	at com.weidou.prism.spider.Utils.GetHtmlUtil.getHtmlByGet(GetHtmlUtil.java:101)
	at com.weidou.prism.spider.newsDetail.NewsDetailCrawler.crawlingDetails(NewsDetailCrawler.java:75)
	at com.weidou.prism.spider.ClientTask$2.run(ClientTask.java:80)
	at java.lang.Thread.run(Thread.java:662)
Caused by: com.weidou.mota.exception.FetcherException: org.apache.http.client.ClientProtocolException
	at com.weidou.mota.fetcher.DefaultHtmlFetcher.getHtml(DefaultHtmlFetcher.java:143)
	... 7 more
Caused by: org.apache.http.client.ClientProtocolException
	at org.apache.http.impl.client.InternalHttpClient.doExecute(InternalHttpClient.java:188)
	at org.apache.http.impl.client.CloseableHttpClient.execute(CloseableHttpClient.java:82)
	at org.apache.http.impl.client.CloseableHttpClient.execute(CloseableHttpClient.java:106)
	at com.weidou.mota.fetcher.DefaultHtmlFetcher.getHtml(DefaultHtmlFetcher.java:139)
	... 7 more
Caused by: org.apache.http.ProtocolException: The server failed to respond with a valid HTTP response
	at org.apache.http.impl.conn.DefaultHttpResponseParser.parseHead(DefaultHttpResponseParser.java:151)
	at org.apache.http.impl.conn.DefaultHttpResponseParser.parseHead(DefaultHttpResponseParser.java:57)
	at org.apache.http.impl.io.AbstractMessageParser.parse(AbstractMessageParser.java:260)
	at org.apache.http.impl.DefaultBHttpClientConnection.receiveResponseHeader(DefaultBHttpClientConnection.java:161)
	at sun.reflect.GeneratedMethodAccessor12.invoke(Unknown Source)
	at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:25)
	at java.lang.reflect.Method.invoke(Method.java:597)
	at org.apache.http.impl.conn.CPoolProxy.invoke(CPoolProxy.java:138)
	at $Proxy1.receiveResponseHeader(Unknown Source)
	at org.apache.http.protocol.HttpRequestExecutor.doReceiveResponse(HttpRequestExecutor.java:271)
	at org.apache.http.protocol.HttpRequestExecutor.execute(HttpRequestExecutor.java:123)
	at org.apache.http.impl.execchain.MainClientExec.execute(MainClientExec.java:253)
	at org.apache.http.impl.execchain.ProtocolExec.execute(ProtocolExec.java:194)
	at org.apache.http.impl.execchain.RetryExec.execute(RetryExec.java:85)
	at org.apache.http.impl.execchain.RedirectExec.execute(RedirectExec.java:108)
	at org.apache.http.impl.client.InternalHttpClient.doExecute(InternalHttpClient.java:186)
	... 10 more







最近在使用 httpclient 4.3.1 发起数据请求的过程中,出现了大量上面这样的错误,不知道大家有没有遇到过类似的情况?

@黄亿华,不知道你遇到类似情况没?

补充一下代码:

	/**
	 * Fetches {@code url} with an HTTP GET and returns the response body as text.
	 *
	 * <p>The URL is first passed through {@code CharsetUtils.encodeChars}. A desktop
	 * User-Agent header is added, followed by every entry of {@code headers}. The
	 * body is extracted by the entity-based {@code getHtml} overload of this class.
	 *
	 * <p>Every failure raised while sending the request or reading the response is
	 * reported as exactly one {@code FetcherException}: an exception that already is
	 * a {@code FetcherException} is rethrown as-is, anything else is wrapped once.
	 * A failure while closing the response is only reported when nothing else went
	 * wrong, so it can never hide the original cause.
	 *
	 * @param url     address to fetch
	 * @param charset not read by this method
	 * @param headers extra request headers; may be {@code null} or empty
	 * @return the value produced by the entity-based {@code getHtml} overload
	 * @throws FetcherException if the request, the body extraction or closing the
	 *                          response fails
	 */
	@Override
	public String getHtml(String url, String charset,
			Map<String, String> headers) throws FetcherException {
		HttpGet get = new HttpGet(CharsetUtils.encodeChars(url));
		get.setConfig(requestConfigBuilder.build());

		CloseableHttpResponse response = null;
		FetcherException failure = null;
		String html = null;
		try {
			get.addHeader("User-Agent", UserAgent.DESKTOP_WINDOWS_);
			if (headers != null) {
				for (Entry<String, String> entry : headers.entrySet()) {
					get.addHeader(entry.getKey(), entry.getValue());
				}
			}
			response = client.execute(get);
			html = getHtml(response.getEntity());
		} catch (Exception e) {
			// Wrap once only; re-wrapping an existing FetcherException just buries the root cause.
			failure = (e instanceof FetcherException) ? (FetcherException) e : new FetcherException(e);
		} finally {
			if (response != null) {
				try {
					response.close();
				} catch (IOException e) {
					// Never throw from finally: keep the primary failure if there is one.
					if (failure == null) {
						failure = new FetcherException(e);
					}
				}
			}
			if (failure != null) {
				get.abort();
			}
			get.releaseConnection();
		}

		if (failure != null) {
			throw failure;
		}
		return html;
	}



	/**
	 * Creates a fetcher backed by the client returned from
	 * {@code HttpClientUtils.getDefaultHttpClient()} and fills in the request
	 * configuration builder used for each request.
	 *
	 * <p>Socket and connect timeouts come from {@code HttpClientUtils}; the
	 * connection-request timeout reuses the connect timeout value. Redirects
	 * (including relative ones) are enabled, limited to 10 hops, and circular
	 * redirects are not allowed.
	 */
	public DefaultHtmlFetcher() {
		// Timeouts
		requestConfigBuilder.setSocketTimeout(HttpClientUtils.socketTimeout);
		requestConfigBuilder.setConnectTimeout(HttpClientUtils.connectTimeout);
		requestConfigBuilder.setConnectionRequestTimeout(HttpClientUtils.connectTimeout);

		// Redirect handling
		requestConfigBuilder.setRelativeRedirectsAllowed(true);
		requestConfigBuilder.setRedirectsEnabled(true);
		requestConfigBuilder.setMaxRedirects(10);
		requestConfigBuilder.setCircularRedirectsAllowed(false);

		client = HttpClientUtils.getDefaultHttpClient();
	}



/**
 * Holds the one shared {@code CloseableHttpClient}, built once in the static
 * initializer on top of a pooling connection manager, and hands it out through
 * {@link #getDefaultHttpClient()}.
 */
public class HttpClientUtils {

	private final static Logger logger = LoggerFactory.getLogger(HttpClientUtils.class);

	/** Connect timeout in milliseconds. */
	public final static int connectTimeout = 10000;

	/** Socket (read) timeout in milliseconds. */
	public final static int socketTimeout = 10000;

	/** Keep-alive duration, in milliseconds, used when the response carries no usable timeout. */
	private static final long DEFAULT_KEEP_ALIVE_MILLIS = 30 * 1000;

	/**
	 * Request interceptor that asks for gzip-compressed responses unless the
	 * request already carries an Accept-Encoding header.
	 *
	 * <p>Declared before the static initializer on purpose: static initializers
	 * run in textual order and the initializer below reads this field, so it must
	 * already be assigned when the client is built.
	 */
	private static final HttpRequestInterceptor gzipInterceptor = new HttpRequestInterceptor() {
		public void process(final HttpRequest request, final HttpContext context) throws HttpException, IOException {
			if (!request.containsHeader("Accept-Encoding")) {
				request.addHeader("Accept-Encoding", "gzip");
			}
		}
	};

	/**
	 * Keep-alive strategy: honours the {@code timeout} element of the response's
	 * Keep-Alive header (seconds, converted to milliseconds) and otherwise falls
	 * back to {@link #DEFAULT_KEEP_ALIVE_MILLIS}.
	 *
	 * <p>Declared before the static initializer for the same ordering reason as
	 * {@link #gzipInterceptor}.
	 */
	private static final ConnectionKeepAliveStrategy keepAliveStrategy = new ConnectionKeepAliveStrategy() {
		public long getKeepAliveDuration(HttpResponse response, HttpContext context) {
			HeaderElementIterator it = new BasicHeaderElementIterator(
					response.headerIterator(HTTP.CONN_KEEP_ALIVE));
			while (it.hasNext()) {
				HeaderElement he = it.nextElement();
				String param = he.getName();
				String value = he.getValue();
				if (value != null && param.equalsIgnoreCase("timeout")) {
					try {
						return Long.parseLong(value) * 1000;
					} catch (NumberFormatException ignored) {
						// Unparseable timeout: keep scanning, then use the default below.
					}
				}
			}
			return DEFAULT_KEEP_ALIVE_MILLIS;
		}
	};

	private static PoolingHttpClientConnectionManager connManager = null;

	private static CloseableHttpClient httpclient = null;

	static {
		try {
			SSLContext sslContext = SSLContexts.custom().useTLS().build();
			// SECURITY NOTE(review): this trust manager accepts every certificate, so HTTPS
			// peers are not authenticated. Left unchanged; confirm this is intended.
			sslContext.init(null, new TrustManager[] { new X509TrustManager() {
				public X509Certificate[] getAcceptedIssuers() {
					// The X509TrustManager contract asks for a non-null (possibly empty) array.
					return new X509Certificate[0];
				}

				public void checkClientTrusted(X509Certificate[] certs, String authType) {
				}

				public void checkServerTrusted(X509Certificate[] certs, String authType) {
				}
			} }, null);
			Registry<ConnectionSocketFactory> socketFactoryRegistry = RegistryBuilder.<ConnectionSocketFactory> create()
					.register("http", PlainConnectionSocketFactory.INSTANCE)
					.register("https", new SSLConnectionSocketFactory(sslContext)).build();

			connManager = new PoolingHttpClientConnectionManager(socketFactoryRegistry);

			// Create socket configuration
			SocketConfig socketConfig = SocketConfig.custom().setSoKeepAlive(true).setTcpNoDelay(true).build();
			connManager.setDefaultSocketConfig(socketConfig);
			// Create message constraints
			MessageConstraints messageConstraints = MessageConstraints.custom()
					.setMaxHeaderCount(200)
					.setMaxLineLength(2000)
					.build();
			// Create connection configuration
			ConnectionConfig connectionConfig = ConnectionConfig.custom()
					.setMalformedInputAction(CodingErrorAction.IGNORE)
					.setUnmappableInputAction(CodingErrorAction.IGNORE)
					.setCharset(Consts.UTF_8)
					.setMessageConstraints(messageConstraints)
					.build();
			connManager.setDefaultConnectionConfig(connectionConfig);
			connManager.setMaxTotal(200);
			connManager.setDefaultMaxPerRoute(20);
			// NOTE(review): the pool was created a few lines above and no request has used it
			// yet, so these two calls have nothing to evict here. Evicting stale connections
			// would require calling them again later — TODO confirm whether anything does.
			connManager.closeExpiredConnections();
			connManager.closeIdleConnections(30, TimeUnit.SECONDS);

			httpclient = HttpClients.custom()
					.setConnectionManager(connManager)
					.setRedirectStrategy(new LaxRedirectStrategy())
					.setKeepAliveStrategy(keepAliveStrategy)
					.setConnectionReuseStrategy(new DefaultConnectionReuseStrategy())
					.addInterceptorFirst(gzipInterceptor)
					.build();
		} catch (KeyManagementException e) {
			// The client stays null in this case; the failure is only logged.
			logger.error("KeyManagementException", e);
		} catch (NoSuchAlgorithmException e) {
			// The client stays null in this case; the failure is only logged.
			logger.error("NoSuchAlgorithmException", e);
		}
	}

	/**
	 * Returns the shared client built by the static initializer.
	 *
	 * @return the shared client, or {@code null} if building the SSL context
	 *         failed during class initialization
	 */
	public static CloseableHttpClient getDefaultHttpClient() {
		return httpclient;
	}


}



如果你对这篇内容有疑问,欢迎到本站社区发帖提问、参与讨论,获取更多帮助,或者扫描二维码加入 Web 技术交流群。

扫描二维码加入 Web 技术交流群

发布评论

需要 登录 才能够评论, 你可以免费 注册 一个本站的账号。
列表为空,暂无数据
我们使用 Cookies 和其他技术来定制您的体验包括您的登录状态等。通过阅读我们的 隐私政策 了解更多相关信息。 单击 接受 或继续使用网站,即表示您同意使用 Cookies 和您的相关数据。
原文