获取网页内容
时间:2010-09-08 来源:topzhujia
- package zj.cto;
- import java.io.FileOutputStream;
- import java.io.IOException;
- import java.io.OutputStreamWriter;
- import java.net.URI;
- import java.net.URISyntaxException;
- import java.util.Scanner;
- import org.apache.http.HttpHost;
- import org.apache.http.HttpResponse;
- import org.apache.http.HttpVersion;
- import org.apache.http.client.ClientProtocolException;
- import org.apache.http.client.methods.HttpGet;
- import org.apache.http.conn.ClientConnectionManager;
- import org.apache.http.conn.params.ConnRoutePNames;
- import org.apache.http.conn.scheme.PlainSocketFactory;
- import org.apache.http.conn.scheme.Scheme;
- import org.apache.http.conn.scheme.SchemeRegistry;
- import org.apache.http.impl.client.DefaultHttpClient;
- import org.apache.http.impl.conn.tsccm.ThreadSafeClientConnManager;
- import org.apache.http.params.BasicHttpParams;
- import org.apache.http.params.HttpParams;
- import org.apache.http.params.HttpProtocolParams;
- /**
- * @author 祝 嘉
- *
- */
- public class BlogVistor {
- /**
- * @param args
- */
- public static void main(String[] args) {
- System.out.println("Start...");
- new BlogVistor().start();
- System.out.println("End.");
- }
- private DefaultHttpClient httpclient;
- private HttpGet request;
- private HttpResponse response = null;
- /**
- *
- */
- public BlogVistor() {
- super();
- // this.httpclient = new DefaultHttpClient();
- // make sure to use a proxy that supports connect
- HttpHost proxy = new HttpHost("edu6.zzzcn.info", 2012, "http");
- // this.target = new HttpHost("topzhujia.blog.51cto.com", 80, "http");
- SchemeRegistry supportedSchemes = new SchemeRegistry();
- supportedSchemes.register(new Scheme("http", PlainSocketFactory
- .getSocketFactory(), 80));
- // prepare parameters
- HttpParams params = new BasicHttpParams();
- HttpProtocolParams.setVersion(params, HttpVersion.HTTP_1_0);
- HttpProtocolParams.setContentCharset(params, "UTF-8");
- HttpProtocolParams.setUseExpectContinue(params, true);
- ClientConnectionManager ccm = new ThreadSafeClientConnManager(params,
- supportedSchemes);
- this.httpclient = new DefaultHttpClient(ccm, params);
- httpclient.getParams().setParameter(ConnRoutePNames.DEFAULT_PROXY,
- proxy);
- request = new HttpGet();
- try {
- request.setURI(new URI("http://topzhujia.blog.51cto.com"));
- } catch (URISyntaxException e) {
- e.printStackTrace();
- }
- }
- private void destroy() {
- this.httpclient.getConnectionManager().shutdown();
- }
- private void release() {
- try {
- this.response.getEntity().consumeContent();
- } catch (IOException e) {
- e.printStackTrace();
- }
- }
- public void start() {
- OutputStreamWriter cout = null;
- Scanner cin;
- try {
- response = httpclient.execute(request);
- System.out.println("Request state: " + response.getStatusLine());
- cout = new OutputStreamWriter(new FileOutputStream("51cto.html"), "gb2312");
- cin = new Scanner(response.getEntity().getContent(), "gb2312");
- while (cin.hasNextLine()) {
- cout.append(cin.nextLine());
- }
- cout.flush();
- cout.close();
- cin.close();
- this.release();
- } catch (ClientProtocolException e) {
- e.printStackTrace();
- } catch (IOException e) {
- e.printStackTrace();
- }
- this.destroy();
- }
- }
相关阅读 更多 +
排行榜 更多 +