package com.myjobalert.crawler;

import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.HttpURLConnection;
import java.net.URL;
import java.net.URLConnection;
import java.nio.charset.Charset;

/**
 * Base class for crawlers; provides a helper that downloads the body of an
 * HTTP(S) URL as a string.
 */
public abstract class Crawler {

    /** Size, in chars, of the read buffer and of the initial result capacity. */
    private static final int BUFFER_CHARS = 5000;

    /** Prefix of the charset parameter inside a Content-Type header value. */
    private static final String CHARSET_PREFIX = "charset=";

    /**
     * Downloads the response body of {@code urlStr} and returns it as text.
     *
     * <p>When {@code httpParams} is non-null the request method is switched to
     * {@code POST} and every entry is applied with
     * {@link HttpURLConnection#setRequestProperty(String, String)}, i.e. as a
     * request header. NOTE(review): if these entries were meant to be form
     * fields they would have to be written to the request body instead —
     * confirm against callers.
     *
     * <p>The body is decoded with the charset declared in the response's
     * {@code Content-Type} header when one is present and supported by this
     * JVM; otherwise the platform default charset is used. Progress messages
     * are printed to {@code System.out}.
     *
     * @param urlStr     the URL to fetch; must use a protocol whose connection
     *                   is an {@link HttpURLConnection}
     * @param httpParams request properties to send with a POST, or
     *                   {@code null} to leave the request method at its default
     * @return the decoded response body (possibly empty, never {@code null})
     * @throws IOException        if the URL is malformed or the transfer fails
     * @throws ClassCastException if the URL's connection is not an
     *                            {@link HttpURLConnection}
     */
    public String getURLContents(String urlStr, HttpParam[] httpParams) throws IOException {
        URL url = new URL(urlStr);
        URLConnection conn = url.openConnection();
        HttpURLConnection httpconn = (HttpURLConnection) conn;

        if (httpParams != null) {
            httpconn.setRequestMethod("POST");
            for (HttpParam param : httpParams) {
                httpconn.setRequestProperty(param.getName(), param.getValue());
            }
        }

        StringBuilder page = new StringBuilder(BUFFER_CHARS);
        // try-with-resources guarantees the stream (and with it the socket) is
        // released even when reading fails part-way through.
        try (InputStream in = httpconn.getInputStream()) {
            System.out.println("page size:" + httpconn.getContentLength());

            // Headers are available once the input stream has been obtained.
            Charset charset = resolveCharset(httpconn.getContentType());
            InputStreamReader reader = new InputStreamReader(in, charset);

            char[] buffer = new char[BUFFER_CHARS];
            int read;
            while ((read = reader.read(buffer)) != -1) {
                page.append(buffer, 0, read);
            }
        }

        System.out.println("page size:" + (page.length() / 1000) + "KB");
        System.out.println("page is downloaded");
        return page.toString();
    }

    /**
     * Picks the charset named by a {@code Content-Type} header value, falling
     * back to the platform default when the header is absent, carries no
     * charset parameter, or names a charset this JVM does not support.
     */
    private static Charset resolveCharset(String contentType) {
        if (contentType != null) {
            for (String part : contentType.split(";")) {
                String token = part.trim();
                if (token.regionMatches(true, 0, CHARSET_PREFIX, 0, CHARSET_PREFIX.length())) {
                    String name = token.substring(CHARSET_PREFIX.length()).trim();
                    // The parameter value may be a quoted string.
                    if (name.length() >= 2 && name.startsWith("\"") && name.endsWith("\"")) {
                        name = name.substring(1, name.length() - 1);
                    }
                    try {
                        return Charset.forName(name);
                    } catch (IllegalArgumentException ignored) {
                        // Illegal or unsupported charset label: use the default below.
                        break;
                    }
                }
            }
        }
        return Charset.defaultCharset();
    }
}

/**
 * Mutable name/value pair that {@link Crawler#getURLContents} applies to a
 * request as a request property.
 */
class HttpParam {

    private String name;
    private String value;

    /** Returns the property name. */
    public String getName() {
        return name;
    }

    /** Sets the property name. */
    public void setName(String name) {
        this.name = name;
    }

    /** Returns the property value. */
    public String getValue() {
        return value;
    }

    /** Sets the property value. */
    public void setValue(String value) {
        this.value = value;
    }
}
http://ws.amazon.com/widgets/q?rt=qf_br_asin_ssw&ServiceVersion=20070822&MarketPlace=US&ID=V20070822%2FUS%2Fmyknowledgebo-20%2F8003%2F13de8fda-f06a-4d41-9041-b4aeab627f25&Operation=GetDisplayTemplate Amazon.com Widgets
No comments:
Post a Comment