Quadcap Embeddable Server

com/quadcap/http/client/HttpFetcher.java

Go to the documentation of this file.
00001 package com.quadcap.http.client; 00002 00003 /* Copyright 1998 - 2003 Quadcap Software. All rights reserved. 00004 * 00005 * This software is distributed under the Quadcap Free Software License. 00006 * This software may be used or modified for any purpose, personal or 00007 * commercial. Open Source redistributions are permitted. Commercial 00008 * redistribution of larger works derived from, or works which bundle 00009 * this software requires a "Commercial Redistribution License"; see 00010 * http://www.quadcap.com/purchase. 00011 * 00012 * Redistributions qualify as "Open Source" under one of the following terms: 00013 * 00014 * Redistributions are made at no charge beyond the reasonable cost of 00015 * materials and delivery. 00016 * 00017 * Redistributions are accompanied by a copy of the Source Code or by an 00018 * irrevocable offer to provide a copy of the Source Code for up to three 00019 * years at the cost of materials and delivery. Such redistributions 00020 * must allow further use, modification, and redistribution of the Source 00021 * Code under substantially the same terms as this license. 00022 * 00023 * Redistributions of source code must retain the copyright notices as they 00024 * appear in each source code file, these license terms, and the 00025 * disclaimer/limitation of liability set forth as paragraph 6 below. 00026 * 00027 * Redistributions in binary form must reproduce this Copyright Notice, 00028 * these license terms, and the disclaimer/limitation of liability set 00029 * forth as paragraph 6 below, in the documentation and/or other materials 00030 * provided with the distribution. 00031 * 00032 * The Software is provided on an "AS IS" basis. No warranty is 00033 * provided that the Software is free of defects, or fit for a 00034 * particular purpose. 00035 * 00036 * Limitation of Liability. Quadcap Software shall not be liable 00037 * for any damages suffered by the Licensee or any third party resulting 00038 * from use of the Software. 00039 */ 00040 00041 import java.io.*; 00042 import java.util.*; 00043 00044 import java.net.Socket; 00045 import java.net.URL; 00046 import java.net.URLEncoder; 00047 import java.net.URLConnection; 00048 00049 import org.xml.sax.InputSource; 00050 00051 import com.quadcap.http.util.HeaderParser; 00052 00053 import com.quadcap.util.collections.ArrayQueue; 00054 00055 import com.quadcap.util.text.OctetMap; 00056 import com.quadcap.util.text.Scanner; 00057 00058 import com.quadcap.util.Debug; 00059 import com.quadcap.util.Util; 00060 00061 import com.quadcap.io.IO; 00062 import com.quadcap.io.LimitedInputStream; 00063 import com.quadcap.io.NullOutputStream; 00064 00065 public class HttpFetcher { 00066 static boolean checkLinks = false; 00067 static boolean showResponseHeaders = false; 00068 00069 static byte[] delims = { 0x0d, 0x0a, 0x0d, 0x0a }; 00070 00071 public static byte[] fetch(String url) throws Exception { 00072 return fetch(url, new ArrayList()); 00073 } 00074 00075 public static byte[] fetch(String url, List headers) throws Exception { 00076 InputStream is = fetchStream(url, headers); 00077 byte[] doc = readStream(is); 00078 is.close(); 00079 return doc; 00080 } 00081 00082 public static byte[] post(String url, String fileName, 00083 List headers) throws Exception { 00084 InputStream is = postStream(url, fileName, headers); 00085 byte[] doc = readStream(is); 00086 is.close(); 00087 return doc; 00088 00089 } 00090 00091 public static InputStream postStream(String url, String fileName, 00092 List headers) throws Exception { 00093 if (url.indexOf("http://") != 0) { 00094 System.err.println("Bad url (protocol): " + url); 00095 return null; 00096 } 00097 url = url.substring(7); 00098 int idx = url.indexOf('/'); 00099 if (idx <= 0) { 00100 url = url + "/"; 00101 idx = url.indexOf('/'); 00102 } 00103 String host = url.substring(0, idx); 00104 String name = url.substring(idx); 00105 int port = 80; 00106 idx = host.indexOf(':'); 00107 if (idx >= 0) { 00108 port = Integer.parseInt(host.substring(idx+1)); 00109 host = host.substring(0, idx); 00110 } 00111 Socket s = new Socket(host, port); 00112 00113 headers.add("Content-Length: " + 00114 String.valueOf(new File(fileName).length())); 00115 00116 OutputStream sos = s.getOutputStream(); 00117 BufferedOutputStream os = new BufferedOutputStream(sos); 00118 os.write(("POST " + name + " HTTP/1.0\r\n").getBytes()); 00119 Iterator iter = headers.iterator(); 00120 while (iter.hasNext()) { 00121 IO.write(os, iter.next().toString()); 00122 os.write("\r\n".getBytes()); 00123 } 00124 os.write("\r\n".getBytes()); 00125 00126 FileInputStream fis = new FileInputStream(fileName); 00127 IO.copyStream(fis, os); 00128 os.flush(); 00129 00130 InputStream is = s.getInputStream(); 00131 return new BufferedInputStream(is); 00132 } 00133 00134 public static InputStream fetchStream(String url) throws Exception { 00135 return fetchStream(url, new ArrayList()); 00136 } 00137 00138 public static void ripPlayList(InputStream is, List headers) 00139 throws Exception 00140 { 00141 BufferedReader br = new BufferedReader( 00142 new InputStreamReader(is)); 00143 String line; 00144 while ((line = br.readLine()) != null) { 00145 String[] v = line.split("="); 00146 if (v.length == 2 && v[0].equals("File1")) { 00147 is.close(); 00148 ripStream(v[1], headers); 00149 return; 00150 } 00151 } 00152 is.close(); 00153 } 00154 00155 public static void ripStream(String url, List headers) 00156 throws Exception 00157 { 00158 InputStream is = fetchStream(url, headers); 00159 StringBuffer sb = new StringBuffer(); 00160 for (int c = is.read(); c != '\n'; c = is.read()) { 00161 sb.append((char)c); 00162 } 00163 Debug.println("ripStream(" + url + "), Response: " + sb); 00164 Map hdrs = HeaderParser.parseHeaders(is); 00165 Debug.println("Headers = " + hdrs); 00166 if (sb.toString().indexOf("302") > 0) { 00167 is.close(); 00168 Debug.println("redirecting to: " + hdrs.get("location")); 00169 ripStream(hdrs.get("location").toString(), headers); 00170 return; 00171 } 00172 String contentType = String.valueOf(hdrs.get("content-type")); 00173 if (contentType.equalsIgnoreCase("audio/x-scpls")) { 00174 ripPlayList(is, headers); 00175 return; 00176 } 00177 int metaInt = 0; 00178 try { 00179 metaInt = Integer.parseInt(String.valueOf(hdrs.get("icy-metaint"))); 00180 } catch (Throwable t) { 00181 } 00182 byte[] buf = new byte[metaInt]; 00183 String title = null; 00184 String lastTitle = "__INVALID_lastTitle__"; 00185 Mp3FrameStream out = new Mp3FrameStream(); 00186 FileOutputStream fout = null; 00187 while (true) { 00188 int cnt = is.read(buf); 00189 while (cnt < buf.length) { 00190 if (cnt < 0) { 00191 if (out != null) { 00192 out.close(); 00193 } 00194 Debug.println("partial buffer, returning... (" + cnt + ")"); 00195 return; 00196 } 00197 cnt += is.read(buf, cnt, buf.length - cnt); 00198 } 00199 title = getTitle(is).replace('/', ' '); 00200 if (title.length() > 0) { 00201 if (!title.equals(lastTitle)) { 00202 Debug.println("Title: " + title); 00203 if (fout == null) { 00204 // Start a new file 00205 fout = new FileOutputStream(title); 00206 out.init(fout, new NullOutputStream()); 00207 out.write(buf); 00208 } else { 00209 // split the difference 00210 out.write(buf, 0, metaInt/2); 00211 out.close(); 00212 fout = new FileOutputStream(title); 00213 out.init(fout, new NullOutputStream()); 00214 out.write(buf, metaInt/2, metaInt/2); 00215 } 00216 lastTitle = title; 00217 } 00218 } else { 00219 if (fout != null) { 00220 out.write(buf); 00221 } 00222 } 00223 } 00224 } 00225 00226 public static String getTitle(InputStream in) throws IOException { 00227 byte[] buf = new byte[in.read() * 16]; 00228 in.read(buf); 00229 StringBuffer sb = new StringBuffer(); 00230 for (int i = 0; i < buf.length && buf[i] != 0; i++) { 00231 sb.append((char)(buf[i])); 00232 } 00233 String[] p = sb.toString().split(";"); 00234 for (int i = 0; i < p.length; i++) { 00235 String[] v = p[i].trim().split("="); 00236 if (v.length == 2 && v[0].equalsIgnoreCase("StreamTitle")) { 00237 String s = v[1].substring(1, v[1].length()-1); 00238 while (s.toLowerCase().endsWith(".mp3")) { 00239 s = s.substring(0, s.length() - 4); 00240 } 00241 s += ".mp3"; 00242 return s; 00243 } 00244 } 00245 return ""; 00246 } 00247 00248 public static InputStream fetchStream(String url, List headers) 00249 throws IOException 00250 { 00251 //Debug.println(0, "Fetch: " + url); 00252 if (url.indexOf("http://") != 0) { 00253 System.err.println("Bad url (protocol): " + url); 00254 return null; 00255 } 00256 url = url.substring(7); 00257 int idx = url.indexOf('/'); 00258 if (idx <= 0) { 00259 url = url + "/"; 00260 idx = url.length() - 1; 00261 } 00262 String host = url.substring(0, idx); 00263 String name = url.substring(idx); 00264 int port = 80; 00265 idx = host.indexOf(':'); 00266 if (idx >= 0) { 00267 port = Integer.parseInt(host.substring(idx+1)); 00268 host = host.substring(0, idx); 00269 } 00270 Socket s = new Socket(host, port); 00271 00272 ByteArrayOutputStream bos = new ByteArrayOutputStream(); 00273 Debug.println("GET " + name); 00274 bos.write(("GET " + name + " HTTP/1.0\r\n").getBytes()); 00275 Iterator iter = headers.iterator(); 00276 while (iter.hasNext()) { 00277 String hdr = iter.next().toString(); 00278 IO.write(bos, hdr); 00279 Debug.println(" " + hdr); 00280 bos.write('\r'); 00281 bos.write('\n'); 00282 } 00283 bos.write("\r\n".getBytes()); 00284 00285 s.getOutputStream().write(bos.toByteArray()); 00286 00287 InputStream is = s.getInputStream(); 00288 return new BufferedInputStream(is); 00289 } 00290 00291 public static InputStream fetch2(String url) throws Exception { 00292 System.out.println("Fetch: " + url); 00293 URLConnection c = new URL(url).openConnection(); 00294 c.connect(); 00295 return c.getInputStream(); 00296 } 00297 00298 public static byte[] readStream(InputStream is) throws IOException { 00299 ByteArrayOutputStream bos = new ByteArrayOutputStream(); 00300 00301 int state = 0; 00302 int cnt = 0; 00303 if (showResponseHeaders) state = 5; 00304 while (state < 4) { 00305 int c = is.read(); 00306 if (c < 0) { 00307 throw new IOException("unexpected eof in message headers"); 00308 } 00309 if (delims[state] == c) state++; 00310 else if (delims[0] == c) state = 1; 00311 else state = 0; 00312 } 00313 00314 byte[] buf = new byte[1024]; 00315 while ((cnt = is.read(buf)) > 0) { 00316 bos.write(buf, 0, cnt); 00317 } 00318 return bos.toByteArray(); 00319 } 00320 00321 public static Hashtable buildTable(String fname) throws Exception { 00322 BufferedReader r = new BufferedReader(new FileReader(fname)); 00323 String turl; 00324 Hashtable t = new Hashtable(); 00325 while ((turl = r.readLine()) != null) { 00326 url = turl; 00327 byte[] doc = fetch(url); 00328 System.err.println(url + ": " + Util.strBytes(doc)); 00329 t.put(url, doc); 00330 } 00331 return t; 00332 } 00333 00334 public static void checkTable(String fname, Hashtable t) throws Exception { 00335 BufferedReader r = new BufferedReader(new FileReader(fname)); 00336 String url; 00337 while ((url = r.readLine()) != null) { 00338 try { 00339 byte[] doc = fetch(url); 00340 byte[] exp = (byte[])t.get(url); 00341 if (Util.compareBytes(doc, exp) != 0) { 00342 System.err.println("Failed: " + url); 00343 System.err.println("Doc: " + Util.strBytes(doc)); 00344 } 00345 } catch (Exception e) { 00346 Debug.print(e); 00347 } 00348 } 00349 } 00350 00351 public static void addAV(String fname) throws Exception { 00352 BufferedReader r = new BufferedReader(new FileReader(fname)); 00353 String url; 00354 Hashtable t = new Hashtable(); 00355 while ((url = r.readLine()) != null) { 00356 System.out.println("url: " + url); 00357 StringBuffer sb = new StringBuffer( 00358 "http://add-url.altavista.com/cgi-bin/newurl?ad=1&q="); 00359 sb.append(URLEncoder.encode(url)); 00360 byte[] doc = fetch(sb.toString()); 00361 String s = new String(doc); 00362 if (s.indexOf("The page was fetched") < 0) { 00363 System.out.println(s); 00364 break; 00365 } 00366 try { Thread.sleep(2000); } catch (Throwable dt) {} 00367 } 00368 } 00369 00370 public static void check(String name) throws Exception { 00371 LinkChecker lc = new LinkChecker(name); 00372 lc.run(); 00373 lc.printBadLinks(); 00374 } 00375 00376 public static void main(String args[]) { 00377 Debug.debugMode = Debug.debugAll; 00378 Debug.debugStream = System.out; 00379 try { 00380 doit(args); 00381 } catch (Exception e) { 00382 System.out.println("Exception: " + e.toString()); 00383 Debug.print(e); 00384 } 00385 } 00386 00387 static String fname = null; 00388 static int repeat = 2; 00389 static int delay = 0; 00390 static Hashtable t; 00391 static boolean times = false; 00392 static boolean rip = false; 00393 static String url = null; 00394 static int limit = 0; 00395 00396 public static void doit() throws Exception { 00397 if (times) { 00398 for (int i = 0; i < repeat; i++) { 00399 fetch(url); 00400 } 00401 } else { 00402 for (int i = 0; i < repeat; i++) { 00403 checkTable(fname, t); 00404 if (delay > 0) Thread.sleep(delay); 00405 } 00406 } 00407 } 00408 00409 public static void doit (String args[]) throws Exception { 00410 int numThreads = 1; 00411 boolean av = false; 00412 String post = null; 00413 List headers = new ArrayList(); 00414 00415 int ac = 0; 00416 while (ac < args.length) { 00417 String arg = args[ac].trim(); 00418 if (arg.charAt(0) != '-') break; 00419 ac++; 00420 if (arg.equals("-urls")) { 00421 fname = args[ac++]; 00422 } else if (arg.equals("-count")) { 00423 repeat = Integer.parseInt(args[ac++]); 00424 } else if (arg.equals("-delay")) { 00425 delay = Integer.parseInt(args[ac++]); 00426 } else if (arg.equals("-threads")) { 00427 numThreads = Integer.parseInt(args[ac++]); 00428 } else if (arg.equals("-checklinks")) { 00429 checkLinks = true; 00430 fname = args[ac++]; 00431 } else if (arg.equals("-headers")) { 00432 showResponseHeaders = true; 00433 } else if (arg.equals("-post")) { 00434 post = args[ac++]; 00435 } else if (arg.equals("-header")) { 00436 String hName = args[ac++]; 00437 String hVal = args[ac++]; 00438 headers.add(hName + ": " + hVal); 00439 } else if (arg.equals("-altavista")) { 00440 av = true; 00441 fname = args[ac++]; 00442 } else if (arg.equals("-times")) { 00443 times = true; 00444 } else if (arg.equals("-limit")) { 00445 limit = Integer.parseInt(args[ac++]); 00446 } else if (arg.equals("-rip")) { 00447 if (!rip) { 00448 headers.add("Host: 192.168.1.8"); 00449 headers.add("User-Agent: WinampMPEG/2.8"); 00450 headers.add("Accept: */*"); 00451 headers.add("Icy-Metadata:1"); 00452 headers.add("Connection: close"); 00453 } 00454 rip = true; 00455 } else { 00456 //throw new Exception("??"); 00457 } 00458 } 00459 00460 if (av) { 00461 addAV(fname); 00462 } else if (checkLinks) { 00463 check(fname); 00464 return; 00465 } else if (fname == null && !times) { 00466 url = args[ac]; 00467 if (rip) { 00468 ripStream(url, headers); 00469 return; 00470 } 00471 if (post == null) { 00472 InputStream in = fetchStream(url, headers); 00473 if (limit > 0) { 00474 in = new LimitedInputStream(in, limit); 00475 } 00476 try { 00477 IO.copyStream(in, System.out); 00478 } finally { 00479 in.close(); 00480 } 00481 } else { 00482 byte[] doc = post(url, post, headers); 00483 System.out.write(doc); 00484 } 00485 } else { 00486 t = buildTable(fname); 00487 Thread[] threads = new Thread[numThreads]; 00488 for (int i = 0; i < numThreads; i++) { 00489 threads[i] = new Thread() { 00490 public void run() { 00491 try { 00492 doit(); 00493 } catch (Throwable t) { 00494 Debug.print(t); 00495 } 00496 } 00497 }; 00498 } 00499 long start = System.currentTimeMillis(); 00500 for (int i = 0; i < numThreads; i++) { 00501 threads[i].start(); 00502 } 00503 for (int i = 0; i < numThreads; i++) { 00504 try { 00505 threads[i].join(); 00506 } catch (Throwable t) { 00507 Debug.print(t); 00508 } 00509 } 00510 long stop = System.currentTimeMillis(); 00511 long elap = stop - start; 00512 int r_s = (int)((repeat * numThreads * 1000) / elap); 00513 if (times) { 00514 System.out.println("" + elap + " elapsed"); 00515 System.out.println("" + r_s + " requests/second"); 00516 } 00517 } 00518 } 00519 }