00001
package com.quadcap.http.client;
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
00036
00037
00038
00039
00040
00041
import java.io.*;
00042
import java.util.*;
00043
00044
import java.net.Socket;
00045
import java.net.URL;
00046
import java.net.URLEncoder;
00047
import java.net.URLConnection;
00048
00049
import org.xml.sax.InputSource;
00050
00051
import com.quadcap.http.util.HeaderParser;
00052
00053
import com.quadcap.util.collections.ArrayQueue;
00054
00055
import com.quadcap.util.text.OctetMap;
00056
import com.quadcap.util.text.Scanner;
00057
00058
import com.quadcap.util.Debug;
00059
import com.quadcap.util.Util;
00060
00061
import com.quadcap.io.IO;
00062
import com.quadcap.io.LimitedInputStream;
00063
import com.quadcap.io.NullOutputStream;
00064
00065 public class HttpFetcher {
00066 static boolean checkLinks =
false;
00067 static boolean showResponseHeaders =
false;
00068
00069 static byte[]
delims = { 0x0d, 0x0a, 0x0d, 0x0a };
00070
00071 public static byte[]
fetch(String url)
throws Exception {
00072
return fetch(
url,
new ArrayList());
00073 }
00074
00075 public static byte[] fetch(String url, List headers)
throws Exception {
00076 InputStream is =
fetchStream(
url, headers);
00077 byte[] doc =
readStream(is);
00078 is.close();
00079
return doc;
00080 }
00081
00082 public static byte[]
post(String url, String fileName,
00083 List headers)
throws Exception {
00084 InputStream is =
postStream(
url, fileName, headers);
00085 byte[] doc =
readStream(is);
00086 is.close();
00087
return doc;
00088
00089 }
00090
00091 public static InputStream
postStream(String url, String fileName,
00092 List headers)
throws Exception {
00093
if (
url.indexOf(
"http://") != 0) {
00094 System.err.println(
"Bad url (protocol): " +
url);
00095
return null;
00096 }
00097
url =
url.substring(7);
00098
int idx =
url.indexOf(
'/');
00099
if (idx <= 0) {
00100
url =
url +
"/";
00101 idx = url.indexOf(
'/');
00102 }
00103 String host =
url.substring(0, idx);
00104 String name =
url.substring(idx);
00105
int port = 80;
00106 idx = host.indexOf(
':');
00107
if (idx >= 0) {
00108 port = Integer.parseInt(host.substring(idx+1));
00109 host = host.substring(0, idx);
00110 }
00111 Socket s =
new Socket(host, port);
00112
00113 headers.add(
"Content-Length: " +
00114 String.valueOf(
new File(fileName).length()));
00115
00116 OutputStream sos = s.getOutputStream();
00117 BufferedOutputStream os =
new BufferedOutputStream(sos);
00118 os.write((
"POST " + name +
" HTTP/1.0\r\n").getBytes());
00119 Iterator iter = headers.iterator();
00120
while (iter.hasNext()) {
00121
IO.write(os, iter.next().toString());
00122 os.write(
"\r\n".getBytes());
00123 }
00124 os.write(
"\r\n".getBytes());
00125
00126 FileInputStream fis =
new FileInputStream(fileName);
00127
IO.copyStream(fis, os);
00128 os.flush();
00129
00130 InputStream is = s.getInputStream();
00131
return new BufferedInputStream(is);
00132 }
00133
00134 public static InputStream
fetchStream(String url)
throws Exception {
00135
return fetchStream(
url,
new ArrayList());
00136 }
00137
00138 public static void ripPlayList(InputStream is, List headers)
00139
throws Exception
00140 {
00141 BufferedReader br =
new BufferedReader(
00142
new InputStreamReader(is));
00143 String line;
00144
while ((line = br.readLine()) != null) {
00145 String[] v = line.split(
"=");
00146
if (v.length == 2 && v[0].equals(
"File1")) {
00147 is.close();
00148
ripStream(v[1], headers);
00149
return;
00150 }
00151 }
00152 is.close();
00153 }
00154
00155 public static void ripStream(String url, List headers)
00156
throws Exception
00157 {
00158 InputStream is = fetchStream(
url, headers);
00159 StringBuffer sb =
new StringBuffer();
00160
for (
int c = is.read(); c !=
'\n'; c = is.read()) {
00161 sb.append((
char)c);
00162 }
00163
Debug.println(
"ripStream(" +
url +
"), Response: " + sb);
00164 Map hdrs =
HeaderParser.parseHeaders(is);
00165
Debug.println(
"Headers = " + hdrs);
00166
if (sb.toString().indexOf(
"302") > 0) {
00167 is.close();
00168
Debug.println(
"redirecting to: " + hdrs.get(
"location"));
00169 ripStream(hdrs.get(
"location").toString(), headers);
00170
return;
00171 }
00172 String contentType = String.valueOf(hdrs.get(
"content-type"));
00173
if (contentType.equalsIgnoreCase(
"audio/x-scpls")) {
00174 ripPlayList(is, headers);
00175
return;
00176 }
00177
int metaInt = 0;
00178
try {
00179 metaInt = Integer.parseInt(String.valueOf(hdrs.get(
"icy-metaint")));
00180 }
catch (Throwable
t) {
00181 }
00182 byte[] buf =
new byte[metaInt];
00183 String title = null;
00184 String lastTitle =
"__INVALID_lastTitle__";
00185
Mp3FrameStream out =
new Mp3FrameStream();
00186 FileOutputStream fout = null;
00187
while (
true) {
00188
int cnt = is.read(buf);
00189
while (cnt < buf.length) {
00190
if (cnt < 0) {
00191
if (out != null) {
00192 out.
close();
00193 }
00194
Debug.println(
"partial buffer, returning... (" + cnt +
")");
00195
return;
00196 }
00197 cnt += is.read(buf, cnt, buf.length - cnt);
00198 }
00199 title =
getTitle(is).replace(
'/',
' ');
00200
if (title.length() > 0) {
00201
if (!title.equals(lastTitle)) {
00202
Debug.println(
"Title: " + title);
00203
if (fout == null) {
00204
00205 fout =
new FileOutputStream(title);
00206 out.
init(fout,
new NullOutputStream());
00207 out.
write(buf);
00208 }
else {
00209
00210 out.
write(buf, 0, metaInt/2);
00211 out.
close();
00212 fout =
new FileOutputStream(title);
00213 out.
init(fout,
new NullOutputStream());
00214 out.
write(buf, metaInt/2, metaInt/2);
00215 }
00216 lastTitle = title;
00217 }
00218 }
else {
00219
if (fout != null) {
00220 out.
write(buf);
00221 }
00222 }
00223 }
00224 }
00225
00226 public static String
getTitle(InputStream in)
throws IOException {
00227 byte[] buf =
new byte[in.read() * 16];
00228 in.read(buf);
00229 StringBuffer sb =
new StringBuffer();
00230
for (
int i = 0; i < buf.length && buf[i] != 0; i++) {
00231 sb.append((
char)(buf[i]));
00232 }
00233 String[] p = sb.toString().split(
";");
00234
for (
int i = 0; i < p.length; i++) {
00235 String[] v = p[i].trim().split(
"=");
00236
if (v.length == 2 && v[0].equalsIgnoreCase(
"StreamTitle")) {
00237 String s = v[1].substring(1, v[1].length()-1);
00238
while (s.toLowerCase().endsWith(
".mp3")) {
00239 s = s.substring(0, s.length() - 4);
00240 }
00241 s +=
".mp3";
00242
return s;
00243 }
00244 }
00245
return "";
00246 }
00247
00248 public static InputStream fetchStream(String url, List headers)
00249
throws IOException
00250 {
00251
00252
if (
url.indexOf(
"http://") != 0) {
00253 System.err.println(
"Bad url (protocol): " +
url);
00254
return null;
00255 }
00256
url =
url.substring(7);
00257
int idx =
url.indexOf(
'/');
00258
if (idx <= 0) {
00259
url =
url +
"/";
00260 idx = url.length() - 1;
00261 }
00262 String host =
url.substring(0, idx);
00263 String name =
url.substring(idx);
00264
int port = 80;
00265 idx = host.indexOf(
':');
00266
if (idx >= 0) {
00267 port = Integer.parseInt(host.substring(idx+1));
00268 host = host.substring(0, idx);
00269 }
00270 Socket s =
new Socket(host, port);
00271
00272 ByteArrayOutputStream bos =
new ByteArrayOutputStream();
00273
Debug.println(
"GET " + name);
00274 bos.write((
"GET " + name +
" HTTP/1.0\r\n").getBytes());
00275 Iterator iter = headers.iterator();
00276
while (iter.hasNext()) {
00277 String hdr = iter.next().toString();
00278
IO.write(bos, hdr);
00279
Debug.println(
" " + hdr);
00280 bos.write(
'\r');
00281 bos.write(
'\n');
00282 }
00283 bos.write(
"\r\n".getBytes());
00284
00285 s.getOutputStream().write(bos.toByteArray());
00286
00287 InputStream is = s.getInputStream();
00288
return new BufferedInputStream(is);
00289 }
00290
00291 public static InputStream
fetch2(String url)
throws Exception {
00292 System.out.println(
"Fetch: " +
url);
00293 URLConnection c =
new URL(
url).openConnection();
00294 c.connect();
00295
return c.getInputStream();
00296 }
00297
00298 public static byte[]
readStream(InputStream is)
throws IOException {
00299 ByteArrayOutputStream bos =
new ByteArrayOutputStream();
00300
00301
int state = 0;
00302
int cnt = 0;
00303
if (
showResponseHeaders) state = 5;
00304
while (state < 4) {
00305
int c = is.read();
00306
if (c < 0) {
00307
throw new IOException(
"unexpected eof in message headers");
00308 }
00309
if (
delims[state] == c) state++;
00310
else if (
delims[0] == c) state = 1;
00311
else state = 0;
00312 }
00313
00314 byte[] buf =
new byte[1024];
00315
while ((cnt = is.read(buf)) > 0) {
00316 bos.write(buf, 0, cnt);
00317 }
00318
return bos.toByteArray();
00319 }
00320
00321 public static Hashtable
buildTable(String fname)
throws Exception {
00322 BufferedReader r =
new BufferedReader(
new FileReader(
fname));
00323 String turl;
00324 Hashtable
t =
new Hashtable();
00325
while ((turl = r.readLine()) != null) {
00326
url = turl;
00327 byte[] doc = fetch(
url);
00328 System.err.println(
url +
": " +
Util.strBytes(doc));
00329 t.put(
url, doc);
00330 }
00331
return t;
00332 }
00333
00334 public static void checkTable(String fname, Hashtable t)
throws Exception {
00335 BufferedReader r =
new BufferedReader(
new FileReader(
fname));
00336 String
url;
00337
while ((url = r.readLine()) != null) {
00338
try {
00339 byte[] doc = fetch(url);
00340 byte[] exp = (byte[])
t.get(url);
00341
if (
Util.compareBytes(doc, exp) != 0) {
00342 System.err.println(
"Failed: " + url);
00343 System.err.println(
"Doc: " +
Util.strBytes(doc));
00344 }
00345 }
catch (Exception e) {
00346
Debug.print(e);
00347 }
00348 }
00349 }
00350
00351 public static void addAV(String fname)
throws Exception {
00352 BufferedReader r =
new BufferedReader(
new FileReader(
fname));
00353 String
url;
00354 Hashtable
t =
new Hashtable();
00355
while ((url = r.readLine()) != null) {
00356 System.out.println(
"url: " + url);
00357 StringBuffer sb =
new StringBuffer(
00358
"http://add-url.altavista.com/cgi-bin/newurl?ad=1&q=");
00359 sb.append(URLEncoder.encode(url));
00360 byte[] doc = fetch(sb.toString());
00361 String s =
new String(doc);
00362
if (s.indexOf(
"The page was fetched") < 0) {
00363 System.out.println(s);
00364
break;
00365 }
00366
try { Thread.sleep(2000); }
catch (Throwable dt) {}
00367 }
00368 }
00369
00370 public static void check(String name)
throws Exception {
00371
LinkChecker lc =
new LinkChecker(name);
00372 lc.
run();
00373 lc.
printBadLinks();
00374 }
00375
00376 public static void main(String args[]) {
00377
Debug.debugMode =
Debug.debugAll;
00378
Debug.debugStream = System.out;
00379
try {
00380
doit(args);
00381 }
catch (Exception e) {
00382 System.out.println(
"Exception: " + e.toString());
00383
Debug.print(e);
00384 }
00385 }
00386
00387 static String
fname = null;
00388 static int repeat = 2;
00389 static int delay = 0;
00390 static Hashtable
t;
00391 static boolean times =
false;
00392 static boolean rip =
false;
00393 static String
url = null;
00394 static int limit = 0;
00395
00396 public static void doit() throws Exception {
00397
if (
times) {
00398
for (
int i = 0; i <
repeat; i++) {
00399 fetch(
url);
00400 }
00401 }
else {
00402
for (
int i = 0; i <
repeat; i++) {
00403 checkTable(
fname,
t);
00404
if (
delay > 0) Thread.sleep(
delay);
00405 }
00406 }
00407 }
00408
00409 public static void doit (String args[])
throws Exception {
00410
int numThreads = 1;
00411
boolean av =
false;
00412 String post = null;
00413 List headers =
new ArrayList();
00414
00415
int ac = 0;
00416
while (ac < args.length) {
00417 String arg = args[ac].trim();
00418
if (arg.charAt(0) !=
'-')
break;
00419 ac++;
00420
if (arg.equals(
"-urls")) {
00421
fname = args[ac++];
00422 }
else if (arg.equals(
"-count")) {
00423
repeat = Integer.parseInt(args[ac++]);
00424 }
else if (arg.equals(
"-delay")) {
00425
delay = Integer.parseInt(args[ac++]);
00426 }
else if (arg.equals(
"-threads")) {
00427 numThreads = Integer.parseInt(args[ac++]);
00428 }
else if (arg.equals(
"-checklinks")) {
00429
checkLinks =
true;
00430
fname = args[ac++];
00431 }
else if (arg.equals(
"-headers")) {
00432
showResponseHeaders =
true;
00433 }
else if (arg.equals(
"-post")) {
00434 post = args[ac++];
00435 }
else if (arg.equals(
"-header")) {
00436 String hName = args[ac++];
00437 String hVal = args[ac++];
00438 headers.add(hName +
": " + hVal);
00439 }
else if (arg.equals(
"-altavista")) {
00440 av =
true;
00441
fname = args[ac++];
00442 }
else if (arg.equals(
"-times")) {
00443
times =
true;
00444 }
else if (arg.equals(
"-limit")) {
00445
limit = Integer.parseInt(args[ac++]);
00446 }
else if (arg.equals(
"-rip")) {
00447
if (!
rip) {
00448 headers.add(
"Host: 192.168.1.8");
00449 headers.add(
"User-Agent: WinampMPEG/2.8");
00450 headers.add(
"Accept: */*");
00451 headers.add(
"Icy-Metadata:1");
00452 headers.add(
"Connection: close");
00453 }
00454
rip =
true;
00455 }
else {
00456
00457 }
00458 }
00459
00460
if (av) {
00461 addAV(
fname);
00462 }
else if (
checkLinks) {
00463 check(
fname);
00464
return;
00465 }
else if (
fname == null && !
times) {
00466
url = args[ac];
00467
if (
rip) {
00468 ripStream(
url, headers);
00469
return;
00470 }
00471
if (post == null) {
00472 InputStream in = fetchStream(
url, headers);
00473
if (
limit > 0) {
00474 in =
new LimitedInputStream(in,
limit);
00475 }
00476
try {
00477
IO.copyStream(in, System.out);
00478 } finally {
00479 in.close();
00480 }
00481 }
else {
00482 byte[] doc = post(
url, post, headers);
00483 System.out.write(doc);
00484 }
00485 }
else {
00486
t = buildTable(
fname);
00487 Thread[] threads =
new Thread[numThreads];
00488
for (
int i = 0; i < numThreads; i++) {
00489 threads[i] =
new Thread() {
00490
public void run() {
00491
try {
00492
doit();
00493 }
catch (Throwable
t) {
00494
Debug.print(
t);
00495 }
00496 }
00497 };
00498 }
00499
long start = System.currentTimeMillis();
00500
for (
int i = 0; i < numThreads; i++) {
00501 threads[i].start();
00502 }
00503
for (
int i = 0; i < numThreads; i++) {
00504
try {
00505 threads[i].join();
00506 }
catch (Throwable
t) {
00507
Debug.print(
t);
00508 }
00509 }
00510
long stop = System.currentTimeMillis();
00511
long elap = stop - start;
00512
int r_s = (
int)((
repeat * numThreads * 1000) / elap);
00513
if (
times) {
00514 System.out.println(
"" + elap +
" elapsed");
00515 System.out.println(
"" + r_s +
" requests/second");
00516 }
00517 }
00518 }
00519 }