#!/usr/local/bin/perl -w
# ========================================================================
# htget - basic debugging HTTP client
# Andrew Ho (andrew@tellme.com)
#
# This program contains embedded documentation in Perl POD (Plain Old
# Documentation) format. Search for the string "=head1" in this document
# to find documentation snippets, or use "perldoc" to read it; utilities
# like "pod2man" and "pod2html" can reformat as well.
#
# $Id: htget,v 1.12 2008/02/23 01:29:47 andrew Exp $
# ========================================================================

require 5.005;
use strict;

=head1 NAME

htget - basic debugging HTTP client

=head1 SYNOPSIS

  % htget [-h] [-c] [-i] [-p proxy] [-t tunnel] [-o file] [url] [host]

=head1 DESCRIPTION

This is a barebones HTTP client which attempts to grab the URL specified
via the command line, then optionally drops you to an interactive session
where you can add additional HTTP headers if you so desire.

If just a URL is provided, htget functions as a simple HTTP/1.1 client.
If an additional host argument is provided, that host is contacted
instead of the one provided by the URL.

Normally, the HTTP response is returned entirely preserved. If the -c
option is provided, htget will do simple translation of responses with
a chunked Transfer-Encoding; any Transfer-Encoding tag that includes
"chunked" will be removed, and the individual chunk sizes will not be
displayed. This is useful for passing the HTTP body on to another script.

If the -i option is provided, the user is put into interactive mode
after a minimal initial set of request headers is sent to the server.
Anything the user types is passed to the server as is; enter a blank
line or EOT (Ctrl+D on Unix) to send the request on. If no Host header
was given via the command line, and the user gives no Host header, one
is computed from the URL given.

An HTTP proxy can be requested with the -p option. The format of the
proxy should be like "http://hostname:3128" (no quotes). The prepended
"http://" is optional and, if the port is left out, 3128 is assumed.
Note that if a proxy is supplied, it may rewrite the Host header, thus
negating the usefulness of the optional real host parameter.

To tunnel through an HTTP proxy using the CONNECT method, instead of
via regular HTTP proxying, use the -t option. The syntax of the proxy
is the same as for the -p option. You can specify -t and -p options,
but the CONNECT operation will always come first.

To capture the HTTP body in the response to a file, instead of sending
it to stdout (similar to F<wget -O> or F<curl -o>), pass a -o option
with the filename to save the body to. The file will be truncated if it
already exists. Header output will still go to stdout, and errors will
continue to be printed to stderr. Passing -o will also force dechunking
of response bodies encoded via chunked Transfer-Encoding.

=head2 Why use this particular HTTP client tool?

This program is not primarily designed to be replacement for existing
HTTP client tools like GET, wget, lwp-download, or curl. The following
list describes some advantages of using htget over similar tools:

=over 4

=item *

Barebones operation; very little response interpretation is done, so
using htget is more like using nc or telnet to do HTTP debugging than
a real "HTTP spec compliant" client

=item *

Ability to specify a different host to connect to, so you can test a
name based VirtualHost without hacking F</etc/hosts>

=item *

HTTP proxy tunnelling via CONNECT

=back

On the other hand, the following are some disadvantages of using htget:

=over 4

=item *

Not really HTTP spec compliant; for example, htget does not actually
parse the content of HTTP response headers (however, this can be an
advantage if your goal is to debug a broken HTTP server)

=item *

Less heavily tested in the outside world than many similar tools

=back

In other words, the general purpose of htget is for HTTP debugging.

=head1 AUTHOR

Andrew Ho E<lt>andrew@tellme.comE<gt>

=cut


# ------------------------------------------------------------------------
# Libraries and globals

use FindBin;
use Socket;

use vars qw($ME $USAGE $FULL_USAGE $CRLF $MAX_BUFSIZE);
$ME = $FindBin::Script;
$USAGE = "usage: $ME [-h] [-c] [-i] [-p proxy] [-t tunnel] [-o file] url [host]\n";
$FULL_USAGE = $USAGE . <<"EndUsage";
    -h         display this help text and exit
    -c         dechunk content encoded via chunked Transfer-Encoding
    -i         enter interactive mode after outputting essential headers
    -p proxy   use this proxy (format http://hostname:3128)
    -t tunnel  tunnel through proxy:port using CONNECT method
    -o file    save HTTP entity body to file out (also enables -c)
    url        the URL identifying the resource to retrieve
    host       actual host to contact (default is to use host from URL)
EndUsage

# HTTP line terminator, and the largest single read() issued for a body
# that was announced with a Content-Length header.
$CRLF = "\015\012";
$MAX_BUFSIZE = 1024 * 1024;

use vars qw(
    $URL $HOST $PORT $PATH $REAL_HOST $REAL_PORT
    $PROXY_HOST $PROXY_PORT $TUNNEL_HOST $TUNNEL_PORT
    $OUTPUT_FILE $UNCHUNK $INTERACTIVE
);
$URL         = undef;
$HOST        = undef;
$PORT        = 80;
$PATH        = '/';
$REAL_HOST   = $HOST;
$REAL_PORT   = $PORT;
$PROXY_HOST  = undef;
$PROXY_PORT  = undef;
$TUNNEL_HOST = undef;
$TUNNEL_PORT = undef;
$OUTPUT_FILE = undef;
$UNCHUNK     = 0;
$INTERACTIVE = 0;


# ------------------------------------------------------------------------
# Helper functions

# Read one chunk-size line from the CLIENT socket (using the current
# input record separator) and return the size as a number. Returns undef
# when the socket is at end of file. Anything following a ";" on the line
# (a chunk extension) is discarded before the hexadecimal conversion, so
# that hex() does not warn about it.
sub _read_chunk_size {
    my $line = <CLIENT>;
    return unless defined $line;
    $line =~ s/;.*$//s;
    $line =~ s/\s+$//;
    return hex $line;
}


# ------------------------------------------------------------------------
# Parse command line

# With no arguments at all, prompt for the URL if stdout is a terminal
if(!@ARGV) {
    if(-t STDOUT) {
        my $fh = select STDOUT;
        local $| = 1;
        print 'Enter URL: ';
        $URL = <STDIN>;
        chomp $URL if $URL;
        select $fh;
    }
}

while(@ARGV) {
    local $_ = shift @ARGV;

    if(/^\-+h(?:elp)?/i) {
        print STDERR $FULL_USAGE;
        exit 0;

    } elsif(/^\-+c(?:hunk(?:ed)?)?/i) {
        $UNCHUNK = 1;

    } elsif(/^\-+i(?:nteractive)?/i) {
        $INTERACTIVE = 1;

    } elsif(/^\-+p(?:roxy)?/i) {
        unless(@ARGV) {
            print STDERR qq($ME: "-p" option requires a proxy argument\n);
            print STDERR $USAGE;
            exit 1;
        }
        # Peel off scheme, host and port; anything left over is an error
        my $proxy = shift @ARGV;
        $proxy =~ s|^http://||i;
        $PROXY_HOST = $1 if $proxy =~ s|^([^:/]+)||;
        $PROXY_PORT = $proxy =~ s|:(\d+)|| ? $1 : 3128;
        if($proxy) {
            print STDERR qq($ME: malformed proxy URL\n);
            exit 1;
        }

    } elsif(/^\-+t(?:unnel)?/i) {
        unless(@ARGV) {
            print STDERR qq($ME: "-t" option requires a tunnel argument\n);
            print STDERR $USAGE;
            exit 1;
        }
        # Same syntax as for the -p option
        my $tunnel = shift @ARGV;
        $tunnel =~ s|^http://||i;
        $TUNNEL_HOST = $1 if $tunnel =~ s|^([^:/]+)||;
        $TUNNEL_PORT = $tunnel =~ s|:(\d+)|| ? $1 : 3128;
        if($tunnel) {
            print STDERR qq($ME: malformed tunnel URL\n);
            exit 1;
        }

    } elsif(/^\-+o(?:ut)?/i) {
        unless(@ARGV) {
            print STDERR qq($ME: "-o" option requires a output file argument\n);
            print STDERR $USAGE;
            exit 1;
        }
        $OUTPUT_FILE = shift @ARGV;
        if(!defined($OUTPUT_FILE) || $OUTPUT_FILE eq '') {
            print STDERR qq($ME: malformed or empty output filename\n);
            exit 1;
        }

    } elsif(/^\-/) {
        print STDERR qq($ME: unrecognized argument "$_"\n);
        print STDERR $USAGE;
        exit 1;

    } else {
        # First bare argument is the URL, second is the real host
        if(!$URL) {
            s/\s+//gsm;
            $URL = $_ if $_ ne '';
        } elsif(!$REAL_HOST) {
            s/\s+//gsm;
            $REAL_HOST = $_;
        } else {
            print STDERR qq($ME: ignoring extra argument "$_"\n);
        }
    }
}

unless($URL) {
    print STDERR "$ME: no URL provided\n";
    exit 1;
}

# Split the URL into host, port and path; whatever remains after the
# host and port have been removed is the path.
$URL =~ s|^http://||i;
$HOST = $1 if $URL =~ s|^([^:/]+)||;
$PORT = $1 if $URL =~ s|:(\d+)||;
$PATH = $URL if $URL;
unless($HOST && $PORT && $PATH) {
    print STDERR "$ME: malformed URL\n";
    exit 1;
}

# The real host may carry its own ":port" suffix
$REAL_HOST = $HOST unless $REAL_HOST;
if($REAL_HOST =~ s/\:(\d+)$//) {
    $REAL_PORT = $1;
} else {
    $REAL_PORT = $PORT;
}

# Keep track of last output to stdout, so we can add trailing LF if needed
my $last = undef;


# ------------------------------------------------------------------------
# Set up network client

my $proto = getprotobyname('tcp');
socket(CLIENT, PF_INET, SOCK_STREAM, $proto) ||
    die "$ME: socket error: $!\n";

# Connect to the tunnel if one was given, else the proxy, else the real host
my $connect_host = $TUNNEL_HOST || $PROXY_HOST || $REAL_HOST;
my $addr = inet_aton($connect_host) ||
    die qq($ME: could not resolve host "$connect_host"\n);
my $paddr = sockaddr_in($TUNNEL_PORT || $PROXY_PORT || $REAL_PORT, $addr);
connect(CLIENT, $paddr) || die "$ME: connect error: $!\n";

# If we tunnelled via CONNECT, re-establish the real connection
if($TUNNEL_HOST && $TUNNEL_PORT) {
    my $connect_host = $PROXY_HOST || $REAL_HOST;
    my $connect_port = $PROXY_PORT || $REAL_PORT;

    # Send CONNECT request
    my $fh = select CLIENT;
    local $| = 1;
    print CLIENT 'CONNECT ', $connect_host, ':', $connect_port,
                 ' HTTP/1.0', $CRLF, $CRLF;
    select $fh;

    # Read and print CONNECT response headers
    local $/ = $CRLF;
    while(<CLIENT>) {
        print;
        $last = $_;
        last if /^\s*$/;
    }
}


# ------------------------------------------------------------------------
# The HTTP transaction itself

{
    my $fh = select STDOUT;
    local $| = 1;

    # Output HTTP request line and Connection header; when going through
    # a proxy the request line carries an absolute http:// URL.
    my $host_hdr = $PORT == 80 ? $HOST : join ':', $HOST, $PORT;
    my $real_hostport = $REAL_PORT == 80
        ? $REAL_HOST
        : join(':', $REAL_HOST, $REAL_PORT);
    my $path = $PROXY_HOST
        ? join('', 'http://', $real_hostport, $PATH)
        : $PATH;

    # In interactive mode each header sent is echoed to stdout as well
    print CLIENT 'GET ', $path, ' HTTP/1.1', $CRLF;
    print        'GET ', $path, ' HTTP/1.1', "\n" if $INTERACTIVE;
    print CLIENT 'Host: ', $host_hdr, $CRLF;
    print        'Host: ', $host_hdr, "\n" if $INTERACTIVE;
    print CLIENT 'Connection: close', $CRLF;
    print        'Connection: close', "\n" if $INTERACTIVE;

    # If the user requested interactive mode, let them have a chance
    # to enter additional HTTP headers.
    if($INTERACTIVE) {
        # $break stays true only if input ended without a blank line
        # (EOF), in which case we supply the line break ourselves.
        my $break = 1;
        while(<STDIN>) {
            $break = 0;
            chomp;
            last if /^\s*$/;
            print CLIENT $_, $CRLF;
            $break = 1;
        }
        print "\n" if $break;
    }
    print CLIENT $CRLF;

    # Flush the client output so that the transaction goes through.
    select CLIENT;
    local $| = 1;
    select $fh;
}

# Read response headers
$/ = $CRLF;
my($content_length, $chunked);
while(<CLIENT>) {
    $content_length = $1 if /^content\-length\:\s*(\d+)\s*$/i;
    if(/^transfer\-encoding\:.*\bchunked\b/i) {
        # Hide the chunked Transfer-Encoding header when dechunking
        $chunked = 1;
        unless($UNCHUNK) {
            print;
            $last = $_;
        }
    } else {
        print;
        $last = $_;
    }
    last if /^\s*$/;
}

# Open output file, if necessary; the body is written to a temporary
# file which is renamed over the requested name once the body is read.
my $out_fh = \*STDOUT;
my $tmpfile;
if(defined($OUTPUT_FILE) && $OUTPUT_FILE ne '') {
    $tmpfile = $OUTPUT_FILE . ".tmp.$$";
    open my $fh, '>', $tmpfile or
        die "$ME: could not open $tmpfile for writing: $!\n";
    $out_fh = $fh;

    # If we are capturing to a file, just assume we want to dechunk, too
    $UNCHUNK = 1;
}

# Read response entity body
if(defined $content_length && $content_length ne '') {
    # Got Content-Length header, so just read that much
    my $total_bytes_read = 0;
    while($total_bytes_read < $content_length) {
        my $bufsize = $content_length - $total_bytes_read;
        $bufsize = $MAX_BUFSIZE if $bufsize > $MAX_BUFSIZE;

        my $buffer = '';
        my $bytes_read = read CLIENT, $buffer, $bufsize;
        unless(defined $bytes_read) {
            warn "$ME: read error: $!\n";
            last;
        }
        if($bytes_read != $bufsize) {
            my $bytes = $bytes_read == 1 ? 'byte' : 'bytes';
            warn "$ME: read $bytes_read $bytes, expected $bufsize\n";
        }

        # Zero bytes means the peer closed early; without this exit the
        # loop would never make progress.
        last if $bytes_read == 0;

        print $out_fh $buffer;
        $last = $buffer;
        $total_bytes_read += $bytes_read;
    }

    # Slurp whatever follows the announced length. The value is read
    # into a named variable because readline only assigns to $_ when it
    # is the sole condition of a while loop.
    local $/ = undef;
    my $extra = <CLIENT>;
    if(defined $extra && $extra ne '') {
        my $bytes = $content_length == 1 ? 'byte' : 'bytes';
        warn "$ME: read $content_length $bytes, but more content is present\n";
        print $out_fh $extra;
        $last = $extra;
    }

} elsif($chunked && $UNCHUNK) {
    # Use and decode chunked Transfer-Encoding (see RFC 2616, 19.4.6)

    # Read first chunk size; undef (EOF) simply skips the read loop
    my $chunk_size = _read_chunk_size();

    # Read loop
    while(defined $chunk_size && $chunk_size > 0) {
        my $buffer = '';
        my $bytes_read = read CLIENT, $buffer, $chunk_size;
        unless(defined $bytes_read) {
            warn "$ME: read error: $!\n";
            last;
        }
        if($bytes_read != $chunk_size) {
            my $bytes = $bytes_read == 1 ? 'byte' : 'bytes';
            warn "$ME: read $bytes_read $bytes, but expected $chunk_size\n";
        }
        print $out_fh $buffer;
        $last = $buffer;

        # Read next endline
        my $crlf = <CLIENT>;
        unless(defined $crlf && $crlf eq $CRLF) {
            my $got = defined $crlf ? $crlf : '';
            warn qq($ME: received "$got", but expected CRLF\n);
        }

        # Read next chunk size
        $chunk_size = _read_chunk_size();
    }

    # There can be more headers, or a CRLF here
    while(<CLIENT>) {
        last if $_ eq $CRLF;
        print $out_fh $_;
        $last = $_;
    }

    # Warn if there is more response body
    my $extra = <CLIENT>;
    if(defined $extra && $extra ne '') {
        warn "$ME: additional content following chunked trailer\n";
        print $out_fh $extra;
        $last = $extra;
        while(<CLIENT>) {
            print $out_fh $_;
            $last = $_;
        }
    }

} else {
    # Neither Content-Length nor decoding chunked Transfer-Encoding
    while(<CLIENT>) {
        print $out_fh $_;
        $last = $_;
    }
}
close CLIENT;

if($tmpfile) {
    # Buffered write errors only surface at close, so check it before
    # moving the file into place.
    close $out_fh or
        die "$ME: could not close $tmpfile: $!\n";
    rename($tmpfile, $OUTPUT_FILE) or
        die "$ME: could not rename $tmpfile to $OUTPUT_FILE: $!\n";
}

if(defined $last) {
    # Finish stdout output with a newline unless one was just printed
    print "\n" unless $tmpfile || $last =~ /\n$/;
} else {
    # Nothing at all was received from the server
    print STDERR "Connection closed by remote host.\n";
    exit 1;
}


# ------------------------------------------------------------------------
# Clean up and exit

exit 0;


# ========================================================================
__END__