#!/usr/bin/ruby
=begin
Purpose:
Performs an HTTP trace of the given URLs. It's useful if you want to
trace through a set of pages while keeping state (cookies).
Author: Wejn <wejn at box dot cz>
License: GPLv2 (without the "or later" option)
Requires: Ruby >= 1.8
TS: 20080304202000
Example:
Let's say you need to track what happens when you click through
some affiliate link and then what happens when you go to "order"
page.
You call this script like this:
./trace-http http://link.$OC/link.e/s28119/ph/main2/ http://link.$OC/join.e/ph
and get result like this:
*** Tracing URL: http://link.$OC/link.e/s28119/ph/main2/ ***
Req: http://link.$OC/link.e/s28119/ph/main2/
Sets cookie: referreR s28119%2Fph%2Fmain2
Sets cookie: affil_id s28119
Redirect 302 -> http://click.$OC:8080/ct?id=28119&bn=2202&url=s28119/ph/main2
Req: http://click.$OC:8080/ct?id=28119&bn=2202&url=s28119/ph/main2
Sets cookie: ezpartner 28119:2202:0:1199293054:
Redirect 302 -> http://link.$OC/tp.e/s28119/ph/main2
Req: http://link.$OC/tp.e/s28119/ph/main2
Redirect 302 -> http://$PH/preview/
Req: http://$PH/preview/
Redirect 302 -> http://pvw.$PH/preview/
Req: http://pvw.$PH/preview/
Success 200, body: 20935 bytes.
*** Tracing URL: http://link.$OC/join.e/ph ***
Req: http://link.$OC/join.e/ph
Redirect 302 -> http://click.$OC:8080/signup?link=http://link.$OC/joincb.e?site=ph
Req: http://click.$OC:8080/signup?link=http://link.$OC/joincb.e?site=ph
Redirect 302 -> http://link.$OC/joincb.e?site=ph&webmaster=28119:54802
Req: http://link.$OC/joincb.e?site=ph&webmaster=28119:54802
Redirect 302 -> https://secure.$VXS/ezbill.php3?methods=1&site=593&webmaster=28119:54802&user_1=yb:e2cac15a&user_2=czI4MTE5L3BoL21haW4y
Req: https://secure.$VXS/ezbill.php3?methods=1&site=593&webmaster=28119:54802&user_1=yb:e2cac15a&user_2=czI4MTE5L3BoL21haW4y
Success 200, body: 13669 bytes.
*** The end ***
which makes it VERY easy to see what's going on.
=end
require 'net/https'
require 'net/http'
require 'uri'
require 'cgi'
require 'time'
# parsedate was dropped from the standard library in Ruby 1.9; load it only
# where it still exists so the script also starts on current interpreters.
begin
  require 'parsedate'
rescue LoadError
  # not available on this Ruby -- cookie dates are parsed with 'time' instead
end
# Sanity check # {{{1
# At least one URL argument is required; without any, show the usage line on
# stderr and quit with exit status 1.
if ARGV.empty?
  $stderr.puts "Usage: #{File.basename($0)} <url>+"
  exit 1
end
# }}}1
uris = []
# verify urls # {{{1

# Turns one command-line argument into an HTTP(S) URI object.
#
# A bare host name ("example.com") gets a trailing "/" and an "http://"
# prefix. An argument that already carries a http/https/ftp scheme (in any
# letter case) is parsed as given.
#
# arg:: the argument as typed by the user (String)
#
# Returns the parsed URI::HTTP / URI::HTTPS object, or nil when the argument
# is not a usable http style url (unparsable, other scheme, or no host).
def parse_trace_url(arg)
  url = arg.index('/') ? arg : arg + '/'
  # Schemes are case-insensitive, so "HTTP://host/" must not get a second prefix.
  url = "http://" + url unless url =~ /\A(f|ht)tps?:\/\//i
  uri = URI.parse(url)
  # URI::HTTPS is a subclass of URI::HTTP, so one check covers both.
  return nil unless uri.kind_of?(URI::HTTP)
  # Without a host there is nothing to connect to later on.
  return nil if uri.host.nil? || uri.host.empty?
  uri
rescue URI::InvalidURIError
  # Report malformed input like any other unusable argument instead of
  # letting the exception escape as a backtrace.
  nil
end

# Every argument has to be usable; the first bad one aborts the run.
ARGV.each do |arg|
  parsed = parse_trace_url(arg)
  unless parsed
    $stderr.puts "Not a http style url"
    exit 1
  end
  uris << parsed
end
# }}}1
# Simple cookie jar implementation, enabling to simulate sessions in http
#
# Cookies live in a two-level hash: storage key -> cookie name -> record.
# The storage key is the lower-cased request host for cookies without a
# Domain attribute (sent back to exactly that host), or "." + domain for
# cookies with one (sent back to that domain and all of its subdomains).
class CookieJar # {{{1
  def initialize
    @jar = {}
  end

  # Stores one cookie taken from a raw Set-Cookie header value.
  #
  # cookie:: header value, e.g. "sid=abc; Path=/; Domain=.example.com"
  # domain:: host the response came from; used when the cookie carries no
  #          Domain attribute of its own
  #
  # Returns the stored record (a Hash with "name", "value", "domain" and,
  # when given, "path", "expires", "secure"), or nil when the header has no
  # cookie name.
  def set_cookie(cookie, domain)
    pair, *raw_attrs = cookie.split(/;\s*/)
    name, value = pair.to_s.split(/=/, 2)
    return nil if name.nil? || name.empty?

    # Attribute names are case-insensitive ("Path", "path", "PATH" ...).
    # The first occurrence of an attribute wins.
    attrs = {}
    raw_attrs.each do |raw|
      key, val = raw.split(/=/, 2)
      key = key.to_s.strip.downcase
      attrs[key] = val unless key.empty? || attrs.key?(key)
    end

    # XXX: Gaping security hole -- we allow cross-domain cookie setting
    # A leading dot on the Domain attribute carries no meaning of its own, so
    # ".example.com" and "example.com" are stored under the same key.
    attr_domain = attrs["domain"].to_s.strip.sub(/\A\./, "").downcase
    store_key = attr_domain.empty? ? domain.to_s.downcase : "." + attr_domain

    record = { "name" => name, "value" => value }
    record["path"] = attrs["path"] if attrs["path"]
    record["domain"] = store_key
    expires = parse_expires(attrs["expires"])
    record["expires"] = expires if expires
    record["secure"] = true if attrs.key?("secure")

    @jar[store_key] ||= {}
    @jar[store_key][name] = record
  end

  # Builds the Cookie request header value for a request.
  #
  # domain:: host the request goes to
  # path::   request path; an empty path is treated as "/"
  #
  # Returns "name=value" pairs joined by "; ", or "" when nothing matches.
  def cookies_for(domain, path)
    host = domain.to_s.downcase
    request_path = path.to_s.empty? ? "/" : path.to_s
    now = Time.now

    # Domain cookies for every dot-suffix of the host (the host itself
    # included), least specific first, then the host-only cookies, so that a
    # more specific cookie replaces a same-named less specific one.
    labels = host.split(".")
    keys = (1..labels.size).map { |n| "." + labels.last(n).join(".") }
    keys << host

    selected = {}
    keys.each do |key|
      (@jar[key] || {}).each do |name, record|
        next if record["expires"] && record["expires"] <= now
        next unless path_match?(request_path, record["path"])
        # XXX: we ignore "secure" flag
        selected[name] = record
      end
    end

    # Interpolation keeps a valueless cookie ("flag") from raising on nil.
    selected.map { |name, record| "#{name}=#{record['value']}" }.join("; ")
  end

  private

  # Parses an Expires attribute value, honouring the zone given in the text.
  # Returns a UTC Time, or nil for a missing or unparsable value (the cookie
  # is then kept like one without an expiry date).
  def parse_expires(text)
    return nil if text.nil? || text.strip.empty?
    Time.parse(text).utc
  rescue ArgumentError
    nil
  end

  # True when a cookie restricted to cookie_path applies to request_path:
  # the cookie path must be a prefix that ends on a "/" boundary, so "/foo"
  # covers "/foo" and "/foo/bar" but not "/foobar". A missing or empty
  # cookie path applies everywhere.
  def path_match?(request_path, cookie_path)
    return true if cookie_path.nil? || cookie_path.empty?
    return false unless request_path[0, cookie_path.size] == cookie_path
    request_path.size == cookie_path.size ||
      cookie_path[-1, 1] == "/" ||
      request_path[cookie_path.size, 1] == "/"
  end
end # }}}1
cj = CookieJar.new
# do the trace(s) # {{{1
# For every start URL: request it, feed any Set-Cookie headers into the
# shared jar, print what happened, and keep following redirects until a
# non-redirect response (or a dead end) is reached. The jar is shared across
# all URLs, so state set while tracing one URL is visible to the next.
max_redirects = 30 # safety net against endless redirect loops
uris.each do |start_uri|
  puts
  puts "*** Tracing URL: #{start_uri.to_s} ***"
  puts
  target = start_uri
  followed = 0
  loop do
    puts "Req: #{target.to_s}"
    http = Net::HTTP.new(target.host, target.port)
    if target.kind_of?(URI::HTTPS)
      http.use_ssl = true
      # NOTE: certificates are not verified, so the trace also works
      # against hosts with self-signed or broken certificates.
      http.verify_mode = OpenSSL::SSL::VERIFY_NONE
    end
    headers = { "User-Agent" => "TraceHTTP/0.1 (X11; U; Linux i686; en-US; rv:0.0.1) coded by Wejn" }
    # A URL such as "http://host" has an empty path; cookies and the
    # request line still have to be computed for "/".
    request_path = target.path.to_s.empty? ? "/" : target.path
    cookies = cj.cookies_for(target.host, request_path)
    # The jar answers with an empty string when nothing matches; do not
    # send an empty Cookie header in that case.
    headers['Cookie'] = cookies unless cookies.to_s.empty?
    # request_uri is the path plus query and never empty.
    resp = http.get(target.request_uri, headers)
    (resp.get_fields("Set-Cookie") || []).each do |v|
      cj.set_cookie(v, target.host)
      puts " Sets cookie: #{v.sub(/;.*/,'').sub(/=/, ' ')}"
    end
    case resp
    when Net::HTTPSuccess
      puts " Success #{resp.code}, body: #{resp.body.to_s.size} bytes."
      break
    when Net::HTTPRedirection
      location = resp['Location']
      puts " Redirect #{resp.code} -> #{location}"
      puts
      # Some 3xx answers (e.g. 304) carry no Location at all.
      if location.nil? || location.strip.empty?
        puts " No Location header, nothing to follow."
        break
      end
      begin
        # Resolves absolute, host-relative ("/x") and document-relative
        # ("x", "../x") locations against the URL just requested.
        next_uri = target.merge(location.strip)
      rescue URI::Error => e
        puts " Cannot follow redirect: #{e.message}"
        break
      end
      unless next_uri.kind_of?(URI::HTTP) && next_uri.host
        puts " Cannot follow redirect: not a http style url"
        break
      end
      followed += 1
      if followed > max_redirects
        puts " Giving up after #{max_redirects} redirects."
        break
      end
      target = next_uri
    when Net::HTTPClientError
      puts " Client Error #{resp.code}."
      break
    when Net::HTTPServerError
      puts " Server Error #{resp.code}."
      break
    else
      puts "Unknown response (WTF?): #{resp.code}."
      break
    end
  end
end
# }}}1
puts
puts "*** The end ***"