#!/usr/bin/ruby

=begin
Purpose:
    Performs an HTTP trace of the given URLs. It's useful if you want to
    trace through a set of pages while keeping state (cookies).
Author: Wejn <wejn at box dot cz>
License: GPLv2 (without the "later" option)
Requires: Ruby >= 1.8
TS: 20080304202000

Example:

Let's say you need to track what happens when you click through
some affiliate link and then what happens when you go to the "order"
page.

You call this script like this:

./trace-http http://link.$OC/link.e/s28119/ph/main2/ http://link.$OC/join.e/ph

and get a result like this:

*** Tracing URL: http://link.$OC/link.e/s28119/ph/main2/ ***

Req: http://link.$OC/link.e/s28119/ph/main2/
  Sets cookie: referreR s28119%2Fph%2Fmain2
  Sets cookie: affil_id s28119
  Redirect 302 -> http://click.$OC:8080/ct?id=28119&bn=2202&url=s28119/ph/main2

Req: http://click.$OC:8080/ct?id=28119&bn=2202&url=s28119/ph/main2
  Sets cookie: ezpartner 28119:2202:0:1199293054:
  Redirect 302 -> http://link.$OC/tp.e/s28119/ph/main2

Req: http://link.$OC/tp.e/s28119/ph/main2
  Redirect 302 -> http://$PH/preview/

Req: http://$PH/preview/
  Redirect 302 -> http://pvw.$PH/preview/

Req: http://pvw.$PH/preview/
  Success 200, body: 20935 bytes.

*** Tracing URL: http://link.$OC/join.e/ph ***

Req: http://link.$OC/join.e/ph
  Redirect 302 -> http://click.$OC:8080/signup?link=http://link.$OC/joincb.e?site=ph

Req: http://click.$OC:8080/signup?link=http://link.$OC/joincb.e?site=ph
  Redirect 302 -> http://link.$OC/joincb.e?site=ph&webmaster=28119:54802

Req: http://link.$OC/joincb.e?site=ph&webmaster=28119:54802
  Redirect 302 -> https://secure.$VXS/ezbill.php3?methods=1&site=593&webmaster=28119:54802&user_1=yb:e2cac15a&user_2=czI4MTE5L3BoL21haW4y

Req: https://secure.$VXS/ezbill.php3?methods=1&site=593&webmaster=28119:54802&user_1=yb:e2cac15a&user_2=czI4MTE5L3BoL21haW4y
  Success 200, body: 13669 bytes.

*** The end ***

which makes it VERY easy to see what's going on.
=end

require 'net/https'
require 'net/http'
require 'uri'
require 'cgi'
require 'time'
begin
    require 'parsedate'
rescue LoadError
    # parsedate was dropped from the standard library in Ruby 1.9;
    # tolerate its absence so the script still loads on newer rubies.
end

# Sanity check # {{{1
# At least one URL is required; otherwise print the usage line to stderr
# and terminate with exit status 1.
abort "Usage: #{File.basename($0)} <url>+" if ARGV.empty?
# }}}1

uris = []

# verify urls # {{{1

# Turns one command-line argument into an absolute HTTP(S) URI.
#
# arg:: URL as typed by the user; the scheme may be omitted ("example.com"),
#       in which case "http://" is assumed
#
# Returns a URI::HTTP (or URI::HTTPS) whose path is never empty, so it can
# be used directly as a request target.
# Raises ArgumentError when the argument cannot be parsed, is not an
# http/https URL, or has no host.
def parse_target_url(arg)
    url = arg.to_s
    url += '/' unless url.index('/')
    # Scheme detection is case-insensitive so "HTTP://host/" is not
    # mistaken for a scheme-less host name.
    url = "http://" + url unless url =~ /\A(f|ht)tps?:\/\//i

    begin
        uri = URI.parse(url)
    rescue URI::InvalidURIError => e
        raise ArgumentError, "Invalid url: #{arg} (#{e.message})"
    end

    # URI::HTTPS is a subclass of URI::HTTP, so one check covers both.
    unless uri.kind_of?(URI::HTTP) && !uri.host.to_s.empty?
        raise ArgumentError, "Not a http style url: #{arg}"
    end

    # "http://host" parses with an empty path; normalise it to "/".
    uri.path = '/' if uri.path.to_s.empty?
    uri
end

ARGV.each do |arg|
    begin
        uris << parse_target_url(arg)
    rescue ArgumentError => e
        $stderr.puts e.message
        exit 1
    end
end
# }}}1

# Simple in-memory cookie jar, just enough to simulate a session over HTTP.
#
# Cookies are filed by scope: a cookie sent with a Domain attribute is stored
# under ".domain" and is returned for that domain and all of its subdomains;
# a cookie without one is stored under the sending host and is returned to
# exactly that host.
class CookieJar # {{{1
    def initialize
        @jar = {}
    end

    # Parses one raw Set-Cookie header value and stores the cookie.
    #
    # cookie:: header value, e.g. "sid=abc; Path=/; Domain=example.com"
    # domain:: host the response came from; used as the cookie's scope when
    #          the header has no Domain attribute
    #
    # Attribute names are matched case-insensitively. Max-Age takes
    # precedence over Expires, and an unparseable Expires date turns the
    # cookie into a session cookie instead of raising. A Path that does not
    # start with "/" is ignored.
    #
    # Returns the stored cookie hash, or nil when the header has no name.
    def set_cookie(cookie, domain)
        pairs = cookie.to_s.split(/;\s*/).map { |x| x.split(/=/, 2) }
        name, value = pairs.shift
        name = name.to_s.strip
        return nil if name.empty?

        # Collect attributes; the first occurrence of a name wins.
        attrs = {}
        pairs.each do |key, val|
            next if key.nil?
            key = key.strip.downcase
            attrs[key] = val unless attrs.key?(key)
        end

        # XXX: Gaping security hole -- we allow cross-domain cookie setting
        requested = attrs["domain"].to_s.strip.downcase.sub(/\A\.+/, "")
        scope = requested.empty? ? domain.to_s.downcase : "." + requested

        path = attrs["path"].to_s.strip
        path = nil unless path[0, 1] == "/"

        expires = expiry_for(attrs)

        cook = { "name" => name, "value" => value, "domain" => scope }
        cook["path"] = path if path
        cook["expires"] = expires if expires
        cook["secure"] = true if attrs.key?("secure")

        @jar[scope] ||= {}
        @jar[scope][name] = cook
    end

    # Builds the value of a Cookie request header.
    #
    # domain:: host the request goes to
    # path::   path of the request; an empty path is treated as "/"
    #
    # Returns "name=value" pairs joined by "; ", or an empty string when no
    # stored cookie applies. When the same name is stored for several
    # scopes, the most specific scope wins.
    def cookies_for(domain, path)
        host = domain.to_s.downcase
        path = "/" if path.nil? || path.empty?
        now = Time.now

        # Candidate scopes from least to most specific, e.g. for
        # "www.example.com": ".com", ".example.com", ".www.example.com"
        # and finally the host-only scope "www.example.com".
        labels = host.split(".")
        scopes = []
        (labels.size - 1).downto(0) do |i|
            scopes << "." + labels[i..-1].join(".")
        end
        scopes << host

        selected = {}
        scopes.each do |scope|
            (@jar[scope] || {}).each do |name, cook|
                next if cook["expires"] && cook["expires"] <= now
                next unless path_match?(cook["path"], path)
                # XXX: we ignore "secure" flag
                selected[name] = cook
            end
        end

        # to_s: a cookie sent without "=" has no value at all
        selected.map { |name, cook| name + "=" + cook["value"].to_s }.join("; ")
    end

    private

    # Absolute expiry time described by the attributes, or nil for a
    # session cookie (no usable Max-Age or Expires).
    def expiry_for(attrs)
        max_age = attrs["max-age"].to_s.strip
        return Time.now + max_age.to_i if max_age =~ /\A-?\d+\z/

        stamp = attrs["expires"].to_s.strip
        return nil if stamp.empty?

        begin
            Time.parse(stamp)
        rescue ArgumentError
            nil
        end
    end

    # True when a cookie restricted to cookie_path applies to request_path:
    # either no restriction, an exact match, or a prefix match that ends on
    # a "/" boundary (so "/foo" covers "/foo/bar" but not "/foobar").
    def path_match?(cookie_path, request_path)
        return true unless cookie_path
        return true if cookie_path == request_path
        return false unless request_path.index(cookie_path) == 0
        cookie_path[-1, 1] == "/" || request_path[cookie_path.size, 1] == "/"
    end
end # }}}1

cj = CookieJar.new

# Upper bound on redirects followed per starting URL, so that a redirect
# loop cannot keep the script running forever.
max_redirects = 20

# do the trace(s) # {{{1
# For every starting URL: request it, report cookies and status, and keep
# following redirects (sharing one cookie jar across all requests) until a
# non-redirect response, an error, or the redirect limit is reached.
uris.each do |start|
    current = start
    redirects = 0

    puts
    puts "*** Tracing URL: #{current.to_s} ***"
    puts

    loop do
        puts "Req: #{current.to_s}"
        http = Net::HTTP.new(current.host, current.port)
        if current.kind_of?(URI::HTTPS)
            http.use_ssl = true
            # certificates are deliberately not verified
            http.verify_mode = OpenSSL::SSL::VERIFY_NONE
        end

        headers = { "User-Agent" => "TraceHTTP/0.1 (X11; U; Linux i686; en-US; rv:0.0.1) coded by Wejn" }

        # Only send a Cookie header when there is something to send.
        cookies = cj.cookies_for(current.host, current.path)
        headers['Cookie'] = cookies if cookies && !cookies.empty?

        begin
            # request_uri yields "/" for an empty path and appends the query
            resp = http.get(current.request_uri, headers)
        rescue SocketError, SystemCallError, IOError, Timeout::Error, OpenSSL::SSL::SSLError => e
            puts "  Request failed: #{e.class}: #{e.message}"
            break
        end

        cook = resp.get_fields("Set-Cookie")
        if cook && !cook.empty?
            cook.each do |v|
                cj.set_cookie(v, current.host)
                puts "  Sets cookie: #{v.sub(/;.*/,'').sub(/=/, ' ')}"
            end
        end

        case resp
        when Net::HTTPSuccess
            puts "  Success #{resp.code}, body: #{resp.body.to_s.size} bytes."
            break
        when Net::HTTPRedirection
            location = resp['Location'].to_s.strip
            if location.empty?
                # e.g. 300 or 304: nothing to follow
                puts "  Redirect #{resp.code} without Location header."
                break
            end

            puts "  Redirect #{resp.code} -> #{location}"

            begin
                # merge resolves absolute, host-relative and
                # directory-relative references against the current URL
                target = current.merge(location)
            rescue URI::Error => e
                puts "  Cannot follow redirect: #{e.message}"
                break
            end

            unless target.kind_of?(URI::HTTP) && !target.host.to_s.empty?
                puts "  Cannot follow redirect: not a http style url."
                break
            end

            redirects += 1
            if redirects > max_redirects
                puts "  Giving up after #{max_redirects} redirects."
                break
            end

            puts
            current = target
        when Net::HTTPClientError
            puts "  Client Error #{resp.code}."
            break
        when Net::HTTPServerError
            puts "  Server Error #{resp.code}."
            break
        else
            puts "Unknown response (WTF?): #{resp.code}."
            break
        end
    end

end
# }}}1

puts
puts "*** The end ***"