#!/usr/bin/ruby1.8
require 'tempfile'
#
# This is a class which allows a remote HTTP/HTTPS page to be downloaded.
# It allows both the content and the HTTP status-code to be retrieved,
# assuming the fetch was a success.
#
# This code is *horrifically* bad, but required because net/http doesn't honour
# timeouts under certain circumstances. I'm not proud of this code.
#
# Steve
# --
#
class WebFetch
  #
  # Matches an HTTP status line in the header dump written by curl and
  # captures the numeric status code.
  #
  # The minor version is optional so that "HTTP/2 200" is recognised as
  # well as "HTTP/1.1 200 OK".
  #
  STATUS_LINE = /\AHTTP\/[0-9]+(?:\.[0-9]+)?\s+([0-9]+)/

  #
  # The URL & timeout period (in seconds) we were given in the constructor
  #
  attr_reader :url, :timeout

  #
  # The HTTP status code, and content, we received from fetching the URL.
  #
  # The status is -1 until a fetch has succeeded; afterwards it is the
  # code as a String (e.g. "200"), exactly as it appeared in the headers.
  #
  attr_reader :status, :text

  #
  # An error to return to the caller, on failure.  Empty when there has
  # been no failure.
  #
  attr_reader :error

  #
  # Constructor
  #
  # url     - the URL to download.
  # timeout - the maximum number of seconds the fetch may take.
  #
  def initialize( url, timeout = 10 )
    @url     = url
    @timeout = timeout
    reset_result
  end

  #
  # Perform the fetch of the remote URL. Return "true" on success, and
  # "false" on failure - in which case the error is available via "error".
  #
  # Every call starts from a clean slate, so calling this method more than
  # once never mixes the results of different fetches.
  #
  def fetch
    tmp_head = tmp_body = nil
    reset_result

    #
    # Generate temporary files to contain the header and the body
    # from the server.
    #
    tmp_head = Tempfile.new('curl-header')
    tmp_body = Tempfile.new('curl-body')
    head     = tmp_head.path
    body     = tmp_body.path

    #
    # curl writes to the files by name, so we don't need our own handles.
    # (Closing without unlinking leaves the files in place.)
    #
    tmp_head.close
    tmp_body.close

    run_curl( head, body )

    #
    # If the header was empty (or missing) then we're a failure.
    #
    # (A body might be legitimately empty.)
    #
    unless File.size?( head )
      @error = "Fetch of #{@url} failed"
      return false
    end

    @status = parse_status( head )
    @text   = File.read( body )
    true
  ensure
    #
    # Cleanup - this runs on success, on failure, and if anything raised.
    #
    tmp_head.close! if tmp_head
    tmp_body.close! if tmp_body
  end

  #
  # Return the HTTP content the server responded with, if the fetch was successful.
  #
  def content
    @text
  end

  private

  #
  # Restore the result fields to their "nothing fetched yet" defaults.
  #
  def reset_result
    @status = -1
    @error  = ""
    @text   = ""
  end

  #
  # Shell out to curl (!!!) to do the fetch, writing the response headers
  # to the file "head" and the response body to the file "body".
  #
  # Avoid using the actual shell to avoid a security risk: the arguments
  # are passed as a list, so nothing in the URL is interpreted by a shell.
  #
  def run_curl( head, body )
    system( "curl",
            "--max-time", timeout.to_s,
            "--silent",
            "--location",
            # NOTE: certificate verification is deliberately disabled here,
            # as it always has been - review if that is still wanted.
            "--insecure",
            "--dump-header", head,
            "--output", body,
            # Pass the URL as an option value so that a URL which starts
            # with "-" can never be parsed as a curl option.
            "--url", @url )
  end

  #
  # Get the HTTP status code, by parsing the HTTP headers in the file "head".
  #
  # NOTE: Later status lines replace earlier ones - this gives the status
  # code *after* any potential redirection(s) have completed.
  #
  # Returns the code as a String, or -1 if no status line was present.
  #
  def parse_status( head )
    code = -1
    File.open( head, "r" ) do |file|
      file.each_line do |line|
        code = $1.dup if line =~ STATUS_LINE
      end
    end
    code
  end
end