Added an external, curl-based HTTP fetcher that actually works.

author: Steve Kemp <steve@steve.org.uk> 2012-11-14 22:31:54 +0000
committer: Steve Kemp <steve@steve.org.uk> 2012-11-14 22:31:54 +0000
commit: 907b1b5d35a9121bc2c2d2e94270ca8a698df1cf (patch)
tree: 01b4565d5fbbfb133fc39d645dfd4211501a2f97 /lib/custodian/webfetch.rb
parent: f055de793c6bac81bf014a7fd6cb0994c5991ebc (diff)
1 files changed, 131 insertions, 0 deletions
diff --git a/lib/custodian/webfetch.rb b/lib/custodian/webfetch.rb
new file mode 100755
index 0000000..5c04b31
--- /dev/null
+++ b/lib/custodian/webfetch.rb
@@ -0,0 +1,131 @@
+#!/usr/bin/ruby1.8
+
+
+require 'tempfile'
+
+
+class WebFetch
+
+  #
+  # The URL & timeout period (in seconds) we were given in the constructor
+  #
+  attr_reader :url, :timeout
+
+  #
+  # Results of the most recent fetch:
+  #
+  #   status - the HTTP status code as a String (e.g. "200"), or the
+  #            Integer -1 when no status line has been parsed.
+  #   text   - the response body; "" until a fetch succeeds.
+  #   error  - a message describing the failure; "" otherwise.
+  #
+  attr_reader :status, :text, :error
+
+  #
+  # Constructor
+  #
+  #   url     - the URL to fetch.
+  #   timeout - the value handed to curl's --max-time, in seconds.
+  #
+  def initialize( url, timeout = 10 )
+    @url     = url
+    @timeout = timeout
+    reset_result
+  end
+
+  #
+  # Perform the fetch, by running curl.
+  #
+  # Return true on success.  On failure return false, and leave a
+  # message in `error`.
+  #
+  # NOTE: A response with an empty body is reported as a failure, because
+  # either temporary file being empty is treated as "nothing received".
+  #
+  def fetch
+    #
+    # Forget any earlier result, so that calling fetch twice does not
+    # append the second body to the first.
+    #
+    reset_result
+
+    #
+    # Temporary files to contain the header & body from the server.
+    #
+    tmp_head = Tempfile.new('curl-header')
+    tmp_body = Tempfile.new('curl-body')
+
+    begin
+      head = tmp_head.path
+      body = tmp_body.path
+
+      #
+      # Run curl (!!!) to do the fetch.
+      #
+      # The arguments are passed as a list, so that no shell is involved
+      # and characters such as "&" or ";" in the URL reach curl intact.
+      #
+      system( "curl", "--max-time", @timeout.to_s, "--silent",
+              "--location", "--insecure", "--dump-header", head,
+              "--output", body, @url.to_s )
+
+      #
+      # If either file is empty then we treat the fetch as failed.
+      #
+      if ( ( File.size( body ) == 0 ) ||
+           ( File.size( head ) == 0 ) )
+        @error = "Fetch failed"
+        return false
+      end
+
+      #
+      #  Get the HTTP status code, by parsing the HTTP headers.
+      #
+      #  The minor version is optional, because curl reports HTTP/2
+      #  responses as "HTTP/2 200".  When redirects were followed there
+      #  are several status lines; the last one wins.
+      #
+      File.open( head, "r" ) do |fh|
+        fh.each_line do |line|
+          if ( line =~ /\AHTTP\/[0-9](?:\.[0-9])?\s+([0-9]+)\s+/ )
+            @status = $1.dup
+          end
+        end
+      end
+
+      #
+      #  Get the body from the server.  The block form closes the file.
+      #
+      File.open( body, "r" ) { |fh| @text = fh.read }
+
+      return true
+    ensure
+      #
+      #  Cleanup: close & remove both temporary files, whatever happened.
+      #
+      tmp_head.close!
+      tmp_body.close!
+    end
+  end
+
+  #
+  # Return the HTTP content the server responded with, if the
+  # fetch was successful.
+  #
+  def content
+    @text
+  end
+
+  private
+
+  #
+  # Restore the defaults that describe "nothing fetched yet".
+  #
+  def reset_result
+    @status  = -1
+    @error   = ""
+    @text    = ""
+  end
+
+end
+
author	Steve Kemp <steve@steve.org.uk>	2012-11-14 22:31:54 +0000
committer	Steve Kemp <steve@steve.org.uk>	2012-11-14 22:31:54 +0000
commit	907b1b5d35a9121bc2c2d2e94270ca8a698df1cf (patch)
tree	01b4565d5fbbfb133fc39d645dfd4211501a2f97 /lib/custodian/webfetch.rb
parent	f055de793c6bac81bf014a7fd6cb0994c5991ebc (diff)