summaryrefslogtreecommitdiff
path: root/lib/custodian.rb
diff options
context:
space:
mode:
Diffstat (limited to 'lib/custodian.rb')
-rw-r--r--lib/custodian.rb218
1 files changed, 218 insertions, 0 deletions
diff --git a/lib/custodian.rb b/lib/custodian.rb
new file mode 100644
index 0000000..79fca72
--- /dev/null
+++ b/lib/custodian.rb
@@ -0,0 +1,218 @@
+
+
+#
+# Standard modules
+#
+require 'beanstalk-client'
+require 'json'
+require 'logger'
+
+
+
+#
+# Our alerting helper, and the implementations of our protocol tests.
+#
+require 'custodian/alerter.rb'
+require 'custodian/protocol-tests/dns.rb'
+require 'custodian/protocol-tests/ftp.rb'
+require 'custodian/protocol-tests/http.rb'
+require 'custodian/protocol-tests/https.rb'
+require 'custodian/protocol-tests/jabber.rb'
+require 'custodian/protocol-tests/ldap.rb'
+require 'custodian/protocol-tests/ping.rb'
+require 'custodian/protocol-tests/rsync.rb'
+require 'custodian/protocol-tests/smtp.rb'
+require 'custodian/protocol-tests/ssh.rb'
+
+
+
+
+
+
+
+
+
+#
+# This class contains the code for connecting to a Beanstalk queue,
+# fetching tests from it, and executing them.
+#
+# Each job body is expected to be a JSON object with (at least) a
+# 'test_type' key.  A test-type of "foo" is executed by a class called
+# FOOTest, which must respond to both `run_test` and `error`.
+#
+# Environment variables consulted:
+#
+#   REPEAT  - how many times a test is attempted before it is treated
+#             as a failure (default 5).
+#   VERBOSE - when set, log messages are echoed to the console and
+#             'verbose' => 1 is added to the job hash.
+#
+class Custodian
+
+  #
+  # Number of attempts used when ENV['REPEAT'] is unset or unusable.
+  #
+  DEFAULT_RETRY_COUNT = 5
+
+  #
+  # The only shape of test-type we accept from a job.  The value is used
+  # to build a class name, so it must be a plain identifier.
+  #
+  TEST_TYPE_PATTERN = /\A[A-Za-z][A-Za-z0-9_]*\z/
+
+  #
+  # The beanstalk queue.
+  #
+  attr_reader :queue
+
+  #
+  # How many times we re-test before we detect a failure
+  #
+  attr_reader :retry_count
+
+  #
+  # The log-file object
+  #
+  attr_reader :logger
+
+  #
+  # Constructor: Connect to the queue
+  #
+  # server  - address of the beanstalk server, handed to Beanstalk::Pool.
+  # logfile - log destination, handed to Logger (rotated "daily").
+  #
+  def initialize( server, logfile )
+
+    # Connect to the queue
+    @queue = Beanstalk::Pool.new([server])
+
+    # Instantiate the logger.
+    @logger = Logger.new( logfile, "daily" )
+
+    #
+    # String#to_i (and nil.to_i) return 0 for anything non-numeric.  A
+    # count below one would skip the retry loop completely and raise an
+    # alert for every job, so fall back to the default instead.
+    #
+    @retry_count = ENV['REPEAT'].to_i
+    @retry_count = DEFAULT_RETRY_COUNT if @retry_count < 1
+
+    log_message( "We'll run each test up to #{@retry_count} times before alerting failures." )
+  end
+
+
+  #
+  # Write the given message to our logfile - and show it to the console
+  # if we're running with '--verbose' in play
+  #
+  def log_message( msg )
+    @logger.info( msg )
+    puts msg if ENV['VERBOSE']
+  end
+
+
+
+  #
+  # Process jobs from the queue - never return.
+  #
+  def run!
+    loop do
+      log_message( "\n" )
+      log_message( "\n" )
+      log_message( "Waiting for job.." )
+      process_single_job
+    end
+  end
+
+
+
+  #
+  # Fetch a single job from the queue, and process it.
+  #
+  # The test is attempted up to `retry_count` times.  The alert is
+  # cleared on the first success, or raised (with the test's error
+  # message) if every attempt fails.
+  #
+  # Any StandardError raised along the way is logged rather than
+  # propagated, and the job is always deleted afterwards so that a
+  # broken job is not picked up again.
+  #
+  def process_single_job
+    job = nil
+
+    begin
+      job = @queue.reserve
+      log_message( "Job acquired - Job ID : #{job.id}" )
+
+      hash = parse_job( job )
+
+      #
+      # As a result of this test we'll either raise/clear with mauve.
+      #
+      alert = Alerter.new( hash )
+      obj   = build_test( hash )
+
+      if run_with_retries( obj )
+        log_message( "Test succeeded - clearing alert" )
+        alert.clear
+      else
+        log_message( "Test failed - alerting with #{obj.error}" )
+        alert.raise( obj.error )
+      end
+
+    rescue => ex
+      #
+      # Record the failure in the logfile as well as on the console.
+      #
+      msg = "Exception raised processing job: #{ex}"
+      @logger.error( msg )
+      puts msg
+
+    ensure
+      #
+      # Delete the job - either we received an error, in which case
+      # we should remove it to avoid picking it up again, or we handled
+      # it successfully so it should be removed.
+      #
+      # `job` is still nil if reserving it failed; touching it then
+      # would raise out of this ensure-clause and stop the worker.
+      #
+      if job
+        log_message( "Job ID : #{job.id} - Removed" )
+        job.delete
+      end
+    end
+  end
+
+
+  private
+
+
+  #
+  # Decode the JSON body of the given job, log its contents, and return
+  # the resulting hash.
+  #
+  # Raises ArgumentError if the body is valid JSON but not an object.
+  #
+  def parse_job( job )
+    hash = JSON.parse( job.body )
+
+    unless hash.is_a?( Hash )
+      raise ArgumentError, "Job body is not a JSON object"
+    end
+
+    hash['verbose'] = 1 if ENV['VERBOSE']
+
+    log_message( "Job body contains the following keys & values:" )
+    hash.each do |key, value|
+      log_message( " #{key} => #{value}" )
+    end
+
+    hash
+  end
+
+
+  #
+  # Convert the test-type of the job into a protocol-test object.
+  #
+  # Given a test-type "foo" we instantiate the class called FOOTest.
+  #
+  # SECURITY: the test-type comes from the queue, i.e. from outside this
+  # process.  It used to be passed to `eval`, which would execute any
+  # Ruby placed in the job.  It is now validated and resolved with a
+  # constant lookup, which can only ever name an existing constant.
+  #
+  # Raises ArgumentError for a missing/malformed test-type, NameError if
+  # no such class exists, and RuntimeError if the class does not provide
+  # the protocol-test API.
+  #
+  def build_test( hash )
+    type = hash['test_type'].to_s
+
+    unless type =~ TEST_TYPE_PATTERN
+      raise ArgumentError, "Invalid test_type: #{type.inspect}"
+    end
+
+    clazz = "#{type.upcase}Test"
+    obj   = self.class.const_get( clazz ).new( hash )
+
+    #
+    # Ensure that the object we load implements the two methods
+    # we expect.  (RuntimeError, so that our caller's rescue sees it.)
+    #
+    unless obj.respond_to?( "error" ) && obj.respond_to?( "run_test" )
+      raise "Class #{clazz} doesn't implement the full protocol-test API"
+    end
+
+    obj
+  end
+
+
+  #
+  # Run the given test no more than `retry_count` times, stopping at the
+  # first success.
+  #
+  # Returns true if an attempt succeeded, false if all of them failed.
+  #
+  def run_with_retries( obj )
+    @retry_count.times do |count|
+      log_message( "Running test - attempt #{count}" )
+      return true if obj.run_test
+    end
+
+    false
+  end
+end
+
+
+