diff options
Diffstat (limited to 'lib')
-rw-r--r-- | lib/custodian.rb | 218 | ||||
-rw-r--r-- | lib/custodian/alerter.rb | 76 |
2 files changed, 294 insertions, 0 deletions
#
# ---------------------------------------------------------------------------
# File: lib/custodian.rb
# ---------------------------------------------------------------------------
#

#
# Standard modules
#
require 'beanstalk-client'
require 'json'
require 'logger'


#
# Implementation of our protocol tests.
#
require 'custodian/alerter.rb'
require 'custodian/protocol-tests/dns.rb'
require 'custodian/protocol-tests/ftp.rb'
require 'custodian/protocol-tests/http.rb'
require 'custodian/protocol-tests/https.rb'
require 'custodian/protocol-tests/jabber.rb'
require 'custodian/protocol-tests/ldap.rb'
require 'custodian/protocol-tests/ping.rb'
require 'custodian/protocol-tests/rsync.rb'
require 'custodian/protocol-tests/smtp.rb'
require 'custodian/protocol-tests/ssh.rb'


#
# This class contains the code for connecting to a Beanstalk queue,
# fetching tests from it, and executing them.
#
# Each job body is a JSON object.  Its 'test_type' key selects the
# protocol-test class to run ("foo" => FOOTest); the whole hash is handed
# to both the test object and the Alerter.
#
class Custodian

  #
  # Number of attempts used when $REPEAT is unset or not a positive integer.
  #
  DEFAULT_RETRY_COUNT = 5

  #
  # The beanstalk queue.
  #
  attr_reader :queue

  #
  # How many times we re-test before we detect a failure
  #
  attr_reader :retry_count

  #
  # The log-file object
  #
  attr_reader :logger

  #
  # Constructor: Connect to the queue
  #
  # server  - address of the beanstalk server, passed to Beanstalk::Pool.
  # logfile - log destination, passed to Logger.new (rotated "daily").
  #
  # The retry count is read from the REPEAT environment variable.
  #
  def initialize( server, logfile )

    # Connect to the queue
    @queue = Beanstalk::Pool.new([server])

    # Instantiate the logger.
    @logger = Logger.new( logfile, "daily" )

    @retry_count = ENV['REPEAT'] ? ENV['REPEAT'].to_i : DEFAULT_RETRY_COUNT

    #
    # String#to_i returns 0 for non-numeric input.  A count below one would
    # mean no test is ever executed and every job raises an alert, so fall
    # back to the default instead.
    #
    @retry_count = DEFAULT_RETRY_COUNT if ( @retry_count < 1 )

    log_message( "We'll run each test #{@retry_count} before alerting failures." )
  end


  #
  # Write the given message to our logfile - and show it to the console
  # if we're running with '--verbose' in play
  #
  def log_message( msg )
    @logger.info( msg )
    puts msg if ( ENV['VERBOSE'] )
  end


  #
  # Process jobs from the queue - never return.
  #
  def run!
    loop do
      log_message( "\n" )
      log_message( "\n" )
      log_message( "Waiting for job.." )
      process_single_job()
    end
  end


  #
  # Fetch a single job from the queue, and process it.
  #
  # The test is attempted up to @retry_count times and stops at the first
  # success, which clears the alert.  If every attempt fails the alert is
  # raised with the test's error message.
  #
  # Any StandardError raised while handling the job is reported and
  # swallowed; the job is deleted from the queue in every case.
  #
  def process_single_job

    # Declared up-front so the ensure-clause can tell whether a job was
    # actually reserved.
    job = nil

    begin
      job = @queue.reserve()

      log_message( "Job aquired - Job ID : #{job.id}" )

      #
      # Parse the JSON of the job body.
      #
      hash = JSON.parse( job.body )
      hash['verbose'] = 1 if ( ENV['VERBOSE'] )

      #
      # Output the details.
      #
      log_message( "Job body contains the following keys & values:")
      hash.keys.each do |key|
        log_message( " #{key} => #{hash[key]}" )
      end

      #
      # As a result of this test we'll either raise/clear with mauve.
      #
      # This helper will do that job.
      #
      alert = Alerter.new( hash )

      #
      # Create the test object.
      #
      obj = protocol_test_for( hash )

      #
      # We'll run no more than @retry_count times.
      #
      # We stop the execution on a single success.
      #
      success = false
      count   = 0

      while ( ( count < @retry_count ) && ( success == false ) )

        log_message( "Running test - attempt #{count}" )

        if ( obj.run_test() )
          log_message( "Test succeeed - clearing alert" )
          success = true
          alert.clear()
        end
        count += 1
      end

      #
      # If we didn't succeed on any of the attempts raise the alert,
      # passing the error message.
      #
      if ( ! success )
        log_message( "Test failed - alerting with #{obj.error()}" )
        alert.raise( obj.error() )
      end

    rescue => ex
      message = "Exception raised processing job: #{ex}"

      # Record the failure in the logfile as well as on the console.
      @logger.error( message )
      puts message

    ensure
      #
      # Delete the job - either we received an error, in which case
      # we should remove it to avoid picking it up again, or we handled
      # it successfully so it should be removed.
      #
      # If reserving the job itself failed there is nothing to delete.
      #
      if ( job )
        log_message( "Job ID : #{job.id} - Removed" )
        job.delete
      end
    end
  end


  private


  #
  # Convert the test-type to a class name, and instantiate the protocol test.
  #
  # Given a test-type "foo" we'll attempt to instantiate a class called
  # FOOTest, passing it the job hash.
  #
  # The test-type arrives from the queue, so the name is validated and then
  # resolved with const_get rather than being handed to eval.
  #
  # Raises ArgumentError if the test-type is missing or malformed, or if the
  # resulting object lacks the "error" / "run_test" methods; raises NameError
  # if no such class is defined.
  #
  def protocol_test_for( hash )
    type = hash['test_type'].to_s.upcase

    unless ( type =~ /\A[A-Z][A-Z0-9_]*\z/ )
      raise ArgumentError, "Invalid test_type: #{hash['test_type'].inspect}"
    end

    clazz = "#{type}Test"
    obj   = Custodian.const_get( clazz ).new( hash )

    #
    # Ensure that the object we load implements the two methods
    # we expect.
    #
    unless ( obj.respond_to?( "error" ) && obj.respond_to?( "run_test" ) )
      raise ArgumentError, "Class #{clazz} doesn't implement the full protocol-test API"
    end

    obj
  end
end


#
# ---------------------------------------------------------------------------
# File: lib/custodian/alerter.rb
# ---------------------------------------------------------------------------
#

require 'mauve/sender'
require 'mauve/proto'


#
# This class encapsulates the raising and clearing of alerts via Mauve.
#
# NOTE: the public method "raise" shadows Kernel#raise for instances of this
# class, so code inside Alerter must not use a bare "raise" to throw an
# exception.
#
class Alerter

  #
  # The host alert updates are sent to.
  #
  MAUVE_SERVER = "alert.bytemark.co.uk"

  #
  # The source name recorded on every update.
  #
  ALERT_SOURCE = "custodian"

  #
  # The hash of test details given to the constructor.
  #
  attr_reader :details

  #
  # Constructor.
  #
  # test_details - hash describing the test; the keys 'test_type',
  #                'target_host' and 'test_alert' are read from it.
  #
  def initialize( test_details )
    @details = test_details
  end


  #
  # Raise the alert.
  #
  # detail - text describing the failure, appended to the alert detail.
  #
  def raise( detail )
    alert            = new_alert
    alert.detail     = "The #{@details['test_type']} test failed against #{@details['target_host']}: #{detail}"
    alert.raise_time = Time.now.to_i

    deliver( alert )
  end


  #
  # Clear the alert.
  #
  def clear
    alert            = new_alert
    alert.detail     = "The #{@details['test_type']} test succeeded against #{@details['target_host']}"
    alert.clear_time = Time.now.to_i

    deliver( alert )
  end


  private


  #
  # Build an alert carrying the fields common to raising and clearing.
  #
  def new_alert
    alert = Mauve::Proto::Alert.new

    # e.g. ping-example.vm.bytemark.co.uk
    # e.g. http-http://example.com/page1
    alert.id = "#{@details['test_type']}-#{@details['target_host']}"

    alert.subject = @details['target_host']
    alert.summary = @details['test_alert']
    alert
  end


  #
  # Wrap the alert in an update and send it to the Mauve server.
  #
  def deliver( alert )
    update        = Mauve::Proto::AlertUpdate.new
    update.alert  = []
    update.source = ALERT_SOURCE

    # be explicit about raising/clearing
    update.replace = false

    update.alert << alert

    Mauve::Sender.new( MAUVE_SERVER ).send( update )
  end

end