3 files changed, 916 insertions, 0 deletions
diff --git a/bin/multi-ping b/bin/multi-ping
new file mode 100755
index 0000000..0d2add5
--- /dev/null
+++ b/bin/multi-ping
@@ -0,0 +1,161 @@
+#!/usr/bin/ruby1.8 -w
+#
+# NAME
+#   multi-ping - IPv4 and IPv6 ping tool
+#
+# SYNOPSIS
+#  multi-ping  [ -h | --help ] [-m | --manual] hostname1
+#
+# OPTIONS
+#
+#  -h, --help              Show a help message, and exit.
+#
+#  -m, --manual            Show this manual, and exit.
+#
+#  -v, --verbose           Show verbose errors
+#
+# ABOUT
+#
+#  The multi-ping tool is designed to be IPv4/IPv6-agnostic ping tool,
+# which removes the need to know if you're pinging an IPv4 host or an
+# IPv6 host.
+#
+#  The tool works by resolving the hostname specified upon the command line,
+# and invoking "ping" or "ping6" upon the result - using the correct one for
+# the address which has been returned.
+#
+# AUTHOR
+#
+#  Steve Kemp  <steve@bytemark.co.uk>
+#
+
+
+require 'getoptlong'
+require 'socket'
+
+
+
+#
+#  Options set by the command-line.  These are all global.
+#
+$help   = false
+$manual = false
+
+opts = GetoptLong.new(
+                      [ '--help',   '-h', GetoptLong::NO_ARGUMENT ],
+                      [ '--manual', '-m', GetoptLong::NO_ARGUMENT ] )
+
+begin
+  opts.each do |opt,arg|
+    case opt
+    when '--help'
+      $help = true
+    when '--manual'
+      $manual = true
+    end
+  end
+rescue => err
+  # any errors, show the help
+  warn err.to_s
+  $help = true
+end
+
+
+#
+# CAUTION! Here be quality kode.
+#
+if $manual or $help
+
+  # Open the file, stripping the shebang line
+  lines = File.open(__FILE__){|fh| fh.readlines}[1..-1]
+
+  found_synopsis = false
+
+  lines.each do |line|
+
+    line.chomp!
+    break if line.empty?
+
+    if $help and !found_synopsis
+      found_synopsis = (line =~ /^#\s+SYNOPSIS\s*$/)
+      next
+    end
+
+    puts line[2..-1].to_s
+
+    break if $help and found_synopsis and line =~ /^#\s*$/
+
+  end
+
+  exit 0
+end
+
+
+
+
+#
+#  Get the address to ping.
+#
+hostname = ARGV.shift
+
+#
+#  If we have no host then abort
+#
+if ( hostname.nil? )
+  puts "Usage: #{$0} hostname"
+  exit 1
+end
+
+
+#
+#  The IP we'll deal with
+#
+ip = nil
+
+
+#
+#  Lookup the IP, catching any exception
+#
+begin
+  Socket.getaddrinfo(hostname, 'echo').each do |a|
+    ip = a[3]
+  end
+rescue SocketError
+  puts "Failed to resolve: #{hostname}"
+  exit 1
+end
+
+
+#
+#  Was the result an IPv4 address?
+#
+if ( ip =~ /^([0-9]+).([0-9]+).([0-9]+).([0-9]+)$/ )
+
+  #
+  #  If so invoke "ping"
+  #
+  if ( system( "ping -c 1 #{ip} 2>/dev/null >/dev/null" ) == true )
+    puts "#{hostname} alive."
+    exit 0
+  else
+    puts "ping4 failed - #{hostname} [#{ip}]"
+    exit 1
+  end
+elsif ( ip =~ /2001/ )
+
+  #
+  #  Was the result an IPv6 address?
+  #
+  if ( system( "ping6 -c 1 -w1 #{ip} 2>/dev/null >/dev/null" ) == true )
+    puts "#{hostname} alive."
+    exit 0
+  else
+    puts "ping6 failed - #{hostname} [#{ip}]"
+    exit 1
+  end
+end
+
+
+#
+#  All done.
+#
diff --git a/bin/parser.rb b/bin/parser.rb
new file mode 100755
index 0000000..e0ce4ba
--- /dev/null
+++ b/bin/parser.rb
@@ -0,0 +1,388 @@
+#!/usr/bin/ruby
+#
+# Notes
+#
+#  Macros may be defined either literally, or as a result of a HTTP-fetch.
+# Macro names match the pattern "^[0-9A-Z_]$"
+#
+#
+
+
+require 'beanstalk-client'
+require 'getoptlong'
+require 'json'
+
+
+
+
+
+
+
+
+#
+# This is a simple class which will parse a sentinel configuration file.
+#
+# Unlike Sentinel it is not using a real parser, instead it peels off lines
+# via a small number of very simple regular expressions - this should be flaky,
+# but in practice it manages to successfully parse each of the configuration
+# files that we currently maintain @ Bytemark.
+#
+# TODO:
+#
+# 1.  Explicitly abort and panic on malformed lines.
+#
+# 2.  Implement HTTP-fetching for macro-bodies.
+#
+#
+# Steve
+# --
+#
+class MonitorConfig
+
+  #
+  # A hash of macros we found.
+  #
+  attr_reader :MACROS
+
+  #
+  # A handle to the beanstalkd queue.
+  #
+  attr_reader :queue
+
+  #
+  # The filename that we're going to parse.
+  #
+  attr_reader :filename
+
+
+
+
+  #
+  # Constructor
+  #
+  def initialize( filename )
+    @MACROS = Hash.new()
+    @queue  = Beanstalk::Pool.new(['127.0.0.1:11300'])
+    @file   = filename
+
+    if ( @file.nil? || ( ! File.exists?( @file) ) )
+      raise ArgumentError, "Missing configuration file!"
+    end
+  end
+
+
+
+  #
+  #  Define a macro, from the configuration file.
+  #
+  def define_macro( line )
+    name = nil
+    val  = Array.new
+
+    #
+    #  Get the name
+    #
+    name = $1.dup if ( line =~ /^([A-Z_]+)\s+/ )
+
+
+    #
+    #  Get the value
+    #
+    if ( line =~ /fetched\s+from\s+(.*)[\r\n\.]*$/ )
+
+      #
+      #  HTTP-fetch
+      #
+      val.push( "steve")
+      val.push("kemp")
+
+    elsif ( line =~ /\s(is|are)\s+(.*)\.+$/ )
+
+      #
+      #  Literal list.
+      #
+      tmp = $2.dup.split( /\s+and\s+/ )
+      tmp.each do |entry|
+        val.push( entry )
+      end
+
+    end
+
+    @MACROS[name] = val
+  end
+
+
+
+
+  #
+  #  Return a hash of our current macro-definitions.
+  #
+  #  This is used only by the test-suite.
+  #
+  def macros
+    @MACROS
+  end
+
+
+
+
+  #
+  # Is the given string of text a macro?
+  #
+  def is_macro?( name )
+    !@MACROS[name].nil?
+  end
+
+
+
+  #
+  # Return an array of hosts if the given string was a macro identifier.
+  #
+  def get_macro_targets( name )
+    @MACROS[name]
+  end
+
+
+
+
+  #
+  # Parse a single line from the configuration file.
+  #
+  def parse_line( line )
+
+    #
+    # A blank line, or a comment may be skipped.
+    #
+    return if ( ( line =~ /^#/ ) || ( line.length < 1 ) )
+
+    #
+    # The specification of mauve-server to which we should raise our alerts to.
+    #
+    return if ( line =~ /Mauve\s+server(.*)source/ )
+
+
+    #
+    #  Look for macro definitions, inline
+    #
+    if ( line =~ /^([A-Z]_+)\s+are\s+fetched\s+from\s+([^\s]+)\.?/ )
+      define_macro( line )
+
+    elsif ( line =~ /^([0-9A-Z_]+)\s+(is|are)\s+/ )
+      define_macro( line )
+
+    elsif ( line =~ /\s+must\s+ping/ )
+
+      #
+      #  Target
+      #
+      targets = Array.new
+
+      #
+      #  Fallback target is the first token on the line
+      #
+      target = line.split( /\s+/)[0]
+
+
+      #
+      #  If the target is a macro
+      #
+      if ( is_macro?( target ) )
+        targets = get_macro_targets(target)
+      else
+        targets.push( target )
+      end
+
+      #
+      #  The alert-failure message
+      #
+      alert = "Ping failed"
+      if ( line =~ /otherwise '([^']+)'/ )
+        alert=$1.dup
+      end
+
+      #
+      #  Store the test(s)
+      #
+      targets.each do |host|
+        test = {
+          :target_host => host,
+          :test_type => "ping",
+          :test_alert => alert
+        }
+
+        if ( !ENV['DUMP'].nil? )
+          puts ( test.to_json )
+        else
+          @queue.put( test.to_json )
+        end
+      end
+
+    elsif ( line =~ /\s+must\s+run\s+([^\s]+)\s+/i )
+
+      #
+      # Get the service we're testing, and remove any trailing "."
+      #
+      # This handles the case of:
+      #
+      #  LINN_HOSTS must run ssh.
+      #
+      service = $1.dup
+      service.chomp!(".")
+
+      #
+      #  Target of the service-test.
+      #
+      targets = Array.new
+      target  = line.split( /\s+/)[0]
+
+      #
+      #  If the target is a macro
+      #
+      if ( is_macro?( target ) )
+        targets = get_macro_targets( target )
+      else
+        targets.push( target )
+      end
+
+      #
+      #  Alert text
+      #
+      alert = "#{service} failed"
+      if ( line =~ /otherwise '([^']+)'/ )
+        alert=$1.dup
+      end
+
+      #
+      # All our service tests require a port - we setup the defaults here,
+      # but the configuration file will allow users to specify an alternative
+      # via " on XXX ".
+      #
+      case service
+      when /ssh/ then
+        port=22
+      when /jabber/ then
+        port=5222
+      when /ldap/ then
+        port=389
+      when /^https$/ then
+        port=443
+      when /^http$/ then
+        port=80
+      when /rsync/i then
+        port=873
+      when /ftp/i then
+        port=21
+      when /telnet/i then
+        port=23
+      when /smtp/i then
+        port=25
+      end
+
+      #
+      # But allow that to be changed
+      #
+      # e.g.
+      #
+      #  must run ssh on 33 otherwise ..
+      #  must run ftp on 44 otherwise ..
+      #  must run http on 8000 otherwise ..
+      #
+      if ( line =~ /\s+on\s+([0-9]+)/ )
+        port = $1.dup
+      end
+
+      targets.each do |host|
+
+        test = {
+          :target_host => host,
+          :test_type   => service,
+          :test_port   => port,
+          :test_alert  => alert
+        }
+
+        #
+        # HTTP-tests will include the expected result in one of two
+        # forms:
+        #
+        #    must run http with status 200
+        #
+        #    must run http with content 'text'
+        #
+        # If those are sepcified then include them here.
+        #
+        # Note we're deliberately fast and loose here - which allows both to be specified
+        #
+        #   http://example.vm/ must run http with status 200 and content 'OK' otherwise 'boo!'.
+        #
+        #
+        if ( line =~ /\s+with\s+status\s+([0-9]+)\s+/ )
+          test[:http_status]=$1.dup
+        end
+        if ( line =~ /\s+with\s+content\s+'([^']+)'/ )
+          test[:http_text]=$1.dup
+        end
+
+        #
+        # We've parsed(!) the line.  Either output the JSON to the console
+        # or add to the queue.
+        #
+        if ( !ENV['DUMP'].nil? )
+          puts ( test.to_json )
+        else
+          @queue.put( test.to_json )
+        end
+      end
+    else
+      puts "Unknown line: #{line}" if ( line.length > 2 )
+    end
+  end
+
+
+
+
+  #
+  # Parse the configuration file which was named in our constructor.
+  #
+  def parse_file()
+    #
+    #  Parse the configuration file on the command line
+    #
+    File.open( @file, "r").each_line do |line|
+      parse_line( line)
+    end
+  end
+
+
+end
+
+
+
+
+
+#
+#  Entry-point to our code.
+#
+if __FILE__ == $0 then
+
+
+  begin
+    opts = GetoptLong.new(
+                          [ "--dump", "-d", GetoptLong::NO_ARGUMENT ],
+                          [ "--file", "-f", GetoptLong::REQUIRED_ARGUMENT ]
+                          )
+    opts.each do |opt, arg|
+      case opt
+      when "--dump":
+          ENV["DUMP"] = "1"
+      when "--file":
+          ENV["FILE"] = arg
+      end
+    end
+  rescue StandardError => ex
+    puts "Option parsing failed: #{ex.to_s}"
+    exit
+  end
+
+  mon = MonitorConfig.new( ENV['FILE'] )
+  mon.parse_file();
+end
diff --git a/bin/worker b/bin/worker
new file mode 100755
index 0000000..3c9e355
--- /dev/null
+++ b/bin/worker
@@ -0,0 +1,367 @@
+#!/usr/bin/ruby
+#
+#  This script will pull tests to complete from the Beanstalk Queue,
+# where they will be found in JSON form, and executes them.
+#
+# Steve
+# --
+#
+
+
+
+require 'beanstalk-client'
+require 'getoptlong'
+require 'json'
+require 'logger'
+
+require 'mauve/sender'
+require 'mauve/proto'
+
+
+
+#
+# Implementation of our protocol tests.
+#
+require 'tests/ftp'
+require 'tests/http'
+require 'tests/https'
+require 'tests/jabber'
+require 'tests/ldap'
+require 'tests/ping'
+require 'tests/rsync'
+require 'tests/smtp'
+require 'tests/ssh'
+
+
+
+
+#
+#  This class encapsulates the raising and clearing of alerts via Mauve.
+#
+class Alert
+
+  attr_reader :details
+
+  def initialize( test_details )
+    @details = test_details
+  end
+
+
+  #
+  # Raise the alert.
+  #
+  def raise( detail )
+
+    puts "RAISE: #{detail}"
+    return
+
+    update = Mauve::Proto::AlertUpdate.new
+    update.alert   = []
+    update.source  = "custodian"
+    update.replace = true
+
+    alert            = Mauve::Proto::Alert.new
+    alert.id         = @details['test_type']
+    alert.summary    = "#{@details['test_host']} #{@details['test_alert']}"
+    alert.detail     = "The #{@details['test_type']} test failed against #{@details['test_host']}: #{detail}"
+    alert.raise_time = Time.now.to_i
+    update.alert << alert
+
+    Mauve::Sender.new("alert.bytemark.co.uk").send(update)
+
+  end
+
+  #
+  #  Clear the alert.
+  #
+  def clear
+    puts "CLEAR"
+    return
+
+    update = Mauve::Proto::AlertUpdate.new
+    update.alert   = []
+    update.source  = "custodian"
+    update.replace = true
+
+    alert            = Mauve::Proto::Alert.new
+    alert.id         = @details['test_type']
+    alert.summary    = "#{@details['test_host']} #{@details['test_alert']}"
+    alert.detail     = "The #{@details['test_type']} test succeeded against #{@details['test_host']}"
+    alert.clear_time = Time.now.to_i
+    update.alert << alert
+
+    Mauve::Sender.new("alert.bytemark.co.uk").send(update)
+  end
+
+end
+
+
+
+
+#
+# This class contains the code for connecting to a Beanstalk queue,
+# fetching tests from it, and executing them
+#
+class Custodian
+
+  #
+  # The beanstalk queue.
+  #
+  attr_reader :queue
+
+  #
+  # How many times we re-test before we detect a failure
+  #
+  attr_reader :retry_count
+
+  #
+  # The log-file object
+  #
+  attr_reader :logger
+
+  #
+  # Constructor: Connect to the queue
+  #
+  def initialize( server )
+
+    # Connect to the queue
+    @queue = Beanstalk::Pool.new([server])
+
+    # Instantiate the logger.
+    @logger = Logger.new( "worker.log", "daily" )
+
+    if ( ENV['REPEAT'] )
+       @retry_count=ENV['REPEAT'].to_i
+    else
+       @retry_count=5
+    end
+
+    log_message( "We'll run each test #{@retry_count} before alerting failures." )
+  end
+
+
+  #
+  # Write the given message to our logfile - and show it to the console
+  # if we're running with '--verbose' in play
+  #
+  def log_message( msg )
+      @logger.info( msg )
+      puts msg if ( ENV['VERBOSE'] )
+  end
+
+  #
+  #  Flush the queue.
+  #
+  def flush_queue!
+
+    log_message( "Flushing queue" )
+
+    while( true )
+      begin
+        job = @queue.reserve(1)
+        id  = job.id
+        log_message( "Deleted job #{id}" )
+        job.delete
+      rescue Beanstalk::TimedOut => ex
+        log_message( "The queue is now empty" )
+        return
+      end
+    end
+  end
+
+
+
+  #
+  # Process jobs from the queue - never return.
+  #
+  def run!
+    while( true )
+      log_message( "\n" )
+      log_message( "\n" )
+      log_message( "Waiting for job.." )
+      process_single_job()
+    end
+  end
+
+
+
+  #
+  # Fetch a single job from the queue, and process it.
+  #
+  def process_single_job
+
+    begin
+      job = @queue.reserve()
+
+      log_message( "Job aquired - Job ID : #{job.id}" )
+
+
+      #
+      #  Parse the JSON of the job body.
+      #
+      json = job.body
+      hash = JSON.parse( json )
+      hash['verbose'] = 1 if ( ENV['VERBOSE'] )
+
+
+      #
+      #  Output the details.
+      #
+      log_message( "Job body contains the following keys & values:")
+      hash.keys.each do |key|
+        log_message( "#{key} => #{hash[key]}" )
+      end
+
+
+
+      #
+      # Did the test succeed?  If not count the number of times it failed in
+      # a row.  We'll repeat several times
+      #
+      success = false
+      count   = 0
+
+      #
+      # As a result of this test we'll either raise/clear with mauve.
+      #
+      # This helper will do that job.
+      #
+      alert = Alert.new( hash )
+
+
+      #
+      # Convert the test-type to a class name, to do the protocol test.
+      #
+      # Given a test-type "foo" we'll attempt to instantiate a class called FOOTest.
+      #
+      test  = hash['test_type']
+      clazz = test.upcase
+      clazz = "#{clazz}Test"
+
+
+      #
+      # Create the test object.
+      #
+      obj = eval(clazz).new( hash )
+
+
+      #
+      # Ensure that the object we load implements the two methods
+      # we expect.
+      #
+      if ( ( ! obj.respond_to?( "error") ) ||
+           ( ! obj.respond_to?( "run_test" ) ) )
+        puts "Class #{clazz} doesn't implement the full protocol-test API"
+      end
+
+
+
+      #
+      #  We'll run no more than MAX times.
+      #
+      #  We stop the execution on a single success.
+      #
+      while ( ( count < @retry_count ) && ( success == false ) )
+
+        if ( obj.run_test() )
+          log_message( "Test succeeed - clearing alert" )
+          alert.clear()
+          success= true
+        end
+        count += 1
+      end
+
+      #
+      #  If we didn't succeed on any of the attempts raise the alert.
+      #
+      if ( ! success )
+
+        #
+        # Raise the alert, passing the error message.
+        #
+        log_message( "Test failed - alerting with #{obj.error()}" )
+        alert.raise( obj.error() )
+      end
+
+    rescue => ex
+      puts "Exception raised processing job: #{ex}"
+
+    ensure
+      #
+      #  Delete the job - either we received an error, in which case
+      # we should remove it to avoid picking it up again, or we handled
+      # it successfully so it should be removed.
+      #
+      log_message( "Job ID : #{job.id} - Removed" )
+      job.delete if ( job )
+    end
+  end
+end
+
+
+
+
+
+
+
+#
+#  Entry-point to our code.
+#
+if __FILE__ == $0 then
+
+  $SERVER = "127.0.0.1:11300";
+
+  begin
+    opts = GetoptLong.new(
+                          [ "--verbose", "-v", GetoptLong::NO_ARGUMENT ],
+                          [ "--flush",   "-f", GetoptLong::NO_ARGUMENT ],
+                          [ "--server",  "-S", GetoptLong::REQUIRED_ARGUMENT ],
+                          [ "--repeat",  "-r", GetoptLong::REQUIRED_ARGUMENT ],
+                          [ "--single",  "-s", GetoptLong::NO_ARGUMENT ]
+                          )
+    opts.each do |opt, arg|
+      case opt
+      when "--verbose":
+          ENV["VERBOSE"] = "1"
+      when "--flush":
+          ENV["FLUSH"] = "1"
+      when "--repeat":
+          ENV["REPEAT"] = arg
+      when "--server":
+          $SERVER = arg
+      when "--single":
+          ENV["SINGLE"] = "1"
+      end
+    end
+  rescue StandardError => ex
+    puts "Option parsing failed: #{ex.to_s}"
+    exit
+  end
+
+  #
+  #  Create the object
+  #
+  worker = Custodian.new( $SERVER )
+
+  #
+  #  Are we flushing the queue?
+  #
+  if ( ENV['FLUSH'] )
+    worker.flush_queue!
+    exit(0)
+  end
+
+  #
+  #  Single step?
+  #
+  if ( ENV['SINGLE'] )
+    worker.process_single_job
+    exit(0)
+  end
+
+  #
+  #  Otherwise loop indefinitely
+  #
+  worker.run!
+
+end