require 'custodian/util/bytemark'
require 'custodian/util/dns'
require 'custodian/util/prefix'
require 'digest/sha1'
require 'resolv'
require 'timeout'

#
# This class encapsulates the raising and clearing of alerts via Mauve.
#
# There is a helper method to update any alerts with details of whether the
# affected host is inside/outside the Bytemark network.
#
# This is almost Bytemark-specific, although the server it talks to is
# indeed Open Source:
#
#   https://projects.bytemark.co.uk/projects/mauvealert
#
# NOTE(review): @settings and @target are read here but never assigned in
# this file - presumably they are supplied by AlertFactory; confirm.
#
# NOTE: this class defines an instance method called `raise`, which shadows
# Kernel#raise for implicit-receiver calls inside instances.  Code added to
# this class must not use a bare `raise` to throw an exception.
#
module Custodian

  module Alerter

    class AlertMauve < AlertFactory

      #
      # Matches a dotted-quad IPv4 address.
      #
      IPV4_PATTERN = /^([0-9]+)\.([0-9]+)\.([0-9]+)\.([0-9]+)$/

      #
      # Matches anything made up solely of hex digits and colons.
      #
      # NOTE(review): this also matches bare names such as "cafe"; kept
      # as-is to preserve the existing classification of targets.
      #
      IPV6_PATTERN = /^([0-9a-f:]+)$/

      #
      # The test this alerter cares about
      #
      attr_reader :test

      #
      # Was this class loaded correctly?
      #
      attr_reader :loaded

      #
      # Constructor
      #
      # Stores the test object and attempts to load the mauve client
      # libraries.  If they cannot be loaded an error is printed and
      # `loaded` is false, which turns `raise` and `clear` into no-ops.
      #
      def initialize(obj)
        @test = obj

        begin
          require 'mauve/sender'
          require 'mauve/proto'
          @loaded = true
        rescue LoadError, StandardError
          # A failed require raises LoadError, which is a ScriptError and
          # is therefore NOT caught by a bare `rescue`.
          puts 'ERROR Loading mauve libraries!'
          @loaded = false
        end
      end

      #
      # Generate an alert-message which will be raised via mauve.
      #
      # The alert is raised "now" and suppressed for a period (in minutes)
      # which depends upon whether we are inside the working day.
      #
      def raise
        return unless @loaded

        #
        # Get ready to send to mauve.
        #
        update = _new_update

        #
        # Construct a new alert structure.
        #
        alert = _get_alert(true)

        #
        # Sample the clock once so that the raise-time, the working-day
        # decision and the suppression deadline all agree.
        #
        now = Time.now

        #
        # We're raising this alert.
        #
        alert.raise_time = now.to_i

        #
        # The suppression period varies depending on the time of day.
        #
        period = _suppression_period(now)

        #
        # And logged.
        #
        puts "Suppression period is #{period}m"

        #
        # We're going to suppress this alert now
        #
        alert.suppress_until = now.to_i + (period * 60)

        #
        # Update it and send it
        #
        update.alert << alert
        Mauve::Sender.new(@target).send(update)
      end

      #
      # Generate an alert-message which will be cleared via mauve.
      #
      def clear
        return unless @loaded

        #
        # Get ready to send to mauve.
        #
        update = _new_update

        #
        # Construct a new alert structure.
        #
        alert = _get_alert(false)

        #
        # We're clearing this alert.
        #
        alert.clear_time = Time.now.to_i

        #
        # Update it and send it
        #
        update.alert << alert
        Mauve::Sender.new(@target).send(update)
      end

      #
      # Using the test object, which was set in the constructor,
      # generate a useful alert that can be fired off to mauve.
      #
      # Most of the mess of this method is ensuring there is some
      # "helpful" data in the detail-field of the alert.
      #
      # When `failure` is true the detail field is populated with the
      # failure text, any user-supplied notification text, the error from
      # the test, and the inside/outside-Bytemark description of the host.
      # When it is false the detail field is left untouched.
      #
      # Returns the populated Mauve::Proto::Alert.
      #
      def _get_alert(failure)
        #
        # The subject of an alert MUST be one of:
        #
        #   1. Hostname.
        #   2. IP address
        #   3. A URL.
        #
        # We attempt to resolve the alert to the hostname, as that is more
        # readable, if we have been given an IP address.
        #
        subject = @test.target
        if _ip_address?(subject)
          resolved = Custodian::Util::DNS.ip_to_hostname(subject)
          subject = resolved if resolved
        end

        #
        # The test type + test target
        #
        test_host = test.target
        test_type = test.get_type

        alert = Mauve::Proto::Alert.new

        #
        # Mauve only lets us use IDs which are <= 255 characters in length
        # hash the line from the parser to ensure it is short enough.
        # (IDs must be unique, per-source)
        #
        # Because there might be N-classes which implemented the test
        # we need to make sure these are distinct too.
        #
        alert.id = Digest::SHA1.hexdigest("#{test}#{test.class}")

        # Look for a subject-prefix
        subject_prefix = Custodian::Util::Prefix.text
        alert.subject = subject_prefix + subject
        alert.summary = "The #{test_type} test failed against #{test_host}"

        #
        # If we're raising then add the error
        #
        if failure
          detail = "\n\nThe #{test_type} test failed against #{test_host}.\n\n"

          #
          # The text from the job-definition; add it if present.
          #
          user_text = test.get_notification_text
          detail = "#{detail}\n\n#{user_text}\n\n" unless user_text.nil?

          #
          # Add the test-failure message
          #
          detail = "#{detail}\n\n#{test.error}\n\n"

          #
          # Determine if this is inside/outside the bytemark network.
          # Only append when there is something to say: an empty string
          # is truthy in Ruby, so it must be tested for explicitly.
          #
          location = expand_inside_bytemark(test_host)
          detail = "#{detail}\n#{location}" unless location.nil? || location.empty?

          alert.detail = detail
        end

        #
        # Return the alert to the caller.
        #
        alert
      end

      #
      # Expand to a message indicating whether a hostname is inside the
      # Bytemark network, or not.
      #
      # `host` may be a hostname, an IP address, or a URL.  Returns one
      # paragraph of text per resolved address, or the empty string if
      # nothing could be resolved or the lookup timed out.
      #
      def expand_inside_bytemark(host)
        #
        # If the host is a URL then we need to work with the hostname
        # component alone.  The text we emit still names the full URL.
        #
        target = host
        if target =~ %r{^([a-z]+)://([^/]+)}
          target = Regexp.last_match(2).dup
        end

        #
        # IP addresses we found for the host
        #
        ips = []

        #
        # Resolve the target to an IP, unless it is already an address.
        #
        if _ip_address?(target)
          ips.push(target)
        else
          #
          # OK if it didn't look like an IP address then attempt to
          # look it up, as both IPv4 and IPv6.
          #
          begin
            Timeout.timeout(30) do
              Resolv::DNS.open do |dns|
                [Resolv::DNS::Resource::IN::A,
                 Resolv::DNS::Resource::IN::AAAA].each do |record_type|
                  dns.getresources(target, record_type).each do |record|
                    ips.push(record.address.to_s)
                  end
                end
              end
            end
          rescue Timeout::Error
            return ''
          end
        end

        #
        # Did we fail to lookup any IPs?
        #
        return '' if ips.empty?

        #
        # Return the formatted message
        #
        paragraphs = ips.map do |ipaddr|
          if Custodian::Util::Bytemark.inside?(ipaddr.to_s)
            "\n\n#{host} resolves to #{ipaddr} which is inside the Bytemark network.\n\n"
          else
            "\n\n#{host} resolves to #{ipaddr} which is OUTSIDE the Bytemark network.\n\n"
          end
        end
        paragraphs.join
      end

      private

      #
      # Build an empty, non-replacing AlertUpdate addressed from our
      # configured alert source.
      #
      def _new_update
        update = Mauve::Proto::AlertUpdate.new
        update.alert = []
        update.source = @settings.alert_source
        update.replace = false
        update
      end

      #
      # Does the given text look like an IPv4 or IPv6 address?
      #
      def _ip_address?(text)
        !((text =~ IPV4_PATTERN) || (text =~ IPV6_PATTERN)).nil?
      end

      #
      # Read an integer setting, falling back to `default` when the key
      # is unset or blank.
      #
      # (`nil.to_i` is 0 and 0 is truthy, so `value.to_i || default` can
      # never yield the default - hence the explicit check.)
      #
      def _integer_setting(name, default)
        value = @settings.key(name)
        return default if value.nil? || value.to_s.strip.empty?

        value.to_i
      end

      #
      # The suppression period, in minutes, appropriate for the given
      # Time: the working-hours value on Monday-Friday between the start
      # and end of the working day, the on-call value otherwise.
      #
      def _suppression_period(now)
        day_start = _integer_setting('day_start', 10)
        day_end = _integer_setting('day_end', 18)

        working_suppress = _integer_setting('working_suppress', 4)
        oncall_suppress = _integer_setting('oncall_suppress', 10)

        weekday = (now.wday != 0) && (now.wday != 6)
        working = weekday && now.hour >= day_start && now.hour < day_end

        working ? working_suppress : oncall_suppress
      end

      register_alert_type 'mauve'

    end
  end
end