aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorPatrick J Cherry <patrick@bytemark.co.uk>2011-08-18 18:11:10 +0100
committerPatrick J Cherry <patrick@bytemark.co.uk>2011-08-18 18:11:10 +0100
commit6aba0c3d8e2d30797755dc8b968fe338e13b8df4 (patch)
treea9c3a469146a47fe88e3a56c8bf40c0ed9b66288
parentc30daf56d4aab9e41a993d82406cba947c8efe32 (diff)
* Fixed up Alert to postpone raises/clears within initial sleep period for old alerts
* Fixed up AlertChanged to do the same * Added a migration method for the new AlertHistories table * added singleton cleanup for tests * moar tests * cleaned up Alert a bit
-rw-r--r--lib/mauve/alert.rb87
-rw-r--r--lib/mauve/alert_changed.rb13
-rw-r--r--lib/mauve/person.rb6
-rw-r--r--lib/mauve/server.rb7
-rw-r--r--test/tc_mauve_alert.rb197
-rw-r--r--test/tc_mauve_notification.rb14
-rw-r--r--test/tc_mauve_person.rb27
-rw-r--r--test/th_mauve.rb30
8 files changed, 295 insertions, 86 deletions
diff --git a/lib/mauve/alert.rb b/lib/mauve/alert.rb
index 28dfcf1..22f6784 100644
--- a/lib/mauve/alert.rb
+++ b/lib/mauve/alert.rb
@@ -87,12 +87,10 @@ module Mauve
validates_with_method :check_dates
- def inspect
+ def to_s
"#<Alert #{id}, alert_id #{alert_id}, source #{source}>"
end
- alias to_s inspect
-
#
# This is to stop datamapper inserting duff dates into the database.
#
@@ -163,20 +161,8 @@ module Mauve
def subject; attribute_get(:subject) || attribute_get(:source) || "not set" ; end
def detail; attribute_get(:detail) || "_No detail set._" ; end
- # def subject=(subject); set_changed_if_different( :subject, subject ); end
- # def summary=(summary); set_changed_if_different( :summary, summary ); end
-
- # def source=(source); attribute_set( :source, source ); end
- # def detail=(detail); attribute_set( :detail, detail ); end
-
protected
- #def set_changed_if_different(attribute, value)
- # return if self.__send__(attribute) == value
- # self.update_type ||= "changed"
- # attribute_set(attribute.to_sym, value)
- #end
-
#
# This allows us to take a copy of the changes before we save.
#
@@ -208,6 +194,7 @@ module Mauve
#
if (@attributes_before_save.has_key?(:update_type) and !is_a_new_alert) or
(self.update_type == "raised" and (is_a_new_alert or is_a_change))
+
self.notify
h = History.new(:alerts => [self], :type => "update")
@@ -265,27 +252,61 @@ module Mauve
end
def raise!(at = Time.now)
- self.acknowledged_by = nil
- self.acknowledged_at = nil
- self.will_unacknowledge_at = nil
- self.raised_at = at if self.raised_at.nil?
- self.will_raise_at = nil
- self.cleared_at = nil
- # Don't clear will_clear_at
- self.update_type = "raised" if self.update_type.nil? or self.update_type != "changed" or self.original_attributes[Alert.properties[:update_type]] == "cleared"
+ #
+ # OK if this is an alert updated in the last run, do not raise, just postpone.
+ #
+ if (self.will_raise_at or self.will_unacknowledge_at) and
+ Server.instance.in_initial_sleep? and
+ self.updated_at and
+ self.updated_at < Server.instance.started_at
+
+ postpone_until = Server.instance.started_at + Server.instance.initial_sleep
+
+ if self.will_raise_at and self.will_raise_at <= Time.now
+ self.will_raise_at = postpone_until
+ end
+
+ if self.will_unacknowledge_at and self.will_unacknowledge_at <= Time.now
+ self.will_unacknowledge_at = postpone_until
+ end
+
+ logger.info("Postponing raise of #{self} until #{postpone_until} as it was last updated in a prior run of Mauve.")
+ else
+ self.acknowledged_by = nil
+ self.acknowledged_at = nil
+ self.will_unacknowledge_at = nil
+ self.raised_at = at if self.raised_at.nil?
+ self.will_raise_at = nil
+ self.cleared_at = nil
+ # Don't clear will_clear_at
+ self.update_type = "raised" if self.update_type.nil? or self.update_type != "changed" or self.original_attributes[Alert.properties[:update_type]] == "cleared"
+ end
logger.error("Couldn't save #{self}") unless save
end
def clear!(at = Time.now)
- self.acknowledged_by = nil
- self.acknowledged_at = nil
- self.will_unacknowledge_at = nil
- self.raised_at = nil
- # Don't clear will_raise_at
- self.cleared_at = at if self.cleared_at.nil?
- self.will_clear_at = nil
- self.update_type = "cleared"
+ #
+ # Postpone clearance if we're in the sleep period.
+ #
+ if self.will_clear_at and
+ Server.instance.in_initial_sleep? and
+ self.updated_at and
+ self.updated_at < Server.instance.started_at
+
+ self.will_clear_at = Server.instance.started_at + Server.instance.initial_sleep
+
+ logger.info("Postponing clear of #{self} until #{self.will_clear_at} as it was last updated in a prior run of Mauve.")
+ else
+ self.acknowledged_by = nil
+ self.acknowledged_at = nil
+ self.will_unacknowledge_at = nil
+ self.raised_at = nil
+ # Don't clear will_raise_at
+ self.cleared_at = at if self.cleared_at.nil?
+ self.will_clear_at = nil
+ self.update_type = "cleared"
+ end
logger.error("Couldn't save #{self}") unless save
end
@@ -301,7 +322,7 @@ module Mauve
raise! if (will_unacknowledge_at and will_unacknowledge_at <= Time.now) or
(will_raise_at and will_raise_at <= Time.now)
clear! if will_clear_at && will_clear_at <= Time.now
- logger.info("Polled #{self.inspect}")
+ logger.info("Polled #{self.to_s}")
end
@@ -518,7 +539,7 @@ module Mauve
#
# Record the fact we received an update.
#
- logger.info("Received update from #{ip_source} for #{alert_db.inspect}")
+ logger.info("Received update from #{ip_source} for #{alert_db}")
end
diff --git a/lib/mauve/alert_changed.rb b/lib/mauve/alert_changed.rb
index 9b74396..2871aee 100644
--- a/lib/mauve/alert_changed.rb
+++ b/lib/mauve/alert_changed.rb
@@ -61,11 +61,22 @@ module Mauve
if alert.acknowledged?
- logger.info("Alert already acknowledged. Clearing reminder due for #{self.alert}.")
+ logger.info("Alert already acknowledged. Clearing reminder due for #{self.alert}.")
self.remind_at = nil
return save
end
+ #
+ # Postpone reminders from previous runs, if needed.
+ #
+ if Server.instance.in_initial_sleep? and
+ self.at < Server.instance.started_at
+
+ self.remind_at = Server.instance.started_at + Server.instance.initial_sleep
+ logger.info("Postponing reminder for #{self.alert} until #{self.remind_at} since this reminder was updated in a prior run of Mauve.")
+ return save
+ end
+
alert_group.notify(alert)
#
# Need to make sure this reminder is cleared.
diff --git a/lib/mauve/person.rb b/lib/mauve/person.rb
index 3831529..64ed07e 100644
--- a/lib/mauve/person.rb
+++ b/lib/mauve/person.rb
@@ -163,12 +163,6 @@ module Mauve
else
logger.info "Starting to send notifications again for #{username}." if was_suppressed
-
- end
-
- if Server.instance.started_at > alert.updated_at and (Server.instance.started_at + Server.instance.initial_sleep) > Time.now
- logger.info("Alert last updated in prior run of mauve -- ignoring for initial sleep period.")
- return true
end
#
diff --git a/lib/mauve/server.rb b/lib/mauve/server.rb
index 138636e..eb5872b 100644
--- a/lib/mauve/server.rb
+++ b/lib/mauve/server.rb
@@ -65,6 +65,10 @@ module Mauve
@initial_sleep = s
end
+ def in_initial_sleep?
+ Time.now < self.started_at + self.initial_sleep
+ end
+
def logger
@logger ||= Log4r::Logger.new(self.class.to_s)
end
@@ -92,7 +96,8 @@ module Mauve
# m.auto_migrate! if m.respond_to?("auto_migrate!")
end
- Mauve::AlertEarliestDate.create_view!
+ AlertHistory.migrate!
+ AlertEarliestDate.create_view!
return nil
end
diff --git a/test/tc_mauve_alert.rb b/test/tc_mauve_alert.rb
index f79859f..738489d 100644
--- a/test/tc_mauve_alert.rb
+++ b/test/tc_mauve_alert.rb
@@ -17,6 +17,20 @@ class TcMauveAlert < Mauve::UnitTest
def setup
super
setup_database
+ @test_config =<<EOF
+person ("test") {
+ all { true }
+}
+
+alert_group("default") {
+ level URGENT
+
+ notify("test") {
+ every 10.minutes
+ }
+}
+EOF
+
end
def teardown
@@ -24,7 +38,14 @@ class TcMauveAlert < Mauve::UnitTest
super
end
- def test_source_list
+ def test_alert_group
+
+ end
+
+ #
+ # This is also the test for in_source_list?
+ #
+ def test_source_lists
config=<<EOF
source_list "test", %w(test-1.example.com)
@@ -46,45 +67,93 @@ EOF
assert_equal( %w(has_ipv6 has_ipv4).sort, a.source_lists.sort )
end
+ def test_level
- def test_summary
+ end
+ def test_summary
a = Alert.new
a.summary = "Free swap memory (MB) (memory_swap) is too low"
assert_match(/memory_swap/, a.summary)
-
end
- def test_raise
+ def test_raise!
+ Server.instance.setup
- config=<<EOF
+ a = Alert.new(:source => "test-host", :alert_id => "test_raise!", :subject => "test")
-alert_group("test") {
+ a.raise!
-}
+ assert_equal(Time.now, a.raised_at)
-EOF
+ assert(a.raised?)
+ assert(!a.cleared?)
+ assert(!a.acknowledged?)
+ end
- Configuration.current = ConfigurationBuilder.parse(config)
+ def test_acknowledge!
+ person = Mauve::Person.new
+ person.username = "test-user"
Server.instance.setup
- a= Alert.new(:source => "test-host",
- :alert_id => "test" )
+ Mauve::Configuration.current.people[person.username] = person
- a.raise!
+ alert = Alert.new(
+ :alert_id => "test_acknowledge!",
+ :source => "test",
+ :subject => "test"
+ )
+
+ alert.raise!
+ assert(alert.raised?)
+
+ #
+ # This acknowledges an alert for 3 mins.
+ #
+ alert.acknowledge!(person, Time.now + 3.minutes)
+ assert_equal(person.username, alert.acknowledged_by)
+ assert_equal(Time.now, alert.acknowledged_at)
+ assert_equal(Time.now + 3.minutes, alert.will_unacknowledge_at)
+ assert(alert.acknowledged?)
+
+
+ next_alert = Alert.find_next_with_event
+ assert_equal(next_alert.id, alert.id)
+ assert_equal(Time.now+3.minutes, next_alert.due_at)
+
+ Timecop.freeze(Time.now + 3.minutes)
+
+
+ #
+ # The alert should unacknowledge itself.
+ #
+ alert.poll
+ assert(!alert.acknowledged?)
+ end
+
+ def test_unacknowledge!
end
- def test_alert_reception
+ def test_clear!
+ end
+
+ def test_due_at
+ end
+
+ def test_poll
+ end
+
+ def test_recieve_update
Server.instance.setup
update = Proto::AlertUpdate.new
update.source = "test-host"
message = Proto::Alert.new
update.alert << message
- message.id = "test1"
+ message.id = "test_recieve_update"
message.summary = "test summary"
message.detail = "test detail"
message.raise_time = Time.now.to_i
@@ -92,7 +161,7 @@ EOF
Alert.receive_update(update, Time.now, "127.0.0.1")
- a = Alert.first(:alert_id => 'test1')
+ a = Alert.first(:alert_id => 'test_recieve_update')
assert(a.raised?)
assert_equal("test-host", a.subject)
@@ -102,40 +171,98 @@ EOF
end
- def test_alert_ackowledgement
- person = Mauve::Person.new
- person.username = "test-user"
-
+ def test_notify_if_needed
+ Configuration.current = ConfigurationBuilder.parse(@test_config)
Server.instance.setup
-
- Mauve::Configuration.current.people[person.username] = person
-
+ #
+ # Notifications should be sent if:
+ #
+ # * the alert has changed state (update_type); or
+ # * the alert new and "raised".
+
alert = Alert.new(
- :alert_id => "test-acknowledge",
+ :alert_id => "test_notify_if_needed",
:source => "test",
:subject => "test"
)
+
+ #
+ # Must not notify -- this is a new alert which is not raised.
+ #
+ alert.clear!
+ assert_equal(0, Server.instance.notification_buffer.size, "Notifications sent erroneously on clear.")
+
+ #
+ # Now raise.
+ #
alert.raise!
- assert(alert.raised?)
+ assert_equal(1, Server.instance.notification_buffer.size, "Wrong number of notifications sent out when new alert raised.")
- alert.acknowledge!(person, Time.now + 3.minutes)
- assert(alert.acknowledged?)
+ #
+ # Empty the buffer.
+ Server.instance.notification_buffer.pop
+
+ Timecop.freeze(Time.now+5)
+ alert.raise!
+ #
+ # Should not re-raise.
+ #
+ assert_equal(0, Server.instance.notification_buffer.size, "Notification sent erroneously on second raise.")
+
+ alert.acknowledge!(Mauve::Configuration.current.people["test"])
+ assert_equal(1, Server.instance.notification_buffer.size, "Wrong number of notifications sent erroneously on acknowledge.")
+ #
+ # Empty the buffer.
+ Server.instance.notification_buffer.pop
- next_alert = Alert.find_next_with_event
- assert_equal(next_alert.id, alert.id)
- assert_equal(Time.now+3.minutes, next_alert.due_at)
+ alert.subject = "changed subject"
+ assert(alert.save)
+ assert_equal(0, Server.instance.notification_buffer.size, "Notification sent erroneously on change of subject.")
+
+ alert.clear!
+ assert_equal(1, Server.instance.notification_buffer.size, "Wrong number of notifications sent erroneously on clear.")
+ end
- Timecop.freeze(next_alert.due_at)
- alert.poll
+ #
+ # These are more in-depth tests
+ #
+ def test_no_notification_for_old_alerts
+ Configuration.current = ConfigurationBuilder.parse(@test_config)
+ Server.instance.setup
+ assert_equal(Time.now, Server.instance.started_at)
+
+ Timecop.freeze(Time.now - 10.minutes)
+ alert = Alert.new(
+ :alert_id => "test_no_notification_for_old_alerts",
+ :source => "test",
+ :subject => "test",
+ :will_raise_at => Time.now + 10.minutes
+ )
+ alert.clear!
+
+ Timecop.freeze(Time.now + 10.minutes)
+ assert_equal(Time.now - 10.minutes, alert.updated_at, "Alert should be last updated before the server instance thinks it started.")
+
+ 5.times do
+ assert(Server.instance.in_initial_sleep?,"Server not in initial sleep when it should be.")
+ alert.poll
+ assert_equal(Server.instance.started_at + Server.instance.initial_sleep, alert.will_raise_at)
+ assert_equal(0, Server.instance.notification_buffer.size, "Notification sent for old alert")
+ Timecop.freeze(Time.now + 1.minute)
+ end
#
- # The alert should unacknowledge itself.
+ # No longer in sleep period.
#
- assert(!alert.acknowledged?)
-
+ assert(!Server.instance.in_initial_sleep?,"Server in initial sleep when it shouldn't be.")
+ alert.poll
+ assert(alert.raised?)
+ assert_equal(1, Server.instance.notification_buffer.size, "Notification sent for old alert")
+ #
+ # TODO need to do for will_clear_at and will_unacknowledge_at
+ #
end
end
-
diff --git a/test/tc_mauve_notification.rb b/test/tc_mauve_notification.rb
index 435d5da..6b4c3d1 100644
--- a/test/tc_mauve_notification.rb
+++ b/test/tc_mauve_notification.rb
@@ -114,25 +114,19 @@ class TcMauveNotification < Mauve::UnitTest
include Mauve
def setup
- @logger = setup_logger
- Timecop.freeze(Time.local(2011,8,1,0,0,0,0))
+ super
+ setup_database
end
def teardown
- teardown_logger
- Timecop.return
- DataObjects::Pooling.pools.each{|pool| pool.dispose}
+ teardown_database
+ super
end
def test_notify
t = Time.now
config=<<EOF
-
-server {
- database "sqlite::memory:"
-}
-
person ("test1") {
all { true }
}
diff --git a/test/tc_mauve_person.rb b/test/tc_mauve_person.rb
index 8ac3141..0026db6 100644
--- a/test/tc_mauve_person.rb
+++ b/test/tc_mauve_person.rb
@@ -9,8 +9,35 @@ require 'pp'
class TcMauvePerson < Mauve::UnitTest
+ def setup
+ super
+ setup_database
+ end
+ def teardown
+ teardown_database
+ super
+ end
+ def test_suppressed?
+
+ end
+
+ def test_send_alert
+
+ end
+
+ def test_do_send_alert
+
+ end
+
+ def test_current_alerts
+
+ end
+
+ def test_is_on_holiday?
+
+ end
end
diff --git a/test/th_mauve.rb b/test/th_mauve.rb
index a113ad0..76a4933 100644
--- a/test/th_mauve.rb
+++ b/test/th_mauve.rb
@@ -3,6 +3,27 @@ require 'mauve/datamapper'
require 'timecop'
require 'log4r'
require 'pp'
+require 'singleton'
+
+# Taken from
+#
+# http://blog.ardes.com/2006/12/11/testing-singletons-with-ruby
+#
+class <<Singleton
+
+ def included_with_reset(klass)
+ included_without_reset(klass)
+ class <<klass
+ def reset_instance
+ Singleton.send :__init__, self
+ self
+ end
+ end
+ end
+
+ alias_method :included_without_reset, :included
+ alias_method :included, :included_with_reset
+end
module Mauve
class TestOutputter < Log4r::Outputter
@@ -33,6 +54,7 @@ module Mauve
class UnitTest < Test::Unit::TestCase
def setup
+ reset_all_singletons
setup_logger
setup_time
end
@@ -40,6 +62,7 @@ module Mauve
def teardown
teardown_logger
teardown_time
+ reset_all_singletons
end
def setup_logger
@@ -81,6 +104,13 @@ module Mauve
Timecop.return
end
+ def reset_all_singletons
+ Mauve.constants.collect{|const| Mauve.const_get(const)}.each do |klass|
+ next unless klass.respond_to?(:instance)
+ klass.reset_instance
+ end
+ end
+
def default_test
#
#