diff options
-rw-r--r-- | debian/changelog | 6 | ||||
-rw-r--r-- | lib/custodian/alerts/redis-state.rb | 131 |
2 files changed, 114 insertions, 23 deletions
diff --git a/debian/changelog b/debian/changelog index cc2aca7..6a1ba7d 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,3 +1,9 @@ +custodian (0.24.13) stable; urgency=low + + * Updated the redis-alerter, to store more useful-state. + + -- Steve Kemp <steve@bytemark.co.uk> Tue, 16 Nov 2015 15:00:51 +0000 + custodian (0.24.12) stable; urgency=low * Explicitly force the parser to use UTF-8 when reading the configuration diff --git a/lib/custodian/alerts/redis-state.rb b/lib/custodian/alerts/redis-state.rb index 9c5903c..3abf2ad 100644 --- a/lib/custodian/alerts/redis-state.rb +++ b/lib/custodian/alerts/redis-state.rb @@ -1,8 +1,37 @@ # # The redis-alerter. # -# This doesn't raise/clear alerts in the traditional sense, instead -# it just saves the results in a "recent tests" set inside Redis. +# This alerter doesn't raise/clear alerts in the traditional sense, +# instead it stores the state of the tests in a Redis store. +# +# We maintain several structures which are updated by raise/clear +# messages. We keep track of all the recent tests in the set +# `known_tests`. +# +# `known_tests` contains an array of the tests which have been +# carried out. For example: +# +# [ "foo.vm must run ping ..", +# "bar.vm must run ssh .." ] +# +# Then for each test we keep track of the state-transitions, +# and do so based upon the SHA1hash of the test-string. +# +# Assume we have the following test: +# +# "http://google.com must run http with status 200" +# +# This is hashed to : +# +# 71cf1735cd389732877177a757c45fdb5407f673 +# +# We then keep the single current-state in the key: +# +# 71cf1735cd389732877177a757c45fdb5407f673.current = pass|fail|unknown +# +# We build up a history when the state-changes via members of the set +# +# 71cf1735cd389732877177a757c45fdb5407f673.history = [ ] # # module Custodian @@ -54,17 +83,46 @@ module Custodian return unless @redis - tmp = {} - tmp['time'] = Time.now.to_i - tmp['type'] = @test.get_type - tmp['target'] = @test.target - tmp['result'] = 'RAISE' - tmp['reason'] = @test.error - tmp['test'] = @test.to_s - tmp['class'] = @test.class - - @redis.lpush('recent-tests', tmp.to_json) - @redis.ltrim('recent-tests', 0, 100) + # + # Make sure we know about this test. + # + test_s = @test.to_s + + @redis.sadd( "known_tests", test_s ) + + # + # Get the current state of this test - so that if the state + # has changed we can add that to our history. + # + # We use SHA1hash to determine our key. + # + key = Digest::SHA1.hexdigest test_s + + # + # The current state + # + current = @redis.get( "#{key}.current" ) || "unknown" + @redis.set( "#{key}.current", "FAIL" ) + + count = @redis.get( "#{key}.count" ) || "0" + @redis.set( "#{key}.count", (count.to_i + 1)) + + # + # Bump the execution count for this test. + # + if ( current != "FAIL" ) + + # + # The state has changed to raise. + # + tmp = {} + tmp['time'] = Time.now.to_i + tmp['result'] = 'FAIL' + tmp['reason'] = @test.error + + @redis.lpush( "#{key}.history", tmp.to_json) + @redis.ltrim('#{key}.history', 0, 100) + end end @@ -77,18 +135,45 @@ module Custodian return unless @redis + # + # Make sure we know about this test. + # + test_s = @test.to_s + + @redis.sadd( "known_tests", test_s ) + + # + # Get the current state of this test - so that if the state + # has changed we can add that to our history. + # + # We use SHA1hash to determine our key. + # + key = Digest::SHA1.hexdigest test_s - tmp = {} - tmp['time'] = Time.now.to_i - tmp['type'] = @test.get_type - tmp['target'] = @test.target - tmp['result'] = 'OK' - tmp['reason'] = '' - tmp['test'] = @test.to_s - tmp['class'] = @test.class + puts( "Key is #{key}") - @redis.lpush('recent-tests', tmp.to_json) - @redis.ltrim('recent-tests', 0, 100) + # + # The current state + # + current = @redis.get( "#{key}.current" ) || "unknown" + @redis.set( "#{key}.current", "OK" ) + + count = @redis.get( "#{key}.count" ) || "0" + @redis.set( "#{key}.count", (count.to_i + 1 )) + + if ( current != "OK" ) + + # + # The state has changed to raise. + # + tmp = {} + tmp['time'] = Time.now.to_i + tmp['result'] = 'OK' + tmp['reason'] = @test.error + + @redis.lpush( "#{key}.history", tmp.to_json) + @redis.ltrim('#{key}.history', 0, 100) + end end register_alert_type 'redis' |