diff options
| author | Saku Ytti <saku@ytti.fi> | 2014-07-11 21:06:37 +0300 | 
|---|---|---|
| committer | Saku Ytti <saku@ytti.fi> | 2014-07-11 21:06:37 +0300 | 
| commit | e3aa71fc17eba8586d2e0117b6b363942f577488 (patch) | |
| tree | 4e9a91fa86cc6787c872b9273cf843ebf9aedb67 /lib/oxidized | |
| parent | ee1bcd1f4d8d9b487d9c37b8ad97c06f24bdb09a (diff) | |
add support for retrying failed attempts
Looks like this in syslog:
Jul 11 21:05:53 ytti oxidized[9820]: 10.10.10.10 raised Errno::ENETUNREACH with msg "Network is unreachable - connect(2) for "10.10.10.10" port 22"
Jul 11 21:05:53 ytti oxidized[9820]: 10.10.10.10 raised Errno::ENETUNREACH with msg "Network is unreachable - connect(2) for "10.10.10.10" port 23"
Jul 11 21:05:54 ytti oxidized[9820]: 10.10.10.10 status no_connection, retry attempt 1
Jul 11 21:05:54 ytti oxidized[9820]: 10.10.10.10 raised Errno::ENETUNREACH with msg "Network is unreachable - connect(2) for "10.10.10.10" port 22"
Jul 11 21:05:54 ytti oxidized[9820]: 10.10.10.10 raised Errno::ENETUNREACH with msg "Network is unreachable - connect(2) for "10.10.10.10" port 23"
Jul 11 21:05:55 ytti oxidized[9820]: 10.10.10.10 status no_connection, retry attempt 2
Jul 11 21:05:55 ytti oxidized[9820]: 10.10.10.10 raised Errno::ENETUNREACH with msg "Network is unreachable - connect(2) for "10.10.10.10" port 22"
Jul 11 21:05:55 ytti oxidized[9820]: 10.10.10.10 raised Errno::ENETUNREACH with msg "Network is unreachable - connect(2) for "10.10.10.10" port 23"
Jul 11 21:05:56 ytti oxidized[9820]: 10.10.10.10 status no_connection, retry attempt 3
Jul 11 21:05:56 ytti oxidized[9820]: 10.10.10.10 raised Errno::ENETUNREACH with msg "Network is unreachable - connect(2) for "10.10.10.10" port 22"
Jul 11 21:05:56 ytti oxidized[9820]: 10.10.10.10 raised Errno::ENETUNREACH with msg "Network is unreachable - connect(2) for "10.10.10.10" port 23"
Jul 11 21:05:57 ytti oxidized[9820]: 10.10.10.10 status no_connection, retries exhausted, giving up
Diffstat (limited to 'lib/oxidized')
| Mode | File | Lines changed |
|---|---|---|
| -rw-r--r-- | lib/oxidized/config.rb | 3 |
| -rw-r--r-- | lib/oxidized/input/input.rb | 1 |
| -rw-r--r-- | lib/oxidized/node.rb | 3 |
| -rw-r--r-- | lib/oxidized/worker.rb | 15 |
4 files changed, 17 insertions, 5 deletions
```diff
diff --git a/lib/oxidized/config.rb b/lib/oxidized/config.rb
index 02698a6..b4d1c64 100644
--- a/lib/oxidized/config.rb
+++ b/lib/oxidized/config.rb
@@ -22,7 +22,8 @@ module Oxidized
   CFGS.default.log           = File.join Config::Root, 'log'
   CFGS.default.debug         = false
   CFGS.default.threads       = 30
-  CFGS.default.timeout       = 30
+  CFGS.default.timeout       = 20
+  CFGS.default.retries       = 3
   CFGS.default.prompt        = /^([\w.@-]+[#>]\s?)$/
   CFGS.default.rest          = '127.0.0.1:8888' # or false to disable
   CFGS.default.vars          = {}             # could be 'enable'=>'enablePW'
diff --git a/lib/oxidized/input/input.rb b/lib/oxidized/input/input.rb
index 1184a0b..049c99a 100644
--- a/lib/oxidized/input/input.rb
+++ b/lib/oxidized/input/input.rb
@@ -11,6 +11,7 @@ module Oxidized
         Timeout::Error,
         Errno::ECONNRESET,
         Errno::EHOSTUNREACH,
+        Errno::ENETUNREACH,
         Errno::EPIPE,
       ],
     }
diff --git a/lib/oxidized/node.rb b/lib/oxidized/node.rb
index c39bee1..72c84bd 100644
--- a/lib/oxidized/node.rb
+++ b/lib/oxidized/node.rb
@@ -6,7 +6,7 @@ module Oxidized
   class ModelNotFound  < OxidizedError; end
   class Node
     attr_reader :name, :ip, :model, :input, :output, :group, :auth, :prompt, :vars, :last
-    attr_accessor :running, :user, :msg, :from, :stats
+    attr_accessor :running, :user, :msg, :from, :stats, :retry
     alias :running? :running
     def initialize opt
       @name           = opt[:name]
@@ -19,6 +19,7 @@ module Oxidized
       @prompt         = resolve_prompt opt
       @vars           = opt[:vars]
       @stats          = Stats.new
+      @retry          = 0
 
       # model instance needs to access node instance
       @model.node = self
diff --git a/lib/oxidized/worker.rb b/lib/oxidized/worker.rb
index 58bf659..0e96212 100644
--- a/lib/oxidized/worker.rb
+++ b/lib/oxidized/worker.rb
@@ -29,17 +29,26 @@ module Oxidized
       node.last = job
       node.stats.add job
       @jobs.duration job.time
+      node.running = false
       if job.status == :success
         msg = "update #{node.name}"
         msg += " from #{node.from}" if node.from
         msg += " with message '#{node.msg}'" if node.msg
         node.output.new.store node.name, job.config,
                               :msg => msg, :user => node.user, :group => node.group
-        node.reset
       else
-        Log.warn "#{node.name} status #{job.status}"
+        msg = "#{node.name} status #{job.status}"
+        if node.retry < CFG.retries
+          node.retry += 1
+          msg += ", retry attempt #{node.retry}"
+          @nodes.next node.name
+        else
+          msg += ", retries exhausted, giving up"
+          node.retry = 0
+        end
+        Log.warn msg
       end
-      node.running = false
+      node.reset
     end
   end
 end
```
