diff options
-rw-r--r-- | CHANGELOG.md | 9 | ||||
-rw-r--r-- | extra/nagios_check_failing_nodes.rb | 25 | ||||
-rw-r--r-- | lib/oxidized/core.rb | 1 | ||||
-rw-r--r-- | lib/oxidized/jobs.rb | 32 | ||||
-rw-r--r-- | lib/oxidized/model/ironware.rb | 2 | ||||
-rw-r--r-- | lib/oxidized/model/model.rb | 9 | ||||
-rw-r--r-- | lib/oxidized/model/xos.rb | 2 | ||||
-rw-r--r-- | lib/oxidized/node.rb | 3 | ||||
-rw-r--r-- | lib/oxidized/nodes.rb | 4 | ||||
-rw-r--r-- | lib/oxidized/output/git.rb | 9 | ||||
-rw-r--r-- | lib/oxidized/worker.rb | 6 |
11 files changed, 85 insertions, 17 deletions
diff --git a/CHANGELOG.md b/CHANGELOG.md index c1735c3..24b9cba 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,12 @@ +# 0.4.0 +- FEATURE: allow setting IP address in addition to name in source (SQL/CSV) +- FEATURE: approximate how long it takes to get node from larger view than 1 +- FEATURE: unconditionally start new job if too long has passed since previous start +- BUGFIX: xos while using telnet (by @fhibler) +- BUGFIX: ironware logout on some models (by @fhibler) +- BUGFIX: allow node to be removed while it is being collected +- BUGFIX: if model returns non string value, return empty string + # 0.3.0 - FEATURE: *FIXME* bunch of stuff I did for richih, docs needed - FEATURE: ComWare model (by erJasp) diff --git a/extra/nagios_check_failing_nodes.rb b/extra/nagios_check_failing_nodes.rb new file mode 100644 index 0000000..1c81f66 --- /dev/null +++ b/extra/nagios_check_failing_nodes.rb @@ -0,0 +1,25 @@ +#!/usr/bin/env ruby + +## contrib via https://github.com/ytti/oxidized/issues/67 + +require 'open-uri' +require 'json' + +critical = false +critical_nodes = [] + +json = JSON.load(open("http://localhost:8888/nodes.json")) +json.each do |node| + if node['last']['status'] != 'success' + critical_nodes << node['name'] + critical = true + end +end + +if critical + puts 'Unable to backup: ' + critical_nodes.join(' ') + exit 2 +else + puts 'Backup of all nodes completed successfully.' + exit 0 +end diff --git a/lib/oxidized/core.rb b/lib/oxidized/core.rb index ba46f3a..71267dd 100644 --- a/lib/oxidized/core.rb +++ b/lib/oxidized/core.rb @@ -17,6 +17,7 @@ module Oxidized Oxidized.mgr = Manager.new nodes = Nodes.new @worker = Worker.new nodes + trap('HUP') { nodes.load } if CFG.rest? begin require 'oxidized/web' diff --git a/lib/oxidized/jobs.rb b/lib/oxidized/jobs.rb index 6476744..ff7f92b 100644 --- a/lib/oxidized/jobs.rb +++ b/lib/oxidized/jobs.rb @@ -1,24 +1,46 @@ module Oxidized class Jobs < Array - attr_accessor :interval, :duration, :max, :want + AVERAGE_DURATION = 5 # initially presume nodes take 5s to complete + MAX_INTER_JOB_GAP = 300 # add job if more than X from last job started + attr_accessor :interval, :max, :want + def initialize max, interval, nodes @max = max - #@interval = interval * 60 @interval = interval @nodes = nodes - @duration = 4 - new_count + @last = Time.now.utc + @durations = Array.new @nodes.size, AVERAGE_DURATION + duration AVERAGE_DURATION super() end + + def push arg + @last = Time.now.utc + super + end + def duration last - @duration = (@duration + last) / 2 + @durations.push(last).shift + @duration = @durations.inject(:+).to_f / @nodes.size #rolling average new_count end + def new_count @want = ((@nodes.size * @duration) / @interval).to_i @want = 1 if @want < 1 @want = @nodes.size if @want > @nodes.size @want = @max if @want > @max end + + def work + # if a) we want less or same amount of threads as we now running + # and b) we want less threads running than the total amount of nodes + # and c) there is more than MAX_INTER_JOB_GAP since last one was started + # then we want one more thread (rationale is to fix hanging thread causing HOLB) + if @want <= size and @want < @nodes.size + @want +=1 if (Time.now.utc - @last) > MAX_INTER_JOB_GAP + end + end + end end diff --git a/lib/oxidized/model/ironware.rb b/lib/oxidized/model/ironware.rb index 679bdea..f9998ae 100644 --- a/lib/oxidized/model/ironware.rb +++ b/lib/oxidized/model/ironware.rb @@ -27,7 +27,7 @@ class IronWare < Oxidized::Model cfg :telnet, :ssh do post_login 'skip-page-display' - pre_logout 'exit' + pre_logout 'logout' end end diff --git a/lib/oxidized/model/model.rb b/lib/oxidized/model/model.rb index d3f54b9..899b40a 100644 --- a/lib/oxidized/model/model.rb +++ b/lib/oxidized/model/model.rb @@ -133,10 +133,10 @@ module Oxidized outputs << out end procs[:pre].each do |pre_proc| - outputs.unshift Oxidized::String.new(instance_eval(&pre_proc)) + outputs.unshift process_cmd_output(instance_eval(&pre_proc), '') end procs[:post].each do |post_proc| - outputs << Oxidized::String.new(instance_eval(&post_proc)) + outputs << process_cmd_output(instance_eval(&post_proc), '') end outputs end @@ -152,9 +152,8 @@ module Oxidized private def process_cmd_output output, name - if output.class != Oxidized::String - output = Oxidized::String.new output - end + output = Oxidized::String.new output if ::String === output + output = Oxidized::String.new '' unless Oxidized::String === output output.set_cmd(name) output end diff --git a/lib/oxidized/model/xos.rb b/lib/oxidized/model/xos.rb index a8292ca..88c81ed 100644 --- a/lib/oxidized/model/xos.rb +++ b/lib/oxidized/model/xos.rb @@ -29,7 +29,7 @@ class XOS < Oxidized::Model cfg :telnet do username /^login:/ - password /^passowrd:/ + password /^\r*password:/ end cfg :telnet, :ssh do diff --git a/lib/oxidized/node.rb b/lib/oxidized/node.rb index 6bc2b0f..253de53 100644 --- a/lib/oxidized/node.rb +++ b/lib/oxidized/node.rb @@ -10,7 +10,8 @@ module Oxidized alias :running? :running def initialize opt @name = opt[:name] - @ip = Resolv.getaddress @name + @ip = IPAddr.new(opt[:ip]).to_s rescue nil + @ip ||= Resolv.new.getaddress @name @group = opt[:group] @input = resolve_input opt @output = resolve_output opt diff --git a/lib/oxidized/nodes.rb b/lib/oxidized/nodes.rb index 032118d..cb2fbc5 100644 --- a/lib/oxidized/nodes.rb +++ b/lib/oxidized/nodes.rb @@ -1,6 +1,6 @@ module Oxidized - require 'oxidized/node' require 'ipaddr' + require 'oxidized/node' class Oxidized::NotSupported < OxidizedError; end class Oxidized::NodeNotFound < OxidizedError; end class Nodes < Array @@ -23,8 +23,8 @@ module Oxidized Log.error "node %s is not resolvable, raised %s with message '%s'" % [node, err.class, err.message] end end - Log.info "Loaded #{size} nodes" size == 0 ? replace(new) : update_nodes(new) + Log.info "Loaded #{size} nodes" end end diff --git a/lib/oxidized/output/git.rb b/lib/oxidized/output/git.rb index 0c73638..d5eb8e7 100644 --- a/lib/oxidized/output/git.rb +++ b/lib/oxidized/output/git.rb @@ -1,5 +1,6 @@ module Oxidized class Git < Output + class GitError < OxidizedError; end begin gem 'rugged', '~> 0.21.0' require 'rugged' @@ -71,8 +72,12 @@ class Git < Output end repo = Rugged::Repository.new repo update_repo repo, file, data, @msg, @user, @email - rescue Rugged::OSError, Rugged::RepositoryError - Rugged::Repository.init_at repo, :bare + rescue Rugged::OSError, Rugged::RepositoryError => open_error + begin + Rugged::Repository.init_at repo, :bare + rescue => create_error + raise GitError, "first '#{open_error.message}' was raised while opening git repo, then '#{create_error.message}' was while trying to create git repo" + end retry end diff --git a/lib/oxidized/worker.rb b/lib/oxidized/worker.rb index e274e1e..7ed70ac 100644 --- a/lib/oxidized/worker.rb +++ b/lib/oxidized/worker.rb @@ -7,10 +7,12 @@ module Oxidized @jobs = Jobs.new CFG.threads, CFG.interval, @nodes Thread.abort_on_exception = true end + def work ended = [] @jobs.delete_if { |job| ended << job if not job.alive? } ended.each { |job| process job } + @jobs.work while @jobs.size < @jobs.want Log.debug "Jobs #{@jobs.size}, Want: #{@jobs.want}" # ask for next node in queue non destructive way @@ -24,6 +26,7 @@ module Oxidized @jobs.push Job.new node end end + def process job node = job.node node.last = job @@ -49,6 +52,9 @@ module Oxidized end Log.warn msg end + rescue NodeNotFound + Log.warn "#{node.name} not found, removed while collecting?" end + end end |