From 5fa5d6bea05f9176407c1a86942fed4a964867ca Mon Sep 17 00:00:00 2001 From: Saku Ytti Date: Thu, 19 Feb 2015 16:41:13 +0200 Subject: xos update --- CHANGELOG.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index c1735c3..c65dd07 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,6 @@ +# 0.3.1 +- BUGFIX: xos while using telnet (by @fhibler) + # 0.3.0 - FEATURE: *FIXME* bunch of stuff I did for richih, docs needed - FEATURE: ComWare model (by erJasp) -- cgit v1.2.1 From 1a9a8143d89bcdf8ac336edd1c9d262f68055306 Mon Sep 17 00:00:00 2001 From: Saku Ytti Date: Thu, 19 Feb 2015 18:05:00 +0200 Subject: ironware fix --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index c65dd07..9d7b664 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,6 @@ # 0.3.1 - BUGFIX: xos while using telnet (by @fhibler) +- BUGFIX: ironware logout on some models (by @fhibler) # 0.3.0 - FEATURE: *FIXME* bunch of stuff I did for richih, docs needed -- cgit v1.2.1 From 61c128c95ffbcdff0e379d8b005c287db54c5cc3 Mon Sep 17 00:00:00 2001 From: Saku Ytti Date: Sat, 21 Feb 2015 15:53:46 +0200 Subject: allow setting IP from source sql/csv can now set IP as well as name, useful if you don't have FQDN. If IP is given, then we don't try to resolve name. closes #78 --- CHANGELOG.md | 1 + lib/oxidized/node.rb | 3 ++- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 9d7b664..2dcc284 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,4 +1,5 @@ # 0.3.1 +- FEATURE: allow setting IP address in addition to name in source (SQL/CSV) - BUGFIX: xos while using telnet (by @fhibler) - BUGFIX: ironware logout on some models (by @fhibler) diff --git a/lib/oxidized/node.rb b/lib/oxidized/node.rb index 6bc2b0f..2d41600 100644 --- a/lib/oxidized/node.rb +++ b/lib/oxidized/node.rb @@ -10,7 +10,8 @@ module Oxidized alias :running? 
:running def initialize opt @name = opt[:name] - @ip = Resolv.getaddress @name + @ip = IPAddr.new(opt[:ip]).to_s rescue nil + @ip ||= Resolv.getaddress @name @group = opt[:group] @input = resolve_input opt @output = resolve_output opt -- cgit v1.2.1 From ba2d48b5d4f7bf8fa657ba4c59416250cf6caca7 Mon Sep 17 00:00:00 2001 From: Saku Ytti Date: Sat, 21 Feb 2015 16:23:22 +0200 Subject: Increase rolling average view Previously view was 2 nodes, so if average was 7s then some node took 1000s your average would be 503.5s. Now we're looking rolling average of each node, which might not be wise either, perhaps I should limit it to last 100 or 1000 nodes. Since we really don't want another place where we have potentially unbounded amount of state... --- lib/oxidized/jobs.rb | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/lib/oxidized/jobs.rb b/lib/oxidized/jobs.rb index 6476744..0511b49 100644 --- a/lib/oxidized/jobs.rb +++ b/lib/oxidized/jobs.rb @@ -1,17 +1,17 @@ module Oxidized class Jobs < Array - attr_accessor :interval, :duration, :max, :want + attr_accessor :interval, :max, :want def initialize max, interval, nodes @max = max - #@interval = interval * 60 @interval = interval @nodes = nodes - @duration = 4 + @durations = Array.new(@nodes.size, 5) # guess that nodes take 5s new_count super() end def duration last - @duration = (@duration + last) / 2 + @durations.push(last).shift + @duration = @durations.inject(:+).to_f / @nodes.size #rolling average new_count end def new_count @@ -20,5 +20,8 @@ module Oxidized @want = @nodes.size if @want > @nodes.size @want = @max if @want > @max end + def add_job + @want += 1 + end end end -- cgit v1.2.1 From 9c54ed631098db81a8cb2db60890af705c631541 Mon Sep 17 00:00:00 2001 From: Saku Ytti Date: Sat, 21 Feb 2015 16:29:24 +0200 Subject: Force new job if too long since last job MAX_INTER_JOB_GAP is now 300s, if latest job was started 300s ago, we add new job. 
Rationale is that if we want n jobs, and all these jobs are taking very very long, or perhaps hanging, then we are blocking everything else too. Consider you use one job, because it's enough to meet your rotation interval quota. Then some one box is somehow taking tens of minutes or hours, we won't figure out new amount of workers until it finishes, so we're blocking all other jobs from spawning. I'm not super happy about this solution, not really sure what is the right way to tackle it. --- lib/oxidized/nodes.rb | 2 +- lib/oxidized/worker.rb | 4 ++++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/lib/oxidized/nodes.rb b/lib/oxidized/nodes.rb index 032118d..3586b97 100644 --- a/lib/oxidized/nodes.rb +++ b/lib/oxidized/nodes.rb @@ -1,6 +1,6 @@ module Oxidized - require 'oxidized/node' require 'ipaddr' + require 'oxidized/node' class Oxidized::NotSupported < OxidizedError; end class Oxidized::NodeNotFound < OxidizedError; end class Nodes < Array diff --git a/lib/oxidized/worker.rb b/lib/oxidized/worker.rb index e274e1e..8fe7ab5 100644 --- a/lib/oxidized/worker.rb +++ b/lib/oxidized/worker.rb @@ -2,15 +2,18 @@ module Oxidized require 'oxidized/job' require 'oxidized/jobs' class Worker + MAX_INTER_JOB_GAP = 300 def initialize nodes @nodes = nodes @jobs = Jobs.new CFG.threads, CFG.interval, @nodes + @last = Time.now.utc Thread.abort_on_exception = true end def work ended = [] @jobs.delete_if { |job| ended << job if not job.alive? } ended.each { |job| process job } + @jobs.add_job if Time.now.utc - @last > MAX_INTER_JOB_GAP while @jobs.size < @jobs.want Log.debug "Jobs #{@jobs.size}, Want: #{@jobs.want}" # ask for next node in queue non destructive way @@ -21,6 +24,7 @@ module Oxidized # shift nodes and get the next node node = @nodes.get node.running? ? 
next : node.running = true + @last = Time.now.utc @jobs.push Job.new node end end -- cgit v1.2.1 From ee0298340ead4eb87ae6860d606ba61cd7e92c6c Mon Sep 17 00:00:00 2001 From: Saku Ytti Date: Sat, 21 Feb 2015 16:33:40 +0200 Subject: changelog update --- CHANGELOG.md | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 2dcc284..89c71e0 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,7 @@ -# 0.3.1 +# 0.4.0 - FEATURE: allow setting IP address in addition to name in source (SQL/CSV) +- FEATURE: approximate how long it takes to get node from larger view than 1 +- FEATURE: unconditionally start new job if too long has passed since previous start - BUGFIX: xos while using telnet (by @fhibler) - BUGFIX: ironware logout on some models (by @fhibler) -- cgit v1.2.1 From 5fe82dcf54beecbd24d30f8715aabeac12ecb33f Mon Sep 17 00:00:00 2001 From: Saku Ytti Date: Sun, 22 Feb 2015 12:05:49 +0200 Subject: add nagios monitoring script closes #67 --- extra/nagios_check_failing_nodes.rb | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) create mode 100644 extra/nagios_check_failing_nodes.rb diff --git a/extra/nagios_check_failing_nodes.rb b/extra/nagios_check_failing_nodes.rb new file mode 100644 index 0000000..1c81f66 --- /dev/null +++ b/extra/nagios_check_failing_nodes.rb @@ -0,0 +1,25 @@ +#!/usr/bin/env ruby + +## contrib via https://github.com/ytti/oxidized/issues/67 + +require 'open-uri' +require 'json' + +critical = false +critical_nodes = [] + +json = JSON.load(open("http://localhost:8888/nodes.json")) +json.each do |node| + if node['last']['status'] != 'success' + critical_nodes << node['name'] + critical = true + end +end + +if critical + puts 'Unable to backup: ' + critical_nodes.join(' ') + exit 2 +else + puts 'Backup of all nodes completed successfully.' 
+ exit 0 +end -- cgit v1.2.1 From 6634f355d99ede5ad6cd6149324d028e455c6eee Mon Sep 17 00:00:00 2001 From: Saku Ytti Date: Sun, 22 Feb 2015 12:12:26 +0200 Subject: Allow node to be removed while collecting Closes #68 (hopefully at least) Further, our TODO to refactor/redesign the code to move state from memory to disk should help. --- lib/oxidized/worker.rb | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/lib/oxidized/worker.rb b/lib/oxidized/worker.rb index 8fe7ab5..99fc8b8 100644 --- a/lib/oxidized/worker.rb +++ b/lib/oxidized/worker.rb @@ -3,12 +3,14 @@ module Oxidized require 'oxidized/jobs' class Worker MAX_INTER_JOB_GAP = 300 + def initialize nodes @nodes = nodes @jobs = Jobs.new CFG.threads, CFG.interval, @nodes @last = Time.now.utc Thread.abort_on_exception = true end + def work ended = [] @jobs.delete_if { |job| ended << job if not job.alive? } @@ -28,6 +30,7 @@ module Oxidized @jobs.push Job.new node end end + def process job node = job.node node.last = job @@ -53,6 +56,9 @@ module Oxidized end Log.warn msg end + rescue NodeNotFound + Log.warn "#{node.name} not found, removed while collecting?" 
end + end end -- cgit v1.2.1 From 72096d90e6d85306418ed4e3385a657b4056d3bc Mon Sep 17 00:00:00 2001 From: Saku Ytti Date: Sun, 22 Feb 2015 12:14:41 +0200 Subject: update changelog --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 89c71e0..24201a4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,7 @@ - FEATURE: unconditionally start new job if too long has passed since previous start - BUGFIX: xos while using telnet (by @fhibler) - BUGFIX: ironware logout on some models (by @fhibler) +- BUGFIX: allow node to be removed while it is being collected # 0.3.0 - FEATURE: *FIXME* bunch of stuff I did for richih, docs needed -- cgit v1.2.1 From 33124f2570a4d8c4dfa153120dbc06b3c725a25c Mon Sep 17 00:00:00 2001 From: Saku Ytti Date: Sun, 22 Feb 2015 12:28:08 +0200 Subject: run #duration not #new_count @duration isn't set before #duration is ran --- lib/oxidized/jobs.rb | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/lib/oxidized/jobs.rb b/lib/oxidized/jobs.rb index 0511b49..3342679 100644 --- a/lib/oxidized/jobs.rb +++ b/lib/oxidized/jobs.rb @@ -1,12 +1,13 @@ module Oxidized class Jobs < Array + AVERAGE_DURATION = 5 # initially presume nodes take 5s to complete attr_accessor :interval, :max, :want def initialize max, interval, nodes @max = max @interval = interval @nodes = nodes - @durations = Array.new(@nodes.size, 5) # guess that nodes take 5s - new_count + @durations = Array.new @nodes.size, AVERAGE_DURATION + duration AVERAGE_DURATION super() end def duration last -- cgit v1.2.1 From aea148d771dca0e10ca1a2f5505446fbc689dad2 Mon Sep 17 00:00:00 2001 From: Saku Ytti Date: Sun, 22 Feb 2015 13:05:37 +0200 Subject: refactor HOLB prevention Still not sure we want this. But previous one might have caused infinite loop in #work. Consider we have just 1 node all together, and our rotation interval is more than our MAX_INTER_JOB_GAP, then we'd add @want to 2, instead of 1. 
Now we want more threads than we have nodes, and 'while @jobs.size < @jobs.want' will never be false --- lib/oxidized/jobs.rb | 24 +++++++++++++++++++++--- lib/oxidized/worker.rb | 6 +----- 2 files changed, 22 insertions(+), 8 deletions(-) diff --git a/lib/oxidized/jobs.rb b/lib/oxidized/jobs.rb index 3342679..ff7f92b 100644 --- a/lib/oxidized/jobs.rb +++ b/lib/oxidized/jobs.rb @@ -1,28 +1,46 @@ module Oxidized class Jobs < Array - AVERAGE_DURATION = 5 # initially presume nodes take 5s to complete + AVERAGE_DURATION = 5 # initially presume nodes take 5s to complete + MAX_INTER_JOB_GAP = 300 # add job if more than X from last job started attr_accessor :interval, :max, :want + def initialize max, interval, nodes @max = max @interval = interval @nodes = nodes + @last = Time.now.utc @durations = Array.new @nodes.size, AVERAGE_DURATION duration AVERAGE_DURATION super() end + + def push arg + @last = Time.now.utc + super + end + def duration last @durations.push(last).shift @duration = @durations.inject(:+).to_f / @nodes.size #rolling average new_count end + def new_count @want = ((@nodes.size * @duration) / @interval).to_i @want = 1 if @want < 1 @want = @nodes.size if @want > @nodes.size @want = @max if @want > @max end - def add_job - @want += 1 + + def work + # if a) we want less or same amount of threads as we now running + # and b) we want less threads running than the total amount of nodes + # and c) there is more than MAX_INTER_JOB_GAP since last one was started + # then we want one more thread (rationale is to fix hanging thread causing HOLB) + if @want <= size and @want < @nodes.size + @want +=1 if (Time.now.utc - @last) > MAX_INTER_JOB_GAP + end end + end end diff --git a/lib/oxidized/worker.rb b/lib/oxidized/worker.rb index 99fc8b8..7ed70ac 100644 --- a/lib/oxidized/worker.rb +++ b/lib/oxidized/worker.rb @@ -2,12 +2,9 @@ module Oxidized require 'oxidized/job' require 'oxidized/jobs' class Worker - MAX_INTER_JOB_GAP = 300 - def initialize nodes @nodes = nodes 
@jobs = Jobs.new CFG.threads, CFG.interval, @nodes - @last = Time.now.utc Thread.abort_on_exception = true end @@ -15,7 +12,7 @@ module Oxidized ended = [] @jobs.delete_if { |job| ended << job if not job.alive? } ended.each { |job| process job } - @jobs.add_job if Time.now.utc - @last > MAX_INTER_JOB_GAP + @jobs.work while @jobs.size < @jobs.want Log.debug "Jobs #{@jobs.size}, Want: #{@jobs.want}" # ask for next node in queue non destructive way @@ -26,7 +23,6 @@ module Oxidized # shift nodes and get the next node node = @nodes.get node.running? ? next : node.running = true - @last = Time.now.utc @jobs.push Job.new node end end -- cgit v1.2.1 From 628ab4e2f74837dc04f7466a60a8783d23d5167a Mon Sep 17 00:00:00 2001 From: Saku Ytti Date: Sun, 22 Feb 2015 13:46:37 +0200 Subject: Give more helpful error message from git errors closes #61 --- lib/oxidized/output/git.rb | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/lib/oxidized/output/git.rb b/lib/oxidized/output/git.rb index 0c73638..d5eb8e7 100644 --- a/lib/oxidized/output/git.rb +++ b/lib/oxidized/output/git.rb @@ -1,5 +1,6 @@ module Oxidized class Git < Output + class GitError < OxidizedError; end begin gem 'rugged', '~> 0.21.0' require 'rugged' @@ -71,8 +72,12 @@ class Git < Output end repo = Rugged::Repository.new repo update_repo repo, file, data, @msg, @user, @email - rescue Rugged::OSError, Rugged::RepositoryError - Rugged::Repository.init_at repo, :bare + rescue Rugged::OSError, Rugged::RepositoryError => open_error + begin + Rugged::Repository.init_at repo, :bare + rescue => create_error + raise GitError, "first '#{open_error.message}' was raised while opening git repo, then '#{create_error.message}' was while trying to create git repo" + end retry end -- cgit v1.2.1 From 9145678a23f86ecc52dc3f89999a864f70c2e2d8 Mon Sep 17 00:00:00 2001 From: Saku Ytti Date: Mon, 23 Feb 2015 20:22:50 +0200 Subject: guard against model returning non-string Return empty string instead of what ever 
model returned. Closes #63 --- lib/oxidized/model/model.rb | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/lib/oxidized/model/model.rb b/lib/oxidized/model/model.rb index d3f54b9..45de4d7 100644 --- a/lib/oxidized/model/model.rb +++ b/lib/oxidized/model/model.rb @@ -133,10 +133,10 @@ module Oxidized outputs << out end procs[:pre].each do |pre_proc| - outputs.unshift Oxidized::String.new(instance_eval(&pre_proc)) + outputs.unshift process_cmd_output(instance_eval(&pre_proc), nil) end procs[:post].each do |post_proc| - outputs << Oxidized::String.new(instance_eval(&post_proc)) + outputs << process_cmd_output(instance_eval(&post_proc), nil) end outputs end @@ -152,9 +152,8 @@ module Oxidized private def process_cmd_output output, name - if output.class != Oxidized::String - output = Oxidized::String.new output - end + output = Oxidized::String.new output if ::String === output + output = Oxidized::String.new '' unless Oxidized::String === output output.set_cmd(name) output end -- cgit v1.2.1 From 5d27f19b62edb50e62f2104c22bbd3461d5e962c Mon Sep 17 00:00:00 2001 From: Saku Ytti Date: Mon, 23 Feb 2015 20:28:07 +0200 Subject: update changelog --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 24201a4..24b9cba 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,7 @@ - BUGFIX: xos while using telnet (by @fhibler) - BUGFIX: ironware logout on some models (by @fhibler) - BUGFIX: allow node to be removed while it is being collected +- BUGFIX: if model returns non string value, return empty string # 0.3.0 - FEATURE: *FIXME* bunch of stuff I did for richih, docs needed -- cgit v1.2.1 From 628f6dddc8fd41e07dba99e7fb41566b89d954c5 Mon Sep 17 00:00:00 2001 From: Saku Ytti Date: Tue, 24 Feb 2015 15:24:22 +0200 Subject: set_cmd can't be nil still not sure if this is the right way to guard non-string blocks... 
--- lib/oxidized/model/model.rb | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/oxidized/model/model.rb b/lib/oxidized/model/model.rb index 45de4d7..899b40a 100644 --- a/lib/oxidized/model/model.rb +++ b/lib/oxidized/model/model.rb @@ -133,10 +133,10 @@ module Oxidized outputs << out end procs[:pre].each do |pre_proc| - outputs.unshift process_cmd_output(instance_eval(&pre_proc), nil) + outputs.unshift process_cmd_output(instance_eval(&pre_proc), '') end procs[:post].each do |post_proc| - outputs << process_cmd_output(instance_eval(&post_proc), nil) + outputs << process_cmd_output(instance_eval(&post_proc), '') end outputs end -- cgit v1.2.1 From db165b5e1ebb3091a49029d739b68a913bc75db6 Mon Sep 17 00:00:00 2001 From: Saku Ytti Date: Tue, 24 Feb 2015 16:28:51 +0200 Subject: kill -hup reloads nodes --- lib/oxidized/core.rb | 1 + 1 file changed, 1 insertion(+) diff --git a/lib/oxidized/core.rb b/lib/oxidized/core.rb index ba46f3a..4c67f4b 100644 --- a/lib/oxidized/core.rb +++ b/lib/oxidized/core.rb @@ -17,6 +17,7 @@ module Oxidized Oxidized.mgr = Manager.new nodes = Nodes.new @worker = Worker.new nodes + trap 'HUP' { nodes.load } if CFG.rest? begin require 'oxidized/web' -- cgit v1.2.1