From 61c128c95ffbcdff0e379d8b005c287db54c5cc3 Mon Sep 17 00:00:00 2001 From: Saku Ytti Date: Sat, 21 Feb 2015 15:53:46 +0200 Subject: allow setting IP from source sql/csv can now set IP as well as name, useful if you don't have FQDN. If IP is given, then we don't try to resolve name. closes #78 --- lib/oxidized/node.rb | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'lib/oxidized') diff --git a/lib/oxidized/node.rb b/lib/oxidized/node.rb index 6bc2b0f..2d41600 100644 --- a/lib/oxidized/node.rb +++ b/lib/oxidized/node.rb @@ -10,7 +10,8 @@ module Oxidized alias :running? :running def initialize opt @name = opt[:name] - @ip = Resolv.getaddress @name + @ip = IPAddr.new(opt[:ip]).to_s rescue nil + @ip ||= Resolv.getaddress @name @group = opt[:group] @input = resolve_input opt @output = resolve_output opt -- cgit v1.2.1 From ba2d48b5d4f7bf8fa657ba4c59416250cf6caca7 Mon Sep 17 00:00:00 2001 From: Saku Ytti Date: Sat, 21 Feb 2015 16:23:22 +0200 Subject: Increase rolling average view Previously view was 2 nodes, so if average was 7s then some node took 1000s your average would be 503.5s. Now we're looking rolling average of each node, which might not be wise either, perhaps I should limit it to last 100 or 1000 nodes. Since we really don't want another place where we have potentially unbounded amount of state... 
--- lib/oxidized/jobs.rb | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) (limited to 'lib/oxidized') diff --git a/lib/oxidized/jobs.rb b/lib/oxidized/jobs.rb index 6476744..0511b49 100644 --- a/lib/oxidized/jobs.rb +++ b/lib/oxidized/jobs.rb @@ -1,17 +1,17 @@ module Oxidized class Jobs < Array - attr_accessor :interval, :duration, :max, :want + attr_accessor :interval, :max, :want def initialize max, interval, nodes @max = max - #@interval = interval * 60 @interval = interval @nodes = nodes - @duration = 4 + @durations = Array.new(@nodes.size, 5) # guess that nodes take 5s new_count super() end def duration last - @duration = (@duration + last) / 2 + @durations.push(last).shift + @duration = @durations.inject(:+).to_f / @nodes.size #rolling average new_count end def new_count @@ -20,5 +20,8 @@ module Oxidized @want = @nodes.size if @want > @nodes.size @want = @max if @want > @max end + def add_job + @want += 1 + end end end -- cgit v1.2.1 From 9c54ed631098db81a8cb2db60890af705c631541 Mon Sep 17 00:00:00 2001 From: Saku Ytti Date: Sat, 21 Feb 2015 16:29:24 +0200 Subject: Force new job if too long since last job MAX_INTER_JOB_GAP is now 300s, if latest job was started 300s ago, we add new job. Rationale is that if we want n jobs, and all these jobs are taking very very long, or perhaps hanging, then we are blocking everything else too. Consider you use one job, because it's enough to meet your rotation interval quota. Then some one box is somehow taking tens of minutes or hours, we won't figure out new amount of workers until it finishes, so we're blocking all other jobs from spawning. I'm not super happy about this solution, not really sure what is the right way to tackle it. 
--- lib/oxidized/nodes.rb | 2 +- lib/oxidized/worker.rb | 4 ++++ 2 files changed, 5 insertions(+), 1 deletion(-) (limited to 'lib/oxidized') diff --git a/lib/oxidized/nodes.rb b/lib/oxidized/nodes.rb index 032118d..3586b97 100644 --- a/lib/oxidized/nodes.rb +++ b/lib/oxidized/nodes.rb @@ -1,6 +1,6 @@ module Oxidized - require 'oxidized/node' require 'ipaddr' + require 'oxidized/node' class Oxidized::NotSupported < OxidizedError; end class Oxidized::NodeNotFound < OxidizedError; end class Nodes < Array diff --git a/lib/oxidized/worker.rb b/lib/oxidized/worker.rb index e274e1e..8fe7ab5 100644 --- a/lib/oxidized/worker.rb +++ b/lib/oxidized/worker.rb @@ -2,15 +2,18 @@ module Oxidized require 'oxidized/job' require 'oxidized/jobs' class Worker + MAX_INTER_JOB_GAP = 300 def initialize nodes @nodes = nodes @jobs = Jobs.new CFG.threads, CFG.interval, @nodes + @last = Time.now.utc Thread.abort_on_exception = true end def work ended = [] @jobs.delete_if { |job| ended << job if not job.alive? } ended.each { |job| process job } + @jobs.add_job if Time.now.utc - @last > MAX_INTER_JOB_GAP while @jobs.size < @jobs.want Log.debug "Jobs #{@jobs.size}, Want: #{@jobs.want}" # ask for next node in queue non destructive way @@ -21,6 +24,7 @@ module Oxidized # shift nodes and get the next node node = @nodes.get node.running? ? next : node.running = true + @last = Time.now.utc @jobs.push Job.new node end end -- cgit v1.2.1 From 6634f355d99ede5ad6cd6149324d028e455c6eee Mon Sep 17 00:00:00 2001 From: Saku Ytti Date: Sun, 22 Feb 2015 12:12:26 +0200 Subject: Allow node to be removed while collecting Closes #68 (hopefully at least) Further, our TODO to refactor/redesign the code to move state from memory to disk should help. 
--- lib/oxidized/worker.rb | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'lib/oxidized') diff --git a/lib/oxidized/worker.rb b/lib/oxidized/worker.rb index 8fe7ab5..99fc8b8 100644 --- a/lib/oxidized/worker.rb +++ b/lib/oxidized/worker.rb @@ -3,12 +3,14 @@ module Oxidized require 'oxidized/jobs' class Worker MAX_INTER_JOB_GAP = 300 + def initialize nodes @nodes = nodes @jobs = Jobs.new CFG.threads, CFG.interval, @nodes @last = Time.now.utc Thread.abort_on_exception = true end + def work ended = [] @jobs.delete_if { |job| ended << job if not job.alive? } @@ -28,6 +30,7 @@ module Oxidized @jobs.push Job.new node end end + def process job node = job.node node.last = job @@ -53,6 +56,9 @@ module Oxidized end Log.warn msg end + rescue NodeNotFound + Log.warn "#{node.name} not found, removed while collecting?" end + end end -- cgit v1.2.1 From 33124f2570a4d8c4dfa153120dbc06b3c725a25c Mon Sep 17 00:00:00 2001 From: Saku Ytti Date: Sun, 22 Feb 2015 12:28:08 +0200 Subject: run #duration not #new_count @duration isn't set before #duration is ran --- lib/oxidized/jobs.rb | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'lib/oxidized') diff --git a/lib/oxidized/jobs.rb b/lib/oxidized/jobs.rb index 0511b49..3342679 100644 --- a/lib/oxidized/jobs.rb +++ b/lib/oxidized/jobs.rb @@ -1,12 +1,13 @@ module Oxidized class Jobs < Array + AVERAGE_DURATION = 5 # initially presume nodes take 5s to complete attr_accessor :interval, :max, :want def initialize max, interval, nodes @max = max @interval = interval @nodes = nodes - @durations = Array.new(@nodes.size, 5) # guess that nodes take 5s - new_count + @durations = Array.new @nodes.size, AVERAGE_DURATION + duration AVERAGE_DURATION super() end def duration last -- cgit v1.2.1 From aea148d771dca0e10ca1a2f5505446fbc689dad2 Mon Sep 17 00:00:00 2001 From: Saku Ytti Date: Sun, 22 Feb 2015 13:05:37 +0200 Subject: refactor HOLB prevention Still not sure we want this. 
But the previous one might have caused an infinite loop in #work. Consider we have just 1 node altogether, and our rotation interval is more than our MAX_INTER_JOB_GAP, then we'd raise @want to 2, instead of 1. Now we want more threads than we have nodes, and 'while @jobs.size < @jobs.want' will never become false --- lib/oxidized/jobs.rb | 24 +++++++++++++++++++++--- lib/oxidized/worker.rb | 6 +----- 2 files changed, 22 insertions(+), 8 deletions(-) (limited to 'lib/oxidized') diff --git a/lib/oxidized/jobs.rb b/lib/oxidized/jobs.rb index 3342679..ff7f92b 100644 --- a/lib/oxidized/jobs.rb +++ b/lib/oxidized/jobs.rb @@ -1,28 +1,46 @@ module Oxidized class Jobs < Array - AVERAGE_DURATION = 5 # initially presume nodes take 5s to complete + AVERAGE_DURATION = 5 # initially presume nodes take 5s to complete + MAX_INTER_JOB_GAP = 300 # add job if more than X from last job started attr_accessor :interval, :max, :want + def initialize max, interval, nodes @max = max @interval = interval @nodes = nodes + @last = Time.now.utc @durations = Array.new @nodes.size, AVERAGE_DURATION duration AVERAGE_DURATION super() end + + def push arg + @last = Time.now.utc + super + end + def duration last @durations.push(last).shift @duration = @durations.inject(:+).to_f / @nodes.size #rolling average new_count end + def new_count @want = ((@nodes.size * @duration) / @interval).to_i @want = 1 if @want < 1 @want = @nodes.size if @want > @nodes.size @want = @max if @want > @max end - def add_job - @want += 1 + + def work + # if a) we want less or same amount of threads as we now running + # and b) we want less threads running than the total amount of nodes + # and c) there is more than MAX_INTER_JOB_GAP since last one was started + # then we want one more thread (rationale is to fix hanging thread causing HOLB) + if @want <= size and @want < @nodes.size + @want +=1 if (Time.now.utc - @last) > MAX_INTER_JOB_GAP + end end + end end diff --git a/lib/oxidized/worker.rb b/lib/oxidized/worker.rb index 
99fc8b8..7ed70ac 100644 --- a/lib/oxidized/worker.rb +++ b/lib/oxidized/worker.rb @@ -2,12 +2,9 @@ module Oxidized require 'oxidized/job' require 'oxidized/jobs' class Worker - MAX_INTER_JOB_GAP = 300 - def initialize nodes @nodes = nodes @jobs = Jobs.new CFG.threads, CFG.interval, @nodes - @last = Time.now.utc Thread.abort_on_exception = true end @@ -15,7 +12,7 @@ module Oxidized ended = [] @jobs.delete_if { |job| ended << job if not job.alive? } ended.each { |job| process job } - @jobs.add_job if Time.now.utc - @last > MAX_INTER_JOB_GAP + @jobs.work while @jobs.size < @jobs.want Log.debug "Jobs #{@jobs.size}, Want: #{@jobs.want}" # ask for next node in queue non destructive way @@ -26,7 +23,6 @@ module Oxidized # shift nodes and get the next node node = @nodes.get node.running? ? next : node.running = true - @last = Time.now.utc @jobs.push Job.new node end end -- cgit v1.2.1 From 628ab4e2f74837dc04f7466a60a8783d23d5167a Mon Sep 17 00:00:00 2001 From: Saku Ytti Date: Sun, 22 Feb 2015 13:46:37 +0200 Subject: Give more helpful error message from git errors closes #61 --- lib/oxidized/output/git.rb | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'lib/oxidized') diff --git a/lib/oxidized/output/git.rb b/lib/oxidized/output/git.rb index 0c73638..d5eb8e7 100644 --- a/lib/oxidized/output/git.rb +++ b/lib/oxidized/output/git.rb @@ -1,5 +1,6 @@ module Oxidized class Git < Output + class GitError < OxidizedError; end begin gem 'rugged', '~> 0.21.0' require 'rugged' @@ -71,8 +72,12 @@ class Git < Output end repo = Rugged::Repository.new repo update_repo repo, file, data, @msg, @user, @email - rescue Rugged::OSError, Rugged::RepositoryError - Rugged::Repository.init_at repo, :bare + rescue Rugged::OSError, Rugged::RepositoryError => open_error + begin + Rugged::Repository.init_at repo, :bare + rescue => create_error + raise GitError, "first '#{open_error.message}' was raised while opening git repo, then '#{create_error.message}' was while trying to 
create git repo" + end retry end -- cgit v1.2.1 From 9145678a23f86ecc52dc3f89999a864f70c2e2d8 Mon Sep 17 00:00:00 2001 From: Saku Ytti Date: Mon, 23 Feb 2015 20:22:50 +0200 Subject: guard against model returning non-string Return empty string instead of whatever model returned. Closes #63 --- lib/oxidized/model/model.rb | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) (limited to 'lib/oxidized') diff --git a/lib/oxidized/model/model.rb b/lib/oxidized/model/model.rb index d3f54b9..45de4d7 100644 --- a/lib/oxidized/model/model.rb +++ b/lib/oxidized/model/model.rb @@ -133,10 +133,10 @@ module Oxidized outputs << out end procs[:pre].each do |pre_proc| - outputs.unshift Oxidized::String.new(instance_eval(&pre_proc)) + outputs.unshift process_cmd_output(instance_eval(&pre_proc), nil) end procs[:post].each do |post_proc| - outputs << Oxidized::String.new(instance_eval(&post_proc)) + outputs << process_cmd_output(instance_eval(&post_proc), nil) end outputs end @@ -152,9 +152,8 @@ module Oxidized private def process_cmd_output output, name - if output.class != Oxidized::String - output = Oxidized::String.new output - end + output = Oxidized::String.new output if ::String === output + output = Oxidized::String.new '' unless Oxidized::String === output output.set_cmd(name) output end -- cgit v1.2.1 From 628f6dddc8fd41e07dba99e7fb41566b89d954c5 Mon Sep 17 00:00:00 2001 From: Saku Ytti Date: Tue, 24 Feb 2015 15:24:22 +0200 Subject: set_cmd can't be nil still not sure if this is the right way to guard non-string blocks... 
--- lib/oxidized/model/model.rb | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'lib/oxidized') diff --git a/lib/oxidized/model/model.rb b/lib/oxidized/model/model.rb index 45de4d7..899b40a 100644 --- a/lib/oxidized/model/model.rb +++ b/lib/oxidized/model/model.rb @@ -133,10 +133,10 @@ module Oxidized outputs << out end procs[:pre].each do |pre_proc| - outputs.unshift process_cmd_output(instance_eval(&pre_proc), nil) + outputs.unshift process_cmd_output(instance_eval(&pre_proc), '') end procs[:post].each do |post_proc| - outputs << process_cmd_output(instance_eval(&post_proc), nil) + outputs << process_cmd_output(instance_eval(&post_proc), '') end outputs end -- cgit v1.2.1 From db165b5e1ebb3091a49029d739b68a913bc75db6 Mon Sep 17 00:00:00 2001 From: Saku Ytti Date: Tue, 24 Feb 2015 16:28:51 +0200 Subject: kill -hup reloads nodes --- lib/oxidized/core.rb | 1 + 1 file changed, 1 insertion(+) (limited to 'lib/oxidized') diff --git a/lib/oxidized/core.rb b/lib/oxidized/core.rb index ba46f3a..4c67f4b 100644 --- a/lib/oxidized/core.rb +++ b/lib/oxidized/core.rb @@ -17,6 +17,7 @@ module Oxidized Oxidized.mgr = Manager.new nodes = Nodes.new @worker = Worker.new nodes + trap 'HUP' { nodes.load } if CFG.rest? begin require 'oxidized/web' -- cgit v1.2.1