# slurm_job_states.rb
# Initial commit: broke out from longboat (Nat Lasseter, 2020-03-17).

# Counts Slurm jobs per state/partition combination, as listed by `sacct`,
# and hands the counts to a collector as the "slurm_job_states" gauge.
class SlurmJobStates
  # Timestamp format passed to `sacct -S`. The date is included so that a
  # window reaching back past midnight still names the previous day; a bare
  # HH:MM:SS value carries no date at all.
  START_TIME_FORMAT = "%Y-%m-%dT%H:%M:%S".freeze

  # collector - object responding to `redact!(name)` and
  #             `report!(name, value, help:, type:, labels:)`.
  # config    - Hash; `config[:raid_every]` is subtracted from `Time.now`
  #             to find the start of the reporting window.
  def initialize(collector, config)
    @collector = collector
    @interval = config[:raid_every]
  end

  # Queries sacct for the window starting `@interval` before now, tallies the
  # rows by [state, partition] and reports one gauge sample per combination.
  # Previously reported samples are always redacted first, even when sacct
  # returned nothing, so stale labelsets do not linger.
  def raid
    start_time = (Time.now - @interval).strftime(START_TIME_FORMAT)

    # Make a tally of each state/partition combo
    tally = Hash.new(0)
    job_rows(start_time).each { |row| tally[row] += 1 }

    # Clean up any previously reported metrics
    # to prevent stale labelsets
    @collector.redact!("slurm_job_states")

    # Report new metrics
    tally.each do |(state, partition), number|
      @collector.report!(
        "slurm_job_states",
        number,
        help: "Number of jobs in each state",
        type: "gauge",
        labels: {state: state, partition: partition}
      )
    end
  end

  private

  # Runs sacct and returns its rows as arrays of "|"-separated fields
  # (state first, partition second), without the header line.
  #
  # `drop(1)` is used instead of `[1..-1]` because the latter yields nil for
  # empty output (e.g. sacct missing or failing), which would crash the caller.
  # Blank lines are skipped so they do not become an all-nil labelset.
  def job_rows(start_time)
    `sacct -a -P -o State,Partition -S #{start_time}`
      .lines
      .map(&:strip)
      .drop(1)
      .reject(&:empty?)
      .map { |line| line.split("|") }
  end
end