# SlurmJobStates#run (lib/jobs/slurm_job_states.rb), reconstructed from the
# "Report job state per partition" patch.
#
# Queries sacct for the State and Partition of every job it lists since
# (now - @interval), counts the jobs per state/partition pair, and publishes
# each count through @collector as the "slurm_job_states" gauge.
#
# Reads @interval (subtracted from Time.now, so a number of seconds) and
# @collector (must respond to redact! and report!).
def run
  # Pass the date as well as the time: with a bare HH:MM:SS the window start
  # would be wrong whenever (now - @interval) falls on the previous day.
  # sacct accepts YYYY-MM-DDTHH:MM:SS for -S (see the sacct man page).
  start_time = (Time.now - @interval).strftime("%Y-%m-%dT%H:%M:%S")

  # Get raw data from sacct, one "State|Partition" record per line.
  # drop(1) removes the header row and, unlike [1..-1], yields [] rather than
  # nil when sacct printed nothing at all. Blank lines are skipped so they
  # are not counted as jobs.
  jobs = `sacct -a -P -o State,Partition -S #{start_time}`
         .lines
         .map(&:strip)
         .drop(1)
         .reject(&:empty?)
         .map { |line| line.split("|") }

  # Make a tally of each state/partition combo.
  tally = Hash.new(0)
  jobs.each { |job| tally[job] += 1 }

  # Clean up any previously reported metrics to prevent stale labelsets.
  @collector.redact!("slurm_job_states")

  # Report new metrics. A record with an empty partition column splits into
  # a one-element array, so partition is nil for it.
  tally.each do |(state, partition), number|
    @collector.report!(
      "slurm_job_states",
      number,
      help: "Number of jobs in each state",
      type: "gauge",
      labels: {state: state, partition: partition}
    )
  end
end