about | summary | refs | log | tree | commit | diff
path: root/lib
diff options
context:
space:
mode:
Diffstat (limited to 'lib')
-rw-r--r-- lib/jobs/slurm_job_states.rb 26
1 files changed, 20 insertions, 6 deletions
diff --git a/lib/jobs/slurm_job_states.rb b/lib/jobs/slurm_job_states.rb
index cef5ed6..1211f11 100644
--- a/lib/jobs/slurm_job_states.rb
+++ b/lib/jobs/slurm_job_states.rb
@@ -6,21 +6,35 @@ class SlurmJobStates
def run
start_time = (Time.now - @interval).strftime("%H:%M:%S")
- raw = `sacct -a -P -o State -S #{start_time}`.lines.map(&:strip)[1..-1]
- tally = Hash.new{0}
+ # Get raw data from sacct,
+ # read jobs into an array,
+ # remove any whitespace from the ends of each string,
+ # drop the header,
+ # and split each line into state and partition
+ raw = `sacct -a -P -o State,Partition -S #{start_time}`.
+ lines.
+ map(&:strip)[1..-1].
+ map{|l|l.split("|")}
- raw.each do |state|
- tally[state] += 1
+ # Make a tally of each state/partition combo
+ tally = Hash.new{0}
+ raw.each do |job|
+ tally[job] += 1
end
- tally.each do |state, number|
+ # Clean up any previously reported metrics
+ # to prevent stale labelsets
+ @collector.redact!("slurm_job_states")
+
+ # Report new metrics
+ tally.each do |labelset, number|
@collector.report!(
"slurm_job_states",
number,
help: "Number of jobs in each state",
type: "gauge",
- labels: {state: state}
+ labels: {state: labelset[0], partition: labelset[1]}
)
end
end