aboutsummaryrefslogtreecommitdiff
path: root/slurm_job_states.rb
diff options
context:
space:
mode:
authorNat Lasseter <nat.lasseter@york.ac.uk>2020-03-17 10:14:18 +0000
committerNat Lasseter <nat.lasseter@york.ac.uk>2020-03-17 10:14:18 +0000
commitd166a073ad3a32af77152bf4b16206c32f9890b6 (patch)
tree2b9c0988ddfda9e96a8865e177472699fe6efb47 /slurm_job_states.rb
Initial commit: broke out from longboat
Diffstat (limited to 'slurm_job_states.rb')
-rw-r--r--slurm_job_states.rb41
1 files changed, 41 insertions, 0 deletions
diff --git a/slurm_job_states.rb b/slurm_job_states.rb
new file mode 100644
index 0000000..20b69c8
--- /dev/null
+++ b/slurm_job_states.rb
@@ -0,0 +1,41 @@
# Counts recent Slurm jobs per state/partition pair, as listed by `sacct`,
# and reports the counts to a collector as the "slurm_job_states" gauge.
class SlurmJobStates
  # collector - object responding to redact!(name) and
  #             report!(name, value, help:, type:, labels:)
  # config    - hash; config[:raid_every] is the look-back window in seconds
  def initialize(collector, config)
    @collector = collector
    @interval = config[:raid_every]
  end

  # Queries sacct for jobs since (now - interval), then replaces the
  # previously reported "slurm_job_states" samples with a fresh tally.
  def raid
    # Pass a full date-time rather than just HH:MM:SS: a bare time is taken
    # as a time on the current day, so a window that crosses midnight would
    # otherwise start at the wrong moment.
    start_time = (Time.now - @interval).strftime("%Y-%m-%dT%H:%M:%S")

    # Make a tally of each state/partition combo
    tally = Hash.new(0)
    job_rows(start_time).each { |row| tally[row] += 1 }

    # Clean up any previously reported metrics
    # to prevent stale labelsets
    @collector.redact!("slurm_job_states")

    # Report new metrics
    tally.each do |(state, partition), number|
      @collector.report!(
        "slurm_job_states",
        number,
        help: "Number of jobs in each state",
        type: "gauge",
        labels: {state: state, partition: partition}
      )
    end
  end

  private

  # Runs sacct and returns one [state, partition] array per output row.
  #
  # The first line is the header and is dropped. `drop(1)` is used instead
  # of `[1..-1]` because the latter yields nil when there is no output at
  # all, which would raise on the next call in the chain. Blank lines are
  # skipped so they do not produce rows with nil labels.
  def job_rows(start_time)
    `sacct -a -P -o State,Partition -S #{start_time}`
      .lines
      .map(&:strip)
      .drop(1)
      .reject(&:empty?)
      .map { |line| line.split("|") }
  end
end