aboutsummaryrefslogtreecommitdiff
path: root/lib/raiders
diff options
context:
space:
mode:
authorNat Lasseter <nat.lasseter@york.ac.uk>2020-03-16 10:15:21 +0000
committerNat Lasseter <nat.lasseter@york.ac.uk>2020-03-16 10:15:21 +0000
commit666ea91e473554acabe6f6c3477eb915e70a7538 (patch)
tree2f245df6a494c63f6a829f4729552939ce0effdd /lib/raiders
parent58758e907801e332f5e80b4f2fed5a50f080fec1 (diff)
Refactor: renamed jobs to raiders, because pun, and jobs is confusing given the intended application of longboat by the original authors.
Diffstat (limited to 'lib/raiders')
-rw-r--r--lib/raiders/slurm_job_states.rb41
1 files changed, 41 insertions, 0 deletions
diff --git a/lib/raiders/slurm_job_states.rb b/lib/raiders/slurm_job_states.rb
new file mode 100644
index 0000000..20b69c8
--- /dev/null
+++ b/lib/raiders/slurm_job_states.rb
@@ -0,0 +1,41 @@
# Raider that counts recent Slurm jobs per (state, partition) pair and
# publishes the counts through the collector as the "slurm_job_states" gauge.
#
# Each call to #raid shells out to `sacct`, so the binary must be on PATH;
# if it cannot be executed, the error raised by the backtick call propagates.
class SlurmJobStates
  # Name of the metric this raider owns (used for both redact! and report!).
  METRIC_NAME = "slurm_job_states".freeze

  # Timestamp format handed to `sacct -S`. The date is included on purpose:
  # a bare HH:MM:SS is read by sacct as a time on the current day, so a
  # window that reaches back across midnight would start in the future.
  START_TIME_FORMAT = "%Y-%m-%dT%H:%M:%S".freeze

  # @param collector [#redact!, #report!] sink that receives the metrics
  # @param config [Hash] raider settings; :raid_every is subtracted from
  #   Time.now in #raid, i.e. it is the look-back window in seconds
  def initialize(collector, config)
    @collector = collector
    @interval = config[:raid_every]
  end

  # Queries sacct for jobs since (now - interval), drops every previously
  # reported "slurm_job_states" sample, then reports one gauge sample per
  # distinct state/partition combination.
  #
  # @return [Hash{Array<String> => Integer}] the tally that was reported
  def raid
    counts = tally_jobs(Time.now - @interval)

    # Clean up any previously reported metrics to prevent stale labelsets.
    @collector.redact!(METRIC_NAME)

    counts.each do |(state, partition), number|
      @collector.report!(
        METRIC_NAME,
        number,
        help: "Number of jobs in each state",
        type: "gauge",
        labels: {state: state, partition: partition}
      )
    end
  end

  private

  # Runs sacct and tallies its output lines.
  #
  # @param since [Time] start of the accounting window passed to `-S`
  # @return [Hash{Array<String> => Integer}] count keyed by the
  #   pipe-separated fields of each line ([state, partition]; a line with
  #   an empty trailing partition yields a one-element key)
  def tally_jobs(since)
    output = `sacct -a -P -o State,Partition -S #{since.strftime(START_TIME_FORMAT)}`

    output.lines
          .map(&:strip)
          .drop(1)            # header row; unlike [1..-1], safe on empty output
          .reject(&:empty?)   # blank lines are not jobs
          .each_with_object(Hash.new(0)) do |line, counts|
            counts[line.split("|")] += 1
          end
  end
end