aboutsummaryrefslogtreecommitdiff
path: root/slurm_job_states.rb
blob: 20b69c8bbae218ff0799caea69175e86cd3faf21 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
# Counts Slurm jobs per state/partition combination by querying `sacct`
# and publishes the counts through a collector as the "slurm_job_states"
# gauge.
class SlurmJobStates
  # Name of the metric that is redacted and re-reported on every raid.
  METRIC_NAME = "slurm_job_states".freeze

  # @param collector [#redact!, #report!] sink the metrics are published to
  # @param config [Hash] must provide :raid_every; the value is subtracted
  #   from the current time (i.e. it is a number of seconds) to get the
  #   start of the window passed to `sacct -S`
  def initialize(collector, config)
    @collector = collector
    @interval = config[:raid_every]
  end

  # Queries sacct, replaces the previously reported "slurm_job_states"
  # metrics with a fresh count for every state/partition combination seen.
  #
  # If sacct prints nothing at all, the old metrics are still cleared and
  # nothing new is reported.
  #
  # @return [Hash{Array<String> => Integer}] the tally that was reported
  def raid
    tally = tally_rows(job_rows)

    # Clean up any previously reported metrics
    # to prevent stale labelsets
    @collector.redact!(METRIC_NAME)

    # Report new metrics
    tally.each do |labelset, number|
      state, partition = labelset
      @collector.report!(
        METRIC_NAME,
        number,
        help: "Number of jobs in each state",
        type: "gauge",
        labels: {state: state, partition: partition}
      )
    end
  end

  private

  # Runs sacct and returns one [state, partition] array per output row.
  def job_rows
    # Send the date along with the time: a bare HH:MM:SS is interpreted
    # relative to the current day, which gives the wrong window whenever
    # the look-back interval crosses midnight.
    start_time = (Time.now - @interval).strftime("%Y-%m-%dT%H:%M:%S")

    `sacct -a -P -o State,Partition -S #{start_time}`
      .lines
      .map(&:strip)
      .drop(1)           # header row; drop (unlike [1..-1]) is safe on empty output
      .reject(&:empty?)  # ignore blank lines instead of reporting nil labels
      .map { |line| line.split("|") }
  end

  # Counts how many times each distinct row occurs.
  def tally_rows(rows)
    rows.each_with_object(Hash.new(0)) { |row, counts| counts[row] += 1 }
  end
end