# frozen_string_literal: true

# Derived from pending_on_qos.rb, the file deleted by commit 37451d9
# ("Rename QoS pending raider with slurm_ prefix", Killian Murphy, 2020-11-11).

require 'date'
require 'time'

# For each Viking partition, report the number of jobs pending for a long time
# due to QoS reasons.
#
# A job counts as "pending for a long time" when the time elapsed since its
# submission exceeds the threshold configured for its partition.
class PendingOnQos
  # Longest acceptable pending time, in seconds, keyed by partition name.
  PARTITION_THRESHOLDS = {
    nodes: 604_800,       # 7 days
    week: 1_209_600,      # 14 days
    month: 2_419_200,     # 28 days
    himem: 604_800,       # 7 days
    himem_week: 1_209_600, # 14 days
    gpu: 604_800,         # 7 days
    interactive: 900,     # 15 minutes
    test: 900,            # 15 minutes
    preempt: 2_419_200    # 28 days
  }.freeze

  # @param collector [#report!] sink that receives one gauge per partition
  # @param config [Object] accepted for interface compatibility; not used here
  def initialize(collector, config)
    @collector = collector
    @partition_thresholds = PARTITION_THRESHOLDS
  end

  # Query squeue once per partition and report how many QoS-blocked pending
  # jobs have waited longer than that partition's threshold.
  #
  # @raise [ArgumentError] if squeue prints a submit time that cannot be parsed
  # @return [void]
  def raid
    @partition_thresholds.each do |partition, threshold|
      submit_times = qos_pending_submit_times(partition)
      now = Time.now.to_i

      overdue = submit_times.count do |submitted|
        (now - submitted.to_i) > threshold
      end

      @collector.report!(
        'pending_on_qos',
        overdue,
        help: 'Number of jobs pending beyond a threshold for QoS reasons',
        type: 'gauge',
        labels: {
          partition: partition.to_s
        }
      )
    end
  end

  private

  # Submission times of the pending jobs in +partition+ whose pending reason
  # mentions QOS.
  #
  # @param partition [Symbol, String] partition name passed to squeue
  # @return [Array<Time>]
  def qos_pending_submit_times(partition)
    squeue_cmd = [
      'squeue',
      '--format="%R,%V"',
      '--noheader',
      "--partition=#{partition}",
      '--state=PENDING'
    ].join(' ')

    rows = `#{squeue_cmd}`.split("\n")

    # Each row is "<reason>,<submit time>". Split on the LAST comma so that a
    # reason containing commas cannot shift the timestamp column.
    qos_rows = rows.map { |row| row.rpartition(',') }
                   .select { |reason, _sep, _submitted| reason.include?('QOS') }

    # The submit time carries no UTC offset. Time.parse reads such a string as
    # local time, whereas DateTime.parse would assume UTC and skew every job's
    # age by the host's UTC offset.
    qos_rows.map { |_reason, _sep, submitted| Time.parse(submitted) }
  end
end