aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--pending_on_qos.rb52
1 files changed, 0 insertions, 52 deletions
diff --git a/pending_on_qos.rb b/pending_on_qos.rb
deleted file mode 100644
index 330011c..0000000
--- a/pending_on_qos.rb
+++ /dev/null
@@ -1,52 +0,0 @@
-# frozen_string_literal: true
-
-require 'date'
-
-# For each Viking partition, report the number of jobs pending for a long time
-# due to QoS reasons.
-class PendingOnQos
- def initialize(collector, config)
- @collector = collector
- @partition_thresholds = {
- nodes: 604_800,
- week: 1_209_600,
- month: 2_419_200,
- himem: 604_800,
- himem_week: 1_209_600,
- gpu: 604_800,
- interactive: 900,
- test: 900,
- preempt: 2_419_200
- }
- end
-
- def raid
- @partition_thresholds.each do |partition, threshold|
- squeue_cmd = [
- 'squeue',
- '--format="%R,%V"',
- '--noheader',
- "--partition=#{partition}",
- '--state=PENDING'
- ].join(' ')
-
- data = `#{squeue_cmd}`.split("\n").grep(/QOS/).map do |row|
- row.split(',')
- end
-
- count = data.count do |columns|
- (Time.now.to_i - DateTime.parse(columns[1]).to_time.to_i) > threshold
- end
-
- @collector.report!(
- 'pending_on_qos',
- count,
- help: 'Number of jobs pending beyond a threshold for QoS reasons',
- type: 'gauge',
- labels: {
- partition: partition.to_s
- }
- )
- end
- end
-end