From e9aa40ca482919267dc4edc3e4f3efbe8a0f81bb Mon Sep 17 00:00:00 2001 From: Killian Murphy Date: Fri, 30 Oct 2020 18:18:36 +0000 Subject: Checkpointing --- pending_on_qos.rb | 22 ++++++++++++++-------- 1 file changed, 14 insertions(+), 8 deletions(-) diff --git a/pending_on_qos.rb b/pending_on_qos.rb index 7f7b3df..7d99c8c 100644 --- a/pending_on_qos.rb +++ b/pending_on_qos.rb @@ -20,25 +20,31 @@ class PendingOnQos def raid @partition_thresholds.each do |partition, threshold| - start_time = (Time.now - @partition_thresholds[:partition]) - .strftime('%Y-%m-:%d') + start_time = (Time.now - threshold).strftime('%Y-%m-%d') squeue_cmd = [ 'squeue', '--format="%A,%R,%V"', '--noheader', - '--parsable2', "--partition=#{partition}", '--state=PENDING' ].join(' ') + + output = `#{squeue_cmd}`.split('\n') + + puts output end @collector.report!( - name: 'pending_on_qos', - value: 255, - help: 'Number of jobs pending for QoS reasons', - type: 'gauge', - labels: { partition: 'nodes' } + 'pending_on_qos', + 255, + { + help: 'Number of jobs pending for QoS reasons', + type: 'gauge', + labels: { + partition: 'nodes' + } + } ) end end -- cgit v1.2.1