aboutsummaryrefslogtreecommitdiff
path: root/bin/mauveclient
blob: 70f33c7d966e08a3844e05ce614a4aea460d510c (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
#! /usr/bin/ruby1.8
# NAME
#   mauveclient - send alert(s) to a given alert station
#
# SYNOPSIS
#   mauveclient [<destination>]
#               [--source | -o <source>] [--replace | -p] [--verbose | -v]
#               [--id <alertid> ... ]
#
# OPTIONS
# <destination>   Where the alert should go. This can be either a hostname or
#                 an IP address, and optionally a port, separated by a colon.
#                 The default port is 32741.
#
#                 If no destination is supplied, the value from the file
#                 /etc/mauvealert/mauveclient.destination is used. If no
#                 destination can be determined, an error is raised.
#
#                 If a hostname is given and no port is specified, SRV records
#                 are used to determine where the alerts should go to. The SRV
#                 prefix is _mauvealert._udp. If no SRV records are found, A
#                 records are used instead.
#                
#                 IPv6 addresses can be used, but must be enclosed in square
#                 brackets, e.g. [2001:41c8::12].
#
# --source, -o <source>  identify the source of the alert (defaults to
#                        hostname, but you might want to name your monitoring
#                        systems more explicitly).
#
# --replace, -p          Send an update replacing all other alerts for this
#                        source -- any previous alerts not specified in this
#                        update are assumed to be cleared. If you specify this
#                        option, you don't have to supply *any* alerts to raise
#                        or clear (in which case all alerts from that source
#                        will be cleared).
#
# --verbose, -v          If you specify this option once, it will print the
#                        transmission ID of the packet for debugging. If you
#                        specify it twice, it will print the entire data
#                        structure.
#
# You can specify any number of alerts in an update - every time you specify
# --id starts a new alert.
#
# --id, -i <alertid>       Unique identifier for each alert raised. This should
#                          be unique on a per-source basis, i.e. for an
#                          individual application or host.
#
# --summary, -s <summary>  Text for humans describing the nature of the alert.
#                          Only the first 100 characters are guaranteed to make
#                          it to pagers, twitter, SMS etc.
#
# --detail, -d <detail>    HTML fragment describing the alert in more detail,
#                          no limit on length.
#
# --subject, -u <subject>  Set the subject of the alert (i.e. the server/entity
#                          that this alert concerns). If no subject is
#                          specified, it is assumed to be the same as <source>,
#                          detailed above.
#
# --raise, -r <time>       Mark the alert to be (re)raised at the given time.
#                          If no time is supplied, "now" is assumed. See
#                          SPECIFYING TIMES below for the format of <time>.
#
# --clear, -c <time>       Mark the alert to be cleared at the given time. If
#                          no time is specified, "now" is assumed. See
#                          SPECIFYING TIMES below for the format of <time>.
#
#
# SPECIFYING TIMES
#
# Times can be specified for an alert to be raised or cleared. This can be
# specified as any time in the past or future. The format is + or -, followed
# by a number, followed by a letter determining the units, one of s, m, h, d,
# representing seconds, minutes, hours, and days, respectively. If no units are
# specified, seconds is assumed. If no sign is specified, "+" is assumed.
#
# Some example times are:
#
# now   Immediately
# 10    In 10 seconds time
# +10m  In 10 minutes time
# -10h  10 Hours ago
#
# SENSIBLE USAGE
#
# Mauve uses UDP to transmit data, which means that there is no guarantee a
# single packet will reach the server. Therefore:
#
#  * The host/application should send "raise" notification regularly until the
#    alert clears, whereupon it should regularly send "cleared" notifications.
#
#  * When setting a heartbeat-type alert, make sure that the raise time is more
#    than double the period of the "clear" notifications. For example, if the
#    host is sending a clear every 120 seconds, the raise time should be
#    greater than 240 seconds, preferably greater than 360 seconds to allow for
#    packets going missing, reducing the likelihood of false alerts.
# 
# Try to convey salient details about the alerts in the relevant fields. A
# typical short alert from Mauve might read
#
#   RAISED: <subject>: <summary> -- <source>
#
# Make sure that the alert will be understood with just those three fields
# displayed.
#
#  * Keep the summary brief and salient. 
#  
#  * Keep the summary constant, unless there has been a material change to the
#    nature of the alert. Mauve may re-send any messages when the subject
#    changes. If something is changing quickly, like load averages, best not
#    to put them in the summary.
#
#  * Make sure that the subject is set correctly. Remember if no subject is
#    set, then the source of the alert is used instead. 
#
#  * Make sure that the source is correct too -- nothing worse than an alert
#    that comes in with an ambiguous origin. 
#
#  * The alert ID is used internally by Mauve to keep alerts consistent. This
#    must be unique on a per-source basis. It is OK to have many alerts with the
#    ID "heartbeat" as long as the source of the alert is different in each case.
#
# The raise and clear times can be specified, if needed, but generally leaving
# them empty, i.e. setting them to "now" is sufficient. Mauve remembers when
# an alert is first raised.
#
# EXAMPLES
#
# To raise an alert:
#
#   mauveclient -o smtp-out-1.example.com -i mailqueue \\
#     -d "Mail queue has <b>54232</b> messages in it. That's <em>LOADS</em>" \\
#     -s "Mail queue too big on outgoing SMTP server" -r
# 
# To clear an alert:
#
#   mauveclient -o smtp-out-1.example.com -i mailqueue -c
#
# To create a "heartbeat" alert, i.e. one that says "Currently OK, but raise in the future if nothing more is heard":
#
#   mauveclient -i heartbeat -d "No heartbeat received for 1.2.3.4. Could be down!" -s "heartbeat failed"  -c -r +10m
#
# AUTHOR
#
# Patrick J Cherry <patrick@bytemark.co.uk>
#

require 'getoptlong'
require 'mauve/sender'
require 'mauve/proto'
require 'mauve/mauve_time'
require 'pp'

# Reference instant, captured once when the script starts. parse_time_spec
# resolves "now" and every relative offset against this same value, so all
# times computed during one run share a common base.
NOW = Mauve::MauveTime.now

# Report a fatal problem on standard error, point the user at the built-in
# help, and terminate the program with exit status 1.
#
# msg:: text describing what went wrong.
def error(msg)
  [
    "*** Error: #{msg}\n",
    "*** For help, type: #{$0} -h\n"
  ].each { |text| STDERR.print(text) }

  exit(1)
end

# Convert a time specification from the command line into an absolute time.
#
# spec:: "now", or an optional sign (+ or -), a whole number, and an optional
#        unit letter: s (seconds, the default), m (minutes), h (hours) or
#        d (days). A nil or empty spec is treated as "now".
#
# Returns NOW shifted by the requested number of seconds.
# Raises ArgumentError if spec is not in the format described above.
def parse_time_spec(spec = "now")
  # An option declared with OPTIONAL_ARGUMENT yields "" when no value was
  # given, so the "now" default in the signature would otherwise never apply
  # to a bare --raise / --clear.
  spec = "now" if spec.to_s.empty?

  case spec
  when "now"
    NOW

  # \A and \z anchor to the whole string; ^ and $ would accept any input that
  # merely contains one well-formed line.
  when /\A(\+|-)?(\d+)([smhd])?\z/
    sign, amount, unit = $1, $2, $3

    # Compare against String literals: on Ruby 1.8 a character literal such
    # as ?m is an Integer and never matches the captured unit.
    seconds_per_unit = case unit
      when "m" then 60
      when "h" then 3600
      when "d" then 86400
      else 1
    end

    offset = amount.to_i * seconds_per_unit
    offset = -offset if sign == "-"

    NOW + offset

  else
    raise ArgumentError, "Unrecognised time format #{spec.inspect}"
  end
end

begin
  # The update that will be transmitted. Alerts are appended to it as --id
  # options are encountered on the command line.
  update = Mauve::Proto::AlertUpdate.new
  update.replace = false
  update.alert = []

  current_alert = nil    # alert that the per-alert options currently apply to
  verbosity     = 0      # number of -v flags seen
  show_help     = false

  # Options taking a value are declared OPTIONAL_ARGUMENT (not required) so
  # that an empty value can be caught and reported in the loop below.
  parser = GetoptLong.new(
    ['-h', '--help',    GetoptLong::NO_ARGUMENT],
    ['-o', '--source',  GetoptLong::OPTIONAL_ARGUMENT],
    ['-p', '--replace', GetoptLong::NO_ARGUMENT],
    ['-i', '--id',      GetoptLong::OPTIONAL_ARGUMENT],
    ['-s', '--summary', GetoptLong::OPTIONAL_ARGUMENT],
    ['-u', '--subject', GetoptLong::OPTIONAL_ARGUMENT],
    ['-c', '--clear',   GetoptLong::OPTIONAL_ARGUMENT],
    ['-r', '--raise',   GetoptLong::OPTIONAL_ARGUMENT],
    ['-d', '--detail',  GetoptLong::OPTIONAL_ARGUMENT],
    ['-v', '--verbose', GetoptLong::NO_ARGUMENT]
  )

  # Options that are allowed to arrive without a value.
  valueless_ok = %w(-h -p -v -c -r)

  parser.each do |flag, value|
    if value.empty? && !valueless_ok.include?(flag)
      error "#{flag} cannot be empty"
    end

    case flag
    when '-h' then show_help = true
    when '-p' then update.replace = true
    when '-v' then verbosity += 1

    when '-o'
      error "Can only specify one source" if update.source
      update.source = value

    when '-i'
      # Each --id opens a new alert; later per-alert options attach to it.
      duplicate = update.alert.any? { |existing| existing.id == value }
      error "Cannot specify the same ID twice in one update -- ID #{value}" if duplicate

      current_alert = Mauve::Proto::Alert.new
      current_alert.id = value
      update.alert << current_alert

    else
      # Everything else describes the alert opened by the most recent --id.
      error "Must specify --id before message" unless current_alert

      case flag
      when '-s' then current_alert.summary = value
      when '-u' then current_alert.subject = value
      when '-d' then current_alert.detail  = value
      when '-c' then current_alert.clear_time = parse_time_spec(value).to_i
      when '-r' then current_alert.raise_time = parse_time_spec(value).to_i
      else error "Unknown option #{flag}"
      end
    end
  end

  # Help is only acted upon once every option has been parsed.
  if show_help
    # The usage text is this file's own header comment: drop the shebang
    # line, then print each following line without its leading "# " until
    # the first blank line is reached.
    header = File.readlines(__FILE__)[1..-1]

    header.each do |raw|
      raw.chomp!
      break if raw.empty?
      puts raw[2..-1].to_s
    end

    exit 0
  end

  # With --replace an update carrying no alerts is still meaningful.
  error "No alerts specified" if update.alert.empty? && !update.replace

  update.transmission_id = rand(2**63)

  Mauve::Sender.new(ARGV).send(update, verbosity)

rescue Protobuf::NotInitializedError
  error "Alert not initialized fully -- you must supply an ID"

rescue StandardError => problem
  # Covers ArgumentError raised by parse_time_spec as well as any other
  # failure; only the message is shown to the user.
  error problem.message

end