summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorPatrick J Cherry <patrick@bytemark.co.uk>2017-08-09 18:26:22 +0100
committerPatrick J Cherry <patrick@bytemark.co.uk>2017-08-09 18:26:22 +0100
commit005013d98d742989d3c000b04054e15bb0482a69 (patch)
tree184257d7e27908133361504c4b68aa40834f7258
parent21870b8cf59f0450ef09c10890f64bb5b88c7645 (diff)
parent45f772cfe7626ec7c250d89c7291c23decf13558 (diff)
Merge branch '13-catch-bogus-dns' into 'master'
Resolve "Custodian temporary DNS failure" Closes #13 See merge request !11
-rw-r--r--debian/changelog7
-rw-r--r--lib/custodian/protocoltest/http.rb88
2 files changed, 93 insertions, 2 deletions
diff --git a/debian/changelog b/debian/changelog
index 157a4c8..3827334 100644
--- a/debian/changelog
+++ b/debian/changelog
@@ -1,3 +1,10 @@
+custodian (0.36) stable; urgency=high
+
+ * If we receive a DNS-error from the curb-gem we ignore it, unless
+ we also fail to resolve the appropriate hostname ourselves.
+
+ -- Steve Kemp <steve@bytemark.co.uk> Tue, 08 Aug 2017 08:55:08 +0200
+
custodian (0.35) stable; urgency=low
* Alert in more detail on DNS failures.
diff --git a/lib/custodian/protocoltest/http.rb b/lib/custodian/protocoltest/http.rb
index a3f34cc..0bf1a68 100644
--- a/lib/custodian/protocoltest/http.rb
+++ b/lib/custodian/protocoltest/http.rb
@@ -38,6 +38,90 @@ module Custodian
attr_reader :expected_status, :expected_content
#
+ # Should we ignore a (temporary) DNS error in this test?
+ #
+ # We've been beset by a series of false-alerts in the recent
+ # past which have all occurred at this point:
+ #
+ # * We get bogus errors in resolving DNS from curb/libcurl.
+ #
+ # * These errors go away on retry.
+ #
+ # * But the retry isn't fast enough to outrace the
+ # suppression-time of our alerts.
+ #
+ # For the moment we're going to _temporarily_ ignore these errors.
+ #
+ # * If a host has Connection-Refused, the wrong status-code
+ # or similar failure it will be handled as normal.
+ #
+ # * If the host has genuinely lost DNS then we're going to
+ # raise an alert, but if it is this false-error then we
+ # will silently disable this test-run.
+ #
+ def ignore_failure?(protocol)
+   # Decide whether a DNS failure reported by curb/libcurl should be ignored.
+   #
+   # protocol - :ipv4 or :ipv6, the address family of the failed fetch.
+   #
+   # Returns true when the failure looks bogus: the target of @url is a
+   # literal IP address of that family, or we can resolve the hostname
+   # ourselves.  Returns false when the name does not resolve, our own
+   # lookup timed out, or the URL has no host at all.
+   #
+   # Raises ArgumentError for any other protocol.
+
+   # Get the hostname we're connecting to.  URI#hostname (unlike URI#host)
+   # strips the square brackets from IPv6 literals such as "[::1]".
+   target = URI.parse(@url).hostname
+
+   # Pick the literal-address pattern and the DNS record type for the
+   # family.  The Resolv patterns are anchored to the whole string and,
+   # unlike a bare /[0-9a-f:]+/, do not mistake hostnames such as "cafe"
+   # for IPv6 addresses.
+   literal, record_type =
+     case protocol
+     when :ipv4
+       [Resolv::IPv4::Regex, Resolv::DNS::Resource::IN::A]
+     when :ipv6
+       [Resolv::IPv6::Regex, Resolv::DNS::Resource::IN::AAAA]
+     else
+       raise ArgumentError, "Sanity-checking DNS-failure of unknown type: #{protocol}"
+     end
+
+   # Without a host there is nothing to double-check; report the failure.
+   return false if target.nil? || target.empty?
+
+   # Early termination: a literal IP address never needs resolving, so a
+   # resolution error for it cannot be genuine.
+   return true if target =~ literal
+
+   #
+   # OK it didn't look like an IP address, so attempt to look it up
+   # ourselves for the requested address family only.
+   #
+   ips = []
+   begin
+     # Timeout.timeout replaces the bare Kernel#timeout call, which was
+     # deprecated in Ruby 2.3 and removed in Ruby 3.0.
+     Timeout.timeout(30) do
+       Resolv::DNS.open do |dns|
+         ips = dns.getresources(target, record_type)
+       end
+     end
+   rescue Timeout::Error
+     # Treat a lookup timeout like a genuine failure: "ips" stays empty.
+   end
+
+   #
+   # At this point we either have:
+   #
+   #  "ips" containing entries - because the hostname resolved
+   #
+   #  "ips" being empty because the DNS failure was genuine
+   #
+   !ips.empty?
+ end
+
+
+
+ #
# Constructor
#
def initialize(line)
@@ -337,8 +421,8 @@ module Custodian
rescue Curl::Err::TooManyRedirectsError
errors << "#{protocol_msg}: More than 10 redirections."
rescue Curl::Err::HostResolutionError => x
- # Log the DNS error-message.
- resolution_errors << "#{protocol_msg}: #{x.class}: #{x.message}\n #{x.backtrace.join("\n ")}."
+ resolution_errors << "#{protocol_msg}: #{x.class}: #{x.message}\n #{x.backtrace.join("\n ")}." unless ignore_failure?( resolve_mode)
+
rescue => x
errors << "#{protocol_msg}: #{x.class}: #{x.message}\n #{x.backtrace.join("\n ")}."
end