diff options
author | Patrick J Cherry <patrick@bytemark.co.uk> | 2017-08-09 18:26:22 +0100 |
---|---|---|
committer | Patrick J Cherry <patrick@bytemark.co.uk> | 2017-08-09 18:26:22 +0100 |
commit | 005013d98d742989d3c000b04054e15bb0482a69 (patch) | |
tree | 184257d7e27908133361504c4b68aa40834f7258 | |
parent | 21870b8cf59f0450ef09c10890f64bb5b88c7645 (diff) | |
parent | 45f772cfe7626ec7c250d89c7291c23decf13558 (diff) |
Merge branch '13-catch-bogus-dns' into 'master'
Resolve "Custodian temporary DNS failure"
Closes #13
See merge request !11
-rw-r--r-- | debian/changelog | 7 | ||||
-rw-r--r-- | lib/custodian/protocoltest/http.rb | 88 |
2 files changed, 93 insertions, 2 deletions
diff --git a/debian/changelog b/debian/changelog
index 157a4c8..3827334 100644
--- a/debian/changelog
+++ b/debian/changelog
@@ -1,3 +1,10 @@
+custodian (0.36) stable; urgency=high
+
+  * If we receive a DNS-error from the curb-gem we ignore it, unless
+    failing to resolve the appropriate hostname ourselves.
+
+ -- Steve Kemp <steve@bytemark.co.uk>  Tue, 08 Aug 2017 08:55:08 +0200
+
 custodian (0.35) stable; urgency=low
 
   * Alert in more detail on DNS failures.
diff --git a/lib/custodian/protocoltest/http.rb b/lib/custodian/protocoltest/http.rb
index a3f34cc..0bf1a68 100644
--- a/lib/custodian/protocoltest/http.rb
+++ b/lib/custodian/protocoltest/http.rb
@@ -38,6 +38,90 @@ module Custodian
       attr_reader :expected_status, :expected_content
 
       #
+      # Should we ignore a (temporary) DNS error in this test?
+      #
+      # We've been beset by a series of false-alerts in the recent
+      # past which have all occurred at this point:
+      #
+      #  * We get bogus errors in resolving DNS from curb/libcurl.
+      #
+      #  * These errors go away on retry.
+      #
+      #  * But the retry isn't fast enough to outrace the
+      #    supression-time of our alerts.
+      #
+      # For the moment we're going to _temporarily_ ignore these errors.
+      #
+      #  * If a host has Connection-Refused, the wrong status-cde
+      #    or similar failure it will be handled as normal.
+      #
+      #  * If the host has genuinely lost DNS then we're going to
+      #    raise an alert, but if it is this false-error then we
+      #    will silently disable this test-run.
+      #
+      def ignore_failure?( protocol )
+
+        # Get the hostname we're connecting to.
+        u = URI.parse(@url)
+        target = u.host
+
+        # IPs for the target
+        ips = []
+
+        case protocol
+        when :ipv4
+          if (target =~ /^([0-9]+)\.([0-9]+)\.([0-9]+)\.([0-9]+)$/)
+            ips << target
+          end
+        when :ipv6
+          if (target =~ /^([0-9a-f:]+)$/)
+            ips << target
+          end
+        else
+          raise ArgumentError, "Sanity-checking DNS-failure of unknown type: #{protocol}"
+        end
+
+        # Early termination?
+        return true unless ips.empty?
+
+        #
+        # OK if it didn't look like an IP address then attempt to
+        # look it up, as both IPv4 and IPv6.
+        #
+        begin
+
+          type = case protocol
+                 when :ipv4
+                   Resolv::DNS::Resource::IN::A
+                 when :ipv6
+                   Resolv::DNS::Resource::IN::AAAA
+                 else
+                   raise ArgumentError, "Sanity-checking DNS-failure of unknown type: #{protocol}"
+                 end
+
+          timeout(30) do
+            Resolv::DNS.open do |dns|
+              ips = dns.getresources(target, type)
+            end
+          end
+        rescue Timeout::Error => _e
+          # NOP
+        end
+
+
+        #
+        # At this point we either have:
+        #
+        #   "ips" containing entries - because the hostname resolved
+        #
+        #   "ips" being empty because the DNS failure was genuine
+        #
+        return ( ! ips.empty? )
+      end
+
+
+
+      #
       # Constructor
       #
       def initialize(line)
@@ -337,8 +421,8 @@ module Custodian
           rescue Curl::Err::TooManyRedirectsError
             errors << "#{protocol_msg}: More than 10 redirections."
           rescue Curl::Err::HostResolutionError => x
-            # Log the DNS error-message.
-            resolution_errors << "#{protocol_msg}: #{x.class}: #{x.message}\n #{x.backtrace.join("\n ")}."
+            resolution_errors << "#{protocol_msg}: #{x.class}: #{x.message}\n #{x.backtrace.join("\n ")}." unless ignore_failure?( resolve_mode)
+
           rescue => x
             errors << "#{protocol_msg}: #{x.class}: #{x.message}\n #{x.backtrace.join("\n ")}."
           end