aboutsummaryrefslogtreecommitdiff
path: root/lib/mauve/source_list.rb
blob: 415482152e3bcc1f20b048074cc43cf85a23bb7d (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
# encoding: UTF-8
require 'log4r'
require 'ipaddress'
require 'uri'
require 'mauve/mauve_time'
require 'mauve/mauve_resolv'
require 'mauve/generic_http_api_client'
require 'mauve/configuration'

module Mauve

  # A simple construct to match sources.
  #
  # One can ask if an IPv4, IPv6, hostname or url (match on hostname only) is
  # contained within a list.  If the query is not an IP address, it will be
  # converted into one as the checks are made.
  #
  # Note that the matching is greedy.  When a hostname maps to several IP
  # addresses and only one of those is included in the list, a match
  # will occur.
  #
  class SourceList

    include GenericHttpApiClient

    attr_reader :label, :last_resolved_at

    ## Default constructor.
    #
    # Starts with no sources and nothing resolved.
    #
    # @param label The name of this list; it appears in log messages and is
    #   also readable through the +username+ alias.
    # @param url Optional location handed to +do_get+ when the list is
    #   resolved; nil when there is no remote list.
    def initialize(label, url = nil)
      @label = label
      @url   = url

      # Sources as configured, and the same sources after resolution.
      @list          = []
      @resolved_list = []

      # nil means "never resolved".
      @last_resolved_at = nil
    end

    alias username label

    # Adds a source onto the list.
    #
    # The source can be a string, or array of strings.  Each one can be an IPv6
    # or IPv4 address or range, or a hostname.
    #
    # Hostnames can have *, or numeric ranges in their name.  A '*' matches
    # one or more characters from 0-9, a-z and "-" (so never a ".").  A range
    # can be specified as 1..4, meaning 1, 2, 3 or 4.
    #
    # e.g.  1.2.3.4/24
    #       2001:dead::beef/64
    #       app1..10.my-customer.com
    #       *.db.my-customer.com
    #
    # Hostnames are also resolved into IP addresses, and re-resolved every 30
    # minutes.
    #
    # @param [String or Array] l The source(s) to add.
    # @return [SourceList]
    #
    def +(l)
      # Accept one source or any nesting of arrays; entries that cannot be
      # parsed come back as nil and are dropped.
      parsed = [l].flatten.collect { |raw| do_parse_source(raw) }.flatten.compact

      parsed.each do |candidate|
        ##
        # I would use include? here, but IPAddr tries to convert "foreign"
        # classes to integers, and RegExp doesn't have a to_i method..
        #
        already_listed = @list.any? do |existing|
          candidate.is_a?(existing.class) && candidate == existing
        end

        unless already_listed
          @list << candidate
          next
        end

        logger.warn "#{candidate} is already on the #{self.label} list"
      end

      # Throw away the resolved copy so that the next call to #list resolves
      # again with the new entries.
      @last_resolved_at = nil
      @resolved_list    = []

      self
    end

    alias add_to_list +

    # @return [Log4r::Logger]
    def logger
      return @logger if @logger

      # Created on first use and kept for the lifetime of this object; the
      # logger is named after the class.
      @logger = Log4r::Logger.new(self.class.to_s)
    end

    # The resolved list of sources.
    #
    # Resolution is redone first whenever the resolved list is empty, has
    # never been resolved, or was last resolved more than thirty minutes
    # (1800 seconds) ago.
    #
    # @return [Array]
    def list
      stale = @resolved_list.empty? ||
              @last_resolved_at.nil? ||
              @last_resolved_at < (Time.now - 1800)

      resolve if stale

      @resolved_list
    end

    # 
    # Return whether or not a list contains a source.
    #
    # First the hostname is checked for a URI, using URI#parse, and then the
    # hostname is extracted from there.  If that fails, the original hostname
    # is used.
    #
    # Next we check against our list, including all IPs for any hostnames in
    # that list.
    #
    # If nothing is found, the hostname is then resolved to its IPs, and we
    # check to see if those IPs are in our list. 
    #
    # @param [String] host The host to look for.
    # @return [Boolean]
    def includes?(host)
      #
      # Pick out hostnames from URIs.
      #
      if host =~ /^[a-z][a-z0-9+-]+:\/\//
        begin
          uri = URI.parse(host)
          host = uri.host unless uri.host.nil?
          # Drop the square brackets from a bracketed IPv6 literal.
          host = $1 if host =~ /^\[([0-9a-f:]+)\]$/i
        rescue URI::InvalidURIError
          # Carry on with the string as given.
          logger.warn "Did not recognise URI #{host}"
        end
      end

      host_as_ip = nil

      begin
        host_as_ip = IPAddress.parse(host)
      rescue ArgumentError
        # Rescue IPAddress argument errors, i.e. host is not an IP address.
      end

      #
      # Take one snapshot of the list.  Every call to #list may redo the
      # resolution (always, when the resolved list is empty), so asking for it
      # repeatedly can repeat that work within a single query.
      #
      sources = list

      #
      # First pass: compare the host, as given, against every entry.
      #
      direct_hit = sources.any? do |l|
        case l
        when String
          host == l
        when Regexp
          host =~ l
        when IPAddress
          # Only compare addresses of the same class.
          host_as_ip.is_a?(l.class) && l.include?(host_as_ip)
        else
          false
        end
      end

      return true if direct_hit

      #
      # To cut down the amount of DNS queries, we'll bail out at this point.
      #
      return false if true == Configuration.current.minimal_dns_lookups

      #
      # Second pass: resolve the host and compare its addresses against the
      # IP entries.  There is no point resolving if the list has none.
      #
      ip_sources = sources.select { |l| l.is_a?(IPAddress) }
      return false if ip_sources.empty?

      ips = MauveResolv.get_ips_for(host).collect { |i| IPAddress.parse(i) }
      return false if ips.empty?

      ip_sources.any? do |list_ip|
        ips.any? { |ip| ip.is_a?(list_ip.class) && list_ip.include?(ip) }
      end
    end

    # 
    # Resolve all hostnames in the list to IP addresses.
    #
    # @return [Array] The new list.
    #
    def resolve
      @last_resolved_at = Time.now

      expanded =
        if true == Configuration.current.minimal_dns_lookups
          # No lookups at all: just a copy of the configured entries.
          [] + @list
        else
          # Sources fetched from the URL, one per line, go in front of the
          # configured ones.
          fetched = []
          if @url
            body = do_get(@url)
            if body.is_a?(String)
              fetched = body.split("\n").collect { |line| do_parse_source(line) }.flatten.compact
            end
          end

          (fetched + @list).collect do |entry|
            next entry unless entry.is_a?(String)

            # Keep the hostname itself and add every address it resolves to.
            [entry] + MauveResolv.get_ips_for(entry).collect { |addr| IPAddress.parse(addr) }
          end
        end

      @resolved_list = expanded.flatten
    end

    private

    #
    # Turn one configured source into the object used for matching.
    #
    #   "/.../"                 => Regexp built from the text between the slashes
    #   contains "*", "[", "]"
    #   or a range "N..M"       => Regexp built from the hostname pattern
    #   hex digits, ".", ":"
    #   and optional "/bits"    => whatever IPAddress.parse returns
    #   any other String        => returned unchanged (a plain hostname)
    #   Regexp                  => returned unchanged
    #   anything else           => nil, after logging a warning
    #
    # @param [String or Regexp] h The source to parse.
    # @return [Regexp, IPAddress, String, nil]
    #
    def do_parse_source(h)
      unless h.is_a?(String)
        return h if h.is_a?(Regexp)

        logger.warn "Cannot parse source line #{h.inspect} for source list #{@label}."
        return nil
      end

      # An explicit /regexp/ has to be recognised before the wildcard test
      # below, otherwise any "*", "[" or "]" inside it would make it be
      # rewritten as a hostname pattern.
      return Regexp.new($1) if h =~ /^\/(.*)\/$/

      # "*"              means [0-9a-z\-]+
      # "(\d+)\.\.(\d+)" is expanded to every integer between $1 and $2
      #                  joined by a pipe, e.g. 1..5 means (?:1|2|3|4|5)
      #  "."              is literal, not a single-character match
      if h =~ /[\[\]\*]/ or h =~ /(\d+)\.\.(\d+)/
        # The range is expanded first, while its ".." is still unescaped.  The
        # alternatives are wrapped in a non-capturing group so that "|" only
        # separates the numbers and not the whole pattern.
        expanded = h.gsub(/(\d+)\.\.(\d+)/) do
          first = Regexp.last_match(1).to_i
          last  = Regexp.last_match(2).to_i
          "(?:" + (first..last).to_a.join("|") + ")"
        end

        # NOTE(review): the pattern is anchored at the end only, so it also
        # matches longer names ending in the pattern -- confirm this is wanted.
        return Regexp.new(
          expanded.
            gsub(/\./, "\\.").
            gsub(/\*/, "[0-9a-z\\-]+") +
          "\\.?$")
      end

      return IPAddress.parse(h) if h =~ /^[0-9a-f\.:]+(\/\d+)?$/i

      h
    end

  end

end