diff options
| author | Patrick J Cherry <patrick@bytemark.co.uk> | 2012-04-30 12:49:24 +0100 | 
|---|---|---|
| committer | Patrick J Cherry <patrick@bytemark.co.uk> | 2012-04-30 12:49:24 +0100 | 
| commit | 372547b31b51d871f104ae5679e98420a99a12c1 (patch) | |
| tree | f1f332dce11018adce79898e4614360f894e9162 /lib/mauve | |
| parent | e141a801070932e2925177bdf9f61c598d1c8dfd (diff) | |
Non-UTF8 stuff is now stripped.  Added encoding tests.
Diffstat (limited to 'lib/mauve')
| -rw-r--r-- | lib/mauve/alert.rb | 24 | 
1 files changed, 24 insertions, 0 deletions
```diff
diff --git a/lib/mauve/alert.rb b/lib/mauve/alert.rb
index 6b08f82..a5cb885 100644
--- a/lib/mauve/alert.rb
+++ b/lib/mauve/alert.rb
@@ -75,6 +75,20 @@ module Mauve
     def size; 99; end
 
     include DataMapper::Resource
+    
+    #
+    # If a string matches this regex, it is valid UTF8.
+    #
+    UTF8_REGEXP = Regexp.new(/^(?:#{[
+         "[\x00-\x7F]",                        # ASCII
+         "[\xC2-\xDF][\x80-\xBF]",             # non-overlong 2-byte
+         "\xE0[\xA0-\xBF][\x80-\xBF]",         # excluding overlongs
+         "[\xE1-\xEC\xEE\xEF][\x80-\xBF]{2}",  # straight 3-byte
+         "\xED[\x80-\x9F][\x80-\xBF]",         # excluding surrogates
+         "\xF0[\x90-\xBF][\x80-\xBF]{2}",      # planes 1-3
+         "[\xF1-\xF3][\x80-\xBF]{3}",          # planes 4-15
+         "\xF4[\x80-\x8F][\x80-\xBF]{2}"       # plane 16
+        ].join("|")})*$/)
 
     property :id, Serial
     property :alert_id, String, :required => true, :unique_index => :alert_index, :length=>256, :lazy => false
@@ -618,6 +632,7 @@ module Mauve
       # @return [String]
       def remove_html(str, conf = Sanitize::Config::DEFAULT)
         raise ArgumentError, "Expected a string, got a #{str.class}" unless str.is_a?(String)
+        str = clean_utf8(str)
 
         if str =~ /<[^0-9 <&.-]/
           Sanitize.clean( str, conf )
@@ -631,9 +646,18 @@ module Mauve
       # @param  [String] str String to clean
       # @return [String]
       def clean_html(str)
+        str = clean_utf8(str)
         remove_html(str, Sanitize::Config::RELAXED.merge({:remove_contents => true}))
       end
 
+      def clean_utf8(str)
+        unless UTF8_REGEXP.match(str)
+          str.gsub(/[^\x00-\x7F]/,'?')
+        else
+          str
+        end
+      end
+
       # All alerts currently raised
       #
       # @return [Array]
```
