aboutsummaryrefslogtreecommitdiff
path: root/lib
diff options
context:
space:
mode:
authorAlex Young <alex@bytemark.co.uk>2015-04-16 12:32:55 +0100
committerAlex Young <alex@bytemark.co.uk>2015-04-16 12:32:55 +0100
commit0c20e0fa286fdd948cf941a03a350fe28e2b751e (patch)
tree4ab9f0cb3b39afbc256c9be27c858ea9eb0c14a3 /lib
parentaffb4ec63d93eb0c9b4e6eec56fb2dc741918e1d (diff)
Use 1.9's Encoding to do away with UTF8-checking in mauve/alert.rb
Diffstat (limited to 'lib')
-rw-r--r--lib/mauve/alert.rb25
1 files changed, 2 insertions, 23 deletions
diff --git a/lib/mauve/alert.rb b/lib/mauve/alert.rb
index 4cee65f..d27b5d4 100644
--- a/lib/mauve/alert.rb
+++ b/lib/mauve/alert.rb
@@ -1,3 +1,4 @@
+# encoding: utf-8
require 'mauve/proto'
require 'mauve/alert_changed'
require 'mauve/history'
@@ -80,22 +81,6 @@ module Mauve
include DataMapper::Resource
- #
- # If a string matches this regex, it is valid UTF8. This regex is
- # in ASCII-8BIT, so we have to force the encoding of the string to
- # match it.
- #
- UTF8_REGEXP = Regexp.new(/^(?:#{[
- "[\x00-\x7F]", # ASCII
- "[\xC2-\xDF][\x80-\xBF]", # non-overlong 2-byte
- "\xE0[\xA0-\xBF][\x80-\xBF]", # excluding overlongs
- "[\xE1-\xEC\xEE\xEF][\x80-\xBF]{2}", # straight 3-byte
- "\xED[\x80-\x9F][\x80-\xBF]", # excluding surrogates
- "\xF0[\x90-\xBF][\x80-\xBF]{2}", # planes 1-3
- "[\xF1-\xF3][\x80-\xBF]{3}", # planes 4-15
- "\xF4[\x80-\x8F][\x80-\xBF]{2}" # plane 16
- ].join("|")})*$/)
-
property :id, Serial
property :alert_id, String, :required => true, :unique_index => :alert_index, :length=>256, :lazy => false
property :source, String, :required => true, :unique_index => :alert_index, :length=>512, :lazy => false
@@ -702,13 +687,7 @@ module Mauve
end
def clean_utf8(str)
- # We're explicitly throwing away non-valid data here.
- forced = str.force_encoding("ASCII-8BIT")
- unless UTF8_REGEXP.match(str)
- str.gsub(/[^\x00-\x7F]/,'?')
- else
- str
- end
+ str.encode("utf-8", :invalid => :replace, :replace => '?', :undef => :replace)
end
# All alerts currently raised