diff options
author | Patrick J Cherry <patrick@bytemark.co.uk> | 2012-04-30 12:49:24 +0100 |
---|---|---|
committer | Patrick J Cherry <patrick@bytemark.co.uk> | 2012-04-30 12:49:24 +0100 |
commit | 372547b31b51d871f104ae5679e98420a99a12c1 (patch) | |
tree | f1f332dce11018adce79898e4614360f894e9162 | |
parent | e141a801070932e2925177bdf9f61c598d1c8dfd (diff) |
Non-UTF8 stuff is now stripped. Added encoding tests.
-rw-r--r-- | lib/mauve/alert.rb | 24 | ||||
-rw-r--r-- | test/tc_mauve_database_peculiarities.rb | 85 |
2 files changed, 109 insertions, 0 deletions
diff --git a/lib/mauve/alert.rb b/lib/mauve/alert.rb
index 6b08f82..a5cb885 100644
--- a/lib/mauve/alert.rb
+++ b/lib/mauve/alert.rb
@@ -75,6 +75,20 @@ module Mauve
     def size; 99; end
 
     include DataMapper::Resource
+
+    # Matches a string only if all of it is well-formed UTF-8.  Anchored with
+    # \A and \z rather than ^ and $, which match per line and would accept a
+    # multi-line string as soon as any single line of it was valid.
+    UTF8_REGEXP = Regexp.new(/\A(?:#{[
+      "[\x00-\x7F]",                        # ASCII
+      "[\xC2-\xDF][\x80-\xBF]",             # non-overlong 2-byte
+      "\xE0[\xA0-\xBF][\x80-\xBF]",         # excluding overlongs
+      "[\xE1-\xEC\xEE\xEF][\x80-\xBF]{2}",  # straight 3-byte
+      "\xED[\x80-\x9F][\x80-\xBF]",         # excluding surrogates
+      "\xF0[\x90-\xBF][\x80-\xBF]{2}",      # planes 1-3
+      "[\xF1-\xF3][\x80-\xBF]{3}",          # planes 4-15
+      "\xF4[\x80-\x8F][\x80-\xBF]{2}"       # plane 16
+    ].join("|")})*\z/)
 
     property :id, Serial
     property :alert_id, String, :required => true, :unique_index => :alert_index, :length=>256, :lazy => false
@@ -618,6 +632,7 @@ module Mauve
       # @return [String]
       def remove_html(str, conf = Sanitize::Config::DEFAULT)
         raise ArgumentError, "Expected a string, got a #{str.class}" unless str.is_a?(String)
+        str = clean_utf8(str)
 
         if str =~ /<[^0-9 <&.-]/
           Sanitize.clean( str, conf )
@@ -631,9 +646,18 @@ module Mauve
       # @param [String] str String to clean
       # @return [String]
       def clean_html(str)
+        str = clean_utf8(str)
         remove_html(str, Sanitize::Config::RELAXED.merge({:remove_contents => true}))
       end
 
+      # Replace everything outside \x00-\x7F with "?" unless str matches UTF8_REGEXP.
+      #
+      # @param [String] str String to clean
+      # @return [String]
+      def clean_utf8(str)
+        UTF8_REGEXP.match(str) ? str : str.gsub(/[^\x00-\x7F]/,'?')
+      end
+
       # All alerts currently raised
       #
       # @return [Array]
diff --git a/test/tc_mauve_database_peculiarities.rb b/test/tc_mauve_database_peculiarities.rb
new file mode 100644
index 0000000..9eb612e
--- /dev/null
+++ b/test/tc_mauve_database_peculiarities.rb
@@ -0,0 +1,85 @@
+$:.unshift "../lib"
+
+require 'th_mauve'
+require 'mauve/datamapper'
+require 'mauve/server'
+require 'mauve/configuration'
+require 'mauve/configuration_builder'
+require 'mauve/configuration_builders'
+require 'iconv'
+
+class TcMauveDatabasePeculiarities < Mauve::UnitTest
+  include Mauve
+
+  def setup
+    super
+    setup_database
+    @temp_db = "mauve_test.#{10.times.collect{ rand(36).to_s(36) }.join}"
+  end
+
+  def teardown
+    teardown_database
+    super
+  end
+
+  def test_encoding
+    #
+    # Don't test unless the DB URL has been set.
+    #
+    return unless @db_url
+
+    config=<<EOF
+server {
+  database "#{@db_url}"
+}
+EOF
+
+    Configuration.current = ConfigurationBuilder.parse(config)
+    Server.instance.setup
+
+    x = Hash.new
+    x["en"] = "Please rush me my portable walrus polishing kit!"
+    x["fi"] = "Ole hyvä kiirehtiä minulle kannettavan mursu kiillotukseen pakki!"
+    x["jp"] = "私に私のポータブルセイウチの研磨キットを急いでください!"
+
+    %w(UTF-8 WINDOWS-1252 SHIFT-JIS).each do |enc|
+      x.each do |lang, str|
+        assert_nothing_raised("Failed to use iconv to convert to #{enc}") { str = Iconv.conv(enc+"//IGNORE", "utf8", str) }
+
+        alert = Alert.new(
+          :alert_id => "#{lang}:#{enc}",
+          :source => "test",
+          :subject => str
+        )
+
+        assert_nothing_raised("failed to insert #{enc}") { alert.save }
+      end
+    end
+  end
+end
+
+
+
+class TcMauveDatabasePostgresPeculiarities < TcMauveDatabasePeculiarities
+  def setup
+    super
+    system("createdb #{@temp_db} --encoding UTF8") || flunk("Failed to create #{@temp_db}")
+    # @pg_conn = PGconn.open(:dbname => @temp_db)
+    @db_url = "postgres:///#{@temp_db}"
+  end
+
+  def teardown
+    # @pg_conn.finish if @pg_conn.is_a?(PGconn) and @pg_conn.status == PGconn::CONNECTION_OK
+    super
+    (system("dropdb #{@temp_db}") || puts("Failed to drop #{@temp_db}")) if @temp_db
+  end
+end
+
+class TcMauveDatabaseSqlite3Peculiarities < TcMauveDatabasePeculiarities
+  def setup
+    super
+    # @pg_conn = PGconn.open(:dbname => @temp_db)
+    @db_url = "sqlite3::memory:"
+  end
+end
+