aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Patrick J Cherry <patrick@bytemark.co.uk> 2012-04-30 12:49:24 +0100
committer Patrick J Cherry <patrick@bytemark.co.uk> 2012-04-30 12:49:24 +0100
commit 372547b31b51d871f104ae5679e98420a99a12c1 (patch)
tree f1f332dce11018adce79898e4614360f894e9162
parent e141a801070932e2925177bdf9f61c598d1c8dfd (diff)
Non-UTF8 stuff is now stripped. Added encoding tests.
-rw-r--r-- lib/mauve/alert.rb 24
-rw-r--r-- test/tc_mauve_database_peculiarities.rb 85
2 files changed, 109 insertions, 0 deletions
diff --git a/lib/mauve/alert.rb b/lib/mauve/alert.rb
index 6b08f82..a5cb885 100644
--- a/lib/mauve/alert.rb
+++ b/lib/mauve/alert.rb
@@ -75,6 +75,20 @@ module Mauve
def size; 99; end
include DataMapper::Resource
+
+ #
+ # If a string matches this regex, it is valid UTF8.
+ #
+ UTF8_REGEXP = Regexp.new(/^(?:#{[
+ "[\x00-\x7F]", # ASCII
+ "[\xC2-\xDF][\x80-\xBF]", # non-overlong 2-byte
+ "\xE0[\xA0-\xBF][\x80-\xBF]", # excluding overlongs
+ "[\xE1-\xEC\xEE\xEF][\x80-\xBF]{2}", # straight 3-byte
+ "\xED[\x80-\x9F][\x80-\xBF]", # excluding surrogates
+ "\xF0[\x90-\xBF][\x80-\xBF]{2}", # planes 1-3
+ "[\xF1-\xF3][\x80-\xBF]{3}", # planes 4-15
+ "\xF4[\x80-\x8F][\x80-\xBF]{2}" # plane 16
+ ].join("|")})*$/)
property :id, Serial
property :alert_id, String, :required => true, :unique_index => :alert_index, :length=>256, :lazy => false
@@ -618,6 +632,7 @@ module Mauve
# @return [String]
def remove_html(str, conf = Sanitize::Config::DEFAULT)
raise ArgumentError, "Expected a string, got a #{str.class}" unless str.is_a?(String)
+ str = clean_utf8(str)
if str =~ /<[^0-9 <&.-]/
Sanitize.clean( str, conf )
@@ -631,9 +646,18 @@ module Mauve
# @param [String] str String to clean
# @return [String]
def clean_html(str)
+ str = clean_utf8(str) # NOTE(review): remove_html calls clean_utf8 on its argument as well, so the string is checked twice
remove_html(str, Sanitize::Config::RELAXED.merge({:remove_contents => true})) # Sanitize::Config::RELAXED with :remove_contents => true merged in
end
+ def clean_utf8(str)
+ unless UTF8_REGEXP.match(str)
+ str.gsub(/[^\x00-\x7F]/,'?')
+ else
+ str
+ end
+ end
+
# All alerts currently raised
#
# @return [Array]
diff --git a/test/tc_mauve_database_peculiarities.rb b/test/tc_mauve_database_peculiarities.rb
new file mode 100644
index 0000000..9eb612e
--- /dev/null
+++ b/test/tc_mauve_database_peculiarities.rb
@@ -0,0 +1,85 @@
+$:.unshift "../lib"
+
+require 'th_mauve'
+require 'mauve/datamapper'
+require 'mauve/server'
+require 'mauve/configuration'
+require 'mauve/configuration_builder'
+require 'mauve/configuration_builders'
+require 'iconv'
+
+class TcMauveDatabasePeculiarities < Mauve::UnitTest
+ include Mauve
+
+ def setup
+ super
+ setup_database
+ @temp_db = "mauve_test.#{10.times.collect{ rand(36).to_s(36) }.join}"
+ end
+
+ def teardown
+ teardown_database
+ super
+ end
+
+ def test_encoding
+ #
+ # Don't test unless the DB URL has been set.
+ #
+ return unless @db_url
+
+ config=<<EOF
+server {
+ database "#{@db_url}"
+}
+EOF
+
+ Configuration.current = ConfigurationBuilder.parse(config)
+ Server.instance.setup
+
+ x = Hash.new
+ x["en"] = "Please rush me my portable walrus polishing kit!"
+ x["fi"] = "Ole hyvä kiirehtiä minulle kannettavan mursu kiillotukseen pakki!"
+ x["jp"] = "私に私のポータブルセイウチの研磨キットを急いでください!"
+
+ %w(UTF-8 WINDOWS-1252 SHIFT-JIS).each do |enc|
+ x.each do |lang, str|
+ assert_nothing_raised("Failed to use iconv to convert to #{enc}") { str = Iconv.conv(enc+"//IGNORE", "utf8", str) }
+
+ alert = Alert.new(
+ :alert_id => "#{lang}:#{enc}",
+ :source => "test",
+ :subject => str
+ )
+
+ assert_nothing_raised("failed to insert #{enc}") { alert.save }
+ end
+ end
+ end
+end
+
+
+
+class TcMauveDatabasePostgresPeculiarities < TcMauveDatabasePeculiarities
+ def setup
+ super
+ system("createdb #{@temp_db} --encoding UTF8") || flunk("Failed to create #{@temp_db}")
+ # @pg_conn = PGconn.open(:dbname => @temp_db)
+ @db_url = "postgres:///#{@temp_db}"
+ end
+
+ def teardown
+ # @pg_conn.finish if @pg_conn.is_a?(PGconn) and @pg_conn.status == PGconn::CONNECTION_OK
+ super
+ (system("dropdb #{@temp_db}") || puts("Failed to drop #{@temp_db}")) if @temp_db
+ end
+end
+
+class TcMauveDatabaseSqlite3Peculiarities < TcMauveDatabasePeculiarities
+ def setup
+ super
+ # Only the database URL is set here, so the inherited tests run against "sqlite3::memory:" with no createdb/dropdb step.
+ @db_url = "sqlite3::memory:"
+ end
+end
+