summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Simon McVittie <simon.mcvittie@collabora.co.uk> 2013-04-22 18:18:30 +0100
committer Simon McVittie <simon.mcvittie@collabora.co.uk> 2013-04-24 14:45:42 +0100
commit 3e0498048df554bfaa30c42aef1220f2b7135ed3 (patch)
tree 3575a29c91e0db20e1ec8be4d6ebeb1a9b1c273f
parent b0dc6e3fb9754354b4b29476217c77277d16dd23 (diff)
messages/invalid-utf8.py: amend test-case to work under GLib 2.36
Reviewed-by: Guillaume Desmottes <guillaume.desmottes@collabora.co.uk>
-rw-r--r-- tests/twisted/messages/invalid-utf8.py 29
1 file changed, 20 insertions, 9 deletions
diff --git a/tests/twisted/messages/invalid-utf8.py b/tests/twisted/messages/invalid-utf8.py
index 9f3d057..a48c2f4 100644
--- a/tests/twisted/messages/invalid-utf8.py
+++ b/tests/twisted/messages/invalid-utf8.py
@@ -1,27 +1,31 @@
# coding=utf-8
"""
-Test that incoming messages containing well-formed but invalid UTF-8 code
-points don't make Idle fall off the bus. This is a regression test for
-<https://bugs.freedesktop.org/show_bug.cgi?id=30741>.
+Test that incoming messages containing invalid UTF-8
+don't make Idle fall off the bus. This is a regression test for
+bugs similar to <https://bugs.freedesktop.org/show_bug.cgi?id=30741>.
"""
from idletest import exec_test
from servicetest import assertEquals
+import re
def test(q, bus, conn, stream):
conn.Connect()
q.expect('dbus-signal', signal='StatusChanged', args=[0, 1])
test_with_message(q, stream, ["I'm no ", " Buddhist"])
- # Check that valid exotic characters don't get lost
- test_with_message(q, stream, [u"björk"] * 5)
+ test_with_message(q, stream, [u"björk"] * 3)
test_with_message(q, stream, ["", "lolllllll"])
test_with_message(q, stream, ["hello", ""])
test_with_message(q, stream, "I am a stabbing robot".split(" "))
-# This is the UTF-8 encoding of U+FDD2, which is not a valid Unicode character.
-WELL_FORMED_BUT_INVALID_UTF8_BYTES = "\xef\xb7\x92"
+# This is the UTF-8 encoding of U+D800, which is not valid
+# (not even as a noncharacter). We previously did this test with
+# noncharacters, but Unicode Corrigendum #9 explicitly allows noncharacters
+# to be interchanged, GLib 2.36 allows them when validating UTF-8,
+# and D-Bus 1.6.10 will do likewise.
+WELL_FORMED_BUT_INVALID_UTF8_BYTES = "\xed\xa0\x80"
def test_with_message(q, stream, parts):
invalid_utf8 = WELL_FORMED_BUT_INVALID_UTF8_BYTES.join(
@@ -42,10 +46,17 @@ def test_with_message(q, stream, parts):
# Don't make any assumption about how many U+FFFD REPLACEMENT CHARACTERs
# are used to replace surprising bytes.
- received_parts = [ part for part in content.split(u"\ufffd")
+ received_parts = [ part for part in re.split(u"\ufffd|\\?", content)
if part != u''
]
- assertEquals(filter(lambda s: s != u'', parts), received_parts)
+
+ if parts[0] == u'björk':
+ # The valid UTF-8 gets lost in transit, because we fall back
+ # to assuming ASCII when g_convert() fails (this didn't happen
+ # when we tested with noncharacters - oh well).
+ assertEquals(['bj', 'rk', 'bj', 'rk', 'bj', 'rk'], received_parts)
+ else:
+ assertEquals(filter(lambda s: s != u'', parts), received_parts)
if __name__ == '__main__':
exec_test(test)