diff options
author | Krzysztof Klinikowski <kkszysiu@gmail.com> | 2010-04-14 16:46:26 +0200 |
---|---|---|
committer | Krzysztof Klinikowski <kkszysiu@gmail.com> | 2010-04-14 16:46:26 +0200 |
commit | c07646c432a593d37c54223d08d5607b19b9ee4b (patch) | |
tree | 4e06a9d5849d3562edc0b278f315698c983cbd16 /sunshine | |
parent | 01620e1797daadf3d61fd5ea862038b3b5648448 (diff) |
HTML Entities in UTF-8 messages (still needs testing).
Diffstat (limited to 'sunshine')
-rw-r--r-- | sunshine/channel/text.py | 6 | ||||
-rw-r--r-- | sunshine/connection.py | 6 | ||||
-rw-r--r-- | sunshine/util/decorator.py | 40 |
3 files changed, 45 insertions, 7 deletions
diff --git a/sunshine/channel/text.py b/sunshine/channel/text.py index 9d01f74..d37b6d0 100644 --- a/sunshine/channel/text.py +++ b/sunshine/channel/text.py @@ -24,7 +24,7 @@ import time import telepathy -from sunshine.util.decorator import async +from sunshine.util.decorator import async, escape from sunshine.handle import SunshineHandleFactory __all__ = ['SunshineTextChannel'] @@ -47,7 +47,9 @@ class SunshineTextChannel(telepathy.server.ChannelTypeText): if message_type == telepathy.CHANNEL_TEXT_MESSAGE_TYPE_NORMAL: logger.info("Sending message to %s, id %s, body: '%s'" % (str(self.handle.name), str(self.handle.id), unicode(text))) msg = text.decode('UTF-8').encode('windows-1250', 'replace') - self.conn.gadu_client.sendTo(int(self.handle.name), str(text), str(msg)) + #gg_text = escape(text.decode('UTF-8')).encode('UTF-8').replace('<', '&lt;').replace('>', '&gt;') + gg_text = text.decode('UTF-8', 'xmlcharrefreplace').replace('<', '&lt;').replace('>', '&gt;') + self.conn.gadu_client.sendTo(int(self.handle.name), str(gg_text), str(msg)) else: raise telepathy.NotImplemented("Unhandled message type") self.Sent(int(time.time()), message_type, text) diff --git a/sunshine/connection.py b/sunshine/connection.py index 87bda75..c480f71 100644 --- a/sunshine/connection.py +++ b/sunshine/connection.py @@ -42,7 +42,7 @@ from sunshine.handle import SunshineHandleFactory from sunshine.capabilities import SunshineCapabilities from sunshine.contacts import SunshineContacts from sunshine.channel_manager import SunshineChannelManager -from sunshine.util.decorator import async, stripHTML +from sunshine.util.decorator import async, stripHTML, unescape __all__ = ['SunshineConfig', 'GaduClientFactory', 'SunshineConnection'] @@ -658,9 +658,9 @@ class SunshineConnection(telepathy.server.Connection, if msg.content.html_message: #we need to strip all html tags - text = stripHTML(msg.content.html_message).replace('&lt;', '<').replace('&gt;', '>') + text = unescape(stripHTML(msg.content.html_message)) else: - 
text = (msg.content.plain_message).decode('windows-1250') + text = unescape((msg.content.plain_message).decode('windows-1250')) message = "%s" % unicode(str(text).replace('\x00', '').replace('\r', '')) diff --git a/sunshine/util/decorator.py b/sunshine/util/decorator.py index fe53b33..7b86fbe 100644 --- a/sunshine/util/decorator.py +++ b/sunshine/util/decorator.py @@ -25,17 +25,53 @@ import time import gobject +import htmlentitydefs import re -__all__ = ['stripHTML', 'decorator', 'rw_property', 'deprecated', 'unstable', 'async', +__all__ = ['unescape', 'escape', 'stripHTML', 'decorator', 'rw_property', 'deprecated', 'unstable', 'async', 'throttled'] +## +# Removes HTML or XML character references and entities from a text string. +# +# @param text The HTML (or XML) source text. +# @return The plain text, as a Unicode string, if necessary. +def unescape(text): + def fixup(m): + text = m.group(0) + if text[:2] == "&#": + # character reference + try: + if text[:3] == "&#x": + return unichr(int(text[3:-1], 16)) + else: + return unichr(int(text[2:-1])) + except ValueError: + pass + else: + # named entity + try: + text = unichr(htmlentitydefs.name2codepoint[text[1:-1]]) + except KeyError: + pass + return text # leave as is + return re.sub("&#?\w+;", fixup, text) + +def escape(u): + htmlentities = list() + + for c in u: + try: + htmlentities.append('&%s;' % htmlentitydefs.codepoint2name[ord(c)]) + except KeyError: + htmlentities.append(c) + return ''.join(htmlentities) + def stripHTML(string): "Replacing HTML-like tags from text." p = re.compile(r'<.*?>') return p.sub('', string) - def decorator(function): """decorator to be used on decorators, it preserves the docstring and function attributes of functions to which it is applied.""" |