summary | refs | log | tree | commit | diff
path: root/sunshine
diff options
context:
space:
mode:
author Krzysztof Klinikowski <kkszysiu@gmail.com> 2010-04-14 16:46:26 +0200
committer Krzysztof Klinikowski <kkszysiu@gmail.com> 2010-04-14 16:46:26 +0200
commit c07646c432a593d37c54223d08d5607b19b9ee4b (patch)
tree 4e06a9d5849d3562edc0b278f315698c983cbd16 /sunshine
parent 01620e1797daadf3d61fd5ea862038b3b5648448 (diff)
HTML Entities in UTF-8 messages (still needs testing).
Diffstat (limited to 'sunshine')
-rw-r--r-- sunshine/channel/text.py 6
-rw-r--r-- sunshine/connection.py 6
-rw-r--r-- sunshine/util/decorator.py 40
3 files changed, 45 insertions, 7 deletions
diff --git a/sunshine/channel/text.py b/sunshine/channel/text.py
index 9d01f74..d37b6d0 100644
--- a/sunshine/channel/text.py
+++ b/sunshine/channel/text.py
@@ -24,7 +24,7 @@ import time
import telepathy
-from sunshine.util.decorator import async
+from sunshine.util.decorator import async, escape
from sunshine.handle import SunshineHandleFactory
__all__ = ['SunshineTextChannel']
@@ -47,7 +47,9 @@ class SunshineTextChannel(telepathy.server.ChannelTypeText):
if message_type == telepathy.CHANNEL_TEXT_MESSAGE_TYPE_NORMAL:
logger.info("Sending message to %s, id %s, body: '%s'" % (str(self.handle.name), str(self.handle.id), unicode(text)))
msg = text.decode('UTF-8').encode('windows-1250', 'replace')
- self.conn.gadu_client.sendTo(int(self.handle.name), str(text), str(msg))
+ #gg_text = escape(text.decode('UTF-8')).encode('UTF-8').replace('<', '&lt;').replace('>', '&gt;')
+ gg_text = text.decode('UTF-8', 'xmlcharrefreplace').replace('<', '&lt;').replace('>', '&gt;')
+ self.conn.gadu_client.sendTo(int(self.handle.name), str(gg_text), str(msg))
else:
raise telepathy.NotImplemented("Unhandled message type")
self.Sent(int(time.time()), message_type, text)
diff --git a/sunshine/connection.py b/sunshine/connection.py
index 87bda75..c480f71 100644
--- a/sunshine/connection.py
+++ b/sunshine/connection.py
@@ -42,7 +42,7 @@ from sunshine.handle import SunshineHandleFactory
from sunshine.capabilities import SunshineCapabilities
from sunshine.contacts import SunshineContacts
from sunshine.channel_manager import SunshineChannelManager
-from sunshine.util.decorator import async, stripHTML
+from sunshine.util.decorator import async, stripHTML, unescape
__all__ = ['SunshineConfig', 'GaduClientFactory', 'SunshineConnection']
@@ -658,9 +658,9 @@ class SunshineConnection(telepathy.server.Connection,
if msg.content.html_message:
#we need to strip all html tags
- text = stripHTML(msg.content.html_message).replace('&lt;', '<').replace('&gt;', '>')
+ text = unescape(stripHTML(msg.content.html_message))
else:
- text = (msg.content.plain_message).decode('windows-1250')
+ text = unescape((msg.content.plain_message).decode('windows-1250'))
message = "%s" % unicode(str(text).replace('\x00', '').replace('\r', ''))
diff --git a/sunshine/util/decorator.py b/sunshine/util/decorator.py
index fe53b33..7b86fbe 100644
--- a/sunshine/util/decorator.py
+++ b/sunshine/util/decorator.py
@@ -25,17 +25,53 @@ import time
import gobject
+import htmlentitydefs
import re
-__all__ = ['stripHTML', 'decorator', 'rw_property', 'deprecated', 'unstable', 'async',
+__all__ = ['unescape', 'escape', 'stripHTML', 'decorator', 'rw_property', 'deprecated', 'unstable', 'async',
'throttled']
+##
+# Removes HTML or XML character references and entities from a text string.
+#
+# @param text The HTML (or XML) source text.
+# @return The plain text, as a Unicode string, if necessary.
+def unescape(text):
+ def fixup(m):
+ text = m.group(0)
+ if text[:2] == "&#":
+ # character reference
+ try:
+ if text[:3] == "&#x":
+ return unichr(int(text[3:-1], 16))
+ else:
+ return unichr(int(text[2:-1]))
+ except ValueError:
+ pass
+ else:
+ # named entity
+ try:
+ text = unichr(htmlentitydefs.name2codepoint[text[1:-1]])
+ except KeyError:
+ pass
+ return text # leave as is
+ return re.sub("&#?\w+;", fixup, text)
+
+def escape(u):
+ htmlentities = list()
+
+ for c in u:
+ try:
+ htmlentities.append('&%s;' % htmlentitydefs.codepoint2name[ord(c)])
+ except KeyError:
+ htmlentities.append(c)
+ return ''.join(htmlentities)
+
def stripHTML(string):
"Replacing HTML-like tags from text."
p = re.compile(r'<.*?>')
return p.sub('', string)
-
def decorator(function):
"""decorator to be used on decorators, it preserves the docstring and
function attributes of functions to which it is applied."""