From 7517d3b016968c154fea96215def1325a25b6bdc Mon Sep 17 00:00:00 2001
From: Brett Parker
Date: Wed, 16 Apr 2008 23:45:07 +0100
Subject: [PATCH] Fix bug in character reference handling code

---
Review note: the hexadecimal fallback must inspect `name`; `entity` is
still unbound when the decimal conversion raises, so testing
`entity[0]` would itself fail before any hex reference is decoded.
The handlers are narrowed to the errors int()/unichr() raise.

 rss2maildir.py | 14 +++++++++++++-
 1 file changed, 13 insertions(+), 1 deletion(-)

diff --git a/rss2maildir.py b/rss2maildir.py
index ff2901c..84a9f09 100755
--- a/rss2maildir.py
+++ b/rss2maildir.py
@@ -559,7 +559,19 @@ class HTML2Text(HTMLParser):
         self.curdata = self.curdata + data.decode("utf-8")
 
     def handle_charref(self, name):
-        entity = unichr(int(name))
+        # Per the HTMLParser documentation, name is the reference text
+        # between '&#' and ';': decimal digits, or 'x'/'X' plus hex digits.
+        try:
+            entity = unichr(int(name))
+        except (ValueError, OverflowError):
+            if name[:1] in ('x', 'X'):
+                try:
+                    entity = unichr(int('0%s' %(name,), 16))
+                except (ValueError, OverflowError):
+                    # Not a usable code point: keep the reference text.
+                    entity = u'#%s' %(name,)
+            else:
+                entity = u'#%s' %(name,)
         self.curdata = self.curdata + unicode(entity.encode('utf-8'), \
             "utf-8")
 
-- 
2.30.2