projects
/
rss2maildir.git
/ commitdiff
commit
grep
author
committer
pickaxe
?
search:
re
summary
|
shortlog
|
log
|
commit
| commitdiff |
tree
raw
|
patch
| inline |
side by side
(parent:
478a1d0
)
Fix bug in character reference handling code
author
Brett Parker
<iDunno@sommitrealweird.co.uk>
Wed, 16 Apr 2008 22:45:07 +0000
(23:45 +0100)
committer
Brett Parker
<iDunno@sommitrealweird.co.uk>
Wed, 16 Apr 2008 22:45:07 +0000
(23:45 +0100)
rss2maildir.py
patch
|
blob
|
history
diff --git
a/rss2maildir.py
b/rss2maildir.py
index ff2901c5ffab0414fa563975801c5e6fd76d315f..84a9f0964408cd9f216d45be9b1921302f3312df 100755
(executable)
--- a/
rss2maildir.py
+++ b/
rss2maildir.py
@@
-559,7
+559,16
@@
class HTML2Text(HTMLParser):
self.curdata = self.curdata + data.decode("utf-8")
def handle_charref(self, name):
- entity = unichr(int(name))
+ try:
+ entity = unichr(int(name))
+ except:
+ if entity[0] == 'x':
+ try:
+ entity = unichr(int('0%s' %(name,), 16))
+ except:
+ entity = u'#%s' %(name,)
+ else:
+ entity = u'#%s' %(name,)
self.curdata = self.curdata + unicode(entity.encode('utf-8'), \
"utf-8")