projects / rss2maildir.git / blobdiff
commit
grep
author
committer
pickaxe
?
search:
re
summary
|
shortlog
|
log
|
commit
|
commitdiff
|
tree
raw
|
inline
| side by side
Fix some entity handling
[rss2maildir.git]
/
rss2maildir.py
diff --git a/rss2maildir.py b/rss2maildir.py
index 86387f96ac01c2d8c1dfa12e9e6f4efddef92d27..9473dd009e3d5ebead31933d7ba110abd4202162 100755
(executable)
--- a/rss2maildir.py
+++ b/rss2maildir.py
@@ -188,7 +188,7 @@ class HTML2Text(HTMLParser):
u'dt',
u'dd',
u'div',
u'dt',
u'dd',
u'div',
- # u'blockquote',
+ u'blockquote',
]
liststarttags = [
]
liststarttags = [
@@ -407,11 +407,11 @@ class HTML2Text(HTMLParser):
quote = unicode( \
" ".join(self.curdata.encode("utf-8").strip().split()), \
"utf-8")
quote = unicode( \
" ".join(self.curdata.encode("utf-8").strip().split()), \
"utf-8")
- seperator = u'\n' + u' ' * self.indentlevel + u'
>
'
+ seperator = u'\n' + u' ' * self.indentlevel + u'
'
if len(self.text) > 0 and self.text[-1] != u'\n':
self.text = self.text + u'\n'
self.text = self.text \
if len(self.text) > 0 and self.text[-1] != u'\n':
self.text = self.text + u'\n'
self.text = self.text \
- + u'
>
' \
+ + u'
' \
+ seperator.join( \
textwrap.wrap( \
quote, \
+ seperator.join( \
textwrap.wrap( \
quote, \
@@ -556,12 +556,15 @@ class HTML2Text(HTMLParser):
self.opentags.append(u'p')
self.curdata = self.curdata + data.decode("utf-8")
self.opentags.append(u'p')
self.curdata = self.curdata + data.decode("utf-8")
+ def handle_charref(self, name):
+ entity = unichr(int(name))
+ self.curdata = self.curdata + unicode(entity.encode('utf-8'), \
+ "utf-8")
+
def handle_entityref(self, name):
entity = name
if HTML2Text.entities.has_key(name):
entity = HTML2Text.entities[name]
def handle_entityref(self, name):
entity = name
if HTML2Text.entities.has_key(name):
entity = HTML2Text.entities[name]
- elif name[0] == "#":
- entity = unichr(int(name[1:]))
else:
entity = "&" + name + ";"
else:
entity = "&" + name + ";"