From 42c55a085d380465b340715c70c8f1aab82bef69 Mon Sep 17 00:00:00 2001 From: Brett Parker Date: Fri, 21 Dec 2007 00:31:37 +0000 Subject: [PATCH 1/1] * improved entity handling --- rss2maildir.py | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/rss2maildir.py b/rss2maildir.py index fd14826..641ffb1 100755 --- a/rss2maildir.py +++ b/rss2maildir.py @@ -157,10 +157,20 @@ class HTML2Text(HTMLParser): self.text = self.text + data.strip() + " " def handle_entityref(self, name): + entity = name if entities.has_key(name.lower()): - self.text = self.text + entities[name.lower()] + entity = entities[name.lower()] + elif name[0] == "#": + entity = unichr(int(name[1:])) else: - self.text = self.text + "&" + name + ";" + entity = "&" + name + ";" + + if self.inparagraph: + self.currentparagraph = self.currentparagraph + entity + elif self.inblockquote: + self.blockquote = self.blockquote + entity + else: + self.text = self.text + entity def gettext(self): data = self.text -- 2.39.5