From: Brett Parker Date: Fri, 21 Dec 2007 00:31:37 +0000 (+0000) Subject: * improved entity handling X-Git-Url: https://git.sommitrealweird.co.uk/rss2maildir.git/commitdiff_plain/42c55a085d380465b340715c70c8f1aab82bef69?ds=inline;hp=2408a39553e5f6d8de18325a16f67b70595f6dac * improved entity handling --- diff --git a/rss2maildir.py b/rss2maildir.py index fd14826..641ffb1 100755 --- a/rss2maildir.py +++ b/rss2maildir.py @@ -157,10 +157,20 @@ class HTML2Text(HTMLParser): self.text = self.text + data.strip() + " " def handle_entityref(self, name): + entity = name if entities.has_key(name.lower()): - self.text = self.text + entities[name.lower()] + entity = entities[name.lower()] + elif name[0] == "#": + entity = unichr(int(name[1:])) else: - self.text = self.text + "&" + name + ";" + entity = "&" + name + ";" + + if self.inparagraph: + self.currentparagraph = self.currentparagraph + entity + elif self.inblockquote: + self.blockquote = self.blockquote + entity + else: + self.text = self.text + entity def gettext(self): data = self.text