X-Git-Url: https://git.sommitrealweird.co.uk/rss2maildir.git/blobdiff_plain/f92907698bb8e9edcdfc09b557c5e1ef2fbc22c1..478a1d0d963fbcd839f2fe42662dfbad6967705c:/rss2maildir.py diff --git a/rss2maildir.py b/rss2maildir.py index 47ba9dc..ff2901c 100755 --- a/rss2maildir.py +++ b/rss2maildir.py @@ -44,6 +44,8 @@ import md5 import cgi import dbm +import re + from HTMLParser import HTMLParser class HTML2Text(HTMLParser): @@ -556,12 +558,15 @@ class HTML2Text(HTMLParser): self.opentags.append(u'p') self.curdata = self.curdata + data.decode("utf-8") + def handle_charref(self, name): + entity = unichr(int(name)) + self.curdata = self.curdata + unicode(entity.encode('utf-8'), \ + "utf-8") + def handle_entityref(self, name): entity = name if HTML2Text.entities.has_key(name): entity = HTML2Text.entities[name] - elif name[0] == "#": - entity = unichr(int(name[1:])) else: entity = "&" + name + ";" @@ -722,7 +727,10 @@ def parse_and_deliver(maildir, url, statedir): pass msg.add_header("Date", createddate) subj_gen = HTML2Text() - subj_gen.feed(item["title"].encode("utf-8")) + title = item["title"].encode("utf-8") + title = re.sub(u'<', u'&lt;', title) + title = re.sub(u'>', u'&gt;', title) + subj_gen.feed(title) msg.add_header("Subject", subj_gen.gettext()) msg.set_default_type("text/plain")