X-Git-Url: https://git.sommitrealweird.co.uk/rss2maildir.git/blobdiff_plain/9337881574f8f9370fecabfeb9a52e4341568cd9..a53f1589ffaa52371e9eed15ff1a6ffd54997d89:/rss2maildir.py?ds=inline diff --git a/rss2maildir.py b/rss2maildir.py index 9473dd0..563da50 100755 --- a/rss2maildir.py +++ b/rss2maildir.py @@ -44,6 +44,8 @@ import md5 import cgi import dbm +import re + from HTMLParser import HTMLParser class HTML2Text(HTMLParser): @@ -557,7 +559,16 @@ class HTML2Text(HTMLParser): self.curdata = self.curdata + data.decode("utf-8") def handle_charref(self, name): - entity = unichr(int(name)) + try: + entity = unichr(int(name)) + except: + if name[0] == 'x': + try: + entity = unichr(int('0%s' %(name,), 16)) + except: + entity = u'#%s' %(name,) + else: + entity = u'#%s' %(name,) self.curdata = self.curdata + unicode(entity.encode('utf-8'), \ "utf-8") @@ -672,7 +683,10 @@ def parse_and_deliver(maildir, url, statedir): if item.has_key("content"): content = item["content"][0]["value"] else: - content = item["summary"] + if item.has_key("description"): + content = item["description"] + else: + content = u'' md5sum = md5.md5(content.encode("utf-8")).hexdigest() @@ -725,7 +739,10 @@ def parse_and_deliver(maildir, url, statedir): pass msg.add_header("Date", createddate) subj_gen = HTML2Text() - subj_gen.feed(item["title"].encode("utf-8")) + title = item["title"] + title = re.sub(u'<', u'&lt;', title) + title = re.sub(u'>', u'&gt;', title) + subj_gen.feed(title.encode("utf-8")) msg.add_header("Subject", subj_gen.gettext()) msg.set_default_type("text/plain")