X-Git-Url: https://git.sommitrealweird.co.uk/rss2maildir.git/blobdiff_plain/30903dc148a14cd0f59fb1e40132092e389aec4b..9337881574f8f9370fecabfeb9a52e4341568cd9:/rss2maildir.py diff --git a/rss2maildir.py b/rss2maildir.py index d36b489..9473dd0 100755 --- a/rss2maildir.py +++ b/rss2maildir.py @@ -188,7 +188,7 @@ class HTML2Text(HTMLParser): u'dt', u'dd', u'div', - #u'blockquote', + u'blockquote', ] liststarttags = [ @@ -313,12 +313,11 @@ class HTML2Text(HTMLParser): else: while self.images.has_key(alt): alt = alt + "_" - self.images[alt]["url"] = url + self.images[alt] = {"url": url} self.curdata = self.curdata \ + u'|%s|' %(alt,) else: - self.images[alt] = {} - self.images[alt]["url"] = url + self.images[alt] = {"url": url} self.curdata = self.curdata \ + u'|%s|' %(alt,) else: @@ -408,11 +407,11 @@ class HTML2Text(HTMLParser): quote = unicode( \ " ".join(self.curdata.encode("utf-8").strip().split()), \ "utf-8") - seperator = u'\n' + u' ' * self.indentlevel + u'> ' + seperator = u'\n' + u' ' * self.indentlevel + u' ' if len(self.text) > 0 and self.text[-1] != u'\n': self.text = self.text + u'\n' self.text = self.text \ - + u'> ' \ + + u' ' \ + seperator.join( \ textwrap.wrap( \ quote, \ @@ -557,12 +556,15 @@ class HTML2Text(HTMLParser): self.opentags.append(u'p') self.curdata = self.curdata + data.decode("utf-8") + def handle_charref(self, name): + entity = unichr(int(name)) + self.curdata = self.curdata + unicode(entity.encode('utf-8'), \ + "utf-8") + def handle_entityref(self, name): entity = name if HTML2Text.entities.has_key(name): entity = HTML2Text.entities[name] - elif name[0] == "#": - entity = unichr(int(name[1:])) else: entity = "&" + name + ";"