From: Brett Parker
Date: Fri, 21 Dec 2007 15:40:51 +0000 (+0000)
Subject: * improve handling of unicode data
X-Git-Url: https://git.sommitrealweird.co.uk/rss2maildir.git/commitdiff_plain/84246cc1794eee3af04f170e4f46c1e886dbf838?ds=inline;hp=9e17dd6f0d7309794355fdc73fb796de422c76a7

* improve handling of unicode data
---

diff --git a/rss2maildir.py b/rss2maildir.py
index 6a319f4..cacf1ee 100755
--- a/rss2maildir.py
+++ b/rss2maildir.py
@@ -199,8 +199,10 @@ class HTML2Text(HTMLParser):
             self.text = self.text \
                 + u'\n> ' \
                 + u'\n> '.join( \
-                    [a.strip() for a in textwrap.wrap(self.blockquote, 68)] \
-                ).encode("utf-8") \
+                    [a.strip() \
+                        for a in textwrap.wrap( \
+                            self.blockquote, 68)] \
+                ) \
                 + u'\n'
             self.inblockquote = False
             self.blockquote = u''
@@ -248,11 +250,11 @@ class HTML2Text(HTMLParser):
             entity = "&" + name + ";"
 
         if self.inparagraph:
-            self.currentparagraph = self.currentparagraph + entity
+            self.currentparagraph = self.currentparagraph + unicode(entity, "utf-8")
         elif self.inblockquote:
-            self.blockquote = self.blockquote + entity
+            self.blockquote = self.blockquote + unicode(entity, "utf-8")
         else:
-            self.text = self.text + entity
+            self.text = self.text + unicode(entity, "utf-8")
 
     def gettext(self):
         data = self.text