From 84246cc1794eee3af04f170e4f46c1e886dbf838 Mon Sep 17 00:00:00 2001 From: Brett Parker Date: Fri, 21 Dec 2007 15:40:51 +0000 Subject: [PATCH 1/1] * improve handling of unicode data --- rss2maildir.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/rss2maildir.py b/rss2maildir.py index 6a319f4..cacf1ee 100755 --- a/rss2maildir.py +++ b/rss2maildir.py @@ -199,8 +199,10 @@ class HTML2Text(HTMLParser): self.text = self.text \ + u'\n> ' \ + u'\n> '.join( \ - [a.strip() for a in textwrap.wrap(self.blockquote, 68)] \ - ).encode("utf-8") \ + [a.strip() \ + for a in textwrap.wrap( \ + self.blockquote, 68)] \ + ) \ + u'\n' self.inblockquote = False self.blockquote = u'' @@ -248,11 +250,11 @@ class HTML2Text(HTMLParser): entity = "&" + name + ";" if self.inparagraph: - self.currentparagraph = self.currentparagraph + entity + self.currentparagraph = self.currentparagraph + unicode(entity, "utf-8") elif self.inblockquote: - self.blockquote = self.blockquote + entity + self.blockquote = self.blockquote + unicode(entity, "utf-8") else: - self.text = self.text + entity + self.text = self.text + unicode(entity, "utf-8") def gettext(self): data = self.text -- 2.39.5