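* Improve handling of Unicode data: keep wrapped blockquote text as unicode (no premature UTF-8 encode) and decode entity strings to unicode before appending them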

author Brett Parker <iDunno@sommitrealweird.co.uk>

Fri, 21 Dec 2007 15:40:51 +0000 (15:40 +0000)

committer Brett Parker <iDunno@sommitrealweird.co.uk>

Fri, 21 Dec 2007 15:40:51 +0000 (15:40 +0000)
author Brett Parker <iDunno@sommitrealweird.co.uk>
Fri, 21 Dec 2007 15:40:51 +0000 (15:40 +0000)
committer Brett Parker <iDunno@sommitrealweird.co.uk>
Fri, 21 Dec 2007 15:40:51 +0000 (15:40 +0000)
diff --git a/rss2maildir.py b/rss2maildir.py

index 6a319f47ad2ba14a44df777dfd83b21c7cf0d9da..cacf1ee8eaabe04a6d5fc20d21c3e9e8d664c9c1 100755 (executable)
--- a/rss2maildir.py
+++ b/rss2maildir.py
@@ -199,8 +199,10 @@ class HTML2Text(HTMLParser):
              self.text = self.text \
                  + u'\n> ' \
                  + u'\n> '.join( \
-                    [a.strip() for a in textwrap.wrap(self.blockquote, 68)] \
-                    ).encode("utf-8") \
+                    [a.strip() \
+                        for a in textwrap.wrap( \
+                            self.blockquote, 68)] \
+                    ) \
                  + u'\n'
              self.inblockquote = False
              self.blockquote = u''
@@ -248,11 +250,11 @@ class HTML2Text(HTMLParser):
              entity = "&" + name + ";"
  
          if self.inparagraph:
-            self.currentparagraph = self.currentparagraph + entity
+            self.currentparagraph = self.currentparagraph + unicode(entity, "utf-8")
          elif self.inblockquote:
-            self.blockquote = self.blockquote + entity
+            self.blockquote = self.blockquote + unicode(entity, "utf-8")
          else:
-            self.text = self.text + entity
+            self.text = self.text + unicode(entity, "utf-8")
  
      def gettext(self):
          data = self.text
author	Brett Parker <iDunno@sommitrealweird.co.uk>
	Fri, 21 Dec 2007 15:40:51 +0000 (15:40 +0000)
committer	Brett Parker <iDunno@sommitrealweird.co.uk>
	Fri, 21 Dec 2007 15:40:51 +0000 (15:40 +0000)