More utf-8 handling for images
author Brett Parker <iDunno@sommitrealweird.co.uk>
Tue, 30 Apr 2013 20:17:40 +0000 (21:17 +0100)
committer Brett Parker <iDunno@sommitrealweird.co.uk>
Tue, 30 Apr 2013 20:17:40 +0000 (21:17 +0100)
    - iff we get a string object rather than a unicode one, decode it from utf-8

rss2maildir.py

index dc0427a..a9abe4d 100755 (executable)
@@ -307,9 +307,15 @@ class HTML2Text(HTMLParser):
         url = u''
         for attr in attrs:
             if attr[0] == 'alt':
-                alt = attr[1]
+                if isinstance(attr[1], str):
+                    alt = u'%s' %(attr[1].decode("utf-8"))
+                else:
+                    alt = attr[1]
             elif attr[0] == 'src':
-                url = attr[1]
+                if isinstance(attr[1], str):
+                    url = u'%s' %(attr[1].decode("utf-8"))
+                else:
+                    url = attr[1]
         if url:
             if alt:
                 if self.images.has_key(alt):