From: Brett Parker Date: Tue, 30 Apr 2013 20:17:40 +0000 (+0100) Subject: More utf-8 handling for images X-Git-Url: https://git.sommitrealweird.co.uk/rss2maildir.git/commitdiff_plain/aa4dd6a06ba1dd47704ac7dd6bcaa062673162d1?hp=0cecf7db4b4c9b9e32a161006db49d38672df8aa More utf-8 handling for images - iff we get a string object rather than a unicode one, decode it from utf-8 --- diff --git a/rss2maildir.py b/rss2maildir.py index dc0427a..a9abe4d 100755 --- a/rss2maildir.py +++ b/rss2maildir.py @@ -307,9 +307,15 @@ class HTML2Text(HTMLParser): url = u'' for attr in attrs: if attr[0] == 'alt': - alt = attr[1] + if isinstance(attr[1], str): + alt = u'%s' %(attr[1].decode("utf-8")) + else: + alt = attr[1] elif attr[0] == 'src': - url = attr[1] + if isinstance(attr[1], str): + url = u'%s' %(attr[1].decode("utf-8")) + else: + url = attr[1] if url: if alt: if self.images.has_key(alt):