git.sommitrealweird.co.uk Git - rss2maildir.git/blobdiff - rss2maildir.py
More utf-8 handling for images
[rss2maildir.git] / rss2maildir.py
index 67121c53bd2c2fae93e6b8d4c48d0662d2a57055..a9abe4d1496cb676ffe4e3efe41d623431835dbe 100755 (executable)
@@ -39,7 +39,11 @@ from optparse import OptionParser
 from ConfigParser import SafeConfigParser
 
 from base64 import b64encode
 from ConfigParser import SafeConfigParser
 
 from base64 import b64encode
-import md5
+
+if sys.version_info[0] == 2 and sys.version_info[1] >= 6:
+    import hashlib as md5
+else:
+    import md5
 
 import cgi
 import dbm
 
 import cgi
 import dbm
@@ -303,9 +307,15 @@ class HTML2Text(HTMLParser):
         url = u''
         for attr in attrs:
             if attr[0] == 'alt':
         url = u''
         for attr in attrs:
             if attr[0] == 'alt':
-                alt = attr[1].decode('utf-8')
+                if isinstance(attr[1], str):
+                    alt = u'%s' %(attr[1].decode("utf-8"))
+                else:
+                    alt = attr[1]
             elif attr[0] == 'src':
             elif attr[0] == 'src':
-                url = attr[1].decode('utf-8')
+                if isinstance(attr[1], str):
+                    url = u'%s' %(attr[1].decode("utf-8"))
+                else:
+                    url = attr[1]
         if url:
             if alt:
                 if self.images.has_key(alt):
         if url:
             if alt:
                 if self.images.has_key(alt):
@@ -608,10 +618,17 @@ def open_url(method, url):
         (type, rest) = urllib.splittype(url)
         (host, path) = urllib.splithost(rest)
         (host, port) = urllib.splitport(host)
         (type, rest) = urllib.splittype(url)
         (host, path) = urllib.splithost(rest)
         (host, port) = urllib.splitport(host)
-        if port == None:
+        if type == "https":
+            if port == None:
+                port = 443
+        elif port == None:
             port = 80
         try:
             port = 80
         try:
-            conn = httplib.HTTPConnection("%s:%s" %(host, port))
+            conn = None
+            if type == "http":
+                conn = httplib.HTTPConnection("%s:%s" %(host, port))
+            else:
+                conn = httplib.HTTPSConnection("%s:%s" %(host, port))
             conn.request(method, path)
             response = conn.getresponse()
             if response.status in [301, 302, 303, 307]:
             conn.request(method, path)
             response = conn.getresponse()
             if response.status in [301, 302, 303, 307]:
@@ -730,8 +747,8 @@ def parse_and_deliver(maildir, url, statedir):
                 ]) + "@" + socket.gethostname() + ">"
         msg.add_header("Message-ID", messageid)
         msg.set_unixfrom("\"%s\" <rss2maildir@localhost>" %(url))
                 ]) + "@" + socket.gethostname() + ">"
         msg.add_header("Message-ID", messageid)
         msg.set_unixfrom("\"%s\" <rss2maildir@localhost>" %(url))
-        msg.add_header("From", "\"%s\" <rss2maildir@localhost>" %(author))
-        msg.add_header("To", "\"%s\" <rss2maildir@localhost>" %(url))
+        msg.add_header("From", "\"%s\" <rss2maildir@localhost>" %(author.encode("utf-8")))
+        msg.add_header("To", "\"%s\" <rss2maildir@localhost>" %(url.encode("utf-8")))
         if prevmessageid:
             msg.add_header("References", prevmessageid)
         createddate = datetime.datetime.now() \
         if prevmessageid:
             msg.add_header("References", prevmessageid)
         createddate = datetime.datetime.now() \