Fix bug when link/guid contains non-ASCII characters by encoding the db keys as UTF-8
author: Brett Parker <iDunno@sommitrealweird.co.uk>
Fri, 12 Jun 2009 09:55:18 +0000 (10:55 +0100)
committer: Brett Parker <iDunno@sommitrealweird.co.uk>
Fri, 12 Jun 2009 09:55:18 +0000 (10:55 +0100)
rss2maildir.py

index 563da506da2d8f95307e44c38e6e0392882d3793..533e34d614c810bda43514a118f1677dcfaffe4f 100755 (executable)
@@ -692,17 +692,20 @@ def parse_and_deliver(maildir, url, statedir):
 
         prevmessageid = None
 
 
         prevmessageid = None
 
+        db_guid_key = (url + u'|' + item["guid"]).encode("utf-8")
+        db_link_key = (url + u'|' + item["link"]).encode("utf-8")
+
         # check if there's a guid too - if that exists and we match the md5,
         # return
         if item.has_key("guid"):
         # check if there's a guid too - if that exists and we match the md5,
         # return
         if item.has_key("guid"):
-            if db.has_key(url + "|" + item["guid"]):
-                data = db[url + "|" + item["guid"]]
+            if db.has_key(db_guid_key):
+                data = db[db_guid_key]
                 data = cgi.parse_qs(data)
                 if data["contentmd5"][0] == md5sum:
                     continue
 
                 data = cgi.parse_qs(data)
                 if data["contentmd5"][0] == md5sum:
                     continue
 
-        if db.has_key(url + "|" + item["link"]):
-            data = db[url + "|" + item["link"]]
+        if db.has_key(db_link_key):
+            data = db[db_link_key]
             data = cgi.parse_qs(data)
             if data.has_key("message-id"):
                 prevmessageid = data["message-id"][0]
             data = cgi.parse_qs(data)
             if data.has_key("message-id"):
                 prevmessageid = data["message-id"][0]
@@ -790,25 +793,25 @@ def parse_and_deliver(maildir, url, statedir):
                 ("created", createddate), \
                 ("contentmd5", md5sum) \
                 ))
                 ("created", createddate), \
                 ("contentmd5", md5sum) \
                 ))
-            db[url + "|" + item["guid"]] = data
+            db[db_guid_key] = data
             try:
             try:
-                data = db[url + "|" + item["link"]]
+                data = db[db_link_key]
                 data = cgi.parse_qs(data)
                 newdata = urllib.urlencode(( \
                     ("message-id", messageid), \
                     ("created", data["created"][0]), \
                     ("contentmd5", data["contentmd5"][0]) \
                     ))
                 data = cgi.parse_qs(data)
                 newdata = urllib.urlencode(( \
                     ("message-id", messageid), \
                     ("created", data["created"][0]), \
                     ("contentmd5", data["contentmd5"][0]) \
                     ))
-                db[url + "|" + item["link"]] = newdata
+                db[db_link_key] = newdata
             except:
             except:
-                db[url + "|" + item["link"]] = data
+                db[db_link_key] = data
         else:
             data = urllib.urlencode(( \
                 ("message-id", messageid), \
                 ("created", createddate), \
                 ("contentmd5", md5sum) \
                 ))
         else:
             data = urllib.urlencode(( \
                 ("message-id", messageid), \
                 ("created", createddate), \
                 ("contentmd5", md5sum) \
                 ))
-            db[url + "|" + item["link"]] = data
+            db[db_link_key] = data
 
     if headers:
         data = []
 
     if headers:
         data = []