git.sommitrealweird.co.uk Git - rss2maildir.git/blobdiff - rss2maildir.py
* Add redirect support
[rss2maildir.git] / rss2maildir.py
index 4402c43884ae5271a201de8529ebc46271a5fcdc..aa3b0ccac5badb4a6c21f46591470ab3e5fc0531 100755 (executable)
@@ -53,7 +53,7 @@ entities = {
     "pound": "£",
     "copy": "©",
     "apos": "'",
     "pound": "£",
     "copy": "©",
     "apos": "'",
-    "quote": "\"",
+    "quot": "\"",
     "nbsp": " ",
     }
 
     "nbsp": " ",
     }
 
@@ -252,25 +252,42 @@ class HTML2Text(HTMLParser):
             data = data + "\n".join(textwrap.wrap(self.currentparagraph, 70))
         return data
 
             data = data + "\n".join(textwrap.wrap(self.currentparagraph, 70))
         return data
 
+def open_url(method, url):
+    """Send an HTTP request for url, following redirects.
+
+    method is the HTTP verb (the callers in this file pass "HEAD" or
+    "GET") and url is the absolute URL to request.  At most three
+    requests are made in total, counting both followed redirects and
+    retries after a failed connection attempt.
+
+    Returns the httplib response object when a request is answered with
+    status 200, and None in every other case (no usable host in url,
+    non-200 final status, redirect without a target, or the request
+    limit being reached).
+
+    NOTE: requests are always made with httplib.HTTPConnection and a
+    default port of 80, whatever the scheme of url is.
+    """
+    import urlparse # local import: only needed to resolve Location headers
+
+    redirectcount = 0
+    while redirectcount < 3:
+        redirectcount = redirectcount + 1
+        rest = urllib.splittype(url)[1]
+        (host, path) = urllib.splithost(rest)
+        if not host:
+            # nothing to connect to; splitport() would choke on None
+            return None
+        (host, port) = urllib.splitport(host)
+        if port is None:
+            port = 80
+        if not path:
+            # "http://host" has no path part, but a request line needs one
+            path = "/"
+        conn = None
+        try:
+            conn = httplib.HTTPConnection("%s:%s" %(host, port))
+            conn.request(method, path)
+            response = conn.getresponse()
+        except Exception:
+            # best effort: a failed attempt uses up one try and is retried
+            if conn is not None:
+                conn.close()
+            continue
+        if response.status == 200:
+            # the caller reads from the response, so leave it open
+            return response
+        # any other response is not handed back, so release the connection
+        location = response.getheader("location")
+        conn.close()
+        if response.status not in [301, 302, 303, 307] or not location:
+            # a definitive non-success, or a redirect that names no target:
+            # repeating the very same request would not help
+            return None
+        # Location may be a relative reference; resolve it against url
+        url = urlparse.urljoin(url, location)
+    return None
+
 def parse_and_deliver(maildir, url, statedir):
     feedhandle = None
     headers = None
     # first check if we know about this feed already
     feeddb = dbm.open(os.path.join(statedir, "feeds"), "c")
 def parse_and_deliver(maildir, url, statedir):
     feedhandle = None
     headers = None
     # first check if we know about this feed already
     feeddb = dbm.open(os.path.join(statedir, "feeds"), "c")
-    # we need all the parts of the url 
-    (type, rest) = urllib.splittype(url)
-    (host, path) = urllib.splithost(rest)
-    (host, port) = urllib.splitport(host)
-    if port == None:
-        port = 80
     if feeddb.has_key(url):
         data = feeddb[url]
         data = cgi.parse_qs(data)
     if feeddb.has_key(url):
         data = feeddb[url]
         data = cgi.parse_qs(data)
-        # now do a head on the feed to see if it's been updated
-        conn = httplib.HTTPConnection("%s:%s" %(host, port))
-        conn.request("HEAD", path)
-        response = conn.getresponse()
-        headers = response.getheaders()
+        response = open_url("HEAD", url)
+        headers = None
+        if response:
+            headers = response.getheaders()
         ischanged = False
         try:
             for header in headers:
         ischanged = False
         try:
             for header in headers:
@@ -289,19 +306,23 @@ def parse_and_deliver(maildir, url, statedir):
         except:
             ischanged = True
         if ischanged:
         except:
             ischanged = True
         if ischanged:
-            conn = httplib.HTTPConnection("%s:%s" %(host, port))
-            conn.request("GET", path)
-            response = conn.getresponse()
-            headers = response.getheaders()
-            feedhandle = response
+            response = open_url("GET", url)
+            if response != None:
+                headers = response.getheaders()
+                feedhandle = response
+            else:
+                sys.stderr.write("Failed to fetch feed: %s\n" %(url))
+                return
         else:
             return # don't need to do anything, nothings changed.
     else:
         else:
             return # don't need to do anything, nothings changed.
     else:
-        conn = httplib.HTTPConnection("%s:%s" %(host, port))
-        conn.request("GET", path)
-        response = conn.getresponse()
-        headers = response.getheaders()
-        feedhandle = response
+        response = open_url("GET", url)
+        if response != None:
+            headers = response.getheaders()
+            feedhandle = response
+        else:
+            sys.stderr.write("Failed to fetch feed: %s\n" %(url))
+            return
 
     fp = feedparser.parse(feedhandle)
     db = dbm.open(os.path.join(statedir, "seen"), "c")
 
     fp = feedparser.parse(feedhandle)
     db = dbm.open(os.path.join(statedir, "seen"), "c")
@@ -347,8 +368,13 @@ def parse_and_deliver(maildir, url, statedir):
         msg.add_header("To", "\"%s\" <rss2maildir@localhost>" %(url))
         if prevmessageid:
             msg.add_header("References", prevmessageid)
         msg.add_header("To", "\"%s\" <rss2maildir@localhost>" %(url))
         if prevmessageid:
             msg.add_header("References", prevmessageid)
-        createddate = datetime.datetime(*item["updated_parsed"][0:6]) \
+        createddate = datetime.datetime.now() \
             .strftime("%a, %e %b %Y %T -0000")
             .strftime("%a, %e %b %Y %T -0000")
+        try:
+            createddate = datetime.datetime(*item["updated_parsed"][0:6]) \
+                .strftime("%a, %e %b %Y %T -0000")
+        except:
+            pass
         msg.add_header("Date", createddate)
         msg.add_header("Subject", item["title"])
         msg.set_default_type("text/plain")
         msg.add_header("Date", createddate)
         msg.add_header("Subject", item["title"])
         msg.set_default_type("text/plain")