* Add redirect support
author Brett Parker <iDunno@sommitrealweird.co.uk>
Sat, 22 Dec 2007 01:02:37 +0000 (01:02 +0000)
committer Brett Parker <iDunno@sommitrealweird.co.uk>
Sat, 22 Dec 2007 01:02:37 +0000 (01:02 +0000)
* Try to get a URL 3 times (redirects are included in the count...)
* Refactor connection creation into its own function to reduce code
  duplication

TODO
rss2maildir.py

diff --git a/TODO b/TODO
index 1f141878478a2f502581e623539d427a20bbb4ec..aa98d4db1bf6a4895f42f4db71536f0ebbaa43a6 100644 (file)
--- a/TODO
+++ b/TODO
@@ -1,4 +1,3 @@
-* follow 302
 * add ol and dl support
 * add multilevel list support
 * normalise the number of spaces in formatted text
diff --git a/rss2maildir.py b/rss2maildir.py
index 32a39f6243d1f21523eb437e1c9885462c2a16c7..aa3b0ccac5badb4a6c21f46591470ab3e5fc0531 100755 (executable)
--- a/rss2maildir.py
+++ b/rss2maildir.py
@@ -252,25 +252,42 @@ class HTML2Text(HTMLParser):
             data = data + "\n".join(textwrap.wrap(self.currentparagraph, 70))
         return data
 
+def open_url(method, url):
+    redirectcount = 0
+    while redirectcount < 3:
+        (type, rest) = urllib.splittype(url)
+        (host, path) = urllib.splithost(rest)
+        (host, port) = urllib.splitport(host)
+        if port == None:
+            port = 80
+        try:
+            conn = httplib.HTTPConnection("%s:%s" %(host, port))
+            conn.request(method, path)
+            response = conn.getresponse()
+            if response.status in [301, 302, 303, 307]:
+                headers = response.getheaders()
+                for header in headers:
+                    if header[0] == "location":
+                        url = header[1]
+            elif response.status == 200:
+                return response
+        except:
+            pass
+        redirectcount = redirectcount + 1
+    return None
+
 def parse_and_deliver(maildir, url, statedir):
     feedhandle = None
     headers = None
     # first check if we know about this feed already
     feeddb = dbm.open(os.path.join(statedir, "feeds"), "c")
-    # we need all the parts of the url 
-    (type, rest) = urllib.splittype(url)
-    (host, path) = urllib.splithost(rest)
-    (host, port) = urllib.splitport(host)
-    if port == None:
-        port = 80
     if feeddb.has_key(url):
         data = feeddb[url]
         data = cgi.parse_qs(data)
-        # now do a head on the feed to see if it's been updated
-        conn = httplib.HTTPConnection("%s:%s" %(host, port))
-        conn.request("HEAD", path)
-        response = conn.getresponse()
-        headers = response.getheaders()
+        response = open_url("HEAD", url)
+        headers = None
+        if response:
+            headers = response.getheaders()
         ischanged = False
         try:
             for header in headers:
@@ -289,24 +306,23 @@ def parse_and_deliver(maildir, url, statedir):
         except:
             ischanged = True
         if ischanged:
-            conn = httplib.HTTPConnection("%s:%s" %(host, port))
-            conn.request("GET", path)
-            response = conn.getresponse()
-            headers = response.getheaders()
-            feedhandle = response
+            response = open_url("GET", url)
+            if response != None:
+                headers = response.getheaders()
+                feedhandle = response
+            else:
+                sys.stderr.write("Failed to fetch feed: %s\n" %(url))
+                return
         else:
             return # don't need to do anything, nothings changed.
     else:
-        conn = httplib.HTTPConnection("%s:%s" %(host, port))
-        conn.request("GET", path)
-        response = None
-        try:
-            response = conn.getresponse()
-        except:
-            print "Failed to fetch feed: %s" %(url)
+        response = open_url("GET", url)
+        if response != None:
+            headers = response.getheaders()
+            feedhandle = response
+        else:
+            sys.stderr.write("Failed to fetch feed: %s\n" %(url))
             return
-        headers = response.getheaders()
-        feedhandle = response
 
     fp = feedparser.parse(feedhandle)
     db = dbm.open(os.path.join(statedir, "seen"), "c")