* Add item url to html parts
[rss2maildir.git] / rss2maildir.py
index aa3b0ccac5badb4a6c21f46591470ab3e5fc0531..97623535e64bc96bd118223d5c983c01a9a47e62 100755 (executable)
@@ -339,6 +339,15 @@ def parse_and_deliver(maildir, url, statedir):
 
         prevmessageid = None
 
+        # check if there's a guid too - if that exists and its stored md5
+        # matches the current one, the item is unchanged, so skip it
+        if item.has_key("guid"):
+            if db.has_key(url + "|" + item["guid"]):
+                data = db[url + "|" + item["guid"]]
+                data = cgi.parse_qs(data)
+                if data["contentmd5"][0] == md5sum:
+                    continue
+
         if db.has_key(url + "|" + item["link"]):
             data = db[url + "|" + item["link"]]
             data = cgi.parse_qs(data)
@@ -379,10 +388,18 @@ def parse_and_deliver(maildir, url, statedir):
         msg.add_header("Subject", item["title"])
         msg.set_default_type("text/plain")
 
-        htmlpart = MIMEText(content.encode("utf-8"), "html", "utf-8")
+        htmlcontent = content.encode("utf-8")
+        htmlcontent = "%s\n\n<p>Item URL: <a href='%s'>%s</a></p>" %( \
+            content, \
+            item["link"], \
+            item["link"] )
+        htmlpart = MIMEText(htmlcontent.encode("utf-8"), "html", "utf-8")
         textparser = HTML2Text()
         textparser.feed(content.encode("utf-8"))
         textcontent = textparser.gettext()
+        textcontent = "%s\n\nItem URL: %s" %( \
+            textcontent, \
+            item["link"] )
         textpart = MIMEText(textcontent.encode("utf-8"), "plain", "utf-8")
         msg.attach(textpart)
         msg.attach(htmlpart)
@@ -409,12 +426,31 @@ def parse_and_deliver(maildir, url, statedir):
         # now add to the database about the item
         if prevmessageid:
             messageid = prevmessageid + " " + messageid
-        data = urllib.urlencode((
-            ("message-id", messageid), \
-            ("created", createddate), \
-            ("contentmd5", md5sum) \
-            ))
-        db[url + "|" + item["link"]] = data
+        if item.has_key("guid") and item["guid"] != item["link"]:
+            data = urllib.urlencode(( \
+                ("message-id", messageid), \
+                ("created", createddate), \
+                ("contentmd5", md5sum) \
+                ))
+            db[url + "|" + item["guid"]] = data
+            try:
+                data = db[url + "|" + item["link"]]
+                data = cgi.parse_qs(data)
+                newdata = urllib.urlencode(( \
+                    ("message-id", messageid), \
+                    ("created", data["created"][0]), \
+                    ("contentmd5", data["contentmd5"][0]) \
+                    ))
+                db[url + "|" + item["link"]] = newdata
+            except:
+                db[url + "|" + item["link"]] = data
+        else:
+            data = urllib.urlencode(( \
+                ("message-id", messageid), \
+                ("created", createddate), \
+                ("contentmd5", md5sum) \
+                ))
+            db[url + "|" + item["link"]] = data
 
     if headers:
         data = []