12 from email.MIMEMultipart import MIMEMultipart
13 from email.MIMEText import MIMEText
21 from optparse import OptionParser
22 from ConfigParser import SafeConfigParser
24 from base64 import b64encode
30 from HTMLParser import HTMLParser
# HTML2Text -- HTMLParser subclass that builds up a plain-text rendering of the
# HTML passed to feed(). Body text accumulates in self.text; text seen while a
# heading is open accumulates in self.headingtext and is emitted, underlined,
# when the heading closes.
# NOTE(review): the embedded line numbers skip values (42 -> 45, 47 -> 51,
# 60 -> 62, 88 -> 90, ...), so some original lines are not visible in this
# excerpt -- presumably the __init__ header, the initial values of self.text
# and self.headingtext, and the else: lines. Confirm against the full file.
42 class HTML2Text(HTMLParser):
# Flags recording which kind of heading, if any, is currently open.
45 self.inheadingone = False
46 self.inheadingtwo = False
47 self.inotherheading = False
# Initialise the base parser.
51 HTMLParser.__init__(self)
# Opening tags: set the matching heading flag, or append line breaks for
# <br> and <p>. Tag names are compared case-insensitively via tag.lower().
53 def handle_starttag(self, tag, attrs):
54 if tag.lower() == "h1":
55 self.inheadingone = True
56 elif tag.lower() == "h2":
57 self.inheadingtwo = True
# <h3>..<h6> share one flag (and one underline character in handle_endtag).
58 elif tag.lower() in ["h3", "h4", "h5", "h6"]:
59 self.inotherheading = True
# NOTE(review): the body of the "a" branch (line 61) is not visible here.
60 elif tag.lower() == "a":
62 elif tag.lower() == "br":
63 self.text = self.text + "\n"
64 elif tag.lower() == "p":
# NOTE(review): line 65 is not visible; this append may be conditional.
66 self.text = self.text + "\n\n"
# Self-closing tags: only <br/> is handled, as a single newline.
68 def handle_startendtag(self, tag, attrs):
69 if tag.lower() == "br":
70 self.text = self.text + "\n"
# Closing tags: when a heading closes, clear its flag and append the collected
# heading text, a newline, and an underline as long as the heading text
# ("=" for h1, "-" for h2, "~" for h3..h6).
# NOTE(review): handle_data appends a trailing space to self.headingtext, so as
# written here the underline length includes that space. Lines 76/80/84 are not
# visible; presumably they reset self.headingtext -- confirm.
72 def handle_endtag(self, tag):
73 if tag.lower() == "h1":
74 self.inheadingone = False
75 self.text = self.text + self.headingtext + "\n" + "=" * len(self.headingtext)
77 elif tag.lower() == "h2":
78 self.inheadingtwo = False
79 self.text = self.text + self.headingtext + "\n" + "-" * len(self.headingtext)
81 elif tag.lower() in ["h3", "h4", "h5", "h6"]:
82 self.inotherheading = False
83 self.text = self.text + self.headingtext + "\n" + "~" * len(self.headingtext)
# Character data: outside any heading it is appended to self.text; the append
# to self.headingtext on line 90 is presumably the else: branch (line 89 is not
# visible). Each chunk is stripped and followed by a single space, so leading
# and trailing whitespace of the chunk is not preserved.
86 def handle_data(self, data):
87 if not self.inheadingone and not self.inheadingtwo and not self.inotherheading:
88 self.text = self.text + data.strip() + " "
90 self.headingtext = self.headingtext + data.strip() + " "
# Named entity reference (&name;): substitute the value from `entities` when
# the lower-cased name is known; line 96 re-emits the reference as "&name;"
# (presumably the else: branch -- line 95 is not visible).
# `entities` is not defined in this excerpt; assumes it is a module-level dict.
# NOTE(review): dict.has_key() exists only in Python 2; the equivalent
# `name.lower() in entities` works on both Python 2 and 3.
92 def handle_entityref(self, name):
93 if entities.has_key(name.lower()):
94 self.text = self.text + entities[name.lower()]
96 self.text = self.text + "&" + name + ";"
# parse_and_deliver(maildir, url, statedir)
# Fetch the feed at `url` with feedparser and, for each item, build a
# multipart/alternative e-mail (HTML part plus a text part produced by
# HTML2Text) destined for the Maildir at `maildir`. A dbm database at
# <statedir>/seen, keyed by item link, stores the message id, creation date and
# content md5 of each delivered item.
# NOTE(review): the embedded line numbers skip values, so several original
# lines are not visible in this excerpt (e.g. 111, 120-122, 124-126, 144-146,
# 151, 153, 156-158, 162+). Notes below mark where that matters.
101 def parse_and_deliver(maildir, url, statedir):
# NOTE(review): `md` is not used in any line visible here.
102 md = mailbox.Maildir(maildir)
103 fp = feedparser.parse(url)
# "c" opens the database read/write, creating it if it does not exist.
104 db = dbm.open(os.path.join(statedir, "seen"), "c")
105 for item in fp["items"]:
106 # have we seen it before?
107 # need to work out what the content is first...
# Use the first "content" entry when present; line 112 (item["summary"]) is
# presumably the else: branch (line 111 is not visible).
# NOTE(review): has_key() is Python 2 only; `"content" in item` is equivalent.
109 if item.has_key("content"):
110 content = item["content"][0]["value"]
112 content = item["summary"]
# Fingerprint of the UTF-8 encoded content, compared with the stored one below.
114 md5sum = md5.md5(content.encode("utf8")).hexdigest()
# The stored value is a URL-encoded query string (written on line 160), so
# cgi.parse_qs yields lists -- hence the [0].
# NOTE(review): the action taken when the md5 matches (line 120) is not
# visible; presumably the item is skipped -- confirm.
116 if db.has_key(item["link"]):
117 data = db[item["link"]]
118 data = cgi.parse_qs(data)
119 if data["contentmd5"][0] == md5sum:
# NOTE(review): lines 121-122 and 124-126 are not visible; presumably they
# guard against a missing "author" key -- confirm.
123 author = item["author"]
127 # create a basic email message
128 msg = MIMEMultipart("alternative")
# Message-ID: timestamp to the minute, six random alphanumerics, and hostname.
129 messageid = "<" + datetime.datetime.now().strftime("%Y%m%d%H%M") + "." + "".join([random.choice(string.ascii_letters + string.digits) for a in range(0,6)]) + "@" + socket.gethostname() + ">"
130 msg.add_header("Message-ID", messageid)
# The feed URL is used as the display name of the envelope sender and of To;
# the item author is used as the display name of From.
131 msg.set_unixfrom("\"%s\" <rss2maildir@localhost>" %(url))
132 msg.add_header("From", "\"%s\" <rss2maildir@localhost>" %(author))
133 msg.add_header("To", "\"%s\" <rss2maildir@localhost>" %(url))
# Date header from the first six fields of item["updated_parsed"], with a
# hard-coded "-0000" zone.
# NOTE(review): %e and %T are not among the strftime directives Python
# documents as portable; they depend on the platform C library -- confirm.
134 createddate = datetime.datetime(*item["updated_parsed"][0:6]).strftime("%a, %e %b %Y %T -0000")
135 msg.add_header("Date", createddate)
136 msg.add_header("Subject", item["title"])
137 msg.set_default_type("text/plain")
# Build the HTML part and a plain-text part derived from the same content.
# NOTE(review): HTML2Text.gettext() and the attaching of both parts to `msg`
# (lines 144-146) are not visible in this excerpt.
139 htmlpart = MIMEText(content.encode("utf8"), "html", "utf8")
140 textparser = HTML2Text()
141 textparser.feed(content.encode("utf8"))
142 textcontent = textparser.gettext()
143 textpart = MIMEText(textcontent, "plain", "utf8")
147 # start by working out the filename we should be writting to, we do
148 # this following the normal maildir style rules
# File name: <pid>.<hostname>.<10 random alphanumerics>.<strftime('%s')>.
# NOTE(review): '%s' (seconds since the epoch) is a platform-specific strftime
# extension -- confirm it is available on the target platform.
149 fname = str(os.getpid()) + "." + socket.gethostname() + "." + "".join([random.choice(string.ascii_letters + string.digits) for a in range(0,10)]) + "." + datetime.datetime.now().strftime('%s')
150 fn = os.path.join(maildir, "tmp", fname)
# NOTE(review): `fh` is opened on a line not visible here (151); closing it
# (153) is not visible either.
152 fh.write(msg.as_string())
154 # now move it in to the new directory
# NOTE(review): only the destination path is visible; the move itself
# (lines 156-157) is not.
155 newfn = os.path.join(maildir, "new", fname)
159 # now add to the database about the item
# Record message id, creation date and content md5 under the item's link, in
# the query-string form that cgi.parse_qs reads back on line 118.
160 data = urllib.urlencode((("message-id", messageid), ("created", createddate), ("contentmd5", md5sum)))
161 db[item["link"]] = data
# Script driver: parse command-line options, locate and read the config file,
# make sure the state directory and the Maildir root exist, then for every
# non-"general" config section make sure its Maildir (with cur/tmp/new) exists
# and call parse_and_deliver() for it.
# NOTE(review): the embedded line numbers skip values throughout this part, so
# the try:/except:/else: lines, the add_option( calls, the config read and the
# exit calls are not visible in this excerpt. Whether this code sits at module
# level or inside a function cannot be told from here.
165 # first off, parse the command line arguments
167 oparser = OptionParser()
# Two options: -c/--conf (config file path) and -s/--statedir (state directory).
169 "-c", "--conf", dest="conf",
170 help="location of config file"
173 "-s", "--statedir", dest="statedir",
174 help="location of directory to store state in"
177 (options, args) = oparser.parse_args()
179 # check for the configfile
# An explicitly given config file must exist (os.stat is used as the existence
# probe); otherwise ~/.rss2maildir.conf and then /etc/rss2maildir.conf are
# tried.
# NOTE(review): `!= None` would idiomatically be `is not None`.
183 if options.conf != None:
184 # does the file exist?
186 os.stat(options.conf)
187 configfile = options.conf
189 # should exit here as the specified file doesn't exist
190 sys.stderr.write("Config file %s does not exist. Exiting.\n" %(options.conf,))
193 # check through the default locations
195 os.stat("%s/.rss2maildir.conf" %(os.environ["HOME"],))
196 configfile = "%s/.rss2maildir.conf" %(os.environ["HOME"],)
199 os.stat("/etc/rss2maildir.conf")
200 configfile = "/etc/rss2maildir.conf"
202 sys.stderr.write("No config file found. Exiting.\n")
205 # Right - if we've got this far, we've got a config file, now for the hard
# NOTE(review): the line that reads `configfile` into `scp` is not visible.
208 scp = SafeConfigParser()
# Default Maildir root; overridden below by [general] maildir_root if set.
211 maildir_root = "RSSMaildir"
# State directory, in order of precedence: the -s option, then
# [general] state_dir from the config, then the existing value of state_dir
# (its default is assigned on a line not visible here). Each candidate must be
# a directory; the visible os.mkdir on line 238 and the "Couldn't create"
# messages suggest a missing one is created -- confirm.
214 if options.statedir != None:
215 state_dir = options.statedir
217 mode = os.stat(state_dir)[stat.ST_MODE]
218 if not stat.S_ISDIR(mode):
219 sys.stderr.write("State directory (%s) is not a directory\n" %(state_dir))
222 # try to make the directory
# NOTE(review): unlike the other messages, this one has no trailing "\n".
226 sys.stderr.write("Couldn't create statedir %s" %(state_dir))
228 elif scp.has_option("general", "state_dir"):
229 new_state_dir = scp.get("general", "state_dir")
# NOTE(review): likely bug -- this stats and reports `state_dir`, not the
# `new_state_dir` just read from the config, while lines 238-239 create and
# adopt `new_state_dir`. Confirm against the full file.
231 mode = os.stat(state_dir)[stat.ST_MODE]
232 if not stat.S_ISDIR(mode):
233 sys.stderr.write("State directory (%s) is not a directory\n" %(state_dir))
238 os.mkdir(new_state_dir)
239 state_dir = new_state_dir
241 sys.stderr.write("Couldn't create state directory %s\n" %(new_state_dir))
245 mode = os.stat(state_dir)[stat.ST_MODE]
246 if not stat.S_ISDIR(mode):
247 sys.stderr.write("State directory %s is not a directory\n" %(state_dir))
253 sys.stderr.write("State directory %s could not be created\n" %(state_dir))
# Maildir root: take the config override if present, then require it to be a
# directory, creating it if needed.
256 if scp.has_option("general", "maildir_root"):
257 maildir_root = scp.get("general", "maildir_root")
260 mode = os.stat(maildir_root)[stat.ST_MODE]
261 if not stat.S_ISDIR(mode):
262 sys.stderr.write("Maildir Root %s is not a directory\n" %(maildir_root))
266 os.mkdir(maildir_root)
268 sys.stderr.write("Couldn't create Maildir Root %s\n" %(maildir_root))
# Every section other than "general" is handled as a feed.
271 feeds = scp.sections()
273 feeds.remove("general")
277 for section in feeds:
278 # check if the directory exists
281 maildir = scp.get(section, "maildir")
# Percent-encode the maildir name by running it through urlencode and keeping
# the value side of the resulting "key=value" string, then place it under the
# Maildir root.
285 maildir = urllib.urlencode(((section, maildir),)).split("=")[1]
286 maildir = os.path.join(maildir_root, maildir)
289 exists = os.stat(maildir)
290 if stat.S_ISDIR(exists[stat.ST_MODE]):
291 # check if there's a new, cur and tmp directory
# NOTE(review): for cur and tmp the mkdir line precedes the S_ISDIR check,
# while for new the check precedes the mkdir. With the try:/except: lines not
# visible, it cannot be told whether `mode` is always set before being tested
# -- confirm.
293 mode = os.stat(os.path.join(maildir, "cur"))[stat.ST_MODE]
295 os.mkdir(os.path.join(maildir, "cur"))
296 if not stat.S_ISDIR(mode):
297 sys.stderr.write("Broken maildir: %s\n" %(maildir))
299 mode = os.stat(os.path.join(maildir, "tmp"))[stat.ST_MODE]
301 os.mkdir(os.path.join(maildir, "tmp"))
302 if not stat.S_ISDIR(mode):
303 sys.stderr.write("Broken maildir: %s\n" %(maildir))
305 mode = os.stat(os.path.join(maildir, "new"))[stat.ST_MODE]
306 if not stat.S_ISDIR(mode):
307 sys.stderr.write("Broken maildir: %s\n" %(maildir))
309 os.mkdir(os.path.join(maildir, "new"))
311 sys.stderr.write("Broken maildir: %s\n" %(maildir))
316 sys.stderr.write("Couldn't create root maildir %s\n" %(maildir))
# Populate the Maildir with its three standard subdirectories (presumably on
# the path where the Maildir itself was just created -- the creating line is
# not visible).
319 os.mkdir(os.path.join(maildir, "new"))
320 os.mkdir(os.path.join(maildir, "cur"))
321 os.mkdir(os.path.join(maildir, "tmp"))
323 sys.stderr.write("Couldn't create required maildir directories for %s\n" %(section,))
326 # right - we've got the directories, we've got the section, we know the
# The section name is passed as parse_and_deliver's `url` argument, i.e. each
# feed's config section name is its feed URL.
329 parse_and_deliver(maildir, section, state_dir)