Normalise spaces where they should be.
[rss2maildir.git] / rss2maildir.py
1 #!/usr/bin/python
2 # coding=utf-8
3
4 # rss2maildir.py - RSS feeds to Maildir 1 email per item
5 # Copyright (C) 2007  Brett Parker <iDunno@sommitrealweird.co.uk>
6
7 # This program is free software: you can redistribute it and/or modify
8 # it under the terms of the GNU General Public License as published by
9 # the Free Software Foundation, either version 3 of the License, or
10 # (at your option) any later version.
11
12 # This program is distributed in the hope that it will be useful,
13 # but WITHOUT ANY WARRANTY; without even the implied warranty of
14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 # GNU General Public License for more details.
16
17 # You should have received a copy of the GNU General Public License
18 # along with this program.  If not, see <http://www.gnu.org/licenses/>.
19
20 import sys
21 import os
22 import stat
23 import httplib
24 import urllib
25
26 import feedparser
27
28 from email.MIMEMultipart import MIMEMultipart
29 from email.MIMEText import MIMEText
30
31 import datetime
32 import random
33 import string
34 import textwrap
35
36 import socket
37
38 from optparse import OptionParser
39 from ConfigParser import SafeConfigParser
40
41 from base64 import b64encode
42 import md5
43
44 import cgi
45 import dbm
46
47 from HTMLParser import HTMLParser
48
class HTML2Text(HTMLParser):
    """Render an HTML fragment as wrapped plain text.

    Feed markup with feed() and collect the result with gettext().  Headings
    are underlined, paragraphs and list items are re-flowed to ``textwidth``
    columns, images become `` [img:URL(alt)]`` and links are written as
    `text`__ with the collected targets appended as ``__ URL`` lines at the
    end of the text.

    Input may be UTF-8 encoded byte strings or unicode strings; everything
    kept on the instance (text, curdata, urls) is unicode.
    """

    # Named entities that are replaced by a literal character; anything else
    # is passed through as ``&name;``.
    entities = {
        u'amp': u'&',
        u'lt': u'<',
        u'gt': u'>',
        u'pound': u'\xa3',
        u'copy': u'\xa9',
        u'apos': u"'",
        u'quot': u'"',
        u'nbsp': u' ',
        }

    # Tags that start a new block of output text.
    blockleveltags = [
        u'h1',
        u'h2',
        u'h3',
        u'h4',
        u'h5',
        u'h6',
        u'pre',
        u'p',
        u'ul',
        u'ol',
        u'dl',
        u'li',
        u'dt',
        u'dd',
        u'div',
        #u'blockquote',
        ]

    # Tags that open a (possibly nested) list.
    liststarttags = [
        u'ul',
        u'ol',
        u'dl',
        ]

    # Block tags that do not close the block that is open when they start.
    cancontainflow = [
        u'div',
        u'li',
        u'dd',
        u'blockquote',
    ]

    # Tags rendered as underlined headings.
    headingtags = [
        u'h1',
        u'h2',
        u'h3',
        u'h4',
        u'h5',
        u'h6',
        ]

    def __init__(self,textwidth=70):
        """Create a converter that wraps its output at *textwidth* columns."""
        self.text = u''            # finished output so far
        self.curdata = u''         # character data not yet flushed to text
        self.textwidth = textwidth
        self.opentags = []         # stack of tags currently considered open
        self.indentlevel = 0       # indentation contributed by nested lists
        self.ignorenodata = False  # set after a <br> already broke the line
        self.listcount = []        # next item number for each open <ol>
        self.urls = []             # link targets, in order of appearance
        HTMLParser.__init__(self)

    @staticmethod
    def _to_unicode(value):
        """Return *value* as unicode; byte strings are decoded as UTF-8.

        None (an attribute given without a value) becomes an empty string.
        """
        if value is None:
            return u''
        if isinstance(value, type(u'')):
            return value
        return value.decode('utf-8')

    @staticmethod
    def _squash(text):
        """Collapse each run of ASCII whitespace in *text* to one space.

        The split is done on the UTF-8 encoded form so that only ASCII
        whitespace separates words.
        """
        return u' '.join(
            [word.decode('utf-8') for word in text.encode('utf-8').split()])

    @staticmethod
    def _codepoint_to_text(codepoint):
        """Return the unicode character for the integer *codepoint*."""
        try:
            to_char = unichr
        except NameError:
            # Python 3 has no unichr(); chr() does the same job there.
            to_char = chr
        return to_char(codepoint)

    def handle_starttag(self, tag, attrs):
        """Open *tag*: push block tags, record links, render images/breaks."""
        tag_name = tag.lower()
        if tag_name in self.blockleveltags:
            # handle starting a new block - unless we're in a block element
            # that can contain other blocks, we'll assume that we want to close
            # the container
            if len(self.opentags) > 1 and self.opentags[-1] == u'li':
                self.handle_curdata()

            if tag_name == u'ol':
                self.handle_curdata()
                self.listcount.append(1)
                self.listlevel = len(self.listcount) - 1

            if tag_name in self.liststarttags:
                # a list opened inside another list is indented by the width
                # of the enclosing list's item marker
                smallist = self.opentags[-3:-1]
                smallist.reverse()
                for prev_listtag in smallist:
                    if prev_listtag in [u'dl', u'ol']:
                        self.indentlevel = self.indentlevel + 4
                        break
                    elif prev_listtag == u'ul':
                        self.indentlevel = self.indentlevel + 3
                        break

            if len(self.opentags) > 0:
                self.handle_curdata()
                if tag_name not in self.cancontainflow:
                    self.opentags.pop()
            self.opentags.append(tag_name)
        elif tag_name == u'a':
            for attr in attrs:
                if attr[0].lower() == u'href':
                    self.urls.append(self._to_unicode(attr[1]))
            self.curdata = self.curdata + u'`'
            self.opentags.append(tag_name)
        elif tag_name == u'img':
            self.handle_image(attrs)
        elif tag_name == u'br':
            self.handle_br()
        # any other tag (including span) is unknown to us, so it is ignored
        # and only its character data ends up in the output

    def handle_startendtag(self, tag, attrs):
        """Handle self-closing tags; only <br/> and <img/> produce output."""
        if tag.lower() == u'br':
            self.handle_br()
        elif tag.lower() == u'img':
            self.handle_image(attrs)

    def handle_br(self):
        """Flush the pending data and process a line break."""
        self.handle_curdata()
        self.opentags.append(u'br')
        self.handle_curdata()
        self.opentags.pop()

    def handle_image(self, attrs):
        """Append `` [img:URL(alt)]`` to the pending data for an image.

        Nothing is added when the image has no (or an empty) src attribute.
        """
        alt = u''
        url = u''
        for attr in attrs:
            if attr[0] == 'alt':
                alt = self._to_unicode(attr[1])
            elif attr[0] == 'src':
                url = self._to_unicode(attr[1])
        if url:
            self.curdata = self.curdata \
                + u' [img:' \
                + url
            if alt:
                self.curdata = self.curdata \
                    + u'(' \
                    + alt \
                    + u')'
            self.curdata = self.curdata \
                + u']'

    def handle_curdata(self):
        """Flush the pending character data, formatted for the innermost tag.

        Does nothing when no tag is open or no data is pending.  The order of
        the checks below matters: pending data is inspected before the <br>
        case, and whitespace-only data is kept pending rather than cleared.
        """

        if len(self.opentags) == 0:
            return

        tag_thats_done = self.opentags[-1]

        if len(self.curdata) == 0:
            return

        if tag_thats_done == u'br':
            if len(self.text) == 0 or self.text[-1] != u'\n':
                self.text = self.text + u'\n'
                self.ignorenodata = True
            return

        if len(self.curdata.strip()) == 0:
            return

        if tag_thats_done in self.blockleveltags:
            # separate this block from whatever was written before it
            newlinerequired = self.text != u''
            if self.ignorenodata:
                newlinerequired = False
            self.ignorenodata = False
            if newlinerequired:
                if tag_thats_done in [u'dt', u'dd', u'li'] \
                    and len(self.text) > 1 \
                    and self.text[-1] != u'\n':
                    self.text = self.text + u'\n'
                elif len(self.text) > 2 \
                    and self.text[-1] != u'\n' \
                    and self.text[-2] != u'\n':
                    self.text = self.text + u'\n\n'

        if tag_thats_done in self.headingtags:
            headingtext = u' '.join(self.curdata.split())
            seperator = u'\n' + u' ' * self.indentlevel
            headingtext = seperator.join(
                textwrap.wrap(
                    headingtext,
                    self.textwidth - self.indentlevel))

            # h1 is underlined with '=', h2 with '-', deeper levels with '~'
            if tag_thats_done == u'h1':
                underlinechar = u'='
            elif tag_thats_done == u'h2':
                underlinechar = u'-'
            else:
                underlinechar = u'~'

            # a wrapped heading gets a full-width underline
            if u'\n' in headingtext:
                underlinelength = self.textwidth - self.indentlevel
            else:
                underlinelength = len(headingtext)
            underline = u' ' * self.indentlevel \
                + underlinechar * underlinelength
            self.text = self.text \
                + headingtext + u'\n' \
                + underline
        elif tag_thats_done in [u'p', u'div']:
            paragraph = self._squash(self.curdata.strip())
            seperator = u'\n' + u' ' * self.indentlevel
            self.text = self.text \
                + u' ' * self.indentlevel \
                + seperator.join(
                    textwrap.wrap(
                        paragraph, self.textwidth - self.indentlevel))
        elif tag_thats_done == u'pre':
            # NOTE(review): whitespace inside <pre> is collapsed like any
            # other text - confirm that this is intended.
            self.text = self.text + self._squash(self.curdata)
        elif tag_thats_done == u'blockquote':
            quote = self._squash(self.curdata)
            seperator = u'\n' + u' ' * self.indentlevel + u'> '
            if len(self.text) > 0 and self.text[-1] != u'\n':
                self.text = self.text + u'\n'
            self.text = self.text \
                + u'> ' \
                + seperator.join(
                    textwrap.wrap(
                        quote,
                        self.textwidth - self.indentlevel - 2))
            self.curdata = u''
        elif tag_thats_done == u'li':
            item = self.curdata.encode('utf-8').strip().decode('utf-8')
            if len(self.text) > 0 and self.text[-1] != u'\n':
                self.text = self.text + u'\n'
            # work out if we're in an ol rather than a ul
            latesttags = self.opentags[-4:]
            latesttags.reverse()
            isul = None
            for thing in latesttags:
                if thing == u'ul':
                    isul = True
                    break
                elif thing == u'ol':
                    isul = False
                    break

            listindent = 3
            if not isul:
                listindent = 4

            listmarker = u' * '
            if isul == False:
                listmarker = u' %2d. ' %(self.listcount[-1])
                self.listcount[-1] = self.listcount[-1] + 1

            seperator = u'\n' \
                + u' ' * self.indentlevel \
                + u' ' * listindent
            self.text = self.text \
                + u' ' * self.indentlevel \
                + listmarker \
                + seperator.join(
                    textwrap.wrap(
                        item,
                        self.textwidth - self.indentlevel - listindent))
            self.curdata = u''
        elif tag_thats_done == u'dt':
            definition = self._squash(self.curdata)
            if len(self.text) > 0 and self.text[-1] != u'\n':
                self.text = self.text + u'\n\n'
            elif len(self.text) > 1 and self.text[-2] != u'\n':
                self.text = self.text + u'\n'
            definition = u' ' * self.indentlevel + definition + u'::'
            indentstring = u'\n' + u' ' * (self.indentlevel + 1)
            self.text = self.text \
                + indentstring.join(
                    textwrap.wrap(definition,
                        self.textwidth - self.indentlevel - 1))
            self.curdata = u''
        elif tag_thats_done == u'dd':
            definition = self._squash(self.curdata)
            if len(definition) > 0:
                if len(self.text) > 0 and self.text[-1] != u'\n':
                    self.text = self.text + u'\n'
                indentstring = u'\n' + u' ' * (self.indentlevel + 4)
                self.text = self.text \
                    + u' ' * (self.indentlevel + 4) \
                    + indentstring.join(
                        textwrap.wrap(
                            definition,
                            self.textwidth - self.indentlevel - 4))
                self.curdata = u''
        elif tag_thats_done == u'a':
            # close the `text`__ reference; the data stays pending so that
            # it is written out with the enclosing block
            self.curdata = self.curdata + u'`__'

        if tag_thats_done in self.blockleveltags:
            self.curdata = u''

        self.ignorenodata = False

    def handle_endtag(self, tag):
        """Close *tag*, flushing the data that was collected for it.

        End tags for elements that are not open are ignored.
        """
        self.ignorenodata = False
        # normalise the case before comparing against the open tag stack,
        # which only ever holds lower case names
        tag = tag.lower()
        if tag == u'span':
            return

        if tag not in self.opentags:
            return

        if tag in [u'br', u'img']:
            return

        if tag in self.liststarttags:
            self.handle_curdata()
            # find if there was a previous list level
            smalllist = self.opentags[:-1]
            smalllist.reverse()
            for prev_listtag in smalllist:
                if prev_listtag in [u'ol', u'dl']:
                    self.indentlevel = self.indentlevel - 4
                    break
                elif prev_listtag == u'ul':
                    self.indentlevel = self.indentlevel - 3
                    break

        if tag == u'ol':
            self.listcount = self.listcount[:-1]

        # position of the innermost open occurrence of this tag
        tagindex = len(self.opentags) - 1 - self.opentags[::-1].index(tag)

        # Assuming the data was for the last opened tag first
        self.handle_curdata()
        if tagindex != len(self.opentags) - 1:
            # Drop everything that was opened after this tag.
            # NOTE(review): the tag itself stays on the stack in this case -
            # confirm whether that is intended.
            self.opentags = self.opentags[:tagindex + 1]
        else:
            self.opentags.pop()

    def handle_data(self, data):
        """Collect character data; loose text is treated as a paragraph."""
        if len(self.opentags) == 0:
            self.opentags.append(u'p')
        self.curdata = self.curdata + self._to_unicode(data)

    def handle_entityref(self, name):
        """Append the character for ``&name;``, or the reference itself when
        the entity is not in the entities table."""
        key = name.lower()
        if key in self.entities:
            entity = self.entities[key]
        else:
            entity = u'&' + self._to_unicode(name) + u';'

        self.curdata = self.curdata + entity

    def handle_charref(self, name):
        """Append the character for a numeric reference (``&#NNN;`` or
        ``&#xHHH;``); references that cannot be converted are kept as text."""
        try:
            if name[:1] in (u'x', u'X'):
                codepoint = int(name[1:], 16)
            else:
                codepoint = int(name)
            entity = self._codepoint_to_text(codepoint)
        except (ValueError, OverflowError):
            entity = u'&#' + self._to_unicode(name) + u';'

        self.curdata = self.curdata + entity

    def gettext(self):
        """Flush pending data and return the text, ending in one newline.

        Collected link targets are appended as ``__ URL`` lines and then
        forgotten; the open tag stack is reset.
        """
        self.handle_curdata()
        if len(self.text) == 0 or self.text[-1] != u'\n':
            self.text = self.text + u'\n'
        self.opentags = []
        if len(self.text) > 0:
            while len(self.text) > 1 and self.text[-1] == u'\n':
                self.text = self.text[:-1]
            self.text = self.text + u'\n'
        if len(self.urls) > 0:
            self.text = self.text + u'\n__ ' + u'\n__ '.join(self.urls) + u'\n'
            self.urls = []
        return self.text
448
def open_url(method, url):
    """Issue an HTTP request for *url* and return the response on success.

    method -- the HTTP method to use, e.g. "GET" or "HEAD"
    url    -- absolute http:// or https:// URL

    Redirects (301, 302, 303, 307) are followed; a relative Location header
    is resolved against the URL that produced it.  At most three requests
    are made in total, and any response other than 200 or a redirect is
    simply retried.  Returns the httplib response object for a 200 reply,
    or None when the URL has no host, the attempts are used up, or https
    support is not available.
    """
    # imported here because nothing else in the file needs it
    import urlparse

    redirectcount = 0
    while redirectcount < 3:
        (scheme, rest) = urllib.splittype(url)
        (host, path) = urllib.splithost(rest)
        if not host:
            # not an absolute URL, there is nothing we could connect to
            return None
        (host, port) = urllib.splitport(host)

        if scheme == "https":
            connection_class = getattr(httplib, "HTTPSConnection", None)
            default_port = 443
        else:
            connection_class = httplib.HTTPConnection
            default_port = 80
        if connection_class is None:
            return None
        if not port:
            port = default_port

        conn = None
        try:
            conn = connection_class("%s:%s" %(host, port))
            # an empty path would produce an invalid request line
            conn.request(method, path or "/")
            response = conn.getresponse()
            if response.status == 200:
                # the caller reads from the response, so the connection
                # has to stay open
                return response
            if response.status in [301, 302, 303, 307]:
                location = response.getheader("location")
                if location:
                    url = urlparse.urljoin(url, location)
            conn.close()
        except (httplib.HTTPException, socket.error):
            # best effort: a failed attempt just uses up one try
            if conn is not None:
                conn.close()
        redirectcount = redirectcount + 1
    return None
472
# Response headers that are remembered per feed and compared against a later
# HEAD request to decide whether the feed needs fetching again.
_VALIDATOR_HEADERS = ["content-md5", "etag", "last-modified", "content-length"]


def _random_token(length):
    """Return *length* random ASCII letters and digits."""
    alphabet = string.ascii_letters + string.digits
    return "".join([random.choice(alphabet) for unused in range(length)])


def _state_key(url, identifier):
    """Return the key under which an item of feed *url* is stored in dbm."""
    key = url + "|" + identifier
    if isinstance(key, unicode):
        # dbm keys are byte strings
        key = key.encode("utf-8")
    return key


def _item_value(item, key, default):
    """Return item[key], or *default* when the item has no such field."""
    try:
        return item[key]
    except KeyError:
        return default


def _feed_has_changed(stored, headers):
    """Return False only when the HEAD *headers* match the *stored* ones.

    stored  -- dict as produced by cgi.parse_qs() from the saved headers
    headers -- list of (name, value) pairs from the HEAD response, or None

    The feed counts as changed when there are no headers, when a validator
    header differs from (or is missing in) the stored data, or when the
    response carries no validator header at all, since nothing then proves
    that the feed is unchanged.
    """
    if not headers:
        return True
    compared = False
    for (name, value) in headers:
        if name not in _VALIDATOR_HEADERS:
            continue
        if name not in stored or stored[name][0] != value:
            return True
        compared = True
    return not compared


def _deliver_item(maildir, url, db, item):
    """Write one feed item to *maildir* unless it was delivered before.

    db is the open dbm database of seen items; it is updated after the
    message has been written.
    """
    # have we seen it before?
    # need to work out what the content is first...
    if item.has_key("content"):
        content = item["content"][0]["value"]
    else:
        content = _item_value(item, "summary", u"")

    md5sum = md5.md5(content.encode("utf-8")).hexdigest()

    link = _item_value(item, "link", u"")
    guid = None
    if item.has_key("guid"):
        guid = item["guid"]
    # items without a link are tracked by their content instead
    linkkey = _state_key(url, link or md5sum)

    prevmessageid = None

    # check if there's a guid too - if that exists and we match the md5,
    # we are done
    if guid is not None:
        guidkey = _state_key(url, guid)
        if db.has_key(guidkey):
            data = cgi.parse_qs(db[guidkey])
            if data["contentmd5"][0] == md5sum:
                return

    if db.has_key(linkkey):
        data = cgi.parse_qs(db[linkkey])
        if data.has_key("message-id"):
            prevmessageid = data["message-id"][0]
        if data["contentmd5"][0] == md5sum:
            return

    author = _item_value(item, "author", url)

    # create a basic email message
    msg = MIMEMultipart("alternative")
    messageid = "<" \
        + datetime.datetime.now().strftime("%Y%m%d%H%M") \
        + "." + _random_token(6) \
        + "@" + socket.gethostname() + ">"
    msg.add_header("Message-ID", messageid)
    msg.set_unixfrom("\"%s\" <rss2maildir@localhost>" %(url))
    msg.add_header("From", "\"%s\" <rss2maildir@localhost>" %(author))
    msg.add_header("To", "\"%s\" <rss2maildir@localhost>" %(url))
    if prevmessageid:
        # thread an updated item below the message sent for it earlier
        msg.add_header("References", prevmessageid)
    createddate = datetime.datetime.now() \
        .strftime("%a, %e %b %Y %T -0000")
    try:
        createddate = datetime.datetime(*item["updated_parsed"][0:6]) \
            .strftime("%a, %e %b %Y %T -0000")
    except (KeyError, TypeError, ValueError):
        # no usable update time on the item, keep the current time
        pass
    msg.add_header("Date", createddate)
    msg.add_header("Subject", _item_value(item, "title", u""))
    msg.set_default_type("text/plain")

    htmlcontent = "%s\n\n<p>Item URL: <a href='%s'>%s</a></p>" %( \
        content, \
        link, \
        link )
    htmlpart = MIMEText(htmlcontent.encode("utf-8"), "html", "utf-8")
    textparser = HTML2Text()
    textparser.feed(content.encode("utf-8"))
    textcontent = textparser.gettext()
    textcontent = "%s\n\nItem URL: %s" %( \
        textcontent, \
        link )
    textpart = MIMEText(textcontent.encode("utf-8"), "plain", "utf-8")
    msg.attach(textpart)
    msg.attach(htmlpart)

    # start by working out the filename we should be writing to, we do
    # this following the normal maildir style rules
    fname = str(os.getpid()) \
        + "." + socket.gethostname() \
        + "." + _random_token(10) \
        + "." + datetime.datetime.now().strftime('%s')
    fn = os.path.join(maildir, "tmp", fname)
    fh = open(fn, "w")
    try:
        fh.write(msg.as_string())
    finally:
        fh.close()
    # now move it in to the new directory
    newfn = os.path.join(maildir, "new", fname)
    os.link(fn, newfn)
    os.unlink(fn)

    # now add to the database about the item
    if prevmessageid:
        messageid = prevmessageid + " " + messageid
    record = urllib.urlencode(( \
        ("message-id", messageid), \
        ("created", createddate), \
        ("contentmd5", md5sum) \
        ))
    if guid is not None and guid != link:
        db[guidkey] = record
        try:
            # an existing link entry keeps its creation date and checksum
            # and only learns about the new message id
            previous = cgi.parse_qs(db[linkkey])
            record = urllib.urlencode(( \
                ("message-id", messageid), \
                ("created", previous["created"][0]), \
                ("contentmd5", previous["contentmd5"][0]) \
                ))
        except KeyError:
            pass
    db[linkkey] = record


def parse_and_deliver(maildir, url, statedir):
    """Fetch the feed at *url* and deliver its new items to *maildir*.

    maildir  -- maildir directory; its tmp and new subdirectories must exist
    url      -- URL of the feed
    statedir -- directory holding the "feeds" and "seen" dbm databases

    A feed that was fetched before is first checked with a HEAD request and
    skipped when its validator headers still match the stored ones.  Each
    item whose content was not delivered before becomes one multipart
    (plain text and HTML) message.  A failed fetch is reported on stderr.
    """
    feeddb = dbm.open(os.path.join(statedir, "feeds"), "c")
    try:
        # first check if we know about this feed already
        if feeddb.has_key(url):
            stored = cgi.parse_qs(feeddb[url])
            headers = None
            response = open_url("HEAD", url)
            if response is not None:
                headers = response.getheaders()
            if not _feed_has_changed(stored, headers):
                return # don't need to do anything, nothing has changed.

        response = open_url("GET", url)
        if response is None:
            sys.stderr.write("Failed to fetch feed: %s\n" %(url))
            return
        headers = response.getheaders()

        fp = feedparser.parse(response)
        db = dbm.open(os.path.join(statedir, "seen"), "c")
        try:
            for item in fp["items"]:
                _deliver_item(maildir, url, db, item)
        finally:
            db.close()

        # remember the validators so the next run can start with a HEAD
        validators = []
        for header in headers:
            if header[0] in _VALIDATOR_HEADERS:
                validators.append((header[0], header[1]))
        if len(validators) > 0:
            feeddb[url] = urllib.urlencode(validators)
    finally:
        feeddb.close()
661
def _ensure_directory(path, not_directory_message, create_failed_message):
    """Make sure *path* is a directory, creating it when it does not exist.

    Writes the matching message to stderr and exits with status 1 when the
    path exists but is not a directory, or when it cannot be created.
    """
    if os.path.isdir(path):
        return
    if os.path.exists(path):
        sys.stderr.write(not_directory_message)
        sys.exit(1)
    try:
        os.mkdir(path)
    except OSError:
        sys.stderr.write(create_failed_message)
        sys.exit(1)


def _prepare_maildir(maildir, section):
    """Create *maildir* and its cur, tmp and new subdirectories as needed.

    Returns True when all of them are directories afterwards.  Returns
    False, after reporting "Broken maildir" on stderr, when one of the paths
    exists but is not a directory.  Exits with status 1 when a missing
    directory cannot be created.
    """
    if not os.path.exists(maildir):
        try:
            os.mkdir(maildir)
        except OSError:
            sys.stderr.write("Couldn't create root maildir %s\n" \
                %(maildir))
            sys.exit(1)
    elif not os.path.isdir(maildir):
        sys.stderr.write("Broken maildir: %s\n" %(maildir))
        return False

    usable = True
    for subdir in ["cur", "tmp", "new"]:
        path = os.path.join(maildir, subdir)
        if os.path.isdir(path):
            continue
        if os.path.exists(path):
            sys.stderr.write("Broken maildir: %s\n" %(maildir))
            usable = False
            continue
        try:
            os.mkdir(path)
        except OSError:
            sys.stderr.write( \
                "Couldn't create required maildir directories for %s\n" \
                %(section,))
            sys.exit(1)
    return usable


if __name__ == "__main__":
    # This only gets executed if we really called the program
    # first off, parse the command line arguments

    oparser = OptionParser()
    oparser.add_option(
        "-c", "--conf", dest="conf",
        help="location of config file"
        )
    oparser.add_option(
        "-s", "--statedir", dest="statedir",
        help="location of directory to store state in"
        )

    (options, args) = oparser.parse_args()

    # check for the configfile

    configfile = None

    if options.conf is not None:
        # does the file exist?
        if not os.path.exists(options.conf):
            # exit here as the specified file doesn't exist
            sys.stderr.write( \
                "Config file %s does not exist. Exiting.\n" %(options.conf,))
            sys.exit(2)
        configfile = options.conf
    else:
        # check through the default locations
        candidates = []
        if "HOME" in os.environ:
            candidates.append( \
                "%s/.rss2maildir.conf" %(os.environ["HOME"],))
        candidates.append("/etc/rss2maildir.conf")
        for candidate in candidates:
            if os.path.exists(candidate):
                configfile = candidate
                break
        if configfile is None:
            sys.stderr.write("No config file found. Exiting.\n")
            sys.exit(2)

    # Right - if we've got this far, we've got a config file, now for the hard
    # bits...

    scp = SafeConfigParser()
    scp.read(configfile)

    maildir_root = "RSSMaildir"
    state_dir = "state"

    # the command line wins over the config file, which wins over the default
    if options.statedir is not None:
        state_dir = options.statedir
        _ensure_directory(state_dir, \
            "State directory (%s) is not a directory\n" %(state_dir), \
            "Couldn't create statedir %s\n" %(state_dir))
    elif scp.has_option("general", "state_dir"):
        new_state_dir = scp.get("general", "state_dir")
        _ensure_directory(new_state_dir, \
            "State directory (%s) is not a directory\n" %(new_state_dir), \
            "Couldn't create state directory %s\n" %(new_state_dir))
        state_dir = new_state_dir
    else:
        _ensure_directory(state_dir, \
            "State directory %s is not a directory\n" %(state_dir), \
            "State directory %s could not be created\n" %(state_dir))

    if scp.has_option("general", "maildir_root"):
        maildir_root = scp.get("general", "maildir_root")

    _ensure_directory(maildir_root, \
        "Maildir Root %s is not a directory\n" %(maildir_root), \
        "Couldn't create Maildir Root %s\n" %(maildir_root))

    # every section other than "general" describes one feed, and the section
    # name is the feed URL
    feeds = scp.sections()
    if "general" in feeds:
        feeds.remove("general")

    for section in feeds:
        # the maildir is named after the section unless the section says
        # otherwise
        try:
            maildir = scp.get(section, "maildir")
        except Exception:
            # option missing or not readable, fall back to the section name
            maildir = section

        # quote the name so that it is a single path component
        maildir = urllib.quote_plus(str(maildir))
        maildir = os.path.join(maildir_root, maildir)

        # check if the directory exists
        if not _prepare_maildir(maildir, section):
            # nothing can be delivered in to a broken maildir
            continue

        # right - we've got the directories, we've got the section, we know the
        # url... lets play!

        parse_and_deliver(maildir, section, state_dir)