fix silly regression on pre formatting
[rss2maildir.git] / rss2maildir.py
1 #!/usr/bin/python
2 # coding=utf-8
3
4 # rss2maildir.py - RSS feeds to Maildir 1 email per item
5 # Copyright (C) 2007  Brett Parker <iDunno@sommitrealweird.co.uk>
6
7 # This program is free software: you can redistribute it and/or modify
8 # it under the terms of the GNU General Public License as published by
9 # the Free Software Foundation, either version 3 of the License, or
10 # (at your option) any later version.
11
12 # This program is distributed in the hope that it will be useful,
13 # but WITHOUT ANY WARRANTY; without even the implied warranty of
14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 # GNU General Public License for more details.
16
17 # You should have received a copy of the GNU General Public License
18 # along with this program.  If not, see <http://www.gnu.org/licenses/>.
19
20 import sys
21 import os
22 import stat
23 import httplib
24 import urllib
25
26 import feedparser
27
28 from email.MIMEMultipart import MIMEMultipart
29 from email.MIMEText import MIMEText
30
31 import datetime
32 import random
33 import string
34 import textwrap
35
36 import socket
37
38 from optparse import OptionParser
39 from ConfigParser import SafeConfigParser
40
41 from base64 import b64encode
42 import md5
43
44 import cgi
45 import dbm
46
47 from HTMLParser import HTMLParser
48
class HTML2Text(HTMLParser):
    """HTMLParser subclass that renders HTML as wrapped plain text.

    Markup is supplied through feed(); gettext() returns the text built so
    far together with the collected link targets and image definitions.
    """

    # Named character entities, keyed by lower-case entity name.
    entities = {
        u'amp': "&", u'lt': "<", u'gt': ">",
        u'pound': "£", u'copy': "©",
        u'apos': "'", u'quot': "\"", u'nbsp': " ",
        }

    # Tags that handle_starttag() treats as opening a new block of text.
    blockleveltags = [
        u'h1', u'h2', u'h3', u'h4', u'h5', u'h6',
        u'pre', u'p',
        u'ul', u'ol', u'dl',
        u'li', u'dt', u'dd',
        u'div',
        #u'blockquote',
        ]

    # Tags that open a list.
    liststarttags = [u'ul', u'ol', u'dl']

    # Tags that stay on the open-tag stack when a block starts inside them.
    cancontainflow = [u'div', u'li', u'dd', u'blockquote']
92
93     def __init__(self,textwidth=70):
94         self.text = u''
95         self.curdata = u''
96         self.textwidth = textwidth
97         self.opentags = []
98         self.indentlevel = 0
99         self.ignorenodata = False
100         self.listcount = []
101         self.urls = []
102         self.images = {}
103         HTMLParser.__init__(self)
104
105     def handle_starttag(self, tag, attrs):
106         tag_name = tag.lower()
107         if tag_name in self.blockleveltags:
108             # handle starting a new block - unless we're in a block element
109             # that can contain other blocks, we'll assume that we want to close
110             # the container
111             if len(self.opentags) > 1 and self.opentags[-1] == u'li':
112                 self.handle_curdata()
113
114             if tag_name == u'ol':
115                 self.handle_curdata()
116                 self.listcount.append(1)
117                 self.listlevel = len(self.listcount) - 1
118
119             if tag_name in self.liststarttags:
120                 smallist = self.opentags[-3:-1]
121                 smallist.reverse()
122                 for prev_listtag in smallist:
123                     if prev_listtag in [u'dl', u'ol']:
124                         self.indentlevel = self.indentlevel + 4
125                         break
126                     elif prev_listtag == u'ul':
127                         self.indentlevel = self.indentlevel + 3
128                         break
129
130             if len(self.opentags) > 0:
131                 self.handle_curdata()
132                 if tag_name not in self.cancontainflow:
133                     self.opentags.pop()
134             self.opentags.append(tag_name)
135         else:
136             if tag_name == "span":
137                 return
138             listcount = 0
139             try:
140                 listcount = self.listcount[-1]
141             except:
142                 pass
143
144             if tag_name == u'dd' and len(self.opentags) > 1 \
145                 and self.opentags[-1] == u'dt':
146                 self.handle_curdata()
147                 self.opentags.pop()
148             elif tag_name == u'dt' and len(self.opentags) > 1 \
149                 and self.opentags[-1] == u'dd':
150                 self.handle_curdata()
151                 self.opentags.pop()
152             elif tag_name == u'a':
153                 for attr in attrs:
154                     if attr[0].lower() == u'href':
155                         self.urls.append(attr[1].decode('utf-8'))
156                 self.curdata = self.curdata + u'`'
157                 self.opentags.append(tag_name)
158                 return
159             elif tag_name == u'img':
160                 self.handle_image(attrs)
161                 return
162             elif tag_name == u'br':
163                 self.handle_br()
164                 return
165             else:
166                 # we don't know the tag, so lets avoid handling it!
167                 return 
168
169     def handle_startendtag(self, tag, attrs):
170         if tag.lower() == u'br':
171             self.handle_br()
172         elif tag.lower() == u'img':
173             self.handle_image(attrs)
174             return
175
176     def handle_br(self):
177             self.handle_curdata()
178             self.opentags.append(u'br')
179             self.handle_curdata()
180             self.opentags.pop()
181
182     def handle_image(self, attrs):
183         alt = u''
184         url = u''
185         for attr in attrs:
186             if attr[0] == 'alt':
187                 alt = attr[1].decode('utf-8')
188             elif attr[0] == 'src':
189                 url = attr[1].decode('utf-8')
190         if url:
191             if alt:
192                 if self.images.has_key(alt):
193                     if self.images[alt]["url"] == url:
194                         self.curdata = self.curdata \
195                             + u'|%s|' %(alt,)
196                     else:
197                         while self.images.has_key(alt):
198                             alt = alt + "_"
199                         self.images[alt]["url"] = url
200                         self.curdata = self.curdata \
201                             + u'|%s|' %(alt,)
202                 else:
203                     self.images[alt] = {}
204                     self.images[alt]["url"] = url
205                     self.curdata = self.curdata \
206                         + u'|%s|' %(alt,)
207             else:
208                 if self.images.has_key(url):
209                     self.curdata = self.curdata \
210                         + u'|%s|' %(url,)
211                 else:
212                     self.images[url] = {}
213                     self.images[url]["url"] =url
214                     self.curdata = self.curdata \
215                         + u'|%s|' %(url,)
216
217     def handle_curdata(self):
218
219         if len(self.opentags) == 0:
220             return
221
222         tag_thats_done = self.opentags[-1]
223
224         if len(self.curdata) == 0:
225             return
226
227         if tag_thats_done == u'br':
228             if len(self.text) == 0 or self.text[-1] != '\n':
229                 self.text = self.text + '\n'
230                 self.ignorenodata = True
231             return
232
233         if len(self.curdata.strip()) == 0:
234             return
235
236         if tag_thats_done in self.blockleveltags:
237             newlinerequired = self.text != u''
238             if self.ignorenodata:
239                 newlinerequired = False
240             self.ignorenodata = False
241             if newlinerequired:
242                 if tag_thats_done in [u'dt', u'dd', u'li'] \
243                     and len(self.text) > 1 \
244                     and self.text[-1] != u'\n':
245                         self.text = self.text + u'\n'
246                 elif len(self.text) > 2 \
247                     and self.text[-1] != u'\n' \
248                     and self.text[-2] != u'\n':
249                     self.text = self.text + u'\n\n'
250
251         if tag_thats_done in ["h1", "h2", "h3", "h4", "h5", "h6"]:
252             underline = u''
253             underlinechar = u'='
254             headingtext = " ".join(self.curdata.split())
255             seperator = u'\n' + u' '*self.indentlevel
256             headingtext = seperator.join( \
257                 textwrap.wrap( \
258                     headingtext, \
259                     self.textwidth - self.indentlevel \
260                     ) \
261                 )
262
263             if tag_thats_done == u'h2':
264                 underlinechar = u'-'
265             elif tag_thats_done != u'h1':
266                 underlinechar = u'~'
267
268             if u'\n' in headingtext:
269                 underline = u' ' * self.indentlevel \
270                     + underlinechar * (self.textwidth - self.indentlevel)
271             else:
272                 underline = u' ' * self.indentlevel \
273                     + underlinechar * len(headingtext)
274             self.text = self.text \
275                 + headingtext + u'\n' \
276                 + underline
277         elif tag_thats_done in [u'p', u'div']:
278             paragraph = unicode( \
279                 " ".join(self.curdata.strip().encode("utf-8").split()), \
280                 "utf-8")
281             seperator = u'\n' + u' ' * self.indentlevel
282             self.text = self.text \
283                 + u' ' * self.indentlevel \
284                 + seperator.join( \
285                     textwrap.wrap( \
286                         paragraph, self.textwidth - self.indentlevel))
287         elif tag_thats_done == "pre":
288             self.text = self.text + unicode( \
289                 self.curdata.encode("utf-8"), "utf-8")
290         elif tag_thats_done == u'blockquote':
291             quote = unicode( \
292                 " ".join(self.curdata.encode("utf-8").strip().split()), \
293                 "utf-8")
294             seperator = u'\n' + u' ' * self.indentlevel + u'> '
295             if len(self.text) > 0 and self.text[-1] != u'\n':
296                 self.text = self.text + u'\n'
297             self.text = self.text \
298                 + u'> ' \
299                 + seperator.join( \
300                     textwrap.wrap( \
301                         quote, \
302                         self.textwidth - self.indentlevel - 2 \
303                     )
304                 )
305             self.curdata = u''
306         elif tag_thats_done == "li":
307             item = unicode(self.curdata.encode("utf-8").strip(), "utf-8")
308             if len(self.text) > 0 and self.text[-1] != u'\n':
309                 self.text = self.text + u'\n'
310             # work out if we're in an ol rather than a ul
311             latesttags = self.opentags[-4:]
312             latesttags.reverse()
313             isul = None
314             for thing in latesttags:
315                 if thing == 'ul':
316                     isul = True
317                     break
318                 elif thing == 'ol':
319                     isul = False
320                     break
321
322             listindent = 3
323             if not isul:
324                 listindent = 4
325
326             listmarker = u' * '
327             if isul == False:
328                 listmarker = u' %2d. ' %(self.listcount[-1])
329                 self.listcount[-1] = self.listcount[-1] + 1
330
331             seperator = u'\n' \
332                 + u' ' * self.indentlevel \
333                 + u' ' * listindent
334             self.text = self.text \
335                 + u' ' * self.indentlevel \
336                 + listmarker \
337                 + seperator.join( \
338                     textwrap.wrap( \
339                         item, \
340                         self.textwidth - self.indentlevel - listindent \
341                     ) \
342                 )
343             self.curdata = u''
344         elif tag_thats_done == u'dt':
345             definition = unicode(" ".join( \
346                     self.curdata.encode("utf-8").strip().split()), \
347                 "utf-8")
348             if len(self.text) > 0 and self.text[-1] != u'\n':
349                 self.text = self.text + u'\n\n'
350             elif len(self.text) > 1 and self.text[-2] != u'\n':
351                 self.text = self.text + u'\n'
352             definition = u' ' * self.indentlevel + definition + "::"
353             indentstring = u'\n' + u' ' * (self.indentlevel + 1)
354             self.text = self.text \
355                 + indentstring.join(
356                     textwrap.wrap(definition, \
357                         self.textwidth - self.indentlevel - 1))
358             self.curdata = u''
359         elif tag_thats_done == u'dd':
360             definition = unicode(" ".join( \
361                     self.curdata.encode("utf-8").strip().split()),
362                 "utf-8")
363             if len(definition) > 0:
364                 if len(self.text) > 0 and self.text[-1] != u'\n':
365                     self.text = self.text + u'\n'
366                 indentstring = u'\n' + u' ' * (self.indentlevel + 4)
367                 self.text = self.text \
368                     + u' ' * (self.indentlevel + 4) \
369                     + indentstring.join( \
370                         textwrap.wrap( \
371                             definition, \
372                             self.textwidth - self.indentlevel - 4 \
373                             ) \
374                         )
375                 self.curdata = u''
376         elif tag_thats_done == u'a':
377             self.curdata = self.curdata + u'`__'
378             pass
379         elif tag_thats_done in self.liststarttags:
380             pass
381
382         if tag_thats_done in self.blockleveltags:
383             self.curdata = u''
384
385         self.ignorenodata = False
386
387     def handle_endtag(self, tag):
388         self.ignorenodata = False
389         if tag == "span":
390             return
391
392         try:
393             tagindex = self.opentags.index(tag)
394         except:
395             return
396         tag = tag.lower()
397
398         if tag in [u'br', u'img']:
399             return
400
401         if tag in self.liststarttags:
402             if tag in [u'ol', u'dl', u'ul']:
403                 self.handle_curdata()
404                 # find if there was a previous list level
405                 smalllist = self.opentags[:-1]
406                 smalllist.reverse()
407                 for prev_listtag in smalllist:
408                     if prev_listtag in [u'ol', u'dl']:
409                         self.indentlevel = self.indentlevel - 4
410                         break
411                     elif prev_listtag == u'ul':
412                         self.indentlevel = self.indentlevel - 3
413                         break
414
415         if tag == u'ol':
416             self.listcount = self.listcount[:-1]
417
418         while tagindex < len(self.opentags) \
419             and tag in self.opentags[tagindex+1:]:
420             try:
421                 tagindex = self.opentags.index(tag, tagindex+1)
422             except:
423                 # well, we don't want to do that then
424                 pass
425         if tagindex != len(self.opentags) - 1:
426             # Assuming the data was for the last opened tag first
427             self.handle_curdata()
428             # Now kill the list to be a slice before this tag was opened
429             self.opentags = self.opentags[:tagindex + 1]
430         else:
431             self.handle_curdata()
432             if self.opentags[-1] == tag:
433                 self.opentags.pop()
434
435     def handle_data(self, data):
436         if len(self.opentags) == 0:
437             self.opentags.append(u'p')
438         self.curdata = self.curdata + data.decode("utf-8")
439
440     def handle_entityref(self, name):
441         entity = name
442         if HTML2Text.entities.has_key(name.lower()):
443             entity = HTML2Text.entities[name.lower()]
444         elif name[0] == "#":
445             entity = unichr(int(name[1:]))
446         else:
447             entity = "&" + name + ";"
448
449         self.curdata = self.curdata + unicode(entity, "utf-8")
450
451     def gettext(self):
452         self.handle_curdata()
453         if len(self.text) == 0 or self.text[-1] != u'\n':
454             self.text = self.text + u'\n'
455         self.opentags = []
456         if len(self.text) > 0:
457             while len(self.text) > 1 and self.text[-1] == u'\n':
458                 self.text = self.text[:-1]
459             self.text = self.text + u'\n'
460         if len(self.urls) > 0:
461             self.text = self.text + u'\n__ ' + u'\n__ '.join(self.urls) + u'\n'
462             self.urls = []
463         if len(self.images.keys()) > 0:
464             self.text = self.text + u'\n.. ' \
465                 + u'.. '.join( \
466                     ["|%s| image:: %s" %(a, self.images[a]["url"]) \
467                 for a in self.images.keys()]) + u'\n'
468             self.images = {}
469         return self.text
470
def open_url(method, url):
    """Request url with the given HTTP method, following redirects.

    Up to three requests are made; a 301, 302, 303 or 307 answer moves
    on to the URL in its Location header, which may be relative to the
    current URL.  https URLs are requested over HTTPSConnection.

    Returns the httplib response of the first request answered with
    status 200, or None when no attempt succeeds.  The connection of a
    returned response is left open so the caller can read the body.
    """
    from urlparse import urljoin

    redirectcount = 0
    while redirectcount < 3:
        (scheme, rest) = urllib.splittype(url)
        (host, path) = urllib.splithost(rest)
        if host is None:
            # not an absolute URL, there is nothing we can connect to
            return None
        (host, port) = urllib.splitport(host)
        if scheme == "https":
            connection_class = httplib.HTTPSConnection
            default_port = 443
        else:
            connection_class = httplib.HTTPConnection
            default_port = 80
        if port is None:
            port = default_port
        if not path:
            # "http://host" has no path, but the request line needs one
            path = "/"

        conn = None
        try:
            conn = connection_class("%s:%s" %(host, port))
            conn.request(method, path)
            response = conn.getresponse()
            if response.status == 200:
                return response
            if response.status in [301, 302, 303, 307]:
                location = response.getheader("location")
                if location:
                    url = urljoin(url, location)
            conn.close()
        except (httplib.HTTPException, socket.error):
            # a failed attempt counts against the limit like a redirect
            if conn is not None:
                conn.close()
        redirectcount = redirectcount + 1
    return None
494
def _random_token(length):
    """Return a string of length random ASCII letters and digits."""
    alphabet = string.ascii_letters + string.digits
    return "".join([random.choice(alphabet) for a in range(0, length)])


def _feed_changed(stored, headers):
    """Tell whether the feed differs from what was recorded for it.

    stored is the parsed record from the feeds database (header name to
    list of values); headers is the list of (name, value) pairs answered
    to a HEAD request, or None when that request failed.  A failed
    request, or a validator header that is not recorded or has a
    different value, counts as changed.
    """
    if headers is None:
        return True
    for (name, value) in headers:
        if name in ["content-length", "etag", "last-modified", "content-md5"]:
            if name not in stored or stored[name][0] != value:
                return True
    return False


def _deliver_item(maildir, url, item, db):
    """Write one feed item to maildir unless it has been delivered before.

    db is the open "seen" database.  Items are recorded under
    ``url|guid`` and ``url|link`` together with the md5 of their content;
    an item whose content md5 matches its record is skipped.
    """
    # need to work out what the content is first...
    if item.has_key("content"):
        content = item["content"][0]["value"]
    else:
        try:
            content = item["summary"]
        except KeyError:
            # an item with neither content nor summary has an empty body
            content = u""

    md5sum = md5.md5(content.encode("utf-8")).hexdigest()

    prevmessageid = None

    # check if there's a guid too - if that exists and we match the md5,
    # there is nothing to do
    if item.has_key("guid"):
        guidkey = url + "|" + item["guid"]
        if db.has_key(guidkey):
            data = cgi.parse_qs(db[guidkey])
            if data["contentmd5"][0] == md5sum:
                return

    linkkey = url + "|" + item["link"]
    if db.has_key(linkkey):
        data = cgi.parse_qs(db[linkkey])
        if "message-id" in data:
            prevmessageid = data["message-id"][0]
        if data["contentmd5"][0] == md5sum:
            return

    try:
        author = item["author"]
    except KeyError:
        author = url

    # create a basic email message
    msg = MIMEMultipart("alternative")
    messageid = "<" \
        + datetime.datetime.now().strftime("%Y%m%d%H%M") \
        + "." + _random_token(6) \
        + "@" + socket.gethostname() + ">"
    msg.add_header("Message-ID", messageid)
    msg.set_unixfrom("\"%s\" <rss2maildir@localhost>" %(url))
    msg.add_header("From", "\"%s\" <rss2maildir@localhost>" %(author))
    msg.add_header("To", "\"%s\" <rss2maildir@localhost>" %(url))
    if prevmessageid:
        msg.add_header("References", prevmessageid)
    # the header is labelled -0000, so the fallback has to be UTC as well
    createddate = datetime.datetime.utcnow() \
        .strftime("%a, %e %b %Y %T -0000")
    try:
        createddate = datetime.datetime(*item["updated_parsed"][0:6]) \
            .strftime("%a, %e %b %Y %T -0000")
    except (KeyError, TypeError, ValueError):
        # no usable update time on the item, keep the current time
        pass
    msg.add_header("Date", createddate)
    msg.add_header("Subject", item["title"])
    msg.set_default_type("text/plain")

    htmlcontent = "%s\n\n<p>Item URL: <a href='%s'>%s</a></p>" %( \
        content, \
        item["link"], \
        item["link"] )
    htmlpart = MIMEText(htmlcontent.encode("utf-8"), "html", "utf-8")
    textparser = HTML2Text()
    textparser.feed(content.encode("utf-8"))
    textcontent = textparser.gettext()
    textcontent = "%s\n\nItem URL: %s" %( \
        textcontent, \
        item["link"] )
    textpart = MIMEText(textcontent.encode("utf-8"), "plain", "utf-8")
    msg.attach(textpart)
    msg.attach(htmlpart)

    # start by working out the filename we should be writing to, we do
    # this following the normal maildir style rules
    fname = str(os.getpid()) \
        + "." + socket.gethostname() \
        + "." + _random_token(10) \
        + "." + datetime.datetime.now().strftime('%s')
    fn = os.path.join(maildir, "tmp", fname)
    fh = open(fn, "w")
    try:
        fh.write(msg.as_string())
    finally:
        fh.close()
    # now move it in to the new directory
    newfn = os.path.join(maildir, "new", fname)
    os.link(fn, newfn)
    os.unlink(fn)

    # now add to the database about the item
    if prevmessageid:
        messageid = prevmessageid + " " + messageid
    record = urllib.urlencode(( \
        ("message-id", messageid), \
        ("created", createddate), \
        ("contentmd5", md5sum) \
        ))
    if item.has_key("guid") and item["guid"] != item["link"]:
        db[url + "|" + item["guid"]] = record
        linkrecord = record
        if db.has_key(linkkey):
            # an existing link record keeps its creation date and md5 and
            # only gets the new message id
            old = cgi.parse_qs(db[linkkey])
            if "created" in old and "contentmd5" in old:
                linkrecord = urllib.urlencode(( \
                    ("message-id", messageid), \
                    ("created", old["created"][0]), \
                    ("contentmd5", old["contentmd5"][0]) \
                    ))
        db[linkkey] = linkrecord
    else:
        db[linkkey] = record


def parse_and_deliver(maildir, url, statedir):
    """Fetch the feed at url and deliver its unseen items to maildir.

    statedir holds two dbm databases: "feeds" records the validator
    headers (content-md5, etag, last-modified, content-length) of each
    feed, "seen" records the delivered items.  A feed that is already
    known is probed with a HEAD request first and left alone when its
    validators are unchanged.  A failed fetch is reported on stderr.
    """
    feeddb = dbm.open(os.path.join(statedir, "feeds"), "c")
    try:
        # first check if we know about this feed already
        if feeddb.has_key(url):
            stored = cgi.parse_qs(feeddb[url])
            response = open_url("HEAD", url)
            headers = None
            if response is not None:
                headers = response.getheaders()
            if not _feed_changed(stored, headers):
                return # don't need to do anything, nothings changed.

        response = open_url("GET", url)
        if response is None:
            sys.stderr.write("Failed to fetch feed: %s\n" %(url))
            return
        headers = response.getheaders()

        fp = feedparser.parse(response)
        db = dbm.open(os.path.join(statedir, "seen"), "c")
        try:
            for item in fp["items"]:
                _deliver_item(maildir, url, item, db)
        finally:
            db.close()

        if headers:
            data = []
            for header in headers:
                if header[0] in \
                    ["content-md5", "etag", "last-modified", "content-length"]:
                    data.append((header[0], header[1]))
            if len(data) > 0:
                feeddb[url] = urllib.urlencode(data)
    finally:
        # close the database on every path, early returns included
        feeddb.close()
683
def _ensure_directory(path, notdir_msg, create_msg):
    """Make sure path is a directory, creating it when it does not exist.

    notdir_msg is written to stderr when path exists but is not a
    directory, create_msg when os.mkdir() fails; both are format strings
    taking the path.  Either failure exits with status 1.
    """
    try:
        mode = os.stat(path)[stat.ST_MODE]
    except OSError:
        try:
            os.mkdir(path)
        except OSError:
            sys.stderr.write(create_msg %(path,))
            sys.exit(1)
        return
    if not stat.S_ISDIR(mode):
        sys.stderr.write(notdir_msg %(path,))
        sys.exit(1)


if __name__ == "__main__":
    # This only gets executed if we really called the program
    # first off, parse the command line arguments

    oparser = OptionParser()
    oparser.add_option(
        "-c", "--conf", dest="conf",
        help="location of config file"
        )
    oparser.add_option(
        "-s", "--statedir", dest="statedir",
        help="location of directory to store state in"
        )

    (options, args) = oparser.parse_args()

    # check for the configfile

    configfile = None

    if options.conf is not None:
        # does the file exist?
        try:
            os.stat(options.conf)
        except OSError:
            # exit here as the specified file doesn't exist
            sys.stderr.write( \
                "Config file %s does not exist. Exiting.\n" %(options.conf,))
            sys.exit(2)
        configfile = options.conf
    else:
        # check through the default locations
        candidates = []
        home = os.environ.get("HOME")
        if home is not None:
            candidates.append("%s/.rss2maildir.conf" %(home,))
        candidates.append("/etc/rss2maildir.conf")
        for candidate in candidates:
            try:
                os.stat(candidate)
            except OSError:
                continue
            configfile = candidate
            break
        if configfile is None:
            sys.stderr.write("No config file found. Exiting.\n")
            sys.exit(2)

    # Right - if we've got this far, we've got a config file, now for the hard
    # bits...

    scp = SafeConfigParser()
    scp.read(configfile)

    maildir_root = "RSSMaildir"
    state_dir = "state"

    # the command line wins over the config file, which wins over the default
    if options.statedir is not None:
        state_dir = options.statedir
        _ensure_directory(state_dir, \
            "State directory (%s) is not a directory\n", \
            "Couldn't create statedir %s\n")
    elif scp.has_option("general", "state_dir"):
        state_dir = scp.get("general", "state_dir")
        _ensure_directory(state_dir, \
            "State directory (%s) is not a directory\n", \
            "Couldn't create state directory %s\n")
    else:
        _ensure_directory(state_dir, \
            "State directory %s is not a directory\n", \
            "State directory %s could not be created\n")

    if scp.has_option("general", "maildir_root"):
        maildir_root = scp.get("general", "maildir_root")

    _ensure_directory(maildir_root, \
        "Maildir Root %s is not a directory\n", \
        "Couldn't create Maildir Root %s\n")

    feeds = scp.sections()
    try:
        feeds.remove("general")
    except ValueError:
        pass

    for section in feeds:
        # the section name is the feed url; the maildir defaults to it too
        if scp.has_option(section, "maildir"):
            maildir = scp.get(section, "maildir")
        else:
            maildir = section

        # url-quote the name so it is usable as a single directory name
        maildir = urllib.urlencode(((section, maildir),)).split("=")[1]
        maildir = os.path.join(maildir_root, maildir)

        # check if the directory exists
        try:
            mode = os.stat(maildir)[stat.ST_MODE]
        except OSError:
            try:
                os.mkdir(maildir)
            except OSError:
                sys.stderr.write("Couldn't create root maildir %s\n" \
                    %(maildir))
                sys.exit(1)
            try:
                os.mkdir(os.path.join(maildir, "new"))
                os.mkdir(os.path.join(maildir, "cur"))
                os.mkdir(os.path.join(maildir, "tmp"))
            except OSError:
                sys.stderr.write( \
                    "Couldn't create required maildir directories for %s\n" \
                    %(section,))
                sys.exit(1)
        else:
            if stat.S_ISDIR(mode):
                # check if there's a new, cur and tmp directory
                for subdir in ("cur", "tmp", "new"):
                    subpath = os.path.join(maildir, subdir)
                    try:
                        submode = os.stat(subpath)[stat.ST_MODE]
                    except OSError:
                        try:
                            os.mkdir(subpath)
                        except OSError:
                            sys.stderr.write( \
                                "Couldn't create required maildir " \
                                "directories for %s\n" %(section,))
                            sys.exit(1)
                    else:
                        if not stat.S_ISDIR(submode):
                            sys.stderr.write( \
                                "Broken maildir: %s\n" %(maildir))
            else:
                sys.stderr.write("Broken maildir: %s\n" %(maildir))

        # right - we've got the directories, we've got the section, we know the
        # url... lets play!

        parse_and_deliver(maildir, section, state_dir)