More unicode fixes
[rss2maildir.git] / rss2maildir.py
1 #!/usr/bin/python
2 # coding=utf-8
3
4 # rss2maildir.py - RSS feeds to Maildir 1 email per item
5 # Copyright (C) 2007  Brett Parker <iDunno@sommitrealweird.co.uk>
6
7 # This program is free software: you can redistribute it and/or modify
8 # it under the terms of the GNU General Public License as published by
9 # the Free Software Foundation, either version 3 of the License, or
10 # (at your option) any later version.
11
12 # This program is distributed in the hope that it will be useful,
13 # but WITHOUT ANY WARRANTY; without even the implied warranty of
14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 # GNU General Public License for more details.
16
17 # You should have received a copy of the GNU General Public License
18 # along with this program.  If not, see <http://www.gnu.org/licenses/>.
19
20 import sys
21 import os
22 import stat
23 import httplib
24 import urllib
25
26 import feedparser
27
28 from email.MIMEMultipart import MIMEMultipart
29 from email.MIMEText import MIMEText
30
31 import datetime
32 import random
33 import string
34 import textwrap
35
36 import socket
37
38 from optparse import OptionParser
39 from ConfigParser import SafeConfigParser
40
41 from base64 import b64encode
42 import md5
43
44 import cgi
45 import dbm
46
47 from HTMLParser import HTMLParser
48
class HTML2Text(HTMLParser):
    """Convert an HTML fragment into wrapped plain text.

    Feed markup with feed() and fetch the result with gettext().  Block
    level elements become paragraphs wrapped at ``textwidth`` columns,
    headings are underlined, list items get a bullet or a running number,
    and every link is written as `text`__ with its target appended to the
    end of the output as a "__ url" line.

    Input may be unicode or UTF-8 encoded byte strings; all internal state
    is unicode.
    """

    # Replacement text for the named entities handle_entityref() resolves.
    entities = {
        u'amp': "&",
        u'lt': "<",
        u'gt': ">",
        u'pound': "£",
        u'copy': "©",
        u'apos': "'",
        u'quot': "\"",
        u'nbsp': " ",
        }

    # Tags whose text is flushed to the output as a block of its own.
    blockleveltags = [
        u'h1',
        u'h2',
        u'h3',
        u'h4',
        u'h5',
        u'h6',
        u'pre',
        u'p',
        u'ul',
        u'ol',
        u'dl',
        u'li',
        u'dt',
        u'dd',
        u'div',
        #u'blockquote',
        ]

    # Tags that open a list (and therefore may change the indentation).
    liststarttags = [
        u'ul',
        u'ol',
        u'dl',
        ]

    # Block tags that stay open on the stack when they are the new tag.
    cancontainflow = [
        u'div',
        u'li',
        u'dd',
        u'blockquote',
    ]

    def __init__(self, textwidth=70):
        """Create a converter wrapping its output at *textwidth* columns."""
        self.text = u''          # finished output
        self.curdata = u''       # text collected for the innermost open tag
        self.textwidth = textwidth
        self.opentags = []       # stack of currently open tags
        self.indentlevel = 0     # left margin, in columns
        self.ignorenodata = False
        self.listcount = []      # next item number for each open <ol>
        self.urls = []           # link targets, in document order
        HTMLParser.__init__(self)

    @staticmethod
    def _to_unicode(value):
        """Return *value* as unicode, decoding byte strings as UTF-8."""
        if isinstance(value, type(u'')):
            return value
        return value.decode("utf-8")

    def handle_starttag(self, tag, attrs):
        """Track an opening tag; block tags flush the pending text first."""
        tag_name = tag.lower()
        if tag_name in self.blockleveltags:
            # handle starting a new block - unless we're in a block element
            # that can contain other blocks, we'll assume that we want to close
            # the container
            if len(self.opentags) > 1 and self.opentags[-1] == u'li':
                self.handle_curdata()

            if tag_name == u'ol':
                self.handle_curdata()
                self.listcount.append(1)
                self.listlevel = len(self.listcount) - 1

            if tag_name in self.liststarttags:
                # a list nested in another list is indented by the width of
                # the enclosing list's marker
                smallist = self.opentags[-3:-1]
                smallist.reverse()
                for prev_listtag in smallist:
                    if prev_listtag in [u'dl', u'ol']:
                        self.indentlevel = self.indentlevel + 4
                        break
                    elif prev_listtag == u'ul':
                        self.indentlevel = self.indentlevel + 3
                        break

            if len(self.opentags) > 0:
                self.handle_curdata()
                if tag_name not in self.cancontainflow:
                    self.opentags.pop()
            self.opentags.append(tag_name)
        elif tag_name == u'a':
            for (attr_name, attr_value) in attrs:
                if attr_name.lower() == u'href':
                    # a value-less href arrives as None; keep a placeholder
                    # so the targets stay aligned with the `text`__ markers
                    self.urls.append(self._to_unicode(attr_value or u''))
            self.curdata = self.curdata + u'`'
            self.opentags.append(tag_name)
        elif tag_name == u'img':
            self.handle_image(attrs)
        elif tag_name == u'br':
            self.handle_br()
        # any other tag (span included) is unknown to us and ignored

    def handle_startendtag(self, tag, attrs):
        """Handle the self-closing forms <br/> and <img/>."""
        if tag.lower() == u'br':
            self.handle_br()
        elif tag.lower() == u'img':
            self.handle_image(attrs)

    def handle_br(self):
        """Flush the pending text, then process a temporary 'br' tag."""
        self.handle_curdata()
        self.opentags.append(u'br')
        self.handle_curdata()
        self.opentags.pop()

    def handle_image(self, attrs):
        """Append " [img:SRC(ALT)]" to the pending text if SRC is set."""
        alt = u''
        url = u''
        for (attr_name, attr_value) in attrs:
            if attr_value is None:
                # attribute written without a value, e.g. <img alt>
                continue
            if attr_name == 'alt':
                alt = self._to_unicode(attr_value)
            elif attr_name == 'src':
                url = self._to_unicode(attr_value)
        if url:
            self.curdata = self.curdata + u' [img:' + url
            if alt:
                self.curdata = self.curdata + u'(' + alt + u')'
            self.curdata = self.curdata + u']'

    def handle_curdata(self):
        """Render the pending text according to the innermost open tag.

        Does nothing when no tag is open or no text is pending.  For block
        level tags the text is wrapped, appended to the output and cleared;
        for 'a' the closing "`__" marker is added and the text is kept for
        the enclosing block.
        """
        if len(self.opentags) == 0:
            return

        tag_thats_done = self.opentags[-1]

        if len(self.curdata) == 0:
            return

        if tag_thats_done == u'br':
            if len(self.text) == 0 or self.text[-1] != u'\n':
                self.text = self.text + u'\n'
                self.ignorenodata = True
            return

        if len(self.curdata.strip()) == 0:
            return

        if tag_thats_done in self.blockleveltags:
            # separate this block from the previous output: one newline
            # between list-like items, a blank line between other blocks
            newlinerequired = self.text != u''
            if self.ignorenodata:
                newlinerequired = False
            self.ignorenodata = False
            if newlinerequired:
                if tag_thats_done in [u'dt', u'dd', u'li'] \
                    and len(self.text) > 1 \
                    and self.text[-1] != u'\n':
                    self.text = self.text + u'\n'
                elif len(self.text) > 2 \
                    and self.text[-1] != u'\n' \
                    and self.text[-2] != u'\n':
                    self.text = self.text + u'\n\n'

        if tag_thats_done in (u'h1', u'h2', u'h3', u'h4', u'h5', u'h6'):
            separator = u'\n' + u' ' * self.indentlevel
            headingtext = separator.join(
                textwrap.wrap(
                    self.curdata,
                    self.textwidth - self.indentlevel))

            # h1 is underlined with '=', h2 with '-', deeper levels with '~'
            underlinechar = u'='
            if tag_thats_done == u'h2':
                underlinechar = u'-'
            elif tag_thats_done != u'h1':
                underlinechar = u'~'

            if u'\n' in headingtext:
                # wrapped heading: underline the full text width
                underline = u' ' * self.indentlevel \
                    + underlinechar * (self.textwidth - self.indentlevel)
            else:
                underline = u' ' * self.indentlevel \
                    + underlinechar * len(headingtext)
            self.text = self.text \
                + headingtext + u'\n' \
                + underline
        elif tag_thats_done in [u'p', u'div']:
            paragraph = self.curdata.strip()
            separator = u'\n' + u' ' * self.indentlevel
            self.text = self.text \
                + u' ' * self.indentlevel \
                + separator.join(
                    textwrap.wrap(
                        paragraph, self.textwidth - self.indentlevel))
        elif tag_thats_done == u'pre':
            # preformatted text is copied through unwrapped
            self.text = self.text + self.curdata
        elif tag_thats_done == u'blockquote':
            quote = self.curdata.strip()
            separator = u'\n' + u' ' * self.indentlevel + u'> '
            if len(self.text) > 0 and self.text[-1] != u'\n':
                self.text = self.text + u'\n'
            self.text = self.text \
                + u'> ' \
                + separator.join(
                    textwrap.wrap(
                        quote,
                        self.textwidth - self.indentlevel - 2))
            self.curdata = u''
        elif tag_thats_done == u'li':
            item = self.curdata.strip()
            if len(self.text) > 0 and self.text[-1] != u'\n':
                self.text = self.text + u'\n'
            # work out if we're in an ol rather than a ul
            latesttags = self.opentags[-4:]
            latesttags.reverse()
            isul = None
            for thing in latesttags:
                if thing == u'ul':
                    isul = True
                    break
                elif thing == u'ol':
                    isul = False
                    break

            listindent = 3
            if not isul:
                listindent = 4

            listmarker = u' * '
            # the counter may already be gone when malformed markup closed
            # the <ol> before this item was flushed; fall back to a bullet
            if isul is False and self.listcount:
                listmarker = u' %2d. ' %(self.listcount[-1])
                self.listcount[-1] = self.listcount[-1] + 1

            separator = u'\n' \
                + u' ' * self.indentlevel \
                + u' ' * listindent
            self.text = self.text \
                + u' ' * self.indentlevel \
                + listmarker \
                + separator.join(
                    textwrap.wrap(
                        item,
                        self.textwidth - self.indentlevel - listindent))
            self.curdata = u''
        elif tag_thats_done == u'dt':
            definition = self.curdata.strip()
            if len(self.text) > 0 and self.text[-1] != u'\n':
                self.text = self.text + u'\n\n'
            elif len(self.text) > 1 and self.text[-2] != u'\n':
                self.text = self.text + u'\n'
            definition = u' ' * self.indentlevel + definition + u'::'
            indentstring = u'\n' + u' ' * (self.indentlevel + 1)
            self.text = self.text \
                + indentstring.join(
                    textwrap.wrap(
                        definition,
                        self.textwidth - self.indentlevel - 1))
            self.curdata = u''
        elif tag_thats_done == u'dd':
            definition = self.curdata.strip()
            if len(definition) > 0:
                if len(self.text) > 0 and self.text[-1] != u'\n':
                    self.text = self.text + u'\n'
                indentstring = u'\n' + u' ' * (self.indentlevel + 4)
                self.text = self.text \
                    + u' ' * (self.indentlevel + 4) \
                    + indentstring.join(
                        textwrap.wrap(
                            definition,
                            self.textwidth - self.indentlevel - 4))
                self.curdata = u''
        elif tag_thats_done == u'a':
            # close the `text`__ marker; the text itself stays pending so
            # that the enclosing block renders it
            self.curdata = self.curdata + u'`__'

        if tag_thats_done in self.blockleveltags:
            self.curdata = u''

        self.ignorenodata = False

    def handle_endtag(self, tag):
        """Flush the pending text and unwind the tag stack for *tag*."""
        self.ignorenodata = False
        # normalise before searching the stack, which only holds lower case
        tag = tag.lower()
        if tag == u'span':
            return

        try:
            tagindex = self.opentags.index(tag)
        except ValueError:
            # closing something we never opened
            return

        if tag in [u'br', u'img']:
            return

        if tag in self.liststarttags:
            self.handle_curdata()
            # find if there was a previous list level
            smalllist = self.opentags[:-1]
            smalllist.reverse()
            for prev_listtag in smalllist:
                if prev_listtag in [u'ol', u'dl']:
                    self.indentlevel = self.indentlevel - 4
                    break
                elif prev_listtag == u'ul':
                    self.indentlevel = self.indentlevel - 3
                    break

        if tag == u'ol':
            self.listcount = self.listcount[:-1]

        # use the innermost (last) occurrence of the tag on the stack
        while tag in self.opentags[tagindex + 1:]:
            tagindex = self.opentags.index(tag, tagindex + 1)

        # Assuming the data was for the last opened tag first
        self.handle_curdata()
        if tagindex != len(self.opentags) - 1:
            # Now kill the list to be a slice before this tag was opened
            self.opentags = self.opentags[:tagindex + 1]
        elif self.opentags[-1] == tag:
            self.opentags.pop()

    def handle_data(self, data):
        """Collect text; text outside any tag opens an implicit 'p'."""
        if len(self.opentags) == 0:
            self.opentags.append(u'p')
        self.curdata = self.curdata + self._to_unicode(data)

    def handle_charref(self, name):
        """Append the character for a numeric reference (&#NNN; / &#xHH;).

        The parser passes the reference without the leading "&#".  A
        reference that is not a valid code point is kept as literal text.
        """
        try:
            if name[:1] in (u'x', u'X'):
                codepoint = int(name[1:], 16)
            else:
                codepoint = int(name)
            if not 0 <= codepoint <= 0x10FFFF:
                raise ValueError("code point out of range")
            # Decoding a \U escape instead of calling unichr(): unichr()
            # rejects code points above 0xFFFF on narrow Python builds.
            entity = (u'\\U%08x' % codepoint) \
                .encode("ascii").decode("unicode-escape")
        except ValueError:
            entity = u'&#' + self._to_unicode(name) + u';'
        self.curdata = self.curdata + entity

    def handle_entityref(self, name):
        """Append the text for a named entity; unknown ones stay literal."""
        key = name.lower()
        if key in HTML2Text.entities:
            entity = HTML2Text.entities[key]
        elif name[:1] == "#":
            # numeric reference handed to us directly by a caller
            self.handle_charref(name[1:])
            return
        else:
            entity = "&" + name + ";"

        self.curdata = self.curdata + self._to_unicode(entity)

    def gettext(self):
        """Flush pending text and return the complete rendered document.

        The result ends in exactly one newline, followed - when links were
        seen - by a blank line and one "__ url" line per link.  The tag
        stack and the collected urls are reset; the text itself is kept.
        """
        self.handle_curdata()
        if len(self.text) == 0 or self.text[-1] != u'\n':
            self.text = self.text + u'\n'
        self.opentags = []
        if len(self.text) > 0:
            while len(self.text) > 1 and self.text[-1] == u'\n':
                self.text = self.text[:-1]
            self.text = self.text + u'\n'
        if len(self.urls) > 0:
            self.text = self.text + u'\n__ ' + u'\n__ '.join(self.urls) + u'\n'
            self.urls = []
        return self.text
442
def open_url(method, url):
    """Request *url* with the HTTP verb *method*, following redirects.

    method -- request method handed to httplib, e.g. "GET" or "HEAD"
    url    -- absolute http:// or https:// URL

    Returns the httplib response object as soon as a request is answered
    with status 200.  At most three requests are made; a 301/302/303/307
    answer makes the next request go to its Location, any other answer or a
    connection error makes the next request retry the same URL.  Returns
    None when none of the requests ended in a 200 or when the URL has no
    host part.
    """
    # function-scope import: only needed here, to resolve redirect targets
    import urlparse

    redirectcount = 0
    while redirectcount < 3:
        (scheme, rest) = urllib.splittype(url)
        (host, path) = urllib.splithost(rest or "")
        if not host:
            # not an absolute URL - there is nothing we could connect to
            return None
        (host, port) = urllib.splitport(host)
        if scheme == "https":
            connection_class = httplib.HTTPSConnection
            default_port = 443
        else:
            connection_class = httplib.HTTPConnection
            default_port = 80
        if port is None:
            port = default_port

        conn = None
        try:
            conn = connection_class("%s:%s" %(host, port))
            # "http://host" has an empty path, which is not a valid request
            conn.request(method, path or "/")
            response = conn.getresponse()
            if response.status == 200:
                return response
            if response.status in [301, 302, 303, 307]:
                location = response.getheader("location")
                if location:
                    # Location may be relative to the URL just requested
                    url = urlparse.urljoin(url, location)
            conn.close()
        except (httplib.HTTPException, socket.error):
            # best effort: treat the failure like a wasted attempt
            if conn is not None:
                conn.close()
        redirectcount = redirectcount + 1
    return None
466
def _random_token(length):
    """Return *length* random ASCII letters and digits."""
    alphabet = string.ascii_letters + string.digits
    return "".join([random.choice(alphabet) for unused in range(length)])


def _feed_has_changed(url, stored):
    """Compare a HEAD response for *url* with the *stored* header values.

    *stored* is the parse_qs() dictionary saved after the last fetch.  The
    feed counts as changed when the HEAD request fails, when a validator
    header differs from (or is missing in) the stored values, or when the
    response carries no validator header at all.
    """
    response = open_url("HEAD", url)
    if response is None:
        return True

    compared = False
    for (name, value) in response.getheaders():
        if name in ("content-length", "etag", "last-modified", "content-md5"):
            compared = True
            try:
                if value != stored[name][0]:
                    return True
            except KeyError:
                # header was not sent last time, so we cannot compare
                return True
    # without any validator we cannot tell, so fetch to be safe
    return not compared


def _compose_message(url, item, content, prevmessageid):
    """Build the multipart mail for *item*.

    Returns a (message, message_id, date_header) tuple.
    """
    try:
        author = item["author"]
    except KeyError:
        author = url

    # create a basic email message
    msg = MIMEMultipart("alternative")
    messageid = "<" \
        + datetime.datetime.now().strftime("%Y%m%d%H%M") \
        + "." + _random_token(6) \
        + "@" + socket.gethostname() + ">"
    msg.add_header("Message-ID", messageid)
    msg.set_unixfrom("\"%s\" <rss2maildir@localhost>" %(url))
    msg.add_header("From", "\"%s\" <rss2maildir@localhost>" %(author))
    msg.add_header("To", "\"%s\" <rss2maildir@localhost>" %(url))
    if prevmessageid:
        msg.add_header("References", prevmessageid)

    # prefer the item's own timestamp, fall back to the current time
    try:
        createddate = datetime.datetime(*item["updated_parsed"][0:6]) \
            .strftime("%a, %e %b %Y %T -0000")
    except (KeyError, TypeError, ValueError):
        createddate = datetime.datetime.now() \
            .strftime("%a, %e %b %Y %T -0000")
    msg.add_header("Date", createddate)
    msg.add_header("Subject", item["title"])
    msg.set_default_type("text/plain")

    htmlcontent = "%s\n\n<p>Item URL: <a href='%s'>%s</a></p>" %( \
        content, \
        item["link"], \
        item["link"] )
    htmlpart = MIMEText(htmlcontent.encode("utf-8"), "html", "utf-8")

    textparser = HTML2Text()
    textparser.feed(content.encode("utf-8"))
    textcontent = "%s\n\nItem URL: %s" %( \
        textparser.gettext(), \
        item["link"] )
    textpart = MIMEText(textcontent.encode("utf-8"), "plain", "utf-8")

    msg.attach(textpart)
    msg.attach(htmlpart)
    return (msg, messageid, createddate)


def _write_to_maildir(maildir, msg):
    """Write *msg* into maildir/tmp and move it into maildir/new."""
    # start by working out the filename we should be writing to, we do
    # this following the normal maildir style rules
    fname = str(os.getpid()) \
        + "." + socket.gethostname() \
        + "." + _random_token(10) \
        + "." + datetime.datetime.now().strftime('%s')
    fn = os.path.join(maildir, "tmp", fname)
    fh = open(fn, "w")
    try:
        fh.write(msg.as_string())
    finally:
        fh.close()
    # now move it in to the new directory
    newfn = os.path.join(maildir, "new", fname)
    os.link(fn, newfn)
    os.unlink(fn)


def _process_item(maildir, url, db, item):
    """Deliver *item* unless the seen-database says it is unchanged."""
    # have we seen it before?
    # need to work out what the content is first...
    if item.has_key("content"):
        content = item["content"][0]["value"]
    else:
        content = item["summary"]

    md5sum = md5.md5(content.encode("utf-8")).hexdigest()

    # check if there's a guid too - if that exists and we match the md5,
    # there is nothing to do
    guidkey = None
    if item.has_key("guid"):
        guidkey = url + "|" + item["guid"]
        if db.has_key(guidkey):
            seen = cgi.parse_qs(db[guidkey])
            if seen["contentmd5"][0] == md5sum:
                return

    prevmessageid = None
    linkkey = url + "|" + item["link"]
    if db.has_key(linkkey):
        seen = cgi.parse_qs(db[linkkey])
        if "message-id" in seen:
            prevmessageid = seen["message-id"][0]
        if seen["contentmd5"][0] == md5sum:
            return

    (msg, messageid, createddate) = \
        _compose_message(url, item, content, prevmessageid)
    _write_to_maildir(maildir, msg)

    # now add to the database about the item
    if prevmessageid:
        messageid = prevmessageid + " " + messageid
    record = urllib.urlencode(( \
        ("message-id", messageid), \
        ("created", createddate), \
        ("contentmd5", md5sum) \
        ))
    if guidkey is not None and item["guid"] != item["link"]:
        db[guidkey] = record
        # the link entry gets the new message-id chain but keeps the
        # creation date and checksum it already had, if it had any
        try:
            previous = cgi.parse_qs(db[linkkey])
            linkrecord = urllib.urlencode(( \
                ("message-id", messageid), \
                ("created", previous["created"][0]), \
                ("contentmd5", previous["contentmd5"][0]) \
                ))
        except KeyError:
            linkrecord = record
        db[linkkey] = linkrecord
    else:
        db[linkkey] = record


def parse_and_deliver(maildir, url, statedir):
    """Fetch the feed at *url* and deliver its new items to *maildir*.

    maildir  -- maildir directory; messages are written to its tmp/
                subdirectory and then moved into new/
    url      -- address of the feed
    statedir -- directory holding the "feeds" and "seen" dbm databases

    A feed that was fetched before is first probed with a HEAD request and
    skipped when its stored validator headers are unchanged.  Items whose
    content checksum is already recorded are not delivered again.  Both
    databases are closed on every way out of the function.
    """
    feeddb = dbm.open(os.path.join(statedir, "feeds"), "c")
    try:
        # first check if we know about this feed already
        if feeddb.has_key(url):
            stored = cgi.parse_qs(feeddb[url])
            if not _feed_has_changed(url, stored):
                return # don't need to do anything, nothings changed.

        response = open_url("GET", url)
        if response is None:
            sys.stderr.write("Failed to fetch feed: %s\n" %(url))
            return
        headers = response.getheaders()

        fp = feedparser.parse(response)
        db = dbm.open(os.path.join(statedir, "seen"), "c")
        try:
            for item in fp["items"]:
                _process_item(maildir, url, db, item)
        finally:
            db.close()

        # remember the validators so the next run can use a HEAD request
        if headers:
            data = []
            for header in headers:
                if header[0] in \
                    ["content-md5", "etag", "last-modified", "content-length"]:
                    data.append((header[0], header[1]))
            if len(data) > 0:
                feeddb[url] = urllib.urlencode(data)
    finally:
        feeddb.close()
655
def _directory_state(path):
    """Return True if *path* is a directory, False if it is something
    else, and None if it cannot be stat()ed (normally: it does not exist).
    """
    try:
        mode = os.stat(path)[stat.ST_MODE]
    except OSError:
        return None
    return bool(stat.S_ISDIR(mode))


def _require_directory(path, notdir_message, create_message):
    """Make sure *path* is a directory, creating it if it is missing.

    Both messages are format strings taking the path.  The matching one is
    written to stderr before exiting with status 1 when *path* exists but
    is not a directory, or when it cannot be created.
    """
    state = _directory_state(path)
    if state is None:
        try:
            os.mkdir(path)
        except OSError:
            sys.stderr.write(create_message %(path,))
            sys.exit(1)
    elif not state:
        sys.stderr.write(notdir_message %(path,))
        sys.exit(1)


def _prepare_maildir(maildir, section):
    """Create *maildir* and its new/cur/tmp subdirectories as needed.

    Returns True when the maildir is usable.  Returns False, after a
    "Broken maildir" warning, when the maildir or one of its subdirectories
    exists but is not a directory.  Exits with status 1 when a directory
    cannot be created.
    """
    state = _directory_state(maildir)
    if state is None:
        try:
            os.mkdir(maildir)
        except OSError:
            sys.stderr.write("Couldn't create root maildir %s\n" \
                %(maildir))
            sys.exit(1)
    elif not state:
        sys.stderr.write("Broken maildir: %s\n" %(maildir))
        return False

    usable = True
    for subdir in ("new", "cur", "tmp"):
        path = os.path.join(maildir, subdir)
        state = _directory_state(path)
        if state is None:
            try:
                os.mkdir(path)
            except OSError:
                sys.stderr.write( \
                    "Couldn't create required maildir directories for %s\n" \
                    %(section,))
                sys.exit(1)
        elif not state:
            sys.stderr.write("Broken maildir: %s\n" %(maildir))
            usable = False
    return usable


def main():
    """Read the configuration and deliver every configured feed.

    Every section of the config file other than "general" names a feed
    URL; its optional "maildir" option names the maildir (below the
    maildir root) that receives the feed's items.
    """
    # first off, parse the command line arguments
    oparser = OptionParser()
    oparser.add_option(
        "-c", "--conf", dest="conf",
        help="location of config file"
        )
    oparser.add_option(
        "-s", "--statedir", dest="statedir",
        help="location of directory to store state in"
        )

    (options, args) = oparser.parse_args()

    # check for the configfile
    configfile = None

    if options.conf is not None:
        # does the file exist?
        if not os.path.exists(options.conf):
            sys.stderr.write( \
                "Config file %s does not exist. Exiting.\n" %(options.conf,))
            sys.exit(2)
        configfile = options.conf
    else:
        # check through the default locations
        candidates = []
        if "HOME" in os.environ:
            candidates.append( \
                "%s/.rss2maildir.conf" %(os.environ["HOME"],))
        candidates.append("/etc/rss2maildir.conf")
        for candidate in candidates:
            if os.path.exists(candidate):
                configfile = candidate
                break
        if configfile is None:
            sys.stderr.write("No config file found. Exiting.\n")
            sys.exit(2)

    # Right - if we've got this far, we've got a config file, now for the hard
    # bits...

    scp = SafeConfigParser()
    scp.read(configfile)

    maildir_root = "RSSMaildir"
    state_dir = "state"

    # the command line wins over the config file, which wins over the default
    if options.statedir is not None:
        state_dir = options.statedir
        _require_directory(state_dir, \
            "State directory (%s) is not a directory\n", \
            "Couldn't create statedir %s\n")
    elif scp.has_option("general", "state_dir"):
        state_dir = scp.get("general", "state_dir")
        _require_directory(state_dir, \
            "State directory (%s) is not a directory\n", \
            "Couldn't create state directory %s\n")
    else:
        _require_directory(state_dir, \
            "State directory %s is not a directory\n", \
            "State directory %s could not be created\n")

    if scp.has_option("general", "maildir_root"):
        maildir_root = scp.get("general", "maildir_root")

    _require_directory(maildir_root, \
        "Maildir Root %s is not a directory\n", \
        "Couldn't create Maildir Root %s\n")

    feeds = scp.sections()
    try:
        feeds.remove("general")
    except ValueError:
        pass

    for section in feeds:
        # the maildir defaults to the section name, i.e. the feed URL
        try:
            maildir = scp.get(section, "maildir")
        except Exception:
            # deliberately broad: any problem reading the option means
            # "use the default"
            maildir = section

        # url-quote the name so that it is a single path component
        maildir = urllib.urlencode(((section, maildir),)).split("=")[1]
        maildir = os.path.join(maildir_root, maildir)

        if not _prepare_maildir(maildir, section):
            # delivering into a broken maildir would only fail later on
            continue

        # right - we've got the directories, we've got the section, we know the
        # url... lets play!

        parse_and_deliver(maildir, section, state_dir)


if __name__ == "__main__":
    # This only gets executed if we really called the program
    main()