* Change all entity refs into unicode strings
[rss2maildir.git] / rss2maildir.py
1 #!/usr/bin/python
2 # coding=utf-8
3
4 # rss2maildir.py - RSS feeds to Maildir 1 email per item
5 # Copyright (C) 2007  Brett Parker <iDunno@sommitrealweird.co.uk>
6
7 # This program is free software: you can redistribute it and/or modify
8 # it under the terms of the GNU General Public License as published by
9 # the Free Software Foundation, either version 3 of the License, or
10 # (at your option) any later version.
11
12 # This program is distributed in the hope that it will be useful,
13 # but WITHOUT ANY WARRANTY; without even the implied warranty of
14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 # GNU General Public License for more details.
16
17 # You should have received a copy of the GNU General Public License
18 # along with this program.  If not, see <http://www.gnu.org/licenses/>.
19
20 import sys
21 import os
22 import stat
23 import httplib
24 import urllib
25
26 import feedparser
27
28 from email.MIMEMultipart import MIMEMultipart
29 from email.MIMEText import MIMEText
30
31 import datetime
32 import random
33 import string
34 import textwrap
35
36 import socket
37
38 from optparse import OptionParser
39 from ConfigParser import SafeConfigParser
40
41 from base64 import b64encode
42 import md5
43
44 import cgi
45 import dbm
46
47 from HTMLParser import HTMLParser
48
try:
    _text_type = unicode
    _unichr = unichr
except NameError:
    # Python 3: str is already the unicode text type and chr() covers every
    # code point.
    _text_type = str
    _unichr = chr


def _to_unicode(value, encoding="utf-8"):
    """Return value as unicode text, decoding byte strings with encoding."""
    if isinstance(value, _text_type):
        return value
    return value.decode(encoding)


class HTML2Text(HTMLParser):
    """Render an HTML fragment as wrapped plain text.

    Feed markup with feed() and collect the result with gettext().  Text is
    accumulated in self.curdata while a tag is open and is formatted into
    self.text by handle_curdata() according to the innermost entry of
    self.opentags.  Headings are underlined, list items get a bullet or a
    number, definition lists are indented, and links are written as
    `text`__ with the targets appended by gettext() as "__ url" lines.

    Byte strings passed in are decoded as UTF-8; unicode strings are used
    unchanged.
    """

    # named entities understood by handle_entityref
    entities = {
        u'amp': u'&',
        u'lt': u'<',
        u'gt': u'>',
        u'pound': u'\xa3',
        u'copy': u'\xa9',
        u'apos': u"'",
        u'quot': u'"',
        u'nbsp': u' ',
        }

    blockleveltags = [
        u'h1',
        u'h2',
        u'h3',
        u'h4',
        u'h5',
        u'h6',
        u'pre',
        u'p',
        u'ul',
        u'ol',
        u'dl',
        u'div',
        #u'blockquote',
        ]

    liststarttags = [
        u'ul',
        u'ol',
        u'dl',
        ]

    cancontainflow = [
        u'div',
        u'li',
        u'dd',
        u'blockquote',
    ]

    # tags that are not block level but still have their own formatting
    # branch in handle_curdata, so they have to be tracked in opentags
    _itemtags = [
        u'li',
        u'dt',
        u'dd',
        u'blockquote',
        ]

    def __init__(self, textwidth=70):
        """Create a converter that wraps its output at textwidth columns."""
        self.text = u''
        self.curdata = u''
        self.textwidth = textwidth
        self.opentags = []
        self.indentlevel = 0
        self.ignorenodata = False
        self.listcount = []
        self.urls = []
        HTMLParser.__init__(self)

    def handle_starttag(self, tag, attrs):
        """Track an opening tag, flushing pending text where a block starts."""
        tag_name = tag.lower()
        if tag_name in self.blockleveltags:
            # handle starting a new block - unless we're in a block element
            # that can contain other blocks, we'll assume that we want to close
            # the container
            if len(self.opentags) > 1 and self.opentags[-1] == u'li':
                self.handle_curdata()

            if tag_name == u'ol':
                self.handle_curdata()
                self.listcount.append(1)
                self.listlevel = len(self.listcount) - 1

            if tag_name in self.liststarttags:
                # a list opened inside another list is indented by the width
                # of the enclosing list's marker
                for prev_listtag in reversed(self.opentags[-3:-1]):
                    if prev_listtag in [u'dl', u'ol']:
                        self.indentlevel = self.indentlevel + 4
                        break
                    elif prev_listtag == u'ul':
                        self.indentlevel = self.indentlevel + 3
                        break

            if len(self.opentags) > 0:
                self.handle_curdata()
                if tag_name not in self.cancontainflow:
                    self.opentags.pop()
            self.opentags.append(tag_name)
            return

        if tag_name == u'span':
            return

        if tag_name == u'a':
            for attr in attrs:
                # attribute values are None for valueless attributes
                if attr[0].lower() == u'href' and attr[1] is not None:
                    self.urls.append(_to_unicode(attr[1]))
            self.curdata = self.curdata + u'`'
            self.opentags.append(tag_name)
        elif tag_name == u'img':
            self.handle_image(attrs)
        elif tag_name == u'br':
            self.handle_br()
        elif tag_name in self._itemtags:
            # These tags must be pushed, otherwise their text is attributed
            # to the enclosing list tag and thrown away by handle_curdata.
            if len(self.opentags) > 1:
                innermost = self.opentags[-1]
                closes_sibling = (
                    (tag_name in [u'dt', u'dd']
                        and innermost in [u'dt', u'dd'])
                    or (tag_name == u'li' and innermost == u'li'))
                if closes_sibling:
                    # the end tag of the previous item was omitted
                    self.handle_curdata()
                    self.opentags.pop()
            self.handle_curdata()
            self.opentags.append(tag_name)
        # any other tag is unknown, so we avoid handling it

    def handle_startendtag(self, tag, attrs):
        """Handle self-closing <br/> and <img/>; other tags are ignored."""
        if tag.lower() == u'br':
            self.handle_br()
        elif tag.lower() == u'img':
            self.handle_image(attrs)

    def handle_br(self):
        """Flush pending text, then let handle_curdata process a 'br'."""
        self.handle_curdata()
        self.opentags.append(u'br')
        self.handle_curdata()
        self.opentags.pop()

    def handle_image(self, attrs):
        """Append ' [img:SRC(ALT)]' to the pending text for an image."""
        alt = u''
        url = u''
        for attr in attrs:
            if attr[0] == 'alt':
                alt = attr[1]
            elif attr[0] == 'src':
                url = attr[1]
        if url:
            self.curdata = self.curdata + u' [img:' + _to_unicode(url)
            if alt:
                self.curdata = self.curdata \
                    + u'(' + _to_unicode(alt) + u')'
            self.curdata = self.curdata + u']'

    def handle_curdata(self):
        """Format self.curdata for the innermost open tag into self.text.

        Does nothing when no tag is open or when there is no pending text.
        Pending text is only cleared by the branches that actually emit it
        (and for every block level tag), so text belonging to inline tags
        stays pending for the enclosing element.
        """
        if len(self.opentags) == 0:
            return

        tag_thats_done = self.opentags[-1]

        if len(self.curdata) == 0:
            return

        if tag_thats_done == u'br':
            if len(self.text) == 0 or self.text[-1] != u'\n':
                self.text = self.text + u'\n'
                self.ignorenodata = True
            return

        if len(self.curdata.strip()) == 0:
            return

        if tag_thats_done in self.blockleveltags:
            newlinerequired = self.text != u''
            if self.ignorenodata:
                newlinerequired = False
            self.ignorenodata = False
            if newlinerequired \
                and len(self.text) > 2 \
                and self.text[-1] != u'\n' \
                and self.text[-2] != u'\n':
                    self.text = self.text + u'\n\n'

        indent = u' ' * self.indentlevel

        if tag_thats_done in [u'h1', u'h2', u'h3', u'h4', u'h5', u'h6']:
            seperator = u'\n' + indent
            headingtext = seperator.join(
                textwrap.wrap(
                    self.curdata.strip(),
                    self.textwidth - self.indentlevel))

            underlinechar = u'='
            if tag_thats_done == u'h2':
                underlinechar = u'-'
            elif tag_thats_done != u'h1':
                underlinechar = u'~'

            if u'\n' in headingtext:
                # a wrapped heading is underlined across the full width
                underline = indent \
                    + underlinechar * (self.textwidth - self.indentlevel)
            else:
                underline = indent + underlinechar * len(headingtext)
            # headingtext stays unicode: mixing in encoded bytes here fails
            # for any heading that is not pure ASCII
            self.text = self.text + headingtext + u'\n' + underline
        elif tag_thats_done in [u'p', u'div']:
            seperator = u'\n' + indent
            self.text = self.text \
                + indent \
                + seperator.join(
                    textwrap.wrap(
                        self.curdata.strip(),
                        self.textwidth - self.indentlevel))
        elif tag_thats_done == u'pre':
            self.text = self.text + self.curdata
        elif tag_thats_done == u'blockquote':
            seperator = u'\n' + indent + u'> '
            if len(self.text) > 0 and self.text[-1] != u'\n':
                self.text = self.text + u'\n'
            self.text = self.text \
                + u'> ' \
                + seperator.join(
                    textwrap.wrap(
                        self.curdata.strip(),
                        self.textwidth - self.indentlevel - 2))
            self.curdata = u''
        elif tag_thats_done == u'li':
            item = self.curdata.strip()
            if len(self.text) > 0 and self.text[-1] != u'\n':
                self.text = self.text + u'\n'
            # work out if we're in an ol rather than a ul
            isul = None
            for thing in reversed(self.opentags[-4:]):
                if thing == u'ul':
                    isul = True
                    break
                elif thing == u'ol':
                    isul = False
                    break

            listindent = 3
            if not isul:
                listindent = 4

            listmarker = u' * '
            # only number the item while a counter for the list exists
            if isul == False and len(self.listcount) > 0:
                listmarker = u' %2d. ' %(self.listcount[-1])
                self.listcount[-1] = self.listcount[-1] + 1

            seperator = u'\n' + indent + u' ' * listindent
            self.text = self.text \
                + indent \
                + listmarker \
                + seperator.join(
                    textwrap.wrap(
                        item,
                        self.textwidth - self.indentlevel - listindent))
            self.curdata = u''
        elif tag_thats_done == u'dt':
            definition = self.curdata.strip()
            if len(self.text) > 0 and self.text[-1] != u'\n':
                self.text = self.text + u'\n\n'
            elif len(self.text) > 1 and self.text[-2] != u'\n':
                self.text = self.text + u'\n'
            definition = indent + definition + u'::'
            indentstring = u'\n' + u' ' * (self.indentlevel + 1)
            self.text = self.text \
                + indentstring.join(
                    textwrap.wrap(
                        definition,
                        self.textwidth - self.indentlevel - 1))
            self.curdata = u''
        elif tag_thats_done == u'dd':
            definition = self.curdata.strip()
            if len(definition) > 0:
                if len(self.text) > 0 and self.text[-1] != u'\n':
                    self.text = self.text + u'\n'
                indentstring = u'\n' + u' ' * (self.indentlevel + 4)
                self.text = self.text \
                    + u' ' * (self.indentlevel + 4) \
                    + indentstring.join(
                        textwrap.wrap(
                            definition,
                            self.textwidth - self.indentlevel - 4))
                self.curdata = u''
        elif tag_thats_done == u'a':
            # close the link text; it stays pending for the enclosing tag
            self.curdata = self.curdata + u'`__'
        # text found directly inside ul/ol/dl is not emitted

        if tag_thats_done in self.blockleveltags:
            self.curdata = u''

        self.ignorenodata = False

    def handle_endtag(self, tag):
        """Flush pending text and close tag together with anything inside it.

        End tags for elements that are not currently open are ignored.
        """
        self.ignorenodata = False
        # opentags only ever holds lower case names
        tag = tag.lower()
        if tag == u'span':
            return

        if tag not in self.opentags:
            return

        if tag in [u'br', u'img']:
            return

        # position of the innermost open instance of this tag
        tagindex = len(self.opentags) - 1 - self.opentags[::-1].index(tag)

        if tag in self.liststarttags:
            self.handle_curdata()
            # find if there was a previous list level, looking only at the
            # tags that enclose the list being closed
            for prev_listtag in reversed(self.opentags[:tagindex]):
                if prev_listtag in [u'ol', u'dl']:
                    self.indentlevel = self.indentlevel - 4
                    break
                elif prev_listtag == u'ul':
                    self.indentlevel = self.indentlevel - 3
                    break

        if tag == u'ol':
            self.listcount = self.listcount[:-1]

        # Assuming the data was for the last opened tag first
        self.handle_curdata()
        if tagindex != len(self.opentags) - 1:
            # drop the tags that were left open inside this one, then give
            # the tag being closed the chance to emit what is still pending
            self.opentags = self.opentags[:tagindex + 1]
            self.handle_curdata()
        self.opentags.pop()

    def handle_data(self, data):
        """Collect text; text outside of any tag opens an implicit <p>."""
        if len(self.opentags) == 0:
            self.opentags.append(u'p')
        self.curdata = self.curdata + _to_unicode(data)

    def handle_charref(self, name):
        """Append the character for a numeric reference (&#NN; or &#xHH;).

        References that cannot be converted are kept as literal text.
        """
        try:
            if name[:1] in (u'x', u'X'):
                codepoint = int(name[1:], 16)
            else:
                codepoint = int(name)
            entity = _unichr(codepoint)
        except (ValueError, OverflowError):
            entity = u'&#' + _to_unicode(name) + u';'
        self.curdata = self.curdata + entity

    def handle_entityref(self, name):
        """Append the text for a named entity; unknown ones stay literal."""
        key = name.lower()
        if key in HTML2Text.entities:
            entity = HTML2Text.entities[key]
        elif name[:1] == "#":
            self.handle_charref(name[1:])
            return
        else:
            entity = u'&' + _to_unicode(name) + u';'

        self.curdata = self.curdata + _to_unicode(entity)

    def gettext(self):
        """Flush pending text and return everything converted so far.

        The text ends with a single newline; collected link targets are
        appended after a blank line, one "__ url" per line, and then
        forgotten.
        """
        self.handle_curdata()
        if len(self.text) == 0 or self.text[-1] != u'\n':
            self.text = self.text + u'\n'
        self.opentags = []
        if len(self.text) > 0:
            while len(self.text) > 1 and self.text[-1] == u'\n':
                self.text = self.text[:-1]
            self.text = self.text + u'\n'
        if len(self.urls) > 0:
            self.text = self.text + u'\n__ ' + u'\n__ '.join(self.urls) + u'\n'
            self.urls = []
        return self.text
440
def open_url(method, url):
    """Request url with the given HTTP method, following redirects.

    At most three requests are made in total.  A 301, 302, 303 or 307
    answer that carries a Location header moves on to that location
    (relative locations are resolved against the current url); every other
    failure simply uses up one attempt.

    Returns the httplib response of the first request answered with status
    200, or None if there was none.  The connection of a returned response
    is left open so that the caller can still read the body.
    """
    # function-scope import: only needed here to resolve relative redirects
    import urlparse

    for attempt in range(3):
        (scheme, rest) = urllib.splittype(url)
        (host, path) = urllib.splithost(rest)
        if not host:
            # nothing to connect to, retrying can not help
            return None
        (host, port) = urllib.splitport(host)

        # let httplib pick the default port for the scheme (80 or 443)
        # unless the url names one
        netloc = host
        if port:
            netloc = "%s:%s" %(host, port)
        if scheme == "https":
            connection_class = httplib.HTTPSConnection
        else:
            connection_class = httplib.HTTPConnection

        conn = None
        try:
            conn = connection_class(netloc)
            # "http://host" has an empty path, which is not a valid request
            conn.request(method, path or "/")
            response = conn.getresponse()
        except (httplib.HTTPException, socket.error):
            if conn is not None:
                conn.close()
            continue

        if response.status == 200:
            return response

        location = None
        if response.status in [301, 302, 303, 307]:
            location = response.getheader("location")
        # this response is not handed out, so its connection is done with
        conn.close()
        if location:
            url = urlparse.urljoin(url, location)

    return None
464
# response headers that are remembered per feed to detect changes
_FEED_STATE_HEADERS = ["content-md5", "etag", "last-modified", "content-length"]


def _random_token(length):
    """Return length random ASCII letters and digits."""
    alphabet = string.ascii_letters + string.digits
    return "".join([random.choice(alphabet) for a in range(0, length)])


def _feed_has_changed(headers, stored):
    """Compare HEAD response headers with the values stored for a feed.

    headers is a list of (name, value) pairs or None when the request
    failed; stored is the parsed query string kept in the feeds database.
    A failed request or a header that was not stored before counts as
    changed.
    """
    if headers is None:
        return True
    for (name, value) in headers:
        if name in _FEED_STATE_HEADERS:
            try:
                if value != stored[name][0]:
                    return True
            except (KeyError, IndexError):
                return True
    return False


def _write_to_maildir(maildir, msg):
    """Write msg into maildir/tmp and then move it to maildir/new."""
    # start by working out the filename we should be writing to, we do
    # this following the normal maildir style rules
    fname = str(os.getpid()) \
        + "." + socket.gethostname() \
        + "." + _random_token(10) \
        + "." + datetime.datetime.now().strftime('%s')
    fn = os.path.join(maildir, "tmp", fname)
    fh = open(fn, "w")
    try:
        fh.write(msg.as_string())
    finally:
        fh.close()
    # now move it in to the new directory
    newfn = os.path.join(maildir, "new", fname)
    os.link(fn, newfn)
    os.unlink(fn)


def _deliver_item(maildir, url, db, item):
    """Deliver one feed item as an email unless it was already delivered.

    db is the "seen" database.  An item is skipped when the record stored
    under its guid or its link carries the md5 of the current content.  A
    changed item is delivered again with a References header pointing at
    the earlier message id(s) stored under its link.
    """
    # need to work out what the content is first...
    if item.has_key("content"):
        content = item["content"][0]["value"]
    else:
        content = item["summary"]

    md5sum = md5.md5(content.encode("utf-8")).hexdigest()

    prevmessageid = None

    # check if there's a guid too - if that exists and we match the md5,
    # there is nothing to do
    if item.has_key("guid"):
        guid_key = url + "|" + item["guid"]
        if db.has_key(guid_key):
            data = cgi.parse_qs(db[guid_key])
            if data["contentmd5"][0] == md5sum:
                return

    link_key = url + "|" + item["link"]
    if db.has_key(link_key):
        data = cgi.parse_qs(db[link_key])
        if data.has_key("message-id"):
            prevmessageid = data["message-id"][0]
        if data["contentmd5"][0] == md5sum:
            return

    try:
        author = item["author"]
    except KeyError:
        author = url

    # create a basic email message
    msg = MIMEMultipart("alternative")
    messageid = "<" \
        + datetime.datetime.now().strftime("%Y%m%d%H%M") \
        + "." \
        + _random_token(6) \
        + "@" + socket.gethostname() + ">"
    msg.add_header("Message-ID", messageid)
    msg.set_unixfrom("\"%s\" <rss2maildir@localhost>" %(url))
    msg.add_header("From", "\"%s\" <rss2maildir@localhost>" %(author))
    msg.add_header("To", "\"%s\" <rss2maildir@localhost>" %(url))
    if prevmessageid:
        msg.add_header("References", prevmessageid)
    # use the item's own timestamp when the feed provides a usable one
    createddate = datetime.datetime.now() \
        .strftime("%a, %e %b %Y %T -0000")
    try:
        createddate = datetime.datetime(*item["updated_parsed"][0:6]) \
            .strftime("%a, %e %b %Y %T -0000")
    except (KeyError, TypeError, ValueError):
        pass
    msg.add_header("Date", createddate)
    msg.add_header("Subject", item["title"])
    msg.set_default_type("text/plain")

    htmlcontent = "%s\n\n<p>Item URL: <a href='%s'>%s</a></p>" %( \
        content, \
        item["link"], \
        item["link"] )
    htmlpart = MIMEText(htmlcontent.encode("utf-8"), "html", "utf-8")
    textparser = HTML2Text()
    textparser.feed(content.encode("utf-8"))
    textcontent = textparser.gettext()
    textcontent = "%s\n\nItem URL: %s" %( \
        textcontent, \
        item["link"] )
    textpart = MIMEText(textcontent.encode("utf-8"), "plain", "utf-8")
    msg.attach(textpart)
    msg.attach(htmlpart)

    _write_to_maildir(maildir, msg)

    # now add to the database about the item
    if prevmessageid:
        messageid = prevmessageid + " " + messageid
    data = urllib.urlencode(( \
        ("message-id", messageid), \
        ("created", createddate), \
        ("contentmd5", md5sum) \
        ))
    if item.has_key("guid") and item["guid"] != item["link"]:
        db[url + "|" + item["guid"]] = data
        if db.has_key(link_key):
            # an existing link record keeps its creation date and md5 and
            # only gains the new message id
            olddata = cgi.parse_qs(db[link_key])
            try:
                data = urllib.urlencode(( \
                    ("message-id", messageid), \
                    ("created", olddata["created"][0]), \
                    ("contentmd5", olddata["contentmd5"][0]) \
                    ))
            except (KeyError, IndexError):
                # incomplete old record: replace it with the new one
                pass
    db[link_key] = data


def parse_and_deliver(maildir, url, statedir):
    """Fetch the feed at url and deliver its new items into maildir.

    State is kept in two dbm databases inside statedir: "feeds" remembers
    selected response headers per feed url so that an unchanged feed can be
    skipped after a HEAD request, and "seen" remembers the items that have
    been delivered.  A message is written to stderr when the feed can not
    be fetched.
    """
    feeddb = dbm.open(os.path.join(statedir, "feeds"), "c")
    try:
        # first check if we know about this feed already
        if feeddb.has_key(url):
            stored = cgi.parse_qs(feeddb[url])
            response = open_url("HEAD", url)
            headers = None
            if response is not None:
                headers = response.getheaders()
            if not _feed_has_changed(headers, stored):
                return # don't need to do anything, nothings changed.

        response = open_url("GET", url)
        if response is None:
            sys.stderr.write("Failed to fetch feed: %s\n" %(url))
            return
        headers = response.getheaders()

        fp = feedparser.parse(response)
        db = dbm.open(os.path.join(statedir, "seen"), "c")
        try:
            for item in fp["items"]:
                _deliver_item(maildir, url, db, item)
        finally:
            db.close()

        if headers:
            data = []
            for header in headers:
                if header[0] in _FEED_STATE_HEADERS:
                    data.append((header[0], header[1]))
            if len(data) > 0:
                feeddb[url] = urllib.urlencode(data)
    finally:
        feeddb.close()
653
def _ensure_directory(path, notdir_message, create_message):
    """Make sure path is a directory, creating it when it is missing.

    Writes notdir_message to stderr when path exists but is not a
    directory, or create_message when it can not be created, and exits
    with status 1 in both cases.
    """
    if os.path.isdir(path):
        return
    if os.path.exists(path):
        sys.stderr.write(notdir_message)
        sys.exit(1)
    try:
        os.mkdir(path)
    except OSError:
        sys.stderr.write(create_message)
        sys.exit(1)


def _prepare_maildir(maildir, section):
    """Make sure maildir exists and has cur, tmp and new subdirectories.

    A missing maildir is created from scratch; failing to do so exits with
    status 1.  For an existing maildir missing subdirectories are added.
    Returns False, after reporting "Broken maildir" on stderr, when the
    maildir can not be used, True otherwise.
    """
    if not os.path.exists(maildir):
        try:
            os.mkdir(maildir)
        except OSError:
            sys.stderr.write("Couldn't create root maildir %s\n" \
                %(maildir))
            sys.exit(1)
        try:
            os.mkdir(os.path.join(maildir, "new"))
            os.mkdir(os.path.join(maildir, "cur"))
            os.mkdir(os.path.join(maildir, "tmp"))
        except OSError:
            sys.stderr.write( \
                "Couldn't create required maildir directories for %s\n" \
                %(section,))
            sys.exit(1)
        return True

    if not os.path.isdir(maildir):
        sys.stderr.write("Broken maildir: %s\n" %(maildir))
        return False

    # check if there's a new, cur and tmp directory
    usable = True
    for subdir in ["cur", "tmp", "new"]:
        path = os.path.join(maildir, subdir)
        if os.path.isdir(path):
            continue
        try:
            # fails as well when path exists but is not a directory
            os.mkdir(path)
        except OSError:
            sys.stderr.write("Broken maildir: %s\n" %(maildir))
            usable = False
    return usable


def main():
    """Read the configuration and deliver every configured feed.

    The config file is taken from -c/--conf, else ~/.rss2maildir.conf, else
    /etc/rss2maildir.conf; the program exits with status 2 when none is
    found.  Every section other than "general" names a feed url.
    """
    # first off, parse the command line arguments

    oparser = OptionParser()
    oparser.add_option(
        "-c", "--conf", dest="conf",
        help="location of config file"
        )
    oparser.add_option(
        "-s", "--statedir", dest="statedir",
        help="location of directory to store state in"
        )

    (options, args) = oparser.parse_args()

    # check for the configfile

    configfile = None

    if options.conf != None:
        # does the file exist?
        if not os.path.exists(options.conf):
            sys.stderr.write( \
                "Config file %s does not exist. Exiting.\n" %(options.conf,))
            sys.exit(2)
        configfile = options.conf
    else:
        # check through the default locations
        candidates = []
        home = os.environ.get("HOME")
        if home:
            candidates.append("%s/.rss2maildir.conf" %(home,))
        candidates.append("/etc/rss2maildir.conf")
        for candidate in candidates:
            if os.path.exists(candidate):
                configfile = candidate
                break
        if configfile is None:
            sys.stderr.write("No config file found. Exiting.\n")
            sys.exit(2)

    # Right - if we've got this far, we've got a config file, now for the hard
    # bits...

    scp = SafeConfigParser()
    scp.read(configfile)

    maildir_root = "RSSMaildir"
    state_dir = "state"

    # the command line wins over the config file, which wins over the default
    if options.statedir != None:
        state_dir = options.statedir
        _ensure_directory( \
            state_dir, \
            "State directory (%s) is not a directory\n" %(state_dir), \
            "Couldn't create statedir %s\n" %(state_dir))
    elif scp.has_option("general", "state_dir"):
        state_dir = scp.get("general", "state_dir")
        _ensure_directory( \
            state_dir, \
            "State directory (%s) is not a directory\n" %(state_dir), \
            "Couldn't create state directory %s\n" %(state_dir))
    else:
        _ensure_directory( \
            state_dir, \
            "State directory %s is not a directory\n" %(state_dir), \
            "State directory %s could not be created\n" %(state_dir))

    if scp.has_option("general", "maildir_root"):
        maildir_root = scp.get("general", "maildir_root")

    _ensure_directory( \
        maildir_root, \
        "Maildir Root %s is not a directory\n" %(maildir_root), \
        "Couldn't create Maildir Root %s\n" %(maildir_root))

    feeds = scp.sections()
    try:
        feeds.remove("general")
    except ValueError:
        pass

    for section in feeds:
        # the maildir is named after the "maildir" option of the section, or
        # after the section (the feed url) itself
        if scp.has_option(section, "maildir"):
            maildir = scp.get(section, "maildir")
        else:
            maildir = section

        # quote the name so that it is safe to use as a single path component
        maildir = urllib.quote_plus(maildir)
        maildir = os.path.join(maildir_root, maildir)

        if not _prepare_maildir(maildir, section):
            # nothing can be delivered there, carry on with the next feed
            continue

        # right - we've got the directories, we've got the section, we know the
        # url... lets play!

        parse_and_deliver(maildir, section, state_dir)


if __name__ == "__main__":
    # This only gets executed if we really called the program
    main()