]> git.sommitrealweird.co.uk Git - rss2maildir.git/blob - rss2maildir.py
* Begin fixes to list handling code - there's 2 unittests that are failing due
[rss2maildir.git] / rss2maildir.py
1 #!/usr/bin/python
2 # coding=utf-8
3
4 # rss2maildir.py - RSS feeds to Maildir 1 email per item
5 # Copyright (C) 2007  Brett Parker <iDunno@sommitrealweird.co.uk>
6
7 # This program is free software: you can redistribute it and/or modify
8 # it under the terms of the GNU General Public License as published by
9 # the Free Software Foundation, either version 3 of the License, or
10 # (at your option) any later version.
11
12 # This program is distributed in the hope that it will be useful,
13 # but WITHOUT ANY WARRANTY; without even the implied warranty of
14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 # GNU General Public License for more details.
16
17 # You should have received a copy of the GNU General Public License
18 # along with this program.  If not, see <http://www.gnu.org/licenses/>.
19
20 import sys
21 import os
22 import stat
23 import httplib
24 import urllib
25
26 import feedparser
27
28 from email.MIMEMultipart import MIMEMultipart
29 from email.MIMEText import MIMEText
30
31 import datetime
32 import random
33 import string
34 import textwrap
35
36 import socket
37
38 from optparse import OptionParser
39 from ConfigParser import SafeConfigParser
40
41 from base64 import b64encode
42 import md5
43
44 import cgi
45 import dbm
46
47 from HTMLParser import HTMLParser
48
49 class HTML2Text(HTMLParser):
50     entities = {
51         u'amp': "&",
52         u'lt': "<",
53         u'gt': ">",
54         u'pound': "£",
55         u'copy': "©",
56         u'apos': "'",
57         u'quot': "\"",
58         u'nbsp': " ",
59         }
60
61     blockleveltags = [
62         u'h1',
63         u'h2',
64         u'h3',
65         u'h4',
66         u'h5',
67         u'h6',
68         u'pre',
69         u'p',
70         u'ul',
71         u'ol',
72         u'dl',
73         u'li',
74         u'dt',
75         u'dd',
76         u'div',
77         #u'blockquote',
78         ]
79
80     liststarttags = [
81         u'ul',
82         u'ol',
83         u'dl',
84         ]
85
86     cancontainflow = [
87         u'div',
88         u'li',
89         u'dd',
90         u'blockquote',
91     ]
92
93     def __init__(self,textwidth=70):
94         self.text = u''
95         self.curdata = u''
96         self.textwidth = textwidth
97         self.opentags = []
98         self.indentlevel = 0
99         self.ignorenodata = False
100         self.listcount = []
101         self.urls = []
102         HTMLParser.__init__(self)
103
104     def handle_starttag(self, tag, attrs):
105         tag_name = tag.lower()
106         if tag_name in self.blockleveltags:
107             # handle starting a new block - unless we're in a block element
108             # that can contain other blocks, we'll assume that we want to close
109             # the container
110             if len(self.opentags) > 1 and self.opentags[-1] == u'li':
111                 self.handle_curdata()
112
113             if tag_name == u'ol':
114                 self.handle_curdata()
115                 self.listcount.append(1)
116                 self.listlevel = len(self.listcount) - 1
117
118             if tag_name in self.liststarttags:
119                 smallist = self.opentags[-3:-1]
120                 smallist.reverse()
121                 for prev_listtag in smallist:
122                     if prev_listtag in [u'dl', u'ol']:
123                         self.indentlevel = self.indentlevel + 4
124                         break
125                     elif prev_listtag == u'ul':
126                         self.indentlevel = self.indentlevel + 3
127                         break
128
129             if len(self.opentags) > 0:
130                 self.handle_curdata()
131                 if tag_name not in self.cancontainflow:
132                     self.opentags.pop()
133             self.opentags.append(tag_name)
134         else:
135             if tag_name == "span":
136                 return
137             listcount = 0
138             try:
139                 listcount = self.listcount[-1]
140             except:
141                 pass
142
143             if tag_name == u'dd' and len(self.opentags) > 1 \
144                 and self.opentags[-1] == u'dt':
145                 self.handle_curdata()
146                 self.opentags.pop()
147             elif tag_name == u'dt' and len(self.opentags) > 1 \
148                 and self.opentags[-1] == u'dd':
149                 self.handle_curdata()
150                 self.opentags.pop()
151             elif tag_name == u'a':
152                 for attr in attrs:
153                     if attr[0].lower() == u'href':
154                         self.urls.append(attr[1])
155                 self.curdata = self.curdata + u'`'
156                 self.opentags.append(tag_name)
157                 return
158             elif tag_name == u'img':
159                 self.handle_image(attrs)
160                 return
161             elif tag_name == u'br':
162                 self.handle_br()
163                 return
164             else:
165                 # we don't know the tag, so lets avoid handling it!
166                 return 
167
168     def handle_startendtag(self, tag, attrs):
169         if tag.lower() == u'br':
170             self.handle_br()
171         elif tag.lower() == u'img':
172             self.handle_image(attrs)
173             return
174
175     def handle_br(self):
176             self.handle_curdata()
177             self.opentags.append(u'br')
178             self.handle_curdata()
179             self.opentags.pop()
180
181     def handle_image(self, attrs):
182         alt = u''
183         url = u''
184         for attr in attrs:
185             if attr[0] == 'alt':
186                 alt = attr[1].decode('utf-8')
187             elif attr[0] == 'src':
188                 url = attr[1].decode('utf-8')
189         if url:
190             self.curdata = self.curdata \
191                 + u' [img:' \
192                 + url
193             if alt:
194                 self.curdata = self.curdata \
195                     + u'(' \
196                     + alt \
197                     + u')'
198             self.curdata = self.curdata \
199                 + u']'
200
201     def handle_curdata(self):
202
203         if len(self.opentags) == 0:
204             return
205
206         tag_thats_done = self.opentags[-1]
207
208         if len(self.curdata) == 0:
209             return
210
211         if tag_thats_done == u'br':
212             if len(self.text) == 0 or self.text[-1] != '\n':
213                 self.text = self.text + '\n'
214                 self.ignorenodata = True
215             return
216
217         if len(self.curdata.strip()) == 0:
218             return
219
220         if tag_thats_done in self.blockleveltags:
221             newlinerequired = self.text != u''
222             if self.ignorenodata:
223                 newlinerequired = False
224             self.ignorenodata = False
225             if newlinerequired:
226                 if tag_thats_done in [u'dt', u'dd', u'li'] \
227                     and len(self.text) > 1 \
228                     and self.text[-1] != u'\n':
229                         self.text = self.text + u'\n'
230                 elif len(self.text) > 2 \
231                     and self.text[-1] != u'\n' \
232                     and self.text[-2] != u'\n':
233                     self.text = self.text + u'\n\n'
234
235         if tag_thats_done in ["h1", "h2", "h3", "h4", "h5", "h6"]:
236             underline = u''
237             underlinechar = u'='
238             headingtext = unicode( \
239                 self.curdata.encode("utf-8").strip(), "utf-8")
240             seperator = u'\n' + u' '*self.indentlevel
241             headingtext = seperator.join( \
242                 textwrap.wrap( \
243                     headingtext, \
244                     self.textwidth - self.indentlevel \
245                     ) \
246                 )
247
248             if tag_thats_done == u'h2':
249                 underlinechar = u'-'
250             elif tag_thats_done != u'h1':
251                 underlinechar = u'~'
252
253             if u'\n' in headingtext:
254                 underline = u' ' * self.indentlevel \
255                     + underlinechar * (self.textwidth - self.indentlevel)
256             else:
257                 underline = u' ' * self.indentlevel \
258                     + underlinechar * len(headingtext)
259             self.text = self.text \
260                 + headingtext.encode("utf-8") + u'\n' \
261                 + underline
262         elif tag_thats_done in [u'p', u'div']:
263             paragraph = unicode( \
264                 self.curdata.strip().encode("utf-8"), "utf-8")
265             seperator = u'\n' + u' ' * self.indentlevel
266             self.text = self.text \
267                 + u' ' * self.indentlevel \
268                 + seperator.join( \
269                     textwrap.wrap( \
270                         paragraph, self.textwidth - self.indentlevel))
271         elif tag_thats_done == "pre":
272             self.text = self.text + unicode( \
273                 self.curdata.encode("utf-8"), "utf-8")
274         elif tag_thats_done == u'blockquote':
275             quote = unicode( \
276                 self.curdata.encode("utf-8").strip(), "utf-8")
277             seperator = u'\n' + u' ' * self.indentlevel + u'> '
278             if len(self.text) > 0 and self.text[-1] != u'\n':
279                 self.text = self.text + u'\n'
280             self.text = self.text \
281                 + u'> ' \
282                 + seperator.join( \
283                     textwrap.wrap( \
284                         quote, \
285                         self.textwidth - self.indentlevel - 2 \
286                     )
287                 )
288             self.curdata = u''
289         elif tag_thats_done == "li":
290             item = unicode(self.curdata.encode("utf-8").strip(), "utf-8")
291             if len(self.text) > 0 and self.text[-1] != u'\n':
292                 self.text = self.text + u'\n'
293             # work out if we're in an ol rather than a ul
294             latesttags = self.opentags[-4:]
295             latesttags.reverse()
296             isul = None
297             for thing in latesttags:
298                 if thing == 'ul':
299                     isul = True
300                     break
301                 elif thing == 'ol':
302                     isul = False
303                     break
304
305             listindent = 3
306             if not isul:
307                 listindent = 4
308
309             listmarker = u' * '
310             if isul == False:
311                 listmarker = u' %2d. ' %(self.listcount[-1])
312                 self.listcount[-1] = self.listcount[-1] + 1
313
314             seperator = u'\n' \
315                 + u' ' * self.indentlevel \
316                 + u' ' * listindent
317             self.text = self.text \
318                 + u' ' * self.indentlevel \
319                 + listmarker \
320                 + seperator.join( \
321                     textwrap.wrap( \
322                         item, \
323                         self.textwidth - self.indentlevel - listindent \
324                     ) \
325                 )
326             self.curdata = u''
327         elif tag_thats_done == u'dt':
328             definition = unicode(self.curdata.encode("utf-8").strip(), "utf-8")
329             if len(self.text) > 0 and self.text[-1] != u'\n':
330                 self.text = self.text + u'\n\n'
331             elif len(self.text) > 1 and self.text[-2] != u'\n':
332                 self.text = self.text + u'\n'
333             definition = u' ' * self.indentlevel + definition + "::"
334             indentstring = u'\n' + u' ' * (self.indentlevel + 1)
335             self.text = self.text \
336                 + indentstring.join(
337                     textwrap.wrap(definition, \
338                         self.textwidth - self.indentlevel - 1))
339             self.curdata = u''
340         elif tag_thats_done == u'dd':
341             definition = unicode(self.curdata.encode("utf-8").strip(), "utf-8")
342             if len(definition) > 0:
343                 if len(self.text) > 0 and self.text[-1] != u'\n':
344                     self.text = self.text + u'\n'
345                 indentstring = u'\n' + u' ' * (self.indentlevel + 4)
346                 self.text = self.text \
347                     + u' ' * (self.indentlevel + 4) \
348                     + indentstring.join( \
349                         textwrap.wrap( \
350                             definition, \
351                             self.textwidth - self.indentlevel - 4 \
352                             ) \
353                         )
354                 self.curdata = u''
355         elif tag_thats_done == u'a':
356             self.curdata = self.curdata + u'`__'
357             pass
358         elif tag_thats_done in self.liststarttags:
359             pass
360
361         if tag_thats_done in self.blockleveltags:
362             self.curdata = u''
363
364         self.ignorenodata = False
365
366     def handle_endtag(self, tag):
367         self.ignorenodata = False
368         if tag == "span":
369             return
370
371         try:
372             tagindex = self.opentags.index(tag)
373         except:
374             return
375         tag = tag.lower()
376
377         if tag in [u'br', u'img']:
378             return
379
380         if tag in self.liststarttags:
381             if tag in [u'ol', u'dl', u'ul']:
382                 self.handle_curdata()
383                 # find if there was a previous list level
384                 smalllist = self.opentags[:-1]
385                 smalllist.reverse()
386                 for prev_listtag in smalllist:
387                     if prev_listtag in [u'ol', u'dl']:
388                         self.indentlevel = self.indentlevel - 4
389                         break
390                     elif prev_listtag == u'ul':
391                         self.indentlevel = self.indentlevel - 3
392                         break
393
394         if tag == u'ol':
395             self.listcount = self.listcount[:-1]
396
397         while tagindex < len(self.opentags) \
398             and tag in self.opentags[tagindex+1:]:
399             try:
400                 tagindex = self.opentags.index(tag, tagindex+1)
401             except:
402                 # well, we don't want to do that then
403                 pass
404         if tagindex != len(self.opentags) - 1:
405             # Assuming the data was for the last opened tag first
406             self.handle_curdata()
407             # Now kill the list to be a slice before this tag was opened
408             self.opentags = self.opentags[:tagindex + 1]
409         else:
410             self.handle_curdata()
411             if self.opentags[-1] == tag:
412                 self.opentags.pop()
413
414     def handle_data(self, data):
415         if len(self.opentags) == 0:
416             self.opentags.append(u'p')
417         self.curdata = self.curdata + unicode(data, "utf-8")
418
419     def handle_entityref(self, name):
420         entity = name
421         if HTML2Text.entities.has_key(name.lower()):
422             entity = HTML2Text.entities[name.lower()]
423         elif name[0] == "#":
424             entity = unichr(int(name[1:]))
425         else:
426             entity = "&" + name + ";"
427
428         self.curdata = self.curdata + unicode(entity, "utf-8")
429
430     def gettext(self):
431         self.handle_curdata()
432         if len(self.text) == 0 or self.text[-1] != u'\n':
433             self.text = self.text + u'\n'
434         self.opentags = []
435         if len(self.text) > 0:
436             while len(self.text) > 1 and self.text[-1] == u'\n':
437                 self.text = self.text[:-1]
438             self.text = self.text + u'\n'
439         if len(self.urls) > 0:
440             self.text = self.text + u'\n__ ' + u'\n__ '.join(self.urls) + u'\n'
441             self.urls = []
442         return self.text
443
def open_url(method, url):
    """Issue an HTTP(S) request for *url* and return the 200 response.

    *method* is the request method (the callers in this file use "HEAD"
    and "GET").  Up to three requests are made: redirects (301, 302, 303,
    307) are followed and failed attempts are retried.  Returns the
    httplib response object for a 200 status, or None when no attempt
    produced one.
    """
    # Imported here because nothing else in the file needs it.
    import urlparse

    redirectcount = 0
    while redirectcount < 3:
        (scheme, rest) = urllib.splittype(url)
        (host, path) = urllib.splithost(rest)
        (host, port) = urllib.splitport(host)
        if scheme == "https":
            connection_class = httplib.HTTPSConnection
            default_port = 443
        else:
            connection_class = httplib.HTTPConnection
            default_port = 80
        if port is None:
            port = default_port
        if not path:
            # "http://host" has no path; an empty request target is invalid
            path = "/"

        conn = None
        try:
            conn = connection_class("%s:%s" %(host, port))
            conn.request(method, path)
            response = conn.getresponse()
            if response.status == 200:
                # The caller reads from the response, so the connection
                # must stay open here.
                return response
            if response.status in [301, 302, 303, 307]:
                location = response.getheader("location")
                if location:
                    # Location may be relative to the current URL
                    url = urlparse.urljoin(url, location)
        except (httplib.HTTPException, socket.error):
            # best effort: fall through and try again
            pass
        if conn is not None:
            conn.close()
        redirectcount = redirectcount + 1
    return None
467
# Response headers remembered per feed to detect an unchanged feed cheaply.
_FEED_VALIDATOR_HEADERS = \
    ["content-md5", "etag", "last-modified", "content-length"]

def _feed_has_changed(url, cached):
    """Return True unless a HEAD request shows the feed matches *cached*.

    *cached* is the parsed (cgi.parse_qs) header record stored for the feed.
    A failed HEAD request, a header that was not stored before, or a header
    whose value differs all count as changed.
    """
    response = open_url("HEAD", url)
    if response is None:
        return True
    headers = response.getheaders()
    response.close()
    for (name, value) in headers:
        if name in _FEED_VALIDATOR_HEADERS:
            if name not in cached or cached[name][0] != value:
                return True
    return False

def _deliver_item(maildir, url, item, db):
    """Write one feed item into *maildir* unless it was delivered before.

    *db* is the open "seen" database.  Items are remembered under
    "<url>|<guid>" and "<url>|<link>"; an item whose stored content md5
    matches the current content is skipped.  A changed item is delivered
    again with a References header pointing at the earlier message(s).
    """
    # have we seen it before?
    # need to work out what the content is first...
    if item.has_key("content"):
        content = item["content"][0]["value"]
    else:
        content = item["summary"]

    md5sum = md5.md5(content.encode("utf-8")).hexdigest()

    prevmessageid = None
    linkkey = url + "|" + item["link"]
    guidkey = None

    # check if there's a guid too - if that exists and we match the md5,
    # there's nothing to do
    if item.has_key("guid"):
        guidkey = url + "|" + item["guid"]
        if db.has_key(guidkey):
            seen = cgi.parse_qs(db[guidkey])
            if seen["contentmd5"][0] == md5sum:
                return

    if db.has_key(linkkey):
        seen = cgi.parse_qs(db[linkkey])
        if seen.has_key("message-id"):
            prevmessageid = seen["message-id"][0]
        if seen["contentmd5"][0] == md5sum:
            return

    try:
        author = item["author"]
    except KeyError:
        author = url

    # create a basic email message
    msg = MIMEMultipart("alternative")
    messageid = "<" \
        + datetime.datetime.now().strftime("%Y%m%d%H%M") \
        + "." \
        + "".join( \
            [random.choice( \
                string.ascii_letters + string.digits \
                ) for a in range(0,6) \
            ]) + "@" + socket.gethostname() + ">"
    msg.add_header("Message-ID", messageid)
    msg.set_unixfrom("\"%s\" <rss2maildir@localhost>" %(url))
    msg.add_header("From", "\"%s\" <rss2maildir@localhost>" %(author))
    msg.add_header("To", "\"%s\" <rss2maildir@localhost>" %(url))
    if prevmessageid:
        msg.add_header("References", prevmessageid)
    # Use the item's own timestamp when it has a usable one, else now.
    createddate = datetime.datetime.now() \
        .strftime("%a, %e %b %Y %T -0000")
    try:
        createddate = datetime.datetime(*item["updated_parsed"][0:6]) \
            .strftime("%a, %e %b %Y %T -0000")
    except (KeyError, TypeError, ValueError):
        pass
    msg.add_header("Date", createddate)
    msg.add_header("Subject", item["title"])
    msg.set_default_type("text/plain")

    htmlcontent = "%s\n\n<p>Item URL: <a href='%s'>%s</a></p>" %( \
        content, \
        item["link"], \
        item["link"] )
    htmlpart = MIMEText(htmlcontent.encode("utf-8"), "html", "utf-8")
    textparser = HTML2Text()
    textparser.feed(content.encode("utf-8"))
    textcontent = textparser.gettext()
    textcontent = "%s\n\nItem URL: %s" %( \
        textcontent, \
        item["link"] )
    textpart = MIMEText(textcontent.encode("utf-8"), "plain", "utf-8")
    msg.attach(textpart)
    msg.attach(htmlpart)

    # start by working out the filename we should be writing to, we do
    # this following the normal maildir style rules
    fname = str(os.getpid()) \
        + "." + socket.gethostname() \
        + "." + "".join( \
            [random.choice( \
                string.ascii_letters + string.digits \
                ) for a in range(0,10) \
            ]) + "." \
        + datetime.datetime.now().strftime('%s')
    fn = os.path.join(maildir, "tmp", fname)
    fh = open(fn, "w")
    try:
        fh.write(msg.as_string())
    finally:
        fh.close()
    # now move it in to the new directory
    newfn = os.path.join(maildir, "new", fname)
    os.link(fn, newfn)
    os.unlink(fn)

    # now add to the database about the item
    if prevmessageid:
        messageid = prevmessageid + " " + messageid
    record = urllib.urlencode(( \
        ("message-id", messageid), \
        ("created", createddate), \
        ("contentmd5", md5sum) \
        ))
    if guidkey is not None and item["guid"] != item["link"]:
        db[guidkey] = record
        # An existing link record keeps its creation date and content md5
        # and only learns about the new message id; without one the new
        # record is stored under the link as well.
        try:
            previous = cgi.parse_qs(db[linkkey])
            linkrecord = urllib.urlencode(( \
                ("message-id", messageid), \
                ("created", previous["created"][0]), \
                ("contentmd5", previous["contentmd5"][0]) \
                ))
        except KeyError:
            linkrecord = record
        db[linkkey] = linkrecord
    else:
        db[linkkey] = record

def parse_and_deliver(maildir, url, statedir):
    """Fetch the feed at *url* and deliver its new items into *maildir*.

    State is kept in two dbm databases inside *statedir*: "feeds" holds the
    validator headers of the last fetch of each feed and "seen" holds one
    record per delivered item.  When the feed is known and a HEAD request
    shows the stored headers are still current, nothing is fetched.  A
    failed fetch is reported on stderr.
    """
    feeddb = dbm.open(os.path.join(statedir, "feeds"), "c")
    try:
        # first check if we know about this feed already
        if feeddb.has_key(url):
            cached = cgi.parse_qs(feeddb[url])
            if not _feed_has_changed(url, cached):
                return # don't need to do anything, nothings changed.

        response = open_url("GET", url)
        if response is None:
            sys.stderr.write("Failed to fetch feed: %s\n" %(url))
            return
        headers = response.getheaders()

        fp = feedparser.parse(response)
        db = dbm.open(os.path.join(statedir, "seen"), "c")
        try:
            for item in fp["items"]:
                _deliver_item(maildir, url, item, db)
        finally:
            db.close()

        # remember the validators so the next run can skip an unchanged feed
        if headers:
            data = []
            for header in headers:
                if header[0] in _FEED_VALIDATOR_HEADERS:
                    data.append((header[0], header[1]))
            if len(data) > 0:
                feeddb[url] = urllib.urlencode(data)
    finally:
        feeddb.close()
656
def _ensure_directory(path, notdir_message, create_message):
    """Make sure *path* is a directory, creating it when it is missing.

    Both messages are format strings taking the path.  When *path* exists
    but is not a directory, or cannot be created, the matching message is
    written to stderr and the program exits with status 1.
    """
    try:
        mode = os.stat(path)[stat.ST_MODE]
    except OSError:
        try:
            os.mkdir(path)
        except OSError:
            sys.stderr.write(create_message %(path,))
            sys.exit(1)
        return
    if not stat.S_ISDIR(mode):
        sys.stderr.write(notdir_message %(path,))
        sys.exit(1)

def _prepare_maildir(maildir, section):
    """Make sure *maildir* exists with its new, cur and tmp directories.

    Missing directories are created; a failure to create one is reported
    on stderr and exits with status 1.  Returns False, after reporting a
    broken maildir, when *maildir* or one of its subdirectories exists but
    is not a directory, and True otherwise.
    """
    try:
        mode = os.stat(maildir)[stat.ST_MODE]
    except OSError:
        try:
            os.mkdir(maildir)
        except OSError:
            sys.stderr.write("Couldn't create root maildir %s\n" \
                %(maildir))
            sys.exit(1)
    else:
        if not stat.S_ISDIR(mode):
            sys.stderr.write("Broken maildir: %s\n" %(maildir))
            return False

    usable = True
    for subdir in ("new", "cur", "tmp"):
        path = os.path.join(maildir, subdir)
        try:
            mode = os.stat(path)[stat.ST_MODE]
        except OSError:
            try:
                os.mkdir(path)
            except OSError:
                sys.stderr.write( \
                    "Couldn't create required maildir directories for %s\n" \
                    %(section,))
                sys.exit(1)
        else:
            if not stat.S_ISDIR(mode):
                sys.stderr.write("Broken maildir: %s\n" %(maildir))
                usable = False
    return usable

if __name__ == "__main__":
    # This only gets executed if we really called the program
    # first off, parse the command line arguments

    oparser = OptionParser()
    oparser.add_option(
        "-c", "--conf", dest="conf",
        help="location of config file"
        )
    oparser.add_option(
        "-s", "--statedir", dest="statedir",
        help="location of directory to store state in"
        )

    (options, args) = oparser.parse_args()

    # check for the configfile

    configfile = None

    if options.conf != None:
        # does the file exist?
        if not os.path.exists(options.conf):
            sys.stderr.write( \
                "Config file %s does not exist. Exiting.\n" %(options.conf,))
            sys.exit(2)
        configfile = options.conf
    else:
        # check through the default locations
        candidates = []
        home = os.environ.get("HOME")
        if home is not None:
            candidates.append("%s/.rss2maildir.conf" %(home,))
        candidates.append("/etc/rss2maildir.conf")
        for candidate in candidates:
            if os.path.exists(candidate):
                configfile = candidate
                break
        if configfile is None:
            sys.stderr.write("No config file found. Exiting.\n")
            sys.exit(2)

    # Right - if we've got this far, we've got a config file, now for the hard
    # bits...

    scp = SafeConfigParser()
    scp.read(configfile)

    maildir_root = "RSSMaildir"
    state_dir = "state"

    # The state directory comes from the command line, else from the
    # config file, else the default above.
    if options.statedir != None:
        state_dir = options.statedir
        _ensure_directory(state_dir, \
            "State directory (%s) is not a directory\n", \
            "Couldn't create statedir %s\n")
    elif scp.has_option("general", "state_dir"):
        state_dir = scp.get("general", "state_dir")
        _ensure_directory(state_dir, \
            "State directory (%s) is not a directory\n", \
            "Couldn't create state directory %s\n")
    else:
        _ensure_directory(state_dir, \
            "State directory %s is not a directory\n", \
            "State directory %s could not be created\n")

    if scp.has_option("general", "maildir_root"):
        maildir_root = scp.get("general", "maildir_root")

    _ensure_directory(maildir_root, \
        "Maildir Root %s is not a directory\n", \
        "Couldn't create Maildir Root %s\n")

    # every section other than "general" describes one feed; the section
    # name is the feed URL
    feeds = scp.sections()
    try:
        feeds.remove("general")
    except ValueError:
        pass

    for section in feeds:
        # the maildir defaults to the section name
        if scp.has_option(section, "maildir"):
            maildir = scp.get(section, "maildir")
        else:
            maildir = section

        # URL-quote the name so that it is a single path component
        maildir = urllib.quote_plus(maildir)
        maildir = os.path.join(maildir_root, maildir)

        if not _prepare_maildir(maildir, section):
            # already reported; delivering into it would only fail later
            continue

        # right - we've got the directories, we've got the section, we know the
        # url... lets play!

        parse_and_deliver(maildir, section, state_dir)