+from HTMLParser import HTMLParser
+
class HTML2Text(HTMLParser):
    """Render an HTML fragment as wrapped plain text.

    Feed markup through the inherited ``feed()`` and read the result with
    ``gettext()``.  Headings are underlined, list items get ``*`` or numbered
    markers, links are written as ```text`__`` with their targets listed at
    the end of the output, and images are written as ``|name|`` with their
    URLs listed at the end of the output.
    """

    # Named character references understood by handle_entityref().
    entities = {
        u'amp': u'&',
        u'lt': u'<',
        u'gt': u'>',
        u'pound': u'£',
        u'copy': u'©',
        u'apos': u'\'',
        u'quot': u'"',
        u'nbsp': u' ',
        u'ldquo': u'“',
        u'rdquo': u'”',
        u'lsquo': u'‘',
        u'rsquo': u'’',
        u'laquo': u'«',
        u'raquo': u'»',
        u'lsaquo': u'‹',
        u'rsaquo': u'›',
        u'bull': u'•',
        u'middot': u'·',
        u'deg': u'°',
        u'hellip': u'…',
        u'trade': u'™',
        u'reg': u'®',
        u'agrave': u'à',
        u'Agrave': u'À',
        u'egrave': u'è',
        u'Egrave': u'È',
        u'igrave': u'ì',
        u'Igrave': u'Ì',
        u'ograve': u'ò',
        u'Ograve': u'Ò',
        u'ugrave': u'ù',
        u'Ugrave': u'Ù',
        u'aacute': u'á',
        u'Aacute': u'Á',
        u'eacute': u'é',
        u'Eacute': u'É',
        u'iacute': u'í',
        u'Iacute': u'Í',
        u'oacute': u'ó',
        u'Oacute': u'Ó',
        u'uacute': u'ú',
        u'Uacute': u'Ú',
        u'yacute': u'ý',
        u'Yacute': u'Ý',
        u'acirc': u'â',
        u'Acirc': u'Â',
        u'ecirc': u'ê',
        u'Ecirc': u'Ê',
        u'icirc': u'î',
        u'Icirc': u'Î',
        u'ocirc': u'ô',
        u'Ocirc': u'Ô',
        u'ucirc': u'û',
        u'Ucirc': u'Û',
        u'atilde': u'ã',
        u'Atilde': u'Ã',
        u'ntilde': u'ñ',
        u'Ntilde': u'Ñ',
        u'otilde': u'õ',
        u'Otilde': u'Õ',
        u'auml': u'ä',
        u'Auml': u'Ä',
        u'euml': u'ë',
        u'Euml': u'Ë',
        u'iuml': u'ï',
        u'Iuml': u'Ï',
        u'ouml': u'ö',
        u'Ouml': u'Ö',
        u'uuml': u'ü',
        u'Uuml': u'Ü',
        u'yuml': u'ÿ',
        u'Yuml': u'Ÿ',
        u'iexcl': u'¡',
        u'iquest': u'¿',
        u'ccedil': u'ç',
        u'Ccedil': u'Ç',
        u'oelig': u'œ',
        u'OElig': u'Œ',
        u'szlig': u'ß',
        u'oslash': u'ø',
        u'Oslash': u'Ø',
        u'aring': u'å',
        u'Aring': u'Å',
        u'aelig': u'æ',
        u'AElig': u'Æ',
        u'thorn': u'þ',
        u'THORN': u'Þ',
        u'eth': u'ð',
        u'ETH': u'Ð',
        u'mdash': u'—',
        u'ndash': u'–',
        u'sect': u'§',
        u'para': u'¶',
        u'uarr': u'↑',
        u'darr': u'↓',
        u'larr': u'←',
        u'rarr': u'→',
        u'dagger': u'†',
        u'Dagger': u'‡',
        u'permil': u'‰',
        u'prod': u'∏',
        u'infin': u'∞',
        u'radic': u'√',
        u'there4': u'∴',
        u'int': u'∫',
        u'asymp': u'≈',
        u'ne': u'≠',
        # Must be a unicode literal like its neighbours: a byte string here
        # breaks the text handling in handle_entityref() on Python 2.
        u'equiv': u'≡',
        u'le': u'≤',
        u'ge': u'≥',
        u'loz': u'⋄',
        u'sum': u'∑',
        u'part': u'∂',
        u'prime': u'′',
        u'Prime': u'″',
        u'harr': u'↔',
        u'micro': u'µ',
        u'not': u'¬',
        u'plusmn': u'±',
        u'divide': u'÷',
        u'cent': u'¢',
        u'euro': u'€',
    }

    # Tags whose buffered data is rendered as a block of its own.
    blockleveltags = [
        u'h1',
        u'h2',
        u'h3',
        u'h4',
        u'h5',
        u'h6',
        u'pre',
        u'p',
        u'ul',
        u'ol',
        u'dl',
        u'li',
        u'dt',
        u'dd',
        u'div',
        u'blockquote',
    ]

    # Tags that open a list.
    liststarttags = [
        u'ul',
        u'ol',
        u'dl',
    ]

    # Block tags that do not replace the currently open tag when opened.
    cancontainflow = [
        u'div',
        u'li',
        u'dd',
        u'blockquote',
    ]

    def __init__(self, textwidth=70):
        """Create a converter that wraps its output at `textwidth` columns."""
        self.text = u''            # rendered output so far
        self.curdata = u''         # character data not yet rendered
        self.textwidth = textwidth
        self.opentags = []         # stack of currently open tag names
        self.indentlevel = 0       # current left indent, in columns
        self.ignorenodata = False  # set after <br> emitted a line break
        self.listcount = []        # next item number per open <ol>
        self.urls = []             # link targets, in document order
        self.images = {}           # substitution name -> {"url": ...}
        HTMLParser.__init__(self)

    @staticmethod
    def _collapse_whitespace(data):
        """Join the ASCII-whitespace separated words of `data` with spaces.

        The split is done on the UTF-8 bytes, as the original code did, so
        only ASCII whitespace separates words.
        """
        words = data.encode("utf-8").split()
        return u' '.join([word.decode("utf-8") for word in words])

    @staticmethod
    def _strip_whitespace(data):
        """Strip leading and trailing ASCII whitespace from `data`."""
        return data.encode("utf-8").strip().decode("utf-8")

    @staticmethod
    def _wrap(data, width, separator):
        """Wrap `data` to `width` columns and join the lines with `separator`."""
        return separator.join(textwrap.wrap(data, width))

    @staticmethod
    def _codepoint_to_text(codepoint):
        """Return the character for `codepoint` on Python 2 and Python 3."""
        try:
            return unichr(codepoint)
        except NameError:
            # Python 3 has no unichr(); chr() covers the full range there.
            return chr(codepoint)

    def handle_starttag(self, tag, attrs):
        """Track an opening tag and flush data belonging to the previous one."""
        tag_name = tag.lower()
        if tag_name in self.blockleveltags:
            # handle starting a new block - unless we're in a block element
            # that can contain other blocks, we'll assume that we want to close
            # the container
            if len(self.opentags) > 1 and self.opentags[-1] == u'li':
                self.handle_curdata()

            if tag_name == u'ol':
                self.handle_curdata()
                self.listcount.append(1)
                self.listlevel = len(self.listcount) - 1

            if tag_name == u'dl':
                self.indentlevel = self.indentlevel + 4

            if tag_name in self.liststarttags:
                # A list opened inside another list is indented by the width
                # of the enclosing list's marker.
                smallist = self.opentags[-3:-1]
                smallist.reverse()
                for prev_listtag in smallist:
                    if prev_listtag in [u'dl', u'ol']:
                        self.indentlevel = self.indentlevel + 4
                        break
                    elif prev_listtag == u'ul':
                        self.indentlevel = self.indentlevel + 3
                        break

            if len(self.opentags) > 0:
                self.handle_curdata()
                if tag_name not in self.cancontainflow:
                    self.opentags.pop()
            self.opentags.append(tag_name)
        else:
            if tag_name == "span":
                return

            # dt and dd are listed in blockleveltags, so the first two
            # branches only run if a subclass changes that list.
            if tag_name == u'dd' and len(self.opentags) > 1 \
                    and self.opentags[-1] == u'dt':
                self.handle_curdata()
                self.opentags.pop()
            elif tag_name == u'dt' and len(self.opentags) > 1 \
                    and self.opentags[-1] == u'dd':
                self.handle_curdata()
                self.opentags.pop()
            elif tag_name == u'a':
                for attr in attrs:
                    if attr[0].lower() == u'href':
                        self.urls.append(attr[1])
                self.curdata = self.curdata + u'`'
                self.opentags.append(tag_name)
                return
            elif tag_name == u'img':
                self.handle_image(attrs)
                return
            elif tag_name == u'br':
                self.handle_br()
                return
            else:
                # we don't know the tag, so lets avoid handling it!
                return

    def handle_startendtag(self, tag, attrs):
        """Handle self-closing ``<br/>`` and ``<img/>``; ignore other tags."""
        if tag.lower() == u'br':
            self.handle_br()
        elif tag.lower() == u'img':
            self.handle_image(attrs)
        return

    def handle_br(self):
        """Flush pending data, then flush again with ``br`` pushed as open tag."""
        self.handle_curdata()
        self.opentags.append(u'br')
        self.handle_curdata()
        self.opentags.pop()

    def handle_image(self, attrs):
        """Append a ``|name|`` reference for an image and remember its URL.

        The name is the ``alt`` text when present, otherwise the URL.  When
        the name is already taken by a different URL, underscores are
        appended until the name is free or already maps to this URL.
        Images without a ``src`` are ignored.
        """
        alt = u''
        url = u''
        for attr in attrs:
            if attr[0] == 'alt':
                if isinstance(attr[1], str):
                    alt = u'%s' % (attr[1])
                else:
                    alt = attr[1]
            elif attr[0] == 'src':
                if isinstance(attr[1], str):
                    url = u'%s' % (attr[1])
                else:
                    url = attr[1]
        if not url:
            return

        if alt:
            name = alt
            # Stop on an entry for the same URL as well, so that a repeated
            # image reuses its name instead of registering a duplicate.
            while name in self.images and self.images[name]["url"] != url:
                name = name + "_"
        else:
            name = url
        if name not in self.images:
            self.images[name] = {"url": url}
        self.curdata = self.curdata + u'|%s|' % (name,)

    def handle_curdata(self):
        """Render the buffered data according to the innermost open tag.

        Does nothing when no tag is open or the buffer is empty or blank.
        The buffer is cleared after rendering a block-level tag; for ``a``
        the closing link markup is appended to the buffer instead.
        """
        if len(self.opentags) == 0:
            return

        tag_thats_done = self.opentags[-1]

        if len(self.curdata) == 0:
            return

        if tag_thats_done == u'br':
            if len(self.text) == 0 or self.text[-1] != '\n':
                self.text = self.text + '\n'
                self.ignorenodata = True
            return

        if len(self.curdata.strip()) == 0:
            return

        if tag_thats_done in self.blockleveltags:
            # Separate this block from the text before it, unless a <br>
            # has just emitted the line break.
            newlinerequired = self.text != u''
            if self.ignorenodata:
                newlinerequired = False
            self.ignorenodata = False
            if newlinerequired:
                if tag_thats_done in [u'dt', u'dd', u'li'] \
                        and len(self.text) > 1 \
                        and self.text[-1] != u'\n':
                    self.text = self.text + u'\n'
                elif len(self.text) > 2 \
                        and self.text[-1] != u'\n' \
                        and self.text[-2] != u'\n':
                    self.text = self.text + u'\n\n'

        if tag_thats_done in ["h1", "h2", "h3", "h4", "h5", "h6"]:
            underline = u''
            underlinechar = u'='
            headingtext = " ".join(self.curdata.split())
            seperator = u'\n' + u' '*self.indentlevel
            headingtext = self._wrap(
                headingtext, self.textwidth - self.indentlevel, seperator)

            if tag_thats_done == u'h2':
                underlinechar = u'-'
            elif tag_thats_done != u'h1':
                underlinechar = u'~'

            if u'\n' in headingtext:
                # A wrapped heading is underlined across the full width.
                underline = u' ' * self.indentlevel \
                    + underlinechar * (self.textwidth - self.indentlevel)
            else:
                underline = u' ' * self.indentlevel \
                    + underlinechar * len(headingtext)
            self.text = self.text \
                + headingtext + u'\n' \
                + underline
        elif tag_thats_done in [u'p', u'div']:
            paragraph = self._collapse_whitespace(self.curdata.strip())
            seperator = u'\n' + u' ' * self.indentlevel
            self.text = self.text \
                + u' ' * self.indentlevel \
                + self._wrap(
                    paragraph, self.textwidth - self.indentlevel, seperator)
        elif tag_thats_done == "pre":
            self.text = self.text + self.curdata
        elif tag_thats_done == u'blockquote':
            quote = self._collapse_whitespace(self.curdata)
            seperator = u'\n' + u' ' * self.indentlevel + u' '
            if len(self.text) > 0 and self.text[-1] != u'\n':
                self.text = self.text + u'\n'
            self.text = self.text \
                + u' ' \
                + self._wrap(
                    quote, self.textwidth - self.indentlevel - 2, seperator)
            self.curdata = u''
        elif tag_thats_done == "li":
            item = self._strip_whitespace(self.curdata)
            if len(self.text) > 0 and self.text[-1] != u'\n':
                self.text = self.text + u'\n'
            # work out if we're in an ol rather than a ul
            latesttags = self.opentags[-4:]
            latesttags.reverse()
            isul = None
            for thing in latesttags:
                if thing == 'ul':
                    isul = True
                    break
                elif thing == 'ol':
                    isul = False
                    break

            listindent = 3
            if not isul:
                listindent = 4

            listmarker = u' * '
            if isul == False:
                listmarker = u' %2d. ' % (self.listcount[-1])
                self.listcount[-1] = self.listcount[-1] + 1

            seperator = u'\n' \
                + u' ' * self.indentlevel \
                + u' ' * listindent
            self.text = self.text \
                + u' ' * self.indentlevel \
                + listmarker \
                + self._wrap(
                    item,
                    self.textwidth - self.indentlevel - listindent,
                    seperator)
            self.curdata = u''
        elif tag_thats_done == u'dt':
            definition = self._collapse_whitespace(self.curdata)
            if len(self.text) > 0 and self.text[-1] != u'\n':
                self.text = self.text + u'\n\n'
            elif len(self.text) > 1 and self.text[-2] != u'\n':
                self.text = self.text + u'\n'
            definition = u' ' * (self.indentlevel - 4) + definition + "::"
            indentstring = u'\n' + u' ' * (self.indentlevel - 3)
            self.text = self.text \
                + self._wrap(
                    definition,
                    self.textwidth - self.indentlevel - 4,
                    indentstring)
            self.curdata = u''
        elif tag_thats_done == u'dd':
            definition = self._collapse_whitespace(self.curdata)
            if len(definition) > 0:
                if len(self.text) > 0 and self.text[-1] != u'\n':
                    self.text = self.text + u'\n'
                indentstring = u'\n' + u' ' * self.indentlevel
                self.text = self.text \
                    + indentstring \
                    + self._wrap(
                        definition,
                        self.textwidth - self.indentlevel,
                        indentstring)
                self.curdata = u''
        elif tag_thats_done == u'a':
            # Close the link markup; the text stays buffered so that the
            # enclosing block renders it.
            self.curdata = self.curdata + u'`__'
        elif tag_thats_done in self.liststarttags:
            pass

        if tag_thats_done in self.blockleveltags:
            self.curdata = u''

        self.ignorenodata = False

    def handle_endtag(self, tag):
        """Flush pending data and close `tag` if it is currently open."""
        self.ignorenodata = False
        # Normalise the case before any comparison so that an upper-case
        # end tag matches the lower-cased names stored in opentags.
        tag = tag.lower()
        if tag == "span":
            return

        try:
            tagindex = self.opentags.index(tag)
        except ValueError:
            # the tag was never opened, nothing to close
            return

        if tag in [u'br', u'img']:
            return

        if tag == u'dl':
            self.indentlevel = self.indentlevel - 4

        if tag in self.liststarttags:
            self.handle_curdata()
            # find if there was a previous list level
            smalllist = self.opentags[:-1]
            smalllist.reverse()
            for prev_listtag in smalllist:
                if prev_listtag in [u'ol', u'dl']:
                    self.indentlevel = self.indentlevel - 4
                    break
                elif prev_listtag == u'ul':
                    self.indentlevel = self.indentlevel - 3
                    break

        if tag == u'ol':
            self.listcount = self.listcount[:-1]

        # Move on to the innermost open occurrence of the tag.
        while tag in self.opentags[tagindex + 1:]:
            tagindex = self.opentags.index(tag, tagindex + 1)

        if tagindex != len(self.opentags) - 1:
            # Assuming the data was for the last opened tag first
            self.handle_curdata()
            # Drop everything opened after this tag.
            # NOTE(review): the slice keeps the tag itself open; confirm
            # that this is intended.
            self.opentags = self.opentags[:tagindex + 1]
        else:
            self.handle_curdata()
            if self.opentags[-1] == tag:
                self.opentags.pop()

    def handle_data(self, data):
        """Buffer character data, opening an implicit ``p`` if nothing is open."""
        if len(self.opentags) == 0:
            self.opentags.append(u'p')
        self.curdata = "%s%s" % (self.curdata, data)

    def handle_charref(self, name):
        """Buffer the character for a numeric reference such as ``&#8230;``.

        Decimal and hexadecimal (``x``/``X`` prefixed) forms are decoded.
        Anything that cannot be decoded is buffered as ``#`` plus `name`.
        """
        try:
            if name[:1] in (u'x', u'X'):
                codepoint = int(name[1:], 16)
            else:
                codepoint = int(name)
            entity = self._codepoint_to_text(codepoint)
        except (ValueError, OverflowError):
            entity = u'#%s' % (name,)
        self.curdata = self.curdata + entity

    def handle_entityref(self, name):
        """Buffer the character for a named reference such as ``&amp;``.

        Names missing from the `entities` table are buffered unchanged as
        ``&name;``.
        """
        if name in HTML2Text.entities:
            entity = HTML2Text.entities[name]
        else:
            entity = "&" + name + ";"

        self.curdata = self.curdata + entity

    def gettext(self):
        """Flush pending data and return the rendered text.

        The text ends with a newline, followed by the collected link
        targets (``__ url``) and image definitions
        (``.. |name| image:: url``).  The open-tag stack and the collected
        links and images are reset.
        """
        self.handle_curdata()
        if len(self.text) == 0 or self.text[-1] != u'\n':
            self.text = self.text + u'\n'
        self.opentags = []
        if len(self.text) > 0:
            # Reduce any run of trailing newlines to a single one.
            while len(self.text) > 1 and self.text[-1] == u'\n':
                self.text = self.text[:-1]
            self.text = self.text + u'\n'
        if len(self.urls) > 0:
            self.text = self.text + u'\n__ ' + u'\n__ '.join(self.urls) + u'\n'
            self.urls = []
        if len(self.images) > 0:
            self.text = self.text + u'\n.. ' \
                + u'\n.. '.join(
                    ["|%s| image:: %s" % (a, self.images[a]["url"])
                     for a in self.images]) + u'\n'
            self.images = {}
        return self.text
+
def open_url(method, url):
    """Issue an HTTP(S) request for `url` and return the response on success.

    `method` is the HTTP method to send, for example "HEAD" or "GET".  Up
    to three attempts are made.  A 301, 302, 303 or 307 response moves the
    next attempt to its Location target; connection errors and any other
    status simply use up an attempt.

    Returns the httplib response object of the first 200 response, or None
    when no attempt produced one.
    """
    # Local import: only needed to resolve relative redirect targets.
    import urlparse

    redirectcount = 0
    while redirectcount < 3:
        (scheme, rest) = urllib.splittype(url)
        (host, path) = urllib.splithost(rest)
        (host, port) = urllib.splitport(host)
        if port is None:
            if scheme == "https":
                port = 443
            else:
                port = 80

        conn = None
        response = None
        try:
            if scheme == "http":
                conn = httplib.HTTPConnection("%s:%s" % (host, port))
            else:
                conn = httplib.HTTPSConnection("%s:%s" % (host, port))
            # A URL without a path must still be requested as "/".
            conn.request(method, path or "/")
            response = conn.getresponse()
        except Exception:
            # Best effort: a failed attempt just counts against the limit.
            response = None

        if response is not None:
            if response.status == 200:
                # The connection is left open so the caller can read the body.
                return response
            if response.status in [301, 302, 303, 307]:
                location = response.getheader("location")
                if location:
                    # Location may be relative to the URL just requested.
                    url = urlparse.urljoin(url, location)

        # This attempt yielded nothing usable; release its connection.
        if conn is not None:
            conn.close()
        redirectcount = redirectcount + 1
    return None
+
# Response headers stored per feed and compared to detect changes.
_FEED_CACHE_HEADERS = ["content-md5", "etag", "last-modified", "content-length"]

# strftime format shared by the Date and X-rss2maildir-rundate headers.
_MAIL_DATE_FORMAT = "%a, %e %b %Y %T -0000"


def _random_token(length):
    """Return `length` random ASCII letters and digits."""
    alphabet = string.ascii_letters + string.digits
    return "".join([random.choice(alphabet) for a in range(0, length)])


def _feed_is_unchanged(url, cached):
    """Return True when a HEAD request shows the feed matches `cached`.

    `cached` is the parsed header record stored for the feed.  A failed
    request, or a tracked header whose value differs from or is missing in
    the record, counts as changed.
    """
    response = open_url("HEAD", url)
    if response is None:
        return False
    for (name, value) in response.getheaders():
        if name in _FEED_CACHE_HEADERS:
            if cached.get(name, [None])[0] != value:
                return False
    return True


def _item_content(item):
    """Return the body of a feed item as unicode text.

    Uses the first "content" entry, then "description", then an empty
    string.  Byte strings are decoded with the charset chardet detects.
    """
    if "content" in item:
        content = item["content"][0]["value"]
    elif "description" in item:
        content = item["description"]
    else:
        content = u''

    # make sure content is unicode encoded
    if not isinstance(content, unicode):
        chrset = chardet.detect(content)['encoding']
        sys.stdout.write("detected charset %s for item %s\n"
                         % (chrset, item.get("link")))
        # chardet may fail to detect anything; fall back to UTF-8 and do
        # not let a single undecodable byte abort the whole run.
        content = content.decode(chrset or "utf-8", "replace")
    return content


def _build_message(url, item, content, prevmessageid):
    """Build the multipart mail for a feed item.

    Returns a (message, message id, created date string) tuple.
    """
    try:
        author = item["author"]
    except KeyError:
        author = url

    # create a basic email message
    msg = MIMEMultipart("alternative")
    messageid = "<" \
        + datetime.datetime.now().strftime("%Y%m%d%H%M") \
        + "." + _random_token(6) \
        + "@" + socket.gethostname() + ">"
    msg.add_header("Message-ID", messageid)
    msg.set_unixfrom("\"%s\" <rss2maildir@localhost>" % (url))
    msg.add_header("From", "\"%s\" <rss2maildir@localhost>"
                   % (author.encode("utf-8")))
    msg.add_header("To", "\"%s\" <rss2maildir@localhost>"
                   % (url.encode("utf-8")))
    if prevmessageid:
        msg.add_header("References", prevmessageid)

    # The dates are labelled -0000, so they have to be UTC times.
    rundate = datetime.datetime.utcnow().strftime(_MAIL_DATE_FORMAT)
    createddate = rundate
    try:
        createddate = datetime.datetime(*item["updated_parsed"][0:6]) \
            .strftime(_MAIL_DATE_FORMAT)
    except (KeyError, TypeError, ValueError):
        # no usable update time on the item, keep the current time
        pass
    msg.add_header("Date", createddate)
    msg.add_header("X-rss2maildir-rundate", rundate)

    # Escape angle brackets so the title is rendered as text, not parsed
    # as markup, by HTML2Text.
    title = item.get("title", u'')
    title = re.sub(u'<', u'&lt;', title)
    title = re.sub(u'>', u'&gt;', title)
    subj_gen = HTML2Text()
    subj_gen.feed(title)
    # gettext() wraps its output and ends it with a newline; a header
    # value must be a single line.
    msg.add_header("Subject", u' '.join(subj_gen.gettext().split()))
    msg.set_default_type("text/plain")

    htmlcontent = "%s\n\n<p>Item URL: <a href='%s'>%s</a></p>" % (
        content,
        item["link"],
        item["link"])
    htmlpart = MIMEText(htmlcontent.encode("utf-8"), "html", "utf-8")
    textparser = HTML2Text()
    textparser.feed(content)
    textcontent = textparser.gettext()
    textcontent = "%s\n\nItem URL: %s" % (
        textcontent,
        item["link"])
    textpart = MIMEText(textcontent.encode("utf-8"), "plain", "utf-8")
    msg.attach(textpart)
    msg.attach(htmlpart)
    return (msg, messageid, createddate)


def _write_to_maildir(maildir, msg):
    """Write `msg` to the maildir's tmp directory, then move it to new."""
    # start by working out the filename we should be writing to, we do
    # this following the normal maildir style rules
    fname = str(os.getpid()) \
        + "." + socket.gethostname() \
        + "." + _random_token(10) \
        + "." + datetime.datetime.now().strftime('%s')
    fn = os.path.join(maildir, "tmp", fname)
    fh = open(fn, "w")
    try:
        fh.write(msg.as_string())
    finally:
        fh.close()
    # now move it in to the new directory
    newfn = os.path.join(maildir, "new", fname)
    os.link(fn, newfn)
    os.unlink(fn)


def _record_item(db, item, db_link_key, db_guid_key,
                 messageid, createddate, md5sum):
    """Store the delivery record for an item in the seen database."""
    record = urllib.urlencode((
        ("message-id", messageid),
        ("created", createddate),
        ("contentmd5", md5sum)
        ))
    if "guid" in item and item["guid"] != item["link"]:
        db[db_guid_key] = record
        try:
            # An existing link record keeps its created date and content
            # md5; only the message id chain is updated.
            previous = cgi.parse_qs(db[db_link_key])
            db[db_link_key] = urllib.urlencode((
                ("message-id", messageid),
                ("created", previous["created"][0]),
                ("contentmd5", previous["contentmd5"][0])
                ))
        except (KeyError, IndexError):
            # no (usable) link record yet, store the new one
            db[db_link_key] = record
    else:
        db[db_link_key] = record


def _deliver_item(maildir, url, db, item):
    """Deliver one feed item unless the same content was delivered before."""
    content = _item_content(item)
    # Hash the decoded text: hashing before decoding fails on byte
    # strings that are not plain ASCII.
    md5sum = md5.md5(content.encode("utf-8")).hexdigest()

    prevmessageid = None

    db_guid_key = None
    if "link" not in item:
        item["link"] = u'#' + md5sum
    db_link_key = (url + u'|' + item["link"]).encode("utf-8")

    # check if there's a guid too - if that exists and we match the md5,
    # there is nothing to deliver
    if "guid" in item:
        db_guid_key = (url + u'|' + item["guid"]).encode("utf-8")
        if db.has_key(db_guid_key):
            data = cgi.parse_qs(db[db_guid_key])
            if data["contentmd5"][0] == md5sum:
                return

    if db.has_key(db_link_key):
        data = cgi.parse_qs(db[db_link_key])
        if "message-id" in data:
            prevmessageid = data["message-id"][0]
        if data["contentmd5"][0] == md5sum:
            return

    (msg, messageid, createddate) = _build_message(
        url, item, content, prevmessageid)
    _write_to_maildir(maildir, msg)

    # now add to the database about the item
    if prevmessageid:
        messageid = prevmessageid + " " + messageid
    _record_item(db, item, db_link_key, db_guid_key,
                 messageid, createddate, md5sum)


def parse_and_deliver(maildir, url, statedir):
    """Fetch the feed at `url` and deliver its new items into `maildir`.

    State is kept in two dbm databases below `statedir`: "feeds" holds the
    tracked response headers of each feed and "seen" holds a record per
    delivered item.  A feed whose tracked headers are unchanged is not
    downloaded again.  A failed download is reported on stderr and
    nothing is delivered.
    """
    feeddb = dbm.open(os.path.join(statedir, "feeds"), "c")
    try:
        # first check if we know about this feed already
        if feeddb.has_key(url):
            cached = cgi.parse_qs(feeddb[url])
            if _feed_is_unchanged(url, cached):
                return  # don't need to do anything, nothing has changed.

        response = open_url("GET", url)
        if response is None:
            sys.stderr.write("Failed to fetch feed: %s\n" % (url))
            return
        headers = response.getheaders()

        fp = feedparser.parse(response)
        db = dbm.open(os.path.join(statedir, "seen"), "c")
        try:
            for item in fp["items"]:
                _deliver_item(maildir, url, db, item)
        finally:
            db.close()

        if headers:
            data = [(name, value) for (name, value) in headers
                    if name in _FEED_CACHE_HEADERS]
            if len(data) > 0:
                feeddb[url] = urllib.urlencode(data)
    finally:
        feeddb.close()
+
+if __name__ == "__main__":
+ # This only gets executed if we really called the program
+ # first off, parse the command line arguments
+
+ oparser = OptionParser()
+ oparser.add_option(
+ "-c", "--conf", dest="conf",
+ help="location of config file"
+ )
+ oparser.add_option(
+ "-s", "--statedir", dest="statedir",
+ help="location of directory to store state in"
+ )
+
+ (options, args) = oparser.parse_args()
+
+ # check for the configfile
+
+ configfile = None
+
+ if options.conf != None:
+ # does the file exist?