- if tag.lower() == "h1":
- self.inheadingone = True
- self.inparagraph = False
- elif tag.lower() == "h2":
- self.inheadingtwo = True
- self.inparagraph = False
- elif tag.lower() in ["h3", "h4", "h5", "h6"]:
- self.inotherheading = True
- self.inparagraph = False
- elif tag.lower() == "a":
- self.inlink = True
- elif tag.lower() == "br":
- if self.inparagraph:
- self.text = self.text \
- + u'\n'.join( \
- textwrap.wrap(self.currentparagraph, 70)) \
- + u'\n'
- self.currentparagraph = ""
- elif self.inblockquote:
- self.text = self.text \
- + u'\n> ' \
- + u'\n> '.join( \
- [a.strip() \
- for a in textwrap.wrap(self.blockquote, 68) \
- ]) \
- + u'\n'
- self.blockquote = u''
- else:
- self.text = self.text + u'\n'
- elif tag.lower() == "blockquote":
- self.inblockquote = True
- self.text = self.text + u'\n'
- elif tag.lower() == "p":
- if self.text != "":
- self.text = self.text + u'\n\n'
- if self.inparagraph:
- self.text = self.text \
- + u'\n'.join(textwrap.wrap(self.currentparagraph, 70))
- self.currentparagraph = u''
- self.inparagraph = True
- elif tag.lower() == "pre":
- self.text = self.text + "\n"
- self.inpre = True
- self.inparagraph = False
- self.inblockquote = False
- elif tag.lower() == "ul":
- self.item = u''
- self.inul = True
- self.text = self.text + "\n"
- elif tag.lower() == "li" and self.inul:
- if not self.initem:
- self.initem = True
- self.item = u''
- else:
- self.text = self.text \
- + u' * ' \
- + u'\n '.join([a.strip() for a in \
- textwrap.wrap(self.item, 67)]) \
- + u'\n'
- self.item = u''
+ tag_name = tag.lower()
+ if tag_name in self.blockleveltags:
+ # handle starting a new block - unless we're in a block element
+ # that can contain other blocks, we'll assume that we want to close
+ # the container
+ if tag_name == u'br':
+ self.handle_curdata()
+ self.opentags.append(tag_name)
+ self.opentags.pop()
+
+ if len(self.opentags) > 0:
+ self.handle_curdata()
+ self.opentags.pop()
+ self.opentags.append(tag_name)
+ else:
+ self.handle_curdata()
+ self.opentags.append(tag_name)