- def handle_endtag(self, tag):
- if tag.lower() == "h1":
- self.inheadingone = False
- self.text = self.text \
- + u'\n\n' \
- + self.headingtext.encode("utf-8") \
- + u'\n' \
- + u'=' * len(self.headingtext.encode("utf-8").strip())
- self.headingtext = u''
- elif tag.lower() == "h2":
- self.inheadingtwo = False
+ def handle_curdata(self):
+
+ if len(self.opentags) == 0:
+ return
+
+ tag_thats_done = self.opentags[-1]
+
+ if len(self.curdata) == 0:
+ return
+
+ if tag_thats_done == u'br':
+ if len(self.text) == 0 or self.text[-1] != '\n':
+ self.text = self.text + '\n'
+ self.ignorenodata = True
+ return
+
+ if len(self.curdata.strip()) == 0:
+ return
+
+ if tag_thats_done in self.blockleveltags:
+ newlinerequired = self.text != u''
+ if self.ignorenodata:
+ newlinerequired = False
+ self.ignorenodata = False
+ if newlinerequired:
+ if tag_thats_done in [u'dt', u'dd', u'li'] \
+ and len(self.text) > 1 \
+ and self.text[-1] != u'\n':
+ self.text = self.text + u'\n'
+ elif len(self.text) > 2 \
+ and self.text[-1] != u'\n' \
+ and self.text[-2] != u'\n':
+ self.text = self.text + u'\n\n'
+
+ if tag_thats_done in ["h1", "h2", "h3", "h4", "h5", "h6"]:
+ underline = u''
+ underlinechar = u'='
+ headingtext = " ".join(self.curdata.split())
+ seperator = u'\n' + u' '*self.indentlevel
+ headingtext = seperator.join( \
+ textwrap.wrap( \
+ headingtext, \
+ self.textwidth - self.indentlevel \
+ ) \
+ )
+
+ if tag_thats_done == u'h2':
+ underlinechar = u'-'
+ elif tag_thats_done != u'h1':
+ underlinechar = u'~'
+
+ if u'\n' in headingtext:
+ underline = u' ' * self.indentlevel \
+ + underlinechar * (self.textwidth - self.indentlevel)
+ else:
+ underline = u' ' * self.indentlevel \
+ + underlinechar * len(headingtext)