"nbsp": " ",
}
- def __init__(self):
+ def __init__(self,textwidth=70):
self.inheadingone = False
self.inheadingtwo = False
self.inotherheading = False
self.inul = False
self.initem = False
self.item = u''
+ self.textwidth = textwidth
HTMLParser.__init__(self)
def handle_starttag(self, tag, attrs):
self.text = self.text + u'\n\n'
if self.inparagraph:
self.text = self.text \
- + u'\n'.join(textwrap.wrap(self.currentparagraph, 70))
+ + u'\n'.join(textwrap.wrap(self.currentparagraph, self.textwidth))
self.currentparagraph = u''
self.inparagraph = True
elif tag.lower() == "pre":
self.text = self.text \
+ u' * ' \
+ u'\n '.join([a.strip() for a in \
- textwrap.wrap(self.item, 67)]) \
+ textwrap.wrap(self.item, self.textwidth - 3)]) \
+ u'\n'
self.item = u''
self.initem = True
+ u'\n'.join( \
[a \
for a in textwrap.wrap( \
- self.currentparagraph, 70) \
+ self.currentparagraph, self.textwidth) \
] \
) \
+ u'\n'
elif tag.lower() == "p":
self.text = self.text \
+ u'\n'.join(textwrap.wrap( \
- self.currentparagraph, 70) \
+ self.currentparagraph, self.textwidth) \
)
self.inparagraph = False
self.currentparagraph = u''
+ u'\n> '.join( \
[a.strip() \
for a in textwrap.wrap( \
- self.blockquote, 68)] \
+ self.blockquote, self.textwidth - 2)] \
) \
+ u'\n'
self.inblockquote = False
self.text = self.text \
+ u' * ' \
+ u'\n '.join( \
- [a.strip() for a in textwrap.wrap(self.item, 67)]) \
+ [a.strip() for a in textwrap.wrap(self.item, self.textwidth - 3)]) \
+ u'\n'
self.item = u''
elif tag.lower() == "ul":
elif self.inpre:
self.text = self.text + unicode(data, "utf-8")
else:
- self.text = self.text + unicode(data, "utf-8").strip() + u' '
+ isallwhitespace = data.strip() == ""
+ if not isallwhitespace:
+ self.text = self.text + unicode(data, "utf-8").strip() + u' '
def handle_entityref(self, name):
entity = name
def gettext(self):
    """Return the text rendered so far, always terminated by a newline.

    If a paragraph is still open, its buffered text is wrapped to
    ``self.textwidth`` columns and appended to the returned value. Only a
    local copy is extended; the parser's own state is left untouched.
    """
    data = self.text
    if self.inparagraph:
        # Include the pending paragraph in the result without flushing it.
        data = data + "\n".join(
            textwrap.wrap(self.currentparagraph, self.textwidth))
    # endswith() instead of data[-1]: indexing raised IndexError when no
    # text had been produced at all (empty string).
    if not data.endswith('\n'):
        data = data + '\n'
    return data