From 6c5e73886e42526c4a4ae522e45ddae07d141df8 Mon Sep 17 00:00:00 2001 From: Brett Parker Date: Mon, 31 Dec 2007 03:08:57 +0000 Subject: [PATCH] * Move some of the list handling above the paragraph handling so that it doesn't get confused (bless it!) * Make expected output match actual output, unittest now passes --- rss2maildir.py | 9 +++++---- tests/expected/unorderedlist-badlyformed.txt | 7 ++++--- 2 files changed, 9 insertions(+), 7 deletions(-) diff --git a/rss2maildir.py b/rss2maildir.py index 80f22d5..7acbe54 100755 --- a/rss2maildir.py +++ b/rss2maildir.py @@ -109,7 +109,7 @@ class HTML2Text(HTMLParser): self.item = u'' self.inul = True self.text = self.text + "\n" - elif tag.lower() == "li" and self.inul: + elif tag.lower() == "li": if not self.initem: self.initem = True self.item = u'' @@ -120,6 +120,7 @@ class HTML2Text(HTMLParser): textwrap.wrap(self.item, 67)]) \ + u'\n' self.item = u'' + self.initem = True def handle_startendtag(self, tag, attrs): if tag.lower() == "br": @@ -198,7 +199,7 @@ class HTML2Text(HTMLParser): self.inpre = False elif tag.lower() == "li": self.initem = False - if self.item != "": + if self.item != u'': self.text = self.text \ + u' * ' \ + u'\n '.join( \ @@ -217,12 +218,12 @@ class HTML2Text(HTMLParser): self.blockquote = self.blockquote \ + unicode(data, "utf-8").strip() \ + u' ' + elif self.initem: + self.item = self.item + unicode(data, "utf-8") elif self.inparagraph: self.currentparagraph = self.currentparagraph \ + unicode(data, "utf-8").strip() \ + u' ' - elif self.inul and self.initem: - self.item = self.item + unicode(data, "utf-8") elif self.inpre: self.text = self.text + unicode(data, "utf-8") else: diff --git a/tests/expected/unorderedlist-badlyformed.txt b/tests/expected/unorderedlist-badlyformed.txt index 49d47f6..b4c04f9 100644 --- a/tests/expected/unorderedlist-badlyformed.txt +++ b/tests/expected/unorderedlist-badlyformed.txt @@ -1,3 +1,4 @@ - * This is an item - * And a second - * And a third + + * This is an item + * And a second + * And a third -- 2.39.5