From: Brett Parker Date: Sun, 2 Mar 2008 19:02:23 +0000 (+0000) Subject: Small fixes to list handling code X-Git-Url: https://git.sommitrealweird.co.uk/rss2maildir.git/commitdiff_plain/9a3c701ba08530e1a884ad3fea33f790feb0aa0a?ds=inline Small fixes to list handling code --- diff --git a/rss2maildir.py b/rss2maildir.py index 59e9361..6a24b94 100755 --- a/rss2maildir.py +++ b/rss2maildir.py @@ -197,6 +197,9 @@ class HTML2Text(HTMLParser): self.listcount.append(1) self.listlevel = len(self.listcount) - 1 + if tag_name == u'dl': + self.indentlevel = self.indentlevel + 4 + if tag_name in self.liststarttags: smallist = self.opentags[-3:-1] smallist.reverse() @@ -430,12 +433,12 @@ class HTML2Text(HTMLParser): self.text = self.text + u'\n\n' elif len(self.text) > 1 and self.text[-2] != u'\n': self.text = self.text + u'\n' - definition = u' ' * self.indentlevel + definition + "::" - indentstring = u'\n' + u' ' * (self.indentlevel + 1) + definition = u' ' * (self.indentlevel - 4) + definition + "::" + indentstring = u'\n' + u' ' * (self.indentlevel - 3) self.text = self.text \ + indentstring.join( textwrap.wrap(definition, \ - self.textwidth - self.indentlevel - 1)) + self.textwidth - self.indentlevel - 4)) self.curdata = u'' elif tag_thats_done == u'dd': definition = unicode(" ".join( \ @@ -444,13 +447,13 @@ class HTML2Text(HTMLParser): if len(definition) > 0: if len(self.text) > 0 and self.text[-1] != u'\n': self.text = self.text + u'\n' - indentstring = u'\n' + u' ' * (self.indentlevel + 4) + indentstring = u'\n' + u' ' * self.indentlevel self.text = self.text \ - + u' ' * (self.indentlevel + 4) \ + + indentstring \ + indentstring.join( \ textwrap.wrap( \ definition, \ - self.textwidth - self.indentlevel - 4 \ + self.textwidth - self.indentlevel \ ) \ ) self.curdata = u'' @@ -479,8 +482,11 @@ class HTML2Text(HTMLParser): if tag in [u'br', u'img']: return + if tag == u'dl': + self.indentlevel = self.indentlevel - 4 + if tag in self.liststarttags: - if tag in [u'ol', u'dl', u'ul']: + if tag in [u'ol', u'dl', u'ul', u'dd']: self.handle_curdata() # find if there was a previous list level smalllist = self.opentags[:-1] diff --git a/tests/expected/definitionlist-badlyformed.txt b/tests/expected/definitionlist-badlyformed.txt index 3d9e83f..0ec11bb 100644 --- a/tests/expected/definitionlist-badlyformed.txt +++ b/tests/expected/definitionlist-badlyformed.txt @@ -1,6 +1,8 @@ An item:: + It's definition Another item:: + And it's got a much longer definition because we like to make sure that we've got the test wrapping right don't we. diff --git a/tests/expected/definitionlist-wellformed.txt b/tests/expected/definitionlist-wellformed.txt index 3d9e83f..0ec11bb 100644 --- a/tests/expected/definitionlist-wellformed.txt +++ b/tests/expected/definitionlist-wellformed.txt @@ -1,6 +1,8 @@ An item:: + It's definition Another item:: + And it's got a much longer definition because we like to make sure that we've got the test wrapping right don't we. diff --git a/tests/expected/mixednestedlists-wellformed.txt b/tests/expected/mixednestedlists-wellformed.txt index 86e83fc..7ee5aa1 100644 --- a/tests/expected/mixednestedlists-wellformed.txt +++ b/tests/expected/mixednestedlists-wellformed.txt @@ -5,15 +5,16 @@ * We should have at least one silly long line to check that wrapping works like we expect otherwise what's the point - What is HTML2Text?:: +What is HTML2Text?:: - HTML2Text is a funky bit of python that translates HTML fragments - in to plain text in a human readable format. It's primary use was - for generating the plain text body of email messages generated from - rssfeeds as part of rss2maildir. + HTML2Text is a funky bit of python that translates HTML fragments + in to plain text in a human readable format. It's primary use was + for generating the plain text body of email messages generated + from rssfeeds as part of rss2maildir. - Hopefully one day it'll be good enough that other people will want - to use it! + Hopefully one day it'll be good enough that other people will want + to use it! - Who wrote it?:: - HTML2Text was written by Brett Parker. +Who wrote it?:: + + HTML2Text was written by Brett Parker.