X-Git-Url: https://git.sommitrealweird.co.uk/rss2maildir.git/blobdiff_plain/fb3a7e40f413322d90b5452acc7c3a8f09046661..64f951d0b32c3c210b71ef941e23b8dd98f13658:/rss2maildir.py diff --git a/rss2maildir.py b/rss2maildir.py index 99735a7..a0c40a1 100755 --- a/rss2maildir.py +++ b/rss2maildir.py @@ -109,7 +109,7 @@ class HTML2Text(HTMLParser): self.item = u'' self.inul = True self.text = self.text + "\n" - elif tag.lower() == "li" and self.inul: + elif tag.lower() == "li": if not self.initem: self.initem = True self.item = u'' @@ -120,6 +120,7 @@ class HTML2Text(HTMLParser): textwrap.wrap(self.item, 67)]) \ + u'\n' self.item = u'' + self.initem = True def handle_startendtag(self, tag, attrs): if tag.lower() == "br": @@ -198,7 +199,7 @@ class HTML2Text(HTMLParser): self.inpre = False elif tag.lower() == "li": self.initem = False - if self.item != "": + if self.item != u'': self.text = self.text \ + u' * ' \ + u'\n '.join( \ @@ -217,16 +218,18 @@ class HTML2Text(HTMLParser): self.blockquote = self.blockquote \ + unicode(data, "utf-8").strip() \ + u' ' + elif self.initem: + self.item = self.item + unicode(data, "utf-8") elif self.inparagraph: self.currentparagraph = self.currentparagraph \ + unicode(data, "utf-8").strip() \ + u' ' - elif self.inul and self.initem: - self.item = self.item + unicode(data, "utf-8") elif self.inpre: self.text = self.text + unicode(data, "utf-8") else: - self.text = self.text + unicode(data, "utf-8").strip() + u' ' + isallwhitespace = data.strip() == "" + if not isallwhitespace: + self.text = self.text + unicode(data, "utf-8").strip() + u' ' def handle_entityref(self, name): entity = name @@ -249,6 +252,8 @@ class HTML2Text(HTMLParser): data = self.text if self.inparagraph: data = data + "\n".join(textwrap.wrap(self.currentparagraph, 70)) + if data[-1] != '\n': + data = data + '\n' return data def open_url(method, url):