]> git.sommitrealweird.co.uk Git - rss2maildir.git/blobdiff - rss2maildir.py
Update li handling a bit, and make the expected test results be what we'd
[rss2maildir.git] / rss2maildir.py
index 99735a70a8dd93957741b3c1c3a66e1cd3779171..3e4ed605d1eaa7669e1faccd25904449d2a6596b 100755 (executable)
@@ -109,7 +109,7 @@ class HTML2Text(HTMLParser):
             self.item = u''
             self.inul = True
             self.text = self.text + "\n"
             self.item = u''
             self.inul = True
             self.text = self.text + "\n"
-        elif tag.lower() == "li" and self.inul:
+        elif tag.lower() == "li":
             if not self.initem:
                 self.initem = True
                 self.item = u''
             if not self.initem:
                 self.initem = True
                 self.item = u''
@@ -120,6 +120,7 @@ class HTML2Text(HTMLParser):
                         textwrap.wrap(self.item, 67)]) \
                     + u'\n'
                 self.item = u''
                         textwrap.wrap(self.item, 67)]) \
                     + u'\n'
                 self.item = u''
+                self.initem = True
 
     def handle_startendtag(self, tag, attrs):
         if tag.lower() == "br":
 
     def handle_startendtag(self, tag, attrs):
         if tag.lower() == "br":
@@ -198,7 +199,7 @@ class HTML2Text(HTMLParser):
             self.inpre = False
         elif tag.lower() == "li":
             self.initem = False
             self.inpre = False
         elif tag.lower() == "li":
             self.initem = False
-            if self.item != "":
+            if self.item != u'':
                 self.text = self.text \
                     + u' * ' \
                     + u'\n   '.join( \
                 self.text = self.text \
                     + u' * ' \
                     + u'\n   '.join( \
@@ -217,16 +218,18 @@ class HTML2Text(HTMLParser):
             self.blockquote = self.blockquote \
                 + unicode(data, "utf-8").strip() \
                 + u' '
             self.blockquote = self.blockquote \
                 + unicode(data, "utf-8").strip() \
                 + u' '
+        elif self.initem:
+            self.item = self.item + unicode(data, "utf-8")
         elif self.inparagraph:
             self.currentparagraph = self.currentparagraph \
                 + unicode(data, "utf-8").strip() \
                 + u' '
         elif self.inparagraph:
             self.currentparagraph = self.currentparagraph \
                 + unicode(data, "utf-8").strip() \
                 + u' '
-        elif self.inul and self.initem:
-            self.item = self.item + unicode(data, "utf-8")
         elif self.inpre:
             self.text = self.text + unicode(data, "utf-8")
         else:
         elif self.inpre:
             self.text = self.text + unicode(data, "utf-8")
         else:
-            self.text = self.text + unicode(data, "utf-8").strip() + u' '
+            isallwhitespace = data.strip()
+            if isallwhitespace != "" and self.text[-1] == "\n":
+                self.text = self.text + unicode(data, "utf-8").strip() + u' '
 
     def handle_entityref(self, name):
         entity = name
 
     def handle_entityref(self, name):
         entity = name
@@ -249,6 +252,8 @@ class HTML2Text(HTMLParser):
         data = self.text
         if self.inparagraph:
             data = data + "\n".join(textwrap.wrap(self.currentparagraph, 70))
         data = self.text
         if self.inparagraph:
             data = data + "\n".join(textwrap.wrap(self.currentparagraph, 70))
+        if data[-1] != '\n':
+            data = data + '\n'
         return data
 
 def open_url(method, url):
         return data
 
 def open_url(method, url):