git.sommitrealweird.co.uk Git - rss2maildir.git/blobdiff - rss2maildir.py
* Small improvements to the HTML2Text code
[rss2maildir.git] / rss2maildir.py
index 739c1f315186c9cec746f891516fe7ec74696065..a51209ce0749e70fd5e9d40ff696d8cf6a2243e1 100755 (executable)
@@ -123,6 +123,9 @@ class HTML2Text(HTMLParser):
         if len(self.opentags) == 0:
             return
 
+        if len(self.curdata) == 0:
+            return
+
         tag_thats_done = self.opentags[-1]
 
         if tag_thats_done in self.blockleveltags:
@@ -169,6 +172,26 @@ class HTML2Text(HTMLParser):
                 + u'\n   '.join( \
                     textwrap.wrap(item, self.textwidth - 3))
             self.curdata = u''
+        elif tag_thats_done == "dt":
+            definition = self.curdata.encode("utf-8").strip()
+            if len(self.text) > 0 and self.text[-1] != u'\n':
+                self.text = self.text + u'\n\n'
+            elif len(self.text) > 0 and self.text[-2] != u'\n':
+                self.text = self.text + u'\n'
+            definition = definition + "::"
+            self.text = self.text \
+                + '\n '.join(
+                    textwrap.wrap(definition, self.textwidth - 1))
+            self.curdata = u''
+        elif tag_thats_done == "dd":
+            definition = self.curdata.encode("utf-8").strip()
+            if len(self.text) > 0 and self.text[-1] != u'\n':
+                self.text = self.text + u'\n'
+            self.text = self.text \
+                + '    ' \
+                + '\n    '.join( \
+                    textwrap.wrap(definition, self.textwidth - 4))
+            self.curdata = u''
         elif tag_thats_done in self.liststarttags:
             pass
         else:
@@ -224,6 +247,10 @@ class HTML2Text(HTMLParser):
         if len(self.text) == 0 or self.text[-1] != u'\n':
             self.text = self.text + u'\n'
         self.opentags = []
+        if len(self.text) > 0:
+            while len(self.text) > 1 and self.text[-1] == u'\n':
+                self.text = self.text[:-1]
+            self.text = self.text + u'\n'
         return self.text
 
 def open_url(method, url):