if len(self.opentags) == 0:
return
+ if len(self.curdata) == 0:
+ return
+
tag_thats_done = self.opentags[-1]
if tag_thats_done in self.blockleveltags:
+ u'\n '.join( \
textwrap.wrap(item, self.textwidth - 3))
self.curdata = u''
+ elif tag_thats_done == "dt":
+ definition = self.curdata.encode("utf-8").strip()
+ if len(self.text) > 0 and self.text[-1] != u'\n':
+ self.text = self.text + u'\n\n'
+ elif len(self.text) > 0 and self.text[-2] != u'\n':
+ self.text = self.text + u'\n'
+ definition = definition + "::"
+ self.text = self.text \
+ + '\n '.join(
+ textwrap.wrap(definition, self.textwidth - 1))
+ self.curdata = u''
+ elif tag_thats_done == "dd":
+ definition = self.curdata.encode("utf-8").strip()
+ if len(self.text) > 0 and self.text[-1] != u'\n':
+ self.text = self.text + u'\n'
+ self.text = self.text \
+ + ' ' \
+ + '\n '.join( \
+ textwrap.wrap(definition, self.textwidth - 4))
+ self.curdata = u''
elif tag_thats_done in self.liststarttags:
pass
else:
if len(self.text) == 0 or self.text[-1] != u'\n':
self.text = self.text + u'\n'
self.opentags = []
+ if len(self.text) > 0:
+ while len(self.text) > 1 and self.text[-1] == u'\n':
+ self.text = self.text[:-1]
+ self.text = self.text + u'\n'
return self.text
def open_url(method, url):
--- /dev/null
+An item::
+ It's definition
+
+Another item::
+ And it's got a much longer definition because we like to make sure
+ that we've got the test wrapping right don't we.
-An item
+An item::
It's definition
-Another item
+Another item::
And it's got a much longer definition because we like to make sure
that we've got the test wrapping right don't we.
--- /dev/null
+<dl>
+ <dt>An item
+ <dd>It's definition
+ <dt>Another item
+ <dd>And it's got a much longer definition because we like to make sure that we've got the test wrapping right don't we.
+</dl>
#!/usr/bin/python
import unittest
-import sys
import os
-class DefinitionListTests(unittest.TestCase):
- def setUp(self):
- self.inputpath = os.path.sep.join(os.path.dirname(os.path.realpath(__file__)).split(os.path.sep)[0:-1])
+import ParsingTests
+class DefinitionListTests(ParsingTests.ParsingTest):
+ # Definition-list conversion tests. Each test passes a fixture base name
+ # to the inherited runParsingTest(), which reads html/<name>.html, feeds
+ # it to HTML2Text and compares gettext() with expected/<name>.txt.
def testWellFormedDefinitionList(self):
- try:
- from rss2maildir import HTML2Text
- except:
- sys.path.append(os.path.sep.join(self.inputpath.split(os.path.sep)[0:-1]))
- try:
- from rss2maildir import HTML2Text
- except:
- self.assert_(False)
- input_path = os.path.sep.join(os.path.dirname(os.path.realpath(__file__)).split(os.path.sep)[0:-1])
- input = open(os.path.join(input_path, "html", "definitionlist-wellformed.html")).read()
- expectedoutput = open(os.path.join(input_path, "expected", "definitionlist-wellformed.txt")).read()
- parser = HTML2Text()
- parser.feed(input)
- output = parser.gettext()
- self.assertEqual(output, expectedoutput)
+ # Same fixture pair the removed inline code opened by hand.
+ return self.runParsingTest("definitionlist-wellformed")
+
+ def testBadlyFormedDefinitionList(self):
+ # New case added by this change; uses the "definitionlist-badlyformed"
+ # fixture pair.
+ return self.runParsingTest("definitionlist-badlyformed")
def suite():
+ # Builds and returns a unittest.TestSuite with each DefinitionListTests
+ # case registered explicitly by method name. Note that the local name
+ # `suite` rebinds the function's own name inside this body.
suite = unittest.TestSuite()
suite.addTest(DefinitionListTests("testWellFormedDefinitionList"))
+ suite.addTest(DefinitionListTests("testBadlyFormedDefinitionList"))
return suite
if __name__ == "__main__":
import sys
import os
-class ParagraphTests(unittest.TestCase):
- def setUp(self):
- self.inputpath = os.path.sep.join(os.path.dirname(os.path.realpath(__file__)).split(os.path.sep)[0:-1])
+import ParsingTests
+class ParagraphTests(ParsingTests.ParsingTest):
+ # Paragraph conversion tests. The test passes a fixture base name to the
+ # inherited runParsingTest(), which reads html/<name>.html, feeds it to
+ # HTML2Text and compares gettext() with expected/<name>.txt.
def testWellFormedParagraphs(self):
- try:
- from rss2maildir import HTML2Text
- except:
- sys.path.append(os.path.sep.join(self.inputpath.split(os.path.sep)[0:-1]))
- try:
- from rss2maildir import HTML2Text
- except:
- self.assert_(False)
- input_path = os.path.sep.join(os.path.dirname(os.path.realpath(__file__)).split(os.path.sep)[0:-1])
- input = open(os.path.join(input_path, "html", "multiparagraph-wellformed.html")).read()
- expectedoutput = open(os.path.join(input_path, "expected", "multiparagraph-wellformed.txt")).read()
- parser = HTML2Text()
- parser.feed(input)
- output = parser.gettext()
- self.assertEqual(output, expectedoutput)
+ # Same fixture pair the removed inline code opened by hand.
+ return self.runParsingTest("multiparagraph-wellformed")
def suite():
suite = unittest.TestSuite()
--- /dev/null
+#!/usr/bin/python
+
+import unittest
+import sys
+import os
+
+class ParsingTest(unittest.TestCase):
+    """Base class for fixture-driven HTML2Text conversion tests.
+
+    Subclasses call runParsingTest() with a fixture base name. The input
+    document is read from html/<name>.html and the expected text from
+    expected/<name>.txt, both located under self.inputpath.
+    """
+
+    def setUp(self):
+        # Parent of the directory that contains this file; the html/ and
+        # expected/ fixture directories are looked up beneath it.
+        here = os.path.dirname(os.path.realpath(__file__))
+        self.inputpath = os.path.sep.join(here.split(os.path.sep)[0:-1])
+
+    def _readFixture(self, *parts):
+        # Return the contents of a file below self.inputpath, closing the
+        # handle even if the read fails.
+        with open(os.path.join(self.inputpath, *parts)) as handle:
+            return handle.read()
+
+    def runParsingTest(self, filename):
+        """Convert html/<filename>.html and compare with expected/<filename>.txt.
+
+        filename is the fixture base name without directory or extension.
+        The test fails if HTML2Text cannot be imported or if the text
+        returned by gettext() differs from the expected file.
+        """
+        try:
+            from rss2maildir import HTML2Text
+        except ImportError:
+            # Not importable as-is: retry with the directory above
+            # self.inputpath appended to the module search path.
+            sys.path.append(os.path.sep.join(self.inputpath.split(os.path.sep)[0:-1]))
+            try:
+                from rss2maildir import HTML2Text
+            except ImportError:
+                # Only a missing module is a test failure; any other error
+                # raised while importing propagates with its own traceback.
+                self.fail("could not import HTML2Text from rss2maildir")
+
+        html = self._readFixture("html", filename + ".html")
+        expectedoutput = self._readFixture("expected", filename + ".txt")
+
+        parser = HTML2Text()
+        parser.feed(html)
+        self.assertEqual(parser.gettext(), expectedoutput)
import sys
import os
-class UnorderedListTests(unittest.TestCase):
- def setUp(self):
- self.inputpath = os.path.sep.join(os.path.dirname(os.path.realpath(__file__)).split(os.path.sep)[0:-1])
+import ParsingTests
+class UnorderedListTests(ParsingTests.ParsingTest):
+ # Unordered-list conversion tests. Each test passes a fixture base name
+ # to the inherited runParsingTest(), which reads html/<name>.html, feeds
+ # it to HTML2Text and compares gettext() with expected/<name>.txt.
def testWellFormedList(self):
- try:
- from rss2maildir import HTML2Text
- except:
- sys.path.append(os.path.sep.join(self.inputpath.split(os.path.sep)[0:-1]))
- try:
- from rss2maildir import HTML2Text
- except:
- self.assert_(False)
- input_path = os.path.sep.join(os.path.dirname(os.path.realpath(__file__)).split(os.path.sep)[0:-1])
- input = open(os.path.join(input_path, "html", "unorderedlist-wellformed.html")).read()
- expectedoutput = open(os.path.join(input_path, "expected", "unorderedlist-wellformed.txt")).read()
- parser = HTML2Text()
- parser.feed(input)
- output = parser.gettext()
- self.assertEqual(output, expectedoutput)
+ # The base name keeps the "unorderedlist-" prefix of the files the
+ # removed code opened, so the same fixture pair is still exercised.
+ return self.runParsingTest("unorderedlist-wellformed")
def testBadlyFormedList(self):
- try:
- from rss2maildir import HTML2Text
- except:
- sys.path.append(os.path.sep.join(self.inputpath.split(os.path.sep)[0:-1]))
- try:
- from rss2maildir import HTML2Text
- except:
- self.assert_(False)
-
- input_path = os.path.sep.join(os.path.dirname(os.path.realpath(__file__)).split(os.path.sep)[0:-1])
- input = open(os.path.join(input_path, "html", "unorderedlist-badlyformed.html")).read()
- expectedoutput = open(os.path.join(input_path, "expected", "unorderedlist-badlyformed.txt")).read()
- parser = HTML2Text()
- parser.feed(input)
- output = parser.gettext()
- self.assertEqual(output, expectedoutput)
+ # Same "unorderedlist-" prefix as the files the removed code opened.
+ return self.runParsingTest("unorderedlist-badlyformed")
def suite():
suite = unittest.TestSuite()