Fix some entity handling
author Brett Parker <iDunno@sommitrealweird.co.uk>
Mon, 3 Mar 2008 15:08:11 +0000 (15:08 +0000)
committer Brett Parker <iDunno@sommitrealweird.co.uk>
Mon, 3 Mar 2008 15:08:11 +0000 (15:08 +0000)
    * fixes handling of numeric entities
    * fixes unittest for entities.

rss2maildir.py
tests/expected/entities.txt
tests/html/entities.html
tests/unittests/EntityTests.py

index 47ba9dc125687576f8759aaeebe0a2e63cca43f4..9473dd009e3d5ebead31933d7ba110abd4202162 100755 (executable)
@@ -556,12 +556,15 @@ class HTML2Text(HTMLParser):
             self.opentags.append(u'p')
         self.curdata = self.curdata + data.decode("utf-8")
 
+    def handle_charref(self, name):
+        entity = unichr(int(name))
+        self.curdata = self.curdata + unicode(entity.encode('utf-8'), \
+            "utf-8")
+
     def handle_entityref(self, name):
         entity = name
         if HTML2Text.entities.has_key(name):
             entity = HTML2Text.entities[name]
-        elif name[0] == "#":
-            entity = unichr(int(name[1:]))
         else:
             entity = "&" + name + ";"
 
index 20e85cd19ff0a473534ef7b3409591bc878edb9e..72d5d3a1880171afae99b262482cae7af363fe3f 100644 (file)
@@ -1 +1 @@
-áÞö
+áÞö
index 77fd2ca15899957c228f54f967b2fc31fbae020a..6b19183af1f9a4494f8a83b65537a5c52e030832 100644 (file)
@@ -1 +1 @@
-<p>&aacute;&THORN;&ouml;</p>
+<p>&aacute;&THORN;&ouml;&#8217;</p>
index b317f4d491e5fe9c43ce93751979a64d099f232c..3b6ec3da709b785c60456a07ea0313bd2e2347a9 100755 (executable)
@@ -11,7 +11,7 @@ class EntityTests(ParsingTests.ParsingTest):
 
 def suite():
     suite = unittest.TestSuite()
-    suite.addTest(SpacingTests("testEntities"))
+    suite.addTest(EntityTests("testEntities"))
     return suite
 
 if __name__ == "__main__":