From: Brett Parker Date: Mon, 3 Mar 2008 15:08:11 +0000 (+0000) Subject: Fix some entity handling X-Git-Url: https://git.sommitrealweird.co.uk/rss2maildir.git/commitdiff_plain/9337881574f8f9370fecabfeb9a52e4341568cd9?ds=inline Fix some entity handling * fixes handling of numeric entities * fixes unittest for entities. --- diff --git a/rss2maildir.py b/rss2maildir.py index 47ba9dc..9473dd0 100755 --- a/rss2maildir.py +++ b/rss2maildir.py @@ -556,12 +556,15 @@ class HTML2Text(HTMLParser): self.opentags.append(u'p') self.curdata = self.curdata + data.decode("utf-8") + def handle_charref(self, name): + entity = unichr(int(name)) + self.curdata = self.curdata + unicode(entity.encode('utf-8'), \ + "utf-8") + def handle_entityref(self, name): entity = name if HTML2Text.entities.has_key(name): entity = HTML2Text.entities[name] - elif name[0] == "#": - entity = unichr(int(name[1:])) else: entity = "&" + name + ";" diff --git a/tests/expected/entities.txt b/tests/expected/entities.txt index 20e85cd..72d5d3a 100644 --- a/tests/expected/entities.txt +++ b/tests/expected/entities.txt @@ -1 +1 @@ -áÞö +áÞö’ diff --git a/tests/html/entities.html b/tests/html/entities.html index 77fd2ca..6b19183 100644 --- a/tests/html/entities.html +++ b/tests/html/entities.html @@ -1 +1 @@ -

áÞö

+

áÞö’

diff --git a/tests/unittests/EntityTests.py b/tests/unittests/EntityTests.py index b317f4d..3b6ec3d 100755 --- a/tests/unittests/EntityTests.py +++ b/tests/unittests/EntityTests.py @@ -11,7 +11,7 @@ class EntityTests(ParsingTests.ParsingTest): def suite(): suite = unittest.TestSuite() - suite.addTest(SpacingTests("testEntities")) + suite.addTest(EntityTests("testEntities")) return suite if __name__ == "__main__":