* improved entity handling
author Brett Parker <iDunno@sommitrealweird.co.uk>
Fri, 21 Dec 2007 00:31:37 +0000 (00:31 +0000)
committer Brett Parker <iDunno@sommitrealweird.co.uk>
Fri, 21 Dec 2007 00:31:37 +0000 (00:31 +0000)
rss2maildir.py

index fd148267ec68c55a375206990f598e09119c97b4..641ffb196d488fbce95a6354242b55d80640b216 100755 (executable)
@@ -157,10 +157,20 @@ class HTML2Text(HTMLParser):
             self.text = self.text + data.strip() + " "
 
     def handle_entityref(self, name):
+        entity = name
         if entities.has_key(name.lower()):
-            self.text = self.text + entities[name.lower()]
+            entity = entities[name.lower()]
+        elif name[0] == "#":
+            entity = unichr(int(name[1:]))
         else:
-            self.text = self.text + "&" + name + ";"
+            entity = "&" + name + ";"
+
+        if self.inparagraph:
+            self.currentparagraph = self.currentparagraph + entity
+        elif self.inblockquote:
+            self.blockquote = self.blockquote + entity
+        else:
+            self.text = self.text + entity
 
     def gettext(self):
         data = self.text