Fix some entity handling
[rss2maildir.git] / rss2maildir.py
index 86387f96ac01c2d8c1dfa12e9e6f4efddef92d27..9473dd009e3d5ebead31933d7ba110abd4202162 100755 (executable)
@@ -188,7 +188,7 @@ class HTML2Text(HTMLParser):
         u'dt',
         u'dd',
         u'div',
-        #u'blockquote',
+        u'blockquote',
         ]
 
     liststarttags = [
@@ -407,11 +407,11 @@ class HTML2Text(HTMLParser):
             quote = unicode( \
                 " ".join(self.curdata.encode("utf-8").strip().split()), \
                 "utf-8")
-            seperator = u'\n' + u' ' * self.indentlevel + u'> '
+            seperator = u'\n' + u' ' * self.indentlevel + u'    '
             if len(self.text) > 0 and self.text[-1] != u'\n':
                 self.text = self.text + u'\n'
             self.text = self.text \
-                + u'> ' \
+                + u'    ' \
                 + seperator.join( \
                     textwrap.wrap( \
                         quote, \
@@ -556,12 +556,15 @@ class HTML2Text(HTMLParser):
             self.opentags.append(u'p')
         self.curdata = self.curdata + data.decode("utf-8")
 
+    def handle_charref(self, name):
+        entity = unichr(int(name))
+        self.curdata = self.curdata + unicode(entity.encode('utf-8'), \
+            "utf-8")
+
     def handle_entityref(self, name):
         entity = name
         if HTML2Text.entities.has_key(name):
             entity = HTML2Text.entities[name]
-        elif name[0] == "#":
-            entity = unichr(int(name[1:]))
         else:
             entity = "&" + name + ";"