git.sommitrealweird.co.uk Git - rss2maildir.git/blobdiff - rss2maildir.py
Fix issue with images having the same alt value but different urls
[rss2maildir.git] / rss2maildir.py
index 6a24b94edc40a119845ea24caad6859c3f7c4b88..86387f96ac01c2d8c1dfa12e9e6f4efddef92d27 100755 (executable)
@@ -137,6 +137,39 @@ class HTML2Text(HTMLParser):
         u'THORN': u'Þ',
         u'eth': u'ð',
         u'ETH': u'Ð',
+        u'mdash': u'—',
+        u'ndash': u'–',
+        u'sect': u'§',
+        u'para': u'¶',
+        u'uarr': u'↑',
+        u'darr': u'↓',
+        u'larr': u'←',
+        u'rarr': u'→',
+        u'dagger': u'†',
+        u'Dagger': u'‡',
+        u'permil': u'‰',
+        u'prod': u'∏',
+        u'infin': u'∞',
+        u'radic': u'√',
+        u'there4': u'∴',
+        u'int': u'∫',
+        u'asymp': u'≈',
+        u'ne': u'≠',
+        u'equiv': '≡',
+        u'le': u'≤',
+        u'ge': u'≥',
+        u'loz': u'⋄',
+        u'sum': u'∑',
+        u'part': u'∂',
+        u'prime': u'′',
+        u'Prime': u'″',
+        u'harr': u'↔',
+        u'micro': u'µ',
+        u'not': u'¬',
+        u'plusmn': u'±',
+        u'divide': u'÷',
+        u'cent': u'¢',
+        u'euro': u'€',
         }
 
     blockleveltags = [
@@ -280,12 +313,11 @@ class HTML2Text(HTMLParser):
                     else:
                         while self.images.has_key(alt):
                             alt = alt + "_"
-                        self.images[alt]["url"] = url
+                        self.images[alt] = {"url": url}
                         self.curdata = self.curdata \
                             + u'|%s|' %(alt,)
                 else:
-                    self.images[alt] = {}
-                    self.images[alt]["url"] = url
+                    self.images[alt] = {"url": url}
                     self.curdata = self.curdata \
                         + u'|%s|' %(alt,)
             else:
@@ -550,7 +582,7 @@ class HTML2Text(HTMLParser):
             self.urls = []
         if len(self.images.keys()) > 0:
             self.text = self.text + u'\n.. ' \
-                + u'.. '.join( \
+                + u'\n.. '.join( \
                     ["|%s| image:: %s" %(a, self.images[a]["url"]) \
                 for a in self.images.keys()]) + u'\n'
             self.images = {}