change images to ReST format
author Brett Parker <iDunno@sommitrealweird.co.uk>
Sat, 1 Mar 2008 22:16:46 +0000 (22:16 +0000)
committer Brett Parker <iDunno@sommitrealweird.co.uk>
Sat, 1 Mar 2008 22:16:46 +0000 (22:16 +0000)
rss2maildir.py
tests/expected/image-test.txt [new file with mode: 0644]
tests/html/image-test.html [new file with mode: 0644]
tests/unittests/ImageTests.py [new file with mode: 0755]

diff --git a/rss2maildir.py b/rss2maildir.py
index ce6c3426b54fff080b6639a09e85269375fb170a..b0bc441d09b23c56807d993ff98c9f9c09372dde 100755 (executable)
@@ -99,6 +99,7 @@ class HTML2Text(HTMLParser):
         self.ignorenodata = False
         self.listcount = []
         self.urls = []
+        self.images = {}
         HTMLParser.__init__(self)
 
     def handle_starttag(self, tag, attrs):
@@ -187,16 +188,31 @@ class HTML2Text(HTMLParser):
             elif attr[0] == 'src':
                 url = attr[1].decode('utf-8')
         if url:
-            self.curdata = self.curdata \
-                + u' [img:' \
-                + url
             if alt:
-                self.curdata = self.curdata \
-                    + u'(' \
-                    + alt \
-                    + u')'
-            self.curdata = self.curdata \
-                + u']'
+                if self.images.has_key(alt):
+                    if self.images[alt]["url"] == url:
+                        self.curdata = self.curdata \
+                            + u'|%s|' %(alt,)
+                    else:
+                        while self.images.has_key(alt):
+                            alt = alt + "_"
+                        self.images[alt]["url"] = url
+                        self.curdata = self.curdata \
+                            + u'|%s|' %(alt,)
+                else:
+                    self.images[alt] = {}
+                    self.images[alt]["url"] = url
+                    self.curdata = self.curdata \
+                        + u'|%s|' %(alt,)
+            else:
+                if self.images.has_key(url):
+                    self.curdata = self.curdata \
+                        + u'|%s|' %(url,)
+                else:
+                    self.images[url] = {}
+                    self.images[url]["url"] =url
+                    self.curdata = self.curdata \
+                        + u'|%s|' %(url,)
 
     def handle_curdata(self):
 
@@ -444,6 +460,12 @@ class HTML2Text(HTMLParser):
         if len(self.urls) > 0:
             self.text = self.text + u'\n__ ' + u'\n__ '.join(self.urls) + u'\n'
             self.urls = []
+        if len(self.images.keys()) > 0:
+            self.text = self.text + u'\n.. ' \
+                + u'.. '.join( \
+                    ["|%s| image:: %s" %(a, self.images[a]["url"]) \
+                for a in self.images.keys()]) + u'\n'
+            self.images = {}
         return self.text
 
 def open_url(method, url):
diff --git a/tests/expected/image-test.txt b/tests/expected/image-test.txt
new file mode 100644 (file)
index 0000000..2e4ac72
--- /dev/null
@@ -0,0 +1,6 @@
+This just tests a simple image
+==============================
+
+Images are a fundamental item, innit. |:)|
+
+.. |:)| image:: http://www.example.com/smilie.jpg
diff --git a/tests/html/image-test.html b/tests/html/image-test.html
new file mode 100644 (file)
index 0000000..49ea98a
--- /dev/null
@@ -0,0 +1,3 @@
+<h1>This just tests a simple image</h1>
+
+<p>Images are a fundamental item, innit. <img src="http://www.example.com/smilie.jpg" alt=":)" /></p>
diff --git a/tests/unittests/ImageTests.py b/tests/unittests/ImageTests.py
new file mode 100755 (executable)
index 0000000..f296d84
--- /dev/null
@@ -0,0 +1,18 @@
+#!/usr/bin/python
+
+import unittest
+import os
+
+import ParsingTests
+
+class ImageTests(ParsingTests.ParsingTest):
+    def testBasicImage(self):
+        return self.runParsingTest("image-test")
+
+def suite():
+    suite = unittest.TestSuite()
+    suite.addTest(ImageTests("testBasicImage"))
+    return suite
+
+if __name__ == "__main__":
+    unittest.main()