change images to ReST format

author Brett Parker <iDunno@sommitrealweird.co.uk>

Sat, 1 Mar 2008 22:16:46 +0000 (22:16 +0000)

committer Brett Parker <iDunno@sommitrealweird.co.uk>

Sat, 1 Mar 2008 22:16:46 +0000 (22:16 +0000)
author Brett Parker <iDunno@sommitrealweird.co.uk>
Sat, 1 Mar 2008 22:16:46 +0000 (22:16 +0000)
committer Brett Parker <iDunno@sommitrealweird.co.uk>
Sat, 1 Mar 2008 22:16:46 +0000 (22:16 +0000)
diff --git a/rss2maildir.py b/rss2maildir.py

index ce6c3426b54fff080b6639a09e85269375fb170a..b0bc441d09b23c56807d993ff98c9f9c09372dde 100755 (executable)
--- a/rss2maildir.py
+++ b/rss2maildir.py
@@ -99,6 +99,7 @@ class HTML2Text(HTMLParser):
          self.ignorenodata = False
          self.listcount = []
          self.urls = []
+        self.images = {}
          HTMLParser.__init__(self)
  
      def handle_starttag(self, tag, attrs):
@@ -187,16 +188,31 @@ class HTML2Text(HTMLParser):
              elif attr[0] == 'src':
                  url = attr[1].decode('utf-8')
          if url:
-            self.curdata = self.curdata \
-                + u' [img:' \
-                + url
              if alt:
-                self.curdata = self.curdata \
-                    + u'(' \
-                    + alt \
-                    + u')'
-            self.curdata = self.curdata \
-                + u']'
+                if self.images.has_key(alt):  # alt text doubles as the ReST substitution name
+                    if self.images[alt]["url"] == url:
+                        self.curdata = self.curdata \
+                            + u'|%s|' %(alt,)
+                    else:
+                        while self.images.has_key(alt):  # same alt, different url: find an unused name
+                            alt = alt + "_"
+                        self.images[alt] = {"url": url}  # suffixed key does not exist yet, so create its entry
+                        self.curdata = self.curdata \
+                            + u'|%s|' %(alt,)
+                else:
+                    self.images[alt] = {}
+                    self.images[alt]["url"] = url
+                    self.curdata = self.curdata \
+                        + u'|%s|' %(alt,)
+            else:
+                if self.images.has_key(url):
+                    self.curdata = self.curdata \
+                        + u'|%s|' %(url,)
+                else:
+                    self.images[url] = {}
+                    self.images[url]["url"] =url
+                    self.curdata = self.curdata \
+                        + u'|%s|' %(url,)
  
      def handle_curdata(self):
  
@@ -444,6 +460,12 @@ class HTML2Text(HTMLParser):
          if len(self.urls) > 0:
              self.text = self.text + u'\n__ ' + u'\n__ '.join(self.urls) + u'\n'
              self.urls = []
+        if len(self.images.keys()) > 0:  # append one ReST substitution definition per collected image
+            self.text = self.text + u'\n.. ' \
+                + u'\n.. '.join( \
+                    ["|%s| image:: %s" %(a, self.images[a]["url"]) \
+                for a in self.images.keys()]) + u'\n'
+            self.images = {}  # definitions are emitted; start clean for the next run
          return self.text
  
  def open_url(method, url):
diff --git a/tests/expected/image-test.txt b/tests/expected/image-test.txt

new file mode 100644 (file)

index 0000000..2e4ac72
--- /dev/null
+++ b/tests/expected/image-test.txt
@@ -0,0 +1,6 @@
+This just tests a simple image
+==============================
+
+Images are a fundamental item, innit. |:)|
+
+.. |:)| image:: http://www.example.com/smilie.jpg
diff --git a/tests/html/image-test.html b/tests/html/image-test.html

new file mode 100644 (file)

index 0000000..49ea98a
--- /dev/null
+++ b/tests/html/image-test.html
@@ -0,0 +1,3 @@
+<h1>This just tests a simple image</h1>
+
+<p>Images are a fundamental item, innit. <img src="http://www.example.com/smilie.jpg" alt=":)" /></p>
diff --git a/tests/unittests/ImageTests.py b/tests/unittests/ImageTests.py

new file mode 100755 (executable)

index 0000000..f296d84
--- /dev/null
+++ b/tests/unittests/ImageTests.py
@@ -0,0 +1,18 @@
+#!/usr/bin/python
+
+import unittest
+import os
+
+import ParsingTests
+
+class ImageTests(ParsingTests.ParsingTest):  # parsing tests for <img> conversion
+    def testBasicImage(self):  # single image with alt text
+        return self.runParsingTest("image-test")  # presumably pairs tests/html/image-test.html with tests/expected/image-test.txt -- confirm in ParsingTests
+
+def suite():  # build a TestSuite containing the image tests
+    suite = unittest.TestSuite()  # local name shadows this function only inside its own body
+    suite.addTest(ImageTests("testBasicImage"))
+    return suite
+
+if __name__ == "__main__":
+    unittest.main()
author	Brett Parker <iDunno@sommitrealweird.co.uk>
	Sat, 1 Mar 2008 22:16:46 +0000 (22:16 +0000)
committer	Brett Parker <iDunno@sommitrealweird.co.uk>
	Sat, 1 Mar 2008 22:16:46 +0000 (22:16 +0000)
rss2maildir.py		patch \| blob \| history
tests/expected/image-test.txt	[new file with mode: 0644]	patch \| blob
tests/html/image-test.html	[new file with mode: 0644]	patch \| blob
tests/unittests/ImageTests.py	[new file with mode: 0755]	patch \| blob