projects
/
rss2maildir.git
/ blobdiff
commit
grep
author
committer
pickaxe
?
search:
re
summary
|
shortlog
|
log
|
commit
|
commitdiff
|
tree
raw
|
inline
| side by side
Fix for title parsing
[rss2maildir.git]
/
rss2maildir.py
diff --git
a/rss2maildir.py
b/rss2maildir.py
index d36b48967a433761e622c43b660a57f4dc92af9e..ff2901c5ffab0414fa563975801c5e6fd76d315f 100755
(executable)
--- a/
rss2maildir.py
+++ b/
rss2maildir.py
@@
-44,6
+44,8
@@
import md5
import cgi
import dbm
import cgi
import dbm
+import re
+
from HTMLParser import HTMLParser
class HTML2Text(HTMLParser):
from HTMLParser import HTMLParser
class HTML2Text(HTMLParser):
@@
-188,7
+190,7
@@
class HTML2Text(HTMLParser):
u'dt',
u'dd',
u'div',
u'dt',
u'dd',
u'div',
-
#
u'blockquote',
+ u'blockquote',
]
liststarttags = [
]
liststarttags = [
@@
-313,12
+315,11
@@
class HTML2Text(HTMLParser):
else:
while self.images.has_key(alt):
alt = alt + "_"
else:
while self.images.has_key(alt):
alt = alt + "_"
- self.images[alt]
["url"] = url
+ self.images[alt]
= {"url": url}
self.curdata = self.curdata \
+ u'|%s|' %(alt,)
else:
self.curdata = self.curdata \
+ u'|%s|' %(alt,)
else:
- self.images[alt] = {}
- self.images[alt]["url"] = url
+ self.images[alt] = {"url": url}
self.curdata = self.curdata \
+ u'|%s|' %(alt,)
else:
self.curdata = self.curdata \
+ u'|%s|' %(alt,)
else:
@@
-408,11
+409,11
@@
class HTML2Text(HTMLParser):
quote = unicode( \
" ".join(self.curdata.encode("utf-8").strip().split()), \
"utf-8")
quote = unicode( \
" ".join(self.curdata.encode("utf-8").strip().split()), \
"utf-8")
- seperator = u'\n' + u' ' * self.indentlevel + u'
>
'
+ seperator = u'\n' + u' ' * self.indentlevel + u'
'
if len(self.text) > 0 and self.text[-1] != u'\n':
self.text = self.text + u'\n'
self.text = self.text \
if len(self.text) > 0 and self.text[-1] != u'\n':
self.text = self.text + u'\n'
self.text = self.text \
- + u'
>
' \
+ + u'
' \
+ seperator.join( \
textwrap.wrap( \
quote, \
+ seperator.join( \
textwrap.wrap( \
quote, \
@@
-557,12
+558,15
@@
class HTML2Text(HTMLParser):
self.opentags.append(u'p')
self.curdata = self.curdata + data.decode("utf-8")
self.opentags.append(u'p')
self.curdata = self.curdata + data.decode("utf-8")
+ def handle_charref(self, name):
+ entity = unichr(int(name))
+ self.curdata = self.curdata + unicode(entity.encode('utf-8'), \
+ "utf-8")
+
def handle_entityref(self, name):
entity = name
if HTML2Text.entities.has_key(name):
entity = HTML2Text.entities[name]
def handle_entityref(self, name):
entity = name
if HTML2Text.entities.has_key(name):
entity = HTML2Text.entities[name]
- elif name[0] == "#":
- entity = unichr(int(name[1:]))
else:
entity = "&" + name + ";"
else:
entity = "&" + name + ";"
@@
-723,7
+727,10
@@
def parse_and_deliver(maildir, url, statedir):
pass
msg.add_header("Date", createddate)
subj_gen = HTML2Text()
pass
msg.add_header("Date", createddate)
subj_gen = HTML2Text()
- subj_gen.feed(item["title"].encode("utf-8"))
+ title = item["title"].encode("utf-8")
+ title = re.sub(u'<', u'&lt;', title)
+ title = re.sub(u'>', u'&gt;', title)
+ subj_gen.feed(title)
msg.add_header("Subject", subj_gen.gettext())
msg.set_default_type("text/plain")
msg.add_header("Subject", subj_gen.gettext())
msg.set_default_type("text/plain")