projects
/
rss2maildir.git
/ blobdiff
commit
grep
author
committer
pickaxe
?
search:
re
summary
|
shortlog
|
log
|
commit
|
commitdiff
|
tree
raw
|
inline
| side by side
Fix for items that actually have no content.
[rss2maildir.git]
/
rss2maildir.py
diff --git a/rss2maildir.py b/rss2maildir.py
index 9473dd009e3d5ebead31933d7ba110abd4202162..b77c6d7d11ef14c2878fcdcb379829f4f9f82f1e 100755
(executable)
--- a/rss2maildir.py
+++ b/rss2maildir.py
@@ -44,6 +44,8 @@
import md5
import cgi
import dbm
import cgi
import dbm
+import re
+
from HTMLParser import HTMLParser
class HTML2Text(HTMLParser):
from HTMLParser import HTMLParser
class HTML2Text(HTMLParser):
@@ -557,7 +559,16 @@
class HTML2Text(HTMLParser):
self.curdata = self.curdata + data.decode("utf-8")
def handle_charref(self, name):
self.curdata = self.curdata + data.decode("utf-8")
def handle_charref(self, name):
- entity = unichr(int(name))
+ try:
+ entity = unichr(int(name))
+ except:
+ if name[0] == 'x':
+ try:
+ entity = unichr(int('0%s' %(name,), 16))
+ except:
+ entity = u'#%s' %(name,)
+ else:
+ entity = u'#%s' %(name,)
self.curdata = self.curdata + unicode(entity.encode('utf-8'), \
"utf-8")
self.curdata = self.curdata + unicode(entity.encode('utf-8'), \
"utf-8")
@@ -672,7 +683,10 @@
def parse_and_deliver(maildir, url, statedir):
if item.has_key("content"):
content = item["content"][0]["value"]
else:
if item.has_key("content"):
content = item["content"][0]["value"]
else:
- content = item["summary"]
+ if item.has_key("description"):
+ content = item["description"]
+ else:
+ content = u''
md5sum = md5.md5(content.encode("utf-8")).hexdigest()
md5sum = md5.md5(content.encode("utf-8")).hexdigest()
@@ -725,7 +739,10 @@
def parse_and_deliver(maildir, url, statedir):
pass
msg.add_header("Date", createddate)
subj_gen = HTML2Text()
pass
msg.add_header("Date", createddate)
subj_gen = HTML2Text()
- subj_gen.feed(item["title"].encode("utf-8"))
+ title = item["title"].encode("utf-8")
+ title = re.sub(u'<', u'&lt;', title)
+ title = re.sub(u'>', u'&gt;', title)
+ subj_gen.feed(title)
msg.add_header("Subject", subj_gen.gettext())
msg.set_default_type("text/plain")
msg.add_header("Subject", subj_gen.gettext())
msg.set_default_type("text/plain")