#
-import sys, urllib2, codecs
-import XMLParse, XMLWriter
+# standard python modules
+import sys, codecs
+# planetUCC modules
+import XMLParse2 as XMLParse, XMLWriter, CacheHandler
+# planetUCC output plugins
+import XHTMLWriter, RSS2Writer, RSS1Writer, FOAFWriter, OPMLWriter
-# step 1: read in the config and download the feeds
+# step 1: read in the config and check each object from cache
+cache = CacheHandler.CacheHandler()
feeds = []
+
for feed in open('feedlist').readlines():
if feed.strip()[0] != '#':
storage = feed.strip().split('\t')
name, feed = storage[0], storage[-1]
- sys.stdout.write('Downloading feed "%s" from %s... ' % (name, feed))
try:
- # XXX: might want to consider some good caching code in here
- feeds.append((name, feed, urllib2.urlopen(feed).read()))
- sys.stdout.write('done.\n')
+ feeds.append((name, feed, cache.getBlog(name, feed)))
except:
- sys.stdout.write('failed.\n')
+ sys.stderr.write('DEBUG: update-planet: something went wrong retrieving feed\n')
# step 2: process each feed
+tainted = False
blogs = []
for feed in feeds:
- xml = XMLParse.XMLParse(feed[2]).parse()
- for blog in xml:
- blog.blogTitle = feed[0]
- blogs += xml
+ # XMLParse2 takes two parameters, a URL and a CacheObject
+ blog = XMLParse.XMLParse(feed[1], feed[2]).parse()
+ if blog:
+ blog.blogName = feed[0]
+ blog.feedURL = feed[1]
+ blogs.append(blog)
+ # check the old copy of the cache, vs the new copy
+ if not feed[2] or not feed[2].cache or not blog.cache or feed[2].cache != blog.cache:
+ tainted = True
+ elif len(blog.items) > 0 and len(feed[2].items) > 0 and (blog.items[0].itemTitle != feed[2].items[0].itemTitle or blog.items[0].contents != feed[2].items[0].contents):
+ tainted = True
+ # write the cache back down to disk
+ cache.storeBlog(blog)
+ else:
+ pass
+
+# step 3: sift the feeds
+xmlwriter = XMLWriter.XMLWriter(blogs)
-# step 3: write feed to disk
+# step 4: write feed to disk
try:
- codecs.open('planet.html', 'wb', 'utf-8').write(XMLWriter.XMLWriter(XMLWriter.XHTMLWriter, blogs).write())
+ codecs.open('planet.html', 'wb', 'utf-8').write(xmlwriter.write(XHTMLWriter.XHTMLWriter))
except:
sys.stderr.write('DEBUG: update-planet: could not write planet.html, aborting\n')
raise
+
+try:
+ codecs.open('rss2.xml', 'wb', 'utf-8').write(xmlwriter.write(RSS2Writer.RSS2Writer))
+except:
+ sys.stderr.write('DEBUG: update-planet: could not write rss2.xml, aborting\n')
+
+try:
+ codecs.open('rss1.xml', 'wb', 'utf-8').write(xmlwriter.write(RSS1Writer.RSS1Writer))
+except:
+ sys.stderr.write('DEBUG: update-planet: could not write rss1.xml, aborting\n')
+
+try:
+ codecs.open('foaf.xml', 'wb', 'utf-8').write(xmlwriter.write(FOAFWriter.FOAFWriter))
+except:
+ sys.stderr.write('DEBUG: update-planet: could not write foaf.xml, aborting\n')
+
+try:
+ codecs.open('opml.xml', 'wb', 'utf-8').write(xmlwriter.write(OPMLWriter.OPMLWriter))
+except:
+ sys.stderr.write('DEBUG: update-planet: could not write opml.xml, aborting\n')
+
+
+if not tainted:
+ sys.stdout.write('PlanetUCC: no objects have changed in the cache, not updating\n')
+ sys.exit(1)
+