#!/usr/bin/python
#
# update-planet
#
# Downloads feeds from the URLs specified and generates the XHTML files.
#
# (c) 2004, Davyd Madeley
#

import sys, urllib2, codecs
import XMLParse, XMLWriter

# step 1: read in the config and download the feeds
# each non-comment line of 'feedlist' is tab-separated: name<TAB>url
feeds = []
for line in open('feedlist').readlines():
    line = line.strip()
    # skip blank lines and comment lines
    if not line or line.startswith('#'):
        continue
    storage = line.split('\t')
    name, url = storage[0], storage[-1]
    sys.stdout.write('Downloading feed "%s" from %s... ' % (name, url))
    try:
        # XXX: might want to consider some good caching code in here
        feeds.append((name, url, urllib2.urlopen(url).read()))
        sys.stdout.write('done.\n')
    except IOError:
        # urllib2.URLError is a subclass of IOError, so this catches
        # network and HTTP failures without swallowing everything
        sys.stdout.write('failed.\n')

# step 2: parse each feed and tag its entries with the configured name
blogs = []
for name, url, data in feeds:
    xml = XMLParse.XMLParse(data).parse()
    for blog in xml:
        blog.blogTitle = name
    blogs += xml

# step 3: write the aggregated page to disk as UTF-8
try:
    codecs.open('planet.html', 'wb', 'utf-8').write(
        XMLWriter.XMLWriter(XMLWriter.XHTMLWriter, blogs).write())
except:
    sys.stderr.write('DEBUG: update-planet: could not write planet.html, aborting\n')
    raise
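
# A minimal sketch of the caching idea flagged by the XXX comment above,
# assuming a cache/<name> file layout; fetch_cached() and its parameters
# are hypothetical, not part of the original script. It sends an
# If-Modified-Since header and reuses the cached copy on HTTP 304.
import os, time

def fetch_cached(name, url, cache_dir='cache'):
    path = os.path.join(cache_dir, name)
    request = urllib2.Request(url)
    if os.path.exists(path):
        # present the cached file's mtime as an HTTP date
        stamp = time.strftime('%a, %d %b %Y %H:%M:%S GMT',
                              time.gmtime(os.path.getmtime(path)))
        request.add_header('If-Modified-Since', stamp)
    try:
        data = urllib2.urlopen(request).read()
    except urllib2.HTTPError, e:
        if e.code == 304:
            # not modified since the last fetch: reuse the cached copy
            return open(path, 'rb').read()
        raise
    if not os.path.isdir(cache_dir):
        os.makedirs(cache_dir)
    open(path, 'wb').write(data)
    return data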