2004-11-02 Davyd Madeley <[email protected]>
[planet-ucc.git] / update-planet
1 #!/usr/bin/python
2 #
3 # update-planet
4 #
5 # Downloads feeds from the URLs specified and generates the XHTML, RSS, FOAF and OPML files.
6 #
7 # (c) 2004, Davyd Madeley <[email protected]>
8 #
9
10 # standard python modules
11 import sys, codecs
12 # planetUCC modules
13 import XMLParse2 as XMLParse, XMLWriter, CacheHandler
14 # planetUCC output plugins
15 import XHTMLWriter, RSS2Writer, RSS1Writer, FOAFWriter, OPMLWriter
16
# step 1: read in the config and check each object from cache
#
# 'feedlist' holds one feed per line as tab-separated fields: the first field
# is the blog name and the last field is the feed URL. Lines whose first
# non-blank character is '#' are comments; blank lines are ignored.
cache   = CacheHandler.CacheHandler()
feeds   = []    # (name, feed URL, cached copy of the blog) in feedlist order

feedlist = open('feedlist')
try:
    for line in feedlist:
        line = line.strip()
        # an empty line has no first character to test, so skip it here
        # rather than letting line[0] raise IndexError and abort the run
        if not line or line.startswith('#'):
            continue
        fields      = line.split('\t')
        name, url   = fields[0], fields[-1]
        try:
            feeds.append((name, url, cache.getBlog(name, url)))
        except Exception:
            # best effort: one feed failing the cache lookup must not stop
            # the others, but report which feed it was and what went wrong
            sys.stderr.write('DEBUG: update-planet: something went wrong retrieving feed %s: %s\n' % (name, sys.exc_info()[1]))
finally:
    # always release the file handle, even if a line blows up
    feedlist.close()
29
# step 2: process each feed
#
# 'tainted' becomes True as soon as any feed looks different from its cached
# copy; 'blogs' collects every feed that parsed successfully.
tainted = False
blogs   = []
for name, url, cached in feeds:
    # XMLParse2 takes two parameters, a URL and a CacheObject
    blog = XMLParse.XMLParse(url, cached).parse()
    if not blog:
        # the parser gave us nothing for this feed, so leave it out
        continue

    blog.blogName   = name
    blog.feedURL    = url
    blogs.append(blog)

    # check the old copy of the cache, vs the new copy: a missing cached
    # copy, a missing 'cache' attribute value on either side, or differing
    # values all count as a change
    changed = not cached or not cached.cache or not blog.cache or cached.cache != blog.cache
    if not changed:
        # the 'cache' values agree, so fall back to comparing the first
        # item of each copy by title and contents
        changed = (len(blog.items) > 0 and len(cached.items) > 0 and
                   (blog.items[0].itemTitle != cached.items[0].itemTitle or
                    blog.items[0].contents != cached.items[0].contents))
    if changed:
        tainted = True

    # write the cache back down to disk
    cache.storeBlog(blog)
49
# step 3: sift the feeds
xmlwriter       = XMLWriter.XMLWriter(blogs)

# step 4: write feed to disk
#
# Each entry is (output file, output plugin class, re-raise on failure?).
# Historically only a failed opml.xml write re-raised; every other failure
# was reported on stderr and the run carried on. That is preserved here.
# NOTE(review): the lone re-raise looks like leftover debugging -- confirm
# whether all outputs should be fatal, or none.
outputs = [
    ('planet.html', XHTMLWriter.XHTMLWriter,    False),
    ('rss2.xml',    RSS2Writer.RSS2Writer,      False),
    ('rss1.xml',    RSS1Writer.RSS1Writer,      False),
    ('foaf.xml',    FOAFWriter.FOAFWriter,      False),
    ('opml.xml',    OPMLWriter.OPMLWriter,      True),
]

for filename, writer, fatal in outputs:
    try:
        # render first: opening with 'wb' truncates the file, so a rendering
        # error must not wipe out the previously generated copy
        rendered = xmlwriter.write(writer)
        output = codecs.open(filename, 'wb', 'utf-8')
        try:
            output.write(rendered)
        finally:
            # close explicitly so the data is flushed and the handle freed
            output.close()
    except Exception:
        sys.stderr.write('DEBUG: update-planet: could not write %s, aborting\n' % filename)
        # include the reason, which the bare except used to throw away
        sys.stderr.write('DEBUG: update-planet: %s\n' % (sys.exc_info()[1],))
        if fatal:
            raise
79
# Report an unchanged cache and signal it to the caller with exit status 1.
# NOTE(review): this runs after step 4, so the output files have already
# been rewritten by the time we get here; the check only affects the message
# and the exit status -- confirm whether it was meant to run before step 3.
if not tainted:
    unchanged_notice = 'PlanetUCC: no objects have changed in the cache, not updating\n'
    sys.stdout.write(unchanged_notice)
    raise SystemExit(1)
83

UCC git Repository :: git.ucc.asn.au