Merge branch 'newcache' of [email protected]:planet-ucc into newcache
[planet-ucc.git] / update-planet
1 #!/usr/bin/python
2 #
3 # update-planet
4 #
5 # Downloads feeds from the URLs specified and generates the XHTML files.
6 #
7 # (c) 2004, Davyd Madeley <[email protected]>
8 #
9
10 # standard python modules
11 import sys, codecs
12 # planetUCC modules
13 import XMLParse2 as XMLParse, XMLWriter, CacheHandler
14 # planetUCC output plugins
15 import XHTMLWriter, RSS2Writer, RSS1Writer, FOAFWriter, OPMLWriter
16
# step 1: read in the config and check each object from cache
#
# 'feedlist' holds one feed per line as tab-separated fields: the first field
# is taken as the blog name and the last field as the feed URL.  Lines whose
# first non-blank character is '#' are comments.
cache   = CacheHandler.CacheHandler()
feeds   = []

feedlist = open('feedlist')
try:
        lines = feedlist.readlines()
finally:
        # close the handle explicitly instead of leaving it to the collector
        feedlist.close()

for line in lines:
        entry = line.strip()
        # skip blank lines as well as comments: indexing the first character
        # of an empty string raises IndexError and would abort the whole run
        if not entry or entry.startswith('#'):
                continue
        storage         = entry.split('\t')
        name, feed      = storage[0], storage[-1]
        try:
                # each entry is (name, feed URL, cached copy); step 2 hands
                # the cached copy to the parser
                feeds.append((name, feed, cache.getBlog(name, feed)))
        except Exception:
                # best effort: a feed whose cache lookup fails is reported and
                # left out.  Catching Exception rather than everything lets
                # Ctrl-C and sys.exit() still stop the script.
                sys.stderr.write('DEBUG: update-planet: something went wrong retrieving feed\n')
29
# step 2: process each feed
#
# 'tainted' becomes True as soon as any parsed blog differs from the copy
# that was loaded from the cache in step 1.
tainted = False
blogs   = []
for feed in feeds:
        blog_name, feed_url, old_copy = feed
        # XMLParse2 takes two parameters, a URL and a CacheObject
        parser  = XMLParse.XMLParse(feed_url, old_copy)
        blog    = parser.parse()
        if not blog:
                # nothing usable came back for this feed; leave it out
                continue
        blog.blogName   = blog_name
        blog.feedURL    = feed_url
        blogs.append(blog)
        # check the old copy of the cache, vs the new copy: a missing old
        # copy, a missing 'cache' attribute value on either side, or a
        # mismatch between the two all count as a change
        cache_changed = (not old_copy
                         or not old_copy.cache
                         or not blog.cache
                         or old_copy.cache != blog.cache)
        if cache_changed:
                tainted = True
        else:
                # same cache value: fall back to comparing the first item of
                # each copy, when both copies have at least one item
                if len(blog.items) > 0 and len(old_copy.items) > 0:
                        if (blog.items[0].itemTitle != old_copy.items[0].itemTitle
                            or blog.items[0].contents != old_copy.items[0].contents):
                                tainted = True
        # write the cache back down to disk
        cache.storeBlog(blog)
49
# step 3: sift the feeds
xmlwriter       = XMLWriter.XMLWriter(blogs)

# step 4: write feed to disk
def _write_output(filename, writer):
        """Render the planet with the given output plugin and save it as UTF-8.

        filename -- path of the file to (over)write
        writer   -- output plugin class handed to xmlwriter.write()

        Any exception from rendering, opening or writing propagates to the
        caller.
        """
        # Render BEFORE opening the file: opening in 'wb' mode truncates it,
        # so rendering afterwards would wipe the previous good copy whenever
        # the plugin fails.
        rendered = xmlwriter.write(writer)
        output   = codecs.open(filename, 'wb', 'utf-8')
        try:
                output.write(rendered)
        finally:
                # always close so the data is flushed and the handle released
                output.close()

# (output file, output plugin, fatal?) -- only a failure to produce
# planet.html stops the script; the other outputs are best effort even
# though their messages also say "aborting".
outputs = (
        ('planet.html', XHTMLWriter.XHTMLWriter, True),
        ('rss2.xml',    RSS2Writer.RSS2Writer,   False),
        ('rss1.xml',    RSS1Writer.RSS1Writer,   False),
        ('foaf.xml',    FOAFWriter.FOAFWriter,   False),
        ('opml.xml',    OPMLWriter.OPMLWriter,   False),
)

for filename, writer, fatal in outputs:
        try:
                _write_output(filename, writer)
        except Exception:
                # Exception rather than a bare except, so Ctrl-C and
                # sys.exit() are not swallowed for the non-fatal outputs
                sys.stderr.write('DEBUG: update-planet: could not write %s, aborting\n' % filename)
                if fatal:
                        raise
79
80
# Report an unchanged cache through the exit status: when no feed was marked
# as changed in step 2, print a notice and leave with status 1.
# NOTE(review): this check runs after step 4 has already written the output
# files, so "not updating" does not match what happened -- confirm whether
# the check was meant to sit before step 3.
if not tainted:
        sys.stdout.write('PlanetUCC: no objects have changed in the cache, not updating\n')
        raise SystemExit(1)
84

UCC git Repository :: git.ucc.asn.au