X-Git-Url: https://git.ucc.asn.au/?p=planet-ucc.git;a=blobdiff_plain;f=XMLParse2.py;h=978b41829b10971a592fc4bc45be45ddc72466cd;hp=1cec66838912963af64dffb38e37e2f6b20c3069;hb=450a7bfdaba8430a03cc88fb67b29ce40801a2dd;hpb=783caef083f5dcafaed71dfa3f3a8035109717e9 diff --git a/XMLParse2.py b/XMLParse2.py index 1cec668..978b418 100644 --- a/XMLParse2.py +++ b/XMLParse2.py @@ -13,8 +13,11 @@ import CacheHandler sys.path.insert(0, 'extra') import feedparser +feedparser.USER_AGENT = "PlanetUCC/1.0b +http://planet.ucc.asn.au/ %s" % feedparser.USER_AGENT + class Blog: def __init__(self): + self.blogName = None self.blogTitle = None self.blogURL = None self.feedURL = None @@ -28,6 +31,7 @@ class BlogItem: self.itemTitle = None self.itemDate = None self.itemURL = None + self.commentsURL = None self.contents = None class XMLParse: @@ -38,21 +42,21 @@ class XMLParse: def parse(self): "Return a single Blog object" item = Blog() - if self.blogObject: - sys.stdout.write('Downloading feed %s...' % self.feedURL) + if self.blogObject and self.blogObject.cache: + sys.stdout.write('Downloading feed %s... ' % self.feedURL) try: data = feedparser.parse(self.feedURL, self.blogObject.cache.etag, self.blogObject.cache.date) - sys.stdout.write('done.\n') + # check to see what we got returned + if data['items'] == [] and data['channel'] == {}: + sys.stdout.write('cached.\n') + return self.blogObject + else: + sys.stdout.write('done.\n') except: sys.stdout.write('failed.\n') - raise return None - # check to see what we got returned - if data['items'] == [] and data['channel'] == {}: - sys.stdout.write('Feed %s is upto date.\n' % self.feedURL) - return self.blogObject else: - sys.stdout.write('Downloading feed from %s (no cache)...' % self.feedURL) + sys.stdout.write('Downloading feed (no cache) %s... ' % self.feedURL) try: data = feedparser.parse(self.feedURL) sys.stdout.write('done.\n') @@ -62,10 +66,17 @@ class XMLParse: # create caching data try: cache = CacheHandler.CacheObject() - cache.etag = data['etag'] - cache.date = data['modified'] + try: + cache.etag = data['etag'] + except: + cache.etag = None + try: + cache.date = data['modified'] + except: + cache.date = None item.cache = cache except: + sys.stderr.write('DEBUG: XMLParse2: cache item generation failed\n') item.cache = None # parse the return of data into a blog if data['channel'].has_key('title'): @@ -76,6 +87,10 @@ class XMLParse: item.blogURL = data['channel']['link'] else: item.blogURL = self.feedURL + if data['feed'].has_key ('image') and data['feed']['image'].has_key ('url'): + item.imageURL = data['feed']['image']['url'] + if data['feed'].has_key ('image') and data['feed']['image'].has_key ('link'): + item.imageLink = data['feed']['image']['link'] for entry in data['items']: blogItem = BlogItem() if entry.has_key('title'): @@ -86,13 +101,16 @@ class XMLParse: blogItem.itemURL = entry['link'] else: blogItem.itemURL = item.blogURL - if entry.has_key('date_parsed'): - blogItem.itemDate = time.mktime(entry['date_parsed']) + if entry.has_key('modified_parsed'): + try: blogItem.itemDate = time.mktime(entry['modified_parsed']) + 28800 + except: blogItem.itemDate = 0 else: blogItem.itemDate = 0 if entry.has_key('description'): blogItem.contents = entry['description'] else: blogItem.contents = '(entry could not be retrieved)' + if entry.has_key ('comments'): + blogItem.commentsURL = entry['comments'] item.items.append(blogItem) return item