X-Git-Url: https://git.ucc.asn.au/?p=planet-ucc.git;a=blobdiff_plain;f=XMLParse2.py;h=978b41829b10971a592fc4bc45be45ddc72466cd;hp=563a0faad0f12adb0ac2b45b167887abf59c3073;hb=53cb3dba671c04eed6700fabda0f8ac7afb484b3;hpb=70b487ce56a52ae51eccb89d2e1ae57485a30f59 diff --git a/XMLParse2.py b/XMLParse2.py index 563a0fa..978b418 100644 --- a/XMLParse2.py +++ b/XMLParse2.py @@ -13,8 +13,11 @@ import CacheHandler sys.path.insert(0, 'extra') import feedparser +feedparser.USER_AGENT = "PlanetUCC/1.0b +http://planet.ucc.asn.au/ %s" % feedparser.USER_AGENT + class Blog: def __init__(self): + self.blogName = None self.blogTitle = None self.blogURL = None self.feedURL = None @@ -28,6 +31,7 @@ class BlogItem: self.itemTitle = None self.itemDate = None self.itemURL = None + self.commentsURL = None self.contents = None class XMLParse: @@ -39,19 +43,20 @@ class XMLParse: "Return a single Blog object" item = Blog() if self.blogObject and self.blogObject.cache: - sys.stdout.write('Downloading feed %s...' % self.feedURL) + sys.stdout.write('Downloading feed %s... ' % self.feedURL) try: data = feedparser.parse(self.feedURL, self.blogObject.cache.etag, self.blogObject.cache.date) - sys.stdout.write('done.\n') + # check to see what we got returned + if data['items'] == [] and data['channel'] == {}: + sys.stdout.write('cached.\n') + return self.blogObject + else: + sys.stdout.write('done.\n') except: sys.stdout.write('failed.\n') return None - # check to see what we got returned - if data['items'] == [] and data['channel'] == {}: - sys.stdout.write('Feed %s is upto date.\n' % self.feedURL) - return self.blogObject else: - sys.stdout.write('Downloading feed from %s (no cache)...' % self.feedURL) + sys.stdout.write('Downloading feed (no cache) %s... ' % self.feedURL) try: data = feedparser.parse(self.feedURL) sys.stdout.write('done.\n') @@ -82,6 +87,10 @@ class XMLParse: item.blogURL = data['channel']['link'] else: item.blogURL = self.feedURL + if data['feed'].has_key ('image') and data['feed']['image'].has_key ('url'): + item.imageURL = data['feed']['image']['url'] + if data['feed'].has_key ('image') and data['feed']['image'].has_key ('link'): + item.imageLink = data['feed']['image']['link'] for entry in data['items']: blogItem = BlogItem() if entry.has_key('title'): @@ -92,13 +101,16 @@ class XMLParse: blogItem.itemURL = entry['link'] else: blogItem.itemURL = item.blogURL - if entry.has_key('date_parsed'): - blogItem.itemDate = time.mktime(entry['date_parsed']) + 28800 + if entry.has_key('modified_parsed'): + try: blogItem.itemDate = time.mktime(entry['modified_parsed']) + 28800 + except: blogItem.itemDate = 0 else: blogItem.itemDate = 0 if entry.has_key('description'): blogItem.contents = entry['description'] else: blogItem.contents = '(entry could not be retrieved)' + if entry.has_key ('comments'): + blogItem.commentsURL = entry['comments'] item.items.append(blogItem) return item