X-Git-Url: https://git.ucc.asn.au/?p=planet-ucc.git;a=blobdiff_plain;f=XMLParse2.py;h=488a3443072c5aca543047bdd65f067b188bc2d7;hp=861b33322de0bd19c79489eea21da4369b5fc9a7;hb=HEAD;hpb=dc7db379d1b11f9de8894865689f1c626d62a523 diff --git a/XMLParse2.py b/XMLParse2.py index 861b333..488a344 100644 --- a/XMLParse2.py +++ b/XMLParse2.py @@ -31,6 +31,7 @@ class BlogItem: self.itemTitle = None self.itemDate = None self.itemURL = None + self.commentsURL = None self.contents = None class XMLParse: @@ -42,19 +43,20 @@ class XMLParse: "Return a single Blog object" item = Blog() if self.blogObject and self.blogObject.cache: - sys.stdout.write('Downloading feed %s...' % self.feedURL) + sys.stdout.write('Downloading feed %s... ' % self.feedURL) try: data = feedparser.parse(self.feedURL, self.blogObject.cache.etag, self.blogObject.cache.date) - sys.stdout.write('done.\n') + # check to see what we got returned + if data['items'] == [] and data['channel'] == {}: + sys.stdout.write('cached.\n') + return self.blogObject + else: + sys.stdout.write('done.\n') except: sys.stdout.write('failed.\n') return None - # check to see what we got returned - if data['items'] == [] and data['channel'] == {}: - sys.stdout.write('Feed %s is upto date.\n' % self.feedURL) - return self.blogObject else: - sys.stdout.write('Downloading feed from %s (no cache)...' % self.feedURL) + sys.stdout.write('Downloading feed (no cache) %s... ' % self.feedURL) try: data = feedparser.parse(self.feedURL) sys.stdout.write('done.\n') @@ -85,6 +87,10 @@ class XMLParse: item.blogURL = data['channel']['link'] else: item.blogURL = self.feedURL + if data['feed'].has_key ('image') and data['feed']['image'].has_key ('url'): + item.imageURL = data['feed']['image']['url'] + if data['feed'].has_key ('image') and data['feed']['image'].has_key ('link'): + item.imageLink = data['feed']['image']['link'] for entry in data['items']: blogItem = BlogItem() if entry.has_key('title'): @@ -95,13 +101,20 @@ class XMLParse: blogItem.itemURL = entry['link'] else: blogItem.itemURL = item.blogURL - if entry.has_key('date_parsed'): - blogItem.itemDate = time.mktime(entry['date_parsed']) + 28800 + if entry.has_key('modified_parsed'): + try: blogItem.itemDate = time.mktime(entry['modified_parsed']) + 28800 + except: blogItem.itemDate = 0 else: blogItem.itemDate = 0 - if entry.has_key('description'): + if entry.has_key('content'): + # get the contents of the first item with a text/html type + # no feeds without a text/html type have been encountered in the wild, but who knows + blogItem.contents = [content['value'] for content in entry['content'] if content['type'] == 'text/html'][0] + elif entry.has_key('description'): blogItem.contents = entry['description'] else: blogItem.contents = '(entry could not be retrieved)' + if entry.has_key ('comments'): + blogItem.commentsURL = entry['comments'] item.items.append(blogItem) return item