--- /dev/null
+#
+# XMLParse2.py
+#
+# Parse arbitrary XML news streams into an object type
+# understandable by Planet UCC.
+# Now uses feedparser to parse 9 different types of RSS _and_ Atom
+#
+#
+
+import sys, time
+import CacheHandler
+sys.path.insert(0, 'extra')
+import feedparser
+
class Blog:
	"""A single aggregated news feed.

	Holds the feed-level metadata, the list of parsed entries
	(BlogItem instances), and the HTTP cache data (etag/date)
	used for conditional re-fetching.
	"""
	def __init__(self):
		# Feed-level metadata; filled in by XMLParse.parse().
		self.blogTitle = None
		self.blogURL = None
		self.feedURL = None
		# Optional feed image (not populated by this parser).
		self.imageURL = None
		self.imageLink = None
		# Parsed entries and HTTP caching info.
		self.items = []
		self.cache = None
+
class BlogItem:
	"""One entry within a Blog: title, timestamp, link, and body text."""
	def __init__(self):
		# All fields are filled in by XMLParse.parse(); itemDate is a
		# Unix timestamp (float) or 0 when the feed supplied no date.
		self.itemTitle = None
		self.itemDate = None
		self.itemURL = None
		self.contents = None
+
class XMLParse:
	"""Fetch an RSS/Atom feed with feedparser and convert it to a Blog.

	If a previously parsed ``blogObject`` (with cache data) is supplied,
	the fetch is conditional: the stored ETag/Last-Modified values are
	sent so an unchanged feed is detected and the old object reused.
	"""
	def __init__(self, URL, blogObject):
		# URL of the feed to fetch; blogObject is the previous parse
		# result (a Blog, possibly carrying cache data) or None.
		self.feedURL = URL
		self.blogObject = blogObject

	def parse(self):
		"Return a single Blog object, or None if the fetch failed"
		item = Blog()
		if self.blogObject:
			# Conditional fetch: hand feedparser the cached ETag and
			# date so the server can reply 304 Not Modified.
			sys.stdout.write('Downloading feed %s...' % self.feedURL)
			try:
				data = feedparser.parse(self.feedURL, self.blogObject.cache.etag, self.blogObject.cache.date)
				sys.stdout.write('done.\n')
			except Exception:
				# BUG FIX: the original did `raise` followed by an
				# unreachable `return None`; return None here so this
				# branch matches the uncached branch below and callers
				# get a consistent failure signal.
				sys.stdout.write('failed.\n')
				return None
			# An empty items list AND empty channel means the server
			# said "not modified" — keep the cached object as-is.
			if data['items'] == [] and data['channel'] == {}:
				sys.stdout.write('Feed %s is upto date.\n' % self.feedURL)
				return self.blogObject
		else:
			# No previous state: plain unconditional fetch.
			sys.stdout.write('Downloading feed from %s (no cache)...' % self.feedURL)
			try:
				data = feedparser.parse(self.feedURL)
				sys.stdout.write('done.\n')
			except Exception:
				sys.stdout.write('failed.\n')
				return None
		# Record the server's cache validators for the next fetch;
		# feedparser omits these keys when the server sent none, so
		# only KeyError is expected here (was a bare except).
		try:
			cache = CacheHandler.CacheObject()
			cache.etag = data['etag']
			cache.date = data['modified']
			item.cache = cache
		except KeyError:
			item.cache = None
		# Map feed-level fields onto the Blog, with fallbacks.
		# (dict.get replaces the Python-2-only has_key calls.)
		item.blogTitle = data['channel'].get('title', '(Unknown)')
		item.blogURL = data['channel'].get('link', self.feedURL)
		for entry in data['items']:
			blogItem = BlogItem()
			blogItem.itemTitle = entry.get('title', '(Untitled)')
			blogItem.itemURL = entry.get('link', item.blogURL)
			if 'date_parsed' in entry:
				# Convert feedparser's struct_time to a Unix timestamp.
				blogItem.itemDate = time.mktime(entry['date_parsed'])
			else:
				blogItem.itemDate = 0
			blogItem.contents = entry.get('description', '(entry could not be retrieved)')
			item.items.append(blogItem)
		return item