4 # Parse arbitrary XML news streams into an object type
5 # understandable by Planet UCC.
6 # Now uses feedparser to parse 9 different types of RSS _and_ Atom
# Put the relative 'extra' directory ahead of every other module search path.
# NOTE(review): presumably this is where a bundled feedparser lives; the
# import statement itself is not visible in this listing - confirm.
13 sys.path.insert(0, 'extra')
# Prefix feedparser's existing User-Agent string with a Planet UCC identifier
# and project URL.
16 feedparser.USER_AGENT = "PlanetUCC/1.0b +http://planet.ucc.asn.au/ %s" % feedparser.USER_AGENT
34 self.commentsURL = None
# Constructor: keeps the blog object supplied by the caller. Later code reads
# its `cache` attribute (etag/date) and returns the object unchanged when the
# feed reports nothing new.
# NOTE(review): no line visible here stores `URL`, yet `self.feedURL` is read
# further down - the listing appears to skip a line; confirm in the full file.
38 def __init__(self, URL, blogObject):
40 self.blogObject = blogObject
# Download step. Progress is reported on stdout: a 'Downloading feed ...'
# prefix followed by 'cached.', 'done.' or 'failed.'.
# NOTE(review): the enclosing `def` line and the try/except/else lines are not
# visible in this listing; the 'failed.' writes are presumably inside exception
# handlers, and what is returned after a failure cannot be seen - confirm.
43 "Return a single Blog object"
# Cached path: taken only when a previous blog object exists and carries
# cache data.
45 if self.blogObject and self.blogObject.cache:
46 sys.stdout.write('Downloading feed %s... ' % self.feedURL)
# The stored etag and date are passed as the 2nd and 3rd arguments.
48 data = feedparser.parse(self.feedURL, self.blogObject.cache.etag, self.blogObject.cache.date)
49 # check to see what we got returned
# No items and an empty channel is treated as "unchanged": the previous blog
# object is handed back as-is.
50 if data['items'] == [] and data['channel'] == {}:
51 sys.stdout.write('cached.\n')
52 return self.blogObject
54 sys.stdout.write('done.\n')
56 sys.stdout.write('failed.\n')
# Uncached path: plain fetch of the feed URL with no extra arguments.
59 sys.stdout.write('Downloading feed (no cache) %s... ' % self.feedURL)
61 data = feedparser.parse(self.feedURL)
62 sys.stdout.write('done.\n')
64 sys.stdout.write('failed.\n')
# Build a fresh cache object from the 'etag' and 'modified' values of the
# parsed result.
# NOTE(review): the lines between these statements are missing from this
# listing, so the fallbacks used when 'etag' or 'modified' is absent, and where
# `cache` is stored afterwards, cannot be seen here - confirm in the full file.
68 cache = CacheHandler.CacheObject()
70 cache.etag = data['etag']
74 cache.date = data['modified']
# Diagnostic written to stderr; presumably from an exception handler - confirm.
79 sys.stderr.write('DEBUG: XMLParse2: cache item generation failed\n')
# Copy feed-level metadata onto `item` (title, URL, optional image).
# NOTE(review): the `else:` lines are not visible in this listing; the second
# assignment of each pair is presumably the fallback branch - confirm.
# NOTE(review): dict.has_key() exists only in Python 2.
# NOTE(review): both data['channel'] and data['feed'] are read here; presumably
# feedparser exposes them as aliases - confirm against the feedparser docs.
81 # parse the return of data into a blog
# Title, with '(Unknown)' as the alternative value.
82 if data['channel'].has_key('title'):
83 item.blogTitle = data['channel']['title']
85 item.blogTitle = '(Unknown)'
# Blog URL, with the feed URL itself as the alternative value.
86 if data['channel'].has_key('link'):
87 item.blogURL = data['channel']['link']
89 item.blogURL = self.feedURL
# Image URL and image link are only set when the feed supplies them.
90 if data['feed'].has_key ('image') and data['feed']['image'].has_key ('url'):
91 item.imageURL = data['feed']['image']['url']
92 if data['feed'].has_key ('image') and data['feed']['image'].has_key ('link'):
93 item.imageLink = data['feed']['image']['link']
# Convert every entry in data['items'] into a blog item and append it to
# item.items.
# NOTE(review): the line creating `blogItem` and the `else:` lines are not
# visible in this listing; the second assignment of each pair is presumably
# the fallback branch - confirm in the full file.
94 for entry in data['items']:
# Title, with '(Untitled)' as the alternative value.
96 if entry.has_key('title'):
97 blogItem.itemTitle = entry['title']
99 blogItem.itemTitle = '(Untitled)'
# Entry link, with the blog URL as the alternative value.
100 if entry.has_key('link'):
101 blogItem.itemURL = entry['link']
103 blogItem.itemURL = item.blogURL
# Date: modified_parsed converted with time.mktime, plus 28800 seconds
# (8 hours); any exception, or a missing field, yields 0.
# NOTE(review): time.mktime interprets its argument as local time, and the
# fixed +28800 looks like a hard-coded UTC+8 adjustment - confirm intent.
# NOTE(review): the bare `except:` also hides unrelated errors.
104 if entry.has_key('modified_parsed'):
105 try: blogItem.itemDate = time.mktime(entry['modified_parsed']) + 28800
106 except: blogItem.itemDate = 0
108 blogItem.itemDate = 0
# Body text: first 'content' element of type text/html, else the entry
# description, else a placeholder string.
109 if entry.has_key('content'):
110 # get the contents of the first item with a text/html type
111 # no feeds without a text/html type have been encountered in the wild, but who knows
# NOTE(review): the trailing [0] raises IndexError when no content element has
# type 'text/html'; no handler for it is visible in this listing.
112 blogItem.contents = [content['value'] for content in entry['content'] if content['type'] == 'text/html'][0]
113 elif entry.has_key('description'):
114 blogItem.contents = entry['description']
116 blogItem.contents = '(entry could not be retrieved)'
# Comments URL is copied only when the entry provides one.
117 if entry.has_key ('comments'):
118 blogItem.commentsURL = entry['comments']
119 item.items.append(blogItem)