Added feeds, caching tweaks and sorting feedlist alphabetically
[planet-ucc.git] / XMLParse2.py
1 #
2 # XMLParse2.py
3 #
4 # Parse arbitrary XML news streams into an object type
5 # understandable by Planet UCC.
6 # Now uses feedparser to parse 9 different types of RSS _and_ Atom
7 #
8 # (c) 2004, Davyd Madeley <[email protected]>
9 #
10
11 import sys, time
12 import CacheHandler
13 sys.path.insert(0, 'extra')
14 import feedparser
15
class Blog:
        """Plain record holding one blog's metadata, its items and cache data.

        Every field starts out unset; whoever builds the object is expected
        to fill the fields in afterwards.
        """
        def __init__(self):
                # title and addresses of the blog and of its feed
                self.blogTitle = self.blogURL = self.feedURL = None
                # optional image for the blog
                self.imageURL = self.imageLink = None
                # entries of this blog; a fresh list for every instance
                self.items = list()
                # caching information, None until some is attached
                self.cache = None
25
class BlogItem:
        """Plain record for a single entry of a blog.

        All four fields start out as None and are assigned by the code
        that creates the item.
        """
        def __init__(self):
                # headline and date of the entry
                self.itemTitle = self.itemDate = None
                # address of the entry and its body text
                self.itemURL = self.contents = None
32
class XMLParse:
        """Download one feed with feedparser and turn it into a Blog object.

        URL is the address of the feed.  blogObject is a previously built
        Blog (or None); when it carries a cache, the cache's etag and date
        are handed to feedparser.parse() together with the URL.
        """

        # Seconds added to time.mktime() of every entry's parsed date.
        # NOTE(review): 28800 s is 8 hours, presumably a fixed UTC+8
        # timezone correction -- confirm before changing.  It lives in a
        # class attribute so it can be overridden without editing parse().
        DATE_OFFSET     = 28800

        def __init__(self, URL, blogObject):
                self.feedURL    = URL
                self.blogObject = blogObject

        def _download(self, message, cache=None):
                "Fetch the feed, reporting progress on stdout; None on failure"
                sys.stdout.write(message)
                try:
                        if cache is not None:
                                data    = feedparser.parse(self.feedURL, cache.etag, cache.date)
                        else:
                                data    = feedparser.parse(self.feedURL)
                except Exception:
                        # a broken feed must not abort the run, but Ctrl-C
                        # and sys.exit() are allowed to propagate
                        sys.stdout.write('failed.\n')
                        return None
                sys.stdout.write('done.\n')
                return data

        def _entryDate(self, entry):
                "Return the entry's date in seconds since the epoch, 0 if unusable"
                parsed  = entry.get('date_parsed')
                if parsed is None:
                        return 0
                try:
                        return time.mktime(parsed) + self.DATE_OFFSET
                except (TypeError, ValueError, OverflowError):
                        # malformed or out-of-range date: treat as undated
                        return 0

        def parse(self):
                """Return a single Blog object.

                Returns None when the download fails, the Blog passed to
                the constructor when a cached download comes back with no
                items and an empty channel, and a newly built Blog
                otherwise.
                """
                item            = Blog()
                previous        = self.blogObject
                if previous and previous.cache:
                        data    = self._download('Downloading feed %s...' % self.feedURL, previous.cache)
                        if data is None:
                                return None
                        # check to see what we got returned
                        if data['items'] == [] and data['channel'] == {}:
                                sys.stdout.write('Feed %s is upto date.\n' % self.feedURL)
                                return previous
                else:
                        data    = self._download('Downloading feed from %s (no cache)...' % self.feedURL)
                        if data is None:
                                return None
                # create caching data; if the result lacks 'etag' or
                # 'modified' (or the cache object cannot be built) the
                # blog simply goes without a cache
                try:
                        cache           = CacheHandler.CacheObject()
                        cache.etag      = data['etag']
                        cache.date      = data['modified']
                        item.cache      = cache
                except Exception:
                        item.cache      = None
                # parse the return of data into a blog; .get() replaces
                # has_key(), which plain dicts no longer have in Python 3
                channel         = data['channel']
                item.blogTitle  = channel.get('title', '(Unknown)')
                item.blogURL    = channel.get('link', self.feedURL)
                for entry in data['items']:
                        blogItem                = BlogItem()
                        blogItem.itemTitle      = entry.get('title', '(Untitled)')
                        # an entry without its own link points at the blog
                        blogItem.itemURL        = entry.get('link', item.blogURL)
                        blogItem.itemDate       = self._entryDate(entry)
                        blogItem.contents       = entry.get('description', '(entry could not be retrieved)')
                        item.items.append(blogItem)
                return item

UCC git Repository :: git.ucc.asn.au