CSS tweak
[planet-ucc.git] / XMLParse2.py
1 #
2 # XMLParse2.py
3 #
4 # Parse arbitrary XML news streams into an object type
5 # understandable by Planet UCC.
6 # Now uses feedparser to parse 9 different types of RSS _and_ Atom
7 #
8 # (c) 2004, Davyd Madeley <[email protected]>
9 #
10
11 import sys, time
12 import CacheHandler
13 sys.path.insert(0, 'extra')
14 import feedparser
15
# Prefix feedparser's stock User-Agent string with this aggregator's name,
# version and home URL; the original value is kept after it.
feedparser.USER_AGENT = "PlanetUCC/1.0b +http://planet.ucc.asn.au/ %s" % feedparser.USER_AGENT
17
class Blog:
    """One feed: channel-level metadata plus the list of its entries.

    Every scalar field starts out as None and is filled in by whoever
    builds the object; `items` starts as a fresh empty list.
    """

    def __init__(self):
        # Channel metadata.  XMLParse.parse() fills in blogTitle and
        # blogURL; the remaining fields are left for other code to set.
        self.blogTitle = self.blogURL = self.feedURL = None
        self.imageURL = self.imageLink = None
        # BlogItem objects, one per feed entry (a new list per instance).
        self.items = []
        # Caching data for the next download, or None when there is none.
        self.cache = None
27
class BlogItem:
    """A single entry of a feed; every field is None until populated."""

    def __init__(self):
        # Title and link of the entry.
        self.itemTitle = self.itemURL = None
        # Numeric timestamp of the entry (XMLParse.parse() stores 0 when
        # the feed gives no date).
        self.itemDate = None
        # Body text of the entry.
        self.contents = None
34
class XMLParse:
    """Download one feed with feedparser and convert it into a Blog.

    URL        -- address of the feed to fetch.
    blogObject -- the Blog produced by a previous run, or None.  When it
                  carries a `cache` object, that object's `etag` and
                  `date` are handed to feedparser.parse().
    """

    # Seconds added to time.mktime() of every entry date (8 hours).
    # NOTE(review): presumably compensates for mktime() treating
    # feedparser's parsed date as local time on a UTC+8 host -- confirm;
    # calendar.timegm() would not depend on the host timezone.
    DATE_OFFSET = 28800

    def __init__(self, URL, blogObject):
        self.feedURL = URL
        self.blogObject = blogObject

    @staticmethod
    def _field(mapping, key, default):
        "Return mapping[key], or default when the key is missing"
        # Subscripting (rather than has_key(), which Python 3 removed) is
        # the same access path used to read the value itself.
        try:
            return mapping[key]
        except KeyError:
            return default

    def _entry_timestamp(self, entry):
        "Return the entry's date as a number, or 0 when it has no usable date"
        try:
            parsed = entry['date_parsed']
        except KeyError:
            return 0
        try:
            return time.mktime(parsed) + self.DATE_OFFSET
        except (TypeError, ValueError, OverflowError):
            # A None or malformed date must not abort the whole feed;
            # treat it like a missing date.
            return 0

    def _download(self, cache=None):
        "Fetch and parse the feed; report the outcome and return None on failure"
        try:
            if cache is None:
                data = feedparser.parse(self.feedURL)
            else:
                data = feedparser.parse(self.feedURL, cache.etag, cache.date)
        except Exception:
            # Best effort: a broken feed is reported and skipped.  Not a
            # bare except, so Ctrl-C / SystemExit still propagate.
            sys.stdout.write('failed.\n')
            return None
        sys.stdout.write('done.\n')
        return data

    def _build_cache(self, data):
        "Return a CacheObject holding the etag/modified of data, or None"
        try:
            cache = CacheHandler.CacheObject()
            cache.etag = self._field(data, 'etag', None)
            cache.date = self._field(data, 'modified', None)
            return cache
        except Exception:
            sys.stderr.write('DEBUG: XMLParse2: cache item generation failed\n')
            return None

    def parse(self):
        """Return a single Blog object.

        Returns a new Blog built from the downloaded feed, the Blog that
        was passed to the constructor when the cached download comes back
        with no items and an empty channel, or None when the download
        fails.
        """
        previous = self.blogObject
        if previous and previous.cache:
            sys.stdout.write('Downloading feed %s...' % self.feedURL)
            data = self._download(previous.cache)
            if data is None:
                return None
            # check to see what we got returned
            if data['items'] == [] and data['channel'] == {}:
                sys.stdout.write('Feed %s is upto date.\n' % self.feedURL)
                return previous
        else:
            sys.stdout.write('Downloading feed from %s (no cache)...' % self.feedURL)
            data = self._download()
            if data is None:
                return None

        item = Blog()
        # create caching data
        item.cache = self._build_cache(data)

        # parse the return of data into a blog
        channel = data['channel']
        item.blogTitle = self._field(channel, 'title', '(Unknown)')
        item.blogURL = self._field(channel, 'link', self.feedURL)
        for entry in data['items']:
            blogItem = BlogItem()
            blogItem.itemTitle = self._field(entry, 'title', '(Untitled)')
            # An entry without its own link points at the blog itself.
            blogItem.itemURL = self._field(entry, 'link', item.blogURL)
            blogItem.itemDate = self._entry_timestamp(entry)
            blogItem.contents = self._field(entry, 'description',
                                            '(entry could not be retrieved)')
            item.items.append(blogItem)
        return item

UCC git Repository :: git.ucc.asn.au