#
# Planet UCC -- "Initial Upload"
#
# From: davyd, Sat, 7 Feb 2004 05:44:17 +0000
# Commit: b2d7ed4b39a5a03fb8f264e26b0d04a502928ddf
# https://git.ucc.asn.au/?p=planet-ucc.git;a=commitdiff_plain;h=b2d7ed4b39a5a03fb8f264e26b0d04a502928ddf
#
# The upload added two modules, XMLParse.py and XMLWriter.py.  Both are
# reproduced below, in that order, as plain Python.
#

import copy
import sys
import time
from email.utils import mktime_tz, parsedate_tz
from xml.dom.minidom import parseString
from xml.sax.saxutils import escape


#
# XMLParse.py
#
# Parse arbitrary XML news streams into an object type
# understandable by Planet UCC.
#
# (c) 2004, Davyd Madeley
#

class Blog:
    """One news feed: its title, links, optional image and its items."""

    def __init__(self):
        self.blogTitle = None
        self.blogURL = None
        self.imageURL = None
        self.imageLink = None
        self.items = []


class BlogItem:
    """One entry of a feed.  itemDate is seconds since the epoch, or None."""

    def __init__(self):
        self.itemTitle = None
        self.itemDate = None
        self.itemURL = None
        self.contents = None


class XMLParse:
    """Detect the flavour of an XML news document and parse it into Blogs."""

    def __init__(self, XMLString):
        """Parse XMLString and choose a format-specific parser for it.

        Unsupported documents are reported on stderr; parse() then returns
        an empty list.  Malformed XML raises whatever parseString raises.
        """
        # parse our XML file
        self.dom = parseString(XMLString)
        self.bloglist = None
        # stays None when the document is not in a format we understand
        self.news = None
        root = self.dom.documentElement
        # find out what sort of XML format we're dealing with
        if root.tagName != 'rss':
            sys.stderr.write('DEBUG: XMLParse: Unknown XML document %s\n' % root.tagName)
            return
        # this is some sort of RSS feed, find out what version
        if not root.hasAttribute('version'):
            sys.stderr.write('DEBUG: XMLParse: RSS document has no version information\n')
            return
        version = root.getAttribute('version')
        if version == '2.0':
            # this is an RSS2 document
            self.news = RSS2Parse(self.dom)
        else:
            sys.stderr.write('DEBUG: XMLParse: Unknown RSS version %s\n' % version)

    def parse(self):
        """Return a list of Blog objects from the XML file we parsed.

        The list is built once and cached; repeated calls return the same
        list object.  It is empty for documents in an unsupported format.
        """
        # compare against None so that an empty result is cached as well
        if self.bloglist is None:
            if self.news is None:
                self.bloglist = []
            else:
                self.bloglist = self.news.parse()
        return self.bloglist


class RSS2Parse:
    """Parser for RSS 2.0 documents."""

    def __init__(self, dom):
        self.dom = dom
        self.root = dom.documentElement

    def __retrieve_value__(self, fromNode):
        """Return the character data directly inside fromNode.

        Text and CDATA children are concatenated in document order (feeds
        commonly wrap HTML descriptions in CDATA).  Returns None, after a
        message on stderr, when fromNode has no such children.
        """
        pieces = []
        for node in fromNode.childNodes:
            if node.nodeType in (node.TEXT_NODE, node.CDATA_SECTION_NODE):
                pieces.append(node.nodeValue)
        if not pieces:
            sys.stderr.write('DEBUG: RSS2Parse: Asked to retrieve value from wrong part of tree\n')
            return None
        return ''.join(pieces)

    def __parse_date__(self, text):
        """Return text (an RFC 822 date) as seconds since the epoch, or None.

        The zone named in the string (GMT, +1000, ...) is honoured, and
        parsing does not depend on the process locale.
        """
        parsed = None
        if text:
            parsed = parsedate_tz(text.strip())
        if parsed is not None:
            try:
                return float(mktime_tz(parsed))
            except (ValueError, OverflowError, TypeError):
                # syntactically a date, but with out-of-range fields
                pass
        sys.stderr.write("DEBUG: RSS2Parse: time string %s unparseable\n" % text)
        return None

    def __parse_item__(self, fromNode):
        """Return a BlogItem collected from the <item> element fromNode."""
        item = BlogItem()
        for node in fromNode.childNodes:
            if node.nodeType != node.ELEMENT_NODE:
                continue
            if node.tagName == 'title':
                item.itemTitle = self.__retrieve_value__(node)
            elif node.tagName == 'pubDate':
                item.itemDate = self.__parse_date__(self.__retrieve_value__(node))
            elif node.tagName == 'link':
                item.itemURL = self.__retrieve_value__(node)
            elif node.tagName == 'description':
                item.contents = self.__retrieve_value__(node)
        return item

    def __parse_image__(self, fromNode, channel):
        """Copy the url/link children of an <image> element onto channel."""
        for node in fromNode.childNodes:
            if node.nodeType != node.ELEMENT_NODE:
                continue
            if node.tagName == 'url':
                channel.imageURL = self.__retrieve_value__(node)
            elif node.tagName == 'link':
                channel.imageLink = self.__retrieve_value__(node)

    def parse(self):
        """Return a list of Blog objects, one per <channel> in the document."""
        channellist = []
        for node in self.root.childNodes:
            if node.nodeType != node.ELEMENT_NODE or node.tagName != 'channel':
                continue
            channel = Blog()
            channellist.append(channel)
            # populate channel with information from the blog
            for child in node.childNodes:
                if child.nodeType != child.ELEMENT_NODE:
                    continue
                if child.tagName == 'title':
                    channel.blogTitle = self.__retrieve_value__(child)
                elif child.tagName == 'link':
                    channel.blogURL = self.__retrieve_value__(child)
                elif child.tagName == 'image':
                    self.__parse_image__(child, channel)
                elif child.tagName == 'item':
                    channel.items.append(self.__parse_item__(child))
        return channellist


#
# XMLWriter.py
#
# Generate arbitrary XML files
#
# (c) 2004, Davyd Madeley
#

class PlanetItem:
    """A feed entry flattened together with the details of its blog."""

    def __init__(self, blog, item):
        self.itemTitle = item.itemTitle
        self.itemURL = item.itemURL
        self.itemDate = item.itemDate
        self.blogTitle = blog.blogTitle
        self.blogURL = blog.blogURL
        self.imageURL = blog.imageURL
        self.imageLink = blog.imageLink
        self.contents = item.contents


class PlanetDate:
    """The PlanetItems that share one local calendar day."""

    def __init__(self, date):
        # timestamp of the first (newest) item filed under this day
        self.planetDate = date
        self.items = []


class Planet:
    """Merge several Blogs into one newest-first feed grouped by day."""

    def __init__(self, bloglist):
        self.__bloglist__ = bloglist
        self.__tainted__ = True
        self.dates = []

    def append(self, blog):
        """Add another blog; the next sort() re-merges everything."""
        self.__bloglist__.append(blog)
        self.__tainted__ = True

    def __getNext__(self, bloglist):
        """Pop and return the newest head item of bloglist as a PlanetItem.

        Only the first item of each blog is examined, so every blog is
        expected to list its items newest first.  Items without a date are
        discarded: they cannot be ordered, and leaving one at the head
        would hide everything behind it.  Returns None once every blog is
        exhausted.  The items lists of the given blogs are consumed.
        """
        latestTime = None
        holdingBlog = None
        for blog in bloglist:
            while blog.items and blog.items[0].itemDate is None:
                blog.items.pop(0)
            if not blog.items:
                continue
            headDate = blog.items[0].itemDate
            # strict comparison: on a tie the earlier blog in the list wins
            if holdingBlog is None or headDate > latestTime:
                latestTime = headDate
                holdingBlog = blog
        if holdingBlog is None:
            return None
        item = holdingBlog.items.pop(0)
        return PlanetItem(holdingBlog, item)

    def sort(self):
        """Return a list of PlanetDate objects, newest day first.

        The merge is redone only when blogs were added since the previous
        call.  The Blog objects handed to the Planet are left untouched.
        """
        if self.__tainted__:
            # __getNext__ consumes the lists it is given, so hand it
            # per-blog copies instead of the callers' own item lists
            working = []
            for blog in self.__bloglist__:
                clone = copy.copy(blog)
                clone.items = list(blog.items)
                working.append(clone)
            lastDay = None
            workingDate = None
            self.dates = []
            while True:
                nextItem = self.__getNext__(working)
                if nextItem is None:
                    break
                # (year, month, day) in local time; a change starts a new day
                day = time.localtime(nextItem.itemDate)[:3]
                if day != lastDay:
                    workingDate = PlanetDate(nextItem.itemDate)
                    self.dates.append(workingDate)
                    lastDay = day
                # append the item to the current date
                workingDate.items.append(nextItem)
            self.__tainted__ = False
        return self.dates


class XMLWriter:
    """Sort a list of blogs and render them with the given writer class."""

    def __init__(self, doctype, bloglist):
        self.planet = Planet(bloglist)
        self.items = self.planet.sort()
        # doctype should be something like XMLWriter.XHTMLWriter
        self.writer = doctype(self.items)

    def write(self):
        """Return the document produced by the chosen writer."""
        return self.writer.write()


class XHTMLWriter:
    """Render a list of PlanetDate objects as an XHTML page.

    NOTE(review): the markup inside this class's string literals was lost
    from the copy of the commit under review (only the text between the
    tags survived).  The element names, class attributes and DOCTYPE below
    are a reconstruction -- confirm them against the repository.
    """

    def __init__(self, planet):
        # planet is the list of PlanetDate objects returned by Planet.sort()
        self.planet = planet
        self.maxitems = 100

    def __write_item__(self, item):
        """Return the XHTML fragment for a single PlanetItem."""
        # titles are plain text from the feed, so escape them for XHTML
        blogTitle = escape(item.blogTitle or '')
        itemTitle = escape(item.itemTitle or '')
        stamp = time.strftime('%A %B %d, %Y %H:%M %Z', time.localtime(item.itemDate))
        parts = [
            '<div class="item">\n',
            '<h3>%s: %s</h3>\n' % (blogTitle, itemTitle),
            '<p class="time">\n',
            '(%s)\n' % stamp,
            '</p>\n',
            '<div class="body">\n',
            # NOTE: the description is feed-supplied HTML and is inserted
            # verbatim, as the original did; it is not sanitised here.
            item.contents or '',
            '\n</div>\n',
            '</div>\n',
        ]
        return ''.join(parts)

    def write(self):
        """Return the whole page, holding at most self.maxitems items."""
        itemcount = 0
        parts = [
            '<?xml version="1.0"?>\n',
            '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" '
            '"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">\n',
            '<html xmlns="http://www.w3.org/1999/xhtml">\n',
            '<head>\n',
            '<title>Planet UCC</title>\n',
            # XXX: we'll want a style sheet in here
            '</head>\n',
            '<body>\n',
        ]
        # XXX: we want some stuff in here, I'm sure
        for date in self.planet:
            heading = time.strftime('%A %B %d, %Y', time.localtime(date.planetDate))
            parts.append('<h2>%s</h2>\n' % heading)
            for item in date.items:
                parts.append(self.__write_item__(item))
                # see how many items we've written
                itemcount += 1
                if itemcount >= self.maxitems:
                    break
            # again, check to see if we've written the maximum number of items
            if itemcount >= self.maxitems:
                break
        # XXX: we want further stuff here
        parts.append('</body>')
        parts.append('</html>')
        return ''.join(parts)