--- /dev/null
+#
+# XMLParse.py
+#
+# Parse arbitrary XML news streams into an object type
+# understandable by Planet UCC.
+#
+#
+
+import sys, time
+from xml.dom.minidom import parseString
+
class Blog:
    "Plain record describing one feed (channel) and the items read from it."

    def __init__(self):
        # channel metadata; everything stays None until a parser fills it in
        self.blogTitle = self.blogURL = None
        self.imageURL = self.imageLink = None
        # BlogItem objects belonging to this feed
        self.items = []
+
class BlogItem:
    "Plain record describing a single entry of a feed."

    def __init__(self):
        # all fields are unset (None) until a parser assigns them
        self.itemTitle = self.itemDate = None
        self.itemURL = self.contents = None
+
class XMLParse:
    """Detect the format of an XML news feed and hand it to a matching parser.

    Only RSS 2.0 is recognised.  For any other document a DEBUG line is
    written to stderr, no parser is selected and parse() yields an empty
    list.
    """

    def __init__(self, XMLString):
        """Parse XMLString and pick a format-specific parser for it.

        XMLString -- the complete XML document as a string.

        Whatever xml.dom.minidom.parseString raises for malformed XML is
        propagated to the caller.
        """
        # parse our XML file
        self.dom = parseString(XMLString)
        self.bloglist = None
        # format-specific parser; stays None when the format is unsupported
        self.news = None
        root = self.dom.documentElement
        # find out what sort of XML format we're dealing with
        if root.tagName != 'rss':
            sys.stderr.write('DEBUG: XMLParse: Unknown XML document %s\n' % root.tagName)
            return
        # this is some sort of RSS feed -- find out what version
        # (hasAttribute/getAttribute are standard DOM calls, unlike the
        # dict-style has_key() which newer minidom versions do not provide)
        if not root.hasAttribute('version'):
            sys.stderr.write('DEBUG: XMLParse: RSS document has no version information\n')
            return
        version = root.getAttribute('version')
        if version == '2.0':
            # this is an RSS2 document
            self.news = RSS2Parse(self.dom)
        else:
            sys.stderr.write('DEBUG: XMLParse: Unknown RSS version %s\n' % version)

    def parse(self):
        "Return a list of Blog objects from the XML file we parsed"
        # quick cache for XML parsing; test against None so that an empty
        # result is cached as well
        if self.bloglist is None:
            if self.news is None:
                # unsupported document: nothing to report
                self.bloglist = []
            else:
                self.bloglist = self.news.parse()
        return self.bloglist
+
class RSS2Parse:
    """Turn an RSS 2.0 DOM tree into Blog / BlogItem objects.

    Node types are compared against the DOM numeric constants:
    1 is an element, 3 is a text node and 4 is a CDATA section.
    """

    def __init__(self, dom):
        "dom -- a parsed document whose documentElement is the <rss> element"
        self.dom = dom
        self.root = dom.documentElement

    def __retrieve_value__(self, fromNode):
        """Returns the value from between two nodes, ie <node>text</node>

        All text and CDATA children are concatenated, so content wrapped
        in <![CDATA[ ... ]]> is returned as well.  Returns None when the
        node holds no character data at all.
        """
        pieces = [node.nodeValue for node in fromNode.childNodes
                  if node.nodeType in (3, 4)]
        if pieces:
            return ''.join(pieces)
        if fromNode.childNodes:
            # there were children, but none of them carried text
            sys.stderr.write('DEBUG: RSS2Parse: Asked to retrieve value from wrong part of tree\n')
        return None

    def __parse_date__(self, dateString):
        """Convert an RFC 822 date string into seconds since the epoch.

        The zone given in the string (GMT, +0000, -0500, ...) is honoured,
        so the result does not depend on the local timezone.  Returns a
        float, or None when the string is missing or cannot be parsed.
        """
        # local import: keeps this class independent of the file's import list
        from email.utils import mktime_tz, parsedate_tz
        if not dateString:
            return None
        try:
            parsed = parsedate_tz(dateString)
            if parsed is None:
                return None
            return float(mktime_tz(parsed))
        except (TypeError, ValueError, OverflowError, IndexError):
            return None

    def __parse_item__(self, fromNode):
        "Returns a BlogItem collected from fromNode"
        item = BlogItem()
        for node in fromNode.childNodes:
            if node.nodeType != 1:
                # whitespace, comments, ... -- only elements matter
                continue
            tag = node.tagName
            if tag == 'title':
                item.itemTitle = self.__retrieve_value__(node)
            elif tag == 'pubDate':
                rawDate = self.__retrieve_value__(node)
                stamp = self.__parse_date__(rawDate)
                if stamp is None:
                    # leave itemDate untouched, just report the problem
                    sys.stderr.write("DEBUG: RSS2Parse: time string %s unparseable\n" % (rawDate))
                else:
                    item.itemDate = stamp
            elif tag == 'link':
                item.itemURL = self.__retrieve_value__(node)
            elif tag == 'description':
                item.contents = self.__retrieve_value__(node)
        return item

    def __parse_image__(self, channel, fromNode):
        "Copy the <url> and <link> children of an <image> node onto channel"
        for node in fromNode.childNodes:
            if node.nodeType != 1:
                continue
            if node.tagName == 'url':
                channel.imageURL = self.__retrieve_value__(node)
            elif node.tagName == 'link':
                channel.imageLink = self.__retrieve_value__(node)

    def parse(self):
        "Returns a list of Blog objects for parsing into an arbitrary data format."
        channellist = []
        for node in self.root.childNodes:
            if node.nodeType != 1 or node.tagName != 'channel':
                continue
            channel = Blog()
            channellist.append(channel)
            # populate channel with information from the blog
            for node2 in node.childNodes:
                if node2.nodeType != 1:
                    continue
                tag = node2.tagName
                if tag == 'title':
                    channel.blogTitle = self.__retrieve_value__(node2)
                elif tag == 'link':
                    channel.blogURL = self.__retrieve_value__(node2)
                elif tag == 'image':
                    self.__parse_image__(channel, node2)
                elif tag == 'item':
                    channel.items.append(self.__parse_item__(node2))
        return channellist
--- /dev/null
+#
+# XMLWriter.py
+#
+# Generate arbitrary XML files
+#
+#
+
+import time
+
class PlanetItem:
    "One feed entry flattened together with the details of the blog it came from."

    def __init__(self, blog, item):
        # (source object, attribute name) pairs, copied across one by one
        wanted = (
            (item, 'itemTitle'),
            (item, 'itemURL'),
            (item, 'itemDate'),
            (blog, 'blogTitle'),
            (blog, 'blogURL'),
            (blog, 'imageURL'),
            (blog, 'imageLink'),
            (item, 'contents'),
        )
        for source, name in wanted:
            setattr(self, name, getattr(source, name))
+
class PlanetDate:
    "A date heading plus the list of items collected under it."

    def __init__(self, date):
        # starts out empty; items are appended by whoever builds the listing
        self.items = []
        self.planetDate = date
+
class Planet:
    """Merge the items of several blogs into one newest-first listing.

    The result of sort() is a list of PlanetDate objects, one per local
    calendar day, each holding the PlanetItem objects of that day.
    """

    def __init__(self, bloglist):
        "bloglist -- list of blog objects, each with an items list"
        self.__bloglist__ = bloglist
        # set whenever the blog list changes and the merge has to be redone
        self.__tainted__ = True
        self.dates = []

    def append(self, blog):
        "Add another blog; the next sort() call rebuilds the listing"
        self.__bloglist__.append(blog)
        self.__tainted__ = True

    def __dayKey__(self, timestamp):
        "Returns the local (year, month, day) a timestamp falls on"
        return tuple(time.localtime(timestamp)[:3])

    def __getNext__(self, bloglist):
        """Returns a PlanetItem reaped from a bloglist

        Looks at the first item of every blog, removes the one with the
        newest itemDate from its blog and returns it; returns None once
        no dated items are left.  Leading items without a date cannot be
        ordered and are discarded.  The items lists of the given blogs
        are modified in place.
        """
        latestTime = None
        holdingBlog = None
        for blog in bloglist:
            while blog.items and blog.items[0].itemDate is None:
                blog.items.pop(0)
            if not blog.items:
                continue
            headDate = blog.items[0].itemDate
            # strict comparison: on a tie the earlier blog in the list wins
            if latestTime is None or headDate > latestTime:
                latestTime = headDate
                holdingBlog = blog
        if holdingBlog is None:
            return None
        item = holdingBlog.items.pop(0)
        return PlanetItem(holdingBlog, item)

    def sort(self):
        """Return the merged listing as a list of PlanetDate objects.

        The merge is only redone when blogs were added since the last
        call; otherwise the cached list is returned.
        """
        if not self.__tainted__:
            return self.dates
        import copy
        # __getNext__ consumes the items it returns, so work on shallow
        # clones with their own items list; the caller's blogs stay intact
        # and a later re-sort still sees every item
        working = []
        for blog in self.__bloglist__:
            clone = copy.copy(blog)
            clone.items = list(blog.items)
            working.append(clone)
        lastDay = None
        workingDate = None
        self.dates = []
        while True:
            nextItem = self.__getNext__(working)
            if nextItem is None:
                break
            # this checks to see if it's a new day
            day = self.__dayKey__(nextItem.itemDate)
            if day != lastDay:
                workingDate = PlanetDate(nextItem.itemDate)
                self.dates.append(workingDate)
                lastDay = day
            # append the item to the current date
            workingDate.items.append(nextItem)
        self.__tainted__ = False
        return self.dates
+
+
class XMLWriter:
    "Front end: sorts a list of blogs and renders it with the given writer class."

    def __init__(self, doctype, bloglist):
        planet = Planet(bloglist)
        self.planet = planet
        self.items = planet.sort()
        # doctype should be something like XMLWriter.XHTMLWriter; it is
        # called with the sorted items and must offer a write() method
        self.writer = doctype(self.items)

    def write(self):
        "Return whatever the selected writer produces"
        return self.writer.write()
+
class XHTMLWriter:
    """Render a list of PlanetDate-like objects as one XHTML 1.1 page.

    At most maxitems items (100 unless changed on the instance) are written.
    """

    def __init__(self, planet):
        "planet -- iterable of objects with planetDate and items attributes"
        self.planet = planet
        self.maxitems = 100

    def __write_item__(self, item):
        """Return the XHTML fragment for a single item.

        Titles are plain text and get XML-escaped; item.contents is
        inserted verbatim as markup.  A missing (None) title or body is
        rendered as empty.
        """
        # local import: keeps this class independent of the file's import list
        from xml.sax.saxutils import escape
        heading = '%s: %s' % (escape(item.blogTitle or ''), escape(item.itemTitle or ''))
        stamp = time.strftime('%A %B %d, %Y %H:%M %Z', time.localtime(item.itemDate))
        body = item.contents
        if body is None:
            body = ''
        return ''.join([
            # a class rather than an id: the fragment repeats for every item
            # and ids have to be unique within a document
            '<div class="item">\n',
            '<h2>%s</h2>\n' % heading,
            '<p class="time">\n',
            '(%s)\n' % stamp,
            '</p>\n',
            '<p class="body">\n',
            body,
            '\n</p>\n',
            '</div>\n',
        ])

    def write(self):
        "Return the complete XHTML document as a string"
        itemcount = 0
        parts = [
            '<?xml version="1.0" encoding="UTF-8"?>\n',
            '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">\n',
            '<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" >\n',
            '<head>\n',
            '<title>Planet UCC</title>\n',
            # XXX: we'll want a style sheet in here
            '</head>\n',
            '<body>\n',
        ]
        # XXX: we want some stuff in here, I'm sure
        for date in self.planet:
            # stop before starting a day once the quota is used up
            # (this also means maxitems <= 0 writes no items at all)
            if itemcount >= self.maxitems:
                break
            parts.append('<h1>%s</h1>\n' % time.strftime('%A %B %d, %Y', time.localtime(date.planetDate)))
            for item in date.items:
                parts.append(self.__write_item__(item))
                # see how many items we've written
                itemcount += 1
                if itemcount >= self.maxitems:
                    break
        # XXX: we want further stuff here
        parts.append('</body>')
        parts.append('</html>')
        return ''.join(parts)