Initial Upload
author davyd <davyd>
Sat, 7 Feb 2004 05:44:17 +0000 (05:44 +0000)
committer davyd <davyd>
Sat, 7 Feb 2004 05:44:17 +0000 (05:44 +0000)
XMLParse.py [new file with mode: 0644]
XMLWriter.py [new file with mode: 0644]

diff --git a/XMLParse.py b/XMLParse.py
new file mode 100644 (file)
index 0000000..a3ef6ae
--- /dev/null
@@ -0,0 +1,115 @@
+#
+# XMLParse.py
+#
+# Parse arbitrary XML news streams into an object type
+# understandable by Planet UCC.
+#
+# (c) 2004, Davyd Madeley <[email protected]>
+#
+
+import sys, time
+from xml.dom.minidom import parseString
+
+class Blog:
+       def __init__(self):
+               self.blogTitle  = None
+               self.blogURL    = None
+               self.imageURL   = None
+               self.imageLink  = None
+               self.items      = []
+
+class BlogItem:
+       def __init__(self):
+               self.itemTitle  = None
+               self.itemDate   = None
+               self.itemURL    = None
+               self.contents   = None
+
+class XMLParse:
+       def __init__(self, XMLString):
+               # parse our XML file
+               self.dom        = parseString(XMLString)
+               self.bloglist   = None
+               # find out what sort of XML format we're dealing with
+               if self.dom.documentElement.tagName == 'rss':
+                       # this is some sort of RSS feed
+                       # find out what version
+                       if self.dom.documentElement.attributes.has_key('version'):
+                               version = self.dom.documentElement.attributes['version'].value
+                               if version == '2.0':
+                                       # this is an RSS2 document
+                                       self.news       = RSS2Parse(self.dom)
+                               else:
+                                       sys.stderr.write('DEBUG: XMLParse: Unknown RSS version %s\n' % version)
+                       else:
+                               sys.stderr.write('DEBUG: XMLParse: RSS document has no version information\n')
+               else:
+                       sys.stderr.write('DEBUG: XMLParse: Unknown XML document %s\n' % dom.documentElement.tagname)
+                       
+       def parse(self):
+               "Return a list of Blog objects from the XML file we parsed"
+               # quick cache for XML parsing
+               if self.bloglist:
+                       return self.bloglist
+               else:
+                       self.bloglist   = self.news.parse()
+                       return self.bloglist
+
+class RSS2Parse:
+       def __init__(self, dom):
+               self.dom        = dom
+               self.root       = dom.documentElement
+
+       def __retrieve_value__(self, fromNode):
+               "Returns the value from between two nodes, ie <node>text</node>"
+               for node in fromNode.childNodes:
+                       if node.nodeType == 3:
+                               # this is the information contained within our node
+                               return node.nodeValue
+                       else:
+                               sys.stderr.write('DEBUG: RSS2Parse: Asked to retrieve value from wrong part of tree\n')
+                               return None
+       
+       def __parse_item__(self, fromNode):
+               "Returns a BlogItem collected from fromNode"
+               item    = BlogItem()
+               for node in fromNode.childNodes:
+                       if node.nodeType == 1 and node.tagName == 'title':
+                               item.itemTitle  = self.__retrieve_value__(node)
+                       elif node.nodeType == 1 and node.tagName == 'pubDate':
+                               try:
+                                       item.itemDate   = time.mktime(time.strptime(self.__retrieve_value__(node), '%a, %d %b %Y %H:%M:%S %Z'))
+                               except:
+                                       try:
+                                               item.itemDate   = time.mktime(time.strptime(self.__retrieve_value__(node), '%a, %d %b %Y %H:%M:%S +0000'))
+                                       except:
+                                               sys.stderr.write("DEBUG: RSS2Parse: time string %s unparseable\n" % (self.__retrieve_value__(node)))
+                       elif node.nodeType == 1 and node.tagName == 'link':
+                               item.itemURL    = self.__retrieve_value__(node)
+                       elif node.nodeType == 1 and node.tagName == 'description':
+                               item.contents   = self.__retrieve_value__(node)
+               return item
+
+       def parse(self):
+               "Returns a list of Blog objects for parsing into an arbitrary data format."
+               channellist     = []
+               for node in self.root.childNodes:
+                       if node.nodeType == 1 and node.tagName == 'channel':
+                               channel = Blog()
+                               channellist.append(channel)
+                               # populate channel with information from the blog
+                               for node2 in node.childNodes:
+                                       if node2.nodeType == 1 and node2.tagName == 'title':
+                                               channel.blogTitle       = self.__retrieve_value__(node2)
+                                       elif node2.nodeType == 1 and node2.tagName == 'link':
+                                               channel.blogURL         = self.__retrieve_value__(node2)
+                                       elif node2.nodeType == 1 and node2.tagName == 'image':
+                                               for node3 in node2.childNodes:
+                                                       if node3.nodeType == 1 and node3.tagName == 'url':
+                                                               channel.imageURL                = self.__retrieve_value__(node3)
+                                                       elif node3.nodeType == 1 and node3.tagName == 'link':
+                                                               channel.imageLink       = self.__retrieve_value__(node3)
+                                       elif node2.nodeType == 1 and node2.tagName == 'item':
+                                               item    = self.__parse_item__(node2)
+                                               channel.items.append(item)
+               return channellist
diff --git a/XMLWriter.py b/XMLWriter.py
new file mode 100644 (file)
index 0000000..1311fb6
--- /dev/null
@@ -0,0 +1,126 @@
+#
+# XMLWriter.py
+#
+# Generate arbitrary XML files
+#
+# (c) 2004, Davyd Madeley <[email protected]>
+#
+
+import time
+
+class PlanetItem:
+       def __init__(self, blog, item):
+               self.itemTitle  = item.itemTitle
+               self.itemURL    = item.itemURL
+               self.itemDate   = item.itemDate
+               self.blogTitle  = blog.blogTitle
+               self.blogURL    = blog.blogURL
+               self.imageURL   = blog.imageURL
+               self.imageLink  = blog.imageLink
+               self.contents   = item.contents
+
+class PlanetDate:
+       def __init__(self, date):
+               self.planetDate = date
+               self.items      = []
+
+class Planet:
+       def __init__(self, bloglist):
+               self.__bloglist__       = bloglist
+               self.__tainted__        = True
+               self.dates              = []
+               
+       def append(self, blog):
+               self.__bloglist__.append(blog)
+               self.__tainted__        = True
+       
+       def __getNext__(self, bloglist):
+               "Returns a PlanetItem reaped from a bloglist"
+               latestTime              = 0
+               holdingBlog             = None
+               for blog in bloglist:
+                       if len(blog.items) > 0 and blog.items[0].itemDate > latestTime:
+                               latestTime      = blog.items[0].itemDate
+                               holdingBlog     = blog
+               if holdingBlog == None:
+                       return None
+               item    = holdingBlog.items.pop(0)
+               return PlanetItem(holdingBlog, item)
+       
+       def sort(self):
+               if self.__tainted__:
+                       # we need to sort the blogs into a single news feed
+                       # copy the bloglist to a working symbol
+                       bloglist        = self.__bloglist__ + []
+                       lastDate        = -1
+                       workingDate     = None
+                       self.dates      = []
+                       while True:
+                               lastItem        = self.__getNext__(bloglist)
+                               if lastItem == None:
+                                       break
+                               # this checks to see if it's a new day
+                               if time.localtime(lastItem.itemDate) != lastDate:
+                                       workingDate     = PlanetDate(lastItem.itemDate)
+                                       self.dates.append(workingDate)
+                               # append the item to the current date
+                               workingDate.items.append(lastItem)      
+                       self.__tainted__        = False
+               return self.dates
+                       
+
+class XMLWriter:
+       def __init__(self, doctype, bloglist):
+               self.planet     = Planet(bloglist)
+               self.items      = self.planet.sort()
+               # doctype should be something like XMLWriter.XHTMLWriter
+               self.writer     = doctype(self.items)
+       
+       def write(self):
+               output          = self.writer.write()
+               return output
+
+class XHTMLWriter:
+       def __init__(self, planet):
+               self.planet     = planet
+               self.maxitems   = 100
+       
+       def __write_item__(self, item):
+               output  =       ''
+               output  +=      '<div id="item">\n'
+               output  +=      '<h2>%s: %s</h2>\n' % (item.blogTitle, item.itemTitle)
+               output  +=      '<p class="time">\n'
+               output  +=      '(%s)\n' % time.strftime('%A %B %d, %Y %H:%M %Z', time.localtime(item.itemDate))
+               output  +=      '</p>\n'
+               output  +=      '<p class="body">\n'
+               output  +=      item.contents
+               output  +=      '\n</p>\n'
+               return output
+       
+       def write(self):
+               itemcount       = 0
+               output  =       ''
+               output  +=      '<?xml version="1.0" encoding="UTF-8"?>\n'
+               output  +=      '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">\n'
+               output  +=      '<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" >\n'
+               output  +=      '<head>\n'
+               output  +=      '<title>Planet UCC</title>\n'
+               # XXX: we'll want a style sheet in here
+               output  +=      '</head>\n'
+               output  +=      '<body>\n'
+               # XXX: we want some stuff in here, I'm sure
+               for date in self.planet:
+                       output  += '<h1>%s</h1>\n' % time.strftime('%A %B %d, %Y', time.localtime(date.planetDate))
+                       for item in date.items:
+                               output  += self.__write_item__(item)
+                               # see how many items we've written
+                               itemcount += 1
+                               if itemcount >= self.maxitems:
+                                       break
+                       # again, check to see if we've written the maximum number of items
+                       if itemcount >= self.maxitems:
+                               break
+               # XXX: we want further stuff here
+               output  +=      '</body>'
+               output  +=      '</html>'
+               return output

UCC git Repository :: git.ucc.asn.au