#
# XMLParse.py
#
# Parse arbitrary XML news streams into an object type
# understandable by Planet UCC.
#
# (c) 2004, Davyd Madeley
#
# Provenance: this source was recovered from the patch that deleted the file
# ("deleted file mode 100644", index 8ce8e40..0000000):
#   From: davyd
#   Date: Mon, 15 Mar 2004 02:38:33 +0000 (+0000)
#   Subject: This file is no longer needed.
#   X-Git-Url: https://git.ucc.asn.au/?p=planet-ucc.git;a=commitdiff_plain;h=7518c2e9e4da03f4bac58e9729145dbbbe9fc9a6;hp=027c1e69c4f61325523bc5f25929320fa028fc85
#

import calendar
import sys
import time
from email.utils import parsedate_tz
from xml.dom import Node
from xml.dom.minidom import parseString


def _child_elements(parent):
    """Yield the element children of *parent*, skipping text and comments."""
    for child in parent.childNodes:
        if child.nodeType == Node.ELEMENT_NODE:
            yield child


def _w3cdtf_to_epoch(date):
    """Convert a 'YYYY-MM-DDThh:mm:ss' + ('Z' | '+hh:mm' | '-hh:mm') stamp
    to seconds since the epoch (UTC), as a float.

    Raises ValueError for any other layout and TypeError when *date* is
    None.  The result does not depend on the host's local time zone.
    """
    parsed = time.strptime(date[:19], '%Y-%m-%dT%H:%M:%S')
    zone = date[19:]
    if zone == 'Z':
        offset = 0
    else:
        well_formed = (len(zone) == 6 and zone[0] in '+-' and zone[3] == ':'
                       and zone[1:3].isdigit() and zone[4:6].isdigit())
        if not well_formed:
            raise ValueError('unsupported time zone designator %r' % (zone,))
        offset = int(zone[1:3]) * 3600 + int(zone[4:6]) * 60
        if zone[0] == '-':
            offset = -offset
    # timegm() reads the fields as UTC; subtracting the stamp's own offset
    # turns the wall-clock time it names into the real instant.
    return float(calendar.timegm(parsed) - offset)


def _rfc822_to_epoch(date):
    """Convert an RFC 822 style date (as used by RSS pubDate) to seconds
    since the epoch (UTC), as a float.

    Raises ValueError when the string is missing or cannot be parsed.  A
    date that carries no usable zone is treated as UTC.
    """
    parsed = parsedate_tz(date) if date else None
    if parsed is None:
        raise ValueError('unparseable RFC 822 date %r' % (date,))
    return float(calendar.timegm(parsed[:9]) - (parsed[9] or 0))


class Blog:
    """One feed (channel) together with the items found in it."""

    def __init__(self):
        self.blogTitle = None   # text of the channel/feed <title>
        self.blogURL = None     # channel <link> (or Atom alternate href)
        self.imageURL = None    # <image><url>, filled in by RSS2Parse only
        self.imageLink = None   # <image><link>, filled in by RSS2Parse only
        self.items = []         # BlogItem objects in document order


class BlogItem:
    """One entry of a feed."""

    def __init__(self):
        self.itemTitle = None   # text of the entry <title>
        self.itemDate = None    # seconds since the epoch, or None
        self.itemURL = None     # link to the entry
        self.contents = None    # description/summary text


class XMLParse:
    """Detect the feed dialect of an XML string and parse it on demand."""

    def __init__(self, XMLString):
        """Parse *XMLString* and choose a parser for its dialect.

        Recognised roots are 'rss' with version="2.0", 'rdf:RDF' and
        'feed'.  Anything else is reported on stderr and leaves the object
        without a parser, in which case parse() returns an empty list.
        Malformed XML raises whatever xml.dom.minidom.parseString raises.
        """
        # parse our XML file
        self.dom = parseString(XMLString)
        self.bloglist = None
        # stays None when the document type is not understood
        self.news = None

        root = self.dom.documentElement
        tag = root.tagName
        # find out what sort of XML format we're dealing with
        if tag == 'rss':
            # this is some sort of RSS feed; find out what version
            if root.hasAttribute('version'):
                version = root.getAttribute('version')
                if version == '2.0':
                    # this is an RSS2 document
                    self.news = RSS2Parse(self.dom)
                else:
                    sys.stderr.write('DEBUG: XMLParse: Unknown RSS version %s\n' % version)
            else:
                sys.stderr.write('DEBUG: XMLParse: RSS document has no version information\n')
        elif tag == 'rdf:RDF':
            # this is an RDF document
            self.news = RDFParse(self.dom)
        elif tag == 'feed':
            # this seems to be an Atom feed
            self.news = AtomParse(self.dom)
        else:
            sys.stderr.write('DEBUG: XMLParse: Unknown XML document \'%s\'\n' % tag)

    def parse(self):
        """Return a list of Blog objects from the XML file we parsed.

        The result is computed once and cached; an unrecognised document
        yields an empty list.
        """
        if self.bloglist is None:
            if self.news is None:
                self.bloglist = []
            else:
                self.bloglist = self.news.parse()
        return self.bloglist


class Parse:
    """Generic class for parsing XML feeds."""

    def __init__(self, dom):
        self.dom = dom
        self.root = dom.documentElement

    def __retrieve_value__(self, fromNode):
        """Return the first text (or CDATA) child of *fromNode*.

        Writes a debug line to stderr and returns None when the node has
        no such child.
        """
        for node in fromNode.childNodes:
            if node.nodeType in (Node.TEXT_NODE, Node.CDATA_SECTION_NODE):
                return node.nodeValue
        sys.stderr.write('DEBUG: Parse: Asked to retrieve value from wrong part of tree\n')
        return None


class AtomParse(Parse):
    """Parser for Atom feeds (root element 'feed')."""

    def _alternate_href(self, link):
        """Return the href of a rel="alternate" <link>, else None."""
        if link.getAttribute('rel') != 'alternate':
            return None
        if link.hasAttribute('href'):
            return link.getAttribute('href')
        sys.stderr.write('DEBUG: AtomParse: Could not find href for link, ignoring\n')
        return None

    def _parse_entry(self, entry):
        """Build a BlogItem from one <entry> element."""
        item = BlogItem()
        for node in _child_elements(entry):
            tag = node.tagName
            if tag == 'created':
                date = self.__retrieve_value__(node)
                try:
                    item.itemDate = _w3cdtf_to_epoch(date)
                except (TypeError, ValueError):
                    sys.stderr.write("DEBUG: AtomParse: time string %s is unparseable\n" % date)
            elif tag == 'link':
                href = self._alternate_href(node)
                if href is not None:
                    item.itemURL = href
            elif tag == 'title':
                item.itemTitle = self.__retrieve_value__(node)
            elif tag == 'summary':
                # the text is taken from a <div> wrapper inside <summary>
                for inner in _child_elements(node):
                    if inner.tagName == 'div':
                        item.contents = self.__retrieve_value__(inner)
        return item

    def parse(self):
        """Return a one-element list holding the Blog described by the feed."""
        channel = Blog()
        for node in _child_elements(self.root):
            tag = node.tagName
            if tag == 'title':
                channel.blogTitle = self.__retrieve_value__(node)
            elif tag == 'link':
                href = self._alternate_href(node)
                if href is not None:
                    channel.blogURL = href
            elif tag == 'entry':
                channel.items.append(self._parse_entry(node))
        return [channel]


class RDFParse(Parse):
    """Parser for RDF feeds (root element 'rdf:RDF')."""

    def _parse_item(self, fromNode):
        """Build a BlogItem from one <item> element."""
        item = BlogItem()
        for node in _child_elements(fromNode):
            tag = node.tagName
            if tag == 'title':
                item.itemTitle = self.__retrieve_value__(node)
            elif tag == 'link':
                item.itemURL = self.__retrieve_value__(node)
            elif tag == 'dc:date':
                date = self.__retrieve_value__(node)
                try:
                    item.itemDate = _w3cdtf_to_epoch(date)
                except (TypeError, ValueError):
                    sys.stderr.write("DEBUG: RDFParse: time string %s unparseable\n" % date)
            elif tag == 'description':
                item.contents = self.__retrieve_value__(node)
        return item

    def parse(self):
        """Return a one-element list holding the Blog described by the feed.

        Channel details come from <channel>; the <item> elements are read
        as direct children of the root element.
        """
        channel = Blog()
        for node in _child_elements(self.root):
            if node.tagName == 'channel':
                for detail in _child_elements(node):
                    if detail.tagName == 'title':
                        channel.blogTitle = self.__retrieve_value__(detail)
                    elif detail.tagName == 'link':
                        channel.blogURL = self.__retrieve_value__(detail)
            elif node.tagName == 'item':
                channel.items.append(self._parse_item(node))
        return [channel]


class RSS2Parse(Parse):
    """Parser for RSS 2.0 feeds (root element 'rss', version="2.0")."""

    def __parse_item__(self, fromNode):
        """Return a BlogItem collected from fromNode."""
        item = BlogItem()
        for node in _child_elements(fromNode):
            tag = node.tagName
            if tag == 'title':
                item.itemTitle = self.__retrieve_value__(node)
            elif tag == 'pubDate':
                date = self.__retrieve_value__(node)
                try:
                    item.itemDate = _rfc822_to_epoch(date)
                except (TypeError, ValueError):
                    sys.stderr.write("DEBUG: RSS2Parse: time string %s unparseable\n" % date)
            elif tag == 'link':
                item.itemURL = self.__retrieve_value__(node)
            elif tag == 'description':
                item.contents = self.__retrieve_value__(node)
        return item

    def parse(self):
        """Return a list of Blog objects, one per <channel> in the document."""
        channellist = []
        for node in _child_elements(self.root):
            if node.tagName != 'channel':
                continue
            channel = Blog()
            channellist.append(channel)
            # populate channel with information from the blog
            for detail in _child_elements(node):
                tag = detail.tagName
                if tag == 'title':
                    channel.blogTitle = self.__retrieve_value__(detail)
                elif tag == 'link':
                    channel.blogURL = self.__retrieve_value__(detail)
                elif tag == 'image':
                    for part in _child_elements(detail):
                        if part.tagName == 'url':
                            channel.imageURL = self.__retrieve_value__(part)
                        elif part.tagName == 'link':
                            channel.imageLink = self.__retrieve_value__(part)
                elif tag == 'item':
                    channel.items.append(self.__parse_item__(detail))
        return channellist