class RDFParse:
    """Parse an RSS 1.0 (``rdf:RDF``) document into a single blog record.

    The parser walks the direct children of the document element: the
    ``channel`` element supplies the blog title and URL, and every ``item``
    element becomes one entry appended to the blog's ``items`` list.

    ``Blog`` and ``BlogItem`` are not defined in this class; they are
    expected to be provided by the enclosing module (presumably plain record
    objects whose ``items`` attribute is a list -- confirm against their
    definitions).
    """

    # DOM nodeType codes (W3C DOM Level 1): 1 is an element, 3 is a text node.
    _ELEMENT_NODE = 1
    _TEXT_NODE = 3

    # Length of the 'YYYY-MM-DDThh:mm:ss' part of a complete W3C-DTF date;
    # whatever follows it is the fraction and/or time zone designator.
    _DATE_PREFIX_LENGTH = 19

    def __init__(self, dom):
        """Remember *dom* and its document element for a later ``parse()``."""
        self.dom = dom
        self.root = dom.documentElement

    def __retrieve_value__(self, fromNode):
        """Return the value of the first text child of *fromNode*.

        Returns ``None`` (after writing a debug line to stderr) when the
        node has no text child, e.g. for an empty element.
        """
        for node in fromNode.childNodes:
            if node.nodeType == self._TEXT_NODE:
                return node.nodeValue
        sys.stderr.write('DEBUG: RDFParse: Asked to retrieve value from wrong part of tree\n')
        return None

    def _elements(self, parent):
        """Return the element children of *parent*, skipping other node types."""
        return [child for child in parent.childNodes
                if child.nodeType == self._ELEMENT_NODE]

    def _parse_date(self, date):
        """Convert a ``dc:date`` string to a ``time.mktime`` timestamp.

        Accepts ``YYYY-MM-DDThh:mm:ss`` followed by any zone designator
        (``Z``, ``+07:00``, ``-05:00``, ...) instead of only the literal
        ``+07:00`` suffix.  Returns ``None`` and logs to stderr when the
        string (or a missing value, ``None``) cannot be parsed.

        NOTE: the zone designator is ignored and the wall-clock fields are
        handed to ``time.mktime`` unchanged.  That is exactly what happened
        before for ``+07:00`` dates, so existing timestamps do not shift.
        """
        try:
            wall_clock = time.strptime(date[:self._DATE_PREFIX_LENGTH],
                                       '%Y-%m-%dT%H:%M:%S')
            return time.mktime(wall_clock)
        except (TypeError, ValueError, OverflowError):
            # TypeError: no text in the element (date is None);
            # ValueError: not in the expected format;
            # OverflowError: mktime cannot represent the date.
            sys.stderr.write("DEBUG: RDFParse: time string %s unparseable\n" % date)
            return None

    def _read_channel(self, node, channel):
        """Copy the title and link of a ``channel`` element onto *channel*."""
        for field in self._elements(node):
            if field.tagName == 'title':
                channel.blogTitle = self.__retrieve_value__(field)
            elif field.tagName == 'link':
                channel.blogURL = self.__retrieve_value__(field)

    def _read_item(self, node):
        """Build a ``BlogItem`` from one ``item`` element."""
        item = BlogItem()
        for field in self._elements(node):
            tag = field.tagName
            if tag == 'title':
                item.itemTitle = self.__retrieve_value__(field)
            elif tag == 'link':
                item.itemURL = self.__retrieve_value__(field)
            elif tag == 'dc:date':
                stamp = self._parse_date(self.__retrieve_value__(field))
                # Leave itemDate untouched on failure, as the original
                # try/except did, so BlogItem's own default stays in place.
                if stamp is not None:
                    item.itemDate = stamp
            elif tag == 'description':
                item.contents = self.__retrieve_value__(field)
        return item

    def parse(self):
        """Return a one-element list holding the ``Blog`` built from the feed.

        In RSS 1.0 the ``item`` elements are siblings of ``channel`` under
        the root, so both are looked up among the root's direct children.
        """
        channel = Blog()
        for node in self._elements(self.root):
            if node.tagName == 'channel':
                self._read_channel(node, channel)
            elif node.tagName == 'item':
                channel.items.append(self._read_item(node))
        return [channel]