Added RDF Parsing
author davyd <davyd>
Sat, 7 Feb 2004 12:18:06 +0000 (12:18 +0000)
committer davyd <davyd>
Sat, 7 Feb 2004 12:18:06 +0000 (12:18 +0000)
XMLParse.py

index a3ef6ae..7ca0f93 100644 (file)
@@ -43,6 +43,9 @@ class XMLParse:
                                        sys.stderr.write('DEBUG: XMLParse: Unknown RSS version %s\n' % version)
                        else:
                                sys.stderr.write('DEBUG: XMLParse: RSS document has no version information\n')
+               elif self.dom.documentElement.tagName == 'rdf:RDF':
+                       # this is an RDF document
+                       self.news       = RDFParse(self.dom)
                else:
                        sys.stderr.write('DEBUG: XMLParse: Unknown XML document %s\n' % dom.documentElement.tagname)
                        
@@ -55,6 +58,46 @@ class XMLParse:
                        self.bloglist   = self.news.parse()
                        return self.bloglist
 
+class RDFParse:
+       def __init__(self, dom):
+               self.dom        = dom
+               self.root       = dom.documentElement
+       
+       def __retrieve_value__(self, fromNode):
+               for node in fromNode.childNodes:
+                       if node.nodeType == 3:
+                               return node.nodeValue
+                       else:
+                               sys.stderr.write('DEBUG: RDFParse: Asked to retrieve value from wrong part of tree\n')
+                               return None
+       
+       def parse(self):
+               channel = Blog()
+               for node in self.root.childNodes:
+                       if node.nodeType == 1 and node.tagName == 'channel':
+                               for node2 in node.childNodes:
+                                       if node2.nodeType == 1 and node2.tagName == 'title':
+                                               channel.blogTitle       = self.__retrieve_value__(node2)
+                                       elif node2.nodeType == 1 and node2.tagName == 'link':
+                                               channel.blogURL         = self.__retrieve_value__(node2)
+                       elif node.nodeType == 1 and node.tagName == 'item':
+                               item    = BlogItem()
+                               for node2 in node.childNodes:
+                                       if node2.nodeType == 1 and node2.tagName == 'title':
+                                               item.itemTitle          = self.__retrieve_value__(node2)
+                                       elif node2.nodeType == 1 and node2.tagName == 'link':
+                                               item.itemURL            = self.__retrieve_value__(node2)
+                                       elif node2.nodeType == 1 and node2.tagName == 'dc:date':
+                                               date                    = self.__retrieve_value__(node2)
+                                               try:
+                                                       item.itemDate   = time.mktime(time.strptime(date, '%Y-%m-%dT%H:%M:%S+07:00'))
+                                               except:
+                                                       sys.stderr.write("DEBUG: RDFParse: time string %s unparseable\n" % date)
+                                       elif node2.nodeType == 1 and node2.tagName == 'description':
+                                               item.contents           = self.__retrieve_value__(node2)
+                               channel.items.append(item)
+               return [channel]
+
 class RSS2Parse:
        def __init__(self, dom):
                self.dom        = dom
@@ -83,7 +126,7 @@ class RSS2Parse:
                                        try:
                                                item.itemDate   = time.mktime(time.strptime(self.__retrieve_value__(node), '%a, %d %b %Y %H:%M:%S +0000'))
                                        except:
-                                               sys.stderr.write("DEBUG: RSS2Parse: time string %s unparseable\n" % (self.__retrieve_value__(node)))
+                                               sys.stderr.write("DEBUG: RSS2Parse: time string %s unparseable\n" % self.__retrieve_value__(node))
                        elif node.nodeType == 1 and node.tagName == 'link':
                                item.itemURL    = self.__retrieve_value__(node)
                        elif node.nodeType == 1 and node.tagName == 'description':

UCC git Repository :: git.ucc.asn.au