Added Atom XML support

author davyd <davyd>
Sat, 7 Feb 2004 19:28:09 +0000 (19:28 +0000)
committer davyd <davyd>
Sat, 7 Feb 2004 19:28:09 +0000 (19:28 +0000)
diff --git a/XMLParse.py b/XMLParse.py

index a50925f..69edc6c 100644 (file)
--- a/XMLParse.py
+++ b/XMLParse.py
@@ -46,8 +46,11 @@ class XMLParse:
                 elif self.dom.documentElement.tagName == 'rdf:RDF':
                         # this is an RDF document
                         self.news       = RDFParse(self.dom)
                 elif self.dom.documentElement.tagName == 'rdf:RDF':
                         # this is an RDF document
                         self.news       = RDFParse(self.dom)
+               elif self.dom.documentElement.tagName == 'feed':
+                       # this seems to be an Atom feed
+                       self.news       = AtomParse(self.dom)
                 else:
                 else:
-                       sys.stderr.write('DEBUG: XMLParse: Unknown XML document %s\n' % dom.documentElement.tagname)
+                       sys.stderr.write('DEBUG: XMLParse: Unknown XML document \'%s\'\n' % self.dom.documentElement.tagName)
                         
         def parse(self):
                 "Return a list of Blog objects from the XML file we parsed"
                         
         def parse(self):
                 "Return a list of Blog objects from the XML file we parsed"
@@ -58,19 +61,58 @@ class XMLParse:
                         self.bloglist   = self.news.parse()
                         return self.bloglist
  
                         self.bloglist   = self.news.parse()
                         return self.bloglist
  
-class RDFParse:
+class Parse:
+       "Generic class for parsing XML feeds"
         def __init__(self, dom):
                 self.dom        = dom
                 self.root       = dom.documentElement
         def __init__(self, dom):
                 self.dom        = dom
                 self.root       = dom.documentElement
-       
+
         def __retrieve_value__(self, fromNode):
         def __retrieve_value__(self, fromNode):
+               "Retrieve a value from between two nodes"
                 for node in fromNode.childNodes:
                         if node.nodeType == 3:
                                 return node.nodeValue
                         else:
                 for node in fromNode.childNodes:
                         if node.nodeType == 3:
                                 return node.nodeValue
                         else:
-                               sys.stderr.write('DEBUG: RDFParse: Asked to retrieve value from wrong part of tree\n')
+                               sys.stderr.write('DEBUG: Parse: Asked to retrieve value from wrong part of tree\n')
                                 return None
                                 return None
-       
+
+class AtomParse(Parse):        
+       def parse(self):
+               channel = Blog()
+               for node in self.root.childNodes:
+                       if node.nodeType == 1 and node.tagName == 'title':
+                               channel.blogTitle       = self.__retrieve_value__(node)
+                       elif node.nodeType == 1 and node.tagName == 'link' and node.attributes.has_key('rel') and node.attributes['rel'].value == "alternate":
+                               if node.attributes.has_key('href'):
+                                       channel.blogURL         = node.attributes['href'].value
+                               else:
+                                       sys.stderr.write('DEBUG: AtomParse: Could not find href for link, ignoring\n')
+                       elif node.nodeType == 1 and node.tagName == 'entry':
+                               # create an item and add it to the list
+                               item    = BlogItem()
+                               channel.items.append(item)
+                               # handlers for tags
+                               for node2 in node.childNodes:
+                                       if node2.nodeType == 1 and node2.tagName == 'created':
+                                               date    = self.__retrieve_value__(node2)
+                                               try:
+                                                       item.itemDate   = time.mktime(time.strptime(date, '%Y-%m-%dT%H:%M:%SZ')) + 28800
+                                               except:
+                                                       sys.stderr.write("DEBUG: AtomParse: time string %s is unparseable\n" % date)
+                                       elif node2.nodeType == 1 and node2.tagName == 'link' and node2.attributes.has_key('rel') and node2.attributes['rel'].value == 'alternate':
+                                               if node2.attributes.has_key('href'):
+                                                       item.itemURL    = node2.attributes['href'].value
+                                               else:
+                                                       sys.stderr.write('DEBUG: AtomParse: Could not find href for link, ignoring\n')
+                                       elif node2.nodeType == 1 and node2.tagName == 'title':
+                                               item.itemTitle  = self.__retrieve_value__(node2)
+                                       elif node2.nodeType == 1 and node2.tagName == 'summary':
+                                               for node3 in node2.childNodes:
+                                                       if node3.nodeType == 1 and node3.tagName == 'div':
+                                                               item.contents   = self.__retrieve_value__(node3)
+               return [channel]
+
+class RDFParse(Parse):
         def parse(self):
                 channel = Blog()
                 for node in self.root.childNodes:
         def parse(self):
                 channel = Blog()
                 for node in self.root.childNodes:
@@ -98,21 +140,7 @@ class RDFParse:
                                 channel.items.append(item)
                 return [channel]
  
                                 channel.items.append(item)
                 return [channel]
  
-class RSS2Parse:
-       def __init__(self, dom):
-               self.dom        = dom
-               self.root       = dom.documentElement
-
-       def __retrieve_value__(self, fromNode):
-               "Returns the value from between two nodes, ie <node>text</node>"
-               for node in fromNode.childNodes:
-                       if node.nodeType == 3:
-                               # this is the information contained within our node
-                               return node.nodeValue
-                       else:
-                               sys.stderr.write('DEBUG: RSS2Parse: Asked to retrieve value from wrong part of tree\n')
-                               return None
-       
+class RSS2Parse(Parse):
         def __parse_item__(self, fromNode):
                 "Returns a BlogItem collected from fromNode"
                 item    = BlogItem()
         def __parse_item__(self, fromNode):
                 "Returns a BlogItem collected from fromNode"
                 item    = BlogItem()
diff --git a/feedlist b/feedlist

index 9a77148..e5f239d 100644 (file)
--- a/feedlist
+++ b/feedlist
@@ -15,3 +15,4 @@ Michael Deegan        http://www.livejournal.com/users/leahcim/data/rss
  James Cox      http://www.livejournal.com/users/coxymla/data/rss
  Mark Tearle    http://www.livejournal.com/users/unfoldedreality/data/rss
  Tom Castiglione        http://www.livejournal.com/users/zharradan/data/rss
  James Cox      http://www.livejournal.com/users/coxymla/data/rss
  Mark Tearle    http://www.livejournal.com/users/unfoldedreality/data/rss
  Tom Castiglione        http://www.livejournal.com/users/zharradan/data/rss
+Elaine Walker  http://ariaflame.blogspot.com/atom.xml
diff --git a/sidebar.html b/sidebar.html

index d602a1a..ae4b57a 100644 (file)
--- a/sidebar.html
+++ b/sidebar.html
@@ -7,11 +7,12 @@
   with help from
   <a href="http://www.gimp.org/">the Gimp</a>
   and other tools.<br />
   with help from
   <a href="http://www.gimp.org/">the Gimp</a>
   and other tools.<br />
- It currently supports RSS v2 and RDF news
- syndication formats, and XHTML as an output
- format. Other formats can be added with ease.<br />
+ It currently supports RSSv2, RDF and Atom
+ (used by Blogger) news syndication formats,
+ as well as XHTML as an output format. Other
+ formats can be added with ease.<br />
   Those interested in the source can check it out from
   <a href="http://cvs.ucc.asn.au/cgi-bin/viewcvs.cgi/projects/planetucc/">
   Those interested in the source can check it out from
   <a href="http://cvs.ucc.asn.au/cgi-bin/viewcvs.cgi/projects/planetucc/">
- UCC CVS</a>.
- It is currently in Beta.
+ UCC CVS</a>.<br />
+ <strong>Planet UCC can be considered BETA</strong>
  </p>
  </p>
author	davyd <davyd>
	Sat, 7 Feb 2004 19:28:09 +0000 (19:28 +0000)
committer	davyd <davyd>
	Sat, 7 Feb 2004 19:28:09 +0000 (19:28 +0000)
XMLParse.py		patch | blob | history
feedlist		patch | blob | history
sidebar.html		patch | blob | history