elif self.dom.documentElement.tagName == 'rdf:RDF':
# this is an RDF document
self.news = RDFParse(self.dom)
+ elif self.dom.documentElement.tagName == 'feed':
+ # this seems to be an Atom feed
+ self.news = AtomParse(self.dom)
else:
- sys.stderr.write('DEBUG: XMLParse: Unknown XML document %s\n' % dom.documentElement.tagname)
+ sys.stderr.write('DEBUG: XMLParse: Unknown XML document \'%s\'\n' % self.dom.documentElement.tagName)
def parse(self):
"Return a list of Blog objects from the XML file we parsed"
self.bloglist = self.news.parse()
return self.bloglist
-class RDFParse:
+class Parse:
+ "Base class for the feed parsers: keeps the DOM and its root element, and provides the text-extraction helper the subclasses call"
def __init__(self, dom):
self.dom = dom
self.root = dom.documentElement
-
+
def __retrieve_value__(self, fromNode):
+ "Return the nodeValue of a text-node child of fromNode, i.e. the text in <node>text</node>; otherwise write a DEBUG line to stderr and return None"
for node in fromNode.childNodes:
+ # nodeType 3 is the DOM TEXT_NODE type
if node.nodeType == 3:
return node.nodeValue
else:
- sys.stderr.write('DEBUG: RDFParse: Asked to retrieve value from wrong part of tree\n')
+ sys.stderr.write('DEBUG: Parse: Asked to retrieve value from wrong part of tree\n')
return None
-
+
+class AtomParse(Parse):
+    """Parser for Atom feeds (documents whose root element is <feed>).
+
+    parse() reads the feed-level <title> and rel="alternate" <link>, and
+    turns every <entry> into a BlogItem built from its <created>, <link>,
+    <title> and <summary> children.
+    """
+
+    def parse(self):
+        "Return a one-element list containing the Blog built from the feed"
+        channel = Blog()
+        for node in self.root.childNodes:
+            if node.nodeType != 1:
+                # skip everything that is not an element node
+                continue
+            if node.tagName == 'title':
+                channel.blogTitle = self.__retrieve_value__(node)
+            elif self._is_alternate_link(node):
+                href = self._link_href(node)
+                if href is not None:
+                    channel.blogURL = href
+            elif node.tagName == 'entry':
+                channel.items.append(self._parse_entry(node))
+        return [channel]
+
+    def _is_alternate_link(self, node):
+        "Return true if the element node is a <link> with rel=\"alternate\""
+        return (node.tagName == 'link'
+                and node.attributes.has_key('rel')
+                and node.attributes['rel'].value == 'alternate')
+
+    def _link_href(self, node):
+        "Return the href of a <link> element, or None (after a DEBUG line) if it has none"
+        if node.attributes.has_key('href'):
+            return node.attributes['href'].value
+        sys.stderr.write('DEBUG: AtomParse: Could not find href for link, ignoring\n')
+        return None
+
+    def _parse_created(self, date):
+        "Convert a 'YYYY-MM-DDTHH:MM:SSZ' string to epoch seconds, or None if unparseable"
+        # Imported here because the module's import block is not part of
+        # this change.
+        import calendar
+        try:
+            # The trailing 'Z' marks the stamp as UTC, so it is converted
+            # with calendar.timegm().  The previous form,
+            # time.mktime(...) + 28800, gave the same number only on a
+            # host whose local zone is UTC+8 without DST.
+            # NOTE(review): confirm the other parsers also store a true
+            # epoch value in itemDate.
+            return float(calendar.timegm(time.strptime(date, '%Y-%m-%dT%H:%M:%SZ')))
+        except (TypeError, ValueError):
+            # TypeError: the element had no text (date is None);
+            # ValueError: the text does not match the expected format.
+            sys.stderr.write("DEBUG: AtomParse: time string %s is unparseable\n" % date)
+            return None
+
+    def _parse_entry(self, entry):
+        "Return a BlogItem filled in from the children of an <entry> element"
+        item = BlogItem()
+        for child in entry.childNodes:
+            if child.nodeType != 1:
+                continue
+            if child.tagName == 'created':
+                stamp = self._parse_created(self.__retrieve_value__(child))
+                if stamp is not None:
+                    # itemDate is left untouched when the date is unparseable
+                    item.itemDate = stamp
+            elif self._is_alternate_link(child):
+                href = self._link_href(child)
+                if href is not None:
+                    item.itemURL = href
+            elif child.tagName == 'title':
+                item.itemTitle = self.__retrieve_value__(child)
+            elif child.tagName == 'summary':
+                # the summary text is taken from <div> children of <summary>
+                for part in child.childNodes:
+                    if part.nodeType == 1 and part.tagName == 'div':
+                        item.contents = self.__retrieve_value__(part)
+        return item
+
+class RDFParse(Parse):
def parse(self):
channel = Blog()
for node in self.root.childNodes:
channel.items.append(item)
return [channel]
-class RSS2Parse:
- def __init__(self, dom):
- self.dom = dom
- self.root = dom.documentElement
-
- def __retrieve_value__(self, fromNode):
- "Returns the value from between two nodes, ie <node>text</node>"
- for node in fromNode.childNodes:
- if node.nodeType == 3:
- # this is the information contained within our node
- return node.nodeValue
- else:
- sys.stderr.write('DEBUG: RSS2Parse: Asked to retrieve value from wrong part of tree\n')
- return None
-
+class RSS2Parse(Parse):
def __parse_item__(self, fromNode):
"Returns a BlogItem collected from fromNode"
item = BlogItem()
with help from
<a href="http://www.gimp.org/">the Gimp</a>
and other tools.<br />
- It currently supports RSS v2 and RDF news
- syndication formats, and XHTML as an output
- format. Other formats can be added with ease.<br />
+ It currently supports the RSS 2.0, RDF and Atom
+ (used by Blogger) news syndication formats as
+ input, and XHTML as an output format. Other
+ formats can be added with ease.<br />
Those interested in the source can check it out from
<a href="http://cvs.ucc.asn.au/cgi-bin/viewcvs.cgi/projects/planetucc/">
- UCC CVS</a>.
- It is currently in Beta.
+ UCC CVS</a>.<br />
+ <strong>Planet UCC can be considered BETA</strong>
</p>