From 100b7713bd366245264413eb99751b8b83b117dc Mon Sep 17 00:00:00 2001 From: davyd Date: Sat, 7 Feb 2004 19:28:09 +0000 Subject: [PATCH] Added Atom XML support --- XMLParse.py | 68 ++++++++++++++++++++++++++++++++++++---------------- feedlist | 1 + sidebar.html | 11 +++++---- 3 files changed, 55 insertions(+), 25 deletions(-) diff --git a/XMLParse.py b/XMLParse.py index a50925f..69edc6c 100644 --- a/XMLParse.py +++ b/XMLParse.py @@ -46,8 +46,11 @@ class XMLParse: elif self.dom.documentElement.tagName == 'rdf:RDF': # this is an RDF document self.news = RDFParse(self.dom) + elif self.dom.documentElement.tagName == 'feed': + # this seems to be an Atom feed + self.news = AtomParse(self.dom) else: - sys.stderr.write('DEBUG: XMLParse: Unknown XML document %s\n' % dom.documentElement.tagname) + sys.stderr.write('DEBUG: XMLParse: Unknown XML document \'%s\'\n' % self.dom.documentElement.tagName) def parse(self): "Return a list of Blog objects from the XML file we parsed" @@ -58,19 +61,58 @@ class XMLParse: self.bloglist = self.news.parse() return self.bloglist -class RDFParse: +class Parse: + "Generic class for parsing XML feeds" def __init__(self, dom): self.dom = dom self.root = dom.documentElement - + def __retrieve_value__(self, fromNode): + "Retrieve a value from between two nodes" for node in fromNode.childNodes: if node.nodeType == 3: return node.nodeValue else: - sys.stderr.write('DEBUG: RDFParse: Asked to retrieve value from wrong part of tree\n') + sys.stderr.write('DEBUG: Parse: Asked to retrieve value from wrong part of tree\n') return None - + +class AtomParse(Parse): + def parse(self): + channel = Blog() + for node in self.root.childNodes: + if node.nodeType == 1 and node.tagName == 'title': + channel.blogTitle = self.__retrieve_value__(node) + elif node.nodeType == 1 and node.tagName == 'link' and node.attributes.has_key('rel') and node.attributes['rel'].value == "alternate": + if node.attributes.has_key('href'): + channel.blogURL = node.attributes['href'].value + else: + sys.stderr.write('DEBUG: AtomParse: Could not find href for link, ignoring\n') + elif node.nodeType == 1 and node.tagName == 'entry': + # create an item and add it to the list + item = BlogItem() + channel.items.append(item) + # handlers for tags + for node2 in node.childNodes: + if node2.nodeType == 1 and node2.tagName == 'created': + date = self.__retrieve_value__(node2) + try: + item.itemDate = time.mktime(time.strptime(date, '%Y-%m-%dT%H:%M:%SZ')) + 28800 + except: + sys.stderr.write("DEBUG: AtomParse: time string %s is unparseable\n" % date) + elif node2.nodeType == 1 and node2.tagName == 'link' and node2.attributes.has_key('rel') and node2.attributes['rel'].value == 'alternate': + if node2.attributes.has_key('href'): + item.itemURL = node2.attributes['href'].value + else: + sys.stderr.write('DEBUG: AtomParse: Could not find href for link, ignoring\n') + elif node2.nodeType == 1 and node2.tagName == 'title': + item.itemTitle = self.__retrieve_value__(node2) + elif node2.nodeType == 1 and node2.tagName == 'summary': + for node3 in node2.childNodes: + if node3.nodeType == 1 and node3.tagName == 'div': + item.contents = self.__retrieve_value__(node3) + return [channel] + +class RDFParse(Parse): def parse(self): channel = Blog() for node in self.root.childNodes: @@ -98,21 +140,7 @@ class RDFParse: channel.items.append(item) return [channel] -class RSS2Parse: - def __init__(self, dom): - self.dom = dom - self.root = dom.documentElement - - def __retrieve_value__(self, fromNode): - "Returns the value from between two nodes, ie text" - for node in fromNode.childNodes: - if node.nodeType == 3: - # this is the information contained within our node - return node.nodeValue - else: - sys.stderr.write('DEBUG: RSS2Parse: Asked to retrieve value from wrong part of tree\n') - return None - +class RSS2Parse(Parse): def __parse_item__(self, fromNode): "Returns a BlogItem collected from fromNode" item = BlogItem() diff --git a/feedlist b/feedlist index 9a77148..e5f239d 100644 --- a/feedlist +++ b/feedlist @@ -15,3 +15,4 @@ Michael Deegan http://www.livejournal.com/users/leahcim/data/rss James Cox http://www.livejournal.com/users/coxymla/data/rss Mark Tearle http://www.livejournal.com/users/unfoldedreality/data/rss Tom Castiglione http://www.livejournal.com/users/zharradan/data/rss +Elaine Walker http://ariaflame.blogspot.com/atom.xml diff --git a/sidebar.html b/sidebar.html index d602a1a..ae4b57a 100644 --- a/sidebar.html +++ b/sidebar.html @@ -7,11 +7,12 @@ with help from the Gimp and other tools.
- It currently supports RSS v2 and RDF news - syndication formats, and XHTML as an output - format. Other formats can be added with ease.
+ It currently supports RSSv2, RDF and Atom + (used by Blogger) news syndication formats, + as well as XHTML as an output format. Other + formats can be added with ease.
Those interested in the source can check it out from - UCC CVS. - It is currently in Beta. + UCC CVS.
+ Planet UCC can be considered BETA

-- 2.20.1