icalparse.py

   1 #!/usr/bin/python
   2 #
   3 # Copyright (c) 2010 James French <[email protected]>
   4 #
   5 # Permission is hereby granted, free of charge, to any person obtaining a copy
   6 # of this software and associated documentation files (the "Software"), to deal
   7 # in the Software without restriction, including without limitation the rights
   8 # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
   9 # copies of the Software, and to permit persons to whom the Software is
  10 # furnished to do so, subject to the following conditions:
  11 #
  12 # The above copyright notice and this permission notice shall be included in
  13 # all copies or substantial portions of the Software.
  14 #
  15 # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  16 # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  17 # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  18 # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  19 # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  20 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
  21 # THE SOFTWARE.
  22
  23 import sys
  24 import urlparse
  25 import os
  26
  27 class InvalidICS(Exception): pass
  28 class notJoined(Exception): pass
  29
  30 def lineJoiner(oldcal):
  31         '''Takes a string containing a calendar and returns an array of its lines'''
  32
  33         if not oldcal[0:15] == 'BEGIN:VCALENDAR':
  34                 raise InvalidICS, "Does not appear to be a valid ICS file"
  35
  36         if list(oldcal) == oldcal:
  37                 oldcal = '\r\n'.join(oldcal)
  38
  39         oldcal.replace('\r\n ', '')
  40         return oldcal.split('\r\n')
  41
  42
  43 def lineFolder(oldcal, length=75):
  44         '''Folds content lines to a specified length, returns a list'''
  45
  46         if length > 75:
  47                 sys.stderr.write('WARN: lines > 75 octets are not RFC compliant\n')
  48
  49         cal = []
  50         sl = length - 1
  51
  52         for line in oldcal:
  53                 # Line fits inside length, do nothing
  54                 if len(line.rstrip()) <= length:
  55                         cal.append(line)
  56                 else:
  57                         brokenline = [line[0:length] + '\r\n']
  58                         ll = length
  59                         while ll < len(line.rstrip('\r\n')) + 1:
  60                                 brokenline.append(' ' + line[ll:sl+ll].rstrip('\r\n') + '\r\n')
  61                                 ll += sl
  62                         cal += brokenline
  63
  64         return cal
  65
  66 def getContent(url='',stdin=False):
  67         '''Generic content retriever, DO NOT use this function in a CGI script as
  68         it can read from the local disk (which you probably don't want it to).
  69         '''
  70
  71         # Special case, if this is a HTTP url, return the data from it using
  72         # the HTTP functions which attempt to play a bit nicer.
  73         parsedURL = urlparse.urlparse(url)
  74         if 'http' in parsedURL[0]: return getHTTPContent(url)
  75
  76         if stdin:
  77                 content = sys.stdin.read()
  78                 return content
  79
  80         if not parsedURL[0]:
  81                 try: content = open(os.path.abspath(url),'r').read()
  82                 except (IOError, OSError), e:
  83                         sys.stderr.write('%s\n'%e)
  84                         sys.exit(1)
  85                 return content
  86
  87         # If we've survived, use python's generic URL opening library to handle it
  88         import urllib2
  89         try:
  90                 res = urllib2.urlopen(url)
  91                 content = res.read()
  92                 res.close()
  93         except (urllib2.URLError, ValueError), e:
  94                 sys.stderr.write('%s\n'%e)
  95                 sys.exit(1)
  96         return content
  97
  98
  99 def getHTTPContent(url='',cache='.httplib2-cache'):
 100         '''This function attempts to play nice when retrieving content from HTTP
 101         services. It's what you should use in a CGI script. It will (by default)
 102         slurp the first 20 bytes of the file and check that we are indeed looking
 103         at an ICS file before going for broke.'''
 104
 105         try:
 106                 import httplib2
 107         except ImportError:
 108                 import urllib2
 109
 110         if not url: return ''
 111
 112         if 'httplib2' in sys.modules:
 113                 try: h = httplib2.Http('.httplib2-cache')
 114                 except OSError: h = httplib2.Http()
 115         else: h = False
 116
 117         try:
 118                 if h: content = h.request(url)[1]
 119                 return content
 120         except ValueError, e:
 121                 sys.stderr.write('%s\n'%e)
 122                 sys.exit(1)
 123
 124         try:
 125                 content = urllib2.urlopen(url).read()
 126                 return content
 127         except urllib2.URLError, e:
 128                 sys.stderr.write('%s\n'%e)
 129                 sys.exit(1)
 130
 131         return ''
 132
 133 if __name__ == '__main__':
 134         from optparse import OptionParser
 135         # If the user passed us a 'stdin' argument, we'll go with that,
 136         # otherwise we'll try for a url opener
 137
 138         parser = OptionParser('usage: %prog [options] url')
 139         parser.add_option('-s', '--stdin', action='store_true', dest='stdin',
 140                 default=False, help='Take a calendar from standard input')
 141         parser.add_option('-o', '--output', dest='outfile', default='',
 142                 help='Specify output file (defaults to standard output)')
 143
 144         (options, args) = parser.parse_args()
 145
 146         if not args and not options.stdin:
 147                 parser.print_usage()
 148                 sys.exit(0)
 149         elif not options.stdin:
 150                 url = args[0]
 151         else:
 152                 url = ''
 153
 154         content = getContent(url, options.stdin)
 155         cal = lineJoiner(content)
 156         print cal