icalparse.py

   1 #!/usr/bin/python
   2 #
   3 # Copyright (c) 2010 James French <[email protected]>
   4 #
   5 # Permission is hereby granted, free of charge, to any person obtaining a copy
   6 # of this software and associated documentation files (the "Software"), to deal
   7 # in the Software without restriction, including without limitation the rights
   8 # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
   9 # copies of the Software, and to permit persons to whom the Software is
  10 # furnished to do so, subject to the following conditions:
  11 #
  12 # The above copyright notice and this permission notice shall be included in
  13 # all copies or substantial portions of the Software.
  14 #
  15 # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  16 # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  17 # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  18 # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  19 # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  20 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
  21 # THE SOFTWARE.
  22
  23 import sys
  24 import urlparse
  25 import os
  26
  27
  28 class InvalidICS(Exception): pass
  29 class notJoined(Exception): pass
  30 class IncompleteICS(InvalidICS): pass
  31
  32
  33 def lineJoiner(oldcal):
  34         '''Takes a string containing a calendar and returns an array of its lines'''
  35
  36         if not oldcal[0:15] == 'BEGIN:VCALENDAR':
  37                 raise InvalidICS, "Does not appear to be a valid ICS file"
  38
  39         if not 'END:VCALENDAR' in oldcal[-15:-1]:
  40                 raise IncompleteICS, "File appears to be incomplete"
  41
  42         if list(oldcal) == oldcal:
  43                 oldcal = '\r\n'.join(oldcal)
  44
  45         oldcal = oldcal.replace('\r\n ', '').replace('\r\n\t','')
  46         return oldcal.strip().split('\r\n')
  47
  48
  49 def lineFolder(oldcal, length=75):
  50         '''Folds content lines to a specified length, returns a list'''
  51
  52         if length > 75:
  53                 sys.stderr.write('WARN: lines > 75 octets are not RFC compliant\n')
  54
  55         cal = []
  56         sl = length - 1
  57
  58         for line in oldcal:
  59                 # Line fits inside length, do nothing
  60                 if len(line.rstrip()) <= length:
  61                         cal.append(line)
  62                 else:
  63                         brokenline = [line[0:length]]
  64                         ll = length
  65                         while ll < len(line) + 1:
  66                                 brokenline.append(line[ll:sl+ll])
  67                                 ll += sl
  68                         brokenline = '\r\n '.join(brokenline)
  69                         cal.append(brokenline)
  70
  71         return cal
  72
  73
  74 def splitFields(cal):
  75         '''Takes a list of lines in a calendar file and returns a list of key, value pairs'''
  76
  77         ical = [tuple(x.split(':',1)) for x in cal]
  78
  79         # Check that we got 2 items on every line
  80         for line in ical:
  81                 if not len(line) == 2:
  82                         raise InvalidICS, "Didn't find a content key on: %s"%(line)
  83
  84         return ical
  85
  86
  87 def joinFields(ical):
  88         '''Takes a list of tuples that make up a calendar file and returns a list of lines'''
  89
  90         return [':'.join(x) for x in ical]
  91
  92
  93 def getContent(url='',stdin=False):
  94         '''Generic content retriever, DO NOT use this function in a CGI script as
  95         it can read from the local disk (which you probably don't want it to).
  96         '''
  97
  98         # Special case, if this is a HTTP url, return the data from it using
  99         # the HTTP functions which attempt to play a bit nicer.
 100         parsedURL = urlparse.urlparse(url)
 101         if 'http' in parsedURL[0]: return getHTTPContent(url)
 102
 103         if stdin:
 104                 content = sys.stdin.read()
 105                 return content
 106
 107         if not parsedURL[0]:
 108                 try: content = open(os.path.abspath(url),'r').read()
 109                 except (IOError, OSError), e:
 110                         sys.stderr.write('%s\n'%e)
 111                         sys.exit(1)
 112                 return content
 113
 114         # If we've survived, use python's generic URL opening library to handle it
 115         import urllib2
 116         try:
 117                 res = urllib2.urlopen(url)
 118                 content = res.read()
 119                 res.close()
 120         except (urllib2.URLError, OSError), e:
 121                 sys.stderr.write('%s\n'%e)
 122                 sys.exit(1)
 123         return content
 124
 125
 126 def getHTTPContent(url='',cache='.httplib2-cache'):
 127         '''This function attempts to play nice when retrieving content from HTTP
 128         services. It's what you should use in a CGI script. It will (by default)
 129         slurp the first 20 bytes of the file and check that we are indeed looking
 130         at an ICS file before going for broke.'''
 131
 132         try:
 133                 import httplib2
 134         except ImportError:
 135                 import urllib2
 136
 137         if not url: return ''
 138
 139         if 'httplib2' in sys.modules:
 140                 try: h = httplib2.Http('.httplib2-cache')
 141                 except OSError: h = httplib2.Http()
 142         else: h = False
 143
 144         try:
 145                 if h: content = h.request(url)[1]
 146                 return content
 147         except ValueError, e:
 148                 sys.stderr.write('%s\n'%e)
 149                 sys.exit(1)
 150
 151         try:
 152                 content = urllib2.urlopen(url).read()
 153                 return content
 154         except (urllib2.URLError, OSError), e:
 155                 sys.stderr.write('%s\n'%e)
 156                 sys.exit(1)
 157
 158         return ''
 159
 160
 161 def generateRules():
 162         '''Attempts to load a series of rules into a list'''
 163         try:
 164                 import parserrules
 165         except ImportError:
 166                 return []
 167
 168         rules = [getattr(parserrules, rule) for rule in dir(parserrules) if callable(getattr(parserrules, rule))]
 169         return rules
 170
 171
 172 def applyRules(ical, rules=[], verbose=False):
 173         'Runs a series of rules on the lines in ical and mangles its output'
 174
 175         for rule in rules:
 176                 output = []
 177                 if rule.__doc__ and verbose:
 178                         print(rule.__doc__)
 179                 for line in ical:
 180                         try:
 181                                 out = rule(line[0],line[1])
 182                         except TypeError, e:
 183                                 output.append(line)
 184                                 print(e)
 185                                 continue
 186
 187                         # Drop lines that are boolean False
 188                         if not out and not out == None: continue
 189
 190                         # If the rule did something and is a tuple or a list we'll accept it
 191                         # otherwise, pay no attention to the man behind the curtain
 192                         try:
 193                                 if tuple(out) == out or list(out) == out and len(out) == 2:
 194                                         output.append(tuple(out))
 195                                 else:
 196                                         output.append(line)
 197                         except TypeError, e:
 198                                 output.append(line)
 199
 200                 ical = output
 201
 202         return ical
 203
 204
 205 def writeOutput(cal, outfile=''):
 206         '''Takes a list of lines and outputs to the specified file'''
 207
 208         if not cal:
 209                 sys.stderr.write('Refusing to write out an empty file')
 210                 sys.exit(0)
 211
 212         if not outfile:
 213                 out = sys.stdout
 214         else:
 215                 try:
 216                         out = open(outfile, 'w')
 217                 except (IOError, OSError), e:
 218                         sys.stderr.write('%s\n'%e)
 219                         sys.exit(1)
 220
 221         if cal[-1]: cal.append('')
 222
 223         out.write('\r\n'.join(cal))
 224
 225         if not out == sys.stdout:
 226                 out.close()
 227
 228
 229 if __name__ == '__main__':
 230         from optparse import OptionParser
 231         # If the user passed us a 'stdin' argument, we'll go with that,
 232         # otherwise we'll try for a url opener
 233
 234         parser = OptionParser('usage: %prog [options] url')
 235         parser.add_option('-s', '--stdin', action='store_true', dest='stdin',
 236                 default=False, help='Take a calendar from standard input')
 237         parser.add_option('-o', '--output', dest='outfile', default='',
 238                 help='Specify output file (defaults to standard output)')
 239
 240         (options, args) = parser.parse_args()
 241
 242         if not args and not options.stdin:
 243                 parser.print_usage()
 244                 sys.exit(0)
 245         elif not options.stdin:
 246                 url = args[0]
 247         else:
 248                 url = ''
 249
 250         content = getContent(url, options.stdin)
 251         cal = lineJoiner(content)
 252         ical = applyRules(splitFields(cal), generateRules())
 253         output = lineFolder(joinFields(ical))
 254         writeOutput(output, options.outfile)