icalparse.py

   1 #!/usr/bin/python
   2 #
   3 # Copyright (c) 2010 James French <[email protected]>
   4 #
   5 # Permission is hereby granted, free of charge, to any person obtaining a copy
   6 # of this software and associated documentation files (the "Software"), to deal
   7 # in the Software without restriction, including without limitation the rights
   8 # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
   9 # copies of the Software, and to permit persons to whom the Software is
  10 # furnished to do so, subject to the following conditions:
  11 #
  12 # The above copyright notice and this permission notice shall be included in
  13 # all copies or substantial portions of the Software.
  14 #
  15 # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  16 # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  17 # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  18 # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  19 # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  20 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
  21 # THE SOFTWARE.
  22
  23 import sys
  24 import urlparse
  25 import os
  26
  27
  28 class InvalidICS(Exception): pass
  29 class notJoined(Exception): pass
  30 class IncompleteICS(InvalidICS): pass
  31
  32
  33 def lineJoiner(oldcal):
  34         '''Takes a string containing a calendar and returns an array of its lines'''
  35
  36         if not oldcal[0:15] == 'BEGIN:VCALENDAR':
  37                 raise InvalidICS, "Does not appear to be a valid ICS file"
  38
  39         if not 'END:VCALENDAR' in oldcal[-15:-1]:
  40                 raise IncompleteICS, "File appears to be incomplete"
  41
  42         if list(oldcal) == oldcal:
  43                 oldcal = '\r\n'.join(oldcal)
  44
  45         oldcal = oldcal.replace('\r\n ', '').replace('\r\n\t','')
  46         return oldcal.strip().split('\r\n')
  47
  48
  49 def lineFolder(oldcal, length=75):
  50         '''Folds content lines to a specified length, returns a list'''
  51
  52         if length > 75:
  53                 sys.stderr.write('WARN: lines > 75 octets are not RFC compliant\n')
  54
  55         cal = []
  56         sl = length - 1
  57
  58         for line in oldcal:
  59                 # Line fits inside length, do nothing
  60                 if len(line.rstrip()) <= length:
  61                         cal.append(line)
  62                 else:
  63                         brokenline = [line[0:length] + '\r\n']
  64                         ll = length
  65                         while ll < len(line.rstrip('\r\n')) + 1:
  66                                 brokenline.append(' ' + line[ll:sl+ll].rstrip('\r\n') + '\r\n')
  67                                 ll += sl
  68                         cal += brokenline
  69
  70         return cal
  71
  72
  73 def splitFields(cal):
  74         '''Takes a list of lines in a calendar file and returns a list of key, value pairs'''
  75
  76         ical = [tuple(x.split(':',1)) for x in cal]
  77
  78         # Check that we got 2 items on every line
  79         for line in ical:
  80                 if not len(line) == 2:
  81                         raise InvalidICS, "Didn't find a content key on: %s"%(line)
  82
  83         return ical
  84
  85
  86 def joinFields(ical):
  87         '''Takes a list of tuples that make up a calendar file and returns a list of lines'''
  88
  89         return [':'.join(x) for x in ical]
  90
  91
  92 def getContent(url='',stdin=False):
  93         '''Generic content retriever, DO NOT use this function in a CGI script as
  94         it can read from the local disk (which you probably don't want it to).
  95         '''
  96
  97         # Special case, if this is a HTTP url, return the data from it using
  98         # the HTTP functions which attempt to play a bit nicer.
  99         parsedURL = urlparse.urlparse(url)
 100         if 'http' in parsedURL[0]: return getHTTPContent(url)
 101
 102         if stdin:
 103                 content = sys.stdin.read()
 104                 return content
 105
 106         if not parsedURL[0]:
 107                 try: content = open(os.path.abspath(url),'r').read()
 108                 except (IOError, OSError), e:
 109                         sys.stderr.write('%s\n'%e)
 110                         sys.exit(1)
 111                 return content
 112
 113         # If we've survived, use python's generic URL opening library to handle it
 114         import urllib2
 115         try:
 116                 res = urllib2.urlopen(url)
 117                 content = res.read()
 118                 res.close()
 119         except (urllib2.URLError, OSError), e:
 120                 sys.stderr.write('%s\n'%e)
 121                 sys.exit(1)
 122         return content
 123
 124
 125 def getHTTPContent(url='',cache='.httplib2-cache'):
 126         '''This function attempts to play nice when retrieving content from HTTP
 127         services. It's what you should use in a CGI script. It will (by default)
 128         slurp the first 20 bytes of the file and check that we are indeed looking
 129         at an ICS file before going for broke.'''
 130
 131         try:
 132                 import httplib2
 133         except ImportError:
 134                 import urllib2
 135
 136         if not url: return ''
 137
 138         if 'httplib2' in sys.modules:
 139                 try: h = httplib2.Http('.httplib2-cache')
 140                 except OSError: h = httplib2.Http()
 141         else: h = False
 142
 143         try:
 144                 if h: content = h.request(url)[1]
 145                 return content
 146         except ValueError, e:
 147                 sys.stderr.write('%s\n'%e)
 148                 sys.exit(1)
 149
 150         try:
 151                 content = urllib2.urlopen(url).read()
 152                 return content
 153         except (urllib2.URLError, OSError), e:
 154                 sys.stderr.write('%s\n'%e)
 155                 sys.exit(1)
 156
 157         return ''
 158
 159
 160 def generateRules():
 161         '''Attempts to load a series of rules into a list'''
 162         try:
 163                 import parserrules
 164         except ImportError:
 165                 return []
 166
 167         rules = [getattr(parserrules, rule) for rule in dir(parserrules) if callable(getattr(parserrules, rule))]
 168         return rules
 169
 170
 171 def applyRules(ical, rules=[], verbose=False):
 172         'Runs a series of rules on the lines in ical and mangles its output'
 173
 174         for rule in rules:
 175                 output = []
 176                 if rule.__doc__ and verbose:
 177                         print(rule.__doc__)
 178                 for line in ical:
 179                         try:
 180                                 out = rule(line[0],line[1])
 181                         except TypeError, e:
 182                                 output.append(line)
 183                                 print(e)
 184                                 continue
 185
 186                         # Drop lines that are boolean False
 187                         if not out and not out == None: continue
 188
 189                         # If the rule did something and is a tuple or a list we'll accept it
 190                         # otherwise, pay no attention to the man behind the curtain
 191                         try:
 192                                 if tuple(out) == out or list(out) == out and len(out) == 2:
 193                                         output.append(tuple(out))
 194                                 else:
 195                                         output.append(line)
 196                         except TypeError, e:
 197                                 output.append(line)
 198
 199                 ical = output
 200
 201         return ical
 202
 203
 204
 205 if __name__ == '__main__':
 206         from optparse import OptionParser
 207         # If the user passed us a 'stdin' argument, we'll go with that,
 208         # otherwise we'll try for a url opener
 209
 210         parser = OptionParser('usage: %prog [options] url')
 211         parser.add_option('-s', '--stdin', action='store_true', dest='stdin',
 212                 default=False, help='Take a calendar from standard input')
 213         parser.add_option('-o', '--output', dest='outfile', default='',
 214                 help='Specify output file (defaults to standard output)')
 215
 216         (options, args) = parser.parse_args()
 217
 218         if not args and not options.stdin:
 219                 parser.print_usage()
 220                 sys.exit(0)
 221         elif not options.stdin:
 222                 url = args[0]
 223         else:
 224                 url = ''
 225
 226         content = getContent(url, options.stdin)
 227         cal = lineJoiner(content)
 228         ical = applyRules(splitFields(cal), generateRules())
 229         output = lineFolder(joinFields(ical))
 230         print output