icalparse.py

   1 #!/usr/bin/python
   2 #
   3 # Copyright (c) 2011 James French <[email protected]>
   4 #
   5 # Permission is hereby granted, free of charge, to any person obtaining a copy
   6 # of this software and associated documentation files (the "Software"), to deal
   7 # in the Software without restriction, including without limitation the rights
   8 # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
   9 # copies of the Software, and to permit persons to whom the Software is
  10 # furnished to do so, subject to the following conditions:
  11 #
  12 # The above copyright notice and this permission notice shall be included in
  13 # all copies or substantial portions of the Software.
  14 #
  15 # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  16 # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  17 # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  18 # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  19 # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  20 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
  21 # THE SOFTWARE.
  22
  23 import sys
  24 import urlparse
  25 import os
  26 import vobject
  27 from cgi import parse_header
  28
  29
  30 def getContent(url='',stdin=False):
  31         '''Generic content retriever, DO NOT use this function in a CGI script as
  32         it can read from the local disk (which you probably don't want it to).
  33         '''
  34
  35         encoding = '' # If we don't populate this, the script will assume UTF-8
  36
  37         # Special case, if this is a HTTP url, return the data from it using
  38         # the HTTP functions which attempt to play a bit nicer.
  39         parsedURL = urlparse.urlparse(url)
  40         if 'http' in parsedURL[0]: return getHTTPContent(url)
  41
  42         if stdin:
  43                 content = sys.stdin.read()
  44                 return (content, encoding)
  45
  46         if not parsedURL[0]: url = 'file://' + os.path.abspath(url)
  47
  48         # If we've survived, use python's generic URL opening library to handle it
  49         import urllib2
  50         try:
  51                 res = urllib2.urlopen(url)
  52                 content = res.read()
  53                 ct = res.info().getplist()
  54                 res.close()
  55         except (urllib2.URLError, OSError), e:
  56                 sys.stderr.write('%s\n'%e)
  57                 sys.exit(1)
  58
  59         for param in ct:
  60                 if 'charset' in param:
  61                         encoding = param.split('=')[1]
  62                         break
  63
  64         return (content, encoding)
  65
  66
  67 def getHTTPContent(url='',cache='.httplib2-cache'):
  68         '''This function attempts to play nice when retrieving content from HTTP
  69         services. It's what you should use in a CGI script.'''
  70
  71         try:
  72                 import httplib2
  73         except ImportError:
  74                 import urllib2
  75
  76         if not url: return ('','')
  77
  78         if not 'http' in urlparse.urlparse(url)[0]: return ('','')
  79
  80         if 'httplib2' in sys.modules:
  81                 try: h = httplib2.Http('.httplib2-cache')
  82                 except OSError: h = httplib2.Http()
  83         else: h = False
  84
  85         if h:
  86                 try:
  87                         req = h.request(url)
  88                 except ValueError, e:
  89                         sys.stderr.write('%s\n'%e)
  90                         sys.exit(1)
  91
  92                 resp, content = req
  93                 if 'content-type' in resp:
  94                         ct = 'Content-Type: %s'%req[0]['content-type']
  95                         ct = parse_header(ct)
  96                         if 'charset' in ct[1]: encoding = ct[1]['charset']
  97                         else: encoding = ''
  98                 else:
  99                         ct = ''
 100                         encoding = ''
 101
 102         else:
 103                 try:
 104                         req = urllib2.urlopen(url)
 105                 except urllib2.URLError, e:
 106                         sys.stderr.write('%s\n'%e)
 107                         sys.exit(1)
 108
 109                 content = req.read()
 110                 ct = req.info().getplist()
 111                 for param in ct:
 112                         if 'charset' in param:
 113                                 encoding = param.split('=')[1]
 114                                 break
 115
 116         return (content, encoding)
 117
 118
 119 def generateRules(ruleConfig):
 120         '''Attempts to load a series of rules into a list'''
 121         try:
 122                 import parserrules
 123         except ImportError:
 124                 return []
 125
 126         for conf in ruleConfig:
 127                 parserrules.ruleConfig[conf] = ruleConfig[conf]
 128
 129         rules = [getattr(parserrules, rule) for rule in dir(parserrules) if callable(getattr(parserrules, rule))]
 130         return rules
 131
 132
 133 def applyRules(cal, rules=[], verbose=False):
 134         'Runs a series of rules on the lines in ical and mangles its output'
 135
 136         for rule in rules:
 137                 cal = rule(cal)
 138
 139         return cal
 140
 141 def writeOutput(cal, outfile=''):
 142         '''Takes a list of lines and outputs to the specified file'''
 143
 144         if not cal:
 145                 sys.stderr.write('Refusing to write out an empty file')
 146                 sys.exit(0)
 147
 148         if not outfile:
 149                 out = sys.stdout
 150         else:
 151                 try:
 152                         out = open(outfile, 'w')
 153                 except (IOError, OSError), e:
 154                         sys.stderr.write('%s\n'%e)
 155                         sys.exit(1)
 156
 157         cal.serialize(out)
 158
 159         if not out == sys.stdout:
 160                 out.close()
 161
 162 if __name__ == '__main__':
 163         # Only load options parsing if this script was called directly, skip it
 164         # if it's being called as a module.
 165         from optparse import OptionParser
 166
 167         parser = OptionParser('usage: %prog [options] url')
 168         parser.add_option('-s', '--stdin', action='store_true', dest='stdin',
 169                 default=False, help='Take a calendar from standard input')
 170         parser.add_option('-v', '--verbose', action='store_true', dest='verbose',
 171                 default=False, help='Be verbose when rules are being applied')
 172         parser.add_option('-o', '--output', dest='outfile', default='',
 173                 help='Specify output file (defaults to standard output)')
 174         parser.add_option('-m','--encoding', dest='encoding', default='',
 175                 help='Specify a different character encoding'
 176                 '(ignored if the remote server also specifies one)')
 177         parser.add_option('-t','--timezone', dest='timezone', default='Australia/Perth',
 178                 help='Specify a timezone to use if the remote calendar doesn\'t set it properly')
 179
 180         (options, args) = parser.parse_args()
 181
 182         # Ensure the rules process using the desired timezone
 183         ruleConfig = {}
 184         ruleConfig["defaultTZ"] = options.timezone
 185
 186         # If the user passed us a 'stdin' argument, we'll go with that,
 187         # otherwise we'll try for a url opener
 188         if not args and not options.stdin:
 189                 parser.print_usage()
 190                 sys.exit(0)
 191         elif not options.stdin:
 192                 url = args[0]
 193         else:
 194                 url = ''
 195
 196         (content, encoding) = getContent(url, options.stdin)
 197         encoding = encoding or options.encoding or 'utf-8'
 198
 199         cal = vobject.readOne(unicode(content, encoding))
 200         cal = applyRules(cal, generateRules(ruleConfig), options.verbose)
 201
 202         writeOutput(cal, options.outfile)