Usermode/Libraries/libspiderscript.so_src/lex.c

   1 /*
   2  * SpiderScript
   3  * - Script Lexer
   4  */
   5 #include "tokens.h"
   6 #include <stdlib.h>
   7 #include <stdio.h>
   8 #include <string.h>
   9
// Make the scope character ('.') a symbol of its own; otherwise it is
// treated as just another identifier character
  12 #define USE_SCOPE_CHAR  0
  13
  14 #define DEBUG   0
  15
  16 #define ARRAY_SIZE(x)   ((sizeof(x))/(sizeof((x)[0])))
  17
  18 // === PROTOTYPES ===
  19  int    is_ident(char ch);
  20  int    isdigit(int ch);
  21  int    isspace(int ch);
  22  int    GetToken(tParser *File);
  23
  24 // === CONSTANTS ===
  25 const struct {
  26         const  int      Value;
  27         const char      *Name;
  28 } csaReservedWords[] = {
  29         {TOK_RWD_FUNCTION, "function"},
  30
  31         {TOK_RWD_RETURN, "return"},
  32         {TOK_RWD_NEW, "new"},
  33
  34         {TOK_RWD_IF, "if"},
  35         {TOK_RWD_ELSE, "else"},
  36         {TOK_RWD_DO, "do"},
  37         {TOK_RWD_WHILE, "while"},
  38         {TOK_RWD_FOR, "for"},
  39
  40         {TOK_RWD_VOID, "void"},
  41         {TOK_RWD_OBJECT, "Object"},
  42         {TOK_RWD_OPAQUE, "Opaque"},
  43         {TOK_RWD_INTEGER, "Integer"},
  44         {TOK_RWD_REAL, "Real"},
  45         {TOK_RWD_STRING, "String"}
  46 };
  47
  48 // === CODE ===
  49 /**
  50  * \brief Read a token from a buffer
  51  * \param File  Parser state
  52  */
  53 int GetToken(tParser *File)
  54 {
  55          int    ret;
  56
  57         if( File->NextToken != -1 ) {
  58                 // Save Last
  59                 File->LastToken = File->Token;
  60                 File->LastTokenStr = File->TokenStr;
  61                 File->LastTokenLen = File->TokenLen;
  62                 File->LastLine = File->CurLine;
  63                 // Restore Next
  64                 File->Token = File->NextToken;
  65                 File->TokenStr = File->NextTokenStr;
  66                 File->TokenLen = File->NextTokenLen;
  67                 File->CurLine = File->NextLine;
  68                 // Set State
  69                 File->CurPos = File->TokenStr + File->TokenLen;
  70                 File->NextToken = -1;
  71                 {
  72                         char    buf[ File->TokenLen + 1];
  73                         memcpy(buf, File->TokenStr, File->TokenLen);
  74                         buf[File->TokenLen] = 0;
  75                         #if DEBUG
  76                         printf(" GetToken: FAST Return %i (%i long) (%s)\n", File->Token, File->TokenLen, buf);
  77                         #endif
  78                 }
  79                 return File->Token;
  80         }
  81
  82         //printf("  GetToken: File=%p, File->CurPos = %p\n", File, File->CurPos);
  83
  84         // Clear whitespace (including comments)
  85         for( ;; )
  86         {
  87                 // Whitespace
  88                 while( isspace( *File->CurPos ) )
  89                 {
  90                         //printf("whitespace 0x%x, line = %i\n", *File->CurPos, File->CurLine);
  91                         if( *File->CurPos == '\n' )
  92                                 File->CurLine ++;
  93                         File->CurPos ++;
  94                 }
  95
  96                 // # Line Comments
  97                 if( *File->CurPos == '#' ) {
  98                         while( *File->CurPos && *File->CurPos != '\n' )
  99                                 File->CurPos ++;
 100                         continue ;
 101                 }
 102
 103                 // C-Style Line Comments
 104                 if( *File->CurPos == '/' && File->CurPos[1] == '/' ) {
 105                         while( *File->CurPos && *File->CurPos != '\n' )
 106                                 File->CurPos ++;
 107                         continue ;
 108                 }
 109
 110                 // C-Style Block Comments
 111                 if( *File->CurPos == '/' && File->CurPos[1] == '*' ) {
 112                         File->CurPos += 2;      // Eat the '/*'
 113                         while( *File->CurPos && !(File->CurPos[-1] == '*' && *File->CurPos == '/') )
 114                         {
 115                                 if( *File->CurPos == '\n' )     File->CurLine ++;
 116                                 File->CurPos ++;
 117                         }
 118                         File->CurPos ++;        // Eat the '/'
 119                         continue ;
 120                 }
 121
 122                 // No more "whitespace"
 123                 break;
 124         }
 125
 126         // Save previous tokens (speeds up PutBack and LookAhead)
 127         File->LastToken = File->Token;
 128         File->LastTokenStr = File->TokenStr;
 129         File->LastTokenLen = File->TokenLen;
 130         File->LastLine = File->CurLine;
 131
 132         // Read token
 133         File->TokenStr = File->CurPos;
 134         switch( *File->CurPos++ )
 135         {
 136         case '\0':      ret = TOK_EOF;  break;
 137
 138         // Operations
 139         case '^':
 140                 if( *File->CurPos == '^' ) {
 141                         File->CurPos ++;
 142                         ret = TOK_LOGICXOR;
 143                         break;
 144                 }
 145                 ret = TOK_XOR;
 146                 break;
 147
 148         case '|':
 149                 if( *File->CurPos == '|' ) {
 150                         File->CurPos ++;
 151                         ret = TOK_LOGICOR;
 152                         break;
 153                 }
 154                 ret = TOK_OR;
 155                 break;
 156
 157         case '&':
 158                 if( *File->CurPos == '&' ) {
 159                         File->CurPos ++;
 160                         ret = TOK_LOGICAND;
 161                         break;
 162                 }
 163                 ret = TOK_AND;
 164                 break;
 165
 166         case '/':       ret = TOK_DIV;  break;
 167         case '*':       ret = TOK_MUL;  break;
 168         case '+':       ret = TOK_PLUS; break;
 169         case '-':
 170                 if( *File->CurPos == '>' ) {
 171                         File->CurPos ++;
 172                         ret = TOK_ELEMENT;
 173                 }
 174                 else
 175                         ret = TOK_MINUS;
 176                 break;
 177
 178         // Strings
 179         case '"':
 180                 while( *File->CurPos && !(*File->CurPos == '"' && *File->CurPos != '\\') )
 181                         File->CurPos ++;
 182                 File->CurPos ++;
 183                 ret = TOK_STR;
 184                 break;
 185
 186         // Brackets
 187         case '(':       ret = TOK_PAREN_OPEN;   break;
 188         case ')':       ret = TOK_PAREN_CLOSE;  break;
 189         case '{':       ret = TOK_BRACE_OPEN;   break;
 190         case '}':       ret = TOK_BRACE_CLOSE;  break;
 191         case '[':       ret = TOK_SQUARE_OPEN;  break;
 192         case ']':       ret = TOK_SQUARE_CLOSE; break;
 193
 194         // Core symbols
 195         case ';':       ret = TOK_SEMICOLON;    break;
 196         case ',':       ret = TOK_COMMA;        break;
 197         #if USE_SCOPE_CHAR
 198         case '.':       ret = TOK_SCOPE;        break;
 199         #endif
 200
 201         // Equals
 202         case '=':
 203                 // Comparison Equals
 204                 if( *File->CurPos == '=' ) {
 205                         File->CurPos ++;
 206                         ret = TOK_EQUALS;
 207                         break;
 208                 }
 209                 // Assignment Equals
 210                 ret = TOK_ASSIGN;
 211                 break;
 212
 213         // Variables
 214         // \$[0-9]+ or \$[_a-zA-Z][_a-zA-Z0-9]*
 215         case '$':
 216                 // Numeric Variable
 217                 if( isdigit( *File->CurPos ) ) {
 218                         while( isdigit(*File->CurPos) )
 219                                 File->CurPos ++;
 220                 }
 221                 // Ident Variable
 222                 else {
 223                         while( is_ident(*File->CurPos) || isdigit(*File->CurPos) )
 224                                 File->CurPos ++;
 225                 }
 226                 ret = TOK_VARIABLE;
 227                 break;
 228
 229         // Default (Numbers and Identifiers)
 230         default:
 231                 File->CurPos --;
 232
 233                 // Numbers
 234                 if( isdigit(*File->CurPos) )
 235                 {
 236                         if( *File->CurPos == '0' && File->CurPos[1] == 'x' ) {
 237                                 File->CurPos += 2;
 238                                 while(('0' <= *File->CurPos && *File->CurPos <= '9')
 239                                    || ('A' <= *File->CurPos && *File->CurPos <= 'F')
 240                                    || ('a' <= *File->CurPos && *File->CurPos <= 'f') )
 241                                 {
 242                                         File->CurPos ++;
 243                                 }
 244                         }
 245                         else {
 246                                 while( isdigit(*File->CurPos) )
 247                                         File->CurPos ++;
 248                         }
 249                         ret = TOK_INTEGER;
 250                         break;
 251                 }
 252
 253                 // Identifier
 254                 if( is_ident(*File->CurPos) )
 255                 {
 256                         // Identifier
 257                         while( is_ident(*File->CurPos) || isdigit(*File->CurPos) )
 258                                 File->CurPos ++;
 259
 260                         // This is set later too, but we use it below
 261                         File->TokenLen = File->CurPos - File->TokenStr;
 262                         ret = TOK_IDENT;
 263
 264                         // Check if it's a reserved word
 265                         {
 266                                 char    buf[File->TokenLen + 1];
 267                                  int    i;
 268                                 memcpy(buf, File->TokenStr, File->TokenLen);
 269                                 buf[File->TokenLen] = 0;
 270                                 for( i = 0; i < ARRAY_SIZE(csaReservedWords); i ++ )
 271                                 {
 272                                         if(strcmp(csaReservedWords[i].Name, buf) == 0) {
 273                                                 ret = csaReservedWords[i].Value;
 274                                                 break ;
 275                                         }
 276                                 }
 277                         }
 278                         // If there's no match, just keep ret as TOK_IDENT
 279
 280                         break;
 281                 }
 282                 // Syntax Error
 283                 ret = TOK_INVAL;
 284
 285                 fprintf(stderr, "Syntax Error: Unknown symbol '%c'\n", *File->CurPos);
 286                 longjmp(File->JmpTarget, 1);
 287
 288                 break;
 289         }
 290         // Return
 291         File->Token = ret;
 292         File->TokenLen = File->CurPos - File->TokenStr;
 293
 294         #if DEBUG
 295         {
 296                 char    buf[ File->TokenLen + 1];
 297                 memcpy(buf, File->TokenStr, File->TokenLen);
 298                 buf[File->TokenLen] = 0;
 299                 //printf("  GetToken: File->CurPos = %p\n", File->CurPos);
 300                 printf(" GetToken: Return %i (%i long) (%s)\n", ret, File->TokenLen, buf);
 301         }
 302         #endif
 303         return ret;
 304 }
 305
 306 void PutBack(tParser *File)
 307 {
 308         if( File->LastToken == -1 ) {
 309                 // ERROR:
 310                 fprintf(stderr, "INTERNAL ERROR: Putback when LastToken==-1\n");
 311                 longjmp( File->JmpTarget, -1 );
 312                 return ;
 313         }
 314         #if DEBUG
 315         printf(" PutBack: Was on %i\n", File->Token);
 316         #endif
 317         // Save
 318         File->NextLine = File->CurLine;
 319         File->NextToken = File->Token;
 320         File->NextTokenStr = File->TokenStr;
 321         File->NextTokenLen = File->TokenLen;
 322         // Restore
 323         File->CurLine = File->LastLine;
 324         File->Token = File->LastToken;
 325         File->TokenStr = File->LastTokenStr;
 326         File->TokenLen = File->LastTokenLen;
 327         File->CurPos = File->NextTokenStr;
 328         // Invalidate
 329         File->LastToken = -1;
 330 }
 331
 332 int LookAhead(tParser *File)
 333 {
 334         // TODO: Should I save the entire state here?
 335          int    ret = GetToken(File);
 336         PutBack(File);
 337         return ret;
 338 }
 339
 340 // --- Helpers ---
 341 /**
 342  * \brief Check for ident characters
 343  * \note Matches Regex [a-zA-Z_]
 344  */
 345 int is_ident(char ch)
 346 {
 347         if('a' <= ch && ch <= 'z')      return 1;
 348         if('A' <= ch && ch <= 'Z')      return 1;
 349         if(ch == '_')   return 1;
 350         #if !USE_SCOPE_CHAR
 351         if(ch == '.')   return 1;
 352         #endif
 353         if(ch < 0)      return 1;
 354         return 0;
 355 }
 356
 357 int isdigit(int ch)
 358 {
 359         if('0' <= ch && ch <= '9')      return 1;
 360         return 0;
 361 }
 362
 363 int isspace(int ch)
 364 {
 365         if(' ' == ch)   return 1;
 366         if('\t' == ch)  return 1;
 367         if('\b' == ch)  return 1;
 368         if('\n' == ch)  return 1;
 369         if('\r' == ch)  return 1;
 370         return 0;
 371 }