SpiderScript - Added tags to loops to allow arbitrary depth breaks
[tpg/acess2.git] / Usermode / Libraries / libspiderscript.so_src / lex.c
1 /*
2  * SpiderScript
3  * - Script Lexer
4  */
5 #include "tokens.h"
6 #include <stdlib.h>
7 #include <stdio.h>
8 #include <string.h>
9
// Non-zero enables the printf() tracing in GetToken() and PutBack()
#define DEBUG	0

// Element count of a real array (the operand must be an array, not a pointer)
#define ARRAY_SIZE(x)	(sizeof(x) / sizeof((x)[0]))
13
14 // === PROTOTYPES ===
15  int    is_ident(char ch);
16  int    isdigit(int ch);
17  int    isspace(int ch);
18  int    GetToken(tParser *File);
19
20 // === CONSTANTS ===
21 const struct {
22         const  int      Value;
23         const char      *Name;
24 } csaReservedWords[] = {
25         {TOK_RWD_FUNCTION, "function"},
26         
27         {TOK_RWD_RETURN, "return"},
28         {TOK_RWD_BREAK, "break"},
29         {TOK_RWD_CONTINUE, "continue"},
30         {TOK_RWD_NEW, "new"},
31         
32         {TOK_RWD_IF, "if"},
33         {TOK_RWD_ELSE, "else"},
34         {TOK_RWD_DO, "do"},
35         {TOK_RWD_WHILE, "while"},
36         {TOK_RWD_FOR, "for"},
37         
38         {TOK_RWD_NULL, "null"},
39         {TOK_RWD_VOID, "void"},
40         {TOK_RWD_OBJECT, "Object"},
41         {TOK_RWD_OPAQUE, "Opaque"},
42         {TOK_RWD_INTEGER, "Integer"},
43         {TOK_RWD_REAL, "Real"},
44         {TOK_RWD_STRING, "String"}
45 };
46
47 // === CODE ===
48 /**
49  * \brief Read a token from a buffer
50  * \param File  Parser state
51  */
52 int GetToken(tParser *File)
53 {
54          int    ret;
55         
56         if( File->NextToken != -1 ) {
57                 // Save Last
58                 File->LastToken = File->Token;
59                 File->LastTokenStr = File->TokenStr;
60                 File->LastTokenLen = File->TokenLen;
61                 File->LastLine = File->CurLine;
62                 // Restore Next
63                 File->Token = File->NextToken;
64                 File->TokenStr = File->NextTokenStr;
65                 File->TokenLen = File->NextTokenLen;
66                 File->CurLine = File->NextLine;
67                 // Set State
68                 File->CurPos = File->TokenStr + File->TokenLen;
69                 File->NextToken = -1;
70                 {
71                         char    buf[ File->TokenLen + 1];
72                         memcpy(buf, File->TokenStr, File->TokenLen);
73                         buf[File->TokenLen] = 0;
74                         #if DEBUG
75                         printf(" GetToken: FAST Return %i (%i long) (%s)\n", File->Token, File->TokenLen, buf);
76                         #endif
77                 }
78                 return File->Token;
79         }
80         
81         //printf("  GetToken: File=%p, File->CurPos = %p\n", File, File->CurPos);
82         
83         // Clear whitespace (including comments)
84         for( ;; )
85         {
86                 // Whitespace
87                 while( isspace( *File->CurPos ) )
88                 {
89                         //printf("whitespace 0x%x, line = %i\n", *File->CurPos, File->CurLine);
90                         if( *File->CurPos == '\n' )
91                                 File->CurLine ++;
92                         File->CurPos ++;
93                 }
94                 
95                 // # Line Comments
96                 if( *File->CurPos == '#' ) {
97                         while( *File->CurPos && *File->CurPos != '\n' )
98                                 File->CurPos ++;
99                         continue ;
100                 }
101                 
102                 // C-Style Line Comments
103                 if( *File->CurPos == '/' && File->CurPos[1] == '/' ) {
104                         while( *File->CurPos && *File->CurPos != '\n' )
105                                 File->CurPos ++;
106                         continue ;
107                 }
108                 
109                 // C-Style Block Comments
110                 if( *File->CurPos == '/' && File->CurPos[1] == '*' ) {
111                         File->CurPos += 2;      // Eat the '/*'
112                         while( *File->CurPos && !(File->CurPos[-1] == '*' && *File->CurPos == '/') )
113                         {
114                                 if( *File->CurPos == '\n' )     File->CurLine ++;
115                                 File->CurPos ++;
116                         }
117                         File->CurPos ++;        // Eat the '/'
118                         continue ;
119                 }
120                 
121                 // No more "whitespace"
122                 break;
123         }
124         
125         // Save previous tokens (speeds up PutBack and LookAhead)
126         File->LastToken = File->Token;
127         File->LastTokenStr = File->TokenStr;
128         File->LastTokenLen = File->TokenLen;
129         File->LastLine = File->CurLine;
130         
131         // Read token
132         File->TokenStr = File->CurPos;
133         switch( *File->CurPos++ )
134         {
135         case '\0':      ret = TOK_EOF;  break;
136         
137         // Operations
138         case '^':
139                 if( *File->CurPos == '^' ) {
140                         File->CurPos ++;
141                         ret = TOK_LOGICXOR;
142                         break;
143                 }
144                 ret = TOK_XOR;
145                 break;
146         
147         case '|':
148                 if( *File->CurPos == '|' ) {
149                         File->CurPos ++;
150                         ret = TOK_LOGICOR;
151                         break;
152                 }
153                 ret = TOK_OR;
154                 break;
155         
156         case '&':
157                 if( *File->CurPos == '&' ) {
158                         File->CurPos ++;
159                         ret = TOK_LOGICAND;
160                         break;
161                 }
162                 ret = TOK_AND;
163                 break;
164         
165         case '/':
166                 if( *File->CurPos == '=' ) {
167                         File->CurPos ++;
168                         ret = TOK_ASSIGN_DIV;
169                         break;
170                 }
171                 ret = TOK_DIV;
172                 break;
173         case '*':
174                 if( *File->CurPos == '=' ) {
175                         File->CurPos ++;
176                         ret = TOK_ASSIGN_MUL;
177                         break;
178                 }
179                 ret = TOK_MUL;
180                 break;
181         case '+':
182                 if( *File->CurPos == '+' ) {
183                         File->CurPos ++;
184                         ret = TOK_INCREMENT;
185                         break;
186                 }
187                 if( *File->CurPos == '=' ) {
188                         File->CurPos ++;
189                         ret = TOK_ASSIGN_PLUS;
190                         break;
191                 }
192                 ret = TOK_PLUS;
193                 break;
194         case '-':
195                 if( *File->CurPos == '-' ) {
196                         File->CurPos ++;
197                         ret = TOK_DECREMENT;
198                         break;
199                 }
200                 if( *File->CurPos == '=' ) {
201                         File->CurPos ++;
202                         ret = TOK_ASSIGN_MINUS;
203                         break;
204                 }
205                 if( *File->CurPos == '>' ) {
206                         File->CurPos ++;
207                         ret = TOK_ELEMENT;
208                         break;
209                 }
210                 ret = TOK_MINUS;
211                 break;
212         
213         // Strings
214         case '"':
215                 while( *File->CurPos && !(*File->CurPos == '"' && *File->CurPos != '\\') )
216                         File->CurPos ++;
217                 if( *File->CurPos )
218                 {
219                         File->CurPos ++;
220                         ret = TOK_STR;
221                 }
222                 else
223                         ret = TOK_EOF;
224                 break;
225         
226         // Brackets
227         case '(':       ret = TOK_PAREN_OPEN;   break;
228         case ')':       ret = TOK_PAREN_CLOSE;  break;
229         case '{':       ret = TOK_BRACE_OPEN;   break;
230         case '}':       ret = TOK_BRACE_CLOSE;  break;
231         case '[':       ret = TOK_SQUARE_OPEN;  break;
232         case ']':       ret = TOK_SQUARE_CLOSE; break;
233         
234         // Core symbols
235         case ';':       ret = TOK_SEMICOLON;    break;
236         case ',':       ret = TOK_COMMA;        break;
237         #if USE_SCOPE_CHAR
238         case '.':       ret = TOK_SCOPE;        break;
239         #endif
240         
241         // Equals
242         case '=':
243                 // Comparison Equals
244                 if( *File->CurPos == '=' ) {
245                         File->CurPos ++;
246                         ret = TOK_EQUALS;
247                         break;
248                 }
249                 // Assignment Equals
250                 ret = TOK_ASSIGN;
251                 break;
252         
253         // Less-Than
254         case '<':
255                 // Less-Than or Equal
256                 if( *File->CurPos == '=' ) {
257                         File->CurPos ++;
258                         ret = TOK_LTE;
259                         break;
260                 }
261                 ret = TOK_LT;
262                 break;
263         
264         // Greater-Than
265         case '>':
266                 // Greater-Than or Equal
267                 if( *File->CurPos == '=' ) {
268                         File->CurPos ++;
269                         ret = TOK_GTE;
270                         break;
271                 }
272                 ret = TOK_GT;
273                 break;
274         
275         // Logical NOT
276         case '!':
277                 ret = TOK_LOGICNOT;
278                 break;
279         // Bitwise NOT
280         case '~':
281                 ret = TOK_BWNOT;
282                 break;
283         
284         // Variables
285         // \$[0-9]+ or \$[_a-zA-Z][_a-zA-Z0-9]*
286         case '$':
287                 // Numeric Variable
288                 if( isdigit( *File->CurPos ) ) {
289                         while( isdigit(*File->CurPos) )
290                                 File->CurPos ++;
291                 }
292                 // Ident Variable
293                 else {
294                         while( is_ident(*File->CurPos) || isdigit(*File->CurPos) )
295                                 File->CurPos ++;
296                 }
297                 ret = TOK_VARIABLE;
298                 break;
299         
300         // Default (Numbers and Identifiers)
301         default:
302                 File->CurPos --;
303                 
304                 // Numbers
305                 if( isdigit(*File->CurPos) )
306                 {
307                         ret = TOK_INTEGER;
308                         if( *File->CurPos == '0' && File->CurPos[1] == 'x' )
309                         {
310                                 File->CurPos += 2;
311                                 while(('0' <= *File->CurPos && *File->CurPos <= '9')
312                                    || ('A' <= *File->CurPos && *File->CurPos <= 'F')
313                                    || ('a' <= *File->CurPos && *File->CurPos <= 'f') )
314                                 {
315                                         File->CurPos ++;
316                                 }
317                         }
318                         else
319                         {
320                                 while( isdigit(*File->CurPos) )
321                                         File->CurPos ++;
322                                 
323 //                              printf("*File->CurPos = '%c'\n", *File->CurPos);
324                                 
325                                 // Decimal
326                                 if( *File->CurPos == '.' )
327                                 {
328                                         ret = TOK_REAL;
329                                         File->CurPos ++;
330                                         while( isdigit(*File->CurPos) )
331                                                 File->CurPos ++;
332                                 }
333                                 // Exponent
334                                 if( *File->CurPos == 'e' || *File->CurPos == 'E' )
335                                 {
336                                         ret = TOK_REAL;
337                                         File->CurPos ++;
338                                         if(*File->CurPos == '-' || *File->CurPos == '+')
339                                                 File->CurPos ++;
340                                         while( isdigit(*File->CurPos) )
341                                                 File->CurPos ++;
342                                 }
343                                 
344 //                              printf(" ret = %i\n", ret);
345                         }
346                         break;
347                 }
348         
349                 // Identifier
350                 if( is_ident(*File->CurPos) )
351                 {
352                         ret = TOK_IDENT;
353                         
354                         // Identifier
355                         while( is_ident(*File->CurPos) || isdigit(*File->CurPos) )
356                                 File->CurPos ++;
357                         
358                         // This is set later too, but we use it below
359                         File->TokenLen = File->CurPos - File->TokenStr;
360                         
361                         // Check if it's a reserved word
362                         {
363                                 char    buf[File->TokenLen + 1];
364                                  int    i;
365                                 memcpy(buf, File->TokenStr, File->TokenLen);
366                                 buf[File->TokenLen] = 0;
367                                 for( i = 0; i < ARRAY_SIZE(csaReservedWords); i ++ )
368                                 {
369                                         if(strcmp(csaReservedWords[i].Name, buf) == 0) {
370                                                 ret = csaReservedWords[i].Value;
371                                                 break ;
372                                         }
373                                 }
374                         }
375                         // If there's no match, just keep ret as TOK_IDENT
376                         
377                         break;
378                 }
379                 // Syntax Error
380                 ret = TOK_INVAL;
381                 
382                 fprintf(stderr, "Syntax Error: Unknown symbol '%c'\n", *File->CurPos);
383                 longjmp(File->JmpTarget, 1);
384                 
385                 break;
386         }
387         // Return
388         File->Token = ret;
389         File->TokenLen = File->CurPos - File->TokenStr;
390         
391         #if DEBUG
392         {
393                 char    buf[ File->TokenLen + 1];
394                 memcpy(buf, File->TokenStr, File->TokenLen);
395                 buf[File->TokenLen] = 0;
396                 //printf("  GetToken: File->CurPos = %p\n", File->CurPos);
397                 printf(" GetToken: Return %i (%i long) (%s)\n", ret, File->TokenLen, buf);
398         }
399         #endif
400         return ret;
401 }
402
403 void PutBack(tParser *File)
404 {
405         if( File->LastToken == -1 ) {
406                 // ERROR:
407                 fprintf(stderr, "INTERNAL ERROR: Putback when LastToken==-1\n");
408                 longjmp( File->JmpTarget, -1 );
409                 return ;
410         }
411         #if DEBUG
412         printf(" PutBack: Was on %i\n", File->Token);
413         #endif
414         // Save
415         File->NextLine = File->CurLine;
416         File->NextToken = File->Token;
417         File->NextTokenStr = File->TokenStr;
418         File->NextTokenLen = File->TokenLen;
419         // Restore
420         File->CurLine = File->LastLine;
421         File->Token = File->LastToken;
422         File->TokenStr = File->LastTokenStr;
423         File->TokenLen = File->LastTokenLen;
424         File->CurPos = File->NextTokenStr;
425         // Invalidate
426         File->LastToken = -1;
427 }
428
429 int LookAhead(tParser *File)
430 {
431         // TODO: Should I save the entire state here?
432          int    ret = GetToken(File);
433         PutBack(File);
434         return ret;
435 }
436
437 // --- Helpers ---
/**
 * \brief Check for ident characters
 * \param ch	Character to test
 * \return 1 if \a ch may appear in an identifier, 0 otherwise
 * \note Matches [a-zA-Z_], plus '.' when USE_SCOPE_CHAR is not enabled, plus
 *       any byte with the top bit set (presumably so multi-byte encoded
 *       characters can be used in identifiers)
 */
int is_ident(char ch)
{
	// Work on the unsigned value so the high-bit test below does not depend
	// on whether plain char is signed on the target
	unsigned char	uch = (unsigned char)ch;
	
	if('a' <= uch && uch <= 'z')	return 1;
	if('A' <= uch && uch <= 'Z')	return 1;
	if(uch == '_')	return 1;
	#if !USE_SCOPE_CHAR
	if(uch == '.')	return 1;
	#endif
	if(uch >= 0x80)	return 1;
	return 0;
}
453
/**
 * \brief Check for an ASCII decimal digit
 * \param ch	Character to test
 * \return 1 for '0' through '9', 0 for anything else
 */
int isdigit(int ch)
{
	return (ch >= '0' && ch <= '9') ? 1 : 0;
}
459
/**
 * \brief Check for a character the lexer treats as whitespace
 * \param ch	Character to test
 * \return 1 for space, tab, backspace, newline and carriage return;
 *         0 for anything else
 */
int isspace(int ch)
{
	switch( ch )
	{
	case ' ':
	case '\t':
	case '\b':
	case '\n':
	case '\r':
		return 1;
	default:
		return 0;
	}
}

UCC git Repository :: git.ucc.asn.au