SpiderScript - Fixed namespace handling (and unset the . hack)
[tpg/acess2.git] / Usermode / Libraries / libspiderscript.so_src / lex.c
1 /*
2  * SpiderScript
3  * - Script Lexer
4  */
5 #include "tokens.h"
6 #include <stdlib.h>
7 #include <stdio.h>
8 #include <string.h>
9
10 #define DEBUG   0
11
12 #define ARRAY_SIZE(x)   ((sizeof(x))/(sizeof((x)[0])))
13
14 // === PROTOTYPES ===
15  int    is_ident(char ch);
16  int    isdigit(int ch);
17  int    isspace(int ch);
18  int    GetToken(tParser *File);
19
20 // === CONSTANTS ===
21 const struct {
22         const  int      Value;
23         const char      *Name;
24 } csaReservedWords[] = {
25         {TOK_RWD_FUNCTION, "function"},
26         
27         {TOK_RWD_RETURN, "return"},
28         {TOK_RWD_NEW, "new"},
29         
30         {TOK_RWD_IF, "if"},
31         {TOK_RWD_ELSE, "else"},
32         {TOK_RWD_DO, "do"},
33         {TOK_RWD_WHILE, "while"},
34         {TOK_RWD_FOR, "for"},
35         
36         {TOK_RWD_VOID, "void"},
37         {TOK_RWD_OBJECT, "Object"},
38         {TOK_RWD_OPAQUE, "Opaque"},
39         {TOK_RWD_INTEGER, "Integer"},
40         {TOK_RWD_REAL, "Real"},
41         {TOK_RWD_STRING, "String"}
42 };
43
44 // === CODE ===
45 /**
46  * \brief Read a token from a buffer
47  * \param File  Parser state
48  */
49 int GetToken(tParser *File)
50 {
51          int    ret;
52         
53         if( File->NextToken != -1 ) {
54                 // Save Last
55                 File->LastToken = File->Token;
56                 File->LastTokenStr = File->TokenStr;
57                 File->LastTokenLen = File->TokenLen;
58                 File->LastLine = File->CurLine;
59                 // Restore Next
60                 File->Token = File->NextToken;
61                 File->TokenStr = File->NextTokenStr;
62                 File->TokenLen = File->NextTokenLen;
63                 File->CurLine = File->NextLine;
64                 // Set State
65                 File->CurPos = File->TokenStr + File->TokenLen;
66                 File->NextToken = -1;
67                 {
68                         char    buf[ File->TokenLen + 1];
69                         memcpy(buf, File->TokenStr, File->TokenLen);
70                         buf[File->TokenLen] = 0;
71                         #if DEBUG
72                         printf(" GetToken: FAST Return %i (%i long) (%s)\n", File->Token, File->TokenLen, buf);
73                         #endif
74                 }
75                 return File->Token;
76         }
77         
78         //printf("  GetToken: File=%p, File->CurPos = %p\n", File, File->CurPos);
79         
80         // Clear whitespace (including comments)
81         for( ;; )
82         {
83                 // Whitespace
84                 while( isspace( *File->CurPos ) )
85                 {
86                         //printf("whitespace 0x%x, line = %i\n", *File->CurPos, File->CurLine);
87                         if( *File->CurPos == '\n' )
88                                 File->CurLine ++;
89                         File->CurPos ++;
90                 }
91                 
92                 // # Line Comments
93                 if( *File->CurPos == '#' ) {
94                         while( *File->CurPos && *File->CurPos != '\n' )
95                                 File->CurPos ++;
96                         continue ;
97                 }
98                 
99                 // C-Style Line Comments
100                 if( *File->CurPos == '/' && File->CurPos[1] == '/' ) {
101                         while( *File->CurPos && *File->CurPos != '\n' )
102                                 File->CurPos ++;
103                         continue ;
104                 }
105                 
106                 // C-Style Block Comments
107                 if( *File->CurPos == '/' && File->CurPos[1] == '*' ) {
108                         File->CurPos += 2;      // Eat the '/*'
109                         while( *File->CurPos && !(File->CurPos[-1] == '*' && *File->CurPos == '/') )
110                         {
111                                 if( *File->CurPos == '\n' )     File->CurLine ++;
112                                 File->CurPos ++;
113                         }
114                         File->CurPos ++;        // Eat the '/'
115                         continue ;
116                 }
117                 
118                 // No more "whitespace"
119                 break;
120         }
121         
122         // Save previous tokens (speeds up PutBack and LookAhead)
123         File->LastToken = File->Token;
124         File->LastTokenStr = File->TokenStr;
125         File->LastTokenLen = File->TokenLen;
126         File->LastLine = File->CurLine;
127         
128         // Read token
129         File->TokenStr = File->CurPos;
130         switch( *File->CurPos++ )
131         {
132         case '\0':      ret = TOK_EOF;  break;
133         
134         // Operations
135         case '^':
136                 if( *File->CurPos == '^' ) {
137                         File->CurPos ++;
138                         ret = TOK_LOGICXOR;
139                         break;
140                 }
141                 ret = TOK_XOR;
142                 break;
143         
144         case '|':
145                 if( *File->CurPos == '|' ) {
146                         File->CurPos ++;
147                         ret = TOK_LOGICOR;
148                         break;
149                 }
150                 ret = TOK_OR;
151                 break;
152         
153         case '&':
154                 if( *File->CurPos == '&' ) {
155                         File->CurPos ++;
156                         ret = TOK_LOGICAND;
157                         break;
158                 }
159                 ret = TOK_AND;
160                 break;
161         
162         case '/':
163                 if( *File->CurPos == '=' ) {
164                         File->CurPos ++;
165                         ret = TOK_ASSIGN_DIV;
166                         break;
167                 }
168                 ret = TOK_DIV;
169                 break;
170         case '*':
171                 if( *File->CurPos == '=' ) {
172                         File->CurPos ++;
173                         ret = TOK_ASSIGN_MUL;
174                         break;
175                 }
176                 ret = TOK_MUL;
177                 break;
178         case '+':
179                 if( *File->CurPos == '+' ) {
180                         File->CurPos ++;
181                         ret = TOK_INCREMENT;
182                         break;
183                 }
184                 if( *File->CurPos == '=' ) {
185                         File->CurPos ++;
186                         ret = TOK_ASSIGN_PLUS;
187                         break;
188                 }
189                 ret = TOK_PLUS;
190                 break;
191         case '-':
192                 if( *File->CurPos == '-' ) {
193                         File->CurPos ++;
194                         ret = TOK_DECREMENT;
195                         break;
196                 }
197                 if( *File->CurPos == '=' ) {
198                         File->CurPos ++;
199                         ret = TOK_ASSIGN_MINUS;
200                         break;
201                 }
202                 if( *File->CurPos == '>' ) {
203                         File->CurPos ++;
204                         ret = TOK_ELEMENT;
205                         break;
206                 }
207                 ret = TOK_MINUS;
208                 break;
209         
210         // Strings
211         case '"':
212                 while( *File->CurPos && !(*File->CurPos == '"' && *File->CurPos != '\\') )
213                         File->CurPos ++;
214                 if( *File->CurPos )
215                 {
216                         File->CurPos ++;
217                         ret = TOK_STR;
218                 }
219                 else
220                         ret = TOK_EOF;
221                 break;
222         
223         // Brackets
224         case '(':       ret = TOK_PAREN_OPEN;   break;
225         case ')':       ret = TOK_PAREN_CLOSE;  break;
226         case '{':       ret = TOK_BRACE_OPEN;   break;
227         case '}':       ret = TOK_BRACE_CLOSE;  break;
228         case '[':       ret = TOK_SQUARE_OPEN;  break;
229         case ']':       ret = TOK_SQUARE_CLOSE; break;
230         
231         // Core symbols
232         case ';':       ret = TOK_SEMICOLON;    break;
233         case ',':       ret = TOK_COMMA;        break;
234         #if USE_SCOPE_CHAR
235         case '.':       ret = TOK_SCOPE;        break;
236         #endif
237         
238         // Equals
239         case '=':
240                 // Comparison Equals
241                 if( *File->CurPos == '=' ) {
242                         File->CurPos ++;
243                         ret = TOK_EQUALS;
244                         break;
245                 }
246                 // Assignment Equals
247                 ret = TOK_ASSIGN;
248                 break;
249         
250         // Less-Than
251         case '<':
252                 // Less-Than or Equal
253                 if( *File->CurPos == '=' ) {
254                         File->CurPos ++;
255                         ret = TOK_LTE;
256                         break;
257                 }
258                 ret = TOK_LT;
259                 break;
260         
261         // Greater-Than
262         case '>':
263                 // Greater-Than or Equal
264                 if( *File->CurPos == '=' ) {
265                         File->CurPos ++;
266                         ret = TOK_GTE;
267                         break;
268                 }
269                 ret = TOK_GT;
270                 break;
271         
272         // Logical NOT
273         case '!':
274                 ret = TOK_LOGICNOT;
275                 break;
276         // Bitwise NOT
277         case '~':
278                 ret = TOK_BWNOT;
279                 break;
280         
281         // Variables
282         // \$[0-9]+ or \$[_a-zA-Z][_a-zA-Z0-9]*
283         case '$':
284                 // Numeric Variable
285                 if( isdigit( *File->CurPos ) ) {
286                         while( isdigit(*File->CurPos) )
287                                 File->CurPos ++;
288                 }
289                 // Ident Variable
290                 else {
291                         while( is_ident(*File->CurPos) || isdigit(*File->CurPos) )
292                                 File->CurPos ++;
293                 }
294                 ret = TOK_VARIABLE;
295                 break;
296         
297         // Default (Numbers and Identifiers)
298         default:
299                 File->CurPos --;
300                 
301                 // Numbers
302                 if( isdigit(*File->CurPos) )
303                 {
304                         ret = TOK_INTEGER;
305                         if( *File->CurPos == '0' && File->CurPos[1] == 'x' )
306                         {
307                                 File->CurPos += 2;
308                                 while(('0' <= *File->CurPos && *File->CurPos <= '9')
309                                    || ('A' <= *File->CurPos && *File->CurPos <= 'F')
310                                    || ('a' <= *File->CurPos && *File->CurPos <= 'f') )
311                                 {
312                                         File->CurPos ++;
313                                 }
314                         }
315                         else
316                         {
317                                 while( isdigit(*File->CurPos) )
318                                         File->CurPos ++;
319                                 
320 //                              printf("*File->CurPos = '%c'\n", *File->CurPos);
321                                 
322                                 // Decimal
323                                 if( *File->CurPos == '.' )
324                                 {
325                                         ret = TOK_REAL;
326                                         File->CurPos ++;
327                                         while( isdigit(*File->CurPos) )
328                                                 File->CurPos ++;
329                                 }
330                                 // Exponent
331                                 if( *File->CurPos == 'e' || *File->CurPos == 'E' )
332                                 {
333                                         ret = TOK_REAL;
334                                         File->CurPos ++;
335                                         if(*File->CurPos == '-' || *File->CurPos == '+')
336                                                 File->CurPos ++;
337                                         while( isdigit(*File->CurPos) )
338                                                 File->CurPos ++;
339                                 }
340                                 
341 //                              printf(" ret = %i\n", ret);
342                         }
343                         break;
344                 }
345         
346                 // Identifier
347                 if( is_ident(*File->CurPos) )
348                 {
349                         ret = TOK_IDENT;
350                         
351                         // Identifier
352                         while( is_ident(*File->CurPos) || isdigit(*File->CurPos) )
353                                 File->CurPos ++;
354                         
355                         // This is set later too, but we use it below
356                         File->TokenLen = File->CurPos - File->TokenStr;
357                         
358                         // Check if it's a reserved word
359                         {
360                                 char    buf[File->TokenLen + 1];
361                                  int    i;
362                                 memcpy(buf, File->TokenStr, File->TokenLen);
363                                 buf[File->TokenLen] = 0;
364                                 for( i = 0; i < ARRAY_SIZE(csaReservedWords); i ++ )
365                                 {
366                                         if(strcmp(csaReservedWords[i].Name, buf) == 0) {
367                                                 ret = csaReservedWords[i].Value;
368                                                 break ;
369                                         }
370                                 }
371                         }
372                         // If there's no match, just keep ret as TOK_IDENT
373                         
374                         break;
375                 }
376                 // Syntax Error
377                 ret = TOK_INVAL;
378                 
379                 fprintf(stderr, "Syntax Error: Unknown symbol '%c'\n", *File->CurPos);
380                 longjmp(File->JmpTarget, 1);
381                 
382                 break;
383         }
384         // Return
385         File->Token = ret;
386         File->TokenLen = File->CurPos - File->TokenStr;
387         
388         #if DEBUG
389         {
390                 char    buf[ File->TokenLen + 1];
391                 memcpy(buf, File->TokenStr, File->TokenLen);
392                 buf[File->TokenLen] = 0;
393                 //printf("  GetToken: File->CurPos = %p\n", File->CurPos);
394                 printf(" GetToken: Return %i (%i long) (%s)\n", ret, File->TokenLen, buf);
395         }
396         #endif
397         return ret;
398 }
399
400 void PutBack(tParser *File)
401 {
402         if( File->LastToken == -1 ) {
403                 // ERROR:
404                 fprintf(stderr, "INTERNAL ERROR: Putback when LastToken==-1\n");
405                 longjmp( File->JmpTarget, -1 );
406                 return ;
407         }
408         #if DEBUG
409         printf(" PutBack: Was on %i\n", File->Token);
410         #endif
411         // Save
412         File->NextLine = File->CurLine;
413         File->NextToken = File->Token;
414         File->NextTokenStr = File->TokenStr;
415         File->NextTokenLen = File->TokenLen;
416         // Restore
417         File->CurLine = File->LastLine;
418         File->Token = File->LastToken;
419         File->TokenStr = File->LastTokenStr;
420         File->TokenLen = File->LastTokenLen;
421         File->CurPos = File->NextTokenStr;
422         // Invalidate
423         File->LastToken = -1;
424 }
425
426 int LookAhead(tParser *File)
427 {
428         // TODO: Should I save the entire state here?
429          int    ret = GetToken(File);
430         PutBack(File);
431         return ret;
432 }
433
434 // --- Helpers ---
/**
 * \brief Check for ident characters
 * \param ch	Character to test
 * \return 1 if \a ch is accepted as an identifier character, 0 otherwise
 * \note Matches [a-zA-Z_] and any byte with the high bit set (0x80-0xFF).
 *       When USE_SCOPE_CHAR is unset or zero, '.' is accepted as well.
 *       Digits are NOT matched here.
 */
int is_ident(char ch)
{
	if('a' <= ch && ch <= 'z')	return 1;
	if('A' <= ch && ch <= 'Z')	return 1;
	if(ch == '_')	return 1;
	#if !USE_SCOPE_CHAR
	if(ch == '.')	return 1;
	#endif
	// High-bit bytes: test via unsigned char so the result does not depend
	// on whether plain char is signed on the target
	if((unsigned char)ch >= 0x80)	return 1;
	return 0;
}
450
/**
 * \brief Check for an ASCII decimal digit
 * \param ch	Character to test
 * \return 1 if \a ch is in '0'..'9', 0 otherwise
 */
int isdigit(int ch)
{
	return (ch >= '0' && ch <= '9') ? 1 : 0;
}
456
/**
 * \brief Check for a character the lexer treats as whitespace
 * \param ch	Character to test
 * \return 1 for space, tab, backspace, newline or carriage return; 0 otherwise
 */
int isspace(int ch)
{
	switch( ch )
	{
	case ' ':
	case '\t':
	case '\b':
	case '\n':
	case '\r':
		return 1;
	default:
		return 0;
	}
}

UCC git Repository :: git.ucc.asn.au