bd1712b41b46d153ff6cd79bf1a209be3fdb1cb0
[tpg/acess2.git] / Usermode / Libraries / libspiderscript.so_src / lex.c
1 /*
2  * SpiderScript
3  * - Script Lexer
4  */
5 #include "tokens.h"
6 #include <stdlib.h>
7 #include <stdio.h>
8 #include <string.h>
9
// Make the scope character ('.') be a symbol, otherwise it's just
// a ident character
// - Non-zero: GetToken() returns TOK_SCOPE for '.'
// - Zero:     is_ident() accepts '.' as part of an identifier
#define USE_SCOPE_CHAR  0

// Set non-zero to print token traces from GetToken() and PutBack()
#define DEBUG   0

// Element count of an array object (not meaningful for a pointer)
#define ARRAY_SIZE(x)   ((sizeof(x))/(sizeof((x)[0])))
17
// === PROTOTYPES ===
// NOTE: isdigit() and isspace() here are this file's own definitions (see
//       the helpers at the end of the file); isspace() also accepts '\b'.
 int    is_ident(char ch);
 int    isdigit(int ch);
 int    isspace(int ch);
 int    GetToken(tParser *File);
23
24 // === CONSTANTS ===
25 const struct {
26         const  int      Value;
27         const char      *Name;
28 } csaReservedWords[] = {
29         {TOK_RWD_FUNCTION, "function"},
30         
31         {TOK_RWD_RETURN, "return"},
32         {TOK_RWD_NEW, "new"},
33         
34         {TOK_RWD_IF, "if"},
35         {TOK_RWD_ELSE, "else"},
36         {TOK_RWD_DO, "do"},
37         {TOK_RWD_WHILE, "while"},
38         {TOK_RWD_FOR, "for"},
39         
40         {TOK_RWD_VOID, "void"},
41         {TOK_RWD_OBJECT, "Object"},
42         {TOK_RWD_OPAQUE, "Opaque"},
43         {TOK_RWD_INTEGER, "Integer"},
44         {TOK_RWD_REAL, "Real"},
45         {TOK_RWD_STRING, "String"}
46 };
47
48 // === CODE ===
49 /**
50  * \brief Read a token from a buffer
51  * \param File  Parser state
52  */
53 int GetToken(tParser *File)
54 {
55          int    ret;
56         
57         if( File->NextToken != -1 ) {
58                 // Save Last
59                 File->LastToken = File->Token;
60                 File->LastTokenStr = File->TokenStr;
61                 File->LastTokenLen = File->TokenLen;
62                 File->LastLine = File->CurLine;
63                 // Restore Next
64                 File->Token = File->NextToken;
65                 File->TokenStr = File->NextTokenStr;
66                 File->TokenLen = File->NextTokenLen;
67                 File->CurLine = File->NextLine;
68                 // Set State
69                 File->CurPos = File->TokenStr + File->TokenLen;
70                 File->NextToken = -1;
71                 {
72                         char    buf[ File->TokenLen + 1];
73                         memcpy(buf, File->TokenStr, File->TokenLen);
74                         buf[File->TokenLen] = 0;
75                         #if DEBUG
76                         printf(" GetToken: FAST Return %i (%i long) (%s)\n", File->Token, File->TokenLen, buf);
77                         #endif
78                 }
79                 return File->Token;
80         }
81         
82         //printf("  GetToken: File=%p, File->CurPos = %p\n", File, File->CurPos);
83         
84         // Clear whitespace (including comments)
85         for( ;; )
86         {
87                 // Whitespace
88                 while( isspace( *File->CurPos ) )
89                 {
90                         //printf("whitespace 0x%x, line = %i\n", *File->CurPos, File->CurLine);
91                         if( *File->CurPos == '\n' )
92                                 File->CurLine ++;
93                         File->CurPos ++;
94                 }
95                 
96                 // # Line Comments
97                 if( *File->CurPos == '#' ) {
98                         while( *File->CurPos && *File->CurPos != '\n' )
99                                 File->CurPos ++;
100                         continue ;
101                 }
102                 
103                 // C-Style Line Comments
104                 if( *File->CurPos == '/' && File->CurPos[1] == '/' ) {
105                         while( *File->CurPos && *File->CurPos != '\n' )
106                                 File->CurPos ++;
107                         continue ;
108                 }
109                 
110                 // C-Style Block Comments
111                 if( *File->CurPos == '/' && File->CurPos[1] == '*' ) {
112                         File->CurPos += 2;      // Eat the '/*'
113                         while( *File->CurPos && !(File->CurPos[-1] == '*' && *File->CurPos == '/') )
114                         {
115                                 if( *File->CurPos == '\n' )     File->CurLine ++;
116                                 File->CurPos ++;
117                         }
118                         File->CurPos ++;        // Eat the '/'
119                         continue ;
120                 }
121                 
122                 // No more "whitespace"
123                 break;
124         }
125         
126         // Save previous tokens (speeds up PutBack and LookAhead)
127         File->LastToken = File->Token;
128         File->LastTokenStr = File->TokenStr;
129         File->LastTokenLen = File->TokenLen;
130         File->LastLine = File->CurLine;
131         
132         // Read token
133         File->TokenStr = File->CurPos;
134         switch( *File->CurPos++ )
135         {
136         case '\0':      ret = TOK_EOF;  break;
137         
138         // Operations
139         case '^':
140                 if( *File->CurPos == '^' ) {
141                         File->CurPos ++;
142                         ret = TOK_LOGICXOR;
143                         break;
144                 }
145                 ret = TOK_XOR;
146                 break;
147         
148         case '|':
149                 if( *File->CurPos == '|' ) {
150                         File->CurPos ++;
151                         ret = TOK_LOGICOR;
152                         break;
153                 }
154                 ret = TOK_OR;
155                 break;
156         
157         case '&':
158                 if( *File->CurPos == '&' ) {
159                         File->CurPos ++;
160                         ret = TOK_LOGICAND;
161                         break;
162                 }
163                 ret = TOK_AND;
164                 break;
165         
166         case '/':       ret = TOK_DIV;  break;
167         case '*':       ret = TOK_MUL;  break;
168         case '+':
169                 if( *File->CurPos == '+' ) {
170                         File->CurPos ++;
171                         ret = TOK_INCREMENT;
172                         break;
173                 }
174                 if( *File->CurPos == '=' ) {
175                         File->CurPos ++;
176                         ret = TOK_ASSIGN_PLUS;
177                         break;
178                 }
179                 ret = TOK_PLUS;
180                 break;
181         case '-':
182                 if( *File->CurPos == '-' ) {
183                         File->CurPos ++;
184                         ret = TOK_DECREMENT;
185                         break;
186                 }
187                 if( *File->CurPos == '=' ) {
188                         File->CurPos ++;
189                         ret = TOK_ASSIGN_MINUS;
190                         break;
191                 }
192                 if( *File->CurPos == '>' ) {
193                         File->CurPos ++;
194                         ret = TOK_ELEMENT;
195                         break;
196                 }
197                 ret = TOK_MINUS;
198                 break;
199         
200         // Strings
201         case '"':
202                 while( *File->CurPos && !(*File->CurPos == '"' && *File->CurPos != '\\') )
203                         File->CurPos ++;
204                 File->CurPos ++;
205                 ret = TOK_STR;
206                 break;
207         
208         // Brackets
209         case '(':       ret = TOK_PAREN_OPEN;   break;
210         case ')':       ret = TOK_PAREN_CLOSE;  break;
211         case '{':       ret = TOK_BRACE_OPEN;   break;
212         case '}':       ret = TOK_BRACE_CLOSE;  break;
213         case '[':       ret = TOK_SQUARE_OPEN;  break;
214         case ']':       ret = TOK_SQUARE_CLOSE; break;
215         
216         // Core symbols
217         case ';':       ret = TOK_SEMICOLON;    break;
218         case ',':       ret = TOK_COMMA;        break;
219         #if USE_SCOPE_CHAR
220         case '.':       ret = TOK_SCOPE;        break;
221         #endif
222         
223         // Equals
224         case '=':
225                 // Comparison Equals
226                 if( *File->CurPos == '=' ) {
227                         File->CurPos ++;
228                         ret = TOK_EQUALS;
229                         break;
230                 }
231                 // Assignment Equals
232                 ret = TOK_ASSIGN;
233                 break;
234         
235         // Less-Than
236         case '<':
237                 // Less-Than or Equal
238                 if( *File->CurPos == '=' ) {
239                         File->CurPos ++;
240                         ret = TOK_LTE;
241                         break;
242                 }
243                 ret = TOK_LT;
244                 break;
245         
246         // Greater-Than
247         case '>':
248                 // Greater-Than or Equal
249                 if( *File->CurPos == '=' ) {
250                         File->CurPos ++;
251                         ret = TOK_GTE;
252                         break;
253                 }
254                 ret = TOK_GT;
255                 break;
256         
257         // Variables
258         // \$[0-9]+ or \$[_a-zA-Z][_a-zA-Z0-9]*
259         case '$':
260                 // Numeric Variable
261                 if( isdigit( *File->CurPos ) ) {
262                         while( isdigit(*File->CurPos) )
263                                 File->CurPos ++;
264                 }
265                 // Ident Variable
266                 else {
267                         while( is_ident(*File->CurPos) || isdigit(*File->CurPos) )
268                                 File->CurPos ++;
269                 }
270                 ret = TOK_VARIABLE;
271                 break;
272         
273         // Default (Numbers and Identifiers)
274         default:
275                 File->CurPos --;
276                 
277                 // Numbers
278                 if( isdigit(*File->CurPos) )
279                 {
280                         if( *File->CurPos == '0' && File->CurPos[1] == 'x' ) {
281                                 File->CurPos += 2;
282                                 while(('0' <= *File->CurPos && *File->CurPos <= '9')
283                                    || ('A' <= *File->CurPos && *File->CurPos <= 'F')
284                                    || ('a' <= *File->CurPos && *File->CurPos <= 'f') )
285                                 {
286                                         File->CurPos ++;
287                                 }
288                         }
289                         else {
290                                 while( isdigit(*File->CurPos) )
291                                         File->CurPos ++;
292                         }
293                         ret = TOK_INTEGER;
294                         break;
295                 }
296         
297                 // Identifier
298                 if( is_ident(*File->CurPos) )
299                 {
300                         // Identifier
301                         while( is_ident(*File->CurPos) || isdigit(*File->CurPos) )
302                                 File->CurPos ++;
303                         
304                         // This is set later too, but we use it below
305                         File->TokenLen = File->CurPos - File->TokenStr;
306                         ret = TOK_IDENT;
307                         
308                         // Check if it's a reserved word
309                         {
310                                 char    buf[File->TokenLen + 1];
311                                  int    i;
312                                 memcpy(buf, File->TokenStr, File->TokenLen);
313                                 buf[File->TokenLen] = 0;
314                                 for( i = 0; i < ARRAY_SIZE(csaReservedWords); i ++ )
315                                 {
316                                         if(strcmp(csaReservedWords[i].Name, buf) == 0) {
317                                                 ret = csaReservedWords[i].Value;
318                                                 break ;
319                                         }
320                                 }
321                         }
322                         // If there's no match, just keep ret as TOK_IDENT
323                         
324                         break;
325                 }
326                 // Syntax Error
327                 ret = TOK_INVAL;
328                 
329                 fprintf(stderr, "Syntax Error: Unknown symbol '%c'\n", *File->CurPos);
330                 longjmp(File->JmpTarget, 1);
331                 
332                 break;
333         }
334         // Return
335         File->Token = ret;
336         File->TokenLen = File->CurPos - File->TokenStr;
337         
338         #if DEBUG
339         {
340                 char    buf[ File->TokenLen + 1];
341                 memcpy(buf, File->TokenStr, File->TokenLen);
342                 buf[File->TokenLen] = 0;
343                 //printf("  GetToken: File->CurPos = %p\n", File->CurPos);
344                 printf(" GetToken: Return %i (%i long) (%s)\n", ret, File->TokenLen, buf);
345         }
346         #endif
347         return ret;
348 }
349
350 void PutBack(tParser *File)
351 {
352         if( File->LastToken == -1 ) {
353                 // ERROR:
354                 fprintf(stderr, "INTERNAL ERROR: Putback when LastToken==-1\n");
355                 longjmp( File->JmpTarget, -1 );
356                 return ;
357         }
358         #if DEBUG
359         printf(" PutBack: Was on %i\n", File->Token);
360         #endif
361         // Save
362         File->NextLine = File->CurLine;
363         File->NextToken = File->Token;
364         File->NextTokenStr = File->TokenStr;
365         File->NextTokenLen = File->TokenLen;
366         // Restore
367         File->CurLine = File->LastLine;
368         File->Token = File->LastToken;
369         File->TokenStr = File->LastTokenStr;
370         File->TokenLen = File->LastTokenLen;
371         File->CurPos = File->NextTokenStr;
372         // Invalidate
373         File->LastToken = -1;
374 }
375
376 int LookAhead(tParser *File)
377 {
378         // TODO: Should I save the entire state here?
379          int    ret = GetToken(File);
380         PutBack(File);
381         return ret;
382 }
383
384 // --- Helpers ---
/**
 * \brief Check for ident characters
 * \param ch	Character to test
 * \return 1 if \a ch may appear in an identifier, 0 otherwise
 * \note Matches Regex [a-zA-Z_], plus '.' when USE_SCOPE_CHAR is zero,
 *       plus any byte with the top bit set (0x80-0xFF)
 */
int is_ident(char ch)
{
	// Work on the unsigned value so that the top-bit test gives the same
	// answer whether plain 'char' is signed or unsigned on this target
	const unsigned char	uch = (unsigned char)ch;
	
	if('a' <= uch && uch <= 'z')	return 1;
	if('A' <= uch && uch <= 'Z')	return 1;
	if(uch == '_')	return 1;
	#if !USE_SCOPE_CHAR
	if(uch == '.')	return 1;
	#endif
	// Non-ASCII bytes are accepted as identifier characters
	if(uch >= 0x80)	return 1;
	return 0;
}
400
/**
 * \brief Check for a decimal digit
 * \param ch	Character to test
 * \return 1 for '0' to '9', 0 for anything else
 */
int isdigit(int ch)
{
	return (ch >= '0' && ch <= '9') ? 1 : 0;
}
406
/**
 * \brief Check for a whitespace character
 * \param ch	Character to test
 * \return 1 for space, tab, backspace, newline or carriage return,
 *         0 for anything else
 */
int isspace(int ch)
{
	switch( ch )
	{
	case ' ':
	case '\t':
	case '\b':
	case '\n':
	case '\r':
		return 1;
	default:
		return 0;
	}
}

UCC git Repository :: git.ucc.asn.au