Per-CPU task switch disable, minor spiderscript changes
[tpg/acess2.git] / Usermode / Libraries / libspiderscript.so_src / lex.c
1 /*
2  * SpiderScript
3  * - Script Lexer
4  */
5 #include "tokens.h"
6 #include <stdlib.h>
7 #include <stdio.h>
8 #include <string.h>
9
// When non-zero, '.' is lexed as its own TOK_SCOPE token by GetToken;
// when zero, '.' is accepted as an identifier character by is_ident instead
#define USE_SCOPE_CHAR  0

// Set non-zero to make GetToken and PutBack print each token to stdout
#define DEBUG   0

// Element count of an array (only meaningful for real arrays, not pointers)
#define ARRAY_SIZE(x)   ((sizeof(x))/(sizeof((x)[0])))

// === PROTOTYPES ===
// Character classifiers and the lexer entry point, all defined in this file
 int    is_ident(char ch);
 int    isdigit(int ch);
 int    isspace(int ch);
 int    GetToken(tParser *File);
21
22 // === CONSTANTS ===
23 const struct {
24         const  int      Value;
25         const char      *Name;
26 } csaReservedWords[] = {
27         {TOK_RWD_FUNCTION, "function"},
28         
29         {TOK_RWD_RETURN, "return"},
30         {TOK_RWD_NEW, "new"},
31         
32         {TOK_RWD_IF, "if"},
33         {TOK_RWD_ELSE, "else"},
34         {TOK_RWD_DO, "do"},
35         {TOK_RWD_WHILE, "while"},
36         {TOK_RWD_FOR, "for"},
37         
38         {TOK_RWD_VOID, "void"},
39         {TOK_RWD_OBJECT, "Object"},
40         {TOK_RWD_OPAQUE, "Opaque"},
41         {TOK_RWD_INTEGER, "Integer"},
42         {TOK_RWD_REAL, "Real"},
43         {TOK_RWD_STRING, "String"}
44 };
45
46 // === CODE ===
47 /**
48  * \brief Read a token from a buffer
49  * \param File  Parser state
50  */
51 int GetToken(tParser *File)
52 {
53          int    ret;
54         
55         if( File->NextToken != -1 ) {
56                 // Save Last
57                 File->LastToken = File->Token;
58                 File->LastTokenStr = File->TokenStr;
59                 File->LastTokenLen = File->TokenLen;
60                 File->LastLine = File->CurLine;
61                 // Restore Next
62                 File->Token = File->NextToken;
63                 File->TokenStr = File->NextTokenStr;
64                 File->TokenLen = File->NextTokenLen;
65                 File->CurLine = File->NextLine;
66                 // Set State
67                 File->CurPos = File->TokenStr + File->TokenLen;
68                 File->NextToken = -1;
69                 {
70                         char    buf[ File->TokenLen + 1];
71                         memcpy(buf, File->TokenStr, File->TokenLen);
72                         buf[File->TokenLen] = 0;
73                         #if DEBUG
74                         printf(" GetToken: FAST Return %i (%i long) (%s)\n", File->Token, File->TokenLen, buf);
75                         #endif
76                 }
77                 return File->Token;
78         }
79         
80         //printf("  GetToken: File=%p, File->CurPos = %p\n", File, File->CurPos);
81         
82         // Clear whitespace (including comments)
83         for( ;; )
84         {
85                 // Whitespace
86                 while( isspace( *File->CurPos ) )
87                 {
88                         //printf("whitespace 0x%x, line = %i\n", *File->CurPos, File->CurLine);
89                         if( *File->CurPos == '\n' )
90                                 File->CurLine ++;
91                         File->CurPos ++;
92                 }
93                 
94                 // # Line Comments
95                 if( *File->CurPos == '#' ) {
96                         while( *File->CurPos && *File->CurPos != '\n' )
97                                 File->CurPos ++;
98                         continue ;
99                 }
100                 
101                 // C-Style Line Comments
102                 if( *File->CurPos == '/' && File->CurPos[1] == '/' ) {
103                         while( *File->CurPos && *File->CurPos != '\n' )
104                                 File->CurPos ++;
105                         continue ;
106                 }
107                 
108                 // C-Style Block Comments
109                 if( *File->CurPos == '/' && File->CurPos[1] == '*' ) {
110                         File->CurPos += 2;      // Eat the '/*'
111                         while( *File->CurPos && !(File->CurPos[-1] == '*' && *File->CurPos == '/') )
112                         {
113                                 if( *File->CurPos == '\n' )     File->CurLine ++;
114                                 File->CurPos ++;
115                         }
116                         File->CurPos ++;        // Eat the '/'
117                         continue ;
118                 }
119                 
120                 // No more "whitespace"
121                 break;
122         }
123         
124         // Save previous tokens (speeds up PutBack and LookAhead)
125         File->LastToken = File->Token;
126         File->LastTokenStr = File->TokenStr;
127         File->LastTokenLen = File->TokenLen;
128         File->LastLine = File->CurLine;
129         
130         // Read token
131         File->TokenStr = File->CurPos;
132         switch( *File->CurPos++ )
133         {
134         case '\0':      ret = TOK_EOF;  break;
135         
136         // Operations
137         case '^':
138                 if( *File->CurPos == '^' ) {
139                         File->CurPos ++;
140                         ret = TOK_LOGICXOR;
141                         break;
142                 }
143                 ret = TOK_XOR;
144                 break;
145         
146         case '|':
147                 if( *File->CurPos == '|' ) {
148                         File->CurPos ++;
149                         ret = TOK_LOGICOR;
150                         break;
151                 }
152                 ret = TOK_OR;
153                 break;
154         
155         case '&':
156                 if( *File->CurPos == '&' ) {
157                         File->CurPos ++;
158                         ret = TOK_LOGICAND;
159                         break;
160                 }
161                 ret = TOK_AND;
162                 break;
163         
164         case '/':       ret = TOK_DIV;  break;
165         case '*':       ret = TOK_MUL;  break;
166         case '+':       ret = TOK_PLUS; break;
167         case '-':
168                 if( *File->CurPos == '>' ) {
169                         File->CurPos ++;
170                         ret = TOK_ELEMENT;
171                 }
172                 else
173                         ret = TOK_MINUS;
174                 break;
175         
176         // Strings
177         case '"':
178                 while( *File->CurPos && !(*File->CurPos == '"' && *File->CurPos != '\\') )
179                         File->CurPos ++;
180                 File->CurPos ++;
181                 ret = TOK_STR;
182                 break;
183         
184         // Brackets
185         case '(':       ret = TOK_PAREN_OPEN;   break;
186         case ')':       ret = TOK_PAREN_CLOSE;  break;
187         case '{':       ret = TOK_BRACE_OPEN;   break;
188         case '}':       ret = TOK_BRACE_CLOSE;  break;
189         case '[':       ret = TOK_SQUARE_OPEN;  break;
190         case ']':       ret = TOK_SQUARE_CLOSE; break;
191         
192         // Core symbols
193         case ';':       ret = TOK_SEMICOLON;    break;
194         case ',':       ret = TOK_COMMA;        break;
195         #if USE_SCOPE_CHAR
196         case '.':       ret = TOK_SCOPE;        break;
197         #endif
198         
199         // Equals
200         case '=':
201                 // Comparison Equals
202                 if( *File->CurPos == '=' ) {
203                         File->CurPos ++;
204                         ret = TOK_EQUALS;
205                         break;
206                 }
207                 // Assignment Equals
208                 ret = TOK_ASSIGN;
209                 break;
210         
211         // Variables
212         // \$[0-9]+ or \$[_a-zA-Z][_a-zA-Z0-9]*
213         case '$':
214                 // Numeric Variable
215                 if( isdigit( *File->CurPos ) ) {
216                         while( isdigit(*File->CurPos) )
217                                 File->CurPos ++;
218                 }
219                 // Ident Variable
220                 else {
221                         while( is_ident(*File->CurPos) || isdigit(*File->CurPos) )
222                                 File->CurPos ++;
223                 }
224                 ret = TOK_VARIABLE;
225                 break;
226         
227         // Default (Numbers and Identifiers)
228         default:
229                 File->CurPos --;
230                 
231                 // Numbers
232                 if( isdigit(*File->CurPos) )
233                 {
234                         if( *File->CurPos == '0' && File->CurPos[1] == 'x' ) {
235                                 File->CurPos += 2;
236                                 while(('0' <= *File->CurPos && *File->CurPos <= '9')
237                                    || ('A' <= *File->CurPos && *File->CurPos <= 'F')
238                                    || ('a' <= *File->CurPos && *File->CurPos <= 'f') )
239                                 {
240                                         File->CurPos ++;
241                                 }
242                         }
243                         else {
244                                 while( isdigit(*File->CurPos) )
245                                         File->CurPos ++;
246                         }
247                         ret = TOK_INTEGER;
248                         break;
249                 }
250         
251                 // Identifier
252                 if( is_ident(*File->CurPos) )
253                 {
254                         // Identifier
255                         while( is_ident(*File->CurPos) || isdigit(*File->CurPos) )
256                                 File->CurPos ++;
257                         
258                         // This is set later too, but we use it below
259                         File->TokenLen = File->CurPos - File->TokenStr;
260                         ret = TOK_IDENT;
261                         
262                         // Check if it's a reserved word
263                         {
264                                 char    buf[File->TokenLen + 1];
265                                  int    i;
266                                 memcpy(buf, File->TokenStr, File->TokenLen);
267                                 buf[File->TokenLen] = 0;
268                                 for( i = 0; i < ARRAY_SIZE(csaReservedWords); i ++ )
269                                 {
270                                         if(strcmp(csaReservedWords[i].Name, buf) == 0) {
271                                                 ret = csaReservedWords[i].Value;
272                                                 break ;
273                                         }
274                                 }
275                         }
276                         // If there's no match, just keep ret as TOK_IDENT
277                         
278                         break;
279                 }
280                 // Syntax Error
281                 ret = TOK_INVAL;
282                 
283                 fprintf(stderr, "Syntax Error: Unknown symbol '%c'\n", *File->CurPos);
284                 longjmp(File->JmpTarget, 1);
285                 
286                 break;
287         }
288         // Return
289         File->Token = ret;
290         File->TokenLen = File->CurPos - File->TokenStr;
291         
292         #if DEBUG
293         {
294                 char    buf[ File->TokenLen + 1];
295                 memcpy(buf, File->TokenStr, File->TokenLen);
296                 buf[File->TokenLen] = 0;
297                 //printf("  GetToken: File->CurPos = %p\n", File->CurPos);
298                 printf(" GetToken: Return %i (%i long) (%s)\n", ret, File->TokenLen, buf);
299         }
300         #endif
301         return ret;
302 }
303
304 void PutBack(tParser *File)
305 {
306         if( File->LastToken == -1 ) {
307                 // ERROR:
308                 fprintf(stderr, "INTERNAL ERROR: Putback when LastToken==-1\n");
309                 longjmp( File->JmpTarget, -1 );
310                 return ;
311         }
312         #if DEBUG
313         printf(" PutBack: Was on %i\n", File->Token);
314         #endif
315         // Save
316         File->NextLine = File->CurLine;
317         File->NextToken = File->Token;
318         File->NextTokenStr = File->TokenStr;
319         File->NextTokenLen = File->TokenLen;
320         // Restore
321         File->CurLine = File->LastLine;
322         File->Token = File->LastToken;
323         File->TokenStr = File->LastTokenStr;
324         File->TokenLen = File->LastTokenLen;
325         File->CurPos = File->NextTokenStr;
326         // Invalidate
327         File->LastToken = -1;
328 }
329
330 int LookAhead(tParser *File)
331 {
332         // TODO: Should I save the entire state here?
333          int    ret = GetToken(File);
334         PutBack(File);
335         return ret;
336 }
337
338 // --- Helpers ---
/**
 * \brief Check for ident characters
 * \param ch	Character to classify
 * \return 1 if \a ch may appear in an identifier, 0 otherwise
 * \note Matches [a-zA-Z_], any byte with the top bit set (0x80-0xFF),
 *       and '.' when USE_SCOPE_CHAR is disabled. Digits are NOT matched.
 */
int is_ident(char ch)
{
	if('a' <= ch && ch <= 'z')	return 1;
	if('A' <= ch && ch <= 'Z')	return 1;
	if(ch == '_')	return 1;
	#if !USE_SCOPE_CHAR
	if(ch == '.')	return 1;
	#endif
	// Signedness of plain char is implementation-defined, so test the top
	// bit explicitly instead of relying on high bytes being negative
	if((unsigned char)ch >= 0x80)	return 1;
	return 0;
}
354
/**
 * \brief Check for an ASCII decimal digit
 * \param ch	Character code to classify
 * \return 1 for '0' through '9', 0 for anything else
 */
int isdigit(int ch)
{
	return (ch >= '0' && ch <= '9') ? 1 : 0;
}
360
/**
 * \brief Check for a character the lexer treats as whitespace
 * \param ch	Character code to classify
 * \return 1 for space, tab, backspace, newline or carriage return; 0 otherwise
 */
int isspace(int ch)
{
	switch( ch )
	{
	case ' ':
	case '\t':
	case '\b':
	case '\n':
	case '\r':
		return 1;
	default:
		return 0;
	}
}

UCC git Repository :: git.ucc.asn.au