fd395e0f2d188f07a65473ceaea20184630c7555
[tpg/acess2.git] / Usermode / Libraries / libspiderscript.so_src / lex.c
1 /*
2  * SpiderScript
3  * - Script Lexer
4  */
5 #include "tokens.h"
6 #include <stdlib.h>
7 #include <stdio.h>
8 #include <string.h>
9
10 // Make the scope character ('.') be a symbol, otherwise it's just
11 // a ident character
12 #define USE_SCOPE_CHAR  0
13
14 #define DEBUG   0
15
16 #define ARRAY_SIZE(x)   ((sizeof(x))/(sizeof((x)[0])))
17
18 // === PROTOTYPES ===
19  int    is_ident(char ch);
20  int    isdigit(int ch);
21  int    isspace(int ch);
22  int    GetToken(tParser *File);
23
24 // === CONSTANTS ===
25 const struct {
26         const  int      Value;
27         const char      *Name;
28 } csaReservedWords[] = {
29         {TOK_RWD_FUNCTION, "function"},
30         
31         {TOK_RWD_RETURN, "return"},
32         {TOK_RWD_NEW, "new"},
33         
34         {TOK_RWD_IF, "if"},
35         {TOK_RWD_ELSE, "else"},
36         {TOK_RWD_DO, "do"},
37         {TOK_RWD_WHILE, "while"},
38         {TOK_RWD_FOR, "for"},
39         
40         {TOK_RWD_VOID, "void"},
41         {TOK_RWD_OBJECT, "Object"},
42         {TOK_RWD_OPAQUE, "Opaque"},
43         {TOK_RWD_INTEGER, "Integer"},
44         {TOK_RWD_REAL, "Real"},
45         {TOK_RWD_STRING, "String"}
46 };
47
48 // === CODE ===
49 /**
50  * \brief Read a token from a buffer
51  * \param File  Parser state
52  */
53 int GetToken(tParser *File)
54 {
55          int    ret;
56         
57         if( File->NextToken != -1 ) {
58                 // Save Last
59                 File->LastToken = File->Token;
60                 File->LastTokenStr = File->TokenStr;
61                 File->LastTokenLen = File->TokenLen;
62                 File->LastLine = File->CurLine;
63                 // Restore Next
64                 File->Token = File->NextToken;
65                 File->TokenStr = File->NextTokenStr;
66                 File->TokenLen = File->NextTokenLen;
67                 File->CurLine = File->NextLine;
68                 // Set State
69                 File->CurPos = File->TokenStr + File->TokenLen;
70                 File->NextToken = -1;
71                 {
72                         char    buf[ File->TokenLen + 1];
73                         memcpy(buf, File->TokenStr, File->TokenLen);
74                         buf[File->TokenLen] = 0;
75                         #if DEBUG
76                         printf(" GetToken: FAST Return %i (%i long) (%s)\n", File->Token, File->TokenLen, buf);
77                         #endif
78                 }
79                 return File->Token;
80         }
81         
82         //printf("  GetToken: File=%p, File->CurPos = %p\n", File, File->CurPos);
83         
84         // Clear whitespace (including comments)
85         for( ;; )
86         {
87                 // Whitespace
88                 while( isspace( *File->CurPos ) )
89                 {
90                         //printf("whitespace 0x%x, line = %i\n", *File->CurPos, File->CurLine);
91                         if( *File->CurPos == '\n' )
92                                 File->CurLine ++;
93                         File->CurPos ++;
94                 }
95                 
96                 // # Line Comments
97                 if( *File->CurPos == '#' ) {
98                         while( *File->CurPos && *File->CurPos != '\n' )
99                                 File->CurPos ++;
100                         continue ;
101                 }
102                 
103                 // C-Style Line Comments
104                 if( *File->CurPos == '/' && File->CurPos[1] == '/' ) {
105                         while( *File->CurPos && *File->CurPos != '\n' )
106                                 File->CurPos ++;
107                         continue ;
108                 }
109                 
110                 // C-Style Block Comments
111                 if( *File->CurPos == '/' && File->CurPos[1] == '*' ) {
112                         File->CurPos += 2;      // Eat the '/*'
113                         while( *File->CurPos && !(File->CurPos[-1] == '*' && *File->CurPos == '/') )
114                         {
115                                 if( *File->CurPos == '\n' )     File->CurLine ++;
116                                 File->CurPos ++;
117                         }
118                         File->CurPos ++;        // Eat the '/'
119                         continue ;
120                 }
121                 
122                 // No more "whitespace"
123                 break;
124         }
125         
126         // Save previous tokens (speeds up PutBack and LookAhead)
127         File->LastToken = File->Token;
128         File->LastTokenStr = File->TokenStr;
129         File->LastTokenLen = File->TokenLen;
130         File->LastLine = File->CurLine;
131         
132         // Read token
133         File->TokenStr = File->CurPos;
134         switch( *File->CurPos++ )
135         {
136         case '\0':      ret = TOK_EOF;  break;
137         
138         // Operations
139         case '^':
140                 if( *File->CurPos == '^' ) {
141                         File->CurPos ++;
142                         ret = TOK_LOGICXOR;
143                         break;
144                 }
145                 ret = TOK_XOR;
146                 break;
147         
148         case '|':
149                 if( *File->CurPos == '|' ) {
150                         File->CurPos ++;
151                         ret = TOK_LOGICOR;
152                         break;
153                 }
154                 ret = TOK_OR;
155                 break;
156         
157         case '&':
158                 if( *File->CurPos == '&' ) {
159                         File->CurPos ++;
160                         ret = TOK_LOGICAND;
161                         break;
162                 }
163                 ret = TOK_AND;
164                 break;
165         
166         case '/':       ret = TOK_DIV;  break;
167         case '*':       ret = TOK_MUL;  break;
168         case '+':       ret = TOK_PLUS; break;
169         case '-':
170                 if( *File->CurPos == '>' ) {
171                         File->CurPos ++;
172                         ret = TOK_ELEMENT;
173                 }
174                 else
175                         ret = TOK_MINUS;
176                 break;
177         
178         // Strings
179         case '"':
180                 while( *File->CurPos && !(*File->CurPos == '"' && *File->CurPos != '\\') )
181                         File->CurPos ++;
182                 File->CurPos ++;
183                 ret = TOK_STR;
184                 break;
185         
186         // Brackets
187         case '(':       ret = TOK_PAREN_OPEN;   break;
188         case ')':       ret = TOK_PAREN_CLOSE;  break;
189         case '{':       ret = TOK_BRACE_OPEN;   break;
190         case '}':       ret = TOK_BRACE_CLOSE;  break;
191         case '[':       ret = TOK_SQUARE_OPEN;  break;
192         case ']':       ret = TOK_SQUARE_CLOSE; break;
193         
194         // Core symbols
195         case ';':       ret = TOK_SEMICOLON;    break;
196         case ',':       ret = TOK_COMMA;        break;
197         #if USE_SCOPE_CHAR
198         case '.':       ret = TOK_SCOPE;        break;
199         #endif
200         
201         // Equals
202         case '=':
203                 // Comparison Equals
204                 if( *File->CurPos == '=' ) {
205                         File->CurPos ++;
206                         ret = TOK_EQUALS;
207                         break;
208                 }
209                 // Assignment Equals
210                 ret = TOK_ASSIGN;
211                 break;
212         
213         // Variables
214         // \$[0-9]+ or \$[_a-zA-Z][_a-zA-Z0-9]*
215         case '$':
216                 // Numeric Variable
217                 if( isdigit( *File->CurPos ) ) {
218                         while( isdigit(*File->CurPos) )
219                                 File->CurPos ++;
220                 }
221                 // Ident Variable
222                 else {
223                         while( is_ident(*File->CurPos) || isdigit(*File->CurPos) )
224                                 File->CurPos ++;
225                 }
226                 ret = TOK_VARIABLE;
227                 break;
228         
229         // Default (Numbers and Identifiers)
230         default:
231                 File->CurPos --;
232                 
233                 // Numbers
234                 if( isdigit(*File->CurPos) )
235                 {
236                         if( *File->CurPos == '0' && File->CurPos[1] == 'x' ) {
237                                 File->CurPos += 2;
238                                 while(('0' <= *File->CurPos && *File->CurPos <= '9')
239                                    || ('A' <= *File->CurPos && *File->CurPos <= 'F')
240                                    || ('a' <= *File->CurPos && *File->CurPos <= 'f') )
241                                 {
242                                         File->CurPos ++;
243                                 }
244                         }
245                         else {
246                                 while( isdigit(*File->CurPos) )
247                                         File->CurPos ++;
248                         }
249                         ret = TOK_INTEGER;
250                         break;
251                 }
252         
253                 // Identifier
254                 if( is_ident(*File->CurPos) )
255                 {
256                         // Identifier
257                         while( is_ident(*File->CurPos) || isdigit(*File->CurPos) )
258                                 File->CurPos ++;
259                         
260                         // This is set later too, but we use it below
261                         File->TokenLen = File->CurPos - File->TokenStr;
262                         ret = TOK_IDENT;
263                         
264                         // Check if it's a reserved word
265                         {
266                                 char    buf[File->TokenLen + 1];
267                                  int    i;
268                                 memcpy(buf, File->TokenStr, File->TokenLen);
269                                 buf[File->TokenLen] = 0;
270                                 for( i = 0; i < ARRAY_SIZE(csaReservedWords); i ++ )
271                                 {
272                                         if(strcmp(csaReservedWords[i].Name, buf) == 0) {
273                                                 ret = csaReservedWords[i].Value;
274                                                 break ;
275                                         }
276                                 }
277                         }
278                         // If there's no match, just keep ret as TOK_IDENT
279                         
280                         break;
281                 }
282                 // Syntax Error
283                 ret = TOK_INVAL;
284                 
285                 fprintf(stderr, "Syntax Error: Unknown symbol '%c'\n", *File->CurPos);
286                 longjmp(File->JmpTarget, 1);
287                 
288                 break;
289         }
290         // Return
291         File->Token = ret;
292         File->TokenLen = File->CurPos - File->TokenStr;
293         
294         #if DEBUG
295         {
296                 char    buf[ File->TokenLen + 1];
297                 memcpy(buf, File->TokenStr, File->TokenLen);
298                 buf[File->TokenLen] = 0;
299                 //printf("  GetToken: File->CurPos = %p\n", File->CurPos);
300                 printf(" GetToken: Return %i (%i long) (%s)\n", ret, File->TokenLen, buf);
301         }
302         #endif
303         return ret;
304 }
305
306 void PutBack(tParser *File)
307 {
308         if( File->LastToken == -1 ) {
309                 // ERROR:
310                 fprintf(stderr, "INTERNAL ERROR: Putback when LastToken==-1\n");
311                 longjmp( File->JmpTarget, -1 );
312                 return ;
313         }
314         #if DEBUG
315         printf(" PutBack: Was on %i\n", File->Token);
316         #endif
317         // Save
318         File->NextLine = File->CurLine;
319         File->NextToken = File->Token;
320         File->NextTokenStr = File->TokenStr;
321         File->NextTokenLen = File->TokenLen;
322         // Restore
323         File->CurLine = File->LastLine;
324         File->Token = File->LastToken;
325         File->TokenStr = File->LastTokenStr;
326         File->TokenLen = File->LastTokenLen;
327         File->CurPos = File->NextTokenStr;
328         // Invalidate
329         File->LastToken = -1;
330 }
331
332 int LookAhead(tParser *File)
333 {
334         // TODO: Should I save the entire state here?
335          int    ret = GetToken(File);
336         PutBack(File);
337         return ret;
338 }
339
340 // --- Helpers ---
/**
 * \brief Check for ident characters
 * \param ch	Character to classify
 * \return 1 if \a ch is accepted as an identifier character, 0 otherwise
 * \note Accepts [a-zA-Z_], '.' when USE_SCOPE_CHAR is 0, and any byte with
 *       the high bit set (presumably so multi-byte encoded text can appear
 *       in identifiers - confirm against the language spec).
 *       Digits are NOT accepted here; callers test isdigit() separately.
 */
int is_ident(char ch)
{
	if('a' <= ch && ch <= 'z')	return 1;
	if('A' <= ch && ch <= 'Z')	return 1;
	if(ch == '_')	return 1;
	#if !USE_SCOPE_CHAR
	if(ch == '.')	return 1;
	#endif
	// High-bit bytes. Tested via unsigned char because the signedness of
	// plain char is implementation-defined; a bare "ch < 0" never fires on
	// targets where char is unsigned.
	if((unsigned char)ch >= 0x80)	return 1;
	return 0;
}
356
/**
 * \brief Check for an ASCII decimal digit
 * \param ch	Character code to classify
 * \return 1 if \a ch is in '0'..'9', 0 otherwise
 */
int isdigit(int ch)
{
	return ( ch >= '0' && ch <= '9' ) ? 1 : 0;
}
362
/**
 * \brief Check for a character the lexer treats as whitespace
 * \param ch	Character code to classify
 * \return 1 for space, tab, backspace, newline or carriage return;
 *         0 for anything else
 */
int isspace(int ch)
{
	switch( ch )
	{
	case ' ':
	case '\t':
	case '\b':
	case '\n':
	case '\r':
		return 1;
	default:
		return 0;
	}
}

UCC git Repository :: git.ucc.asn.au