25a7aa7ec3a08dd50f65e4681d6e89b70042aadc
[tpg/acess2.git] / Usermode / Libraries / libspiderscript.so_src / lex.c
1 /*
2  * SpiderScript
3  * - Script Lexer
4  */
5 #include "tokens.h"
6 #include <stdlib.h>
7 #include <stdio.h>
8 #include <string.h>
9
10 #define USE_SCOPE_CHAR  0
11
12 #define DEBUG   0
13
14 #define ARRAY_SIZE(x)   ((sizeof(x))/(sizeof((x)[0])))
15
16 // === PROTOTYPES ===
17  int    is_ident(char ch);
18  int    isdigit(int ch);
19  int    isspace(int ch);
20  int    GetToken(tParser *File);
21
22 // === CONSTANTS ===
23 const struct {
24         const  int      Value;
25         const char      *Name;
26 } csaReservedWords[] = {
27         {TOK_RWD_FUNCTION, "function"},
28         {TOK_RWD_RETURN, "return"},
29         {TOK_RWD_VOID, "void"},
30         {TOK_RWD_OBJECT, "Object"},
31         {TOK_RWD_INTEGER, "Integer"},
32         {TOK_RWD_REAL, "Real"},
33         {TOK_RWD_STRING, "String"}
34 };
35
36 // === CODE ===
37 /**
38  * \brief Read a token from a buffer
39  * \param File  Parser state
40  */
41 int GetToken(tParser *File)
42 {
43          int    ret;
44         
45         if( File->NextToken != -1 ) {
46                 // Save Last
47                 File->LastToken = File->Token;
48                 File->LastTokenStr = File->TokenStr;
49                 File->LastTokenLen = File->TokenLen;
50                 File->LastLine = File->CurLine;
51                 // Restore Next
52                 File->Token = File->NextToken;
53                 File->TokenStr = File->NextTokenStr;
54                 File->TokenLen = File->NextTokenLen;
55                 File->CurLine = File->NextLine;
56                 // Set State
57                 File->CurPos = File->TokenStr + File->TokenLen;
58                 File->NextToken = -1;
59                 {
60                         char    buf[ File->TokenLen + 1];
61                         memcpy(buf, File->TokenStr, File->TokenLen);
62                         buf[File->TokenLen] = 0;
63                         #if DEBUG
64                         printf(" GetToken: FAST Return %i (%i long) (%s)\n", File->Token, File->TokenLen, buf);
65                         #endif
66                 }
67                 return File->Token;
68         }
69         
70         //printf("  GetToken: File=%p, File->CurPos = %p\n", File, File->CurPos);
71         
72         // Clear whitespace (including comments)
73         for( ;; )
74         {
75                 // Whitespace
76                 while( isspace( *File->CurPos ) )
77                 {
78                         //printf("whitespace 0x%x, line = %i\n", *File->CurPos, File->CurLine);
79                         if( *File->CurPos == '\n' )
80                                 File->CurLine ++;
81                         File->CurPos ++;
82                 }
83                 
84                 // # Line Comments
85                 if( *File->CurPos == '#' ) {
86                         while( *File->CurPos && *File->CurPos != '\n' )
87                                 File->CurPos ++;
88                         continue ;
89                 }
90                 
91                 // C-Style Line Comments
92                 if( *File->CurPos == '/' && File->CurPos[1] == '/' ) {
93                         while( *File->CurPos && *File->CurPos != '\n' )
94                                 File->CurPos ++;
95                         continue ;
96                 }
97                 
98                 // C-Style Block Comments
99                 if( *File->CurPos == '/' && File->CurPos[1] == '*' ) {
100                         File->CurPos += 2;      // Eat the '/*'
101                         while( *File->CurPos && !(File->CurPos[-1] == '*' && *File->CurPos == '/') )
102                         {
103                                 if( *File->CurPos == '\n' )     File->CurLine ++;
104                                 File->CurPos ++;
105                         }
106                         File->CurPos ++;        // Eat the '/'
107                         continue ;
108                 }
109                 
110                 // No more "whitespace"
111                 break;
112         }
113         
114         // Save previous tokens (speeds up PutBack and LookAhead)
115         File->LastToken = File->Token;
116         File->LastTokenStr = File->TokenStr;
117         File->LastTokenLen = File->TokenLen;
118         File->LastLine = File->CurLine;
119         
120         // Read token
121         File->TokenStr = File->CurPos;
122         switch( *File->CurPos++ )
123         {
124         case '\0':      ret = TOK_EOF;  break;
125         
126         // Operations
127         case '/':       ret = TOK_DIV;  break;
128         case '*':       ret = TOK_MUL;  break;
129         case '+':       ret = TOK_PLUS; break;
130         case '-':
131                 if( *File->CurPos == '>' ) {
132                         File->CurPos ++;
133                         ret = TOK_ELEMENT;
134                 }
135                 else
136                         ret = TOK_MINUS;
137                 break;
138         
139         // Strings
140         case '"':
141                 while( *File->CurPos && !(*File->CurPos == '"' && *File->CurPos != '\\') )
142                         File->CurPos ++;
143                 File->CurPos ++;
144                 ret = TOK_STR;
145                 break;
146         
147         // Brackets
148         case '(':       ret = TOK_PAREN_OPEN;   break;
149         case ')':       ret = TOK_PAREN_CLOSE;  break;
150         case '{':       ret = TOK_BRACE_OPEN;   break;
151         case '}':       ret = TOK_BRACE_CLOSE;  break;
152         case '[':       ret = TOK_SQUARE_OPEN;  break;
153         case ']':       ret = TOK_SQUARE_CLOSE; break;
154         
155         // Core symbols
156         case ';':       ret = TOK_SEMICOLON;    break;
157         case ',':       ret = TOK_COMMA;        break;
158         #if USE_SCOPE_CHAR
159         case '.':       ret = TOK_SCOPE;        break;
160         #endif
161         
162         // Equals
163         case '=':
164                 // Comparison Equals
165                 if( *File->CurPos == '=' ) {
166                         File->CurPos ++;
167                         ret = TOK_EQUALS;
168                         break;
169                 }
170                 // Assignment Equals
171                 ret = TOK_ASSIGN;
172                 break;
173         
174         // Variables
175         // \$[0-9]+ or \$[_a-zA-Z][_a-zA-Z0-9]*
176         case '$':
177                 // Numeric Variable
178                 if( isdigit( *File->CurPos ) ) {
179                         while( isdigit(*File->CurPos) )
180                                 File->CurPos ++;
181                 }
182                 // Ident Variable
183                 else {
184                         while( is_ident(*File->CurPos) || isdigit(*File->CurPos) )
185                                 File->CurPos ++;
186                 }
187                 ret = TOK_VARIABLE;
188                 break;
189         
190         // Default (Numbers and Identifiers)
191         default:
192                 File->CurPos --;
193                 // Numbers
194                 if( isdigit(*File->CurPos) )
195                 {
196                         while( isdigit(*File->CurPos) )
197                                 File->CurPos ++;
198                         ret = TOK_INTEGER;
199                         break;
200                 }
201         
202                 // Identifier
203                 if( is_ident(*File->CurPos) )
204                 {
205                         // Identifier
206                         while( is_ident(*File->CurPos) || isdigit(*File->CurPos) )
207                                 File->CurPos ++;
208                         
209                         // This is set later too, but we use it below
210                         File->TokenLen = File->CurPos - File->TokenStr;
211                         ret = TOK_IDENT;
212                         
213                         // Check if it's a reserved word
214                         {
215                                 char    buf[File->TokenLen + 1];
216                                  int    i;
217                                 memcpy(buf, File->TokenStr, File->TokenLen);
218                                 buf[File->TokenLen] = 0;
219                                 for( i = 0; i < ARRAY_SIZE(csaReservedWords); i ++ )
220                                 {
221                                         if(strcmp(csaReservedWords[i].Name, buf) == 0) {
222                                                 ret = csaReservedWords[i].Value;
223                                                 break ;
224                                         }
225                                 }
226                         }
227                         // If there's no match, just keep ret as TOK_IDENT
228                         
229                         break;
230                 }
231                 // Syntax Error
232                 ret = TOK_INVAL;
233                 
234                 fprintf(stderr, "Syntax Error: Unknown symbol '%c'\n", *File->CurPos);
235                 longjmp(File->JmpTarget, 1);
236                 
237                 break;
238         }
239         // Return
240         File->Token = ret;
241         File->TokenLen = File->CurPos - File->TokenStr;
242         
243         #if DEBUG
244         {
245                 char    buf[ File->TokenLen + 1];
246                 memcpy(buf, File->TokenStr, File->TokenLen);
247                 buf[File->TokenLen] = 0;
248                 //printf("  GetToken: File->CurPos = %p\n", File->CurPos);
249                 printf(" GetToken: Return %i (%i long) (%s)\n", ret, File->TokenLen, buf);
250         }
251         #endif
252         return ret;
253 }
254
255 void PutBack(tParser *File)
256 {
257         if( File->LastToken == -1 ) {
258                 // ERROR:
259                 fprintf(stderr, "INTERNAL ERROR: Putback when LastToken==-1\n");
260                 longjmp( File->JmpTarget, -1 );
261                 return ;
262         }
263         #if DEBUG
264         printf(" PutBack: Was on %i\n", File->Token);
265         #endif
266         // Save
267         File->NextLine = File->CurLine;
268         File->NextToken = File->Token;
269         File->NextTokenStr = File->TokenStr;
270         File->NextTokenLen = File->TokenLen;
271         // Restore
272         File->CurLine = File->LastLine;
273         File->Token = File->LastToken;
274         File->TokenStr = File->LastTokenStr;
275         File->TokenLen = File->LastTokenLen;
276         File->CurPos = File->NextTokenStr;
277         // Invalidate
278         File->LastToken = -1;
279 }
280
281 int LookAhead(tParser *File)
282 {
283         // TODO: Should I save the entire state here?
284          int    ret = GetToken(File);
285         PutBack(File);
286         return ret;
287 }
288
289 // --- Helpers ---
/**
 * \brief Check for ident characters
 * \param ch	Character to test
 * \return 1 if \a ch may appear in an identifier, 0 otherwise
 * \note Matches [a-zA-Z_], plus '.' when USE_SCOPE_CHAR is disabled, plus
 *       any byte with the top bit set (presumably so multi-byte encoded
 *       characters can be used in identifiers). Digits are NOT matched;
 *       callers check isdigit separately.
 */
int is_ident(char ch)
{
	if('a' <= ch && ch <= 'z')	return 1;
	if('A' <= ch && ch <= 'Z')	return 1;
	if(ch == '_')	return 1;
	#if !USE_SCOPE_CHAR
	if(ch == '.')	return 1;
	#endif
	// Test the top bit via unsigned char: a plain `ch < 0` is never true
	// on targets where char is unsigned, which would change what lexes
	// as an identifier between platforms
	if((unsigned char)ch >= 0x80)	return 1;
	return 0;
}
305
/**
 * \brief Check for an ASCII decimal digit
 * \param ch	Character to test
 * \return 1 if \a ch is in '0'..'9', 0 otherwise
 */
int isdigit(int ch)
{
	return (ch >= '0' && ch <= '9') ? 1 : 0;
}
311
/**
 * \brief Check for a character the lexer treats as whitespace
 * \param ch	Character to test
 * \return 1 for space, tab, backspace, newline and carriage return;
 *         0 for anything else
 * \note This set differs from the standard C isspace: backspace is
 *       accepted, while vertical tab and form feed are not.
 */
int isspace(int ch)
{
	switch( ch )
	{
	case ' ':
	case '\t':
	case '\b':
	case '\n':
	case '\r':
		return 1;
	default:
		return 0;
	}
}

UCC git Repository :: git.ucc.asn.au