10 // Make the scope character ('.') be a symbol, otherwise it's just
12 #define USE_SCOPE_CHAR 0
16 #define ARRAY_SIZE(x) ((sizeof(x))/(sizeof((x)[0])))
19 int is_ident(char ch);
22 int GetToken(tParser *File);
28 } csaReservedWords[] = {
// Reserved-word table: each entry pairs a token value with the spelling that
// selects it. GetToken compares a scanned identifier against every Name with
// strcmp, so matching is case-sensitive (the type names are capitalised).
29 {TOK_RWD_FUNCTION, "function"},
31 {TOK_RWD_RETURN, "return"},
35 {TOK_RWD_ELSE, "else"},
37 {TOK_RWD_WHILE, "while"},
// Type names
40 {TOK_RWD_VOID, "void"},
41 {TOK_RWD_OBJECT, "Object"},
42 {TOK_RWD_OPAQUE, "Opaque"},
43 {TOK_RWD_INTEGER, "Integer"},
44 {TOK_RWD_REAL, "Real"},
45 {TOK_RWD_STRING, "String"}
50 * \brief Read a token from a buffer
51 * \param File Parser state
// Read the next token from the parser state in File.
// The previous token (type, text pointer, length, line) is copied into the
// Last* fields first, then TokenStr/TokenLen are set to span the new token
// and its type is computed in ret, the value the trace output reports as
// the return value.
53 int GetToken(tParser *File)
// Fast path: a token parked in the Next* fields (PutBack fills these in) is
// handed back without rescanning the text.
57 if( File->NextToken != -1 ) {
// Current token becomes the previous token
59 File->LastToken = File->Token;
60 File->LastTokenStr = File->TokenStr;
61 File->LastTokenLen = File->TokenLen;
62 File->LastLine = File->CurLine;
// Parked token becomes the current token
64 File->Token = File->NextToken;
65 File->TokenStr = File->NextTokenStr;
66 File->TokenLen = File->NextTokenLen;
67 File->CurLine = File->NextLine;
// Resume scanning directly after the restored token
69 File->CurPos = File->TokenStr + File->TokenLen;
// NUL-terminated copy of the token text, used only by the trace output below.
// NOTE(review): buf is a variable-length array sized by the token length, so
// a very long token consumes a matching amount of stack.
72 char buf[ File->TokenLen + 1];
73 memcpy(buf, File->TokenStr, File->TokenLen);
74 buf[File->TokenLen] = 0;
76 printf(" GetToken: FAST Return %i (%i long) (%s)\n", File->Token, File->TokenLen, buf);
82 //printf(" GetToken: File=%p, File->CurPos = %p\n", File, File->CurPos);
84 // Clear whitespace (including comments)
// NOTE(review): isspace and isdigit are called on the dereferenced CurPos
// throughout this function. If CurPos points to plain char and char is
// signed, bytes above 0x7F reach these functions as negative values, which
// the ctype functions do not define. Confirm the type of CurPos.
88 while( isspace( *File->CurPos ) )
90 //printf("whitespace 0x%x, line = %i\n", *File->CurPos, File->CurLine);
91 if( *File->CurPos == '\n' )
// Hash comments: skip up to the end of the line or the end of the input
97 if( *File->CurPos == '#' ) {
98 while( *File->CurPos && *File->CurPos != '\n' )
103 // C-Style Line Comments
104 if( *File->CurPos == '/' && File->CurPos[1] == '/' ) {
105 while( *File->CurPos && *File->CurPos != '\n' )
110 // C-Style Block Comments
111 if( *File->CurPos == '/' && File->CurPos[1] == '*' ) {
112 File->CurPos += 2; // Eat the '/*'
// Scan until a star immediately followed by a slash, or the end of the input.
// NOTE(review): on the first test CurPos[-1] is the star of the opener, so
// the three characters slash, star, slash are accepted as a complete comment.
113 while( *File->CurPos && !(File->CurPos[-1] == '*' && *File->CurPos == '/') )
// Keep the line counter correct across multi-line comments
115 if( *File->CurPos == '\n' ) File->CurLine ++;
// NOTE(review): if this increment also runs when the loop stopped on the
// terminating NUL (unterminated comment), CurPos ends up past the end of the
// text. Confirm there is a guard.
118 File->CurPos ++; // Eat the '/'
122 // No more "whitespace"
126 // Save previous tokens (speeds up PutBack and LookAhead)
127 File->LastToken = File->Token;
128 File->LastTokenStr = File->TokenStr;
129 File->LastTokenLen = File->TokenLen;
// NOTE(review): CurLine has already been advanced by the whitespace and
// comment skipping above, so LastLine records the line of the token about to
// be read, not the line of the previous token (the fast path saves it before
// changing CurLine). Confirm which is intended.
130 File->LastLine = File->CurLine;
// The token text starts at the current position
133 File->TokenStr = File->CurPos;
// Dispatch on the first character; the post-increment leaves CurPos on the
// character after it, so the tests below look at the second character.
134 switch( *File->CurPos++ )
// End of the input text
136 case '\0': ret = TOK_EOF; break;
// Operators: the following character is checked for a two-character form
140 if( *File->CurPos == '^' ) {
149 if( *File->CurPos == '|' ) {
158 if( *File->CurPos == '&' ) {
// Followed by an equals sign: divide-and-assign
167 if( *File->CurPos == '=' ) {
169 ret = TOK_ASSIGN_DIV;
// Followed by an equals sign: multiply-and-assign
175 if( *File->CurPos == '=' ) {
177 ret = TOK_ASSIGN_MUL;
// Second plus sign
183 if( *File->CurPos == '+' ) {
// Followed by an equals sign: add-and-assign
188 if( *File->CurPos == '=' ) {
190 ret = TOK_ASSIGN_PLUS;
// Second minus sign
196 if( *File->CurPos == '-' ) {
// Followed by an equals sign: subtract-and-assign
201 if( *File->CurPos == '=' ) {
203 ret = TOK_ASSIGN_MINUS;
206 if( *File->CurPos == '>' ) {
// String literal: scan to the closing double quote or the end of the input.
// NOTE(review): both halves of the inner test examine the same character, so
// the backslash comparison is always true once the quote comparison holds and
// an escaped quote still ends the string. Looks like CurPos[-1] was meant
// for the backslash test. Confirm.
216 while( *File->CurPos && !(*File->CurPos == '"' && *File->CurPos != '\\') )
// Single-character punctuation
228 case '(': ret = TOK_PAREN_OPEN; break;
229 case ')': ret = TOK_PAREN_CLOSE; break;
230 case '{': ret = TOK_BRACE_OPEN; break;
231 case '}': ret = TOK_BRACE_CLOSE; break;
232 case '[': ret = TOK_SQUARE_OPEN; break;
233 case ']': ret = TOK_SQUARE_CLOSE; break;
236 case ';': ret = TOK_SEMICOLON; break;
237 case ',': ret = TOK_COMMA; break;
239 case '.': ret = TOK_SCOPE; break;
// Comparison forms: an equals sign as the second character is checked for
245 if( *File->CurPos == '=' ) {
256 // Less-Than or Equal
257 if( *File->CurPos == '=' ) {
267 // Greater-Than or Equal
268 if( *File->CurPos == '=' ) {
286 // \$[0-9]+ or \$[_a-zA-Z][_a-zA-Z0-9]*
// All-digit form
289 if( isdigit( *File->CurPos ) ) {
290 while( isdigit(*File->CurPos) )
// Named form: identifier characters and digits
295 while( is_ident(*File->CurPos) || isdigit(*File->CurPos) )
301 // Default (Numbers and Identifiers)
306 if( isdigit(*File->CurPos) )
// Hexadecimal prefix; only a lower-case x is recognised
309 if( *File->CurPos == '0' && File->CurPos[1] == 'x' )
// Consume hexadecimal digits in either case
312 while(('0' <= *File->CurPos && *File->CurPos <= '9')
313 || ('A' <= *File->CurPos && *File->CurPos <= 'F')
314 || ('a' <= *File->CurPos && *File->CurPos <= 'f') )
// Decimal digits
321 while( isdigit(*File->CurPos) )
324 // printf("*File->CurPos = '%c'\n", *File->CurPos);
// Fractional part after a decimal point
327 if( *File->CurPos == '.' )
331 while( isdigit(*File->CurPos) )
// Exponent marker, optional sign, then the exponent digits
335 if( *File->CurPos == 'e' || *File->CurPos == 'E' )
339 if(*File->CurPos == '-' || *File->CurPos == '+')
341 while( isdigit(*File->CurPos) )
345 // printf(" ret = %i\n", ret);
// Identifier: starts with an identifier character, continues with
// identifier characters or digits
351 if( is_ident(*File->CurPos) )
356 while( is_ident(*File->CurPos) || isdigit(*File->CurPos) )
359 // This is set later too, but we use it below
360 File->TokenLen = File->CurPos - File->TokenStr;
362 // Check if it's a reserved word
// strcmp needs a NUL-terminated string, so the token text is copied out first.
// NOTE(review): variable-length array sized by the identifier length.
364 char buf[File->TokenLen + 1];
366 memcpy(buf, File->TokenStr, File->TokenLen);
367 buf[File->TokenLen] = 0;
// Linear search of the reserved-word table
368 for( i = 0; i < ARRAY_SIZE(csaReservedWords); i ++ )
370 if(strcmp(csaReservedWords[i].Name, buf) == 0) {
371 ret = csaReservedWords[i].Value;
376 // If there's no match, just keep ret as TOK_IDENT
// Anything else is reported on stderr and the scan is abandoned by jumping
// to File->JmpTarget with the value 1
383 fprintf(stderr, "Syntax Error: Unknown symbol '%c'\n", *File->CurPos);
384 longjmp(File->JmpTarget, 1);
// Final token length: distance from the token start to the scan position
390 File->TokenLen = File->CurPos - File->TokenStr;
// NUL-terminated copy of the token text for the trace output
394 char buf[ File->TokenLen + 1];
395 memcpy(buf, File->TokenStr, File->TokenLen);
396 buf[File->TokenLen] = 0;
397 //printf(" GetToken: File->CurPos = %p\n", File->CurPos);
398 printf(" GetToken: Return %i (%i long) (%s)\n", ret, File->TokenLen, buf);
// Step the parser back by one token: the current token is parked in the
// Next* fields (where the fast path of GetToken picks it up again) and the
// Last* fields become the current token.
404 void PutBack(tParser *File)
// There is no previous token to return to. LastToken is reset to -1 at the
// end of this function, so a second PutBack without a GetToken in between
// ends up here.
406 if( File->LastToken == -1 ) {
408 fprintf(stderr, "INTERNAL ERROR: Putback when LastToken==-1\n");
// Abandon parsing through File->JmpTarget; this path passes -1, whereas the
// syntax-error path in GetToken passes 1
409 longjmp( File->JmpTarget, -1 );
413 printf(" PutBack: Was on %i\n", File->Token);
// Park the current token as the next token
416 File->NextLine = File->CurLine;
417 File->NextToken = File->Token;
418 File->NextTokenStr = File->TokenStr;
419 File->NextTokenLen = File->TokenLen;
// Make the previous token current again
421 File->CurLine = File->LastLine;
422 File->Token = File->LastToken;
423 File->TokenStr = File->LastTokenStr;
424 File->TokenLen = File->LastTokenLen;
// Scan position goes back to the start of the parked token
425 File->CurPos = File->NextTokenStr;
// Only one level of history is kept
427 File->LastToken = -1;
// Begins by reading the next token through GetToken() and holding its type
// in ret.
430 int LookAhead(tParser *File)
432 // TODO: Should I save the entire state here?
433 int ret = GetToken(File);
440 * \brief Check for ident characters
441 * \note Matches Regex [a-zA-Z_]; the checks below also accept '.' (presumably tied to the USE_SCOPE_CHAR setting)
// Returns 1 when ch is an ASCII letter, an underscore or a dot.
// Digits are not matched here; GetToken tests for them separately with isdigit.
443 int is_ident(char ch)
445 if('a' <= ch && ch <= 'z') return 1;
446 if('A' <= ch && ch <= 'Z') return 1;
447 if(ch == '_') return 1;
// NOTE(review): presumably only meant to apply while USE_SCOPE_CHAR is 0, so
// that dotted names scan as a single identifier. Confirm the guard.
449 if(ch == '.') return 1;
457 if('0' <= ch && ch <= '9') return 1;
463 if(' ' == ch) return 1;
464 if('\t' == ch) return 1;
465 if('\b' == ch) return 1;
466 if('\n' == ch) return 1;
467 if('\r' == ch) return 1;