10 // Make the scope character ('.') be a symbol, otherwise it's just an identifier character
// Compile-time switch for how '.' is lexed (scope symbol vs. identifier character).
// NOTE(review): the code that tests this macro is not visible in this listing;
// presumably 1 selects the symbol behaviour - confirm.
12 #define USE_SCOPE_CHAR 0
// Number of elements in a true array x (total size divided by element size).
// Not meaningful when x is a pointer.
16 #define ARRAY_SIZE(x) ((sizeof(x))/(sizeof((x)[0])))
// Forward declarations for functions defined later in this file.
19 int is_ident(char ch);
22 int GetToken(tParser *File);
// Reserved-word table: each entry pairs a token ID with the exact spelling of
// the keyword. GetToken compares a scanned identifier against each entry's
// Name with strcmp (so matching is case-sensitive) and returns the entry's
// Value on a match.
// NOTE(review): the struct header and the closing of the initialiser are not
// visible in this listing, and some entries may be missing.
28 } csaReservedWords[] = {
// Statement / control-flow keywords
29 {TOK_RWD_FUNCTION, "function"},
31 {TOK_RWD_RETURN, "return"},
35 {TOK_RWD_ELSE, "else"},
37 {TOK_RWD_WHILE, "while"},
// Type-name keywords
40 {TOK_RWD_VOID, "void"},
41 {TOK_RWD_OBJECT, "Object"},
42 {TOK_RWD_OPAQUE, "Opaque"},
43 {TOK_RWD_INTEGER, "Integer"},
44 {TOK_RWD_REAL, "Real"},
45 {TOK_RWD_STRING, "String"}
50 * \brief Read a token from a buffer
51 * \param File Parser state
// Lexer entry point: reads the next token from the parser state in File.
//
// Visible behaviour:
//  - If a token is cached in the Next* fields (NextToken != -1, as set up by
//    PutBack) it is restored without re-scanning the input.
//  - Otherwise whitespace and comments are skipped, the current token is
//    copied to the Last* fields, and the new token is classified from its
//    first character.
//  - An unrecognised character is reported on stderr and control leaves via
//    longjmp(File->JmpTarget, 1).
//
// NOTE(review): this listing is not contiguous - braces, case labels, several
// assignments to ret and the return statements are not shown - so the
// comments below describe only the lines that are visible.
53 int GetToken(tParser *File)
// Fast path: a previously put-back token is waiting in the Next* fields
57 if( File->NextToken != -1 ) {
// The current token becomes the "last" token...
59 File->LastToken = File->Token;
60 File->LastTokenStr = File->TokenStr;
61 File->LastTokenLen = File->TokenLen;
62 File->LastLine = File->CurLine;
// ...and the cached token becomes the current one
64 File->Token = File->NextToken;
65 File->TokenStr = File->NextTokenStr;
66 File->TokenLen = File->NextTokenLen;
67 File->CurLine = File->NextLine;
// Resume scanning immediately after the restored token
69 File->CurPos = File->TokenStr + File->TokenLen;
// Debug output: copy the token text into a NUL-terminated buffer so it can be
// printed with %s (the token itself is a pointer/length pair into the input)
72 char buf[ File->TokenLen + 1];
73 memcpy(buf, File->TokenStr, File->TokenLen);
74 buf[File->TokenLen] = 0;
76 printf(" GetToken: FAST Return %i (%i long) (%s)\n", File->Token, File->TokenLen, buf);
82 //printf(" GetToken: File=%p, File->CurPos = %p\n", File, File->CurPos);
84 // Clear whitespace (including comments)
// NOTE(review): if CurPos points to plain char, values above 0x7F may be
// negative; <ctype.h> functions expect an unsigned char value - confirm.
88 while( isspace( *File->CurPos ) )
90 //printf("whitespace 0x%x, line = %i\n", *File->CurPos, File->CurLine);
// Newlines are singled out here (the statement run for them is not shown)
91 if( *File->CurPos == '\n' )
// '#' starts a comment that runs to end of line (or end of input)
97 if( *File->CurPos == '#' ) {
98 while( *File->CurPos && *File->CurPos != '\n' )
103 // C-Style Line Comments
104 if( *File->CurPos == '/' && File->CurPos[1] == '/' ) {
105 while( *File->CurPos && *File->CurPos != '\n' )
110 // C-Style Block Comments
111 if( *File->CurPos == '/' && File->CurPos[1] == '*' ) {
112 File->CurPos += 2; // Eat the '/*'
// Scan until the current character is '/' and the one before it is '*',
// or until end of input.
// NOTE(review): on the first iteration CurPos[-1] is the '*' of the opening
// delimiter, so the three characters slash-star-slash are accepted as a
// complete comment.
113 while( *File->CurPos && !(File->CurPos[-1] == '*' && *File->CurPos == '/') )
// Keep the line counter correct across multi-line comments
115 if( *File->CurPos == '\n' ) File->CurLine ++;
118 File->CurPos ++; // Eat the '/'
122 // No more "whitespace"
126 // Save previous tokens (speeds up PutBack and LookAhead)
127 File->LastToken = File->Token;
128 File->LastTokenStr = File->TokenStr;
129 File->LastTokenLen = File->TokenLen;
130 File->LastLine = File->CurLine;
// The new token starts at the current position
133 File->TokenStr = File->CurPos;
// Dispatch on the first character; the post-increment means CurPos already
// points at the second character inside each case
134 switch( *File->CurPos++ )
// NUL terminator of the input buffer
136 case '\0': ret = TOK_EOF; break;
// Look-ahead checks for a following '^', '|' or '&' (the enclosing case
// labels and the resulting token values are not shown)
140 if( *File->CurPos == '^' ) {
149 if( *File->CurPos == '|' ) {
158 if( *File->CurPos == '&' ) {
166 case '/': ret = TOK_DIV; break;
167 case '*': ret = TOK_MUL; break;
// Look-ahead for a following '+', then for '=' which yields TOK_ASSIGN_PLUS
169 if( *File->CurPos == '+' ) {
174 if( *File->CurPos == '=' ) {
176 ret = TOK_ASSIGN_PLUS;
// Look-ahead for a following '-', then '=' (TOK_ASSIGN_MINUS), then '>'
182 if( *File->CurPos == '-' ) {
187 if( *File->CurPos == '=' ) {
189 ret = TOK_ASSIGN_MINUS;
192 if( *File->CurPos == '>' ) {
// Scan to the closing double quote or end of input.
// NOTE(review): both halves of the inner test examine the same character, so
// the backslash comparison is always true whenever the quote comparison is;
// a quote preceded by a backslash therefore still ends the scan. Checking
// File->CurPos[-1] was presumably intended - confirm.
202 while( *File->CurPos && !(*File->CurPos == '"' && *File->CurPos != '\\') )
// Single-character bracket tokens
209 case '(': ret = TOK_PAREN_OPEN; break;
210 case ')': ret = TOK_PAREN_CLOSE; break;
211 case '{': ret = TOK_BRACE_OPEN; break;
212 case '}': ret = TOK_BRACE_CLOSE; break;
213 case '[': ret = TOK_SQUARE_OPEN; break;
214 case ']': ret = TOK_SQUARE_CLOSE; break;
// Single-character separator tokens
217 case ';': ret = TOK_SEMICOLON; break;
218 case ',': ret = TOK_COMMA; break;
220 case '.': ret = TOK_SCOPE; break;
// Look-ahead for a following '=' (enclosing case label not shown)
226 if( *File->CurPos == '=' ) {
237 // Less-Than or Equal
238 if( *File->CurPos == '=' ) {
248 // Greater-Than or Equal
249 if( *File->CurPos == '=' ) {
258 // \$[0-9]+ or \$[_a-zA-Z][_a-zA-Z0-9]*
// Purely numeric name: consume the run of digits
261 if( isdigit( *File->CurPos ) ) {
262 while( isdigit(*File->CurPos) )
// Otherwise consume identifier characters and digits
267 while( is_ident(*File->CurPos) || isdigit(*File->CurPos) )
273 // Default (Numbers and Identifiers)
278 if( isdigit(*File->CurPos) )
// A "0x" prefix introduces a run of hexadecimal digits (0-9, A-F, a-f)
280 if( *File->CurPos == '0' && File->CurPos[1] == 'x' ) {
282 while(('0' <= *File->CurPos && *File->CurPos <= '9')
283 || ('A' <= *File->CurPos && *File->CurPos <= 'F')
284 || ('a' <= *File->CurPos && *File->CurPos <= 'f') )
// Otherwise consume a run of decimal digits
290 while( isdigit(*File->CurPos) )
// Identifier: starts with an is_ident character, continues with is_ident
// characters or digits
298 if( is_ident(*File->CurPos) )
301 while( is_ident(*File->CurPos) || isdigit(*File->CurPos) )
304 // This is set later too, but we use it below
305 File->TokenLen = File->CurPos - File->TokenStr;
308 // Check if it's a reserved word
// strcmp needs a NUL-terminated string, so copy the token text out first
310 char buf[File->TokenLen + 1];
312 memcpy(buf, File->TokenStr, File->TokenLen);
313 buf[File->TokenLen] = 0;
// Linear, case-sensitive search of the reserved-word table
314 for( i = 0; i < ARRAY_SIZE(csaReservedWords); i ++ )
316 if(strcmp(csaReservedWords[i].Name, buf) == 0) {
317 ret = csaReservedWords[i].Value;
322 // If there's no match, just keep ret as TOK_IDENT
// Unrecognised character: report it and jump to the handler registered in
// File->JmpTarget with value 1
329 fprintf(stderr, "Syntax Error: Unknown symbol '%c'\n", *File->CurPos);
330 longjmp(File->JmpTarget, 1);
// Final token length: distance from the token start to the scan position
336 File->TokenLen = File->CurPos - File->TokenStr;
// Debug output: NUL-terminated copy of the token text for printing
340 char buf[ File->TokenLen + 1];
341 memcpy(buf, File->TokenStr, File->TokenLen);
342 buf[File->TokenLen] = 0;
343 //printf(" GetToken: File->CurPos = %p\n", File->CurPos);
344 printf(" GetToken: Return %i (%i long) (%s)\n", ret, File->TokenLen, buf);
// Step the parser back by one token.
// The current token is stashed in the Next* fields (which GetToken's fast
// path reads back) and the saved Last* token becomes current again. Only one
// previous token is kept: LastToken is set to -1 afterwards, and calling
// PutBack while it is -1 is reported as an internal error followed by
// longjmp(File->JmpTarget, -1).
350 void PutBack(tParser *File)
// Nothing saved to go back to
352 if( File->LastToken == -1 ) {
354 fprintf(stderr, "INTERNAL ERROR: Putback when LastToken==-1\n");
355 longjmp( File->JmpTarget, -1 );
359 printf(" PutBack: Was on %i\n", File->Token);
// Stash the current token as the cached "next" token
362 File->NextLine = File->CurLine;
363 File->NextToken = File->Token;
364 File->NextTokenStr = File->TokenStr;
365 File->NextTokenLen = File->TokenLen;
// Restore the previous token as the current one
367 File->CurLine = File->LastLine;
368 File->Token = File->LastToken;
369 File->TokenStr = File->LastTokenStr;
370 File->TokenLen = File->LastTokenLen;
// Scan position returns to the start of the token that was just stashed
371 File->CurPos = File->NextTokenStr;
// Mark the previous-token slot as empty so a second consecutive PutBack is caught
373 File->LastToken = -1;
// Peek at the upcoming token by calling GetToken and keeping its result in ret.
// NOTE(review): the remainder of this function is not visible in this listing;
// presumably the token is put back and ret is returned - confirm.
376 int LookAhead(tParser *File)
378 // TODO: Should I save the entire state here?
379 int ret = GetToken(File);
386 * \brief Check for ident characters
387 * \note Matches Regex [a-zA-Z_] (the code below also accepts '.')
// Returns 1 when ch is an ASCII letter, an underscore or a '.'.
// GetToken uses this for the first character of an identifier and, combined
// with isdigit, for the following characters.
// NOTE(review): the '.' test is presumably compiled only when USE_SCOPE_CHAR
// is 0, and the result for non-matching characters is not shown in this
// listing - confirm both against the full function.
389 int is_ident(char ch)
391 if('a' <= ch && ch <= 'z') return 1;
392 if('A' <= ch && ch <= 'Z') return 1;
393 if(ch == '_') return 1;
395 if(ch == '.') return 1;
403 if('0' <= ch && ch <= '9') return 1;
409 if(' ' == ch) return 1;
410 if('\t' == ch) return 1;
411 if('\b' == ch) return 1;
412 if('\n' == ch) return 1;
413 if('\r' == ch) return 1;