// NOTE(review): no use of USE_SCOPE_CHAR is visible in this listing; presumably it
// selects how '.' is lexed (separate scope token vs. identifier character) - confirm.
10 #define USE_SCOPE_CHAR 0
// Element count of a true array: size of the whole array divided by the size
// of its first element. Gives a meaningless result if x has decayed to a pointer.
14 #define ARRAY_SIZE(x) ((sizeof(x))/(sizeof((x)[0])))
// Forward declarations; both functions are defined further down in this file.
17 int is_ident(char ch);
20 int GetToken(tParser *File);
// Reserved-word table: each entry pairs a token code with the exact spelling
// that produces it. GetToken compares identifier text against the Name member
// with strcmp (so the match is case-sensitive: "Object", not "object") and
// takes the Value member on a match.
// NOTE(review): the struct header declaring the members is not visible in this listing.
26 } csaReservedWords[] = {
// Statement / control-flow keywords
27 {TOK_RWD_FUNCTION, "function"},
29 {TOK_RWD_RETURN, "return"},
33 {TOK_RWD_ELSE, "else"},
35 {TOK_RWD_WHILE, "while"},
// Type-name keywords
38 {TOK_RWD_VOID, "void"},
39 {TOK_RWD_OBJECT, "Object"},
40 {TOK_RWD_OPAQUE, "Opaque"},
41 {TOK_RWD_INTEGER, "Integer"},
42 {TOK_RWD_REAL, "Real"},
43 {TOK_RWD_STRING, "String"}
48 * \brief Read a token from a buffer
49 * \param File Parser state
// GetToken: advance the parser state in File by one token.
// The visible code records the token's start in File->TokenStr, its length in
// File->TokenLen, and keeps the token code in the local `ret`.
// On an unrecognised character it prints a syntax error and longjmp()s to
// File->JmpTarget with value 1.
// NOTE(review): only part of this body is visible in this listing (braces,
// several case labels, loop bodies and the return statement are not shown);
// the comments below describe the visible statements only. Presumably the
// function returns the token code - confirm.
51 int GetToken(tParser *File)
// Fast path: a token was put back earlier (PutBack stores it in the Next*
// fields), so shift Token -> Last* and Next* -> Token instead of re-lexing.
55 if( File->NextToken != -1 ) {
57 File->LastToken = File->Token;
58 File->LastTokenStr = File->TokenStr;
59 File->LastTokenLen = File->TokenLen;
60 File->LastLine = File->CurLine;
62 File->Token = File->NextToken;
63 File->TokenStr = File->NextTokenStr;
64 File->TokenLen = File->NextTokenLen;
65 File->CurLine = File->NextLine;
// Resume scanning immediately after the restored token.
67 File->CurPos = File->TokenStr + File->TokenLen;
// NUL-terminated copy of the token text (variable-length array), used only
// by the trace print below.
70 char buf[ File->TokenLen + 1];
71 memcpy(buf, File->TokenStr, File->TokenLen);
72 buf[File->TokenLen] = 0;
74 printf(" GetToken: FAST Return %i (%i long) (%s)\n", File->Token, File->TokenLen, buf);
80 //printf(" GetToken: File=%p, File->CurPos = %p\n", File, File->CurPos);
82 // Clear whitespace (including comments)
86 while( isspace( *File->CurPos ) )
88 //printf("whitespace 0x%x, line = %i\n", *File->CurPos, File->CurLine);
// Newlines are detected separately from other whitespace.
// NOTE(review): the action taken (presumably a line-counter increment) is not visible here.
89 if( *File->CurPos == '\n' )
// '#' starts a comment that runs to the end of the line (or end of input).
95 if( *File->CurPos == '#' ) {
96 while( *File->CurPos && *File->CurPos != '\n' )
101 // C-Style Line Comments
102 if( *File->CurPos == '/' && File->CurPos[1] == '/' ) {
103 while( *File->CurPos && *File->CurPos != '\n' )
108 // C-Style Block Comments
109 if( *File->CurPos == '/' && File->CurPos[1] == '*' ) {
110 File->CurPos += 2; // Eat the '/*'
// Scan until the previous character is '*' and the current one is '/', or
// until the terminating NUL.
// NOTE(review): right after the '/*' is eaten, CurPos[-1] is that same '*',
// so the input "/*/" satisfies the end test immediately - confirm whether
// that is intended.
111 while( *File->CurPos && !(File->CurPos[-1] == '*' && *File->CurPos == '/') )
// Keep the line count accurate across multi-line comments.
113 if( *File->CurPos == '\n' ) File->CurLine ++;
116 File->CurPos ++; // Eat the '/'
120 // No more "whitespace"
124 // Save previous tokens (speeds up PutBack and LookAhead)
125 File->LastToken = File->Token;
126 File->LastTokenStr = File->TokenStr;
127 File->LastTokenLen = File->TokenLen;
128 File->LastLine = File->CurLine;
// Remember where the token starts, then dispatch on its first character.
// The post-increment leaves CurPos on the character after it.
131 File->TokenStr = File->CurPos;
132 switch( *File->CurPos++ )
134 case '\0': ret = TOK_EOF; break;
// One-character lookaheads for operators.
// NOTE(review): the case labels these tests belong to are not visible here.
138 if( *File->CurPos == '^' ) {
147 if( *File->CurPos == '|' ) {
156 if( *File->CurPos == '&' ) {
164 case '/': ret = TOK_DIV; break;
165 case '*': ret = TOK_MUL; break;
166 case '+': ret = TOK_PLUS; break;
168 if( *File->CurPos == '>' ) {
// Scan forward to the next '"' or the terminating NUL.
// NOTE(review): when the current character is '"' the term
// `*File->CurPos != '\\'` is always true, so the condition reduces to
// "stop at the next double quote" and a backslash-escaped quote still ends
// the scan. If escapes were meant to be honoured, the second test presumably
// wanted the preceding character - confirm.
178 while( *File->CurPos && !(*File->CurPos == '"' && *File->CurPos != '\\') )
// Brackets
185 case '(': ret = TOK_PAREN_OPEN; break;
186 case ')': ret = TOK_PAREN_CLOSE; break;
187 case '{': ret = TOK_BRACE_OPEN; break;
188 case '}': ret = TOK_BRACE_CLOSE; break;
189 case '[': ret = TOK_SQUARE_OPEN; break;
190 case ']': ret = TOK_SQUARE_CLOSE; break;
// Separators
193 case ';': ret = TOK_SEMICOLON; break;
194 case ',': ret = TOK_COMMA; break;
// NOTE(review): presumably compiled only when USE_SCOPE_CHAR is non-zero - the guard is not visible here.
196 case '.': ret = TOK_SCOPE; break;
202 if( *File->CurPos == '=' ) {
212 // \$[0-9]+ or \$[_a-zA-Z][_a-zA-Z0-9]*
// Numeric form: consume a run of digits.
215 if( isdigit( *File->CurPos ) ) {
216 while( isdigit(*File->CurPos) )
// Named form: consume identifier characters and digits.
221 while( is_ident(*File->CurPos) || isdigit(*File->CurPos) )
227 // Default (Numbers and Identifiers)
232 if( isdigit(*File->CurPos) )
// "0x" prefix selects hexadecimal.
234 if( *File->CurPos == '0' && File->CurPos[1] == 'x' ) {
// Consume hex digits in either case.
236 while(('0' <= *File->CurPos && *File->CurPos <= '9')
237 || ('A' <= *File->CurPos && *File->CurPos <= 'F')
238 || ('a' <= *File->CurPos && *File->CurPos <= 'f') )
// Otherwise consume a run of decimal digits.
244 while( isdigit(*File->CurPos) )
// Identifiers: must start with an identifier character; digits are allowed
// after the first character.
252 if( is_ident(*File->CurPos) )
255 while( is_ident(*File->CurPos) || isdigit(*File->CurPos) )
258 // This is set later too, but we use it below
259 File->TokenLen = File->CurPos - File->TokenStr;
262 // Check if it's a reserved word
// The token text is not NUL-terminated in place, so copy it into a
// terminated buffer before calling strcmp.
264 char buf[File->TokenLen + 1];
266 memcpy(buf, File->TokenStr, File->TokenLen);
267 buf[File->TokenLen] = 0;
// Linear, case-sensitive search of the reserved-word table.
268 for( i = 0; i < ARRAY_SIZE(csaReservedWords); i ++ )
270 if(strcmp(csaReservedWords[i].Name, buf) == 0) {
271 ret = csaReservedWords[i].Value;
276 // If there's no match, just keep ret as TOK_IDENT
// Unrecognised character: report it and unwind to the parser's recovery point.
283 fprintf(stderr, "Syntax Error: Unknown symbol '%c'\n", *File->CurPos);
284 longjmp(File->JmpTarget, 1);
// Final token length: distance from the token start to the scan position.
290 File->TokenLen = File->CurPos - File->TokenStr;
// Trace output: NUL-terminated copy of the token text for printing.
294 char buf[ File->TokenLen + 1];
295 memcpy(buf, File->TokenStr, File->TokenLen);
296 buf[File->TokenLen] = 0;
297 //printf(" GetToken: File->CurPos = %p\n", File->CurPos);
298 printf(" GetToken: Return %i (%i long) (%s)\n", ret, File->TokenLen, buf);
// PutBack: undo the most recent GetToken. The current token is parked in the
// Next* fields (where GetToken's fast path picks it up again) and the saved
// Last* token becomes the current one.
// Only one level of put-back is kept: LastToken is set to -1 afterwards, so a
// second PutBack without an intervening GetToken takes the error path below.
// NOTE(review): some lines of this body are not visible in this listing.
304 void PutBack(tParser *File)
// No saved previous token: report an internal error and unwind with value -1.
306 if( File->LastToken == -1 ) {
308 fprintf(stderr, "INTERNAL ERROR: Putback when LastToken==-1\n");
309 longjmp( File->JmpTarget, -1 );
313 printf(" PutBack: Was on %i\n", File->Token);
// Park the current token as the "next" token.
316 File->NextLine = File->CurLine;
317 File->NextToken = File->Token;
318 File->NextTokenStr = File->TokenStr;
319 File->NextTokenLen = File->TokenLen;
// Restore the previous token as the current one.
321 File->CurLine = File->LastLine;
322 File->Token = File->LastToken;
323 File->TokenStr = File->LastTokenStr;
324 File->TokenLen = File->LastTokenLen;
// Rewind the scan position to the start of the token that was put back.
325 File->CurPos = File->NextTokenStr;
// Mark that there is no longer a saved previous token.
327 File->LastToken = -1;
// LookAhead: fetches the next token through GetToken and keeps its code in `ret`.
// NOTE(review): the rest of the body is not visible in this listing; presumably
// the token is put back before `ret` is returned - confirm.
330 int LookAhead(tParser *File)
332 // TODO: Should I save the entire state here?
333 int ret = GetToken(File);
340 * \brief Check for ident characters
341 * \note Matches Regex [a-zA-Z_]; as written, the body also returns 1 for '.'
// is_ident: returns 1 when ch may appear in an identifier.
// The visible tests accept ASCII letters, '_' and '.'.
// NOTE(review): the fall-through return for non-matching characters is not
// visible in this listing.
343 int is_ident(char ch)
345 if('a' <= ch && ch <= 'z') return 1;
346 if('A' <= ch && ch <= 'Z') return 1;
347 if(ch == '_') return 1;
// '.' is accepted as an identifier character here.
// NOTE(review): presumably tied to USE_SCOPE_CHAR being 0 - the guard is not visible; confirm.
349 if(ch == '.') return 1;
// NOTE(review): the remaining tests look like they belong to separate helpers
// (a decimal-digit test and a whitespace test) whose signatures are not
// visible in this listing.
// Decimal digit test: '0'..'9'.
357 if('0' <= ch && ch <= '9') return 1;
// Whitespace set: space, tab, backspace, newline, carriage return.
// Note that '\b' (backspace) is included.
363 if(' ' == ch) return 1;
364 if('\t' == ch) return 1;
365 if('\b' == ch) return 1;
366 if('\n' == ch) return 1;
367 if('\r' == ch) return 1;