10 // Make the scope character ('.') be a symbol, otherwise it's just
12 #define USE_SCOPE_CHAR 0
// NOTE(review): the conditional that consumes USE_SCOPE_CHAR is not visible
// in this excerpt. Both a TOK_SCOPE case for '.' in GetToken and a '.' test
// in is_ident appear in this file - confirm which one this flag compiles in.

// Element count of a true array `x` (sizeof-based, so `x` must be an array,
// not a pointer). Used by GetToken to iterate over csaReservedWords.
16 #define ARRAY_SIZE(x) ((sizeof(x))/(sizeof((x)[0])))
// Forward declarations of functions defined in this file.
19 int is_ident(char ch);
22 int GetToken(tParser *File);
// Reserved-word table: each entry pairs a token ID (read as .Value) with the
// keyword's spelling (read as .Name). GetToken compares a scanned identifier
// against .Name with strcmp, so matching is exact and case-sensitive
// (e.g. the type names below are capitalised: "Object", "Integer", ...).
28 } csaReservedWords[] = {
// Statement keywords
29 {TOK_RWD_FUNCTION, "function"},
31 {TOK_RWD_RETURN, "return"},
35 {TOK_RWD_ELSE, "else"},
37 {TOK_RWD_WHILE, "while"},
// Type keywords
40 {TOK_RWD_VOID, "void"},
41 {TOK_RWD_OBJECT, "Object"},
42 {TOK_RWD_OPAQUE, "Opaque"},
43 {TOK_RWD_INTEGER, "Integer"},
44 {TOK_RWD_REAL, "Real"},
45 {TOK_RWD_STRING, "String"}
50 * \brief Read a token from a buffer
51 * \param File Parser state
// Scans the next token starting at File->CurPos. The current token's
// ID/text/length/line are first copied into the Last* fields, then
// Token*/CurPos are updated to describe the new token.
// Unknown characters are reported on stderr and abort via
// longjmp(File->JmpTarget, 1).
53 int GetToken(tParser *File)
// Fast path: a token is already stored in the Next* fields (PutBack fills
// them), so restore it instead of re-scanning the text.
57 if( File->NextToken != -1 ) {
// Current token becomes the "last" token ...
59 File->LastToken = File->Token;
60 File->LastTokenStr = File->TokenStr;
61 File->LastTokenLen = File->TokenLen;
62 File->LastLine = File->CurLine;
// ... and the stored "next" token becomes current.
64 File->Token = File->NextToken;
65 File->TokenStr = File->NextTokenStr;
66 File->TokenLen = File->NextTokenLen;
67 File->CurLine = File->NextLine;
// Resume scanning immediately after the restored token's text.
69 File->CurPos = File->TokenStr + File->TokenLen;
// Trace output: TokenStr is not NUL-terminated at the token's end, so the
// text is copied into a temporary NUL-terminated buffer before printing.
72 char buf[ File->TokenLen + 1];
73 memcpy(buf, File->TokenStr, File->TokenLen);
74 buf[File->TokenLen] = 0;
76 printf(" GetToken: FAST Return %i (%i long) (%s)\n", File->Token, File->TokenLen, buf);
82 //printf(" GetToken: File=%p, File->CurPos = %p\n", File, File->CurPos);
84 // Clear whitespace (including comments)
88 while( isspace( *File->CurPos ) )
90 //printf("whitespace 0x%x, line = %i\n", *File->CurPos, File->CurLine);
91 if( *File->CurPos == '\n' )
// '#' starts a comment that runs to end of line (or end of input).
97 if( *File->CurPos == '#' ) {
98 while( *File->CurPos && *File->CurPos != '\n' )
103 // C-Style Line Comments
104 if( *File->CurPos == '/' && File->CurPos[1] == '/' ) {
105 while( *File->CurPos && *File->CurPos != '\n' )
110 // C-Style Block Comments
111 if( *File->CurPos == '/' && File->CurPos[1] == '*' ) {
112 File->CurPos += 2; // Eat the '/*'
// Stop at end of input, or when the previous character is '*' and the
// current one is '/'.
// NOTE(review): right after eating "/*", CurPos[-1] is the opening '*',
// so the input "/*/" satisfies the terminator test immediately and is
// treated as a complete comment - confirm whether that is intended.
113 while( *File->CurPos && !(File->CurPos[-1] == '*' && *File->CurPos == '/') )
// Keep the line counter in step with newlines inside the comment.
115 if( *File->CurPos == '\n' ) File->CurLine ++;
118 File->CurPos ++; // Eat the '/'
122 // No more "whitespace"
126 // Save previous tokens (speeds up PutBack and LookAhead)
127 File->LastToken = File->Token;
128 File->LastTokenStr = File->TokenStr;
129 File->LastTokenLen = File->TokenLen;
130 File->LastLine = File->CurLine;
// The new token's text starts here; the switch's post-increment leaves
// CurPos one past the token's first character.
133 File->TokenStr = File->CurPos;
134 switch( *File->CurPos++ )
// NUL terminator of the input buffer marks end of file.
136 case '\0': ret = TOK_EOF; break;
// The following tests look at the character after the first one, to
// recognise two-character operators (the case labels are not visible here).
140 if( *File->CurPos == '^' ) {
149 if( *File->CurPos == '|' ) {
158 if( *File->CurPos == '&' ) {
166 case '/': ret = TOK_DIV; break;
167 case '*': ret = TOK_MUL; break;
168 case '+': ret = TOK_PLUS; break;
170 if( *File->CurPos == '>' ) {
// Scan forward to the terminating '"' (or end of input).
// NOTE(review): both comparisons test the same character, so the
// "!= '\\'" half is always true when the "== '\"'" half is; as written
// a backslash before a quote does not stop the scan from ending there.
// It looks like the preceding character was meant to be checked - verify
// against the loop body, which is not visible in this excerpt.
180 while( *File->CurPos && !(*File->CurPos == '"' && *File->CurPos != '\\') )
187 case '(': ret = TOK_PAREN_OPEN; break;
188 case ')': ret = TOK_PAREN_CLOSE; break;
189 case '{': ret = TOK_BRACE_OPEN; break;
190 case '}': ret = TOK_BRACE_CLOSE; break;
191 case '[': ret = TOK_SQUARE_OPEN; break;
192 case ']': ret = TOK_SQUARE_CLOSE; break;
195 case ';': ret = TOK_SEMICOLON; break;
196 case ',': ret = TOK_COMMA; break;
198 case '.': ret = TOK_SCOPE; break;
204 if( *File->CurPos == '=' ) {
214 // \$[0-9]+ or \$[_a-zA-Z][_a-zA-Z0-9]*
// Numeric form: consume a run of digits.
217 if( isdigit( *File->CurPos ) ) {
218 while( isdigit(*File->CurPos) )
// Named form: consume identifier characters and digits.
223 while( is_ident(*File->CurPos) || isdigit(*File->CurPos) )
229 // Default (Numbers and Identifiers)
234 if( isdigit(*File->CurPos) )
// "0x" prefix selects a hexadecimal literal: consume [0-9A-Fa-f]*.
236 if( *File->CurPos == '0' && File->CurPos[1] == 'x' ) {
238 while(('0' <= *File->CurPos && *File->CurPos <= '9')
239 || ('A' <= *File->CurPos && *File->CurPos <= 'F')
240 || ('a' <= *File->CurPos && *File->CurPos <= 'f') )
// Otherwise consume a run of decimal digits.
246 while( isdigit(*File->CurPos) )
// Identifier: starts with an is_ident character, then identifier
// characters or digits.
254 if( is_ident(*File->CurPos) )
257 while( is_ident(*File->CurPos) || isdigit(*File->CurPos) )
260 // This is set later too, but we use it below
261 File->TokenLen = File->CurPos - File->TokenStr;
264 // Check if it's a reserved word
// Copy the identifier into a NUL-terminated buffer so it can be compared
// with strcmp against each table entry (linear search).
266 char buf[File->TokenLen + 1];
268 memcpy(buf, File->TokenStr, File->TokenLen);
269 buf[File->TokenLen] = 0;
270 for( i = 0; i < ARRAY_SIZE(csaReservedWords); i ++ )
272 if(strcmp(csaReservedWords[i].Name, buf) == 0) {
273 ret = csaReservedWords[i].Value;
278 // If there's no match, just keep ret as TOK_IDENT
// Unrecognised character: report it and unwind to the parser's recovery
// point with status 1.
285 fprintf(stderr, "Syntax Error: Unknown symbol '%c'\n", *File->CurPos);
286 longjmp(File->JmpTarget, 1);
// Final token length: distance from the token's first character to the
// scan position.
292 File->TokenLen = File->CurPos - File->TokenStr;
// Trace output, again via a temporary NUL-terminated copy of the token.
296 char buf[ File->TokenLen + 1];
297 memcpy(buf, File->TokenStr, File->TokenLen);
298 buf[File->TokenLen] = 0;
299 //printf(" GetToken: File->CurPos = %p\n", File->CurPos);
300 printf(" GetToken: Return %i (%i long) (%s)\n", ret, File->TokenLen, buf);
// Un-reads the current token: the current token is stored in the Next*
// fields (which GetToken's fast path restores), and the saved Last* token
// becomes current again.
// Only one level of put-back is kept: LastToken is set to -1 at the end,
// and calling PutBack while LastToken == -1 is reported as an internal
// error via longjmp(File->JmpTarget, -1).
306 void PutBack(tParser *File)
// No saved previous token to return to.
308 if( File->LastToken == -1 ) {
310 fprintf(stderr, "INTERNAL ERROR: Putback when LastToken==-1\n");
311 longjmp( File->JmpTarget, -1 );
315 printf(" PutBack: Was on %i\n", File->Token);
// Stash the current token as the pending "next" token.
318 File->NextLine = File->CurLine;
319 File->NextToken = File->Token;
320 File->NextTokenStr = File->TokenStr;
321 File->NextTokenLen = File->TokenLen;
// Step back: the previous token becomes the current one.
323 File->CurLine = File->LastLine;
324 File->Token = File->LastToken;
325 File->TokenStr = File->LastTokenStr;
326 File->TokenLen = File->LastTokenLen;
// Rewind the scan position to the start of the token that was put back.
327 File->CurPos = File->NextTokenStr;
// Mark the Last* fields as consumed so a second consecutive PutBack is
// caught by the check at the top.
329 File->LastToken = -1;
// Peeks at the upcoming token; begins by reading it with GetToken.
// NOTE(review): the remainder of the body is not visible in this excerpt -
// presumably the token is put back before returning; confirm.
332 int LookAhead(tParser *File)
334 // TODO: Should I save the entire state here?
335 int ret = GetToken(File);
342 * \brief Check for ident characters
343 * \note Matches Regex [a-zA-Z_.] ('.' is accepted as an identifier character by the check below)
// Returns 1 when `ch` may appear in an identifier: ASCII letters,
// underscore, and '.'.
// NOTE(review): the '.' test is probably guarded by USE_SCOPE_CHAR, but the
// guard and the function's final return are not visible in this excerpt.
345 int is_ident(char ch)
347 if('a' <= ch && ch <= 'z') return 1;
348 if('A' <= ch && ch <= 'Z') return 1;
349 if(ch == '_') return 1;
// '.' counts as an identifier character, so dotted names scan as one token.
351 if(ch == '.') return 1;
// NOTE(review): the function headers for the checks below are not visible
// in this excerpt; presumably these are the bodies of the digit and
// whitespace helpers that GetToken calls as isdigit()/isspace() - confirm.
// Decimal digit test: '0'..'9'.
359 if('0' <= ch && ch <= '9') return 1;
// Whitespace test: space, tab, backspace ('\b'), newline and carriage
// return. Vertical tab and form feed are not listed here.
365 if(' ' == ch) return 1;
366 if('\t' == ch) return 1;
367 if('\b' == ch) return 1;
368 if('\n' == ch) return 1;
369 if('\r' == ch) return 1;