X-Git-Url: https://git.ucc.asn.au/?a=blobdiff_plain;f=Usermode%2FLibraries%2Flibspiderscript.so_src%2Flex.c;h=fd395e0f2d188f07a65473ceaea20184630c7555;hb=270e5fe88b0666021a7a6393334db7feeb8245f8;hp=6badd45bd6750aac9592cc32ec55e1d16cd244f4;hpb=efa38e0d56b1b620b6f4e5c4f91abc483a3065e2;p=tpg%2Facess2.git diff --git a/Usermode/Libraries/libspiderscript.so_src/lex.c b/Usermode/Libraries/libspiderscript.so_src/lex.c index 6badd45b..fd395e0f 100644 --- a/Usermode/Libraries/libspiderscript.so_src/lex.c +++ b/Usermode/Libraries/libspiderscript.so_src/lex.c @@ -1,14 +1,24 @@ /* - * Acess2 init + * SpiderScript * - Script Lexer */ #include "tokens.h" #include +#include +#include + +// Make the scope character ('.') be a symbol, otherwise it's just +// a ident character +#define USE_SCOPE_CHAR 0 + +#define DEBUG 0 + +#define ARRAY_SIZE(x) ((sizeof(x))/(sizeof((x)[0]))) // === PROTOTYPES === int is_ident(char ch); - int isdigit(char ch); - int isspace(char ch); + int isdigit(int ch); + int isspace(int ch); int GetToken(tParser *File); // === CONSTANTS === @@ -17,8 +27,22 @@ const struct { const char *Name; } csaReservedWords[] = { {TOK_RWD_FUNCTION, "function"}, - {TOK_RWD_INTEGER, "integer"}, - {TOK_RWD_REAL, "string"} + + {TOK_RWD_RETURN, "return"}, + {TOK_RWD_NEW, "new"}, + + {TOK_RWD_IF, "if"}, + {TOK_RWD_ELSE, "else"}, + {TOK_RWD_DO, "do"}, + {TOK_RWD_WHILE, "while"}, + {TOK_RWD_FOR, "for"}, + + {TOK_RWD_VOID, "void"}, + {TOK_RWD_OBJECT, "Object"}, + {TOK_RWD_OPAQUE, "Opaque"}, + {TOK_RWD_INTEGER, "Integer"}, + {TOK_RWD_REAL, "Real"}, + {TOK_RWD_STRING, "String"} }; // === CODE === @@ -31,19 +55,43 @@ int GetToken(tParser *File) int ret; if( File->NextToken != -1 ) { + // Save Last + File->LastToken = File->Token; + File->LastTokenStr = File->TokenStr; + File->LastTokenLen = File->TokenLen; + File->LastLine = File->CurLine; + // Restore Next File->Token = File->NextToken; File->TokenStr = File->NextTokenStr; File->TokenLen = File->NextTokenLen; + File->CurLine = File->NextLine; + // Set State + File->CurPos = File->TokenStr + File->TokenLen; File->NextToken = -1; + { + char buf[ File->TokenLen + 1]; + memcpy(buf, File->TokenStr, File->TokenLen); + buf[File->TokenLen] = 0; + #if DEBUG + printf(" GetToken: FAST Return %i (%i long) (%s)\n", File->Token, File->TokenLen, buf); + #endif + } return File->Token; } + //printf(" GetToken: File=%p, File->CurPos = %p\n", File, File->CurPos); + // Clear whitespace (including comments) for( ;; ) { // Whitespace while( isspace( *File->CurPos ) ) + { + //printf("whitespace 0x%x, line = %i\n", *File->CurPos, File->CurLine); + if( *File->CurPos == '\n' ) + File->CurLine ++; File->CurPos ++; + } // # Line Comments if( *File->CurPos == '#' ) { @@ -61,9 +109,13 @@ int GetToken(tParser *File) // C-Style Block Comments if( *File->CurPos == '/' && File->CurPos[1] == '*' ) { - File->CurPos += 2; + File->CurPos += 2; // Eat the '/*' while( *File->CurPos && !(File->CurPos[-1] == '*' && *File->CurPos == '/') ) + { + if( *File->CurPos == '\n' ) File->CurLine ++; File->CurPos ++; + } + File->CurPos ++; // Eat the '/' continue ; } @@ -75,12 +127,42 @@ int GetToken(tParser *File) File->LastToken = File->Token; File->LastTokenStr = File->TokenStr; File->LastTokenLen = File->TokenLen; + File->LastLine = File->CurLine; // Read token File->TokenStr = File->CurPos; switch( *File->CurPos++ ) { + case '\0': ret = TOK_EOF; break; + // Operations + case '^': + if( *File->CurPos == '^' ) { + File->CurPos ++; + ret = TOK_LOGICXOR; + break; + } + ret = TOK_XOR; + break; + + case '|': + if( *File->CurPos == '|' ) { + File->CurPos ++; + ret = TOK_LOGICOR; + break; + } + ret = TOK_OR; + break; + + case '&': + if( *File->CurPos == '&' ) { + File->CurPos ++; + ret = TOK_LOGICAND; + break; + } + ret = TOK_AND; + break; + case '/': ret = TOK_DIV; break; case '*': ret = TOK_MUL; break; case '+': ret = TOK_PLUS; break; @@ -95,9 +177,9 @@ int GetToken(tParser *File) // Strings case '"': - File->TokenStr ++; while( *File->CurPos && !(*File->CurPos == '"' && *File->CurPos != '\\') ) File->CurPos ++; + File->CurPos ++; ret = TOK_STR; break; @@ -111,7 +193,10 @@ int GetToken(tParser *File) // Core symbols case ';': ret = TOK_SEMICOLON; break; + case ',': ret = TOK_COMMA; break; + #if USE_SCOPE_CHAR case '.': ret = TOK_SCOPE; break; + #endif // Equals case '=': @@ -128,7 +213,6 @@ int GetToken(tParser *File) // Variables // \$[0-9]+ or \$[_a-zA-Z][_a-zA-Z0-9]* case '$': - File->TokenStr ++; // Numeric Variable if( isdigit( *File->CurPos ) ) { while( isdigit(*File->CurPos) ) @@ -136,7 +220,7 @@ int GetToken(tParser *File) } // Ident Variable else { - while( is_ident(*File->CurPos) ) + while( is_ident(*File->CurPos) || isdigit(*File->CurPos) ) File->CurPos ++; } ret = TOK_VARIABLE; @@ -144,11 +228,24 @@ int GetToken(tParser *File) // Default (Numbers and Identifiers) default: + File->CurPos --; + // Numbers if( isdigit(*File->CurPos) ) { - while( isdigit(*File->CurPos) ) - File->CurPos ++; + if( *File->CurPos == '0' && File->CurPos[1] == 'x' ) { + File->CurPos += 2; + while(('0' <= *File->CurPos && *File->CurPos <= '9') + || ('A' <= *File->CurPos && *File->CurPos <= 'F') + || ('a' <= *File->CurPos && *File->CurPos <= 'f') ) + { + File->CurPos ++; + } + } + else { + while( isdigit(*File->CurPos) ) + File->CurPos ++; + } ret = TOK_INTEGER; break; } @@ -160,16 +257,49 @@ int GetToken(tParser *File) while( is_ident(*File->CurPos) || isdigit(*File->CurPos) ) File->CurPos ++; + // This is set later too, but we use it below + File->TokenLen = File->CurPos - File->TokenStr; ret = TOK_IDENT; + + // Check if it's a reserved word + { + char buf[File->TokenLen + 1]; + int i; + memcpy(buf, File->TokenStr, File->TokenLen); + buf[File->TokenLen] = 0; + for( i = 0; i < ARRAY_SIZE(csaReservedWords); i ++ ) + { + if(strcmp(csaReservedWords[i].Name, buf) == 0) { + ret = csaReservedWords[i].Value; + break ; + } + } + } + // If there's no match, just keep ret as TOK_IDENT + break; } // Syntax Error - ret = 0; + ret = TOK_INVAL; + + fprintf(stderr, "Syntax Error: Unknown symbol '%c'\n", *File->CurPos); + longjmp(File->JmpTarget, 1); + break; } // Return File->Token = ret; File->TokenLen = File->CurPos - File->TokenStr; + + #if DEBUG + { + char buf[ File->TokenLen + 1]; + memcpy(buf, File->TokenStr, File->TokenLen); + buf[File->TokenLen] = 0; + //printf(" GetToken: File->CurPos = %p\n", File->CurPos); + printf(" GetToken: Return %i (%i long) (%s)\n", ret, File->TokenLen, buf); + } + #endif return ret; } @@ -177,13 +307,20 @@ void PutBack(tParser *File) { if( File->LastToken == -1 ) { // ERROR: + fprintf(stderr, "INTERNAL ERROR: Putback when LastToken==-1\n"); + longjmp( File->JmpTarget, -1 ); return ; } + #if DEBUG + printf(" PutBack: Was on %i\n", File->Token); + #endif // Save + File->NextLine = File->CurLine; File->NextToken = File->Token; File->NextTokenStr = File->TokenStr; File->NextTokenLen = File->TokenLen; // Restore + File->CurLine = File->LastLine; File->Token = File->LastToken; File->TokenStr = File->LastTokenStr; File->TokenLen = File->LastTokenLen; @@ -194,6 +331,7 @@ void PutBack(tParser *File) int LookAhead(tParser *File) { + // TODO: Should I save the entire state here? int ret = GetToken(File); PutBack(File); return ret; @@ -207,19 +345,22 @@ int LookAhead(tParser *File) int is_ident(char ch) { if('a' <= ch && ch <= 'z') return 1; - if('Z' <= ch && ch <= 'Z') return 1; + if('A' <= ch && ch <= 'Z') return 1; if(ch == '_') return 1; + #if !USE_SCOPE_CHAR + if(ch == '.') return 1; + #endif if(ch < 0) return 1; return 0; } -int isdigit(char ch) +int isdigit(int ch) { if('0' <= ch && ch <= '9') return 1; return 0; } -int isspace(char ch) +int isspace(int ch) { if(' ' == ch) return 1; if('\t' == ch) return 1;