From fe954297ac90179585eeeb9685a3c21e095cf9d4 Mon Sep 17 00:00:00 2001
From: John Hodge
Date: Tue, 23 Jul 2013 22:51:50 +0800
Subject: [PATCH] Usermode/libunicode - Cleaned up ReadUTF8 to support Val=NULL

---
 Usermode/Libraries/libunicode.so_src/utf-8.c | 59 +++++++++++---------
 1 file changed, 33 insertions(+), 26 deletions(-)

diff --git a/Usermode/Libraries/libunicode.so_src/utf-8.c b/Usermode/Libraries/libunicode.so_src/utf-8.c
index 3aa9d1a8..02c62175 100644
--- a/Usermode/Libraries/libunicode.so_src/utf-8.c
+++ b/Usermode/Libraries/libunicode.so_src/utf-8.c
@@ -17,57 +17,64 @@
 int ReadUTF8(const char *Input, uint32_t *Val)
 {
 	const uint8_t *str = (const uint8_t *)Input;
-	*Val = 0xFFFD;	// Assume invalid character
 	
-	// ASCII
-	if( !(*str & 0x80) ) {
-		*Val = *str;
-		return 1;
-	}
+	if(Val) *Val = 0xFFFD;	// Assume invalid character
+
+	int len;
+	uint32_t val;
 	
 	// Middle of a sequence
 	if( (*str & 0xC0) == 0x80 ) {
 		return 1;
 	}
-	
+
+	// ASCII
+	if( !(*str & 0x80) ) {
+		val = *str;
+		len = 1;
+	}
 	// Two Byte
-	if( (*str & 0xE0) == 0xC0 ) {
-		*Val = (*str & 0x1F) << 6;	// Upper 6 Bits
+	else if( (*str & 0xE0) == 0xC0 ) {
+		val = (*str & 0x1F) << 6;	// Upper 6 Bits
 		str ++;
 		if( (*str & 0xC0) != 0x80) return -1;	// Validity check
-		*Val |= (*str & 0x3F);	// Lower 6 Bits
-		return 2;
+		val |= (*str & 0x3F);	// Lower 6 Bits
+		len = 2;
 	}
-	
 	// Three Byte
-	if( (*str & 0xF0) == 0xE0 ) {
-		*Val = (*str & 0x0F) << 12;	// Upper 4 Bits
+	else if( (*str & 0xF0) == 0xE0 ) {
+		val = (*str & 0x0F) << 12;	// Upper 4 Bits
 		str ++;
 		if( (*str & 0xC0) != 0x80) return -1;	// Validity check
-		*Val |= (*str & 0x3F) << 6;	// Middle 6 Bits
+		val |= (*str & 0x3F) << 6;	// Middle 6 Bits
 		str ++;
 		if( (*str & 0xC0) != 0x80) return -1;	// Validity check
-		*Val |= (*str & 0x3F);	// Lower 6 Bits
-		return 3;
+		val |= (*str & 0x3F);	// Lower 6 Bits
+		len = 3;
 	}
-	
 	// Four Byte
-	if( (*str & 0xF8) == 0xF0 ) {
-		*Val = (*str & 0x07) << 18;	// Upper 3 Bits
+	else if( (*str & 0xF8) == 0xF0 ) {
+		val = (*str & 0x07) << 18;	// Upper 3 Bits
 		str ++;
 		if( (*str & 0xC0) != 0x80) return -1;	// Validity check
-		*Val |= (*str & 0x3F) << 12;	// Middle-upper 6 Bits
+		val |= (*str & 0x3F) << 12;	// Middle-upper 6 Bits
 		str ++;
 		if( (*str & 0xC0) != 0x80) return -1;	// Validity check
-		*Val |= (*str & 0x3F) << 6;	// Middle-lower 6 Bits
+		val |= (*str & 0x3F) << 6;	// Middle-lower 6 Bits
 		str ++;
 		if( (*str & 0xC0) != 0x80) return -1;	// Validity check
-		*Val |= (*str & 0x3F);	// Lower 6 Bits
-		return 4;
+		val |= (*str & 0x3F);	// Lower 6 Bits
+		len = 4;
 	}
-	
 	// UTF-8 Doesn't support more than four bytes
-	return 4;
+	else {
+		return 4;
+	}
+
+	if( Val )
+		*Val = val;
+
+	return len;
 }
 
 /**
-- 
2.20.1