-
-/**
- * \fn int ReadUTF8(char *Input, uint32_t *Val)
- * \brief Read a UTF-8 character from a string
- */
-int ReadUTF8(const char *Input, uint32_t *Val)
-{
- const uint8_t *str = (const uint8_t *)Input;
- *Val = 0xFFFD; // Assume invalid character
-
- // ASCII
- if( !(*str & 0x80) ) {
- *Val = *str;
- return 1;
- }
-
- // Middle of a sequence
- if( (*str & 0xC0) == 0x80 ) {
- return 1;
- }
-
- // Two Byte
- if( (*str & 0xE0) == 0xC0 ) {
- *Val = (*str & 0x1F) << 6; // Upper 6 Bits
- str ++;
- if( (*str & 0xC0) != 0x80) return -1; // Validity check
- *Val |= (*str & 0x3F); // Lower 6 Bits
- return 2;
- }
-
- // Three Byte
- if( (*str & 0xF0) == 0xE0 ) {
- *Val = (*str & 0x0F) << 12; // Upper 4 Bits
- str ++;
- if( (*str & 0xC0) != 0x80) return -1; // Validity check
- *Val |= (*str & 0x3F) << 6; // Middle 6 Bits
- str ++;
- if( (*str & 0xC0) != 0x80) return -1; // Validity check
- *Val |= (*str & 0x3F); // Lower 6 Bits
- return 3;
- }
-
- // Four Byte
- if( (*str & 0xF1) == 0xF0 ) {
- *Val = (*str & 0x07) << 18; // Upper 3 Bits
- str ++;
- if( (*str & 0xC0) != 0x80) return -1; // Validity check
- *Val |= (*str & 0x3F) << 12; // Middle-upper 6 Bits
- str ++;
- if( (*str & 0xC0) != 0x80) return -1; // Validity check
- *Val |= (*str & 0x3F) << 6; // Middle-lower 6 Bits
- str ++;
- if( (*str & 0xC0) != 0x80) return -1; // Validity check
- *Val |= (*str & 0x3F); // Lower 6 Bits
- return 4;
- }
-
- // UTF-8 Doesn't support more than four bytes
- return 4;
-}
-
-
-
-