2 * Acess2 GUI (AxWin) Version 3
3 * - By John Hodge (thePowersGang)
/**
 * \brief Read a UTF-8 character from a string
 * \param Input	Source UTF-8 encoded string
 * \param Val	Destination for read codepoint
 * \return Number of bytes read/used, or -1 if a continuation byte of a
 *         multi-byte sequence is malformed
 *
 * \a *Val receives U+FFFD (replacement character) whenever no codepoint
 * could be decoded: stray continuation byte, invalid lead byte, or a
 * truncated/malformed sequence.
 *
 * \note Callers must check for a negative return before advancing.
 * \note Overlong encodings and surrogate codepoints are not rejected.
 */
int ReadUTF8(const char *Input, uint32_t *Val)
{
	const uint8_t	*str = (const uint8_t *)Input;
	uint32_t	cp;
	 int	len, i;
	
	*Val = 0xFFFD;	// Assume invalid character
	
	// ASCII
	if( !(*str & 0x80) ) {
		*Val = *str;
		return 1;
	}
	
	// Middle of a sequence - skip the stray continuation byte
	if( (*str & 0xC0) == 0x80 ) {
		return 1;
	}
	
	// Classify the lead byte: sequence length and payload bits it carries
	if( (*str & 0xE0) == 0xC0 ) {
		len = 2;
		cp = *str & 0x1F;	// 110xxxxx
	}
	else if( (*str & 0xF0) == 0xE0 ) {
		len = 3;
		cp = *str & 0x0F;	// 1110xxxx
	}
	else if( (*str & 0xF8) == 0xF0 ) {
		// Mask must cover the top five bits (0xF8); 0xF1 would reject
		// the leads 0xF1/0xF3/... and accept 0xF8 and above.
		len = 4;
		cp = *str & 0x07;	// 11110xxx
	}
	else {
		// UTF-8 Doesn't support more than four bytes
		// Consume only the offending byte so the caller can never be
		// told to step over a terminating NUL.
		return 1;
	}
	
	// Accumulate 6 bits from each continuation byte. Bytes are examined
	// in order and decoding stops at the first bad one (including NUL),
	// so nothing past the end of the string is read.
	for( i = 1; i < len; i ++ )
	{
		if( (str[i] & 0xC0) != 0x80 )	return -1;	// Validity check
		cp = (cp << 6) | (str[i] & 0x3F);
	}
	
	// Only publish the value once the whole sequence has been validated
	*Val = cp;
	return len;
}
/**
 * \brief Get the UTF-8 character before the given byte offset
 * \param Base	Start of the UTF-8 encoded string
 * \param Offset	Byte offset into \a Base; the character ending
 *              	immediately before this offset is read
 * \param Val	Destination for read codepoint
 * \return Number of bytes the character occupies (amount to step back),
 *         0 if \a Offset is at the start of the string
 *
 * \a *Val receives U+FFFD if the bytes before \a Offset do not form
 * exactly one character as decoded by ReadUTF8.
 */
int ReadUTF8Rev(const char *Base, int Offset, uint32_t *Val)
{
	const uint8_t	*bytes = (const uint8_t *)Base;
	 int	len = 0;
	
	*Val = 0xFFFD;	// Assume invalid character
	
	// Nothing precedes the start of the string
	if( Offset <= 0 )
		return 0;
	
	// Scan backwards for the beginning of the character
	// (look at the byte BEFORE the offset, and only step when it is a
	//  continuation byte)
	while( Offset > 0 && (bytes[Offset-1] & 0xC0) == 0x80 )
	{
		Offset --;
		len ++;
	}
	// Invalid string (no beginning)
	if( Offset == 0 )
		return len;
	
	// First character (the lead byte)
	Offset --;
	len ++;
	
	// The forward decode must consume exactly the bytes stepped over
	if( ReadUTF8(Base+Offset, Val) != len ) {
		*Val = 0xFFFD;
	}
	return len;
}
/**
 * \brief Write a UTF-8 character sequence to a string
 * \param buf	Destination buffer (must have at least 4 bytes available),
 *           	or NULL to only compute the encoded length
 * \param Val	Unicode codepoint to write
 * \return Number of bytes written (1 to 4), or 0 if \a Val is above
 *         U+10FFFF and cannot be encoded
 * \note Does not NULL terminate the string in \a buf
 * \note Surrogate codepoints (U+D800 - U+DFFF) are encoded as-is
 */
int WriteUTF8(char *buf, uint32_t Val)
{
	uint8_t	*str = (void*)buf;
	
	// ASCII (7 bits)
	if( Val < 0x80 ) {
		if( str ) {
			str[0] = (uint8_t)Val;
		}
		return 1;
	}
	
	// Two Byte (11 bits: 5 in the lead, 6 in the trailer)
	if( Val < 0x800 ) {
		if( str ) {
			str[0] = (uint8_t)(0xC0 | (Val >> 6));
			str[1] = (uint8_t)(0x80 | (Val & 0x3F));
		}
		return 2;
	}
	
	// Three Byte (16 bits: 4 + 6 + 6)
	if( Val < 0x10000 ) {
		if( str ) {
			str[0] = (uint8_t)(0xE0 | (Val >> 12));
			str[1] = (uint8_t)(0x80 | ((Val >> 6) & 0x3F));
			str[2] = (uint8_t)(0x80 | (Val & 0x3F));
		}
		return 3;
	}
	
	// Four Byte (21 bits: 3 + 6 + 6 + 6), capped at the Unicode maximum
	if( Val < 0x110000 ) {
		if( str ) {
			str[0] = (uint8_t)(0xF0 | (Val >> 18));
			str[1] = (uint8_t)(0x80 | ((Val >> 12) & 0x3F));
			str[2] = (uint8_t)(0x80 | ((Val >> 6) & 0x3F));
			str[3] = (uint8_t)(0x80 | (Val & 0x3F));
		}
		return 4;
	}
	
	// UTF-8 Doesn't support more than four bytes
	return 0;
}