X-Git-Url: https://git.ucc.asn.au/?a=blobdiff_plain;f=Usermode%2FLibraries%2Flibunicode.so_src%2Futf-8.c;fp=Usermode%2FLibraries%2Flibunicode.so_src%2Futf-8.c;h=3aa9d1a8dbcf9449d2b6c2fbf10dfb9989229346;hb=6a99a6d70179161964d47de9a825fd61e8445b86;hp=0000000000000000000000000000000000000000;hpb=265bcb9e6fd6611eda6bba3aed13da83e584e058;p=tpg%2Facess2.git

diff --git a/Usermode/Libraries/libunicode.so_src/utf-8.c b/Usermode/Libraries/libunicode.so_src/utf-8.c
new file mode 100644
index 00000000..3aa9d1a8
--- /dev/null
+++ b/Usermode/Libraries/libunicode.so_src/utf-8.c
@@ -0,0 +1,153 @@
+/*
+ * Acess2 "libunicode" UTF Parser
+ * - By John Hodge (thePowersGang)
+ *
+ * utf-8.c
+ * - UTF-8 Parsing code
+ */
+#include <stdint.h>
+#include <unicode.h>
+
+/**
+ * \brief Read a UTF-8 character from a string
+ * \param Input	Source UTF-8 encoded string
+ * \param Val	Destination for read codepoint (0xFFFD if decoding fails)
+ * \return Number of bytes read/used (1-4), or -1 if a multi-byte sequence
+ *         is interrupted by a byte that is not a continuation byte
+ * \note Overlong encodings, surrogates and values above U+10FFFF are not
+ *       rejected; only the lead/continuation bit patterns are checked.
+ */
+int ReadUTF8(const char *Input, uint32_t *Val)
+{
+	const uint8_t	*str = (const uint8_t *)Input;
+	*Val = 0xFFFD;	// Assume invalid character
+	
+	// ASCII
+	if( !(str[0] & 0x80) ) {
+		*Val = str[0];
+		return 1;
+	}
+	
+	// Middle of a sequence - consume only the stray byte
+	if( (str[0] & 0xC0) == 0x80 ) {
+		return 1;
+	}
+	
+	// Each byte is validated before the next is read (a zero byte fails the
+	// check), and *Val is only written once the whole sequence is valid.
+	
+	// Two Byte
+	if( (str[0] & 0xE0) == 0xC0 ) {
+		if( (str[1] & 0xC0) != 0x80 )	return -1;	// Validity check
+		*Val = ((uint32_t)(str[0] & 0x1F) << 6)	// Upper 5 Bits
+		     |  (uint32_t)(str[1] & 0x3F);	// Lower 6 Bits
+		return 2;
+	}
+	
+	// Three Byte
+	if( (str[0] & 0xF0) == 0xE0 ) {
+		if( (str[1] & 0xC0) != 0x80 )	return -1;	// Validity check
+		if( (str[2] & 0xC0) != 0x80 )	return -1;	// Validity check
+		*Val = ((uint32_t)(str[0] & 0x0F) << 12)	// Upper 4 Bits
+		     | ((uint32_t)(str[1] & 0x3F) << 6)	// Middle 6 Bits
+		     |  (uint32_t)(str[2] & 0x3F);	// Lower 6 Bits
+		return 3;
+	}
+	
+	// Four Byte
+	if( (str[0] & 0xF8) == 0xF0 ) {
+		if( (str[1] & 0xC0) != 0x80 )	return -1;	// Validity check
+		if( (str[2] & 0xC0) != 0x80 )	return -1;	// Validity check
+		if( (str[3] & 0xC0) != 0x80 )	return -1;	// Validity check
+		*Val = ((uint32_t)(str[0] & 0x07) << 18)	// Upper 3 Bits
+		     | ((uint32_t)(str[1] & 0x3F) << 12)	// Middle-upper 6 Bits
+		     | ((uint32_t)(str[2] & 0x3F) << 6)	// Middle-lower 6 Bits
+		     |  (uint32_t)(str[3] & 0x3F);	// Lower 6 Bits
+		return 4;
+	}
+	
+	// Invalid lead byte (0xF8-0xFF): skip only it, never unexamined bytes
+	return 1;
+}
+
+/**
+ * \brief Get the UTF-8 character that ends just before \a Offset
+ * \param Base	Start of the string (never read below index 0)
+ * \param Offset	Byte index in \a Base one past the wanted character
+ * \param Val	Destination for read codepoint (0xFFFD if malformed)
+ * \return Bytes stepped back over (0 if \a Offset <= 0)
+ */
+int ReadUTF8Rev(const char *Base, int Offset, uint32_t *Val)
+{
+	 int	len = 0;
+	*Val = 0xFFFD;	// Reported by every early return below
+	if( Offset <= 0 )	return 0;	// Nothing precedes the start
+	// Scan backwards (from Base[Offset-1]) for the beginning of the character
+	while( (Base[--Offset] & 0xC0) == 0x80 ) {
+		len ++;
+		if( Offset == 0 )	return len;	// Invalid string (no beginning)
+	}
+	len ++;	// First character
+	if( ReadUTF8(Base+Offset, Val) != len )	*Val = 0xFFFD;	// Length mismatch
+	return len;
+}
+
+/**
+ * \brief Write a UTF-8 character sequence to a string
+ * \param buf	Destination buffer (at least 4 bytes), or NULL to only measure
+ * \param Val	Unicode codepoint to write
+ * \return Number of bytes written (or required), 0 if Val is >= 0x110000
+ * \note Does not NULL terminate the string in \a buf
+ */
+int WriteUTF8(char *buf, uint32_t Val)
+{
+	uint8_t	*str = (uint8_t *)buf;
+	
+	// ASCII
+	if( Val < 128 ) {
+		if(str) {
+			*str = Val;
+		}
+		return 1;
+	}
+	
+	// Two Byte (5 + 6 = 11 payload bits, so up to U+07FF only)
+	if( Val < 0x800 ) {
+		if(str) {
+			*str = 0xC0 | (Val >> 6);
+			str ++;
+			*str = 0x80 | (Val & 0x3F);
+		}
+		return 2;
+	}
+	
+	// Three Byte (4 + 6 + 6 = 16 payload bits)
+	if( Val < 0x10000 ) {
+		if(str) {
+			*str = 0xE0 | (Val >> 12);
+			str ++;
+			*str = 0x80 | ((Val >> 6) & 0x3F);
+			str ++;
+			*str = 0x80 | (Val & 0x3F);
+		}
+		return 3;
+	}
+	
+	// Four Byte (capped at the last Unicode codepoint, U+10FFFF)
+	if( Val < 0x110000 ) {
+		if(str) {
+			*str = 0xF0 | (Val >> 18);
+			str ++;
+			*str = 0x80 | ((Val >> 12) & 0x3F);
+			str ++;
+			*str = 0x80 | ((Val >> 6) & 0x3F);
+			str ++;
+			*str = 0x80 | (Val & 0x3F);
+		}
+		return 4;
+	}
+	
+	// UTF-8 Doesn't support more than four bytes
+	return 0;
+}
+