Kernel - Added UTF-16 functions
author John Hodge <[email protected]>
Sat, 22 Jun 2013 16:07:44 +0000 (00:07 +0800)
committer John Hodge <[email protected]>
Sat, 22 Jun 2013 16:07:44 +0000 (00:07 +0800)
KernelLand/Kernel/Makefile
KernelLand/Kernel/include/utf16.h [new file with mode: 0644]
KernelLand/Kernel/utf16.c [new file with mode: 0644]

index f682013..0aa55ab 100644 (file)
@@ -54,7 +54,7 @@ BUILDINFO_SRC := $(OBJDIR)buildinfo.c$(OBJSUFFIX)
 
 OBJ := $(addprefix arch/$(ARCHDIR)/,$(A_OBJ))
 OBJ += pmemmap.o
-OBJ += heap.o logging.o debug.o lib.o libc.o adt.o time.o
+OBJ += heap.o logging.o debug.o lib.o libc.o adt.o time.o utf16.o
 OBJ += drvutil_video.o drvutil_disk.o
 OBJ += messages.o modules.o syscalls.o system.o
 OBJ += threads.o mutex.o semaphore.o workqueue.o events.o rwlock.o
diff --git a/KernelLand/Kernel/include/utf16.h b/KernelLand/Kernel/include/utf16.h
new file mode 100644 (file)
index 0000000..0a6666c
--- /dev/null
@@ -0,0 +1,16 @@
+/*
+ * Acess2 Kernel
+ * - By John Hodge (thePowersGang)
+ *
+ * utf16.h
+ * - UTF-16 <-> UTF-8/UCS32 translation
+ */
+#ifndef _UTF16_H_
+#define _UTF16_H_
+
+extern int     ReadUTF16(const Uint16 *Str16, Uint32 *Codepoint);  // Decodes one codepoint at Str16 into *Codepoint; returns the number of 16-bit words consumed (1 or 2)
+extern size_t  UTF16_ConvertToUTF8(size_t DestLen, char *Dest, size_t SrcLen, const Uint16 *Source);  // Dest may be NULL (nothing is stored); returns the converted length, excluding the terminator, even if it did not fit in DestLen
+extern int     UTF16_CompareWithUTF8(size_t Str16Len, const Uint16 *Str16, const char *Str8);  // Compares codepoint by codepoint; returns -1, 0 or 1
+
+#endif
+
diff --git a/KernelLand/Kernel/utf16.c b/KernelLand/Kernel/utf16.c
new file mode 100644 (file)
index 0000000..514fc7c
--- /dev/null
@@ -0,0 +1,79 @@
+/*
+ * Acess2 Kernel
+ * - By John Hodge (thePowersGang) 
+ *
+ * utf16.c
+ * - UTF-16 Translation/Manipulation
+ */
+#define DEBUG  1
+#include <acess.h>
+#include <utf16.h>
+
+/*
+ * ReadUTF16
+ * - Decode a single codepoint from a UTF-16 string
+ *
+ * Str16:     First 16-bit word of the character to decode
+ * Codepoint: Receives the decoded value, or 0 if the sequence is malformed
+ *            (a trailing surrogate with no lead word, or a lead surrogate
+ *            that is not followed by a trailing surrogate)
+ * Returns:   Number of 16-bit words consumed (1 or 2)
+ *
+ * NOTE: When Str16[0] is a lead surrogate, Str16[1] is read. No length is
+ *       passed in, so the caller has to make sure that word is readable
+ *       (a NUL terminator is enough).
+ */
+int ReadUTF16(const Uint16 *Str16, Uint32 *Codepoint)
+{
+       // Anything outside 0xD800-0xDFFF is a complete codepoint by itself
+       if( Str16[0] < 0xD800 || Str16[0] > 0xDFFF ) {
+               *Codepoint = Str16[0];
+               return 1;
+       }
+       // 0xDC00-0xDFFF is the second word of a pair, it cannot start a character
+       if( Str16[0] >= 0xDC00 ) {
+               *Codepoint = 0;
+               return 1;
+       }
+       // A lead word (0xD800-0xDBFF) must be followed by 0xDC00-0xDFFF
+       // - Only the lead word is consumed on failure, so the caller still
+       //   sees the following word (which may be the NUL terminator)
+       if( Str16[1] < 0xDC00 || Str16[1] > 0xDFFF ) {
+               *Codepoint = 0;
+               return 1;
+       }
+       // 2^16 + 20-bit (top 10 bits from the lead word, low 10 from the trail word)
+       *Codepoint = 0x10000 + ((((Uint32)Str16[0] & 0x3FF) << 10) | ((Uint32)Str16[1] & 0x3FF));
+       return 2;
+}
+
+/*
+ * UTF16_int_EncodeUTF8
+ * - Encode one codepoint as UTF-8
+ *
+ * Buf:       Output buffer, must have room for 4 bytes
+ * Codepoint: Value to encode (expected to be <= 0x10FFFF)
+ * Returns:   Number of bytes stored in Buf (1 to 4)
+ */
+static size_t UTF16_int_EncodeUTF8(Uint8 *Buf, Uint32 Codepoint)
+{
+       if( Codepoint < 0x80 ) {
+               Buf[0] = Codepoint;
+               return 1;
+       }
+       if( Codepoint < 0x800 ) {
+               Buf[0] = 0xC0 | (Codepoint >> 6);
+               Buf[1] = 0x80 | (Codepoint & 0x3F);
+               return 2;
+       }
+       if( Codepoint < 0x10000 ) {
+               Buf[0] = 0xE0 | (Codepoint >> 12);
+               Buf[1] = 0x80 | ((Codepoint >> 6) & 0x3F);
+               Buf[2] = 0x80 | (Codepoint & 0x3F);
+               return 3;
+       }
+       Buf[0] = 0xF0 | ((Codepoint >> 18) & 0x07);
+       Buf[1] = 0x80 | ((Codepoint >> 12) & 0x3F);
+       Buf[2] = 0x80 | ((Codepoint >> 6) & 0x3F);
+       Buf[3] = 0x80 | (Codepoint & 0x3F);
+       return 4;
+}
+
+/*
+ * UTF16_ConvertToUTF8
+ * - Convert a UTF-16 string into UTF-8
+ *
+ * DestLen: Size of Dest in bytes
+ * Dest:    Output buffer, may be NULL to only calculate the length
+ * SrcLen:  Maximum number of 16-bit words to read from Source
+ * Source:  UTF-16 input, conversion stops at SrcLen words or the first NUL word
+ * Returns: Number of bytes the complete conversion needs, excluding the
+ *          terminator. This may be larger than DestLen.
+ *
+ * Only whole UTF-8 sequences are stored, and storing stops at the first
+ * sequence that does not fit. A NUL terminator is added whenever there is
+ * a free byte after the last stored sequence (so an exact fit is left
+ * unterminated). Unpaired surrogates are converted to U+FFFD.
+ */
+size_t UTF16_ConvertToUTF8(size_t DestLen, char *Dest, size_t SrcLen, const Uint16 *Source)
+{
+       size_t  len = 0;        // Bytes needed for the full conversion
+       size_t  written = 0;    // Bytes actually stored in Dest (never above DestLen)
+       size_t  pos = 0;
+        int    bStopped = (Dest == NULL);
+
+       // Length is checked before the word is read, so SrcLen == 0 never touches Source
+       while( pos < SrcLen && Source[pos] )
+       {
+               Uint32  cp = Source[pos ++];
+               Uint8   enc[4];
+               size_t  enclen, i;
+
+               if( 0xD800 <= cp && cp <= 0xDBFF && pos < SrcLen
+                && 0xDC00 <= Source[pos] && Source[pos] <= 0xDFFF )
+               {
+                       // Valid surrogate pair: 2^16 + 20-bit
+                       cp = 0x10000 + (((cp & 0x3FF) << 10) | ((Uint32)Source[pos] & 0x3FF));
+                       pos ++;
+               }
+               else if( 0xD800 <= cp && cp <= 0xDFFF )
+               {
+                       // Unpaired surrogate, substitute the replacement character
+                       cp = 0xFFFD;
+               }
+
+               enclen = UTF16_int_EncodeUTF8(enc, cp);
+               if( !bStopped && enclen <= DestLen - written )
+               {
+                       for( i = 0; i < enclen; i ++ )
+                               Dest[written + i] = (char)enc[i];
+                       written += enclen;
+               }
+               else
+               {
+                       // Nothing more is stored once a sequence has been dropped,
+                       // otherwise the output would have a hole in it
+                       bStopped = 1;
+               }
+               len += enclen;
+       }
+       if( Dest && written < DestLen )
+               Dest[written] = 0;
+       return len;
+}
+
+/*
+ * UTF16_CompareWithUTF8
+ * - Compare a UTF-16 string against a UTF-8 string, codepoint by codepoint
+ *
+ * Str16Len: Maximum number of 16-bit words to read from Str16
+ * Str16:    UTF-16 string, ends at Str16Len words or the first NUL word
+ * Str8:     NUL terminated UTF-8 string
+ * Returns:  0 if both strings hold the same codepoints and end together,
+ *           -1 if Str16 sorts first (lower codepoint at the first
+ *           difference, or it is a strict prefix of Str8), 1 otherwise
+ */
+int UTF16_CompareWithUTF8(size_t Str16Len, const Uint16 *Str16, const char *Str8)
+{
+       size_t  pos16 = 0, pos8 = 0;
+       const Uint8     *str8 = (const Uint8 *)Str8;
+        int    bEnd16;
+
+       while( pos16 < Str16Len && Str16[pos16] && str8[pos8] )
+       {
+               Uint32  cp8, cp16;
+               if( Str16Len - pos16 < 2 && 0xD800 <= Str16[pos16] && Str16[pos16] <= 0xDBFF )
+               {
+                       // Lead surrogate in the very last word, the pair is cut off.
+                       // Decoding it would read past Str16Len, so it is handled
+                       // here as a malformed character (codepoint 0)
+                       cp16 = 0;
+                       pos16 ++;
+               }
+               else
+                       pos16 += ReadUTF16(Str16+pos16, &cp16);
+               pos8 += ReadUTF8(str8 + pos8, &cp8);
+
+               LOG("cp16 = %x, cp8 = %x", cp16, cp8);
+               if(cp16 == cp8) continue ;
+
+               if(cp16 < cp8)
+                       return -1;
+               else
+                       return 1;
+       }
+
+       // The length is tested first, Str16[Str16Len] does not have to be readable
+       bEnd16 = (pos16 >= Str16Len || Str16[pos16] == 0);
+       if( bEnd16 && str8[pos8] == 0 )
+               return 0;       // Both strings ended at the same point
+       if( bEnd16 )
+               return -1;      // Str16 is a strict prefix of Str8
+       return 1;       // Str8 ended first
+}
+

UCC git Repository :: git.ucc.asn.au