From d0607dd9d43829284ebfe4909cc627bb1edf1ce8 Mon Sep 17 00:00:00 2001 From: John Hodge Date: Sun, 23 Jun 2013 00:07:44 +0800 Subject: [PATCH] Kernel - Added UTF-16 functions --- KernelLand/Kernel/Makefile | 2 +- KernelLand/Kernel/include/utf16.h | 16 +++++++ KernelLand/Kernel/utf16.c | 79 +++++++++++++++++++++++++++++++ 3 files changed, 96 insertions(+), 1 deletion(-) create mode 100644 KernelLand/Kernel/include/utf16.h create mode 100644 KernelLand/Kernel/utf16.c diff --git a/KernelLand/Kernel/Makefile b/KernelLand/Kernel/Makefile index f6820132..0aa55aba 100644 --- a/KernelLand/Kernel/Makefile +++ b/KernelLand/Kernel/Makefile @@ -54,7 +54,7 @@ BUILDINFO_SRC := $(OBJDIR)buildinfo.c$(OBJSUFFIX) OBJ := $(addprefix arch/$(ARCHDIR)/,$(A_OBJ)) OBJ += pmemmap.o -OBJ += heap.o logging.o debug.o lib.o libc.o adt.o time.o +OBJ += heap.o logging.o debug.o lib.o libc.o adt.o time.o utf16.o OBJ += drvutil_video.o drvutil_disk.o OBJ += messages.o modules.o syscalls.o system.o OBJ += threads.o mutex.o semaphore.o workqueue.o events.o rwlock.o diff --git a/KernelLand/Kernel/include/utf16.h b/KernelLand/Kernel/include/utf16.h new file mode 100644 index 00000000..0a6666cf --- /dev/null +++ b/KernelLand/Kernel/include/utf16.h @@ -0,0 +1,16 @@ +/* + * Acess2 Kernel + * - By John Hodge (thePowersGang) + * + * utf16.h + * - UTF-16 <-> UTF-8/UCS32 translation + */ +#ifndef _UTF16_H_ +#define _UTF16_H_ + +extern int ReadUTF16(const Uint16 *Str16, Uint32 *Codepoint); +extern size_t UTF16_ConvertToUTF8(size_t DestLen, char *Dest, size_t SrcLen, const Uint16 *Source); +extern int UTF16_CompareWithUTF8(size_t Str16Len, const Uint16 *Str16, const char *Str8); + +#endif + diff --git a/KernelLand/Kernel/utf16.c b/KernelLand/Kernel/utf16.c new file mode 100644 index 00000000..514fc7cb --- /dev/null +++ b/KernelLand/Kernel/utf16.c @@ -0,0 +1,79 @@ +/* + * Acess2 Kernel + * - By John Hodge (thePowersGang) + * + * utf16.c + * - UTF-16 Translation/Manipulation + */ +#define DEBUG 1 +#include +#include + +int ReadUTF16(const Uint16 *Str16, Uint32 *Codepoint) +{ + if( 0xD800 < *Str16 && *Str16 <= 0xDFFF ) + { + // UTF-16 surrogate pair + // > 0xDC00 is the second word + if( Str16[0] > 0xDC00 ) { + *Codepoint = 0; + return 1; + } + if( Str16[1] < 0xD800 || Str16[1] >= 0xDC00 ) { + *Codepoint = 0; + return 2; + } + // 2^16 + 20-bit + *Codepoint = 0x10000 + (((Str16[0] & 0x3FF) << 10) | (Str16[1] & 0x3FF)); + return 2; + } + else { + *Codepoint = *Str16; + return 1; + } +} + +size_t UTF16_ConvertToUTF8(size_t DestLen, char *Dest, size_t SrcLen, const Uint16 *Source) +{ + int len = 0; + for( ; *Source && SrcLen --; Source ++ ) + { + // TODO: Decode/Reencode + if( Dest && len < DestLen ) + Dest[len] = *Source; + len += 1; + } + if( Dest && len < DestLen ) + Dest[len] = 0; + return len; +} + +int UTF16_CompareWithUTF8(size_t Str16Len, const Uint16 *Str16, const char *Str8) +{ + int pos16 = 0, pos8 = 0; + const Uint8 *str8 = (const Uint8 *)Str8; + + while( pos16 < Str16Len && Str16[pos16] && str8[pos8] ) + { + Uint32 cp8, cp16; + pos16 += ReadUTF16(Str16+pos16, &cp16); + pos8 += ReadUTF8(str8 + pos8, &cp8); + + LOG("cp16 = %x, cp8 = %x", cp16, cp8); + if(cp16 == cp8) continue ; + + if(cp16 < cp8) + return -1; + else + return 1; + } + if( pos16 == Str16Len ) + return 0; + if( Str16[pos16] && str8[pos8] ) + return 0; + if( Str16[pos16] ) + return 1; + else + return -1; +} + -- 2.20.1