From: John Hodge Date: Thu, 20 Oct 2011 14:31:21 +0000 (+0800) Subject: Kernel/armv7 - Improved memcpy X-Git-Tag: rel0.13~17 X-Git-Url: https://git.ucc.asn.au/?a=commitdiff_plain;h=6a590915c73bf34116724ee15aa9c6d13c8bc404;p=tpg%2Facess2.git Kernel/armv7 - Improved memcpy --- diff --git a/Kernel/arch/armv7/lib.S b/Kernel/arch/armv7/lib.S new file mode 100644 index 00000000..d37c4a18 --- /dev/null +++ b/Kernel/arch/armv7/lib.S @@ -0,0 +1,44 @@ +/* + * Acess2 ARM + * - By John Hodge (thePowersGang) + * + * arch/arm7/lib.S + * - Assembly editions of library functions + */ +#include "include/assembly.h" + +.globl __memcpy_byte +__memcpy_byte: +1: + tst r2, r2 @ Check counter + moveq pc, lr @ Return if zero + ldrb r3, [r1],#1 @ Read + strb r3, [r0],#1 @ Write + sub r2, #1 + b 1b + +@ +@ Pre-aligned memcpy (32-bit blocks) +@ +.globl __memcpy_align4 +__memcpy_align4: + push {r4} + mvn r3, #3 @ Mask for checking length + + @ 4 byte chunk copies +1: tst r2, r3 + ldrne r4, [r1],#4 + strne r4, [r0],#4 + subne r2, #4 + bne 1b + + @ single byte copies to finish off +2: tst r2, #3 + beq 3f + ldrb r4, [r1],#1 + strb r4, [r0],#1 + sub r2, #1 + b 2b + +3: pop {r4} + mov pc, lr diff --git a/Kernel/arch/armv7/lib.c b/Kernel/arch/armv7/lib.c index a59c0625..c0feab6c 100644 --- a/Kernel/arch/armv7/lib.c +++ b/Kernel/arch/armv7/lib.c @@ -5,6 +5,10 @@ */ #include +// === IMPORTS === +extern void __memcpy_align4(void *_dest, const void *_src, size_t _length); +extern void __memcpy_byte(void *_dest, const void *_src, size_t _length); + // === PROTOTYPES === Uint64 __divmod64(Uint64 Num, Uint64 Den, Uint64 *Rem); Uint32 __divmod32(Uint32 Num, Uint32 Den, Uint32 *Rem); @@ -18,31 +22,26 @@ Sint32 __modsi3(Sint32 Num, Sint32 Den); // === CODE === void *memcpy(void *_dest, const void *_src, size_t _length) { - Uint32 *dst; - const Uint32 *src; Uint8 *dst8 = _dest; const Uint8 *src8 = _src; + if( ((tVAddr)_dest & 3) == 0 && ((tVAddr)_src & 3) == 0 ) + { + __memcpy_align4(_dest, _src, _length); + 
return _dest; + } + // Handle small copies / Non-aligned if( _length < 4 || ((tVAddr)_dest & 3) != ((tVAddr)_src & 3) ) { - for( ; _length--; dst8++,src8++ ) - *dst8 = *src8; + __memcpy_byte(_dest, _src, _length); return _dest; } // Force alignment - while( (tVAddr)dst8 & 3 ) *dst8 ++ = *src8++; - dst = (void *)dst8; src = (void *)src8; + while( (tVAddr)dst8 & 3 ) *dst8 ++ = *src8++, _length --; - // DWORD copies - for( ; _length > 3; _length -= 4) - *dst++ = *src++; - - // Trailing bytes - dst8 = (void*)dst; src8 = (void*)src; - for( ; _length; _length -- ) - *dst8 ++ = *src8 ++; + __memcpy_align4(dst8, src8, _length); return _dest; } @@ -86,7 +85,7 @@ void *memset(void *_dest, int _value, size_t _length) _value = (Uint8)_value; - // Handle small copies / Non-aligned + // Handle small copies if( _length < 4 ) { for( ; _length--; dst8++ )