Kernel/armv7 - Improved memcpy
author John Hodge <[email protected]>
Thu, 20 Oct 2011 14:31:21 +0000 (22:31 +0800)
committer John Hodge <[email protected]>
Thu, 20 Oct 2011 14:31:21 +0000 (22:31 +0800)
Kernel/arch/armv7/lib.S [new file with mode: 0644]
Kernel/arch/armv7/lib.c

diff --git a/Kernel/arch/armv7/lib.S b/Kernel/arch/armv7/lib.S
new file mode 100644 (file)
index 0000000..d37c4a1
--- /dev/null
@@ -0,0 +1,44 @@
+/*
+ * Acess2 ARM
+ * - By John Hodge (thePowersGang)
+ *
+ * arch/armv7/lib.S
+ * - Assembly editions of library functions
+ */
+#include "include/assembly.h"
+
+.globl __memcpy_byte   @ Byte-at-a-time copy of r2 bytes from [r1] to [r0]
+__memcpy_byte:
+1:
+       tst r2, r2      @ Check counter (Z is set when no bytes remain)
+       moveq pc, lr    @ Return if zero
+       ldrb r3, [r1],#1        @ Read one byte, post-incrementing the source pointer
+       strb r3, [r0],#1        @ Write it, post-incrementing the destination pointer
+       sub r2, #1      @ One fewer byte remaining
+       b 1b    @ Back to the counter check
+
+@ 
+@ Pre-aligned memcpy (32-bit blocks)
+@ - Copies r2 bytes from [r1] to [r0]: whole words first, then leftover bytes (alignment is not checked here)
+.globl __memcpy_align4
+__memcpy_align4:
+       push {r4}       @ r4 is the data scratch register; restored at label 3
+       mvn r3, #3      @ Mask for checking length (r3 = ~3, every bit except the low two)
+       
+       @ 4 byte chunk copies
+1:     tst r2, r3      @ Z is clear while at least 4 bytes remain
+       ldrne r4, [r1],#4
+       strne r4, [r0],#4
+       subne r2, #4    @ No S suffix, so the flags from tst still drive the bne below
+       bne 1b
+
+       @ single byte copies to finish off
+2:     tst r2, #3      @ Fewer than 4 bytes are left here; finished when none remain
+       beq 3f
+       ldrb r4, [r1],#1
+       strb r4, [r0],#1
+       sub r2, #1
+       b 2b
+
+3:     pop {r4}
+       mov pc, lr      @ Return to caller
index a59c062..c0feab6 100644 (file)
@@ -5,6 +5,10 @@
  */
 #include <acess.h>
 
+// === IMPORTS ===
+extern void    __memcpy_align4(void *_dest, const void *_src, size_t _length);
+extern void    __memcpy_byte(void *_dest, const void *_src, size_t _length);
+
 // === PROTOTYPES ===
 Uint64 __divmod64(Uint64 Num, Uint64 Den, Uint64 *Rem);
 Uint32 __divmod32(Uint32 Num, Uint32 Den, Uint32 *Rem);
@@ -18,31 +22,26 @@ Sint32      __modsi3(Sint32 Num, Sint32 Den);
 // === CODE ===
 void *memcpy(void *_dest, const void *_src, size_t _length)
 {
-       Uint32  *dst;
-       const Uint32    *src;
        Uint8   *dst8 = _dest;
        const Uint8     *src8 = _src;
 
+       if( ((tVAddr)_dest & 3) == 0 && ((tVAddr)_src & 3) == 0 )
+       {
+               __memcpy_align4(_dest, _src, _length);
+               return _dest;
+       }
+
        // Handle small copies / Non-aligned
        if( _length < 4 || ((tVAddr)_dest & 3) != ((tVAddr)_src & 3) )
        {
-               for( ; _length--; dst8++,src8++ )
-                       *dst8 = *src8;
+               __memcpy_byte(_dest, _src, _length);
                return _dest;
        }
 
        // Force alignment
-       while( (tVAddr)dst8 & 3 ) *dst8 ++ = *src8++;
-       dst = (void *)dst8;     src = (void *)src8;
+       while( (tVAddr)dst8 & 3 ) *dst8 ++ = *src8++, _length --;
 
-       // DWORD copies
-       for( ; _length > 3; _length -= 4)
-               *dst++ = *src++;
-
-       // Trailing bytes
-       dst8 = (void*)dst;      src8 = (void*)src;
-       for( ; _length; _length -- )
-               *dst8 ++ = *src8 ++;
+       __memcpy_align32(dst8, src8, _length);
        
        return _dest;
 }
@@ -86,7 +85,7 @@ void *memset(void *_dest, int _value, size_t _length)
 
        _value = (Uint8)_value;
 
-       // Handle small copies / Non-aligned
+       // Handle small copies
        if( _length < 4 )
        {
                for( ; _length--; dst8++ )

UCC git Repository :: git.ucc.asn.au