--- /dev/null
+/*
+ * Acess2 ARM
+ * - By John Hodge (thePowersGang)
+ *
+ * arch/arm7/lib.S
+ * - Assembly versions of library functions
+ */
+#include "include/assembly.h"
+
+@
+@ Byte-at-a-time memcpy
+@
+@ r0: destination pointer (written through, post-incremented)
+@ r1: source pointer (read through, post-incremented)
+@ r2: number of bytes left to copy
+@ r3 is used as scratch for the byte in flight.
+@ Returns immediately when r2 is zero; no value is placed in r0 for the
+@ caller (r0 is left pointing just past the last byte written).
+@
+.globl __memcpy_byte
+__memcpy_byte:
+1:
+ tst r2, r2 @ Check counter
+ moveq pc, lr @ Return if zero
+ ldrb r3, [r1],#1 @ Read
+ strb r3, [r0],#1 @ Write
+ sub r2, #1 @ One byte fewer remaining (flags untouched; re-tested at 1:)
+ b 1b
+
+@
+@ Pre-aligned memcpy (32-bit blocks)
+@
+@ r0: destination pointer (written through, post-incremented)
+@ r1: source pointer (read through, post-incremented)
+@ r2: number of bytes left to copy
+@ r3 holds the length mask, r4 is the data scratch register; r4 is saved
+@ on the stack on entry and restored before returning.
+@ Copies one 32-bit word per iteration while at least 4 bytes remain, then
+@ finishes the remaining 0-3 bytes one at a time.
+@ NOTE(review): the word loop does plain ldr/str on r0/r1 with no alignment
+@ fix-up, so callers are presumably expected to pass 4-byte aligned
+@ pointers (as the "pre-aligned" name suggests) - confirm against callers.
+@
+.globl __memcpy_align4
+__memcpy_align4:
+ push {r4}
+ mvn r3, #3 @ Mask for checking length
+
+ @ 4 byte chunk copies
+ @ (r2 & ~3) is non-zero exactly when r2 >= 4; the flags set by tst stay
+ @ valid for the conditional instructions because none of them set flags.
+1: tst r2, r3
+ ldrne r4, [r1],#4
+ strne r4, [r0],#4
+ subne r2, #4
+ bne 1b
+
+ @ single byte copies to finish off
+ @ (r2 is below 4 here, so its low two bits are the whole remaining count)
+2: tst r2, #3
+ beq 3f
+ ldrb r4, [r1],#1
+ strb r4, [r0],#1
+ sub r2, #1
+ b 2b
+
+3: pop {r4}
+ mov pc, lr
*/
#include <acess.h>
+// === IMPORTS ===
+extern void __memcpy_align4(void *_dest, const void *_src, size_t _length);
+extern void __memcpy_byte(void *_dest, const void *_src, size_t _length);
+
// === PROTOTYPES ===
Uint64 __divmod64(Uint64 Num, Uint64 Den, Uint64 *Rem);
Uint32 __divmod32(Uint32 Num, Uint32 Den, Uint32 *Rem);
// === CODE ===
+/**
+ * Copy _length bytes from _src to _dest and return _dest.
+ *
+ * Dispatches to the assembly helpers:
+ *  - both pointers 4-byte aligned: __memcpy_align4 does the whole copy
+ *  - fewer than 4 bytes, or the pointers differ in their low two address
+ *    bits (so they can never be word-aligned together): __memcpy_byte
+ *  - otherwise: copy leading bytes until the destination (and therefore
+ *    the source) is 4-byte aligned, then hand the rest to __memcpy_align4
+ */
void *memcpy(void *_dest, const void *_src, size_t _length)
{
- Uint32 *dst;
- const Uint32 *src;
Uint8 *dst8 = _dest;
const Uint8 *src8 = _src;
+ // Fast path: both pointers are already 32-bit aligned
+ if( ((tVAddr)_dest & 3) == 0 && ((tVAddr)_src & 3) == 0 )
+ {
+ __memcpy_align4(_dest, _src, _length);
+ return _dest;
+ }
+
// Handle small copies / Non-aligned
if( _length < 4 || ((tVAddr)_dest & 3) != ((tVAddr)_src & 3) )
{
- for( ; _length--; dst8++,src8++ )
- *dst8 = *src8;
+ __memcpy_byte(_dest, _src, _length);
return _dest;
}
// Force alignment
- while( (tVAddr)dst8 & 3 ) *dst8 ++ = *src8++;
- dst = (void *)dst8; src = (void *)src8;
+ // (_length is at least 4 here and at most 3 bytes are consumed, so the
+ //  count cannot underflow; it must be decremented for the call below)
+ while( (tVAddr)dst8 & 3 ) *dst8 ++ = *src8++, _length --;
- // DWORD copies
- for( ; _length > 3; _length -= 4)
- *dst++ = *src++;
-
- // Trailing bytes
- dst8 = (void*)dst; src8 = (void*)src;
- for( ; _length; _length -- )
- *dst8 ++ = *src8 ++;
+ // Word copies plus trailing bytes are handled by the aligned helper
+ __memcpy_align4(dst8, src8, _length);
return _dest;
}
_value = (Uint8)_value;
- // Handle small copies / Non-aligned
+ // Handle small copies
if( _length < 4 )
{
for( ; _length--; dst8++ )