* lib.c - Library Functions
*/
#include <acess.h>
+#include "../helpers.h"
+
+// === IMPORTS ===
+extern void __memcpy_align4(void *_dest, const void *_src, size_t _length);	// Called by memcpy() once both pointers are 4-byte aligned
+extern void __memcpy_byte(void *_dest, const void *_src, size_t _length);	// Called by memcpy() for short or differently-aligned copies
+extern Uint32 __divmod32_asm(Uint32 Num, Uint32 Den, Uint32 *Rem);	// Return value is used as the quotient and *Rem as the remainder; callers in this file also pass Rem = NULL
// === PROTOTYPES ===
Uint64 __divmod64(Uint64 Num, Uint64 Den, Uint64 *Rem);
+Uint32 __divmod32(Uint32 Num, Uint32 Den, Uint32 *Rem);	// NOTE(review): presumably defined by DEF_DIVMOD(32) - confirm in ../helpers.h
Uint64 __udivdi3(Uint64 Num, Uint64 Den);
Uint64 __umoddi3(Uint64 Num, Uint64 Den);
Uint32 __udivsi3(Uint32 Num, Uint32 Den);
// === CODE ===
void *memcpy(void *_dest, const void *_src, size_t _length)
{
- Uint32 *dst;
- const Uint32 *src;
Uint8 *dst8 = _dest;
const Uint8 *src8 = _src;
+ if( ((tVAddr)_dest & 3) == 0 && ((tVAddr)_src & 3) == 0 )	// Fast path: both pointers are already 4-byte aligned
+ {
+ __memcpy_align4(_dest, _src, _length);	// Full length is handed over - NOTE(review): assumes the helper copes with lengths that are not a multiple of 4
+ return _dest;
+ }
+
// Handle small copies / Non-aligned
if( _length < 4 || ((tVAddr)_dest & 3) != ((tVAddr)_src & 3) )
{
- for( ; _length--; dst8++,src8++ )
- *dst8 = *src8;
+ __memcpy_byte(_dest, _src, _length);	// Short copy, or the low address bits differ so the pointers can never be aligned together
return _dest;
}
// Force alignment
- while( (tVAddr)dst8 & 3 ) *dst8 ++ = *src8++;
- dst = (void *)dst8; src = (void *)src8;
+ while( (tVAddr)dst8 & 3 ) *dst8 ++ = *src8++, _length --;	// Byte-copy until dst8 is aligned (src8 shares its low bits here), keeping _length in step with the bytes consumed
- // DWORD copies
- for( ; _length > 3; _length -= 4)
- *dst++ = *src++;
-
- // Trailing bytes
- dst8 = (void*)dst; src8 = (void*)src;
- for( ; _length; _length -- )
- *dst8 ++ = *src8 ++;
+ __memcpy_align4(dst8, src8, _length);	// Remaining bytes, with both pointers now aligned
return _dest;
}
_value = (Uint8)_value;
- // Handle small copies / Non-aligned
+ // Handle small copies
if( _length < 4 )
{
for( ; _length--; dst8++ )
return _dest;
}
-Uint64 __divmod64(Uint64 Num, Uint64 Den, Uint64 *Rem)
-{
- Uint64 ret, add;
-
- ret = 0;
- add = 1;
-
- // Find what power of two times Den is > Num
- while( Num >= Den )
- {
- Den <<= 1;
- add <<= 1;
- }
-
- // Search backwards
- while( add > 1 )
- {
- add >>= 1;
- Den >>= 1;
- // If the numerator is > Den, subtract and add to return value
- if( Num > Den )
- {
- ret += add;
- Num -= Den;
- }
- }
- if(Rem) *Rem = Num;
- return ret;
-}
+DEF_DIVMOD(64)	// NOTE(review): presumably expands to the definition of __divmod64 (prototyped above) - macro is not defined in this file, confirm in ../helpers.h
+DEF_DIVMOD(32)	// NOTE(review): presumably expands to the definition of __divmod32 used by DivMod32S - confirm in ../helpers.h
Uint64 DivMod64U(Uint64 Num, Uint64 Den, Uint64 *Rem)
{
Uint64 ret;
if(Den == 0) return 0; // TODO: #div0
+ if(Num < Den) {
+ if(Rem) *Rem = Num;
+ return 0;
+ }
if(Num == 0) {
if(Rem) *Rem = 0;
return 0;
if(Rem) *Rem = Num & 0xFFF;
return Num >> 12;
}
-
- #if 0
- {
- // http://www.tofla.iconbar.com/tofla/arm/arm02/index.htm
- Uint64 tmp = 1;
- __asm__ __volatile__(
- "1:"
- "cmpl %2,%1"
- "movls %2,%2,lsl#1"
- "movls %3,%3,lsl#1"
- "bls 1b"
- "2:"
- "cmpl %"
- while(Num > Den) {
- Den <<= 1;
- tmp <<= 1;
- }
- Den >>= 1; tmp >>= 1;
- while(
+
+ if( !(Den >> 32) && !(Num >> 32) ) {
+ if(Rem) *Rem = 0; // Clear high bits
+ return __divmod32_asm(Num, Den, (Uint32*)Rem);
}
- if(Rem) *Rem = Num;
- return ret;
- #elif 0
- for( ret = 0; Num > Den; ret ++, Num -= Den) ;
- if(Rem) *Rem = Num;
- return ret;
- #else
+
ret = __divmod64(Num, Den, Rem);
return ret;
- #endif
}
// Unsigned Divide 64-bit Integer
Uint64 __udivdi3(Uint64 Num, Uint64 Den)
{
return DivMod64U(Num, Den, NULL);
- #if 0
-// if( Den == 0 ) return 5 / (Uint32)Den; // Force a #DIV0
- if( Den == 16 ) return Num >> 4;
- if( Den == 256 ) return Num >> 8;
- if( Den == 512 ) return Num >> 9;
- if( Den == 1024 ) return Num >> 10;
- if( Den == 2048 ) return Num >> 11;
- if( Den == 4096 ) return Num >> 12;
- if( Num < Den ) return 0;
- if( Num <= 0xFFFFFFFF && Den <= 0xFFFFFFFF )
- return (Uint32)Num / (Uint32)Den;
-
- #if 0
- if( Den <= 0xFFFFFFFF ) {
- (Uint32)(Num >> 32) / (Uint32)Den
- }
- #endif
- Uint64 ret = 0;
- for( ret = 0; Num > Den; ret ++, Num -= Den );
- return ret;
- #endif
}
// Unsigned Modulus 64-bit Integer
Uint64 ret = 0;
DivMod64U(Num, Den, &ret);
return ret;
- #if 0
- if( Den == 0 ) return 5 / (Uint32)Den; // Force a #DIV0
- if( Num < Den ) return Num;
- if( Den == 1 ) return 0;
- if( Den == 2 ) return Num & 1;
- if( Den == 16 ) return Num & 3;
- if( Den == 256 ) return Num & 0xFF;
- if( Den == 512 ) return Num & 0x1FF;
- if( Den == 1024 ) return Num & 0x3FF;
- if( Den == 2048 ) return Num & 0x7FF;
- if( Den == 4096 ) return Num & 0xFFF;
-// if( Num <= 0xFFFFFFFF && Den <= 0xFFFFFFFF )
-// return (Uint32)Num % (Uint32)Den;
-
- #if 0
- if( Den <= 0xFFFFFFFF ) {
- (Uint32)(Num >> 32) / (Uint32)Den
- }
- #endif
- for( ; Num > Den; Num -= Den );
- return Num;
- #endif
}
-#define _divide_s_32(Num, Den, rem) __asm__ __volatile__ ( \
- "mov %0, #0\n" \
- " adds %1, %1, %1\n" \
- " .rept 32\n" \
- " adcs %0, %2, %0, lsl #1\n" \
- " subcc %0, %0, %3\n" \
- " adcs %1, %1, %1\n" \
- " .endr\n" \
- : "=r" (rem), "=r" (Num) \
- : "r" (Den) \
- : "cc" \
- )
Uint32 __udivsi3(Uint32 Num, Uint32 Den)
{
- register Uint32 ret;
- Uint64 P, D;
- int i;
-
- if( Num == 0 ) return 0;
- if( Den == 0 ) return 0xFFFFFFFF; // TODO: Throw an error
- if( Den == 1 ) return Num;
-
- D = ((Uint64)Den) << 32;
-
- for( i = 32; i --; )
- {
- P = 2*P - D;
- if( P >= 0 )
- ret |= 1;
- else
- P += D;
- ret <<= 1;
- }
-
-// _divide_s_32(Num, Den, rem);
- return Num;
+ return __divmod32_asm(Num, Den, NULL);	// Unsigned 32-bit quotient; Rem is NULL because only the quotient is needed here
}
Uint32 __umodsi3(Uint32 Num, Uint32 Den)
{
- return Num - __udivsi3(Num, Den)*Den;
+ Uint32 rem;	// Expected to be filled in by __divmod32_asm through its Rem pointer
+ __divmod32_asm(Num, Den, &rem);	// The returned quotient is discarded
+ return rem;
}
-Sint32 __divsi3(Sint32 Num, Sint32 Den)
+// Signed 32-bit divide with optional remainder, built on the unsigned __divmod32.
+// - The quotient truncates toward zero
+// - The remainder stored in *Rem takes the sign of Num, so that
+//   (Num / Den) * Den + (Num % Den) == Num as C requires
+// - Rem may be NULL when only the quotient is wanted
+static inline Sint32 DivMod32S(Sint32 Num, Sint32 Den, Sint32 *Rem)
{
- if( (Num < 0) && (Den < 0) )
- return __udivsi3(-Num, -Den);
- else if( Num < 0 )
- return __udivsi3(-Num, Den);
- else if( Den < 0 )
- return __udivsi3(Den, -Den);
+	const int	bNegNum = (Num < 0);
+	const int	bNegDen = (Den < 0);
+	// Take the magnitudes in unsigned arithmetic so the most negative
+	// value does not overflow on negation
+	Uint32	uNum = bNegNum ? 0 - (Uint32)Num : (Uint32)Num;
+	Uint32	uDen = bNegDen ? 0 - (Uint32)Den : (Uint32)Den;
+	Uint32	uRem = 0;
+	Uint32	uQuot = __divmod32(uNum, uDen, &uRem);
+
+	// For a non-zero Den, uRem < uDen <= 2^31, so it fits in a Sint32
+	if( Rem )
+		*Rem = bNegNum ? -(Sint32)uRem : (Sint32)uRem;
+
+	// The quotient is negative only when the operand signs differ
+	if( bNegNum != bNegDen )
+		return (Sint32)(0 - uQuot);
else
- return __udivsi3(Den, Den);
+		return (Sint32)uQuot;
+}
+
+// Signed 32-bit divide: quotient of Num / Den via DivMod32S
+Sint32 __divsi3(Sint32 Num, Sint32 Den)
+{
+	return DivMod32S(Num, Den, NULL);
}
Sint32 __modsi3(Sint32 Num, Sint32 Den)
{
- //register Sint32 rem;
- //_divide_s_32(Num, Den, rem);
- return Num - __divsi3(Num, Den) * Den;
+ Sint32 rem;	// Receives the remainder from DivMod32S
+ DivMod32S(Num, Den, &rem);	// The returned quotient is discarded
+ return rem;
}