3: pop {r4}
mov pc, lr
+
+@
+@ Division
+@
+@ Uint32 __divmod32_asm(Uint32 Num, Uint32 Den, Uint32 *Rem)
+@
+@ Unsigned 32-bit shift-and-subtract division.
+@ In:    r0 = Num (N), r1 = Den (D), r2 = Rem (may be NULL)
+@ Out:   r0 = N / D; *Rem = N % D when Rem is non-NULL.
+@        D == 0 yields a quotient of 0 and a remainder of 0.
+@ Clobb: r1, r3, flags (r4 is saved and restored on the stack)
+@
+.globl __divmod32_asm
+__divmod32_asm:
+    push {r4}
+    mov r4, #0              @ Return value (quotient accumulator)
+    mov r3, #1              @ add value (quotient bit matching current D)
+
+    cmp r1, #0              @ Division by zero: bail out early
+    beq .err
+
+    @ Scan up for first larger multiple of 2.
+    @ N and D are unsigned, so use the unsigned condition (blo); a plain
+    @ sign test (bmi) misjudges operands that differ by 2^31 or more.
+1:  cmp r0, r1              @ N < D
+    blo 2f                  @ ^^
+    tst r1, #0x80000000     @ Would D <<= 1 overflow 32 bits?
+    bne 4f                  @ Yes: D is already the largest usable multiple
+    lsl r1, r1, #1          @ D <<= 1
+    lsl r3, r3, #1          @ add <<= 1
+    b 1b
+
+    @ D has its top bit set and N >= D: consume this multiple now, since
+    @ D cannot be shifted any higher, then continue with the scan down.
+4:  sub r0, r1              @ N -= D
+    add r4, r3              @ ret += add
+
+    @ Go back down
+2:  lsrs r3, r3, #1         @ add >>= 1
+    beq 3f                  @ Done (value is zero)
+    lsr r1, r1, #1          @ D >>= 1
+    cmp r0, r1              @ N < D
+    blo 2b
+    sub r0, r1              @ N -= D
+    add r4, r3              @ ret += add
+    b 2b
+3:
+    tst r2, r2              @ Remainder (if wanted)
+    strne r0, [r2]          @ What is left of N is the remainder
+    mov r0, r4              @ Return value
+    pop {r4}
+    mov pc, lr
+.err:
+    mov r0, #0              @ Quotient and remainder are both reported as 0
+    tst r2, r2
+    strne r0, [r2]
+    pop {r4}
+    mov pc, lr
+
// === IMPORTS ===
extern void __memcpy_align4(void *_dest, const void *_src, size_t _length);
extern void __memcpy_byte(void *_dest, const void *_src, size_t _length);
+extern Uint32 __divmod32_asm(Uint32 Num, Uint32 Den, Uint32 *Rem);
// === PROTOTYPES ===
Uint64 __divmod64(Uint64 Num, Uint64 Den, Uint64 *Rem);
// Force alignment
while( (tVAddr)dst8 & 3 ) *dst8 ++ = *src8++, _length --;
- __memcpy_align32(dst8, src8, _length);
+ __memcpy_align4(dst8, src8, _length);
return _dest;
}
{
Uint64 ret;
if(Den == 0) return 0; // TODO: #div0
+ if(Num < Den) {
+ if(Rem) *Rem = Num;
+ return 0;
+ }
if(Num == 0) {
if(Rem) *Rem = 0;
return 0;
if(Rem) *Rem = Num & 0xFFF;
return Num >> 12;
}
+
+ if( !(Den >> 32) && !(Num >> 32) ) {
+ if(Rem) *Rem = 0; // Clear high bits
+ return __divmod32_asm(Num, Den, (Uint32*)Rem);
+ }
ret = __divmod64(Num, Den, Rem);
return ret;
// Unsigned 32-bit division: returns the quotient that __divmod32_asm
// computes for Num / Den. NULL is passed as the remainder pointer, so
// the remainder is not stored anywhere.
// NOTE(review): the name matches the compiler runtime's unsigned-divide
// helper; presumably the compiler emits calls to it for `/` on 32-bit
// unsigned operands - confirm against the toolchain in use.
Uint32 __udivsi3(Uint32 Num, Uint32 Den)
{
- return __divmod32(Num, Den, NULL);
+ return __divmod32_asm(Num, Den, NULL);
}
// Unsigned 32-bit modulo: runs __divmod32_asm on Num / Den, discards the
// quotient and returns the remainder it wrote through the pointer.
// NOTE(review): the name matches the compiler runtime's unsigned-modulo
// helper; presumably the compiler emits calls to it for `%` on 32-bit
// unsigned operands - confirm against the toolchain in use.
Uint32 __umodsi3(Uint32 Num, Uint32 Den)
{
 Uint32 rem;	// Filled in by the divide routine via the pointer argument
- __divmod32(Num, Den, &rem);
+ __divmod32_asm(Num, Den, &rem);
 return rem;
}