- u8 i;
- for (;ms;ms--) {
- for (i=0;i<DELAY_MAGIC; i++)
- asm("nop\nnop\nnop\nnop\nnop\n");
- }
-}
-
-void print_amount(u16 amt) {
- /* take amt and show it on screen with a $ sign */
- char str[10] = {' ', '$', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' '};
- u8 pos;
- for (pos = 7; amt; pos--, amt/=10) {
- str[pos] = amt%10 + '0';
- }
- set_msg(str);
+ /* delay routine written in assembly so we know what we're really getting.
+ * each inner loop should take ~1 ms to execute.
+ * 15 cycles * (1/4.9152 MHz) * 327 = 0.997 ms + a little bit on the fringes.
+ *
+ * XXX - how do we know gcc isn't optimising this? it seems to optimise after
+ * parsing C -> asm, but before asm -> machine code.
+ */
+ //asm volatile ("pshx\npsha\npshb\n"); /* save registers */
+ asm volatile ("ldx %0\n" :: "m" (ms) : "x");
+ asm volatile (
+ "delay_loop:\n"
+ //" ldd #327\n" /* 3 */
+ " ldd #150\n" /* 3 */
+ "delay_inner_loop:\n" /* 15 cycles each */
+ " cpd #0x0000\n" /* 5 */
+ " beq delay_inner_loop_end\n" /* 3 */
+ " subd #0x0001\n" /* 4 */
+ " bra delay_inner_loop\n" /* 3 */
+ "delay_inner_loop_end:\n"
+ " dex\n" /* 3 */
+ " beq delay_out\n" /* 3 */
+ " bra delay_loop\n" /* 3 */
+ "delay_out:\n" ::: "x", "d");
+ /*" pulb\n"
+ " pula\n"
+ " pulx\n");*/