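+ /* Delay routine written in assembly so we know what we're really getting.
+ * Each inner loop should take ~1 ms to execute:
+ * 15 cycles * (1/4.9152 MHz) * 327 = 0.997 ms + a little bit on the fringes.
+ *
+ * NOTE(review): the active inner-loop count below is 150 (the 327 load is
+ * commented out), so the figure above does not describe the code as written.
+ * Confirm which count is intended.
+ *
+ * XXX - how do we know gcc isn't optimising this? It seems to optimise after
+ * parsing C -> asm, but before asm -> machine code.
+ */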
+ //asm volatile ("pshx\npsha\npshb\n"); /* save registers */
+ asm volatile ("ldx %0\n" :: "m" (ms) : "x");
+ asm volatile (
+ "delay_loop:\n"
+ //" ldd #327\n" /* 3 */
+ " ldd #150\n" /* 3 */
+ "delay_inner_loop:\n" /* 15 cycles each */
+ " cpd #0x0000\n" /* 5 */
+ " beq delay_inner_loop_end\n" /* 3 */
+ " subd #0x0001\n" /* 4 */
+ " bra delay_inner_loop\n" /* 3 */
+ "delay_inner_loop_end:\n"
+ " dex\n" /* 3 */
+ " beq delay_out\n" /* 3 */
+ " bra delay_loop\n" /* 3 */
+ "delay_out:\n" ::: "x", "d");
+ /*" pulb\n"
+ " pula\n"
+ " pulx\n");*/
+}
+
+/*
+ * Return the number of characters in s that precede its first NUL byte.
+ *
+ * The count is computed as a pointer difference and then converted to u8.
+ * NOTE(review): u8 is declared outside this block; presumably it is an 8-bit
+ * unsigned type, in which case lengths above 255 would not survive the
+ * conversion intact -- confirm against the typedef.
+ */
+u8 my_strlen(char* s) {
+    const char *end;
+
+    /* Walk forward until the terminator is reached. */
+    for (end = s; *end != '\0'; ++end) {
+    }
+    return (u8)(end - s);
+}
+
+void my_strncpy(char* dst, char* src, u8 max_size) {