X-Git-Url: https://git.gir.st/Chiptunes.git/blobdiff_plain/d35c3d7044dbb354f6a7493291eecbe1319b769d..4466dd8b6842dcd98a892ece212799e475c2b1c2:/foo.c diff --git a/foo.c b/foo.c index e246b6c..ac7d6ad 100644 --- a/foo.c +++ b/foo.c @@ -2,10 +2,6 @@ #include "fakeasm.h" typedef unsigned char u8; -u8 data[] = { - 0x84, 0x9d, 0xb0, 0x69, 0x9d, 0x84, 0x69, 0x58, - 0x75, 0x8c, 0xb0, 0x69, 0x8c, 0x75, 0x69, 0x58 -}; u8 zero; //r16 u8 acc; //r17 u8 i0; //r18 @@ -15,15 +11,24 @@ u8 i3; //r21 u8 n; //r22 u8 s; //r23 u8 _; //r24 -u8 loop; //r25 -u8 t;/*==Ml*/ //r26 (Xlo) -u8 x;/*==Mh*/ //r27 (Xhi) + //r25 +u8 x;/*==Ml*/ //r26 (Xlo) +u8 t;/*==Mh*/ //r27 (Xhi) //r28 //r29 -/*fakestack_l*/ //r30 (Zlo) -/*fakestack_h*/ //r31 (Zhi) +void *Z; //r30 (Zlo) +/*...*/ //r31 (Zhi) #define Mh x //mod3 vars #define Ml t // -"- + +// .section .data +u8 data[] = { + /*.byte*/ 0x84, 0x9d, 0xb0, 0x69, 0x9d, 0x84, 0x69, 0x58, + /*.byte*/ 0x75, 0x8c, 0xb0, 0x69, 0x8c, 0x75, 0x69, 0x58 +}; + +// .section .text + //http://homepage.divms.uiowa.edu/~jones/bcd/mod.shtml void mod3(void) { // mod3(Mh.Ml) -> t @@ -55,6 +60,27 @@ void mod3(void) { RET #undef tmp } +//.macro definitions for mul-tree: +#define always(_bit) //nop; for when a test() is not necessary (see tree) +#define never(_bit) //nop; for when a test() is not necessary (see tree) +#define test(_bit,_jmpto) \ + SBRC (t, _bit) \ + RJMP (_jmpto) +#define shift16 \ + LSR (a2) \ + ROR (a1) +#define shift8 /*top three bits don't need to be corrrect, so save cycles by not carrying*/ \ + LSR (a1) +#define shift0 //nop; last shift is common +#define add_shift16 \ + ADD (a1, i0) \ + ADC (a2, i1, carry) \ + shift16 +#define add_shift8 /*ditto with carrying*/ \ + ADD (a1, i0) \ + shift8 +#define add_shift0 /*last shift is common*/ \ + ADD (a1, i0) void g(void) { // g(i, t) -> t // tempvars: `x` and `_` @@ -63,227 +89,136 @@ void g(void) { MOV (tmp, i2) ANDI (tmp, 3) TST (tmp) - #undef tmp - BREQ (skip) + CPSE (tmp, zero) SUBI (t, -8) - skip: - t = data[t]; - /*MOV X_hi==x, data_hi - MOV X_lo==t, data_lo - ADD X_lo, t - ADC X_hi, zero - LD t, X */ + #undef tmp + + #define tmp _ + MOV (tmp, t) //NOTE: must move value away from `t`, as that is also hi(X) + tmp = data[tmp];/* + LDI Xhi, hi8(data) + LDI Xlo, lo8(data) + ADD Xlo, tmp ;<-- the offset (formerly `t`) into data[] + ADC Xhi, zero + LD tmp, X */ + MOV (t, tmp) + #undef tmp + #define a1 x #define a2 _ #define a0 t - // start MUL CLR (a2) CLR (a1) - //sorted by ocurrence, then longest cycle count first - CPI (t, 0x69) - BREQ (mul_69) - CPI (t, 0x75) - BREQ (mul_75) - CPI (t, 0x9d) - BREQ (mul_9d) - CPI (t, 0x58) - BREQ (mul_58) - CPI (t, 0x8c) - BREQ (mul_8c) - CPI (t, 0x84) - BREQ (mul_84) - CPI (t, 0xb0) - BREQ (mul_b0) - mul_58: // 0101 1000 (24cy) - LSR (a2) - ROR (a1) - LSR (a2) - ROR (a1) - LSR (a2) - ROR (a1) - ADD (a1, i0) - ADC (a2, i1, carry) - LSR (a2) - ROR (a1) + /* decision tree multiplication saves cycles and (hopefully) reduces code size + _xxx? + / \ + _xx?0 _xx1? + | | + _x?00 _x?01 + / \ / \ + _?000 _?100 _?001 _?101 + / \ / \ | / \ + _0000 _1000 _0100 _1100 _1001 _0101 _1101 + | | | | | | | + ... ... ... ... ... ... ... + | | | | | | | + B0 58 84 8C 69 75 9D */ + test (0, m____1) + m____0: shift16 + never (1) + m___00: shift16 + test (2, m__100) + m__000: shift16 + test (3, m_1000) + m_0000: shift16 + always (4) + add_shift16 + always (5) + add_shift8 + never (6) + shift8 + always (7) + add_shift0 + RJMP (end_mul) // calc'd 0xb0 + + m_1000: add_shift16 + always (4) + add_shift16 + never (5) + shift8 + always (6) + add_shift8 + never (7) + shift0 + RJMP (end_mul) // calc'd 0x58 - ADD (a1, i0) - ADC (a2, i1, carry) - LSR (a2) - ROR (a1) - LSR (a2) - ROR (a1) - ADD (a1, i0) - ADC (a2, i1, carry) - LSR (a2) - ROR (a1) - LSR (a2) - ROR (a1) - RJMP (endmul) - mul_69: // 0110 1001 (26cy) - ADD (a1, i0) - ADC (a2, i1, carry) - LSR (a2) - ROR (a1) - LSR (a2) - ROR (a1) - LSR (a2) - ROR (a1) - ADD (a1, i0) - ADC (a2, i1, carry) - LSR (a2) - ROR (a1) + m__100: add_shift16 + test (3, m_1100) + m_0100: shift16 + RJMP (upper_8) //'ll calc 0x84 - LSR (a2) - ROR (a1) - ADD (a1, i0) - ADC (a2, i1, carry) - LSR (a2) - ROR (a1) - ADD (a1, i0) - ADC (a2, i1, carry) - LSR (a2) - ROR (a1) - LSR (a2) - ROR (a1) - RJMP (endmul) - mul_75: // 0111 0101 (28cy) - ADD (a1, i0) - ADC (a2, i1, carry) - LSR (a2) - ROR (a1) - LSR (a2) - ROR (a1) - ADD (a1, i0) - ADC (a2, i1, carry) - LSR (a2) - ROR (a1) - LSR (a2) - ROR (a1) + m_1100: add_shift16 + upper_8: /* used twice, so deduplicated */ + never (4) + shift16 + never (5) + shift8 + never (6) + shift8 + always (7) + add_shift0 + RJMP (end_mul) // calc'd 0x8c - ADD (a1, i0) - ADC (a2, i1, carry) - LSR (a2) - ROR (a1) - ADD (a1, i0) - ADC (a2, i1, carry) - LSR (a2) - ROR (a1) - ADD (a1, i0) - ADC (a2, i1, carry) - LSR (a2) - ROR (a1) - LSR (a2) - ROR (a1) - RJMP (endmul) - mul_84: // 1000 0100 (22cy) - LSR (a2) - ROR (a1) - LSR (a2) - ROR (a1) - ADD (a1, i0) - ADC (a2, i1, carry) - LSR (a2) - ROR (a1) - LSR (a2) - ROR (a1) + m____1: add_shift16 + never (1) + m___01: shift16 + test (2, m__101) + m__001: shift16 + always (3) + m_1001: add_shift16 + never (4) + shift16 + always (5) + add_shift8 + always (6) + add_shift8 + never (7) + shift0 + RJMP (end_mul) // calc'd 0x69 - LSR (a2) - ROR (a1) - LSR (a2) - ROR (a1) - LSR (a2) - ROR (a1) - ADD (a1, i0) - ADC (a2, i1, carry) - LSR (a2) - ROR (a1) - RJMP (endmul) - mul_8c: // 1000 1100 (24cy) - LSR (a2) - ROR (a1) - LSR (a2) - ROR (a1) - ADD (a1, i0) - ADC (a2, i1, carry) - LSR (a2) - ROR (a1) - ADD (a1, i0) - ADC (a2, i1, carry) - LSR (a2) - ROR (a1) + m__101: add_shift16 + test (3, m_1101) + m_0101: shift16 + always (4) + add_shift16 + always (5) + add_shift8 + always (6) + add_shift8 + never (7) + shift0 + RJMP (end_mul) // calc'd 0x75 - LSR (a2) - ROR (a1) - LSR (a2) - ROR (a1) - LSR (a2) - ROR (a1) - ADD (a1, i0) - ADC (a2, i1, carry) - LSR (a2) - ROR (a1) - RJMP (endmul) - mul_9d: // 1001 1101 (28cy) - ADD (a1, i0) - ADC (a2, i1, carry) - LSR (a2) - ROR (a1) - LSR (a2) - ROR (a1) - ADD (a1, i0) - ADC (a2, i1, carry) - LSR (a2) - ROR (a1) - ADD (a1, i0) - ADC (a2, i1, carry) - LSR (a2) - ROR (a1) + m_1101: add_shift16 + always (4) + add_shift16 + never (5) + shift8 + never (6) + shift8 + always (7) + add_shift0 + // calc'd 0x9d - ADD (a1, i0) - ADC (a2, i1, carry) - LSR (a2) - ROR (a1) - LSR (a2) - ROR (a1) - LSR (a2) - ROR (a1) - ADD (a1, i0) - ADC (a2, i1, carry) - LSR (a2) - ROR (a1) - RJMP (endmul) - mul_b0: // 1011 0000 (22cy) - LSR (a2) - ROR (a1) - LSR (a2) - ROR (a1) - LSR (a2) - ROR (a1) - LSR (a2) - ROR (a1) + end_mul: + LSR (a1) //final shift is a common operation for all - ADD (a1, i0) - ADC (a2, i1, carry) - LSR (a2) - ROR (a1) - ADD (a1, i0) - ADC (a2, i1, carry) - LSR (a2) - ROR (a1) - LSR (a2) - ROR (a1) - ADD (a1, i0) - ADC (a2, i1, carry) - LSR (a2) - ROR (a1) - endmul: - // end MUL + MOV (t, a1) //TODO: use a1 in main() directly #undef a0 #undef a1 #undef a2 - MOV (t, x) - RET //TODO: replace CALL/RET with IJMP? + RET //TODO: replace CALL/RET with IJMP? (requires undoing goto-mul-hack) }; int main(void) { @@ -439,8 +374,8 @@ int main(void) { putchar(acc<<4); //TODO SUBI (i0, -1) - ADC (i1, zero, !i0) - ADC (i2, zero, !i0&&!i1) - ADC (i3, zero, !i0&&!i1&&!i2) + ADC (i1, zero, !i0) //XXX: must use "sbci i1,-1" in the assembly version + ADC (i2, zero, !i0&&!i1) // sbci i2,-1 + ADC (i3, zero, !i0&&!i1&&!i2) // sbci i3,-1 } }