new version
[Chiptunes.git] / foo.c
diff --git a/foo.c b/foo.c
index 60999989a06fb2d3fe03503627d664b7cae48c49..c2bcb5d6c2d96651063ce533aae9b7c1f8f3ae57 100644 (file)
--- a/foo.c
+++ b/foo.c
@@ -15,7 +15,7 @@ u8 i3;                //r21
 u8 n;          //r22
 u8 s;          //r23
 u8 _;          //r24
-               //r25
+u8 loop;       //r25
 u8 t;/*==Ml*/  //r26 (Xlo)
 u8 x;/*==Mh*/  //r27 (Xhi)
                //r28
@@ -55,6 +55,219 @@ void mod3(void) {
        RET
        #undef tmp
 }
+// mul: unrolled shift-and-add multiply of the 16-bit value i1:i0 by the
+// 8-bit factor held in t, accumulating into a2:a1 (i.e. _:x).
+// (Assumes the CLR/CPI/BREQ/ADD/ADC/LSR/ROR/RJMP/RET macros mirror the AVR
+// instructions of the same name -- their definitions are not visible here.)
+// Each bit of the factor is handled LSB first: an ADD/ADC pair when the bit
+// is 1, then an LSR/ROR pair that shifts the accumulator right by one.
+// After the eight steps a2:a1 therefore holds the product shifted right by
+// 8 bits (presumably operands are small enough that the ADC never carries
+// out of a2 -- such a carry would be lost by the following LSR; TODO confirm).
+// Only the seven factors tested below have an unrolled path.
+void mul(void) { //don't need overhead of function (inline it)
+       // i1.i0 * t -> _.x.t
+       // NOTE(review): nothing in this block writes t and a0 is never
+       // referenced, so only _.x is produced here; t keeps the factor.
+       #define a1 x
+       #define a2 _
+       #define a0 t
+       // start MUL -- 92 cycles :( (unrolled and skipping second bit: 76)
+       // clear the 16-bit accumulator a2:a1
+       CLR     (a2)
+       CLR     (a1)
+
+       // dispatch on the factor: one unrolled sequence per supported value
+       CPI     (t, 0x58)
+       BREQ    (mul_58)
+       CPI     (t, 0x69)
+       BREQ    (mul_69)
+       CPI     (t, 0x75)
+       BREQ    (mul_75)
+       CPI     (t, 0x84)
+       BREQ    (mul_84)
+       CPI     (t, 0x8c)
+       BREQ    (mul_8c)
+       CPI     (t, 0x9d)
+       BREQ    (mul_9d)
+       CPI     (t, 0xb0)
+       BREQ    (mul_b0)
+       // NOTE(review): there is no default branch -- a value of t matching
+       // none of the constants above falls through into mul_58.
+       mul_58: // 0101 1000 (bits taken LSB first: 0,0,0,1,1,0,1,0)
+               // accumulator is still zero here, so the three leading
+               // shift pairs do not change its value
+               LSR (a2)
+               ROR (a1)
+               LSR (a2)
+               ROR (a1)
+               LSR (a2)
+               ROR (a1)
+               ADD (a1, i0)
+               ADC (a2, i1, carry)
+               LSR (a2)
+               ROR (a1)
+
+               ADD (a1, i0)
+               ADC (a2, i1, carry)
+               LSR (a2)
+               ROR (a1)
+               LSR (a2)
+               ROR (a1)
+               ADD (a1, i0)
+               ADC (a2, i1, carry)
+               LSR (a2)
+               ROR (a1)
+               LSR (a2)
+               ROR (a1)
+               RJMP    (endmul)
+       mul_69: // 0110 1001 (bits taken LSB first: 1,0,0,1,0,1,1,0)
+               ADD (a1, i0)
+               ADC (a2, i1, carry)
+               LSR (a2)
+               ROR (a1)
+               LSR (a2)
+               ROR (a1)
+               LSR (a2)
+               ROR (a1)
+               ADD (a1, i0)
+               ADC (a2, i1, carry)
+               LSR (a2)
+               ROR (a1)
+
+               LSR (a2)
+               ROR (a1)
+               ADD (a1, i0)
+               ADC (a2, i1, carry)
+               LSR (a2)
+               ROR (a1)
+               ADD (a1, i0)
+               ADC (a2, i1, carry)
+               LSR (a2)
+               ROR (a1)
+               LSR (a2)
+               ROR (a1)
+               RJMP    (endmul)
+       mul_75: // 0111 0101 (bits taken LSB first: 1,0,1,0,1,1,1,0)
+               ADD (a1, i0)
+               ADC (a2, i1, carry)
+               LSR (a2)
+               ROR (a1)
+               LSR (a2)
+               ROR (a1)
+               ADD (a1, i0)
+               ADC (a2, i1, carry)
+               LSR (a2)
+               ROR (a1)
+               LSR (a2)
+               ROR (a1)
+
+               ADD (a1, i0)
+               ADC (a2, i1, carry)
+               LSR (a2)
+               ROR (a1)
+               ADD (a1, i0)
+               ADC (a2, i1, carry)
+               LSR (a2)
+               ROR (a1)
+               ADD (a1, i0)
+               ADC (a2, i1, carry)
+               LSR (a2)
+               ROR (a1)
+               LSR (a2)
+               ROR (a1)
+               RJMP    (endmul)
+       mul_84: // 1000 0100 (bits taken LSB first: 0,0,1,0,0,0,0,1)
+               // accumulator is still zero for the two leading shift pairs
+               LSR (a2)
+               ROR (a1)
+               LSR (a2)
+               ROR (a1)
+               ADD (a1, i0)
+               ADC (a2, i1, carry)
+               LSR (a2)
+               ROR (a1)
+               LSR (a2)
+               ROR (a1)
+
+               LSR (a2)
+               ROR (a1)
+               LSR (a2)
+               ROR (a1)
+               LSR (a2)
+               ROR (a1)
+               ADD (a1, i0)
+               ADC (a2, i1, carry)
+               LSR (a2)
+               ROR (a1)
+               RJMP    (endmul)
+       mul_8c: // 1000 1100 (bits taken LSB first: 0,0,1,1,0,0,0,1)
+               // accumulator is still zero for the two leading shift pairs
+               LSR (a2)
+               ROR (a1)
+               LSR (a2)
+               ROR (a1)
+               ADD (a1, i0)
+               ADC (a2, i1, carry)
+               LSR (a2)
+               ROR (a1)
+               ADD (a1, i0)
+               ADC (a2, i1, carry)
+               LSR (a2)
+               ROR (a1)
+
+               LSR (a2)
+               ROR (a1)
+               LSR (a2)
+               ROR (a1)
+               LSR (a2)
+               ROR (a1)
+               ADD (a1, i0)
+               ADC (a2, i1, carry)
+               LSR (a2)
+               ROR (a1)
+               RJMP    (endmul)
+       mul_9d: // 1001 1101 (bits taken LSB first: 1,0,1,1,1,0,0,1)
+               ADD (a1, i0)
+               ADC (a2, i1, carry)
+               LSR (a2)
+               ROR (a1)
+               LSR (a2)
+               ROR (a1)
+               ADD (a1, i0)
+               ADC (a2, i1, carry)
+               LSR (a2)
+               ROR (a1)
+               ADD (a1, i0)
+               ADC (a2, i1, carry)
+               LSR (a2)
+               ROR (a1)
+
+               ADD (a1, i0)
+               ADC (a2, i1, carry)
+               LSR (a2)
+               ROR (a1)
+               LSR (a2)
+               ROR (a1)
+               LSR (a2)
+               ROR (a1)
+               ADD (a1, i0)
+               ADC (a2, i1, carry)
+               LSR (a2)
+               ROR (a1)
+               RJMP    (endmul)
+       mul_b0: // 1011 0000 (bits taken LSB first: 0,0,0,0,1,1,0,1)
+               // accumulator is still zero for the four leading shift pairs
+               LSR (a2)
+               ROR (a1)
+               LSR (a2)
+               ROR (a1)
+               LSR (a2)
+               ROR (a1)
+               LSR (a2)
+               ROR (a1)
+
+               ADD (a1, i0)
+               ADC (a2, i1, carry)
+               LSR (a2)
+               ROR (a1)
+               ADD (a1, i0)
+               ADC (a2, i1, carry)
+               LSR (a2)
+               ROR (a1)
+               LSR (a2)
+               ROR (a1)
+               ADD (a1, i0)
+               ADC (a2, i1, carry)
+               LSR (a2)
+               ROR (a1)
+               // last case: no jump needed, execution continues at endmul
+       endmul:
+
+       // end MUL
+       #undef a0
+       #undef a1
+       #undef a2
+       RET
+}
 void g(void) {
        // g(i, t) -> t
        // tempvars: `x` and `_`
@@ -73,26 +286,8 @@ void g(void) {
          ADD X_lo, t
          ADC X_hi, zero
          LD  t, X         */
-       #define a1 x
-       #define a2 _
-       #define a0 t
-       CLR     (a2)
-       CLR     (a1)
-       for (u8 loop = 0; loop < 8; loop++) { //Note: t&2 always zero
-               SBRS    (t, 0)
-               goto skip2;
-               ADD     (a1, i0)
-               ADC     (a2, i1, carry)
-               skip2:
-               LSR     (a2)
-               ROR     (a1)
-               ROR     (t)
-       }
-       MOV     (t, a1) //can't return x or _ as a1, both needed later besides t
-       #undef a0
-       #undef a1
-       #undef a2
-       
+       RCALL   mul(); //stores used value in in x
+       MOV     (t, x)
        RET //TODO: replace CALL/RET with IJMP?
 };
 
Imprint / Impressum