Commit | Line | Data |
---|---|---|
/* REGISTER NAMES
 * Symbolic names for the working registers, implemented as C preprocessor
 * macros. */
#define acc     r16     // sample accumulator, written to OCR0AL
#define i0      r17     // sample counter, byte 0 (least significant)
#define i1      r18     // sample counter, byte 1
#define i2      r19     // sample counter, byte 2
#define i3      r20     // sample counter, byte 3 (most significant)
#define n       r21
#define s       r22
#define t       r23     // == Ml
#define x       r24     // == a1 == Mh
#define _       r25     // == a2
#define Xlo     r26
#define Xhi     r27
#define one     r28     // holds the constant 1 (loaded in main)
; r29
; r30   Zlo
; r31   Zhi

/* Aliases: the same registers under the names used by mod3 and by the
 * multiplication macros. */
#define Ml      t       // mod3: low byte of the operand, also the result
#define Mh      x       // mod3: high byte of the operand
#define a1      x       // mul: low byte of the running sum
#define a2      _       // mul: high byte of the running sum
da32ed67 | 23 | |
/* I/O REGISTERS
 * I/O-space addresses, used as operands of OUT/SBI/CBI in this file. */
; port B
        .equ    DDRB,   0x01
        .equ    PORTB,  0x02
        .equ    PUEB,   0x03
; timer/counter 0
        .equ    OCR0AL, 0x26
        .equ    TIFR0,  0x2A
        .equ    TIMSK0, 0x2B
        .equ    TCCR0B, 0x2D
        .equ    TCCR0A, 0x2E
; watchdog, clock, sleep, configuration change protection
        .equ    WDTCSR, 0x31
        .equ    CLKPSR, 0x36
        .equ    OSCCAL, 0x39
        .equ    SMCR,   0x3A
        .equ    CCP,    0x3C
; stack pointer
        .equ    SPL,    0x3D
        .equ    SPH,    0x3E

/* MEMORY LAYOUT */
        .equ    RAMEND, 0x5F    ; loaded into SPL as the initial stack pointer
        .equ    FLASHM, 0x4000  ; added to the address of the notes table to form Xhi
4466dd8b | 42 | |
.section .text

        .org    0x0000          ; RESET interrupt
        ; The three low bytes of the sample counter are zeroed right here,
        ; in the four instruction slots between the RESET entry and the
        ; TIM0_OVF entry; main clears the remaining byte (i3).
        CLR     i0
        CLR     i1
        CLR     i2
        RJMP    main

        .org    0x0008          ; TIM0_OVF interrupt
        RJMP    sample
4466dd8b | 51 | |
; notes: 16 one-byte multiplier coefficients read by g, laid out as two
; rows of eight. g indexes a row with the low three bits of its argument
; and switches to the second row whenever the low two bits of i2 are not
; both zero.
notes:
        .byte   0x84, 0x9d, 0xb0, 0x69, 0x9d, 0x84, 0x69, 0x58  ; row 0
        .byte   0x75, 0x8c, 0xb0, 0x69, 0x8c, 0x75, 0x69, 0x58  ; row 1
55 | ||
; mod3: remainder of the 16-bit value Mh:Ml after division by 3.
;   in:       Mh = high byte, Ml = low byte
;   out:      Ml (the same register as t) = value mod 3
;   clobbers: Mh, r25 (scratch), status flags
;
; 256, 16 and 4 each leave remainder 1 when divided by 3, so the upper
; part of the value can be added onto the lower part without changing the
; remainder. Folding bytes, then nibbles, then bit pairs (twice) leaves a
; value in 0..5, which a single conditional subtraction brings into 0..2.
mod3:
#define tmp _
        ADD     Ml, Mh          ; fold bytes; the 9th bit of the sum is in C
        CLR     Mh              ; CLR does not touch C ...
        ADC     Mh, Mh          ; ... so this leaves Mh = carry (0 or 1)

        MOV     tmp, Ml
        SWAP    tmp
        ANDI    tmp, 0x0f       ; tmp = high nibble of the sum
        SWAP    Mh              ; carry moves to bit 4
        OR      tmp, Mh         ; tmp = sum >> 4 (five bits)
        ANDI    Ml, 0x0f
        ADD     Ml, tmp         ; fold nibbles

        MOV     tmp, Ml
        LSR     tmp
        LSR     tmp
        ANDI    Ml, 0x03
        ADD     Ml, tmp         ; fold bit pairs: (Ml >> 2) + (Ml & 3)

        MOV     tmp, Ml
        LSR     tmp
        LSR     tmp
        ANDI    Ml, 0x03
        ADD     Ml, tmp         ; and once more; Ml is now in 0..5

        CPI     Ml, 3
        BRLO    1f              ; already below 3: done
        SUBI    Ml, 3
1:
        RET
#undef tmp
f180febe TG |
84 | |
; Helper macros that make the multiplication tree in g readable.
; Each step of the tree handles one bit of the coefficient held in t:
; add the multiplicand if the bit is set, then shift the running sum
; a2:a1 right by one.

; always / never: expand to nothing. They only mark a bit whose value is
; already known at that point of the tree, so no test is needed.
.macro always _bit
.endm
.macro never _bit
.endm

; test: continue at _jmpto when bit _bit of t is set, otherwise fall through.
.macro test _bit,_jmpto
        SBRC    t, \_bit
        RJMP    \_jmpto
.endm

; i_test: inverted test (for the reordered 0x8_ branch); continue at
; _jmpto when bit _bit of t is clear.
.macro i_test _bit,_jmpto
        SBRS    t, \_bit
        RJMP    \_jmpto
.endm

; shift16: shift the full 16-bit running sum a2:a1 right by one.
.macro shift16
        LSR     a2
        ROR     a1
.endm

; shift8: shift only the low byte. The top three bits of the result do not
; need to be correct, so cycles are saved by not carrying in from a2.
.macro shift8
        LSR     a1
.endm

; shift0: expands to nothing; the last shift is common to all paths and
; is done once at end_mul.
.macro shift0
.endm

; add16: add the multiplicand i1:i0 to the running sum a2:a1.
.macro add16
        ADD     a1, i0
        ADC     a2, i1
.endm

; add8: low-byte-only add, for the same reason as shift8 (no carry into a2).
.macro add8
        ADD     a1, i0
.endm
114 | ||
; g(i, t) -> t
; Looks up a coefficient in the notes table and multiplies it with the
; low 16 bits of the sample counter (i1:i0).
;   in:       t   = table index; only bits 0..2 are used
;             i2  = selects the table row (second row unless its low two
;                   bits are both zero)
;             Xhi = high byte of the table address, set up once in main
;   out:      t   = low byte of the shifted product. Per the shift8/add8
;                   macros its top three bits are not computed correctly.
;   clobbers: a1 (r24), a2 (r25), Xlo, status flags
g:
        CLR     a1

#define tmp _
#define zero a1
        ANDI    t, 0x07         ; column within a table row
        MOV     tmp, i2
        ANDI    tmp, 3
        CPSE    tmp, zero       ; low two bits of i2 both zero: skip the next instruction
        SUBI    t, -8           ; otherwise move on to the second row
#undef zero
#undef tmp

        LDI     Xlo, lo8(notes)
        ADD     Xlo, t          ; NOTE (original author): cannot overflow, since RAMEND == 0x5F
        LD      t, X            ; t = coefficient

        CLR     a2

/* Decision tree multiplication.

   There is only a limited number of coefficients, so the code is heavily
   optimized for exactly those and only computes the result bits that are
   actually needed. According to the original author this reduces the
   cycle count from 38 for the (optimized) classic approach to 31, while
   the instruction count grows from 38 to 100; in the end it turned out
   there would have been enough spare cycles to just use the standard
   algorithm.

   Each node shows the low nibble of the coefficient; "?" is the bit being
   decided, "x" a bit not looked at yet. Bits 4..7 are hard-coded per leaf.

   _xxx? -+- _xx?0 --- _x?00 -+- _?000 -+- _0000 ... B0  27cy
          |                   |         +- _1000 ... 58  28cy
          |                   +- _?100 -+- _0100 ... 84  26cy
          |                             +- _1100 ... 8C  28cy
          +- _xx?1 --- _x?01 -+- _?001 --- _1001 ... 69  26cy
                              +- _?101 -+- _0101 ... 75  31cy
                                        +- _1101 ... 9D  30cy
*/
        test    0, m____1
m____0: shift16
        never   1
m___00: shift16
        test    2, m__100
m__000: shift16
        test    3, m_1000
m_0000: shift16
        always  4
        add16 $ shift16
        always  5
        add8 $ shift8
        never   6
        shift8
        always  7
        add8 $ shift0
        RJMP    end_mul         ; coefficient was 0xb0

m_1000: add16 $ shift16
        always  4
        add16 $ shift16
        never   5
        shift8
        always  6
        add8 $ shift8
        never   7
        shift0
        RJMP    end_mul         ; coefficient was 0x58

m__100: add16 $ shift16
        i_test  3, m_0100
m_1100: add16
m_0100: shift16
        never   4
        shift16
        never   5
        shift8
        never   6
        shift8
        always  7
        add8 $ shift0
        RJMP    end_mul         ; coefficient was 0x8c / 0x84

m____1: add16 $ shift16
        never   1
m___01: shift16
        test    2, m__101
m__001: shift16
        always  3
m_1001: add16 $ shift16
        never   4
        shift16
        always  5
        add8 $ shift8
        always  6
        add8 $ shift8
        never   7
        shift0
        RJMP    end_mul         ; coefficient was 0x69

m__101: add16 $ shift16
        test    3, m_1101
m_0101: shift16
        always  4
        add16 $ shift16
        always  5
        add8 $ shift8
        always  6
        add8 $ shift8
        never   7
        shift0
        RJMP    end_mul         ; coefficient was 0x75

m_1101: add16 $ shift16
        always  4
        add16 $ shift16
        never   5
        shift8
        never   6
        shift8
        always  7
        add8 $ shift0
                                ; coefficient was 0x9d; falls through

end_mul:
        LSR     a1              ; the final shift is common to all paths

        MOV     t, a1           ; TODO (original author): use a1 directly in the caller
        RET
61fab018 | 245 | |
; main: one-time hardware setup, entered from the RESET vector. It never
; returns; after setup the CPU sleeps in `loop` and all further work
; happens in the TIM0_OVF handler (sample).
main:
        ; NOTE: i0..i2 are already cleared at the RESET vector (.org 0x0000)
        CLR     i3
        CLR     acc             ; a dummy sample is output before the actual first one
        LDI     Xhi, hi8(FLASHM + notes) ; never changes
        LDI     one, 1          ; mostly for clearing the TIM0_OVF flag

#define zero i0
        LDI     x, RAMEND
        OUT     SPL, x          ; init stack pointer
        OUT     SPH, zero       ; -"-
        OUT     PUEB, zero      ; disable pullups
        LDI     x, 0x05
        OUT     DDRB, x         ; PORTB0: pwm, PORTB2: debug
        LDI     x, 0xd8
        OUT     CCP, x          ; unlock the change-protected I/O registers
        OUT     CLKPSR, one     ; clock prescaler 1/2 (4 MHz); keep directly after the CCP write
        LDI     x, 0xa7         ; determined by trial-and-error (->PORTB2)
        OUT     OSCCAL, x       ; set oscillator calibration
        OUT     WDTCSR, zero    ; turn off watchdog

        ; timer/counter0: 8-bit fast PWM, non-inverting, no prescaler
        LDI     x, 0x81
        OUT     TCCR0A, x
        LDI     x, 0x09
        OUT     TCCR0B, x
        OUT     TIMSK0, one     ; enable TIM0_OVF interrupt

        ; Enable sleeping. Without the sleep-enable bit set, the SLEEP
        ; instruction below does not put the CPU to sleep and the loop
        ; just spins. Writing 1 sets SE and leaves the mode bits at zero,
        ; which selects idle mode (timer/counter0 keeps running).
        ; Assumes the ATtiny4/5/9/10 SMCR layout (SE = bit 0) - confirm
        ; against the datasheet of the actual target.
        OUT     SMCR, one
        SEI
#undef zero

loop:
        SLEEP                   ; wait for interrupt
        RJMP    loop
279 | ||
; sample: TIM0_OVF interrupt handler. Outputs the sample computed during
; the previous invocation, then computes the next one as the sum of four
; voices and advances the 32-bit sample counter i3:i2:i1:i0.
;   reads:    i0..i3, acc, the constant register `one`
;   writes:   acc (next sample, already shifted into the high nibble),
;             i0..i3, plus the scratch registers n, s, r23, r24, r25
;   calls:    g, mod3
; r25 is used as scratch under the name tmp for the whole routine.
sample:
        OUT     OCR0AL, acc     ; output first: the rest of the routine has variable runtime
#ifdef DEBUG
        SBI     PORTB, 2        ; to measure runtime
#endif // DEBUG

#define tmp _
        ; n = bits 14..21 of the counter: (i2 << 2) | (i1 >> 6)
        MOV     n, i2
        LSL     n
        LSL     n
        MOV     tmp, i1
        SWAP    tmp
        ANDI    tmp, 0x0f
        LSR     tmp
        LSR     tmp
        OR      n, tmp

        ; s = bits 17..24 of the counter: (bit 0 of i3) << 7 | (i2 >> 1)
        MOV     s, i3
        LSR     s               ; bit 0 of i3 goes to C ...
        ROR     s               ; ... and comes back in as bit 7
        ANDI    s, 0x80
        MOV     tmp, i2
        LSR     tmp
        OR      s, tmp

        ; voice 1: bit 4 of g(n)
        MOV     t, n
        RCALL   g
        SWAP    t
        ANDI    t, 1
        MOV     acc, t

        ; voice 2: bits 2..3 of g(n ^ counter bits 13..20), masked with s
        MOV     tmp, i2
        LSL     tmp
        LSL     tmp
        LSL     tmp
        MOV     t, i1
        SWAP    t
        ANDI    t, 0xf
        LSR     t
        OR      t, tmp          ; (i2 << 3) | (i1 >> 5)
        EOR     t, n
        RCALL   g
        LSR     t
        LSR     t
        ANDI    t, 3
        AND     t, s
        ADD     acc, t

        ; voice 3: bits 2..3 of g(n + (counter bits 11..26) mod 3),
        ; masked with roughly (s + 1) / 3
        MOV     Ml, i2
        SWAP    Ml
        ANDI    Ml, 0xf0
        LSL     Ml
        MOV     tmp, i1
        LSR     tmp
        LSR     tmp
        LSR     tmp
        OR      Ml, tmp         ; Ml = (i2 << 5) | (i1 >> 3)
        MOV     Mh, i3
        SWAP    Mh
        ANDI    Mh, 0xf0
        LSL     Mh
        MOV     tmp, i2
        LSR     tmp
        LSR     tmp
        LSR     tmp
        OR      Mh, tmp         ; Mh = (i3 << 5) | (i2 >> 3)
        RCALL   mod3
        ADD     t, n
        RCALL   g
        LSR     t
        LSR     t
        ANDI    t, 3
        MOV     x, s
        INC     x
        ; shift-and-add approximation of x * (1/4 + 1/16 + 1/64 + 1/256);
        ; each ROR pulls the carry of the preceding ADD back in
        MOV     tmp, x
        LSR     tmp
        LSR     tmp
        ADD     tmp, x
        ROR     tmp
        LSR     tmp
        ADD     tmp, x
        ROR     tmp
        LSR     tmp
        ADD     tmp, x
        ROR     tmp
        LSR     tmp
        AND     t, tmp
        ADD     acc, t

        ; voice 4: bits 1..2 of g(8 + n - (counter bits 10..25) mod 3),
        ; masked with roughly (s + 1) / 5
        MOV     Ml, i2
        SWAP    Ml
        ANDI    Ml, 0xf0
        LSL     Ml
        LSL     Ml
        MOV     tmp, i1
        LSR     tmp
        LSR     tmp
        OR      Ml, tmp         ; Ml = (i2 << 6) | (i1 >> 2)
        MOV     Mh, i3
        SWAP    Mh
        ANDI    Mh, 0xf0
        LSL     Mh
        LSL     Mh
        MOV     tmp, i2
        LSR     tmp
        LSR     tmp
        OR      Mh, tmp         ; Mh = (i3 << 6) | (i2 >> 2)
        RCALL   mod3
        SUB     t, n
        NEG     t               ; t = n - remainder
        SUBI    t, -8           ; t += 8
        RCALL   g
        LSR     t
        ANDI    t, 3
        INC     s
        ; shift-and-add approximation of s * 0.199 (about s / 5)
        MOV     tmp, s
        LSR     tmp
        ADD     tmp, s
        ROR     tmp
        LSR     tmp
        LSR     tmp
        ADD     tmp, s
        ROR     tmp
        ADD     tmp, s
        ROR     tmp
        LSR     tmp
        LSR     tmp
        AND     t, tmp
        ADD     acc, t
#undef tmp

        SWAP    acc             ; acc << 4, to be passed to OCR0AL

        ; 32-bit increment of the sample counter (subtract -1 with borrow)
        SUBI    i0, -1
        SBCI    i1, -1
        SBCI    i2, -1
        SBCI    i3, -1

#ifdef DEBUG
        CBI     PORTB, 2        ; end runtime measurement
#endif // DEBUG
        OUT     TIFR0, one      ; clear pending interrupt (routine takes two interrupt cycles)
        RETI                    ; re-enables interrupts