optimize mul_8c / mul_84
[Chiptunes.git] / foo.S
CommitLineData
f180febe
TG
/* REGISTER NAMES
 * Symbolic names for the working registers, substituted by the C preprocessor.
 * r26 and r27 each carry three names: the scratch names x/t, the halves of the
 * X pointer (Xlo/Xhi) and the mod3 operand pair (Mh/Ml). Writing through one
 * name overwrites the value seen through the others.
 */
#define zero    r16     /* cleared in main and used as a constant 0 operand */
#define acc     r17     /* sample accumulator, written to OCR0AL */
#define i0      r18     /* i3:i2:i1:i0 = 32-bit sample counter, i0 is the low byte */
#define i1      r19
#define i2      r20
#define i3      r21
#define n       r22
#define s       r23
#define _       r24     /* general scratch register */
; r25
#define x       r26     /* == Xlo == Mh */
#define t       r27     /* == Xhi == Ml */
; r28
; r29
; r30 Zlo
; r31 Zhi
; aliases:
#define Xlo     r26
#define Xhi     r27
#define Mh      r26     /* mod3 operand, high byte */
#define Ml      r27     /* mod3 operand, low byte */
da32ed67 23
f180febe
TG
/* I/O REGISTERS
 * Addresses used as operands of OUT / SBI / CBI in this file.
 * NOTE(review): the values look like the ATtiny4/5/9/10 I/O map - confirm
 * against the datasheet of the actual target.
 */
OCR0AL  = 0x26
DDRB    = 0x01
PORTB   = 0x02
PUEB    = 0x03
SPL     = 0x3D
SPH     = 0x3E
CCP     = 0x3C
CLKPSR  = 0x36
WDTCSR  = 0x31
SMCR    = 0x3A
TCCR0A  = 0x2E
TCCR0B  = 0x2D
TIMSK0  = 0x2B
TIFR0   = 0x2A
4466dd8b 39
.section .data
/* Multiplier table read by g: two rows of eight bytes each.
 * g indexes it with (t & 7) and adds 8, selecting the second row,
 * whenever (i2 & 3) is non-zero. */
data:
    .byte 0x84, 0x9d, 0xb0, 0x69, 0x9d, 0x84, 0x69, 0x58
    .byte 0x75, 0x8c, 0xb0, 0x69, 0x8c, 0x75, 0x69, 0x58
4466dd8b 44
.section .text
/* Interrupt vector table: only the two vectors that are used get an entry. */
.org 0x0000             ; RESET interrupt
    RJMP main
.org 0x0008             ; TIM0_OVF interrupt
    RJMP sample
4466dd8b 50
/* mod3(Mh.Ml) -> t
 * Reduces the 16-bit value Mh:Ml modulo 3.
 *   in:       Mh (r26) = high byte, Ml (r27) = low byte
 *   out:      t (same register as Ml) = remainder, 0..2
 *   clobbers: Mh (same register as x), _ (r24), SREG
 *   requires: zero == 0
 * 256, 16 and 4 are all congruent to 1 modulo 3, so the two bytes, then the
 * nibbles, then the bit pairs can be added together without changing the
 * remainder. The folded sum is at most 5, so one conditional subtraction of 3
 * finishes the reduction.
 */
mod3:
    #define tmp _
    ADD  Ml, Mh         ; add the two bytes, a ninth bit may end up in C
    CLR  Mh             ; CLR does not modify C
    ADC  Mh, zero       ; Mh = carry of the byte sum (0 or 1)
    MOV  tmp, Ml
    SWAP tmp
    ANDI tmp, 0x0f      ; tmp = high nibble of the byte sum
    SWAP Mh             ; move the carry to bit 4
    OR   tmp, Mh        ; tmp = high nibble + 16 * carry
    ANDI Ml, 0x0f
    ADD  Ml, tmp        ; nibble fold
    MOV  tmp, Ml
    LSR  tmp
    LSR  tmp
    ANDI Ml, 0x03
    ADD  Ml, tmp        ; first bit-pair fold
    MOV  tmp, Ml
    LSR  tmp
    LSR  tmp
    ANDI Ml, 0x03
    ADD  Ml, tmp        ; second bit-pair fold, result is 0..5
    CPI  Ml, 3
    BRLO mod3_done      ; already 0..2: nothing to subtract
                        ; (was BRPL, which skipped the subtraction for 3..5
                        ;  and applied it to 0..2)
    SUBI Ml, 3          ; 3..5 -> 0..2
mod3_done:
    RET
    #undef tmp
f180febe
TG
79
/* Helper macros that keep the multiplication tree in g readable.
 * a1 (low byte) and a2 (high byte) name the 16-bit accumulator. They are
 * defined only around the macro definitions, so the preprocessor substitutes
 * the real registers into the macro bodies. i1:i0 is the value that gets added.
 */
#define a1 x
#define a2 _

/* always / never: expand to nothing. They mark a multiplier bit whose value is
 * already known at that point of the tree, so no test is needed. */
.macro always bitno
.endm
.macro never bitno
.endm

/* test: jump to dest when bit bitno of t is set, fall through when it is clear. */
.macro test bitno, dest
    SBRC t, \bitno
    RJMP \dest
.endm

/* i_test: inverted test, jumps to dest when the bit is clear
 * (used for the reordered 0x8_ branch). */
.macro i_test bitno, dest
    SBRS t, \bitno
    RJMP \dest
.endm

/* shift16: shift the full 16-bit accumulator a2:a1 right by one. */
.macro shift16
    LSR a2
    ROR a1
.endm

/* shift8: shift only the low byte. The top three bits of the result do not
 * need to be correct, so cycles are saved by not carrying in from a2. */
.macro shift8
    LSR a1
.endm

/* shift0: expands to nothing, the last shift is shared by all paths (end_mul). */
.macro shift0
.endm

/* add16: a2:a1 += i1:i0 */
.macro add16
    ADD a1, i0
    ADC a2, i1
.endm

/* add8: a1 += i0 only; as with shift8 the carry into a2 is skipped on purpose. */
.macro add8
    ADD a1, i0
.endm

#undef a2
#undef a1
f180febe
TG
113
/* g(i, t) -> t
 * Looks up a multiplier in data[] and multiplies it with i1:i0 by shift-and-add.
 *   in:       t (r27) = table index, only the low three bits are used
 *             i2      = selects the table row: second row when (i2 & 3) != 0
 *             i1:i0   = multiplicand
 *   out:      t = low accumulator byte after eight add/shift-right steps.
 *             Per the shift8 note, the top three bits are not meaningful.
 *   clobbers: x (r26), _ (r24), SREG
 *   requires: zero == 0
 *
 * The multiplication is unrolled as a decision tree over the bits of the
 * multiplier. Only seven distinct values occur in data[], so most bits are
 * known without testing (always / never); only bits 0, 2 and 3 are tested.
 *
 *   multiplier  binary      bit0  bit2  bit3   entry label
 *   0xb0        1011 0000    0     0     0     m_0000
 *   0x58        0101 1000    0     0     1     m_1000
 *   0x84        1000 0100    0     1     0     m_0100
 *   0x8c        1000 1100    0     1     1     m_1100
 *   0x69        0110 1001    1     0     1     m_1001 (bit 3 not tested)
 *   0x75        0111 0101    1     1     0     m_0101
 *   0x9d        1001 1101    1     1     1     m_1101
 */
g:
    #define tmp _
    ; --- index = (t & 7), plus 8 when (i2 & 3) != 0 ---
    ANDI t, 0x07
    MOV  tmp, i2
    ANDI tmp, 3
    CPSE tmp, zero          ; skip the +8 when (i2 & 3) == 0
    SUBI t, -8

    ;TODO: check correctness!
    ; --- t = data[index] ---
    MOV  tmp, t             ; NOTE: the index must leave `t`, which is also hi(X)
    LDI  Xhi, hi8(data)     ; hi(data) is always zero, but the register must be cleared
    LDI  Xlo, lo8(data)
    ADD  Xlo, tmp           ; add the index (formerly `t`) into data[]
    ;ADC Xhi, zero          ; data == 0x40 and index <= 0x10, so this can never overflow
    LD   tmp, X
    MOV  t, tmp
    #undef tmp

    #define a1 x
    #define a2 _
    #define a0 t
    ; --- accumulator a2:a1 = 0 ---
    CLR  a2
    CLR  a1

    ; --- decision tree, one add/shift step per multiplier bit ---
    test 0, m____1
m____0:
    shift16
    never 1
m___00:
    shift16
    test 2, m__100
m__000:
    shift16
    test 3, m_1000
m_0000:
    shift16
    always 4
    add16
    shift16
    always 5
    add8
    shift8
    never 6
    shift8
    always 7
    add8
    shift0
    RJMP end_mul            ; multiplied by 0xb0

m_1000:
    add16
    shift16
    always 4
    add16
    shift16
    never 5
    shift8
    always 6
    add8
    shift8
    never 7
    shift0
    RJMP end_mul            ; multiplied by 0x58

m__100:
    add16
    shift16
    i_test 3, m_0100
m_1100:
    add16
m_0100:
    shift16
    never 4
    shift16
    never 5
    shift8
    never 6
    shift8
    always 7
    add8
    shift0
    RJMP end_mul            ; multiplied by 0x8c / 0x84

m____1:
    add16
    shift16
    never 1
m___01:
    shift16
    test 2, m__101
m__001:
    shift16
    always 3
m_1001:
    add16
    shift16
    never 4
    shift16
    always 5
    add8
    shift8
    always 6
    add8
    shift8
    never 7
    shift0
    RJMP end_mul            ; multiplied by 0x69

m__101:
    add16
    shift16
    test 3, m_1101
m_0101:
    shift16
    always 4
    add16
    shift16
    always 5
    add8
    shift8
    always 6
    add8
    shift8
    never 7
    shift0
    RJMP end_mul            ; multiplied by 0x75

m_1101:
    add16
    shift16
    always 4
    add16
    shift16
    never 5
    shift8
    never 6
    shift8
    always 7
    add8
    shift0
                            ; multiplied by 0x9d, falls through

end_mul:
    LSR  a1                 ; the final shift is common to all paths

    MOV  t, a1              ;;TODO: use a1 in main() directly
    #undef a0
    #undef a1
    #undef a2
    RET                     ; TODO: replace CALL/RET with IJMP?
61fab018 246
/* main: setup routine, entered from the RESET vector.
 * Clears the working registers, initialises the stack pointer, port B, the
 * clock prescaler and timer/counter0, runs `sample` once and then parks in
 * `loop`.
 *
 * `sample` ends with RETI, so it has to be entered with a return address on
 * the stack. It is therefore called with RCALL (it used to be reached with
 * RJMP, which made RETI pop a return address that was never pushed). The I
 * flag is left clear until that RETI sets it, so the first pass runs with
 * interrupts disabled exactly like a real interrupt invocation.
 */
main:
    CLR  zero
    CLR  i0
    CLR  i1
    CLR  i2
    CLR  i3
    CLR  acc                ; we output a dummy sample before the actual first one

    #define one _
    LDI  one, 1
    LDI  x, 0x5f            ; RAMEND
    OUT  SPL, x             ; init stack ptr
    OUT  SPH, zero          ; -"-
    OUT  PUEB, zero         ; disable pullups
    LDI  x, 0x05
    OUT  DDRB, x            ; PORTB0:pwm, PORTB2:debug
    LDI  x, 0xd8
    OUT  CCP, x             ; change protected ioregs
    OUT  CLKPSR, one        ; clock prescaler 1/2 (4Mhz)
    OUT  WDTCSR, zero       ; turn off watchdog ;;TODO: incomplete - see datasheet pg48
                            ; NOTE(review): the CCP unlock may already be used up by the
                            ; CLKPSR write above - confirm whether CCP must be rewritten
;   OUT  SMCR, 2            ; sleep mode 'power down' ('idle' (default) has faster response time)

    ;set timer/counter0 to 8bit fastpwm, non-inverting, no prescaler
    LDI  x, 0x81
    OUT  TCCR0A, x
    LDI  x, 0x09
    OUT  TCCR0B, x
    OUT  TIMSK0, one        ; enable tim0_ovf
    OUT  TIFR0, one         ; presumably clears an overflow flag that is already pending
                            ; (flag bits are cleared by writing a one) - confirm
    #undef one
    RCALL sample            ; first sample; the RETI at its end enables interrupts
    RJMP  loop              ; continue in the idle loop
19e320a6
TG
279
/* loop: idle loop, all work happens in the TIM0_OVF handler `sample`.
 * NOTE(review): SMCR is never written in the visible code (the OUT SMCR line
 * in main is commented out) - confirm that SLEEP actually sleeps here. */
loop:
    SLEEP                   ; wait for interrupt
    RJMP loop
283
/* sample: TIM0_OVF interrupt handler; main also transfers control here once at
 * startup. Writes the previously computed sample to OCR0AL, then computes the
 * next one from the 32-bit counter i = i3:i2:i1:i0 and increments i.
 *
 *   n = low byte of (i >> 14)
 *   s = low byte of (i >> 17)
 *   acc = voice1 + voice2 + voice3 + voice4, then moved to the high nibble
 *
 * Each voice calls g with a different index and keeps one or two bits of the
 * result. Voices 2 to 4 additionally mask with s, s/3 and s/5 respectively;
 * the two divisions are shift-and-add approximations computed on s + 1.
 *
 *   clobbers: acc, n, s, x, t, _ and SREG (SREG is not saved, see TODO)
 *   PORTB2 is high for the duration of the routine (runtime measurement).
 */
sample:
    ; potential TODO: softcounter in r25 to only update duty cicle every n iterations
    ; potential TODO: save/restore status register (SREG=0x3f) (only if something in mainloop)
    #define tmp _

    OUT  OCR0AL, acc        ; start by outputting a sample, because routine has variable runtime
    SBI  PORTB, 2           ; to measure runtime

    ; --- n = (i2 << 2) | (i1 >> 6) ---
    MOV  n, i2
    LSL  n
    LSL  n
    MOV  tmp, i1
    SWAP tmp
    ANDI tmp, 0x0f
    LSR  tmp
    LSR  tmp
    OR   n, tmp

    ; --- s = (bit 0 of i3) << 7 | (i2 >> 1) ---
    MOV  s, i3
    LSR  s                  ; bit 0 of i3 -> C
    ROR  s                  ; C -> bit 7
    ANDI s, 0x80
    MOV  tmp, i2
    LSR  tmp
    OR   s, tmp

    ; --- voice 1: index n, keep bit 4 of the result ---
    MOV  t, n
    RCALL g
    SWAP t
    ANDI t, 1
    MOV  acc, t

    ; --- voice 2: index n ^ (i >> 13), keep bits 2..3, mask with s ---
    MOV  tmp, i2
    LSL  tmp
    LSL  tmp
    LSL  tmp
    MOV  t, i1
    SWAP t
    ANDI t, 0xf
    LSR  t                  ; t = i1 >> 5
    OR   t, tmp             ; t = low byte of (i >> 13)
    EOR  t, n
    RCALL g
    LSR  t
    LSR  t
    ANDI t, 3
    AND  t, s
    ADD  acc, t

    ; --- voice 3: index n + ((i >> 11) mod 3), keep bits 2..3, mask with s/3 ---
    MOV  Ml, i2
    SWAP Ml
    ANDI Ml, 0xf0
    LSL  Ml                 ; Ml = i2 << 5
    MOV  tmp, i1
    LSR  tmp
    LSR  tmp
    LSR  tmp
    OR   Ml, tmp            ; Ml = bits 11..18 of i
    MOV  Mh, i3
    SWAP Mh
    ANDI Mh, 0xf0
    LSL  Mh                 ; Mh = i3 << 5
    MOV  tmp, i2
    LSR  tmp
    LSR  tmp
    LSR  tmp
    OR   Mh, tmp            ; Mh = bits 19..26 of i
    RCALL mod3
    ADD  t, n
    RCALL g
    LSR  t
    LSR  t
    ANDI t, 3
    MOV  x, s
    INC  x                  ; x = s + 1
    MOV  tmp, x             ; tmp = roughly x * 85 / 256, built by shift-and-add
    LSR  tmp
    LSR  tmp
    ADD  tmp, x
    ROR  tmp
    LSR  tmp
    ADD  tmp, x
    ROR  tmp
    LSR  tmp
    ADD  tmp, x
    ROR  tmp
    LSR  tmp
    AND  t, tmp
    ADD  acc, t

    ; --- voice 4: index 8 + n - ((i >> 10) mod 3), keep bits 1..2, mask with s/5 ---
    MOV  Ml, i2
    SWAP Ml
    ANDI Ml, 0xf0
    LSL  Ml
    LSL  Ml                 ; Ml = i2 << 6
    MOV  tmp, i1
    LSR  tmp
    LSR  tmp
    OR   Ml, tmp            ; Ml = bits 10..17 of i
    MOV  Mh, i3
    SWAP Mh
    ANDI Mh, 0xf0
    LSL  Mh
    LSL  Mh                 ; Mh = i3 << 6
    MOV  tmp, i2
    LSR  tmp
    LSR  tmp
    OR   Mh, tmp            ; Mh = bits 18..25 of i
    RCALL mod3
    SUB  t, n
    NEG  t                  ; t = n - remainder
    SUBI t, -8
    RCALL g
    LSR  t
    ANDI t, 3
    INC  s                  ; s is recomputed on the next invocation
    MOV  tmp, s             ; tmp = roughly s * 51 / 256, built by shift-and-add
    LSR  tmp
    ADD  tmp, s
    ROR  tmp
    LSR  tmp
    LSR  tmp
    ADD  tmp, s
    ROR  tmp
    ADD  tmp, s
    ROR  tmp
    LSR  tmp
    LSR  tmp
    AND  t, tmp
    ADD  acc, t

    SWAP acc                ; acc<<4, to be passed to OCR0AL

    ; --- i += 1 (32 bit), done by subtracting -1 with borrow ---
    SUBI i0, -1
    SBCI i1, -1
    SBCI i2, -1
    SBCI i3, -1

    CBI  PORTB, 2           ; end runtime measurement
    ;TODO: to reduce jitter: clear pending tim0_ovf (TIFR0[TOV0] <- 1) ?
    #undef tmp
    RETI                    ; reenables interrupts
Imprint / Impressum