]>
Commit | Line | Data |
---|---|---|
/* REGISTER NAMES
 * Every name below is a C-preprocessor macro for a CPU register, so these
 * identifiers (including the single-letter ones) cannot be reused as labels
 * or symbols anywhere in the file.
 */
#define acc  r16    /* sample accumulator; written to OCR0AL by sample */
#define i0   r17    /* sample counter, byte 0 (least significant) */
#define i1   r18    /* sample counter, byte 1 */
#define i2   r19    /* sample counter, byte 2 */
#define i3   r20    /* sample counter, byte 3 (most significant) */
#define n    r21    /* recomputed for every sample from i1/i2 */
#define s    r22    /* recomputed for every sample from i2/i3 */
#define t    r23    /* == Ml; argument and result of g and mod3 */
#define x    r24    /* == a1 == Mh */
#define _    r25    /* == a2; also used as the local scratch register */
#define Xlo  r26    /* X pointer, low byte */
#define Xhi  r27    /* X pointer, high byte */
#define one  r28    /* holds the constant 1 (loaded in main) */
/* r29      (no name assigned)
 * r30      Zlo
 * r31      Zhi
 */

/* aliases */
#define Ml   t      /* mod3: low operand byte, and the result */
#define Mh   x      /* mod3: high operand byte */
#define a1   x      /* multiply in g: accumulator low byte */
#define a2   _      /* multiply in g: accumulator high byte */
23 | ||
/* I/O REGISTERS
 * Absolute symbols for the I/O addresses used with OUT/SBI/CBI, listed in
 * ascending address order, followed by two memory-layout constants.
 */
.equ DDRB,   0x01       ; port B data direction
.equ PORTB,  0x02       ; port B output (bit 2 is the DEBUG timing pin)
.equ PUEB,   0x03       ; port B pull-up enable
.equ OCR0AL, 0x26       ; receives every output sample
.equ TIFR0,  0x2A       ; timer 0 interrupt flags
.equ TIMSK0, 0x2B       ; timer 0 interrupt mask
.equ TCCR0B, 0x2D       ; timer 0 control B
.equ TCCR0A, 0x2E       ; timer 0 control A
.equ WDTCSR, 0x31       ; watchdog control
.equ CLKPSR, 0x36       ; clock prescaler
.equ OSCCAL, 0x39       ; oscillator calibration
.equ SMCR,   0x3A       ; sleep mode control (going by the register name)
.equ CCP,    0x3C       ; unlock for change-protected registers
.equ SPL,    0x3D       ; stack pointer, low byte
.equ SPH,    0x3E       ; stack pointer, high byte

.equ RAMEND, 0x5F       ; initial stack pointer value
.equ FLASHM, 0x4000     ; added to the address of notes to form the X pointer
42 | ||
.section .text

/* Reset entry. The space in front of the TIM0_OVF slot is used for the first
 * three counter clears before jumping to the real setup code in main. */
.org 0x0000                     ; RESET interrupt
        CLR     i0
        CLR     i1
        CLR     i2
        RJMP    main

.org 0x0008                     ; TIM0_OVF interrupt
        RJMP    sample
51 | ||
/* Multiplier coefficients read by g: two rows of eight bytes.
 * g uses the low three bits of its argument as the column and selects the
 * second row whenever (i2 & 3) is non-zero. */
notes:
        ; row 0
        .byte   0x84, 0x9D, 0xB0, 0x69
        .byte   0x9D, 0x84, 0x69, 0x58
        ; row 1
        .byte   0x75, 0x8C, 0xB0, 0x69
        .byte   0x8C, 0x75, 0x69, 0x58
55 | ||
/* mod3: remainder of the 16-bit value Mh:Ml divided by 3.
 *   In:    Mh (high byte), Ml (low byte)
 *   Out:   Ml (== t) = (Mh:Ml) mod 3, in the range 0..2
 *   Clobb: Mh, _ (scratch), flags
 * Method: 256, 16 and 4 all leave remainder 1 when divided by 3, so adding
 * the bytes, then the nibbles, then the base-4 digits keeps the residue
 * while shrinking the value. After the last fold the value is at most 5,
 * so a single conditional subtraction finishes the job.
 */
mod3:
#define fold _
        ; bytes: 9-bit sum, carry kept in Mh
        ADD     Ml, Mh
        CLR     Mh
        ROL     Mh                  ; Mh = carry of the addition above
        SWAP    Mh                  ; carry now sits in bit 4, next to the high nibble

        ; nibbles: low nibble + (carry:high nibble), at most 15 + 31
        MOV     fold, Ml
        SWAP    fold
        ANDI    fold, 0x0f
        OR      fold, Mh
        ANDI    Ml, 0x0f
        ADD     Ml, fold

        ; base-4 digits, first pass
        MOV     fold, Ml
        LSR     fold
        LSR     fold
        ANDI    Ml, 0x03
        ADD     Ml, fold

        ; base-4 digits, second pass
        MOV     fold, Ml
        LSR     fold
        LSR     fold
        ANDI    Ml, 0x03
        ADD     Ml, fold

        ; final correction: 3..5 becomes 0..2
        CPI     Ml, 3
        BRLO    skip
        SUBI    Ml, 3
skip:
        RET
#undef fold
84 | ||
; Helper macros that make the multiplication tree in g readable.
; They operate on the accumulator a2:a1, the multiplicand i1:i0 and the
; coefficient held in t.

; --- markers: expand to nothing, they only document a coefficient bit whose
; --- value is already known at that point of the tree
.macro always bitno
.endm
.macro never bitno
.endm

; --- branches on one coefficient bit
.macro test bitno, target           ; jump to target when the bit is set
        SBRC    t, \bitno
        RJMP    \target
.endm
.macro i_test bitno, target         ; inverted: jump to target when the bit is clear
        SBRS    t, \bitno
        RJMP    \target
.endm

; --- add the multiplicand
.macro add16                        ; full 16-bit add
        ADD     a1, i0
        ADC     a2, i1
.endm
.macro add8                         ; low byte only, carry into a2 is dropped
        ADD     a1, i0
.endm

; --- shift the accumulator right by one
.macro shift16                      ; full 16-bit shift
        LSR     a2
        ROR     a1
.endm
.macro shift8                       ; low byte only: the top three result bits need
        LSR     a1                  ; not be correct, so nothing is carried in from a2
.endm
.macro shift0                       ; nothing: the last shift is shared code at end_mul
.endm
114 | ||
/* g: look up a coefficient and multiply the low counter word by it.
 *   In:    t   note selector (only the low three bits are used)
 *          i2  (i2 & 3) != 0 selects the second row of notes
 *          i1:i0 multiplicand
 *          Xhi must already hold hi8(FLASHM + notes) (set once in main)
 *   Out:   t = second byte (bits 8..15) of (i1:i0) * coefficient; the upper
 *          three bits are not reliable because the last steps only shift
 *          the low accumulator byte
 *   Clobb: a1 (x), a2 (_), Xlo, flags
 */
g:
#define row _
        ANDI    t, 0x07             ; column 0..7
        MOV     row, i2
        ANDI    row, 3
        CLR     a1                  ; accumulator low byte, doubles as zero below
        CPSE    row, a1             ; (i2 & 3) == 0: stay in the first row
        SUBI    t, -8               ; otherwise index the second row
#undef row

        LDI     Xlo, lo8(notes)
        ADD     Xlo, t              ; index is at most 15 and notes sits just behind
                                    ; the vectors, so the low byte does not wrap
        LD      t, X                ; t = coefficient

        CLR     a2

/* Decision-tree multiplication (notes from the original author):
 * only seven distinct coefficients exist, so the shift-and-add sequence is
 * unrolled per coefficient and only the bits that are actually needed are
 * computed. This lowers the cycle count from 38 for the (optimized) classic
 * approach to 31, while the instruction count grows from 38 to 100. In the
 * end there would have been enough spare cycles for the standard algorithm.
 *
 * A coefficient bit is tested only where the remaining candidates differ:
 *
 *   bit 0 = 0: bit 2 = 0: bit 3 = 0 -> 0xB0 (27cy)
 *                         bit 3 = 1 -> 0x58 (28cy)
 *              bit 2 = 1: bit 3 = 0 -> 0x84 (26cy)
 *                         bit 3 = 1 -> 0x8C (28cy)
 *   bit 0 = 1: bit 2 = 0:           -> 0x69 (26cy)
 *              bit 2 = 1: bit 3 = 0 -> 0x75 (31cy)
 *                         bit 3 = 1 -> 0x9D (30cy)
 *
 * Label names spell the low coefficient bits decided so far (bit 0 last).
 */
        test    0, m____1

        ; ---------------- bit 0 clear ----------------
m____0: shift16
        never   1
m___00: shift16
        test    2, m__100
m__000: shift16
        test    3, m_1000

        ; 0xB0 = 1011 0000
m_0000: shift16
        always  4
        add16
        shift16
        always  5
        add8
        shift8
        never   6
        shift8
        always  7
        add8
        shift0
        RJMP    end_mul

        ; 0x58 = 0101 1000
m_1000: add16
        shift16
        always  4
        add16
        shift16
        never   5
        shift8
        always  6
        add8
        shift8
        never   7
        shift0
        RJMP    end_mul

        ; 0x8C = 1000 1100 and 0x84 = 1000 0100 share everything but the bit 3 add
m__100: add16
        shift16
        i_test  3, m_0100
m_1100: add16
m_0100: shift16
        never   4
        shift16
        never   5
        shift8
        never   6
        shift8
        always  7
        add8
        shift0
        RJMP    end_mul

        ; ---------------- bit 0 set ----------------
m____1: add16
        shift16
        never   1
m___01: shift16
        test    2, m__101

        ; 0x69 = 0110 1001
m__001: shift16
        always  3
m_1001: add16
        shift16
        never   4
        shift16
        always  5
        add8
        shift8
        always  6
        add8
        shift8
        never   7
        shift0
        RJMP    end_mul

m__101: add16
        shift16
        test    3, m_1101

        ; 0x75 = 0111 0101
m_0101: shift16
        always  4
        add16
        shift16
        always  5
        add8
        shift8
        always  6
        add8
        shift8
        never   7
        shift0
        RJMP    end_mul

        ; 0x9D = 1001 1101, falls through into end_mul
m_1101: add16
        shift16
        always  4
        add16
        shift16
        never   5
        shift8
        never   6
        shift8
        always  7
        add8
        shift0

end_mul:
        LSR     a1                  ; the final shift, shared by every path

        MOV     t, a1               ; TODO: let callers use a1 directly
        RET
245 | ||
/* main: one-time hardware setup, then falls through into the idle loop.
 * Entered from the reset vector, which has already cleared i0..i2.
 * All further work happens in the TIM0_OVF handler (sample).
 */
main:
        ; NOTE: clearing of i0..i2 was moved to the reset vector (.org 0x0)
        CLR     i3
        CLR     acc                 ; we output a dummy sample before the actual first one
        LDI     Xhi, hi8(FLASHM + notes) ; never changes
        LDI     one, 1              ; mostly for clearing TIM0_OVF bit

#define zero i0                     /* i0 is still 0 here: cleared at reset */
        LDI     x, RAMEND
        OUT     SPL, x              ; init stack ptr
        OUT     SPH, zero           ; -"-
        OUT     PUEB, zero          ; disable pullups
        LDI     x, 0x05
        OUT     DDRB, x             ; PORTB0:pwm, PORTB2:debug
        LDI     x, 0xd8
        OUT     CCP, x              ; change protected ioregs
        OUT     CLKPSR, one         ; clock prescaler 1/2 (4Mhz)
        LDI     x, 0xa7             ; determined by trial-and-error (->PORTB2)
        OUT     OSCCAL, x           ; set oscillator calibration
        OUT     WDTCSR, zero        ; turn off watchdog

        ; set timer/counter0 to 8bit fastpwm, non-inverting, no prescaler
        LDI     x, 0x81
        OUT     TCCR0A, x
        LDI     x, 0x09
        OUT     TCCR0B, x
        OUT     TIMSK0, one         ; enable tim0_ovf

        ; Sleep enable (SE, bit 0) with the mode bits left at 0 = idle mode, in
        ; which the timer keeps running. Without SE the SLEEP instruction below
        ; has no effect and the loop would just spin.
        OUT     SMCR, one
        SEI
#undef zero

loop:
        SLEEP                       ; wait for interrupt
        RJMP    loop
279 | ||
/* sample: TIM0_OVF handler.
 * Writes the sample computed on the previous invocation to OCR0AL, then
 * computes the next one from the 32-bit counter i3:i2:i1:i0 as the sum of
 * four voices, and finally increments the counter.
 *   Out:   acc = (voice1 + voice2 + voice3 + voice4) << 4, counter + 1
 *   Clobb: n, s, t, x, _, Xlo, flags. Nothing is saved or restored; the
 *          interrupted code is only the SLEEP/RJMP loop.
 */
sample:
        OUT     OCR0AL, acc         ; output first, because the rest has variable run time
#ifdef DEBUG
        SBI     PORTB, 2            ; debug pin high: start of run-time measurement
#endif /* DEBUG */

#define scr _

        ; s = low byte of (i >> 17) = (i3 << 7) | (i2 >> 1)
        MOV     s, i3
        LSR     s                   ; bit 0 of i3 -> carry
        ROR     s                   ; carry -> bit 7
        ANDI    s, 0x80
        MOV     scr, i2
        LSR     scr
        OR      s, scr

        ; n = low byte of (i >> 14) = (i2 << 2) | (i1 >> 6)
        MOV     n, i2
        LSL     n
        LSL     n
        MOV     scr, i1
        SWAP    scr
        ANDI    scr, 0x0f
        LSR     scr
        LSR     scr
        OR      n, scr

        ; ---- voice 1: (g(n) >> 4) & 1
        MOV     t, n
        RCALL   g
        SWAP    t
        ANDI    t, 1
        MOV     acc, t

        ; ---- voice 2: (g((i >> 13) ^ n) >> 2) & 3 & s
        MOV     t, i1
        SWAP    t
        ANDI    t, 0x0f
        LSR     t                   ; i1 >> 5
        MOV     scr, i2
        LSL     scr
        LSL     scr
        LSL     scr                 ; i2 << 3
        OR      t, scr
        EOR     t, n
        RCALL   g
        LSR     t
        LSR     t
        ANDI    t, 3
        AND     t, s
        ADD     acc, t

        ; ---- voice 3: (g(n + mod3(i >> 11)) >> 2) & 3 & about (s + 1) / 3
        ; Mh:Ml = low 16 bits of (i >> 11)
        MOV     Mh, i3
        SWAP    Mh
        ANDI    Mh, 0xf0
        LSL     Mh                  ; i3 << 5
        MOV     scr, i2
        LSR     scr
        LSR     scr
        LSR     scr
        OR      Mh, scr
        MOV     Ml, i2
        SWAP    Ml
        ANDI    Ml, 0xf0
        LSL     Ml                  ; i2 << 5
        MOV     scr, i1
        LSR     scr
        LSR     scr
        LSR     scr
        OR      Ml, scr
        RCALL   mod3
        ADD     t, n
        RCALL   g
        LSR     t
        LSR     t
        ANDI    t, 3
        ; scr = x * 0.01010101b (85/256) with x = s + 1; each ROR pulls in
        ; the carry of the ADD right before it. INC wraps to 0 for s = 0xff.
        MOV     x, s
        INC     x
        MOV     scr, x
        LSR     scr
        LSR     scr
        ADD     scr, x
        ROR     scr
        LSR     scr
        ADD     scr, x
        ROR     scr
        LSR     scr
        ADD     scr, x
        ROR     scr
        LSR     scr
        AND     t, scr
        ADD     acc, t

        ; ---- voice 4: (g(8 + n - mod3(i >> 10)) >> 1) & 3 & about (s + 1) / 5
        ; Mh:Ml = low 16 bits of (i >> 10)
        MOV     Mh, i3
        SWAP    Mh
        ANDI    Mh, 0xf0
        LSL     Mh
        LSL     Mh                  ; i3 << 6
        MOV     scr, i2
        LSR     scr
        LSR     scr
        OR      Mh, scr
        MOV     Ml, i2
        SWAP    Ml
        ANDI    Ml, 0xf0
        LSL     Ml
        LSL     Ml                  ; i2 << 6
        MOV     scr, i1
        LSR     scr
        LSR     scr
        OR      Ml, scr
        RCALL   mod3
        SUB     t, n
        NEG     t                   ; t = n - mod3(...)
        SUBI    t, -8               ; + 8
        RCALL   g
        LSR     t
        ANDI    t, 3
        ; scr = s * 0.00110011b (51/256) after s has been incremented
        INC     s
        MOV     scr, s
        LSR     scr
        ADD     scr, s
        ROR     scr
        LSR     scr
        LSR     scr
        ADD     scr, s
        ROR     scr
        ADD     scr, s
        ROR     scr
        LSR     scr
        LSR     scr
        AND     t, scr
        ADD     acc, t

        SWAP    acc                 ; acc << 4, to be passed to OCR0AL next time

        ; 32-bit counter += 1 (subtracting -1 with borrow propagates the carry)
        SUBI    i0, -1
        SBCI    i1, -1
        SBCI    i2, -1
        SBCI    i3, -1

#ifdef DEBUG
        CBI     PORTB, 2            ; debug pin low: end of run-time measurement
#endif /* DEBUG */
        OUT     TIFR0, one          ; clear pending interrupt (routine takes two intr.cycles)
        RETI                        ; reenables interrupts
#undef scr
439 | #endif // DEBUG | |
440 | OUT TIFR0, one ; clear pending interrupt (routine takes two intr.cycles) | |
441 | RETI ; reenables interrupts |