optimize multiplication for space
[Chiptunes.git] / foo.S
1 /* REGISTER MAP -- every alias is listed directly below the register it shares */
2 #define acc r16 // sample accumulator; written to OCR0AL at the start of sample
3 #define i0 r17 // 32-bit sample counter i, byte 0 (least significant)
4 #define i1 r18 // sample counter, byte 1
5 #define i2 r19 // sample counter, byte 2
6 #define i3 r20 // sample counter, byte 3 (most significant)
7 #define n r21 // low byte of i >> 14, computed in sample
8 #define s r22 // low byte of i >> 17, computed in sample
9 #define t r23 // argument and result of g; result of mod3
10 #define Ml r23 // mod3 input, low byte (same register as t)
11 #define x r24 // general scratch register
12 #define Mh r24 // mod3 input, high byte (same register as x)
13 #define a1 r24 // low byte of the product accumulator in g (same register as x)
14 #define _ r25 // scratch register; code sections name it tmp locally
15 #define a2 r25 // high byte of the product accumulator in g (same register as _)
16 #define Xlo r26 // low byte of the X pointer used to read the notes table
17 #define Xhi r27 // high byte of the X pointer, set once in main
18 #define one r28 // holds the constant 1, loaded in main
19 ; r29 has no name assigned
20 ; r30 Zlo
21 ; r31 Zhi
22 ; tmp and zero are temporary names, defined and undefined again where they are needed
23
24 /* I/O REGISTER ADDRESSES (sorted by address) AND MEMORY LAYOUT */
25 .equ DDRB, 0x01 ; port B data direction
26 .equ PORTB, 0x02 ; port B output, bit 2 is toggled when DEBUG is defined
27 .equ PUEB, 0x03 ; port B pull-up enable
28 .equ OCR0AL, 0x26 ; receives the output sample in the interrupt handler
29 .equ TIFR0, 0x2A ; timer0 interrupt flags
30 .equ TIMSK0, 0x2B ; timer0 interrupt mask
31 .equ TCCR0B, 0x2D ; timer0 control B
32 .equ TCCR0A, 0x2E ; timer0 control A
33 .equ WDTCSR, 0x31 ; watchdog control
34 .equ CLKPSR, 0x36 ; clock prescaler
35 .equ OSCCAL, 0x39 ; oscillator calibration
36 .equ SMCR, 0x3A ; sleep mode control
37 .equ CCP, 0x3C ; unlock register for protected I/O registers
38 .equ SPL, 0x3D ; stack pointer, low byte
39 .equ SPH, 0x3E ; stack pointer, high byte
40 .equ RAMEND, 0x5F ; initial stack pointer value
41 .equ FLASHM, 0x4000 ; offset added to the address of notes to read it through X
42
43 .section .text
44 .org 0x0000 ; RESET interrupt: execution starts here
45 CLR i0 ; the low three bytes of the sample counter are cleared here instead of in main,
46 CLR i1 ; presumably to make use of the space in front of the TIM0_OVF vector
47 CLR i2 ; (i3 is cleared at the start of main)
48 RJMP main ; continue with the rest of the setup
49 .org 0x0008 ; TIM0_OVF interrupt
50 RJMP sample ; the handler computes and outputs one sample per invocation
51
52 notes: ; table of 16 one-byte multipliers read by g: entries 0..7 are selected while (i2 & 3) == 0, entries 8..15 otherwise
53 .byte 0x84, 0x9d, 0xb0, 0x69, 0x9d, 0x84, 0x69, 0x58
54 .byte 0x75, 0x8c, 0xb0, 0x69, 0x8c, 0x75, 0x69, 0x58
55
56 mod3: ; mod3(Mh.Ml) -> t: reduces the 16-bit value Mh:Ml modulo 3; the result 0..2 is left in Ml, which is the same register as t; clobbers Mh (x) and tmp (r25)
57 #define tmp _ // scratch register for the folding steps
58 ADD Ml, Mh ; 256 == 1 (mod 3), so the two bytes can be added; the sum has 9 bits
59 CLR Mh ; clears Mh but leaves the carry of the ADD intact
60 ADC Mh, Mh ; store carry in Mh
61 MOV tmp, Ml
62 SWAP tmp
63 ANDI tmp, 0x0f ; tmp = high nibble of the 8-bit sum
64 SWAP Mh ; the stored carry 0/1 becomes 0x00/0x10
65 OR tmp, Mh ; tmp = high nibble + 16 * carry
66 ANDI Ml, 0x0f
67 ADD Ml, tmp ; 16 == 1 (mod 3): Ml = low nibble + everything above it
68 MOV tmp, Ml
69 LSR tmp
70 LSR tmp ; tmp = Ml >> 2
71 ANDI Ml, 0x03
72 ADD Ml, tmp ; 4 == 1 (mod 3): Ml = (Ml & 3) + (Ml >> 2)
73 MOV tmp, Ml
74 LSR tmp
75 LSR tmp ; tmp = Ml >> 2
76 ANDI Ml, 0x03
77 ADD Ml, tmp ; same fold once more; Ml <= 5 afterwards
78 CPI Ml, 3
79 BRCS skip ; Ml < 3: already the remainder
80 SUBI Ml, 3 ; Ml is 3..5: one subtraction yields the remainder
81 skip:
82 RET
83 #undef tmp
84
85 g: ; g(i, t) -> t: picks a multiplier from notes using t and i2, multiplies it with i1:i0 and returns product bits 8 and up in t; only bits 0..4 of the result (product bits 8..12) are exact; clobbers a1 (x), a2 (r25) and Xlo
86 CLR a1 ; a1 is the low byte of the product accumulator and doubles as the zero operand below
87
88 #define tmp _ // scratch; same register as a2, which is cleared only after this section
89 #define zero a1 // a1 was just cleared
90 ANDI t, 0x07 ; index within one row of 8 entries
91 MOV tmp, i2
92 ANDI tmp, 3
93 CPSE tmp, zero ; skip the next instruction if (i2 & 3) == 0
94 SUBI t, -8 ; otherwise select the second row: t += 8
95 #undef zero
96 #undef tmp
97
98 LDI Xlo, lo8(notes) ; Xhi was set to hi8(FLASHM + notes) in main
99 ADD Xlo, t ; NOTE: cannot overflow, since RAMEND == 0x5F
100 LD t, X ; t = notes[index], the multiplier
101
102 CLR a2 ; high byte of the product accumulator
103
104 ; begin of multiplication: shift-and-add over the bits of t, least significant bit first; the accumulator a2:a1 is shifted right once per bit
105 LSR t ; multiplier bit 0 -> carry
106 BRCC skip1
107 ADD a1, i0
108 ADC a2, i1 ; a2:a1 += i1:i0
109 skip1:
110 LSR a2
111 ROR a1 ; a2:a1 >>= 1
112 LSR t ; multiplier bit 1: no add step needed
113 ; BRCC skip2 -- this bit is always zero (bit 1 is clear in every entry of notes)
114 ; ADD a1, i0
115 ; ADC a2, i1
116 ;skip2:
117 LSR a2
118 ROR a1
119 LSR t ; multiplier bit 2
120 BRCC skip3
121 ADD a1, i0
122 ADC a2, i1 ; a carry out of a2 is dropped; it only affects bits that are discarded later
123 skip3:
124 LSR a2
125 ROR a1
126 LSR t ; multiplier bit 3
127 BRCC skip4
128 ADD a1, i0
129 ADC a2, i1
130 skip4:
131 LSR a2
132 ROR a1
133 LSR t ; multiplier bit 4
134 BRCC skip5
135 ADD a1, i0
136 ADC a2, i1
137 skip5:
138 LSR a2
139 ROR a1
140 LSR t ; multiplier bit 5: from here on only the low byte is maintained
141 BRCC skip6 ;sbrc t, NNN
142 ADD a1, i0
143 skip6:
144 LSR a1 ; shifts in 0 instead of the missing high bits, so bit 7 of a1 is no longer exact
145 LSR t ; multiplier bit 6
146 BRCC skip7
147 ADD a1, i0
148 skip7:
149 LSR a1
150 LSR t ; multiplier bit 7
151 BRCC skip8
152 ADD a1, i0
153 skip8:
154 LSR a1 ; after the three 8-bit steps bits 5..7 of a1 are inexact, bits 0..4 are exact
155
156 MOV t, a1 ;;TODO: use a1 in loop: directly
157 RET
158
159 main: ; setup routine: initializes registers, stack, port, clock and timer0, enables sleep and interrupts, then idles in loop while the TIM0_OVF handler (sample) produces the output
160 ; NOTE: clr i0..i2 moved to .org 0x0
161 CLR i3
162 CLR acc ; we output a dummy sample before the actual first one
163 LDI Xhi, hi8(FLASHM + notes) ; never changes
164 LDI one, 1 ; mostly for clearing TIM0_OVF bit
165
166 #define zero i0 // i0 was cleared at the reset vector and is not modified before SEI
167 LDI x, RAMEND
168 OUT SPL, x ; init stack ptr
169 OUT SPH, zero ; init stack ptr, high byte
170 OUT PUEB, zero ; disable pullups
171 LDI x, 0x05
172 OUT DDRB, x ; PORTB0:pwm, PORTB2:debug
173 LDI x, 0xd8
174 OUT CCP, x ; change protected ioregs
175 OUT CLKPSR, one ; clock prescaler 1/2 (4Mhz)
176 LDI x, 0xa7 ; determined by trial-and-error (->PORTB2)
177 OUT OSCCAL, x ; set oscillator calibration
178 OUT WDTCSR, zero; turn off watchdog
179 OUT SMCR, one ; sleep enable, mode idle: without this the SLEEP in loop does not halt the CPU; kept after the WDTCSR write so the timing of the sequence following the CCP write is unchanged
180 ;set timer/counter0 to 8bit fastpwm, non-inverting, no prescaler
181 LDI x, 0x81
182 OUT TCCR0A, x
183 LDI x, 0x09
184 OUT TCCR0B, x
185 OUT TIMSK0, one ; enable tim0_ovf
186 SEI
187 #undef zero
188
189 loop:
190 SLEEP ; wait for interrupt (idle mode keeps timer0 running)
191 RJMP loop
192
193 sample: ; TIM0_OVF handler: outputs the sample computed by the previous invocation, computes the next one as the sum of four voices and increments the 32-bit counter i3:i2:i1:i0; overwrites acc, n, s, t, x, tmp (r25) and Xlo
194 OUT OCR0AL, acc ; start by outputting a sample, because routine has variable runtime
195 #ifdef DEBUG
196 SBI PORTB, 2 ; to measure runtime
197 #endif // DEBUG
198
199 MOV n, i2
200 LSL n
201 LSL n ; n = i2 << 2
202 #define tmp _
203 MOV tmp, i1
204 SWAP tmp
205 ANDI tmp, 0x0f ; tmp = i1 >> 4
206 LSR tmp
207 LSR tmp ; tmp = i1 >> 6
208 OR n, tmp ; n = low byte of (i >> 14)
209 #undef tmp
210 MOV s, i3
211 LSR s ; bit 0 of i3 -> carry
212 ROR s ; carry -> bit 7
213 ANDI s, 0x80 ; s = (i3 & 1) << 7
214 #define tmp _
215 MOV tmp, i2
216 LSR tmp
217 OR s, tmp ; s = low byte of (i >> 17)
218 #undef tmp
219
220 ; voice 1: bit 4 of g(i, n), i.e. product bit 12
221 MOV t, n
222 RCALL g
223 SWAP t ; moves bit 4 to bit 0
224 ANDI t, 1
225 MOV acc, t ; first voice initializes the accumulator
226
227 ; voice 2: bits 2..3 of g(i, n ^ (i >> 13)), masked with s
228 #define tmp _
229 MOV tmp, i2
230 LSL tmp
231 LSL tmp
232 LSL tmp ; tmp = i2 << 3
233 MOV t, i1
234 SWAP t
235 ANDI t, 0xf
236 LSR t ; t = i1 >> 5
237 OR t, tmp ; t = low byte of (i >> 13)
238 #undef tmp
239 EOR t, n
240 RCALL g
241 LSR t
242 LSR t
243 ANDI t, 3 ; product bits 10..11
244 AND t, s
245 ADD acc, t
246
247 ; voice 3: bits 2..3 of g(i, n + (i >> 11) % 3), masked with about (s + 1) / 3
248 MOV Ml, i2
249 SWAP Ml
250 ANDI Ml, 0xf0
251 LSL Ml ; Ml = i2 << 5
252 #define tmp _
253 MOV tmp, i1
254 LSR tmp
255 LSR tmp
256 LSR tmp ; tmp = i1 >> 3
257 OR Ml, tmp ; Ml = bits 11..18 of i
258 #undef tmp
259 MOV Mh, i3
260 SWAP Mh
261 ANDI Mh, 0xf0
262 LSL Mh ; Mh = i3 << 5
263 #define tmp _
264 MOV tmp, i2
265 LSR tmp
266 LSR tmp
267 LSR tmp ; tmp = i2 >> 3
268 OR Mh, tmp ; Mh = bits 19..26 of i
269 #undef tmp
270 RCALL mod3 ; t = Mh:Ml mod 3
271 ADD t, n
272 RCALL g
273 LSR t
274 LSR t
275 ANDI t, 3 ; product bits 10..11
276 MOV x, s ; x may be reused here, g has returned
277 INC x ; x = s + 1
278 #define tmp _
279 MOV tmp, x ; the sequence below scales x by 85/256, an approximation of x / 3
280 LSR tmp
281 LSR tmp ; x * 1/4
282 ADD tmp, x ; x * 5/4, bit 8 in carry
283 ROR tmp ; x * 5/8, the carry of the ADD is shifted back in
284 LSR tmp ; x * 5/16
285 ADD tmp, x ; x * 21/16
286 ROR tmp ; x * 21/32
287 LSR tmp ; x * 21/64
288 ADD tmp, x ; x * 85/64
289 ROR tmp ; x * 85/128
290 LSR tmp ; x * 85/256
291 AND t, tmp
292 #undef tmp
293 ADD acc, t
294
295 ; voice 4: bits 1..2 of g(i, 8 + n - (i >> 10) % 3), masked with about (s + 1) / 5
296 MOV Ml, i2
297 SWAP Ml
298 ANDI Ml, 0xf0
299 LSL Ml
300 LSL Ml ; Ml = i2 << 6
301 #define tmp _
302 MOV tmp, i1
303 LSR tmp
304 LSR tmp ; tmp = i1 >> 2
305 OR Ml, tmp ; Ml = bits 10..17 of i
306 #undef tmp
307 MOV Mh, i3
308 SWAP Mh
309 ANDI Mh, 0xf0
310 LSL Mh
311 LSL Mh ; Mh = i3 << 6
312 #define tmp _
313 MOV tmp, i2
314 LSR tmp
315 LSR tmp ; tmp = i2 >> 2
316 OR Mh, tmp ; Mh = bits 18..25 of i
317 #undef tmp
318 RCALL mod3 ; t = Mh:Ml mod 3
319 SUB t, n
320 NEG t ; t = n - (Mh:Ml mod 3)
321 SUBI t, -8 ; t += 8
322 RCALL g
323 LSR t
324 ANDI t, 3 ; product bits 9..10
325 INC s ; s is recomputed on the next invocation, so it may be modified here
326 #define tmp _
327 MOV tmp, s ; the sequence below scales s by 51/256, an approximation of s / 5
328 LSR tmp ; s * 1/2
329 ADD tmp, s ; s * 3/2, bit 8 in carry
330 ROR tmp ; s * 3/4
331 LSR tmp ; s * 3/8
332 LSR tmp ; s * 3/16
333 ADD tmp, s ; s * 19/16
334 ROR tmp ; s * 19/32
335 ADD tmp, s ; s * 51/32
336 ROR tmp ; s * 51/64
337 LSR tmp ; s * 51/128
338 LSR tmp ; s * 51/256
339 AND t, tmp
340 #undef tmp
341 ADD acc, t
342
343 SWAP acc ; acc<<4, to be passed to OCR0AL (acc is at most 1+3+3+3 here, so the nibble swap equals the shift)
344
345 SUBI i0, -1 ; i += 1: subtracting -1 with borrow propagates the increment through all four bytes
346 SBCI i1, -1
347 SBCI i2, -1
348 SBCI i3, -1
349
350 #ifdef DEBUG
351 CBI PORTB, 2 ; end runtime measurement
352 #endif // DEBUG
353 OUT TIFR0, one ; clear pending interrupt (routine takes two intr.cycles)
354 RETI ; reenables interrupts
Imprint / Impressum