10 #define x r24 // scratch register r24; shared under the aliases a1 (mul) and Mh (mod3)
19 #define Ml t // mod3 vars: Ml is an alias for t
21 #define a1 x // mul_ vars: a1 is an alias for x
44 .org 0x0000 ; RESET interrupt
49 .org 0x0008 ; TIM0_OVF interrupt
53 .byte 0x84, 0x9d, 0xb0, 0x69, 0x9d, 0x84, 0x69, 0x58 ; NOTE(review): every low nibble here matches a leaf of the mul decision tree (_0000 ... _1101); presumably the coefficient table - confirm
54 .byte 0x75, 0x8c, 0xb0, 0x69, 0x8c, 0x75, 0x69, 0x58 ; these values also appear in the RJMP end_mul comments (0x75, 0x8c, 0xb0, 0x69, 0x58)
56 mod3: ; mod3(Mh.Ml) -> t
60 ADC Mh, Mh ; store carry in Mh: Mh = 2*Mh + C, i.e. the carry is rotated into bit 0 (same encoding as ROL Mh)
85 ; definitions to make the mul-tree readable:
86 .macro always _bit ; nop; for when a test() is not necessary (see tree)
88 .macro never _bit ; nop; for when a test() is not necessary (see tree)
90 .macro test _bit,_jmpto
94 .macro i_test _bit,_jmpto ; inverted test (for reordered 0x8_)
102 .macro shift8 ; top three bits don't need to be correct, so save cycles by not carrying
105 .macro shift0 ; nop; last shift is common
111 .macro add8 ; ditto with carrying
129 ADD Xlo, t ; NOTE: can't overflow, since RAMEND == 0x5F
134 /* decision tree multiplication:
135 there is only a limited number of coefficients, so we heavily
136 optimize for those only, and only compute the bits we
137 actually need. this reduces cycle count from 38 for the
138 (optimized) classic approach to 31. instruction count
139 increases from 38 to 100. in the end it turned out that we
140 would've had enough cycles to spare to just use the standard
148 _?000 _?100 _?001 _?101
150 _0000 _1000 _0100 _1100 _1001 _0101 _1101
152 ... ... ... ... ... ... ...
155 27cy 28cy 26cy 28cy 26cy 31cy 30cy */
172 RJMP end_mul ; calc'd 0xb0
174 m_1000: add16 $ shift16
183 RJMP end_mul ; calc'd 0x58
185 m__100: add16 $ shift16
197 RJMP end_mul ; calc'd 0x8c / 0x84
199 m____1: add16 $ shift16
205 m_1001: add16 $ shift16
214 RJMP end_mul ; calc'd 0x69
216 m__101: add16 $ shift16
227 RJMP end_mul ; calc'd 0x75
229 m_1101: add16 $ shift16
241 LSR a1 ;final shift is a common operation for all
243 MOV t, a1 ;;TODO: use a1 in loop: directly
246 main: ; setup routine
247 ; NOTE: clr i0..i2 moved to .org 0x0
249 CLR acc ; we output a dummy sample before the actual first one
250 LDI Xhi, hi8(FLASHM + notes) ; never changes
251 LDI one, 1 ; constant 1: written to CLKPSR and TIMSK0 below, and to TIFR0 to clear the TIM0_OVF flag
255 OUT SPL, x ; init stack ptr
257 OUT PUEB, zero ; disable pullups
259 OUT DDRB, x ; PORTB0:pwm, PORTB2:debug
261 OUT CCP, x ; unlock change-protected ioregs (needed for the CLKPSR write that follows)
262 OUT CLKPSR, one ; clock prescaler 1/2 (4 MHz)
263 LDI x, 0xa7 ; determined by trial-and-error (->PORTB2)
264 OUT OSCCAL, x ; set oscillator calibration
265 OUT WDTCSR, zero; turn off watchdog
267 ;set timer/counter0 to 8bit fastpwm, non-inverting, no prescaler
272 OUT TIMSK0, one ; enable tim0_ovf
277 SLEEP ; wait for interrupt
281 OUT OCR0AL, acc ; start by outputting a sample, because routine has variable runtime
283 SBI PORTB, 2 ; to measure runtime
430 SWAP acc ; acc<<4, to be passed to OCR0AL
438 CBI PORTB, 2 ; end runtime measurement
440 OUT TIFR0, one ; clear pending interrupt (routine takes two intr.cycles)
441 RETI ; reenables interrupts