X-Git-Url: https://git.gir.st/Chiptunes.git/blobdiff_plain/26799babfc57e999f575a4b07f20d93f778600a0..58515c6c1136cdae2fe2d5826dc449f4c9de9e6a:/foo.S

diff --git a/foo.S b/foo.S
index bde793c..d89dec5 100644
--- a/foo.S
+++ b/foo.S
@@ -125,14 +125,20 @@ g: ; g(i, t) -> t
 	#undef zero
 	#undef tmp
 
-	;TODO: check correctness!
 	LDI	Xlo, lo8(notes)
 	ADD	Xlo, t ; NOTE: can't overflow, since RAMEND == 0x5F
 	LD	t, X
 
 	CLR	a2
 
-	/* decision tree multiplication saves cycles and (hopefully) reduces code size
+	/* decision tree multiplication:
+	   there is only a limited number of coefficients, so we
+	   optimize heavily for just those and compute only the bits
+	   we actually need. this reduces the cycle count from 38
+	   (for the optimized classic approach) to 31, while the
+	   instruction count increases from 38 to 100. in the end it
+	   turned out that we would have had enough cycles to spare
+	   to just use the standard algorithm.
 	                     _xxx?
 	                 /           \
 	           _xx?0                _xx1?
@@ -145,7 +151,8 @@ g: ; g(i, t) -> t
 	   |     |       |     |       |     |     |
 	  ...   ...     ...   ...     ...   ...   ...
 	   |     |       |     |       |     |     |
-	   B0    58     84    8C      69     75    9D   */
+	   B0    58     84    8C      69     75    9D
+	  27cy  28cy   26cy  28cy    26cy   31cy  30cy  */
 		test	0, m____1
 	m____0:	shift16
 		never	1