1 # `originalElements` should be an array of arrays. The first item of each sub-array should be a
2 # _weight_, which is a positive number. The sub-arrays may then contain whatever data that should be
3 # associated with the weight and the _code word_ which will be appended to each sub-array. The
4 # larger the weight, the shorter the code word. The code words will use the `{alphabet}` provided.
5 # Note that the function modifies the `originalElements` array and returns `null`.
6 exports.addHuffmanCodeWordsTo = (originalElements, {alphabet}) ->
7 if typeof(alphabet) != 'string'
8 throw new TypeError('`alphabet` must be a string.')
10 nonUnique = /([\s\S])[\s\S]*\1/.exec(alphabet)
12 throw new Error("`alphabet` must consist of unique letters. Found '#{nonUnique[1]}' more than once.")
14 if alphabet.length < 2
15 throw new Error('`alphabet` must consist of at least 2 characters.')
17 # The algorithm is so optimized, that it does not even produce a code word at all if there is only
18 # one element! We still need a code word even if there is only one link, though.
19 if originalElements.length == 1
20 originalElements[0].push(alphabet[0])
23 elements = ({ index, weight } for [weight], index in originalElements)
25 numBranches = alphabet.length
26 numElements = elements.length
27 # A `numBranches`-ary tree can be formed by `1 + (numBranches - 1) * n` elements (there needs to
28 # be 1 element left, and each parent node replaces `numBranches` other nodes). `n` is the number
29 # of points where the tree branches. If numElements does not exist in mentioned set, we have to
30 # pad it to the nearest larger such number. Thus, we need to find an `n` such that `1 +
31 # (numBranches - 1) * n >= numElements`. Solving for `n = numBranchPoints` gives:
32 numBranchPoints = Math.ceil((numElements - 1) / (numBranches - 1))
33 # It is required that `(numElements + padding) - (numBranches - 1) * numBranchPoints == 1` (see
34 # above). Otherwise we cannot form a `numBranches`-ary tree. Solving for `padding` gives:
35 padding = 1 + (numBranches - 1) * numBranchPoints - numElements
37 # Pad with zero-weights to be able to form a `numBranches` tree.
38 for i in [0...padding] by 1
39 elements.push({weight: 0})
41 # Construct the Huffman tree.
42 for i in [0...numBranchPoints] by 1
43 # Sort the weights in descending order, so that the last ones will be the ones with lowest
45 elements.sort((a, b) -> b.weight - a.weight)
47 # Replace `numBranches` weights with their sum.
48 sum = {weight: 0, children: []}
49 for i in [0...numBranches] by 1
50 lowestWeight = elements.pop()
51 sum.weight += lowestWeight.weight
52 sum.children.unshift(lowestWeight)
55 root = elements[0] # `elements.length == 1` by now.
57 # Create the code words by walking the tree. Store them on `originalElements`.
58 do walk = (node = root, codeWord = '') ->
60 for childNode, index in node.children
61 walk(childNode, codeWord + alphabet[index])
63 originalElements[node.index].push(codeWord) unless node.weight == 0