1 # `originalElements` should be an array of arrays. The first item of each sub-array should be a
2 # _weight_, which is a positive number. The sub-arrays may then contain whatever data that should be
3 # associated with the weight and the _code word_ which will be appended to each sub-array. The
4 # larger the weight, the shorter the code word. The code words will use the `{alphabet}` provided.
5 # Note that the function modifies the `originalElements` array and returns `null`.
6 exports.addHuffmanCodeWordsTo = (originalElements, {alphabet}) ->
7 if typeof alphabet isnt 'string'
8 throw new TypeError '`alphabet` must be a string.'
10 nonUnique = /([\s\S])[\s\S]*\1/.exec alphabet
12 throw new Error "`alphabet` must consist of unique letters. Found '#{nonUnique[1]}' more than once."
14 if alphabet.length < 2
15 throw new Error '`alphabet` must consist of at least 2 characters.'
17 elements = ({index, weight} for [weight], index in originalElements)
19 numBranches = alphabet.length
20 numElements = elements.length
21 # A `numBranches`-ary tree can be formed by `1 + (numBranches - 1) * n` elements (there needs to
22 # be 1 element left, and each parent node replaces `numBranches` other nodes). `n` is the number
23 # of points where the tree branches. If numElements does not exist in mentioned set, we have to
24 # pad it to the nearest larger such number. Thus, we need to find an `n` such that `1 +
25 # (numBranches - 1) * n >= numElements`. Solving for `n = numBranchPoints` gives:
26 numBranchPoints = Math.ceil (numElements - 1) / (numBranches - 1)
27 # It is required that `(numElements + padding) - (numBranches - 1) * numBranchPoints == 1` (see
28 # above). Otherwise we cannot form a `numBranches`-ary tree. Solving for `padding` gives:
29 padding = 1 + (numBranches - 1) * numBranchPoints - numElements
31 # Pad with zero-weights to be able to form a `numBranches` tree.
32 for i in [0...padding] by 1
33 elements.push {weight: 0}
35 # Construct the Huffman tree.
36 for i in [0...numBranchPoints] by 1
37 # Sort the weights in descending order, so that the last ones will be the ones with lowest
39 elements.sort (a, b) -> b.weight - a.weight
41 # Replace `numBranches` weights with their sum.
42 sum = {weight: 0, children: []}
43 for i in [0...numBranches] by 1
44 lowestWeight = elements.pop()
45 sum.weight += lowestWeight.weight
46 sum.children.unshift lowestWeight
49 root = elements[0] # `elements.length is 1` by now.
51 # Create the code words by walking the tree.
52 do walk = (node=root, codeWord='') ->
54 for childNode, index in node.children
55 walk childNode, codeWord + alphabet[index]
57 originalElements[node.index].push codeWord unless node.weight is 0