]> git.gir.st - VimFx.git/blob - extension/packages/huffman.coffee
Improve coding style and clarify a few things
[VimFx.git] / extension / packages / huffman.coffee
1 # `originalElements` should be an array of arrays. The first item of each sub-array should be a
2 # _weight_, which is a positive number. The sub-arrays may then contain whatever data that should be
3 # associated with the weight and the _code word_ which will be appended to each sub-array. The
4 # larger the weight, the shorter the code word. The code words will use the `{alphabet}` provided.
5 # Note that the function modifies the `originalElements` array and returns `null`.
6 exports.addHuffmanCodeWordsTo = (originalElements, {alphabet}) ->
7 if typeof(alphabet) != 'string'
8 throw new TypeError('`alphabet` must be a string.')
9
10 nonUnique = /([\s\S])[\s\S]*\1/.exec(alphabet)
11 if nonUnique
12 throw new Error("`alphabet` must consist of unique letters. Found '#{nonUnique[1]}' more than once.")
13
14 if alphabet.length < 2
15 throw new Error('`alphabet` must consist of at least 2 characters.')
16
17 # The algorithm is so optimized, that it does not even produce a code word at all if there is only
18 # one element! We still need a code word even if there is only one link, though.
19 if originalElements.length == 1
20 originalElements[0].push(alphabet[0])
21 return null
22
23 elements = ({ index, weight } for [weight], index in originalElements)
24
25 numBranches = alphabet.length
26 numElements = elements.length
27 # A `numBranches`-ary tree can be formed by `1 + (numBranches - 1) * n` elements (there needs to
28 # be 1 element left, and each parent node replaces `numBranches` other nodes). `n` is the number
29 # of points where the tree branches. If numElements does not exist in mentioned set, we have to
30 # pad it to the nearest larger such number. Thus, we need to find an `n` such that `1 +
31 # (numBranches - 1) * n >= numElements`. Solving for `n = numBranchPoints` gives:
32 numBranchPoints = Math.ceil((numElements - 1) / (numBranches - 1))
33 # It is required that `(numElements + padding) - (numBranches - 1) * numBranchPoints == 1` (see
34 # above). Otherwise we cannot form a `numBranches`-ary tree. Solving for `padding` gives:
35 padding = 1 + (numBranches - 1) * numBranchPoints - numElements
36
37 # Pad with zero-weights to be able to form a `numBranches` tree.
38 for i in [0...padding] by 1
39 elements.push({weight: 0})
40
41 # Construct the Huffman tree.
42 for i in [0...numBranchPoints] by 1
43 # Sort the weights in descending order, so that the last ones will be the ones with lowest
44 # weight.
45 elements.sort((a, b) -> b.weight - a.weight)
46
47 # Replace `numBranches` weights with their sum.
48 sum = {weight: 0, children: []}
49 for i in [0...numBranches] by 1
50 lowestWeight = elements.pop()
51 sum.weight += lowestWeight.weight
52 sum.children.unshift(lowestWeight)
53 elements.push(sum)
54
55 root = elements[0] # `elements.length == 1` by now.
56
57 # Create the code words by walking the tree. Store them on `originalElements`.
58 do walk = (node = root, codeWord = '') ->
59 if node.children
60 for childNode, index in node.children
61 walk(childNode, codeWord + alphabet[index])
62 else
63 originalElements[node.index].push(codeWord) unless node.weight == 0
64 return null
Imprint / Impressum