]> git.gir.st - VimFx.git/blob - extension/packages/huffman.coffee
Lint cleanup
[VimFx.git] / extension / packages / huffman.coffee
1 # `originalElements` should be an array of objects. Each object is expected to have a property (see
2 # the `options` argument) which represents the _weight_ of the object, which is a positive number.
3 # Each object will be given a _code word_ (see the `options` argument). The larger the weight, the
4 # shorter the code word. A weight of 0 means that the element should not get a code word at all. The
5 # code words will use the `options.alphabet` provided. Note that the function modifies the
6 # `originalElements` array (see the `options` argument) and returns `undefined`.
7 exports.addHuffmanCodeWordsTo = (originalElements, options = {}) ->
8 weightProperty = options.weightProperty or 'weight'
9 codeWordProperty = options.codeWordProperty or 'codeWord'
10 setCodeWord = options.setCodeWord or (element, codeWord, index) -> element[codeWordProperty] = codeWord
11 { alphabet } = options
12
13 if typeof(alphabet) != 'string'
14 throw new TypeError('`options.alphabet` must be provided and be a string.')
15
16 nonUnique = /([\s\S])[\s\S]*\1/.exec(alphabet)
17 if nonUnique
18 throw new Error("`options.alphabet` must consist of unique letters. Found '#{nonUnique[1]}' more than once.")
19
20 if alphabet.length < 2
21 throw new Error('`options.alphabet` must consist of at least 2 characters.')
22
23
24 # The algorithm is so optimized, that it does not produce a code word at all if there is only one
25 # element! We still need a code word even if there is only one link, though.
26 if originalElements.length == 1
27 setCodeWord(originalElements[0], alphabet[0], 0)
28 return
29
30
31 elements = ({weight: obj[weightProperty], index} for obj, index in originalElements)
32
33 numBranches = alphabet.length
34 numElements = elements.length
35
36 # The Huffman algorithm needs to create a `numBranches`-ary tree (one branch for each character in
37 # the `alphabet`). Such a tree can be formed by `1 + (numBranches - 1) * n` elements: There is the
38 # root of the tree (`1`), and each branching adds `numBranches` elements to the total number or
39 # elements, but replaces itself (`numBranches - 1`). `n` is the number of points where the tree
40 # branches. In order to create the tree using `numElements` elements, we need to find an `n` such
41 # that `1 + (numBranches - 1) * n >= numElements` (1), and then pad `numElements`, such that `1 +
42 # (numBranches - 1) * n == numElements + padding` (2).
43 #
44 # Solving for `n = numBranchPoints` in (1) gives:
45 numBranchPoints = Math.ceil((numElements - 1) / (numBranches - 1))
46 # Solving for `padding` in (2) gives:
47 padding = 1 + (numBranches - 1) * numBranchPoints - numElements
48
49 # Pad with zero-weights to be able to form a `numBranches`-ary tree.
50 for i in [0...padding] by 1
51 elements.push({weight: 0})
52
53 # Construct the Huffman tree.
54 for i in [0...numBranchPoints] by 1
55 # Sort the elements after their weights, in descending order, so that the last ones will be the
56 # ones with lowest weight.
57 elements.sort((a, b) -> b.weight - a.weight)
58
59 # Replace `numBranches` of the lightest weights with their sum.
60 sum = {weight: 0, children: []}
61 for i in [0...numBranches] by 1
62 lowestWeight = elements.pop()
63 sum.weight += lowestWeight.weight
64 sum.children.unshift(lowestWeight)
65 elements.push(sum)
66
67 root = elements[0] # `elements.length == 1` by now.
68
69 # Create the code words by walking the tree. Store them using `setCodeWord`.
70 do walk = (node = root, codeWord = '') ->
71 if node.children
72 for childNode, index in node.children
73 walk(childNode, codeWord + alphabet[index])
74 else
75 setCodeWord(originalElements[node.index], codeWord, node.index) unless node.weight == 0
76 return
Imprint / Impressum