]> git.gir.st - VimFx.git/blob - extension/packages/huffman.coffee
Merge pull request #153 from LordJZ/develop-minify
[VimFx.git] / extension / packages / huffman.coffee
1 # `originalElements` should be an array of objects. Each object is expected to have a property (see
2 # the `options` argument) which represents the _weight_ of the object, which is a positive number.
3 # Each object will be given a _code word_ (see the `options` argument). The larger the weight, the
4 # shorter the code word. A weight of 0 means that the element should not get a code word at all. The
5 # code words will use the `options.alphabet` provided. Note that the function modifies the
6 # `originalElements` array (see the `options` argument) and returns `undefined`.
7 exports.addHuffmanCodeWordsTo = (originalElements, options = {}) ->
8 weightProperty = options.weightProperty or 'weight'
9 codeWordProperty = options.codeWordProperty or 'codeWord'
10 setCodeWord = options.setCodeWord or (element, codeWord, index) ->
11 element[codeWordProperty] = codeWord
12 { alphabet } = options
13
14 if typeof(alphabet) != 'string'
15 throw new TypeError('`options.alphabet` must be provided and be a string.')
16
17 nonUnique = /([\s\S])[\s\S]*\1/.exec(alphabet)
18 if nonUnique
19 throw new Error("`options.alphabet` must consist of unique letters. Found '#{nonUnique[1]}' more than once.")
20
21 if alphabet.length < 2
22 throw new Error('`options.alphabet` must consist of at least 2 characters.')
23
24
25 # The algorithm is so optimized, that it does not produce a code word at all if there is only one
26 # element! We still need a code word even if there is only one link, though.
27 if originalElements.length == 1
28 setCodeWord(originalElements[0], alphabet[0], 0)
29 return
30
31
32 elements = ({weight: obj[weightProperty], index} for obj, index in originalElements)
33
34 numBranches = alphabet.length
35 numElements = elements.length
36
37 # The Huffman algorithm needs to create a `numBranches`-ary tree (one branch for each character in
38 # the `alphabet`). Such a tree can be formed by `1 + (numBranches - 1) * n` elements: There is the
39 # root of the tree (`1`), and each branching adds `numBranches` elements to the total number or
40 # elements, but replaces itself (`numBranches - 1`). `n` is the number of points where the tree
41 # branches. In order to create the tree using `numElements` elements, we need to find an `n` such
42 # that `1 + (numBranches - 1) * n >= numElements` (1), and then pad `numElements`, such that `1 +
43 # (numBranches - 1) * n == numElements + padding` (2).
44 #
45 # Solving for `n = numBranchPoints` in (1) gives:
46 numBranchPoints = Math.ceil((numElements - 1) / (numBranches - 1))
47 # Solving for `padding` in (2) gives:
48 padding = 1 + (numBranches - 1) * numBranchPoints - numElements
49
50 # Pad with zero-weights to be able to form a `numBranches`-ary tree.
51 for i in [0...padding] by 1
52 elements.push({weight: 0})
53
54 # Construct the Huffman tree.
55 for i in [0...numBranchPoints] by 1
56 # Sort the elements after their weights, in descending order, so that the last ones will be the
57 # ones with lowest weight.
58 elements.sort((a, b) -> b.weight - a.weight)
59
60 # Replace `numBranches` of the lightest weights with their sum.
61 sum = {weight: 0, children: []}
62 for i in [0...numBranches] by 1
63 lowestWeight = elements.pop()
64 sum.weight += lowestWeight.weight
65 sum.children.unshift(lowestWeight)
66 elements.push(sum)
67
68 root = elements[0] # `elements.length == 1` by now.
69
70 # Create the code words by walking the tree. Store them using `setCodeWord`.
71 do walk = (node = root, codeWord = '') ->
72 if node.children
73 for childNode, index in node.children
74 walk(childNode, codeWord + alphabet[index])
75 else
76 setCodeWord(originalElements[node.index], codeWord, node.index) unless node.weight == 0
77 return
Imprint / Impressum