2 # Copyright Simon Lydell 2013, 2014.
4 # This file is part of VimFx.
6 # VimFx is free software: you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License as published by
8 # the Free Software Foundation, either version 3 of the License, or
9 # (at your option) any later version.
11 # VimFx is distributed in the hope that it will be useful,
12 # but WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 # GNU General Public License for more details.
16 # You should have received a copy of the GNU General Public License
17 # along with VimFx. If not, see <http://www.gnu.org/licenses/>.
20 # `originalElements` should be an array of objects. Each object is expected to
21 # have a `weight` property which represents the _weight_ of the object, which is
22 # a positive number. Each object will be given a _code word_. The larger the
23 # weight, the shorter the code word. A weight of 0 means that the element should
24 # not get a code word at all. The code words will use the `alphabet` provided.
25 # The function runs `callback(element, codeWord)` for each object in
26 # `originalElements` and returns `undefined`.
27 exports.addHuffmanCodeWordsTo = (originalElements, { alphabet }, callback) ->
28 unless typeof alphabet == 'string'
29 throw new TypeError('`alphabet` must be provided and be a string.')
31 nonUnique = /([\s\S])[\s\S]*\1/.exec(alphabet)
33 throw new Error("`alphabet` must consist of unique letters. Found
34 '#{ nonUnique[1] }' more than once.")
36 if alphabet.length < 2
37 throw new Error('`alphabet` must consist of at least 2 characters.')
39 unless typeof callback == "function"
40 throw new TypeError("`callback` must be provided and be a function.")
43 # Shallow working copy.
44 elements = originalElements[..]
47 # The algorithm is so optimized, that it does not produce a code word at all
48 # if there is only one element! We still need a code word even if there is
49 # only one link, though.
50 if elements.length == 1
51 callback(elements[0], alphabet[0])
55 numBranches = alphabet.length
56 numElements = elements.length
58 # The Huffman algorithm needs to create a `numBranches`-ary tree (one branch
59 # for each character in the `alphabet`). Such a tree can be formed by `1 +
60 # (numBranches - 1) * n` elements: There is the root of the tree (`1`), and
61 # each branching adds `numBranches` elements to the total number or elements,
62 # but replaces itself (`numBranches - 1`). `n` is the number of points where
63 # the tree branches. In order to create the tree using `numElements` elements,
64 # we need to find an `n` such that `1 + (numBranches - 1) * n >= numElements`
65 # (1), and then pad `numElements`, such that `1 + (numBranches - 1) * n ==
66 # numElements + padding` (2).
68 # Solving for `n = numBranchPoints` in (1) gives:
69 numBranchPoints = Math.ceil((numElements - 1) / (numBranches - 1))
70 # Solving for `padding` in (2) gives:
71 padding = 1 + (numBranches - 1) * numBranchPoints - numElements
73 # Sort the elements after their weights, in descending order, so that the last
74 # ones will be the ones with lowest weight.
75 elements.sort((a, b) -> b.weight - a.weight)
77 # Pad with zero-weights to be able to form a `numBranches`-ary tree.
78 for i in [0...padding] by 1
79 elements.push({weight: 0})
81 # Construct the Huffman tree.
82 for i in [0...numBranchPoints] by 1
83 # Replace `numBranches` of the lightest weights with their sum.
84 sum = new BranchingPoint
85 for i in [0...numBranches] by 1
86 lowestWeight = elements.pop()
87 sum.weight += lowestWeight.weight
88 sum.children.unshift(lowestWeight)
90 # Find the index to insert the sum so that the sorting is maintained. That
91 # is faster than sorting the elements in each iteration.
92 break for element, index in elements by -1 when sum.weight <= element.weight
93 elements.splice(index + 1, 0, sum)
95 [ root ] = elements # `elements.length == 1` by now.
97 # Create the code words by walking the tree. Store them using `callback`.
98 do walk = (node = root, codeWord = '') ->
99 if node instanceof BranchingPoint
100 for childNode, index in node.children
101 walk(childNode, codeWord + alphabet[index])
103 callback(node, codeWord) unless node.weight == 0