]> git.gir.st - VimFx.git/blob - extension/packages/huffman.coffee
Merge remote-tracking branch 'upstream/develop' into zh-CN
[VimFx.git] / extension / packages / huffman.coffee
1 # `originalElements` should be an array of objects. Each object is expected to have a `weight`
2 # property which represents the _weight_ of the object, which is a positive number. Each object will
3 # be given a _code word_. The larger the weight, the shorter the code word. A weight of 0 means that
4 # the element should not get a code word at all. The code words will use the `alphabet` provided.
5 # The functions runs `callback(element, codeWord)` for each object in `originalElements` and returns
6 # `undefined`.
7 exports.addHuffmanCodeWordsTo = (originalElements, {alphabet}, callback) ->
8 unless typeof(alphabet) == 'string'
9 throw new TypeError('`alphabet` must be provided and be a string.')
10
11 nonUnique = /([\s\S])[\s\S]*\1/.exec(alphabet)
12 if nonUnique
13 throw new Error("`alphabet` must consist of unique letters. Found '#{nonUnique[1]}' more than once.")
14
15 if alphabet.length < 2
16 throw new Error('`alphabet` must consist of at least 2 characters.')
17
18 unless typeof(callback) == "function"
19 throw new TypeError "`callback` must be provided and be a function."
20
21
22 # Shallow working copy.
23 elements = originalElements[..]
24
25
26 # The algorithm is so optimized, that it does not produce a code word at all if there is only one
27 # element! We still need a code word even if there is only one link, though.
28 if elements.length == 1
29 callback(elements[0], alphabet[0])
30 return
31
32
33 numBranches = alphabet.length
34 numElements = elements.length
35
36 # The Huffman algorithm needs to create a `numBranches`-ary tree (one branch for each character in
37 # the `alphabet`). Such a tree can be formed by `1 + (numBranches - 1) * n` elements: There is the
38 # root of the tree (`1`), and each branching adds `numBranches` elements to the total number or
39 # elements, but replaces itself (`numBranches - 1`). `n` is the number of points where the tree
40 # branches. In order to create the tree using `numElements` elements, we need to find an `n` such
41 # that `1 + (numBranches - 1) * n >= numElements` (1), and then pad `numElements`, such that `1 +
42 # (numBranches - 1) * n == numElements + padding` (2).
43 #
44 # Solving for `n = numBranchPoints` in (1) gives:
45 numBranchPoints = Math.ceil((numElements - 1) / (numBranches - 1))
46 # Solving for `padding` in (2) gives:
47 padding = 1 + (numBranches - 1) * numBranchPoints - numElements
48
49 # Sort the elements after their weights, in descending order, so that the last ones will be the
50 # ones with lowest weight.
51 elements.sort((a, b) -> b.weight - a.weight)
52
53 # Pad with zero-weights to be able to form a `numBranches`-ary tree.
54 for i in [0...padding] by 1
55 elements.push({weight: 0})
56
57 # Construct the Huffman tree.
58 for i in [0...numBranchPoints] by 1
59 # Replace `numBranches` of the lightest weights with their sum.
60 sum = new BranchingPoint
61 for i in [0...numBranches] by 1
62 lowestWeight = elements.pop()
63 sum.weight += lowestWeight.weight
64 sum.children.unshift(lowestWeight)
65
66 # Find the index to insert the sum so that the sorting is maintained. That is faster than
67 # sorting the elements in each iteration.
68 break for element, index in elements by -1 when sum.weight <= element.weight
69 elements.splice(index + 1, 0, sum)
70
71 root = elements[0] # `elements.length == 1` by now.
72
73 # Create the code words by walking the tree. Store them using `callback`.
74 do walk = (node = root, codeWord = '') ->
75 if node instanceof BranchingPoint
76 for childNode, index in node.children
77 walk(childNode, codeWord + alphabet[index])
78 else
79 callback(node, codeWord) unless node.weight == 0
80 return
81
82
83 class BranchingPoint
84 constructor: ->
85 @weight = 0
86 @children = []
Imprint / Impressum