From e9f24221f8750808f9204afe9adc21fed906c2e9 Mon Sep 17 00:00:00 2001 From: Anton Khodakivskiy Date: Wed, 31 Jul 2013 01:34:43 +0300 Subject: [PATCH] Implemented Bloom filters to achieve shorter hints for those shortcuts that are used often --- extension/packages/bloomfilter.coffee | 152 ++++++++++++++++++++++++++ extension/packages/commands.coffee | 2 + extension/packages/hints.coffee | 5 +- extension/packages/marker.coffee | 44 ++++++++ 4 files changed, 200 insertions(+), 3 deletions(-) create mode 100644 extension/packages/bloomfilter.coffee diff --git a/extension/packages/bloomfilter.coffee b/extension/packages/bloomfilter.coffee new file mode 100644 index 0000000..92ac04f --- /dev/null +++ b/extension/packages/bloomfilter.coffee @@ -0,0 +1,152 @@ +{ getPref +, setPref } = require 'prefs' + +` +(function(exports) { + exports.BloomFilter = BloomFilter; + exports.fnv_1a = fnv_1a; + exports.fnv_1a_b = fnv_1a_b; + + var typedArrays = typeof ArrayBuffer !== "undefined"; + + // Creates a new bloom filter. If *m* is an array-like object, with a length + // property, then the bloom filter is loaded with data from the array, where + // each element is a 32-bit integer. Otherwise, *m* should specify the + // number of bits. *k* specifies the number of hashing functions. + function BloomFilter(m, k) { + var a; + if (typeof m !== "number") a = m, m = a.length * 32; + + this.m = m; + this.k = k; + var n = Math.ceil(m / 32), + i = -1; + + if (typedArrays) { + var kbytes = 1 << Math.ceil(Math.log(Math.ceil(Math.log(m) / Math.LN2 / 8)) / Math.LN2), + array = kbytes === 1 ? Uint8Array : kbytes === 2 ? Uint16Array : Uint32Array, + kbuffer = new ArrayBuffer(kbytes * k), + buckets = this.buckets = new Int32Array(n); + if (a) while (++i < n) buckets[i] = a[i]; + this._locations = new array(kbuffer); + } else { + var buckets = this.buckets = []; + if (a) while (++i < n) buckets[i] = a[i]; + else while (++i < n) buckets[i] = 0; + this._locations = []; + } + } + + // See http://willwhim.wordpress.com/2011/09/03/producing-n-hash-functions-by-hashing-only-once/ + BloomFilter.prototype.locations = function(v) { + var k = this.k, + m = this.m, + r = this._locations, + a = fnv_1a(v), + b = fnv_1a_b(a), + i = -1, + x = a % m; + while (++i < k) { + r[i] = x < 0 ? (x + m) : x; + x = (x + b) % m; + } + return r; + }; + + BloomFilter.prototype.add = function(v) { + var l = this.locations(v + ""), + i = -1, + k = this.k, + buckets = this.buckets; + while (++i < k) buckets[Math.floor(l[i] / 32)] |= 1 << (l[i] % 32); + }; + + BloomFilter.prototype.test = function(v) { + var l = this.locations(v + ""), + i = -1, + k = this.k, + b, + buckets = this.buckets; + while (++i < k) { + b = l[i]; + if ((buckets[Math.floor(b / 32)] & (1 << (b % 32))) === 0) { + return false; + } + } + return true; + }; + + // Estimated cardinality. + BloomFilter.prototype.size = function() { + var buckets = this.buckets, + bits = 0; + for (var i = 0, n = buckets.length; i < n; ++i) bits += popcnt(buckets[i]); + return -this.m * Math.log(1 - bits / this.m) / this.k; + }; + + // http://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel + function popcnt(v) { + v -= (v >> 1) & 0x55555555; + v = (v & 0x33333333) + ((v >> 2) & 0x33333333); + return ((v + (v >> 4) & 0xF0F0F0F) * 0x1010101) >> 24; + } + + // Fowler/Noll/Vo hashing. + function fnv_1a(v) { + var n = v.length, + a = 2166136261, + c, + d, + i = -1; + while (++i < n) { + c = v.charCodeAt(i); + if (d = c & 0xff000000) { + a ^= d >> 24; + a += (a << 1) + (a << 4) + (a << 7) + (a << 8) + (a << 24); + } + if (d = c & 0xff0000) { + a ^= d >> 16; + a += (a << 1) + (a << 4) + (a << 7) + (a << 8) + (a << 24); + } + if (d = c & 0xff00) { + a ^= d >> 8; + a += (a << 1) + (a << 4) + (a << 7) + (a << 8) + (a << 24); + } + a ^= c & 0xff; + a += (a << 1) + (a << 4) + (a << 7) + (a << 8) + (a << 24); + } + // From http://home.comcast.net/~bretm/hash/6.html + a += a << 13; + a ^= a >> 7; + a += a << 3; + a ^= a >> 17; + a += a << 5; + return a & 0xffffffff; + } + + // One additional iteration of FNV, given a hash. + function fnv_1a_b(a) { + a += (a << 1) + (a << 4) + (a << 7) + (a << 8) + (a << 24); + a += a << 13; + a ^= a >> 7; + a += a << 3; + a ^= a >> 17; + a += a << 5; + return a & 0xffffffff; + } +})(typeof exports !== "undefined" ? exports : this); +` + +class SerializableBloomFilter extends exports.BloomFilter + constructor: (@prefName, m, k) -> + if data = getPref(@prefName) + super(JSON.parse(data), k) + else + super(m, k) + + save: -> + data = JSON.stringify([].slice.call(@buckets)) + setPref(@prefName, data) + + +exports.SerializableBloomFilter = SerializableBloomFilter diff --git a/extension/packages/commands.coffee b/extension/packages/commands.coffee index 0bf9df5..5182042 100644 --- a/extension/packages/commands.coffee +++ b/extension/packages/commands.coffee @@ -379,6 +379,8 @@ hintCharHandler = (vim, keyStr, charCode) -> marker.matchHintChar(key) if marker.isMatched() + # Add element features to the bloom filter + marker.reward() vim.cb(marker) hints.removeHints(vim.window.document) vim.enterNormalMode() diff --git a/extension/packages/hints.coffee b/extension/packages/hints.coffee index c707739..3cfaafd 100644 --- a/extension/packages/hints.coffee +++ b/extension/packages/hints.coffee @@ -86,7 +86,7 @@ injectMarkers = (document) -> marker.setPosition(rect) fragment.appendChild(marker.markerElement) - marker.weight = rect.area + marker.weight = rect.area * marker.calcBloomRating() markers.push(marker) @@ -148,13 +148,12 @@ getElementRect = (element) -> clientRect = element.getBoundingClientRect() if isRectOk(clientRect, window) - areaRatio = if (element instanceof HTMLAnchorElement) then 100 else 1 return { top: clientRect.top + scrollTop - clientTop left: clientRect.left + scrollLeft - clientLeft width: clientRect.width height: clientRect.height - area: areaRatio * (clientRect.width * clientRect.height) + area: clientRect.width * clientRect.height } # If the rect has 0 dimensions, then check what's inside. diff --git a/extension/packages/marker.coffee b/extension/packages/marker.coffee index ea4ab56..191a53d 100644 --- a/extension/packages/marker.coffee +++ b/extension/packages/marker.coffee @@ -1,3 +1,8 @@ +{ SerializableBloomFilter } = require 'bloomfilter' + +HTMLDocument = Ci.nsIDOMHTMLDocument +HTMLAnchorElement = Ci.nsIDOMHTMLAnchorElement + # Marker class wraps the markable element and provides # methods to manipulate the markers class Marker @@ -66,5 +71,44 @@ class Marker isMatched: -> return @hintChars == @enteredHintChars + # Returns string features of the element that can be used in the bloom filter + # in order to add relevance to the hint marker + extractBloomFeatures: -> + features = {} + + # Class name of an element (walks up the node tree to find first element with at least one class) + suffix = '' + el = @element + while el.classList?.length == 0 and not el instanceof HTMLDocument + suffix = "#{ suffix } #{ el.tagName }" + el = el.parentNode + for className in el.classList + features["#{ el.tagName }.#{ className }#{ suffix }"] = 10 + + # Element id + if @element.id + features["#{ el.tagName }.#{ @element.id }"] = 5 + + if @element instanceof HTMLAnchorElement + features["a"] = 20 # Reward links no matter what + features["#{ el.tagName }.#{ @element.href }"] = 60 + features["#{ el.tagName }.#{ @element.title }"] = 40 + + return features + + # Returns rating of all present bloom features (plus 1) + calcBloomRating: -> + rating = 1 + for feature, weight of @extractBloomFeatures() + rating += if Marker.bloomFilter.test(feature) then weight else 0 + + return rating + + reward: -> + for feature, weight of @extractBloomFeatures() + Marker.bloomFilter.add(feature) + Marker.bloomFilter.save() + +Marker.bloomFilter = new SerializableBloomFilter('hint_bloom_data', 256 * 32, 16) exports.Marker = Marker -- 2.39.3