Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
feat: Use trie data structure to decrease file size (#33)
- Loading branch information
Showing
30 changed files
with
2,102 additions
and
985 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file was deleted.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file was deleted.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,37 @@ | ||
"use strict"; | ||
|
||
// Key that marks a wildcard rule ("*.foo") inside a trie node
const WILDCARD = "*";
// Prefix of keys that mark an exception rule ("!bar.foo") inside a trie node
const EXCEPTION = "!";

/**
 * Looks up the public suffix of a hostname in a trie of public suffix rules.
 *
 * The trie is a Map keyed by domain label, starting at the top-level domain.
 * A value is either `true` (no deeper rules below this label) or a nested Map
 * holding the rules one level further down. The hostname is walked from right
 * to left and every label that is covered by a rule is collected.
 *
 * When several rules apply to the same label, they are resolved in the order
 * of the Public Suffix List algorithm:
 *   1. an exception rule ("!label") ends the suffix in front of that label,
 *   2. an explicitly listed label is followed, because it may lead to a
 *      longer (more specific) rule,
 *   3. a wildcard rule ("*") accepts any remaining label as the last one.
 *
 * Every label found on the way is counted as part of the suffix, since a
 * nested Map does not record whether its own label is a rule by itself.
 *
 * @param {Map<string, (true|Map)>} trie root node of the trie
 * @param {string} hostname hostname with labels separated by "."
 * @returns {(string|null)} the matched suffix, or null if no label matched
 */
function lookUp(trie, hostname) {
    const labels = hostname.split(".").reverse();
    const tlds = [];
    let currentTrie = trie;

    for (const label of labels) {
        if (currentTrie.has(EXCEPTION + label)) {
            // An exception rule always prevails: the suffix ends one level above
            break;
        }

        if (currentTrie.has(label)) {
            tlds.push(label);

            const value = currentTrie.get(label);

            if (value === true) {
                break;
            }
            // Keep descending, even if a wildcard is also present on this level,
            // so that the rule with the most labels wins
            currentTrie = value;
            continue;
        }

        if (currentTrie.has(WILDCARD)) {
            // The wildcard stands for exactly one label
            tlds.push(label);
        }
        break;
    }

    return tlds.length === 0 ? null : tlds.reverse().join(".");
}
|
||
module.exports = lookUp; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,52 @@ | ||
"use strict"; | ||
|
||
// Splits the list content into lines, accepting both LF and CRLF line endings
const matchNewLine = /\r?\n/;
// Matches lines that start with "//", optionally preceded by whitespace
const matchComments = /^\s*\/\//;
// Matches lines that are empty or consist of whitespace only
const matchWhitespace = /^\s*$/;

// Section of the public suffix list that holds the ICANN domains
const icannList = {
    name: "icann",
    markers: {
        start: "// ===BEGIN ICANN DOMAINS===",
        end: "// ===END ICANN DOMAINS===",
    },
};

// Section of the public suffix list that holds the private domains
const privateList = {
    name: "private",
    markers: {
        start: "// ===BEGIN PRIVATE DOMAINS===",
        end: "// ===END PRIVATE DOMAINS===",
    },
};

// All sections that are extracted from the list, in the order they are processed
const lists = [icannList, privateList];
|
||
/**
 * Tells whether a line of the public suffix list carries a rule.
 *
 * @param {string} line a single line of the list
 * @returns {boolean} false for comment lines and for empty or whitespace-only lines, true otherwise
 */
function isWanted(line) {
    const isUnwanted = matchComments.test(line) || matchWhitespace.test(line);

    return !isUnwanted;
}
|
||
/**
 * Reduces a wanted line to the rule it contains.
 * The public suffix list format only reads a line up to the first whitespace,
 * so surrounding blanks and anything after the rule are dropped.
 *
 * @param {string} line a line that contains at least one non-whitespace character
 * @returns {string} the rule without surrounding or trailing content
 */
function toRule(line) {
    return line.trim().split(/\s+/)[0];
}

/**
 * Extracts the rules of every known section from the content of a public suffix list.
 *
 * Each section is delimited by the start and end marker configured in `lists`.
 * Comment lines and blank lines are skipped, every remaining line is reduced to its rule.
 *
 * @param {string} listContent the complete text of the public suffix list
 * @returns {Object<string, Array<string>>} the rules of each section, keyed by the section name
 * @throws {Error} if the start marker of a section is missing, or if no end marker follows it
 */
function parsePubSuffixList(listContent) {
    return lists.reduce((result, list) => {
        const start = listContent.indexOf(list.markers.start);

        if (start === -1) {
            throw new Error(`Missing start marker of ${ list.name } list`);
        }

        // Only an end marker behind the start marker closes the section.
        // Searching from the beginning would silently produce an empty section
        // if the markers were out of order.
        const end = listContent.indexOf(list.markers.end, start);

        if (end === -1) {
            throw new Error(`Missing end marker of ${ list.name } list`);
        }

        result[list.name] = listContent
            .slice(start, end)
            .split(matchNewLine)
            .filter(isWanted)
            .map(toRule);

        return result;
    }, {});
}
|
||
module.exports = parsePubSuffixList; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,56 @@ | ||
"use strict"; | ||
|
||
const SEPARATORS = require("./separators"); | ||
|
||
/**
 * Turns the textual representation of a trie into nested Maps.
 *
 * The input is read character by character. Characters that are not one of the
 * SEPARATORS are collected into a label. A separator stores the collected label
 * in the current Map and then moves the cursor:
 *   SAME  - the label is stored with the value true, the cursor stays
 *   DOWN  - the label is stored with a new child Map, the cursor enters that Map
 *   UP    - the label is stored with the value true, the cursor returns to the parent Map
 *   RESET - the label is stored with the value true, the cursor returns to the root Map
 * Empty labels are never stored.
 *
 * @param {string} input serialized trie
 * @returns {Map<string, (true|Map)>} the root Map of the trie
 */
function parseTrie(input) {
    const parentMaps = [new Map()];
    let currentMap = parentMaps[0];
    let label = "";

    // Stores the collected label (if there is one) and starts a new one
    const flushLabel = value => {
        if (label !== "") {
            currentMap.set(label, value);
            label = "";
        }
    };

    for (let index = 0; index < input.length; index += 1) {
        const char = input.charAt(index);

        if (char === SEPARATORS.SAME) {
            flushLabel(true);
        } else if (char === SEPARATORS.DOWN) {
            const childMap = new Map();

            flushLabel(childMap);
            // Remember where to return to when an UP separator follows
            parentMaps.push(currentMap);
            currentMap = childMap;
        } else if (char === SEPARATORS.RESET) {
            flushLabel(true);
            // Drop all parent maps except for the top most one
            parentMaps.length = 1;
            currentMap = parentMaps[0];
        } else if (char === SEPARATORS.UP) {
            flushLabel(true);
            currentMap = parentMaps.pop();
        } else {
            label += char;
        }
    }

    // The last label is not followed by a separator
    flushLabel(true);

    return parentMaps[0];
}
|
||
module.exports = parseTrie; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,8 @@ | ||
"use strict"; | ||
|
||
// Control characters of the textual trie representation.
// serializeTrie's lineToString() emits them between labels and parseTrie()
// interprets them to move through the nested maps while reading.
module.exports = {
    UP: "<", // one level up
    SAME: ",", // same level
    DOWN: ">", // one level down
    RESET: "|", // reset level index and start new
};
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,115 @@ | ||
"use strict"; | ||
|
||
const SEPARATORS = require("./separators"); | ||
|
||
// Trie type that keeps every line of the parsed list
const TYPE_COMPLETE = "complete";
// Trie type that drops all lines consisting of a single label
const TYPE_LIGHT = "light";
// All trie types that serializeTrie() accepts
const POSSIBLE_TYPES = [TYPE_COMPLETE, TYPE_LIGHT];
// Per trie type: predicate that decides whether a line (an array of labels) is kept
const LINE_FILTERS = {
    [TYPE_COMPLETE]() {
        return true;
    },
    [TYPE_LIGHT](line) {
        return line.length > 1;
    },
};
|
||
/**
 * Sort comparator for two lines (arrays of labels), starting at index i.
 *
 * The labels are compared pairwise with String.prototype.localeCompare().
 * The first pair that differs decides the order. If one line ends before a
 * difference is found, the shorter line is sorted first.
 *
 * @param {Array<string>} lineA
 * @param {Array<string>} lineB
 * @param {number} i index of the first label to compare
 * @returns {number} negative if lineA sorts first, positive if lineB sorts first, 0 if equal
 */
function compareLinesAt(lineA, lineB, i) {
    for (let index = i; index !== lineA.length && index !== lineB.length; index += 1) {
        const order = lineA[index].localeCompare(lineB[index]);

        if (order !== 0) {
            return order;
        }
    }

    // At least one line has no labels left, so the lengths decide
    return lineA.length - lineB.length;
}
|
||
/**
 * Finds the first position at which two lines (arrays of labels) differ.
 * A position that only exists in the longer line counts as a difference.
 *
 * @param {Array<string>} lineA
 * @param {Array<string>} lineB
 * @returns {number} the index of the first difference, or -1 if both lines are identical
 */
function findIndexOfDifference(lineA, lineB) {
    const limit = Math.max(lineA.length, lineB.length);
    let index = 0;

    while (index < limit && lineA[index] === lineB[index]) {
        index += 1;
    }

    return index < limit ? index : -1;
}
|
||
/**
 * Serializes one line (an array of labels, top-level domain first) relative to
 * the line in front of it. Intended as callback for Array.prototype.map() on a
 * sorted array of lines: labels shared with the previous line are not repeated,
 * only the separator that leads from the end of the previous line to the first
 * differing label is emitted, followed by the remaining labels.
 *
 * @param {Array<string>} line the line to serialize
 * @param {number} i index of line within arr
 * @param {Array<Array<string>>} arr all lines, sorted so that a line never precedes its own prefix
 * @returns {string} the serialized part for this line, "" if it equals the previous line
 */
function lineToString(line, i, arr) {
    if (i === 0) {
        // Nothing to share with: emit the whole line
        return line.join(SEPARATORS.DOWN);
    }

    const prevLine = arr[i - 1];
    const indexOfDifference = findIndexOfDifference(line, prevLine);

    if (indexOfDifference === -1) {
        // Identical lines
        return "";
    }

    let separatorFromPrev;

    if (indexOfDifference === 0) {
        // line and prevLine are completely different
        separatorFromPrev = SEPARATORS.RESET;
    } else if (indexOfDifference >= prevLine.length) {
        // we don't need to go up the hierarchy first because prevLine is part of line
        // so let's just start with an initial down separator
        separatorFromPrev = SEPARATORS.DOWN;
    } else if (indexOfDifference === prevLine.length - 1) {
        // the lines diverge at the last label of prevLine, so the next label is a sibling of it.
        // This must not depend on the length of line: without a separator the last label of
        // prevLine and the first new label of a longer line would be merged into one label.
        separatorFromPrev = SEPARATORS.SAME;
    } else {
        // line and prevLine are different, but share a common root at indexOfDifference - 1
        // we now need to go up the hierarchy to the common root
        separatorFromPrev = SEPARATORS.UP.repeat(prevLine.length - indexOfDifference - 1);
    }

    return separatorFromPrev + line.slice(indexOfDifference).join(SEPARATORS.DOWN);
}
|
||
/**
 * Serializes a list of public suffix rules into the textual representation of a trie.
 *
 * parsedList looks like:
 * [
 *     "com",
 *     "co.uk",
 *     "gov.uk",
 *     "静岡.jp",
 *     "岐阜.jp",
 *     "موقع"
 * ]
 *
 * The resulting tree looks like this:
 * com    uk          jp       موقع
 *       /  \        /  \
 *     co   gov   静岡   岐阜
 *
 * And the textual representation of the trie looks like (using SEPARATORS):
 * com|uk>co,gov|jp>静岡,岐阜|موقع
 *
 * With type "light", all domains with no subdomain are excluded from the serialized trie:
 * uk>co,gov|jp>静岡,岐阜
 *
 * @param {Array<string>} parsedList rules with labels separated by "."
 * @param {string} [type] one of POSSIBLE_TYPES, any falsy value selects TYPE_COMPLETE
 * @returns {string} the serialized trie
 * @throws {Error} if type is not one of POSSIBLE_TYPES
 */
function serializeTrie(parsedList, type) {
    const trieType = type || TYPE_COMPLETE;

    if (POSSIBLE_TYPES.indexOf(trieType) === -1) {
        const expectedTypes = POSSIBLE_TYPES.map(JSON.stringify).join(", ");

        throw new Error(
            `Cannot serialize trie: Unknown trie type "${ trieType }". Expected type to be one of ${ expectedTypes }`
        );
    }

    const lines = parsedList
        .map(rule => rule.split("."))
        .filter(LINE_FILTERS[trieType]);

    // The trie starts at the top-level domain, which is the last label of a rule.
    // The arrays were created by split() above, so the caller's data is not touched.
    lines.forEach(labels => labels.reverse());
    // Sorting puts lines with a common root next to each other, as lineToString expects
    lines.sort((lineA, lineB) => compareLinesAt(lineA, lineB, 0));

    return lines.map(lineToString).join("");
}
|
||
// Expose the supported trie types as properties of the exported function, so that
// callers can pass e.g. serializeTrie.TYPE_LIGHT instead of repeating the string.
serializeTrie.TYPE_COMPLETE = TYPE_COMPLETE;
serializeTrie.TYPE_LIGHT = TYPE_LIGHT;
|
||
module.exports = serializeTrie; |
Oops, something went wrong.