// parseDomain.js
"use strict";
const icannTrie = require("../lists/icann.complete");
const privateTrie = require("../lists/private.complete");
const normalize = require("./normalize.js");
const lookUp = require("./tries/lookUp");
// Splits a normalized url into its parts. Capture groups:
//   1 = protocol: "//", "://", "http://" or "https://"
//   2 = auth: everything up to and including "@" (must not contain "/")
//   3 = domain: matched lazily, so a trailing port and path end up in groups 4 and 5
//   4 = port: ":" followed by 2 to 5 digits
//   5 = path: starts with "/" or "?" and runs to the end of the string
// "." does not match line terminators, hence a url containing e.g. "\n" does not match at all.
// eslint-disable-next-line
const urlParts = /^(:?\/\/|https?:\/\/)?([^/]*@)?(.+?)(:\d{2,5})?([/?].*)?$/; // 1 = protocol, 2 = auth, 3 = domain, 4 = port, 5 = path
// Separator used by parseDomain to split the hostname (without its tld) at every dot
const dot = /\./g;
// Shared empty array: matchTld uses it as the receiver of concat(), which does not mutate it
const emptyArr = [];
/**
 * Determines the top-level domain of the given hostname.
 *
 * Custom tlds are checked first; a custom match is returned exactly as matched.
 * Otherwise the private trie (only if options.privateTlds is truthy) and then the
 * icann trie are searched, and the first hit is returned prefixed with a dot.
 *
 * @param {string} domain hostname without protocol, auth, port and path
 * @param {Object} options
 * @param {RegExp} [options.customTlds] pattern matched against the hostname
 * @param {boolean} [options.privateTlds] whether the private trie is searched as well
 * @returns {string|null} the matched tld or null if none was recognized
 */
function matchTld(domain, options) {
    // Custom tlds take precedence because they might not be part of the bundled lists
    const customMatch = options.customTlds ? domain.match(options.customTlds) : null;

    if (customMatch !== null) {
        return customMatch[0];
    }

    // The private trie, when enabled, is searched before the icann trie
    const tries = options.privateTlds ?
        [privateTrie].concat(icannTrie) :
        emptyArr.concat(icannTrie);

    for (let i = 0; i < tries.length; i++) {
        const found = lookUp(tries[i], domain);

        if (found !== null) {
            return "." + found;
        }
    }

    return null;
}
/* eslint-disable jsdoc/no-undefined-types */
/**
 * Strips everything from the url that is not part of the hostname (protocol, auth, port,
 * path, query) and breaks the hostname down into 'subdomain', 'domain' and 'tld'.
 *
 * The tld is determined by matchTld(): custom tlds first, then the bundled tries.
 *
 * Returns null if the normalized url is empty, if it cannot be split into its parts
 * or if no tld could be recognized.
 *
 * @param {string} url
 * @param {Object} [options]
 * @param {Array<string>|RegExp} [options.customTlds]
 * @param {boolean} [options.privateTlds]
 * @returns {Object|null}
 */
function parseDomain(url, options) {
    const normalizedUrl = normalize.url(url);

    if (!normalizedUrl) {
        return null;
    }

    const normalizedOptions = normalize.options(options);
    const parts = normalizedUrl.match(urlParts);

    // There is no match if the url contains certain characters like '\n', '\r'.
    if (parts === null) {
        return null;
    }

    // Capture group 3 holds the hostname, e.g. sub.domain.example.com
    const hostname = parts[3];
    const matchedTld = matchTld(hostname, normalizedOptions);

    if (matchedTld === null) {
        return null;
    }

    // Everything in front of the tld, split at the dots
    const labels = hostname.slice(0, -matchedTld.length).split(dot);
    // The label right before the tld is the domain, the rest forms the subdomain
    const domain = labels.pop();

    return {
        // A leading dot is not part of the tld (custom tlds like localhost have none)
        tld: matchedTld.charAt(0) === "." ? matchedTld.slice(1) : matchedTld,
        domain,
        subdomain: labels.join("."),
    };
}
module.exports = parseDomain;