Support multiple phishing configurations (#7079)

* Support multiple phishing configurations

The phishing detector has been updated to support multiple phishing configurations. Both the configuration object and the result object have been updated to accommodate the need to identify the name of the config that the checked domain matched. Since the config and return value were already being changed, the nomenclature has been updated to replace `black/white` with `block/allow` as well, which is a change we have been meaning to make for some time. This change to both the configuration and result object applies only when the new configuration format is used. The old format preserves the old config and result value, making this a non-breaking change. The old configuration accepted three lists (`blacklist`, `whitelist`, and `fuzzylist`), and a `tolerance` value for the fuzzylist match. The new configuration is an array of objects rather than an object, to accommodate multiple configurations. Each configuration option accepts three lists (`blocklist`, `allowlist`, and `fuzzylist`), `tolerance` for the fuzzylist match, and two new properties: `name` and `version`. The `version` parameter was already used by the old configuration, but it was not required or used by the detector itself. It is now required with the new configuration, and it is returned with each match. The new `name` parameter describes which configuration matched the origin being checked (if any). This was critical for us because it allows us to direct the user to the appropriate place when they want to dispute a blocked site. The return value was updated to include the `name` and `version` parameters. The `type` was updated from `blacklist` to `blocklist` and from `whitelist` to `allowlist` as well.

* v1.2.0

This release adds support for multiple phishing configurations, and includes changes to the configuration object and return value if an array of configuration values is passed to the phishing detector constructor.
This is a non-breaking change because the old configuration format is still supported, and the return values remain the same if the old configuration format is used. Co-authored-by: Deven Blake <trinity@trinity.moe> Co-authored-by: H <409H@users.noreply.github.com>
MetaMask · Apr 19, 2022 · fb540b0 · fb540b0 · pongit · Jul 25, 2022
1 parent 59aa4c1
commit fb540b0
Show file tree

Hide file tree

Showing 3 changed files with 1,118 additions and 41 deletions.
diff --git a/package.json b/package.json
@@ -1,6 +1,6 @@
 {
   "name": "eth-phishing-detect",
-  "version": "1.1.16",
+  "version": "1.2.0",
   "description": "Utility for detecting phishing domains targeting Ethereum users",
   "main": "src/index.js",
   "scripts": {

diff --git a/src/detector.js b/src/detector.js
@@ -3,47 +3,112 @@ const DEFAULT_TOLERANCE = 3
 
 class PhishingDetector {
 
+  /**
+   * Legacy phishing detector configuration.
+   *
+   * @typedef {object} LegacyPhishingDetectorConfiguration
+   * @property {string[]} [whitelist] - Origins that should not be blocked.
+   * @property {string[]} [blacklist] - Origins to block.
+   * @property {string[]} [fuzzylist] - Origins of common phishing targets.
+   * @property {number} [tolerance] - Tolerance to use for the fuzzylist levenshtein match.
+   */
+
+  /**
+   * A configuration object for phishing detection.
+   *
+   * @typedef {object} PhishingDetectorConfiguration
+   * @property {string[]} [allowlist] - Origins that should not be blocked.
+   * @property {string[]} [blocklist] - Origins to block.
+   * @property {string[]} [fuzzylist] - Origins of common phishing targets.
+   * @property {string} name - The name of this configuration. Used to explain to users why a site is being blocked.
+   * @property {number} [tolerance] - Tolerance to use for the fuzzylist levenshtein match.
+   * @property {number} version - The current version of the configuration.
+   */
+
+  /**
+   * Construct a phishing detector, which can check whether origins are known
+   * to be malicious or similar to common phishing targets.
+   *
+   * A list of configurations is accepted. Each origin checked is processed
+   * using each configuration in sequence, so the order defines which
+   * configurations take precedence.
+   *
+   * @param {LegacyPhishingDetectorConfiguration | PhishingDetectorConfiguration[]} opts - Phishing detection options
+   */
   constructor (opts) {
-    this.whitelist = processDomainList(opts.whitelist || [])
-    this.blacklist = processDomainList(opts.blacklist || [])
-    this.fuzzylist = processDomainList(opts.fuzzylist || [])
-    this.tolerance = ('tolerance' in opts) ? opts.tolerance : DEFAULT_TOLERANCE
+    // recommended configuration
+    if (Array.isArray(opts)) {
+      this.configs = processConfigs(opts)
+      this.legacyConfig = false
+    // legacy configuration
+    } else {
+      this.configs = [{
+        allowlist: processDomainList(opts.whitelist || []),
+        blocklist: processDomainList(opts.blacklist || []),
+        fuzzylist: processDomainList(opts.fuzzylist || []),
+        tolerance: ('tolerance' in opts) ? opts.tolerance : DEFAULT_TOLERANCE
+      }]
+      this.legacyConfig = true
+    }
   }
 
-  check (domain) {
-    let fqdn = domain.substring(domain.length - 1) === "." 
+  check(domain) {
+    const result = this._check(domain)
+
+    if (this.legacyConfig) {
+      let legacyType = result.type;
+      if (legacyType === 'allowlist') {
+        legacyType = 'whitelist'
+      } else if (legacyType === 'blocklist') {
+        legacyType = 'blacklist'
+      }
+      return {
+        match: result.match,
+        result: result.result,
+        type: legacyType,
+      }
+    }
+    return result
+  }
+
+  _check (domain) {
+    let fqdn = domain.substring(domain.length - 1) === "."
       ? domain.slice(0, -1)
       : domain;
 
     const source = domainToParts(fqdn)
 
-    // if source matches whitelist domain (or subdomain thereof), PASS
-    const whitelistMatch = matchPartsAgainstList(source, this.whitelist)
-    if (whitelistMatch) return { type: 'whitelist', result: false }
-
-    // if source matches blacklist domain (or subdomain thereof), FAIL
-    const blacklistMatch = matchPartsAgainstList(source, this.blacklist)
-    if (blacklistMatch) return { type: 'blacklist', result: true }
-
-    if (this.tolerance > 0) {
-      // check if near-match of whitelist domain, FAIL
-      let fuzzyForm = domainPartsToFuzzyForm(source)
-      // strip www
-      fuzzyForm = fuzzyForm.replace('www.', '')
-      // check against fuzzylist
-      const levenshteinMatched = this.fuzzylist.find((targetParts) => {
-        const fuzzyTarget = domainPartsToFuzzyForm(targetParts)
-        const distance = levenshtein.get(fuzzyForm, fuzzyTarget)
-        return distance <= this.tolerance
-      })
-      if (levenshteinMatched) {
-        const match = domainPartsToDomain(levenshteinMatched)
-        return { type: 'fuzzy', result: true, match }
+    for (const { allowlist, name, version } of this.configs) {
+      // if source matches whitelist domain (or subdomain thereof), PASS
+      const whitelistMatch = matchPartsAgainstList(source, allowlist)
+      if (whitelistMatch) return { name, result: false, type: 'allowlist', version }
+    }
+
+    for (const { blocklist, fuzzylist, name, tolerance, version } of this.configs) {
+      // if source matches blacklist domain (or subdomain thereof), FAIL
+      const blacklistMatch = matchPartsAgainstList(source, blocklist)
+      if (blacklistMatch) return { name, result: true, type: 'blocklist', version }
+
+      if (tolerance > 0) {
+        // check if near-match of whitelist domain, FAIL
+        let fuzzyForm = domainPartsToFuzzyForm(source)
+        // strip www
+        fuzzyForm = fuzzyForm.replace('www.', '')
+        // check against fuzzylist
+        const levenshteinMatched = fuzzylist.find((targetParts) => {
+          const fuzzyTarget = domainPartsToFuzzyForm(targetParts)
+          const distance = levenshtein.get(fuzzyForm, fuzzyTarget)
+          return distance <= tolerance
+        })
+        if (levenshteinMatched) {
+          const match = domainPartsToDomain(levenshteinMatched)
+          return { name, match, result: true, type: 'fuzzy', version }
+        }
       }
     }
 
     // matched nothing, PASS
-    return { type: 'all', result: false }
+    return { result: false, type: 'all' }
   }
 
 }
@@ -52,12 +117,52 @@ module.exports = PhishingDetector
 
 // util
 
+function processConfigs(configs = []) {
+  return configs.map((config) => {
+    validateConfig(config)
+    return Object.assign({}, config, {
+      allowlist: processDomainList(config.allowlist || []),
+      blocklist: processDomainList(config.blocklist || []),
+      fuzzylist: processDomainList(config.fuzzylist || []),
+      tolerance: ('tolerance' in config) ? config.tolerance : DEFAULT_TOLERANCE
+    })
+  });
+}
+
+function validateConfig(config) {
+  if (config === null || typeof config !== 'object') {
+    throw new Error('Invalid config')
+  }
+
+  if (config.tolerance && !config.fuzzylist) {
+    throw new Error('Fuzzylist tolerance provided without fuzzylist')
+  }
+
+  if (
+    typeof config.name !== 'string' ||
+    config.name === ''
+  ) {
+    throw new Error("Invalid config parameter: 'name'")
+  }
+
+  if (
+    !['number', 'string'].includes(typeof config.version) ||
+    config.version === ''
+  ) {
+    throw new Error("Invalid config parameter: 'version'")
+  }
+}
+
 function processDomainList (list) {
   return list.map(domainToParts)
 }
 
 function domainToParts (domain) {
+  try {
   return domain.split('.').reverse()
+  } catch (e) {
+    throw new Error(JSON.stringify(domain))
+  }
 }
 
 function domainPartsToDomain(domainParts) {
@@ -80,4 +185,4 @@ function matchPartsAgainstList(source, list) {
     // source matches target or (is deeper subdomain)
     return target.every((part, index) => source[index] === part)
   })
-}
+}