From 14b9c7bf84c196c855a16e3be23d976c40ba1a1b Mon Sep 17 00:00:00 2001 From: Rich Trott Date: Fri, 21 Oct 2022 21:48:54 -0700 Subject: [PATCH 1/2] url: remove \t \n \r in url.parse() similar to WHATWG WHATWG URL removes tab, new line, and carraige return characters before processing URL strings. To narrow the differences between WHATWG URL and url.parse(), and thus reduce opportunities for host spoofing etc. due to differences between the two APIs, let's do the same with url.parse(). --- lib/url.js | 3 +++ test/parallel/test-url-parse-format.js | 20 ++++++++++---------- 2 files changed, 13 insertions(+), 10 deletions(-) diff --git a/lib/url.js b/lib/url.js index fa7a3d21d20963..f157f2c5cd3cba 100644 --- a/lib/url.js +++ b/lib/url.js @@ -175,6 +175,9 @@ const forbiddenHostCharsIpv6 = /[\0\t\n\r #%/<>?@\\^|]/; Url.prototype.parse = function parse(url, parseQueryString, slashesDenoteHost) { validateString(url, 'url'); + // WHATWG URL removes tabs, newlines, and carriage returns. Let's do that too. + url = url.replace(/[\t\r\n]/g, ''); + // Copy chrome, IE, opera backslash-handling behavior. // Back slashes before the query string get converted to forward slashes // See: https://code.google.com/p/chromium/issues/detail?id=25916 diff --git a/test/parallel/test-url-parse-format.js b/test/parallel/test-url-parse-format.js index f046df74b1c0f7..bede4f08c6a34a 100644 --- a/test/parallel/test-url-parse-format.js +++ b/test/parallel/test-url-parse-format.js @@ -843,26 +843,26 @@ const parseTests = { port: null, hostname: 'a.b', hash: null, - pathname: '/%09bc%0Adr%0Def%20g%22hq%27j%3Ckl%3E', - path: '/%09bc%0Adr%0Def%20g%22hq%27j%3Ckl%3E?mn%5Cop%5Eq=r%6099%7Bst%7Cuv%7Dwz', + pathname: '/bcdref%20g%22hq%27j%3Ckl%3E', + path: '/bcdref%20g%22hq%27j%3Ckl%3E?mn%5Cop%5Eq=r%6099%7Bst%7Cuv%7Dwz', search: '?mn%5Cop%5Eq=r%6099%7Bst%7Cuv%7Dwz', query: 'mn%5Cop%5Eq=r%6099%7Bst%7Cuv%7Dwz', - href: 'http://a.b/%09bc%0Adr%0Def%20g%22hq%27j%3Ckl%3E?mn%5Cop%5Eq=r%6099%7Bst%7Cuv%7Dwz' + href: 'http://a.b/bcdref%20g%22hq%27j%3Ckl%3E?mn%5Cop%5Eq=r%6099%7Bst%7Cuv%7Dwz' }, 'http://a\r" \t\n<\'b:b@c\r\nd/e?f': { protocol: 'http:', slashes: true, - auth: 'a\r" \t\n<\'b:b', - host: 'c', + auth: 'a" <\'b:b', + host: 'cd', port: null, - hostname: 'c', + hostname: 'cd', hash: null, search: '?f', query: 'f', - pathname: '%0D%0Ad/e', - path: '%0D%0Ad/e?f', - href: 'http://a%0D%22%20%09%0A%3C\'b:b@c/%0D%0Ad/e?f' + pathname: '/e', + path: '/e?f', + href: 'http://a%22%20%3C\'b:b@cd/e?f' }, 'https://*': { @@ -1007,7 +1007,7 @@ for (const u in parseTests) { assert.deepStrictEqual( actual, expected, - `expected ${inspect(expected)}, got ${inspect(actual)}` + `parsing ${u} and expected ${inspect(expected)} but got ${inspect(actual)}` ); assert.deepStrictEqual( spaced, From 607d3481bea89981294f82e15a9e242846c6cb11 Mon Sep 17 00:00:00 2001 From: Rich Trott Date: Sat, 22 Oct 2022 08:43:27 -0700 Subject: [PATCH 2/2] fixup! url: remove \t \n \r in url.parse() similar to WHATWG --- lib/url.js | 7 ++++--- test/parallel/test-url-parse-format.js | 6 +++--- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/lib/url.js b/lib/url.js index f157f2c5cd3cba..95b7f8afe29e77 100644 --- a/lib/url.js +++ b/lib/url.js @@ -175,9 +175,6 @@ const forbiddenHostCharsIpv6 = /[\0\t\n\r #%/<>?@\\^|]/; Url.prototype.parse = function parse(url, parseQueryString, slashesDenoteHost) { validateString(url, 'url'); - // WHATWG URL removes tabs, newlines, and carriage returns. Let's do that too. - url = url.replace(/[\t\r\n]/g, ''); - // Copy chrome, IE, opera backslash-handling behavior. // Back slashes before the query string get converted to forward slashes // See: https://code.google.com/p/chromium/issues/detail?id=25916 @@ -322,6 +319,10 @@ Url.prototype.parse = function parse(url, parseQueryString, slashesDenoteHost) { case CHAR_TAB: case CHAR_LINE_FEED: case CHAR_CARRIAGE_RETURN: + // WHATWG URL removes tabs, newlines, and carriage returns. Let's do that too. + rest = rest.slice(0, i) + rest.slice(i + 1); + i -= 1; + break; case CHAR_SPACE: case CHAR_DOUBLE_QUOTE: case CHAR_PERCENT: diff --git a/test/parallel/test-url-parse-format.js b/test/parallel/test-url-parse-format.js index bede4f08c6a34a..03e7b7ece5ef5c 100644 --- a/test/parallel/test-url-parse-format.js +++ b/test/parallel/test-url-parse-format.js @@ -843,11 +843,11 @@ const parseTests = { port: null, hostname: 'a.b', hash: null, - pathname: '/bcdref%20g%22hq%27j%3Ckl%3E', - path: '/bcdref%20g%22hq%27j%3Ckl%3E?mn%5Cop%5Eq=r%6099%7Bst%7Cuv%7Dwz', + pathname: '/%09bc%0Adr%0Def%20g%22hq%27j%3Ckl%3E', + path: '/%09bc%0Adr%0Def%20g%22hq%27j%3Ckl%3E?mn%5Cop%5Eq=r%6099%7Bst%7Cuv%7Dwz', search: '?mn%5Cop%5Eq=r%6099%7Bst%7Cuv%7Dwz', query: 'mn%5Cop%5Eq=r%6099%7Bst%7Cuv%7Dwz', - href: 'http://a.b/bcdref%20g%22hq%27j%3Ckl%3E?mn%5Cop%5Eq=r%6099%7Bst%7Cuv%7Dwz' + href: 'http://a.b/%09bc%0Adr%0Def%20g%22hq%27j%3Ckl%3E?mn%5Cop%5Eq=r%6099%7Bst%7Cuv%7Dwz' }, 'http://a\r" \t\n<\'b:b@c\r\nd/e?f': {