Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

test, url: update WHATWG URL parser to align with latest spec #43190

Merged
merged 5 commits into from
Jun 30, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
81 changes: 58 additions & 23 deletions src/node_url.cc
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
#include "node_i18n.h"
#include "util-inl.h"

#include <algorithm>
#include <cmath>
#include <cstdio>
#include <numeric>
Expand Down Expand Up @@ -58,7 +59,7 @@ class URLHost {
public:
~URLHost();

void ParseIPv4Host(const char* input, size_t length, bool* is_ipv4);
void ParseIPv4Host(const char* input, size_t length);
void ParseIPv6Host(const char* input, size_t length);
void ParseOpaqueHost(const char* input, size_t length);
void ParseHost(const char* input,
Expand Down Expand Up @@ -165,6 +166,9 @@ enum url_cb_args {
// https://infra.spec.whatwg.org/#ascii-tab-or-newline
// The test expression holds for U+0009 TAB, U+000A LF and U+000D CR only.
CHAR_TEST(8, IsASCIITabOrNewline, (ch == '\t' || ch == '\n' || ch == '\r'))

// https://infra.spec.whatwg.org/#c0-control
// The test expression holds for the inclusive range U+0000 NULL through
// U+001F; U+0020 SPACE is deliberately not part of this range.
CHAR_TEST(8, IsC0Control, (ch >= '\0' && ch <= '\x1f'))

// https://infra.spec.whatwg.org/#c0-control-or-space
// Same lower bound as IsC0Control, but the upper bound is extended by one code
// point so that U+0020 SPACE is included.
CHAR_TEST(8, IsC0ControlOrSpace, (ch >= '\0' && ch <= ' '))

Expand All @@ -190,12 +194,18 @@ T ASCIILowercase(T ch) {
}

// https://url.spec.whatwg.org/#forbidden-host-code-point
CHAR_TEST(8, IsForbiddenHostCodePoint,
ch == '\0' || ch == '\t' || ch == '\n' || ch == '\r' ||
ch == ' ' || ch == '#' || ch == '%' || ch == '/' ||
ch == ':' || ch == '?' || ch == '@' || ch == '[' ||
ch == '<' || ch == '>' || ch == '\\' || ch == ']' ||
ch == '^' || ch == '|')
CHAR_TEST(8,
IsForbiddenHostCodePoint,
ch == '\0' || ch == '\t' || ch == '\n' || ch == '\r' || ch == ' ' ||
ch == '#' || ch == '/' || ch == ':' || ch == '?' || ch == '@' ||
ch == '[' || ch == '<' || ch == '>' || ch == '\\' || ch == ']' ||
ch == '^' || ch == '|')

// https://url.spec.whatwg.org/#forbidden-domain-code-point
// A superset of the forbidden host code points: in addition to everything
// IsForbiddenHostCodePoint rejects, this also rejects every C0 control
// (IsC0Control), the percent sign, and U+007F DELETE.
CHAR_TEST(8,
IsForbiddenDomainCodePoint,
IsForbiddenHostCodePoint(ch) || IsC0Control(ch) || ch == '%' ||
ch == '\x7f')

// https://url.spec.whatwg.org/#windows-drive-letter
TWO_CHAR_STRING_TEST(8, IsWindowsDriveLetter,
Expand Down Expand Up @@ -359,18 +369,21 @@ void URLHost::ParseIPv6Host(const char* input, size_t length) {
type_ = HostType::H_IPV6;
}

int64_t ParseNumber(const char* start, const char* end) {
// https://url.spec.whatwg.org/#ipv4-number-parser
int64_t ParseIPv4Number(const char* start, const char* end) {
if (end - start == 0) return -1;

unsigned R = 10;
if (end - start >= 2 && start[0] == '0' && (start[1] | 0x20) == 'x') {
start += 2;
R = 16;
}
if (end - start == 0) {
return 0;
} else if (R == 10 && end - start > 1 && start[0] == '0') {
} else if (end - start >= 2 && start[0] == '0') {
start++;
R = 8;
}

if (end - start == 0) return 0;

const char* p = start;

while (p < end) {
Expand All @@ -394,9 +407,33 @@ int64_t ParseNumber(const char* start, const char* end) {
return strtoll(start, nullptr, R);
}

void URLHost::ParseIPv4Host(const char* input, size_t length, bool* is_ipv4) {
// https://url.spec.whatwg.org/#ends-in-a-number-checker
//
// Decides whether the final label of a dot-separated host looks numeric.
//
// `input` is split on '.', keeping empty labels. One trailing empty label
// (i.e. a single trailing dot) is ignored. Returns true when the remaining
// last label is either made up solely of ASCII digits or is accepted by
// ParseIPv4Number (which reports failure with a negative value); returns
// false otherwise, including for empty input and for a lone ".".
bool EndsInANumber(const std::string& input) {
  std::vector<std::string> parts = SplitString(input, '.', false);

  if (parts.empty()) return false;

  // Drop a single trailing empty label; if that was the only label there is
  // nothing left to inspect.
  if (parts.back().empty()) {
    if (parts.size() == 1) return false;
    parts.pop_back();
  }

  const std::string& last = parts.back();

  // If last is non-empty and contains only ASCII digits, then return true.
  // An explicit range check is used instead of ::isdigit: passing a plain
  // (possibly negative) char to isdigit is undefined behaviour, and its
  // result depends on the current C locale.
  const bool only_ascii_digits =
      !last.empty() &&
      std::all_of(last.begin(), last.end(), [](char ch) {
        return ch >= '0' && ch <= '9';
      });
  if (only_ascii_digits) return true;

  // Otherwise the label still counts as a number when the IPv4 number parser
  // accepts it (e.g. hexadecimal forms); an empty label is rejected there.
  const char* last_str = last.c_str();
  return ParseIPv4Number(last_str, last_str + last.size()) >= 0;
}

void URLHost::ParseIPv4Host(const char* input, size_t length) {
CHECK_EQ(type_, HostType::H_FAILED);
*is_ipv4 = false;
const char* pointer = input;
const char* mark = input;
const char* end = pointer + length;
Expand All @@ -414,7 +451,7 @@ void URLHost::ParseIPv4Host(const char* input, size_t length, bool* is_ipv4) {
if (++parts > static_cast<int>(arraysize(numbers))) return;
if (pointer == mark)
return;
int64_t n = ParseNumber(mark, pointer);
int64_t n = ParseIPv4Number(mark, pointer);
if (n < 0)
return;

Expand All @@ -429,7 +466,6 @@ void URLHost::ParseIPv4Host(const char* input, size_t length, bool* is_ipv4) {
pointer++;
}
CHECK_GT(parts, 0);
*is_ipv4 = true;

// If any but the last item in numbers is greater than 255, return failure.
// If the last item in numbers is greater than or equal to
Expand Down Expand Up @@ -457,7 +493,7 @@ void URLHost::ParseOpaqueHost(const char* input, size_t length) {
output.reserve(length);
for (size_t i = 0; i < length; i++) {
const char ch = input[i];
if (ch != '%' && IsForbiddenHostCodePoint(ch)) {
if (IsForbiddenHostCodePoint(ch)) {
return;
} else {
AppendOrEscape(&output, ch, C0_CONTROL_ENCODE_SET);
Expand Down Expand Up @@ -496,16 +532,15 @@ void URLHost::ParseHost(const char* input,
// If any of the following characters are still present, we have to fail
for (size_t n = 0; n < decoded.size(); n++) {
const char ch = decoded[n];
if (IsForbiddenHostCodePoint(ch)) {
if (IsForbiddenDomainCodePoint(ch)) {
return;
}
}

// Check to see if it's an IPv4 IP address
bool is_ipv4;
ParseIPv4Host(decoded.c_str(), decoded.length(), &is_ipv4);
if (is_ipv4)
return;
// If domain ends in a number, then return the result of IPv4 parsing domain
if (EndsInANumber(decoded)) {
return ParseIPv4Host(decoded.c_str(), decoded.length());
}

// If the unicode flag is set, run the result through punycode ToUnicode
if (unicode && !ToUnicode(decoded, &decoded))
Expand Down
6 changes: 4 additions & 2 deletions src/util.cc
Original file line number Diff line number Diff line change
Expand Up @@ -164,15 +164,17 @@ std::string GetHumanReadableProcessName() {
return SPrintF("%s[%d]", GetProcessTitle("Node.js"), uv_os_getpid());
}

std::vector<std::string> SplitString(const std::string& in, char delim) {
std::vector<std::string> SplitString(const std::string& in,
char delim,
bool skipEmpty) {
std::vector<std::string> out;
if (in.empty())
return out;
std::istringstream in_stream(in);
while (in_stream.good()) {
std::string item;
std::getline(in_stream, item, delim);
if (item.empty()) continue;
if (item.empty() && skipEmpty) continue;
out.emplace_back(std::move(item));
}
return out;
Expand Down
4 changes: 3 additions & 1 deletion src/util.h
Original file line number Diff line number Diff line change
Expand Up @@ -645,7 +645,9 @@ struct FunctionDeleter {
template <typename T, void (*function)(T*)>
using DeleteFnPtr = typename FunctionDeleter<T, function>::Pointer;

std::vector<std::string> SplitString(const std::string& in, char delim);
std::vector<std::string> SplitString(const std::string& in,
char delim,
bool skipEmpty = true);

inline v8::MaybeLocal<v8::Value> ToV8Value(v8::Local<v8::Context> context,
std::string_view str,
Expand Down
3 changes: 2 additions & 1 deletion test/common/wpt/worker.js
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,8 @@ const resource = new ResourceLoader(workerData.wptPath);

global.self = global;
global.GLOBAL = {
isWindow() { return false; }
isWindow() { return false; },
isShadowRealm() { return false; }
};
global.require = require;

Expand Down
4 changes: 2 additions & 2 deletions test/fixtures/wpt/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -23,9 +23,9 @@ Last update:
- html/webappapis/timers: https://github.com/web-platform-tests/wpt/tree/5873f2d8f1/html/webappapis/timers
- interfaces: https://github.com/web-platform-tests/wpt/tree/fc086c82d5/interfaces
- performance-timeline: https://github.com/web-platform-tests/wpt/tree/17ebc3aea0/performance-timeline
- resources: https://github.com/web-platform-tests/wpt/tree/fbee645164/resources
- resources: https://github.com/web-platform-tests/wpt/tree/c5b428f15a/resources
- streams: https://github.com/web-platform-tests/wpt/tree/8f60d94439/streams
- url: https://github.com/web-platform-tests/wpt/tree/77d54aa9e0/url
- url: https://github.com/web-platform-tests/wpt/tree/0e5b126cd0/url
- user-timing: https://github.com/web-platform-tests/wpt/tree/df24fb604e/user-timing
- wasm/jsapi: https://github.com/web-platform-tests/wpt/tree/1dd414c796/wasm/jsapi
- wasm/webapi: https://github.com/web-platform-tests/wpt/tree/fd1b23eeaa/wasm/webapi
Expand Down
34 changes: 34 additions & 0 deletions test/fixtures/wpt/resources/accesskey.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
/*
 * Triggers an accesskey by pressing it together with the modifier keys that
 * the current platform expects, then releasing everything again.
 *
 * Tests using this helper must also load the following scripts:
 *   <script src="/resources/testdriver.js"></script>
 *   <script src="/resources/testdriver-actions.js"></script>
 *   <script src="/resources/testdriver-vendor.js"></script>
 *
 * Returns whatever the action sequence's send() returns.
 */
function pressAccessKey(accessKey){
  // Key codes for the left-hand modifier keys.
  const CONTROL_KEY = '\uE009';
  const ALT_KEY = '\uE00A';     // doubles as the Option key on Mac keyboards
  const SHIFT_KEY = '\uE008';

  // The modifier combination differs between browsers and operating systems.
  // See https://developer.mozilla.org/en-US/docs/Web/HTML/Global_attributes/accesskey
  const onMac = navigator.userAgent.indexOf("Mac") !== -1;
  const modifiers = onMac ? [CONTROL_KEY, ALT_KEY] : [SHIFT_KEY, ALT_KEY];

  // Hold the modifiers down in order.
  const held = modifiers.reduce(
    (sequence, key) => sequence.keyDown(key),
    new test_driver.Actions());

  // Tap the access key itself while the modifiers are held.
  const tapped = held.keyDown(accessKey).addTick().keyUp(accessKey);

  // Let go of the modifiers in the opposite order they were pressed.
  const released = modifiers.reduceRight(
    (sequence, key) => sequence.keyUp(key),
    tapped);

  return released.send();
}


16 changes: 16 additions & 0 deletions test/fixtures/wpt/resources/blank.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
<!DOCTYPE html>
<html>
<head>
<meta charset="utf-8" />
<title>Blank Page</title>
<script>
window.onload = function(event) {
// This is needed to ensure the onload event fires when this page is
// opened as a popup.
// See https://github.com/web-platform-tests/wpt/pull/18157
};
</script>
</head>
<body>
</body>
</html>