Skip to content

Commit

Permalink
deps: backport 4e18190 from V8 upstream
Browse files Browse the repository at this point in the history
Original commit message:

    Timezone name check fix

    1. Location names with more than one underscore (e.g. Ho_Chi_Minh)
       didn't work because of the way capturing works with repeated patterns
       in RE. It's now supported by changing the RE to capture the whole string
       and splitting on '_' in the next step.

    2. Adds support for location names with a hyphen

    3. Adds support for timezone ids with three parts (e.g.
       America/Argentina/Buenos_Aires)

    4. Adds special handling of 'au', 'es' and 'of' in zone ids. They need to be kept in lowercase. (see the full list at https://en.wikipedia.org/wiki/List_of_tz_database_time_zones )

    5. Adds regression tests for all the above and makes the existing tests
       more robust against future ICU changes. ICU canonicalizes zone names to
       deprecated names, but it may change. (
       http://bugs.icu-project.org/trac/ticket/12044 )

    BUG=364374
    LOG=Y

    Review URL: https://codereview.chromium.org/1529363005

    Cr-Commit-Position: refs/heads/master@{#33097}

PR-URL: #15562
Reviewed-By: James M Snell <jasnell@gmail.com>
Reviewed-By: Ben Noordhuis <info@bnoordhuis.nl>
Reviewed-By: Steven R Loomis <srloomis@us.ibm.com>
  • Loading branch information
jungshik authored and MylesBorins committed Oct 25, 2017
1 parent 43d1ac3 commit 9c3e246
Show file tree
Hide file tree
Showing 6 changed files with 146 additions and 27 deletions.
2 changes: 1 addition & 1 deletion deps/v8/include/v8-version.h
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
#define V8_MAJOR_VERSION 4
#define V8_MINOR_VERSION 5
#define V8_BUILD_NUMBER 103
#define V8_PATCH_LEVEL 52
#define V8_PATCH_LEVEL 53

// Use 1 for candidates and 0 otherwise.
// (Boolean macro values are not supported by all preprocessors.)
Expand Down
71 changes: 59 additions & 12 deletions deps/v8/src/i18n.js
Original file line number Diff line number Diff line change
Expand Up @@ -170,12 +170,25 @@ var TIMEZONE_NAME_CHECK_RE = UNDEFINED;

/**
 * Returns the lazily created, cached regular expression that checks the
 * overall shape of a time zone ID: Area/Location(/Location)*.
 *
 * Capture groups of a successful match:
 *   1: the area (ASCII letters only),
 *   2: the first location (ASCII letters, '_' and '-'),
 *   3: every further '/Location' part as one string, leading '/' included,
 *      or undefined when the ID has only two parts.
 */
function GetTimezoneNameCheckRE() {
  if (IS_UNDEFINED(TIMEZONE_NAME_CHECK_RE)) {
    // The trailing group is optional ('?'), not repeated ('*'). The inner
    // '+' already consumes every remaining '/Location' part, and wrapping
    // it in '*' lets a rejected input be split into groups in exponentially
    // many ways, so a hostile ID could stall the match.
    TIMEZONE_NAME_CHECK_RE = new GlobalRegExp(
        '^([A-Za-z]+)/([A-Za-z_-]+)((?:\/[A-Za-z_-]+)+)?$');
  }
  return TIMEZONE_NAME_CHECK_RE;
}

/**
* Matches valid location parts of IANA time zone names.
*/
var TIMEZONE_NAME_LOCATION_PART_RE = UNDEFINED;

/**
 * Returns the lazily created, cached regular expression that validates one
 * location part of a time zone ID: ASCII-letter words joined by single '_'
 * or '-' characters.
 *
 * Capture groups of a successful match:
 *   1: the first word,
 *   2: the rest of the location starting at its first separator
 *      (e.g. '_Chi_Minh'), or undefined for a single-word location.
 */
function GetTimezoneNameLocationPartRE() {
  if (IS_UNDEFINED(TIMEZONE_NAME_LOCATION_PART_RE)) {
    // The trailing group is optional ('?'), not repeated ('*'). The inner
    // '+' already consumes every remaining word, and wrapping it in '*'
    // lets a rejected input such as 'a_b_b_..._b_' be split into groups in
    // exponentially many ways before the match finally fails.
    TIMEZONE_NAME_LOCATION_PART_RE =
        new GlobalRegExp('^([A-Za-z]+)((?:[_-][A-Za-z]+)+)?$');
  }
  return TIMEZONE_NAME_LOCATION_PART_RE;
}

/**
* Adds bound method to the prototype of the given object.
*/
Expand Down Expand Up @@ -672,6 +685,34 @@ function toTitleCaseWord(word) {
%StringToLowerCase(%_CallFunction(word, 1, StringSubstr));
}

/**
* Returns titlecased location, bueNos_airES -> Buenos_Aires
* or ho_cHi_minH -> Ho_Chi_Minh. It is locale-agnostic and only
* deals with ASCII only characters.
* 'of', 'au' and 'es' are special-cased and lowercased.
*/
function toTitleCaseTimezoneLocation(location) {
var match = %_CallFunction(location, GetTimezoneNameLocationPartRE(), StringMatch);
if (IS_NULL(match)) throw MakeRangeError(kExpectedLocation, location);

var result = toTitleCaseWord(match[1]);
if (!IS_UNDEFINED(match[2]) && 2 < match.length) {
// The first character is a separator, '_' or '-'.
// None of IANA zone names has both '_' and '-'.
var separator = %_CallFunction(match[2], 0, 1, StringSubstring);
var parts = %_CallFunction(match[2], separator, StringSplit);
for (var i = 1; i < parts.length; i++) {
var part = parts[i]
var lowercasedPart = %StringToLowerCase(part);
result = result + separator +
((lowercasedPart !== 'es' &&
lowercasedPart !== 'of' && lowercasedPart !== 'au') ?
toTitleCaseWord(part) : lowercasedPart);
}
}
return result;
}

/**
* Canonicalizes the language tag, or throws in case the tag is invalid.
*/
Expand Down Expand Up @@ -1723,8 +1764,8 @@ addBoundMethod(Intl.DateTimeFormat, 'v8Parse', parseDate, 1);


/**
* Returns canonical Area/Location name, or throws an exception if the zone
* name is invalid IANA name.
* Returns the canonical Area/Location(/Location) name, or throws an exception
* if the zone name is not a valid IANA name.
*/
function canonicalizeTimeZoneID(tzID) {
// Skip undefined zones.
Expand All @@ -1739,16 +1780,22 @@ function canonicalizeTimeZoneID(tzID) {
return 'UTC';
}

// We expect only _ and / beside ASCII letters.
// All inputs should conform to Area/Location from now on.
// TODO(jshin): Add support for Etc/GMT[+-]([1-9]|1[0-2])

// We expect only _, '-' and / beside ASCII letters.
// All inputs should conform to Area/Location(/Location)* from now on.
var match = %_CallFunction(tzID, GetTimezoneNameCheckRE(), StringMatch);
if (IS_NULL(match)) throw MakeRangeError(kExpectedLocation, tzID);
if (IS_NULL(match)) throw MakeRangeError(kExpectedTimezoneID, tzID);

var result = toTitleCaseTimezoneLocation(match[1]) + '/' +
toTitleCaseTimezoneLocation(match[2]);

var result = toTitleCaseWord(match[1]) + '/' + toTitleCaseWord(match[2]);
var i = 3;
while (!IS_UNDEFINED(match[i]) && i < match.length) {
result = result + '_' + toTitleCaseWord(match[i]);
i++;
if (!IS_UNDEFINED(match[3]) && 3 < match.length) {
var locations = %_CallFunction(match[3], '/', StringSplit);
// The 1st element is empty. Starts with i=1.
for (var i = 1; i < locations.length; i++) {
result = result + '/' + toTitleCaseTimezoneLocation(locations[i]);
}
}

return result;
Expand Down
6 changes: 5 additions & 1 deletion deps/v8/src/messages.h
Original file line number Diff line number Diff line change
Expand Up @@ -261,7 +261,11 @@ class CallSite {
T(UnsupportedSuper, "Unsupported reference to 'super'") \
/* RangeError */ \
T(DateRange, "Provided date is not in valid range.") \
T(ExpectedLocation, "Expected Area/Location for time zone, got %") \
T(ExpectedTimezoneID, \
"Expected Area/Location(/Location)* for time zone, got %") \
T(ExpectedLocation, \
"Expected letters optionally connected with underscores or hyphens for " \
"a location, got %") \
T(InvalidArrayBufferLength, "Invalid array buffer length") \
T(InvalidArrayLength, "Invalid array length") \
T(InvalidCodePoint, "Invalid code point %") \
Expand Down
13 changes: 0 additions & 13 deletions deps/v8/test/mjsunit/regress/regress-487322.js

This file was deleted.

56 changes: 56 additions & 0 deletions deps/v8/test/mjsunit/regress/regress-crbug-364374.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
// Copyright 2015 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

if (this.Intl) {
  // chromium:364374
  // Checks that Intl.DateTimeFormat accepts and normalizes time zone IDs
  // whose locations contain several underscores, hyphens or more than two
  // parts, and that malformed IDs throw.

  // Declared with 'var' so that the test does not create implicit globals.
  var df;
  var df1;
  var df2;

  // Locations with 2 underscores are accepted and normalized.
  // 'of' and 'es' are always lowercased.
  df = new Intl.DateTimeFormat('en-US', {'timeZone': 'eUrope/isLe_OF_man'});
  assertEquals('Europe/Isle_of_Man', df.resolvedOptions().timeZone);

  df = new Intl.DateTimeFormat('en-US', {'timeZone': 'africa/Dar_eS_salaam'});
  assertEquals('Africa/Dar_es_Salaam', df.resolvedOptions().timeZone);

  df = new Intl.DateTimeFormat('en-US', {'timeZone': 'America/port_of_spain'});
  assertEquals('America/Port_of_Spain', df.resolvedOptions().timeZone);

  // Zone ids with more than 2 parts are accepted and normalized.
  df = new Intl.DateTimeFormat('en-US', {'timeZone': 'America/north_Dakota/new_salem'});
  assertEquals('America/North_Dakota/New_Salem', df.resolvedOptions().timeZone);

  // 3-part zone IDs are accepted and normalized.
  // Two Buenos Aires aliases are identical.
  df1 = new Intl.DateTimeFormat('en-US', {'timeZone': 'America/aRgentina/buenos_aIres'});
  df2 = new Intl.DateTimeFormat('en-US', {'timeZone': 'America/Argentina/Buenos_Aires'});
  assertEquals(df1.resolvedOptions().timeZone, df2.resolvedOptions().timeZone);

  df2 = new Intl.DateTimeFormat('en-US', {'timeZone': 'America/Buenos_Aires'});
  assertEquals(df1.resolvedOptions().timeZone, df2.resolvedOptions().timeZone);

  df1 = new Intl.DateTimeFormat('en-US', {'timeZone': 'America/Indiana/Indianapolis'});
  df2 = new Intl.DateTimeFormat('en-US', {'timeZone': 'America/Indianapolis'});
  assertEquals(df1.resolvedOptions().timeZone, df2.resolvedOptions().timeZone);

  // ICU does not recognize East-Indiana. Add later when it does.
  // df2 = new Intl.DateTimeFormat('en-US', {'timeZone': 'America/East-Indiana'})
  // assertEquals(df1.resolvedOptions().timeZone, df2.resolvedOptions().timeZone);


  // Zone IDs with hyphens. 'au' has to be in lowercase.
  df = new Intl.DateTimeFormat('en-US', {'timeZone': 'America/port-aU-pRince'});
  assertEquals('America/Port-au-Prince', df.resolvedOptions().timeZone);

  // Accepts Ho_Chi_Minh and treats it as identical to Saigon
  df1 = new Intl.DateTimeFormat('en-US', {'timeZone': 'Asia/Ho_Chi_Minh'});
  df2 = new Intl.DateTimeFormat('en-US', {'timeZone': 'Asia/Saigon'});
  assertEquals(df1.resolvedOptions().timeZone, df2.resolvedOptions().timeZone);

  // Throws for invalid timezone ids.
  assertThrows(() => Intl.DateTimeFormat(undefined, {timeZone: 'Europe/_Paris'}));
  assertThrows(() => Intl.DateTimeFormat(undefined, {timeZone: 'America/New__York'}));
  assertThrows(() => Intl.DateTimeFormat(undefined, {timeZone: 'America//New_York'}));
  assertThrows(() => Intl.DateTimeFormat(undefined, {timeZone: 'America/New_York_'}));
  assertThrows(() => Intl.DateTimeFormat(undefined, {timeZone: 'America/New_Y0rk'}));
}
25 changes: 25 additions & 0 deletions deps/v8/test/mjsunit/regress/regress-crbug-487322.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
// Copyright 2015 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

if (this.Intl) {
  // Declared with 'var' so that the test does not create implicit globals.
  var df;
  var df1;
  var df2;

  // Normalizes Kat{h,}mandu (chromium:487322)
  // According to the IANA timezone db, Kathmandu is the current canonical
  // name, but ICU got it backward. To make this test robust against a future
  // ICU change ( http://bugs.icu-project.org/trac/ticket/12044 ),
  // just check that Kat(h)mandu is resolved identically.
  df1 = new Intl.DateTimeFormat('en-US', {'timeZone': 'Asia/Katmandu'});
  df2 = new Intl.DateTimeFormat('en-US', {'timeZone': 'Asia/Kathmandu'});
  assertEquals(df1.resolvedOptions().timeZone, df2.resolvedOptions().timeZone);

  // Normalizes Ulan_Bator to Ulaanbaatar. Unlike Kat(h)mandu, ICU got this
  // right, so the exact resolved name is checked for both spellings.
  df = new Intl.DateTimeFormat('en-US', {'timeZone': 'Asia/Ulaanbaatar'});
  assertEquals('Asia/Ulaanbaatar', df.resolvedOptions().timeZone);

  df = new Intl.DateTimeFormat('en-US', {'timeZone': 'Asia/Ulan_Bator'});
  assertEquals('Asia/Ulaanbaatar', df.resolvedOptions().timeZone);

  // Throws for unsupported time zones.
  assertThrows(() => Intl.DateTimeFormat(undefined, {timeZone: 'Aurope/Paris'}));
}

0 comments on commit 9c3e246

Please sign in to comment.