Skip to content

Commit

Permalink
fix: crash with icu when input locale is invalid (#21969)
Browse files Browse the repository at this point in the history
* fix: crash with icu when input locale is invalid

Backports unicode-org/icu#632

* spec: add regression tests
  • Loading branch information
deepak1556 committed Jan 31, 2020
1 parent d17dfab commit 538ebab
Show file tree
Hide file tree
Showing 6 changed files with 186 additions and 0 deletions.
2 changes: 2 additions & 0 deletions patches/config.json
Expand Up @@ -9,6 +9,8 @@

"src/electron/patches/perfetto": "src/third_party/perfetto",

"src/electron/patches/icu": "src/third_party/icu",

"src/electron/patches/v8": "src/v8",

"src/electron/patches/node": "src/third_party/electron_node"
Expand Down
2 changes: 2 additions & 0 deletions patches/icu/.patches
@@ -0,0 +1,2 @@
date_time_generator_default_locale.patch
posix_util_invalid_locale.patch
68 changes: 68 additions & 0 deletions patches/icu/date_time_generator_default_locale.patch
@@ -0,0 +1,68 @@
From 3da02cadcd589bc1b660179860e2125b4277ceca Mon Sep 17 00:00:00 2001
From: Jeff Genovy <29107334+jefgen@users.noreply.github.com>
Date: Wed, 24 Apr 2019 14:02:06 -0700
Subject: [PATCH] ICU-20558 Fix regression in DateTimePatternGenerator

This fixes a regression introduced by commit
b12a927c9365bb38831afbf76fdd0999f8f33deb for issue ICU-13778.

The above commit improved the error checking in the
DateTimePatternGenerator class, adding checks for errors/failures
where there previously was none at all. This was done in order to
catch catastrophic errors like out-of-memory (OOM), and properly
report them to the caller, rather than ignoring/hiding these errors.

However, in doing so it exposed a case where the code was depending
on ignoring errors in order to fall-back to the Gregorian calendar
when the default ICU locale is set to root.

This restores the previous behavior, by allowing the error of
U_MISSING_RESOURCE_ERROR to fall-though and continue without
reporting back an error to the caller.

Note: This regression was technically introduced in ICU 63, and
also effects ICU 64 as well.

diff --git a/source/i18n/dtptngen.cpp b/source/i18n/dtptngen.cpp
index fcc5977c56d78901e8c3e99c94550baeb386393d..8128d679129c8d82f200528a9b7119ca7ac3302e 100644
--- a/source/i18n/dtptngen.cpp
+++ b/source/i18n/dtptngen.cpp
@@ -787,6 +787,7 @@ void
DateTimePatternGenerator::getCalendarTypeToUse(const Locale& locale, CharString& destination, UErrorCode& err) {
destination.clear().append(DT_DateTimeGregorianTag, -1, err); // initial default
if ( U_SUCCESS(err) ) {
+ UErrorCode localStatus = U_ZERO_ERROR;
char localeWithCalendarKey[ULOC_LOCALE_IDENTIFIER_CAPACITY];
// obtain a locale that always has the calendar key value that should be used
ures_getFunctionalEquivalent(
@@ -798,8 +799,7 @@ DateTimePatternGenerator::getCalendarTypeToUse(const Locale& locale, CharString&
locale.getName(),
nullptr,
FALSE,
- &err);
- if (U_FAILURE(err)) { return; }
+ &localStatus);
localeWithCalendarKey[ULOC_LOCALE_IDENTIFIER_CAPACITY-1] = 0; // ensure null termination
// now get the calendar key value from that locale
char calendarType[ULOC_KEYWORDS_CAPACITY];
@@ -808,13 +808,17 @@ DateTimePatternGenerator::getCalendarTypeToUse(const Locale& locale, CharString&
"calendar",
calendarType,
ULOC_KEYWORDS_CAPACITY,
- &err);
- if (U_FAILURE(err)) { return; }
+ &localStatus);
+ // If the input locale was invalid, don't fail with missing resource error, instead
+ // continue with default of Gregorian.
+ if (U_FAILURE(localStatus) && localStatus != U_MISSING_RESOURCE_ERROR) {
+ err = localStatus;
+ return;
+ }
if (calendarTypeLen < ULOC_KEYWORDS_CAPACITY) {
destination.clear().append(calendarType, -1, err);
if (U_FAILURE(err)) { return; }
}
- err = U_ZERO_ERROR;
}
}

76 changes: 76 additions & 0 deletions patches/icu/posix_util_invalid_locale.patch
@@ -0,0 +1,76 @@
From 974193d9ea7ea14544538c85fdb04dd5fd6ac216 Mon Sep 17 00:00:00 2001
From: "Steven R. Loomis" <srloomis@us.ibm.com>
Date: Thu, 25 Apr 2019 10:40:28 -0700
Subject: [PATCH] ICU-20575 fix broken default locale mapping for C.UTF-8

Regression was in 1afef30549d93c17bb966c6803d5d943cf055925
PR #418 [ICU-20187]

- We dropped the mapping from "C" in uloc_canonicalize,
but then putil did not handle cases where a codepage was
set (such as C.UTF-8).

- Add an additional check in uprv_getDefaultLocaleID() for
locales that end up as "C" or "POSIX" after removing codepage
suffix.

- Also fix regression where aa@bb would become aa__BB__BB
(incorrectly doubled __BB)

diff --git a/source/common/putil.cpp b/source/common/putil.cpp
index 532a0903cdd..289a8aaa141 100644
--- a/source/common/putil.cpp
+++ b/source/common/putil.cpp
@@ -1560,6 +1560,10 @@ static const char *uprv_getPOSIXIDForCategory(int category)
{
/* Nothing worked. Give it a nice POSIX default value. */
posixID = "en_US_POSIX";
+ // Note: this test will not catch 'C.UTF-8',
+ // that will be handled in uprv_getDefaultLocaleID().
+ // Leave this mapping here for the uprv_getPOSIXIDForDefaultCodepage()
+ // caller which expects to see "en_US_POSIX" in many branches.
}
return posixID;
}
@@ -1631,8 +1635,8 @@ The leftmost codepage (.xxx) wins.
}

// Copy the ID into owned memory.
- // Over-allocate in case we replace "@" with "__".
- char *correctedPOSIXLocale = static_cast<char *>(uprv_malloc(uprv_strlen(posixID) + 1 + 1));
+ // Over-allocate in case we replace "C" with "en_US_POSIX" (+10), + null termination
+ char *correctedPOSIXLocale = static_cast<char *>(uprv_malloc(uprv_strlen(posixID) + 10 + 1));
if (correctedPOSIXLocale == nullptr) {
return nullptr;
}
@@ -1641,11 +1645,18 @@ The leftmost codepage (.xxx) wins.
char *limit;
if ((limit = uprv_strchr(correctedPOSIXLocale, '.')) != nullptr) {
*limit = 0;
- if ((limit = uprv_strchr(correctedPOSIXLocale, '@')) != nullptr) {
- *limit = 0;
- }
+ }
+ if ((limit = uprv_strchr(correctedPOSIXLocale, '@')) != nullptr) {
+ *limit = 0;
}

+ if ((uprv_strcmp("C", correctedPOSIXLocale) == 0) // no @ variant
+ || (uprv_strcmp("POSIX", correctedPOSIXLocale) == 0)) {
+ // Raw input was C.* or POSIX.*, Give it a nice POSIX default value.
+ // (The "C"/"POSIX" case is handled in uprv_getPOSIXIDForCategory())
+ uprv_strcpy(correctedPOSIXLocale, "en_US_POSIX");
+ }
+
/* Note that we scan the *uncorrected* ID. */
const char *p;
if ((p = uprv_strrchr(posixID, '@')) != nullptr) {
@@ -1668,7 +1679,7 @@ The leftmost codepage (.xxx) wins.
if ((q = uprv_strchr(p, '.')) != nullptr) {
/* How big will the resulting string be? */
int32_t len = (int32_t)(uprv_strlen(correctedPOSIXLocale) + (q-p));
- uprv_strncat(correctedPOSIXLocale, p, q-p);
+ uprv_strncat(correctedPOSIXLocale, p, q-p); // do not include charset
correctedPOSIXLocale[len] = 0;
}
else {
2 changes: 2 additions & 0 deletions spec/fixtures/module/icu.js
@@ -0,0 +1,2 @@
console.log(new Date().toLocaleString())
console.log(new Intl.NumberFormat().resolvedOptions().locale)
36 changes: 36 additions & 0 deletions spec/node-spec.js
Expand Up @@ -154,6 +154,42 @@ describe('node feature', () => {
done()
})
})

it('doesnt crash for locale that has missing resource from icu', (done) => {
child = ChildProcess.spawn(process.execPath, [path.join(__dirname, 'fixtures', 'module', 'icu.js')], {
env: {
ELECTRON_RUN_AS_NODE: true,
LC_ALL: 'ja'
}
})

let output = ''
child.stdout.on('data', data => {
output += data
})
child.stdout.on('close', () => {
expect(output).to.not.be.null()
done()
})
})

it('doesnt crash for locale of the format aa@BB', (done) => {
child = ChildProcess.spawn(process.execPath, [path.join(__dirname, 'fixtures', 'module', 'icu.js')], {
env: {
ELECTRON_RUN_AS_NODE: true,
LC_ALL: 'fr@EURO'
}
})

let output = ''
child.stdout.on('data', data => {
output += data
})
child.stdout.on('close', () => {
expect(output).to.not.be.null()
done()
})
})
})

describe('child_process.exec', () => {
Expand Down

0 comments on commit 538ebab

Please sign in to comment.