Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
fix: crash with icu when input locale is invalid (#21969)
* fix: crash with icu when input locale is invalid Backports unicode-org/icu#632 * spec: add regression tests
- Loading branch information
1 parent
d17dfab
commit 538ebab
Showing
6 changed files
with
186 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
date_time_generator_default_locale.patch | ||
posix_util_invalid_locale.patch |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,68 @@ | ||
From 3da02cadcd589bc1b660179860e2125b4277ceca Mon Sep 17 00:00:00 2001 | ||
From: Jeff Genovy <29107334+jefgen@users.noreply.github.com> | ||
Date: Wed, 24 Apr 2019 14:02:06 -0700 | ||
Subject: [PATCH] ICU-20558 Fix regression in DateTimePatternGenerator | ||
|
||
This fixes a regression introduced by commit | ||
b12a927c9365bb38831afbf76fdd0999f8f33deb for issue ICU-13778. | ||
|
||
The above commit improved the error checking in the | ||
DateTimePatternGenerator class, adding checks for errors/failures | ||
where there previously was none at all. This was done in order to | ||
catch catastrophic errors like out-of-memory (OOM), and properly | ||
report them to the caller, rather than ignoring/hiding these errors. | ||
|
||
However, in doing so it exposed a case where the code was depending | ||
on ignoring errors in order to fall-back to the Gregorian calendar | ||
when the default ICU locale is set to root. | ||
|
||
This restores the previous behavior, by allowing the error of | ||
U_MISSING_RESOURCE_ERROR to fall-though and continue without | ||
reporting back an error to the caller. | ||
|
||
Note: This regression was technically introduced in ICU 63, and | ||
also effects ICU 64 as well. | ||
|
||
diff --git a/source/i18n/dtptngen.cpp b/source/i18n/dtptngen.cpp | ||
index fcc5977c56d78901e8c3e99c94550baeb386393d..8128d679129c8d82f200528a9b7119ca7ac3302e 100644 | ||
--- a/source/i18n/dtptngen.cpp | ||
+++ b/source/i18n/dtptngen.cpp | ||
@@ -787,6 +787,7 @@ void | ||
DateTimePatternGenerator::getCalendarTypeToUse(const Locale& locale, CharString& destination, UErrorCode& err) { | ||
destination.clear().append(DT_DateTimeGregorianTag, -1, err); // initial default | ||
if ( U_SUCCESS(err) ) { | ||
+ UErrorCode localStatus = U_ZERO_ERROR; | ||
char localeWithCalendarKey[ULOC_LOCALE_IDENTIFIER_CAPACITY]; | ||
// obtain a locale that always has the calendar key value that should be used | ||
ures_getFunctionalEquivalent( | ||
@@ -798,8 +799,7 @@ DateTimePatternGenerator::getCalendarTypeToUse(const Locale& locale, CharString& | ||
locale.getName(), | ||
nullptr, | ||
FALSE, | ||
- &err); | ||
- if (U_FAILURE(err)) { return; } | ||
+ &localStatus); | ||
localeWithCalendarKey[ULOC_LOCALE_IDENTIFIER_CAPACITY-1] = 0; // ensure null termination | ||
// now get the calendar key value from that locale | ||
char calendarType[ULOC_KEYWORDS_CAPACITY]; | ||
@@ -808,13 +808,17 @@ DateTimePatternGenerator::getCalendarTypeToUse(const Locale& locale, CharString& | ||
"calendar", | ||
calendarType, | ||
ULOC_KEYWORDS_CAPACITY, | ||
- &err); | ||
- if (U_FAILURE(err)) { return; } | ||
+ &localStatus); | ||
+ // If the input locale was invalid, don't fail with missing resource error, instead | ||
+ // continue with default of Gregorian. | ||
+ if (U_FAILURE(localStatus) && localStatus != U_MISSING_RESOURCE_ERROR) { | ||
+ err = localStatus; | ||
+ return; | ||
+ } | ||
if (calendarTypeLen < ULOC_KEYWORDS_CAPACITY) { | ||
destination.clear().append(calendarType, -1, err); | ||
if (U_FAILURE(err)) { return; } | ||
} | ||
- err = U_ZERO_ERROR; | ||
} | ||
} | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,76 @@ | ||
From 974193d9ea7ea14544538c85fdb04dd5fd6ac216 Mon Sep 17 00:00:00 2001 | ||
From: "Steven R. Loomis" <srloomis@us.ibm.com> | ||
Date: Thu, 25 Apr 2019 10:40:28 -0700 | ||
Subject: [PATCH] ICU-20575 fix broken default locale mapping for C.UTF-8 | ||
|
||
Regression was in 1afef30549d93c17bb966c6803d5d943cf055925 | ||
PR #418 [ICU-20187] | ||
|
||
- We dropped the mapping from "C" in uloc_canonicalize, | ||
but then putil did not handle cases where a codepage was | ||
set (such as C.UTF-8). | ||
|
||
- Add an additional check in uprv_getDefaultLocaleID() for | ||
locales that end up as "C" or "POSIX" after removing codepage | ||
suffix. | ||
|
||
- Also fix regression where aa@bb would become aa__BB__BB | ||
(incorrectly doubled __BB) | ||
|
||
diff --git a/source/common/putil.cpp b/source/common/putil.cpp | ||
index 532a0903cdd..289a8aaa141 100644 | ||
--- a/source/common/putil.cpp | ||
+++ b/source/common/putil.cpp | ||
@@ -1560,6 +1560,10 @@ static const char *uprv_getPOSIXIDForCategory(int category) | ||
{ | ||
/* Nothing worked. Give it a nice POSIX default value. */ | ||
posixID = "en_US_POSIX"; | ||
+ // Note: this test will not catch 'C.UTF-8', | ||
+ // that will be handled in uprv_getDefaultLocaleID(). | ||
+ // Leave this mapping here for the uprv_getPOSIXIDForDefaultCodepage() | ||
+ // caller which expects to see "en_US_POSIX" in many branches. | ||
} | ||
return posixID; | ||
} | ||
@@ -1631,8 +1635,8 @@ The leftmost codepage (.xxx) wins. | ||
} | ||
|
||
// Copy the ID into owned memory. | ||
- // Over-allocate in case we replace "@" with "__". | ||
- char *correctedPOSIXLocale = static_cast<char *>(uprv_malloc(uprv_strlen(posixID) + 1 + 1)); | ||
+ // Over-allocate in case we replace "C" with "en_US_POSIX" (+10), + null termination | ||
+ char *correctedPOSIXLocale = static_cast<char *>(uprv_malloc(uprv_strlen(posixID) + 10 + 1)); | ||
if (correctedPOSIXLocale == nullptr) { | ||
return nullptr; | ||
} | ||
@@ -1641,11 +1645,18 @@ The leftmost codepage (.xxx) wins. | ||
char *limit; | ||
if ((limit = uprv_strchr(correctedPOSIXLocale, '.')) != nullptr) { | ||
*limit = 0; | ||
- if ((limit = uprv_strchr(correctedPOSIXLocale, '@')) != nullptr) { | ||
- *limit = 0; | ||
- } | ||
+ } | ||
+ if ((limit = uprv_strchr(correctedPOSIXLocale, '@')) != nullptr) { | ||
+ *limit = 0; | ||
} | ||
|
||
+ if ((uprv_strcmp("C", correctedPOSIXLocale) == 0) // no @ variant | ||
+ || (uprv_strcmp("POSIX", correctedPOSIXLocale) == 0)) { | ||
+ // Raw input was C.* or POSIX.*, Give it a nice POSIX default value. | ||
+ // (The "C"/"POSIX" case is handled in uprv_getPOSIXIDForCategory()) | ||
+ uprv_strcpy(correctedPOSIXLocale, "en_US_POSIX"); | ||
+ } | ||
+ | ||
/* Note that we scan the *uncorrected* ID. */ | ||
const char *p; | ||
if ((p = uprv_strrchr(posixID, '@')) != nullptr) { | ||
@@ -1668,7 +1679,7 @@ The leftmost codepage (.xxx) wins. | ||
if ((q = uprv_strchr(p, '.')) != nullptr) { | ||
/* How big will the resulting string be? */ | ||
int32_t len = (int32_t)(uprv_strlen(correctedPOSIXLocale) + (q-p)); | ||
- uprv_strncat(correctedPOSIXLocale, p, q-p); | ||
+ uprv_strncat(correctedPOSIXLocale, p, q-p); // do not include charset | ||
correctedPOSIXLocale[len] = 0; | ||
} | ||
else { |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
console.log(new Date().toLocaleString()) | ||
console.log(new Intl.NumberFormat().resolvedOptions().locale) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters