diff --git a/deps/icu-small/source/common/cmemory.h b/deps/icu-small/source/common/cmemory.h index b5e0c2b7a17296..c9156f253cf1c7 100644 --- a/deps/icu-small/source/common/cmemory.h +++ b/deps/icu-small/source/common/cmemory.h @@ -725,9 +725,14 @@ class MemoryPool : public UMemory { } MemoryPool& operator=(MemoryPool&& other) U_NOEXCEPT { - fCount = other.fCount; - fPool = std::move(other.fPool); - other.fCount = 0; + // Since `this` may contain instances that need to be deleted, we can't + // just throw them away and replace them with `other`. The normal way of + // dealing with this in C++ is to swap `this` and `other`, rather than + // simply overwrite: the destruction of `other` can then take care of + // running MemoryPool::~MemoryPool() over the still-to-be-deallocated + // instances. + std::swap(fCount, other.fCount); + std::swap(fPool, other.fPool); return *this; } @@ -796,9 +801,6 @@ class MemoryPool : public UMemory { template class MaybeStackVector : protected MemoryPool { public: - using MemoryPool::MemoryPool; - using MemoryPool::operator=; - template T* emplaceBack(Args&&... args) { return this->create(args...); diff --git a/deps/icu-small/source/common/locid.cpp b/deps/icu-small/source/common/locid.cpp index 2804e36bf62cb1..874e4a70556f31 100644 --- a/deps/icu-small/source/common/locid.cpp +++ b/deps/icu-small/source/common/locid.cpp @@ -35,6 +35,7 @@ #include "unicode/bytestream.h" #include "unicode/locid.h" +#include "unicode/localebuilder.h" #include "unicode/strenum.h" #include "unicode/stringpiece.h" #include "unicode/uloc.h" @@ -1028,7 +1029,7 @@ class AliasReplacer { // place the the replaced locale ID in out and return true. // Otherwise return false for no replacement or error. 
bool replace( - const Locale& locale, CharString& out, UErrorCode status); + const Locale& locale, CharString& out, UErrorCode& status); private: const char* language; @@ -1336,10 +1337,13 @@ AliasReplacer::replaceTerritory(UVector& toBeFreed, UErrorCode& status) // Cannot use nullptr for language because that will construct // the default locale, in that case, use "und" to get the correct // locale. - Locale l(language == nullptr ? "und" : language, nullptr, script); + Locale l = LocaleBuilder() + .setLanguage(language == nullptr ? "und" : language) + .setScript(script) + .build(status); l.addLikelySubtags(status); const char* likelyRegion = l.getCountry(); - CharString* item = nullptr; + LocalPointer item; if (likelyRegion != nullptr && uprv_strlen(likelyRegion) > 0) { size_t len = uprv_strlen(likelyRegion); const char* foundInReplacement = uprv_strstr(replacement, @@ -1351,20 +1355,22 @@ AliasReplacer::replaceTerritory(UVector& toBeFreed, UErrorCode& status) *(foundInReplacement-1) == ' '); U_ASSERT(foundInReplacement[len] == ' ' || foundInReplacement[len] == '\0'); - item = new CharString(foundInReplacement, (int32_t)len, status); + item.adoptInsteadAndCheckErrorCode( + new CharString(foundInReplacement, (int32_t)len, status), status); } } - if (item == nullptr) { - item = new CharString(replacement, - (int32_t)(firstSpace - replacement), status); + if (item.isNull() && U_SUCCESS(status)) { + item.adoptInsteadAndCheckErrorCode( + new CharString(replacement, + (int32_t)(firstSpace - replacement), status), status); } if (U_FAILURE(status)) { return false; } - if (item == nullptr) { + if (item.isNull()) { status = U_MEMORY_ALLOCATION_ERROR; return false; } replacedRegion = item->data(); - toBeFreed.addElement(item, status); + toBeFreed.addElement(item.orphan(), status); } U_ASSERT(!same(region, replacedRegion)); region = replacedRegion; @@ -1453,7 +1459,7 @@ AliasReplacer::outputToString( int32_t variantsStart = out.length(); for (int32_t i = 0; i < 
variants.size(); i++) { out.append(SEP_CHAR, status) - .append((const char*)((UVector*)variants.elementAt(i)), + .append((const char*)(variants.elementAt(i)), status); } T_CString_toUpperCase(out.data() + variantsStart); @@ -1470,7 +1476,7 @@ AliasReplacer::outputToString( } bool -AliasReplacer::replace(const Locale& locale, CharString& out, UErrorCode status) +AliasReplacer::replace(const Locale& locale, CharString& out, UErrorCode& status) { data = AliasData::singleton(status); if (U_FAILURE(status)) { @@ -2453,9 +2459,13 @@ Locale::setKeywordValue(const char* keywordName, const char* keywordValue, UErro if (U_FAILURE(status)) { return; } + if (status == U_STRING_NOT_TERMINATED_WARNING) { + status = U_ZERO_ERROR; + } int32_t bufferLength = uprv_max((int32_t)(uprv_strlen(fullName) + 1), ULOC_FULLNAME_CAPACITY); int32_t newLength = uloc_setKeywordValue(keywordName, keywordValue, fullName, bufferLength, &status) + 1; + U_ASSERT(status != U_STRING_NOT_TERMINATED_WARNING); /* Handle the case the current buffer is not enough to hold the new id */ if (status == U_BUFFER_OVERFLOW_ERROR) { U_ASSERT(newLength > bufferLength); @@ -2472,6 +2482,7 @@ Locale::setKeywordValue(const char* keywordName, const char* keywordValue, UErro fullName = newFullName; status = U_ZERO_ERROR; uloc_setKeywordValue(keywordName, keywordValue, fullName, newLength, &status); + U_ASSERT(status != U_STRING_NOT_TERMINATED_WARNING); } else { U_ASSERT(newLength <= bufferLength); } diff --git a/deps/icu-small/source/common/rbbitblb.cpp b/deps/icu-small/source/common/rbbitblb.cpp index 65b597c4a936b0..cbd8f315c252d8 100644 --- a/deps/icu-small/source/common/rbbitblb.cpp +++ b/deps/icu-small/source/common/rbbitblb.cpp @@ -1402,12 +1402,13 @@ void RBBITableBuilder::exportTable(void *where) { U_ASSERT (sd->fAccepting <= 255); U_ASSERT (sd->fLookAhead <= 255); U_ASSERT (0 <= sd->fTagsIdx && sd->fTagsIdx <= 255); - row->r8.fAccepting = sd->fAccepting; - row->r8.fLookAhead = sd->fLookAhead; - row->r8.fTagsIdx 
= sd->fTagsIdx; + RBBIStateTableRow8 *r8 = (RBBIStateTableRow8*)row; + r8->fAccepting = sd->fAccepting; + r8->fLookAhead = sd->fLookAhead; + r8->fTagsIdx = sd->fTagsIdx; for (col=0; colfDtran->elementAti(col) <= kMaxStateFor8BitsTable); - row->r8.fNextState[col] = sd->fDtran->elementAti(col); + r8->fNextState[col] = sd->fDtran->elementAti(col); } } else { U_ASSERT (sd->fAccepting <= 0xffff); @@ -1603,12 +1604,13 @@ void RBBITableBuilder::exportSafeTable(void *where) { UnicodeString *rowString = (UnicodeString *)fSafeTable->elementAt(state); RBBIStateTableRow *row = (RBBIStateTableRow *)(table->fTableData + state*table->fRowLen); if (use8BitsForSafeTable()) { - row->r8.fAccepting = 0; - row->r8.fLookAhead = 0; - row->r8.fTagsIdx = 0; + RBBIStateTableRow8 *r8 = (RBBIStateTableRow8*)row; + r8->fAccepting = 0; + r8->fLookAhead = 0; + r8->fTagsIdx = 0; for (col=0; colcharAt(col) <= kMaxStateFor8BitsTable); - row->r8.fNextState[col] = static_cast(rowString->charAt(col)); + r8->fNextState[col] = static_cast(rowString->charAt(col)); } } else { row->r16.fAccepting = 0; diff --git a/deps/icu-small/source/common/uloc.cpp b/deps/icu-small/source/common/uloc.cpp index 522f33dbe243a9..ebfbb506508c1b 100644 --- a/deps/icu-small/source/common/uloc.cpp +++ b/deps/icu-small/source/common/uloc.cpp @@ -877,6 +877,9 @@ uloc_setKeywordValue(const char* keywordName, if(U_FAILURE(*status)) { return -1; } + if (*status == U_STRING_NOT_TERMINATED_WARNING) { + *status = U_ZERO_ERROR; + } if (keywordName == NULL || keywordName[0] == 0 || bufferCapacity <= 1) { *status = U_ILLEGAL_ARGUMENT_ERROR; return 0; @@ -914,6 +917,7 @@ uloc_setKeywordValue(const char* keywordName, startSearchHere = (char*)locale_getKeywordsStart(buffer); if(startSearchHere == NULL || (startSearchHere[1]==0)) { if(keywordValueLen == 0) { /* no keywords = nothing to remove */ + U_ASSERT(*status != U_STRING_NOT_TERMINATED_WARNING); return bufLen; } @@ -933,6 +937,7 @@ uloc_setKeywordValue(const char* keywordName, 
startSearchHere += keywordNameLen; *startSearchHere++ = '='; uprv_strcpy(startSearchHere, keywordValueBuffer); + U_ASSERT(*status != U_STRING_NOT_TERMINATED_WARNING); return needLen; } /* end shortcut - no @ */ @@ -1047,13 +1052,27 @@ uloc_setKeywordValue(const char* keywordName, if (!handledInputKeyAndValue || U_FAILURE(*status)) { /* if input key/value specified removal of a keyword not present in locale, or * there was an error in CharString.append, leave original locale alone. */ + U_ASSERT(*status != U_STRING_NOT_TERMINATED_WARNING); return bufLen; } // needLen = length of the part before '@' needLen = (int32_t)(startSearchHere - buffer); - return needLen + updatedKeysAndValues.extract( + // Check to see can we fit the startSearchHere, if not, return + // U_BUFFER_OVERFLOW_ERROR without copy updatedKeysAndValues into it. + // We do this because this API function does not behave like most others: + // It promises never to set a U_STRING_NOT_TERMINATED_WARNING. + // When the contents fits but without the terminating NUL, in this case we need to not change + // the buffer contents and return with a buffer overflow error. + int32_t appendLength = updatedKeysAndValues.length(); + if (appendLength >= bufferCapacity - needLen) { + *status = U_BUFFER_OVERFLOW_ERROR; + return needLen + appendLength; + } + needLen += updatedKeysAndValues.extract( startSearchHere, bufferCapacity - needLen, *status); + U_ASSERT(*status != U_STRING_NOT_TERMINATED_WARNING); + return needLen; } /* ### ID parsing implementation **************************************************/ diff --git a/deps/icu-small/source/common/unicode/docmain.h b/deps/icu-small/source/common/unicode/docmain.h index f09d7e1dc29cec..14491494c5ca7a 100644 --- a/deps/icu-small/source/common/unicode/docmain.h +++ b/deps/icu-small/source/common/unicode/docmain.h @@ -143,6 +143,11 @@ * icu::MessageFormat * * + * List Formatting + * ulistformatter.h + * icu::ListFormatter + * + * * Number Formatting
(includes currency and unit formatting) * unumberformatter.h, unum.h * icu::number::NumberFormatter (ICU 60+) or icu::NumberFormat (older versions) diff --git a/deps/icu-small/source/common/unicode/urename.h b/deps/icu-small/source/common/unicode/urename.h index 20232cd209c2d6..fe59fdd893d940 100644 --- a/deps/icu-small/source/common/unicode/urename.h +++ b/deps/icu-small/source/common/unicode/urename.h @@ -1137,6 +1137,7 @@ #define ulocimp_toLanguageTag U_ICU_ENTRY_POINT_RENAME(ulocimp_toLanguageTag) #define ulocimp_toLegacyKey U_ICU_ENTRY_POINT_RENAME(ulocimp_toLegacyKey) #define ulocimp_toLegacyType U_ICU_ENTRY_POINT_RENAME(ulocimp_toLegacyType) +#define ultag_getTKeyStart U_ICU_ENTRY_POINT_RENAME(ultag_getTKeyStart) #define ultag_isExtensionSubtags U_ICU_ENTRY_POINT_RENAME(ultag_isExtensionSubtags) #define ultag_isLanguageSubtag U_ICU_ENTRY_POINT_RENAME(ultag_isLanguageSubtag) #define ultag_isPrivateuseValueSubtags U_ICU_ENTRY_POINT_RENAME(ultag_isPrivateuseValueSubtags) diff --git a/deps/icu-small/source/common/unicode/uvernum.h b/deps/icu-small/source/common/unicode/uvernum.h index a4cbb9e0fe8661..a46481a3fe610c 100644 --- a/deps/icu-small/source/common/unicode/uvernum.h +++ b/deps/icu-small/source/common/unicode/uvernum.h @@ -66,7 +66,7 @@ * This value will change in the subsequent releases of ICU * @stable ICU 2.6 */ -#define U_ICU_VERSION_MINOR_NUM 1 +#define U_ICU_VERSION_MINOR_NUM 2 /** The current ICU patchlevel version as an integer. * This value will change in the subsequent releases of ICU @@ -139,7 +139,7 @@ * This value will change in the subsequent releases of ICU * @stable ICU 2.4 */ -#define U_ICU_VERSION "68.1" +#define U_ICU_VERSION "68.2" /** * The current ICU library major version number as a string, for library name suffixes. @@ -158,7 +158,7 @@ /** Data version in ICU4C. 
* @internal ICU 4.4 Internal Use Only **/ -#define U_ICU_DATA_VERSION "68.1" +#define U_ICU_DATA_VERSION "68.2" #endif /* U_HIDE_INTERNAL_API */ /*=========================================================================== diff --git a/deps/icu-small/source/common/wintz.cpp b/deps/icu-small/source/common/wintz.cpp index 047f4290f10d0e..6805297a38a161 100644 --- a/deps/icu-small/source/common/wintz.cpp +++ b/deps/icu-small/source/common/wintz.cpp @@ -36,17 +36,58 @@ U_NAMESPACE_BEGIN +// Note these constants and the struct are only used when dealing with the fallback path for RDP sessions. + +// This is the location of the time zones in the registry on Vista+ systems. +// See: https://docs.microsoft.com/windows/win32/api/timezoneapi/ns-timezoneapi-dynamic_time_zone_information +#define WINDOWS_TIMEZONES_REG_KEY_PATH L"SOFTWARE\\Microsoft\\Windows NT\\CurrentVersion\\Time Zones" + +// Max length for a registry key is 255. +1 for null. +// See: https://docs.microsoft.com/windows/win32/sysinfo/registry-element-size-limits +#define WINDOWS_MAX_REG_KEY_LENGTH 256 + +#if U_PLATFORM_HAS_WINUWP_API == 0 + +// This is the layout of the TZI binary value in the registry. +// See: https://docs.microsoft.com/windows/win32/api/timezoneapi/ns-timezoneapi-time_zone_information +typedef struct _REG_TZI_FORMAT { + LONG Bias; + LONG StandardBias; + LONG DaylightBias; + SYSTEMTIME StandardDate; + SYSTEMTIME DaylightDate; +} REG_TZI_FORMAT; + +#endif // U_PLATFORM_HAS_WINUWP_API + /** -* Main Windows time zone detection function. -* Returns the Windows time zone converted to an ICU time zone as a heap-allocated buffer, or nullptr upon failure. +* This is the main Windows time zone detection function. +* +* It returns the Windows time zone converted to an ICU time zone as a heap-allocated buffer, or nullptr upon failure.
+* +* We use the Win32 API GetDynamicTimeZoneInformation (which is available since Vista) to get the current time zone info, +* as this API returns a non-localized time zone name which can be then mapped to an ICU time zone. +* +* However, in some RDP/terminal services situations, this struct isn't always fully complete, and the TimeZoneKeyName +* field of the struct might be NULL. This can happen with some 3rd party RDP clients, and also when using older versions +* of the RDP protocol, which don't send the newer TimeZoneKeyName information and only send the StandardName and DaylightName. +* +* Since these 3rd party clients and older RDP clients only send the pre-Vista time zone information to the server, this means that we +* need to fall back on using the pre-Vista methods to determine the time zone. This unfortunately requires examining the registry directly +* in order to try and determine the current time zone. +* +* Note that this can however still fail in some cases though if the client and server are using different languages, as the StandardName +* that is sent by client is localized in the client's language. However, we must compare this to the names that are on the server, which +* are localized in registry using the server's language. Despite that, this is the best we can do. * -* Note: We use the Win32 API GetDynamicTimeZoneInformation (available since Vista+) to get the current time zone info. -* This API returns a non-localized time zone name, which is mapped to an ICU time zone ID (~ Olsen ID). +* Note: This fallback method won't work for the UWP version though, as we can't use the registry APIs in UWP. +* +* Once we have the current Windows time zone, we can then map it to an ICU time zone ID (~ Olson ID). */ U_CAPI const char* U_EXPORT2 uprv_detectWindowsTimeZone() { - // Obtain the DYNAMIC_TIME_ZONE_INFORMATION info to get the non-localized time zone name.
+ // We first try to obtain the time zone directly by using the TimeZoneKeyName field of the DYNAMIC_TIME_ZONE_INFORMATION struct. DYNAMIC_TIME_ZONE_INFORMATION dynamicTZI; uprv_memset(&dynamicTZI, 0, sizeof(dynamicTZI)); SYSTEMTIME systemTimeAllZero; @@ -86,22 +127,138 @@ uprv_detectWindowsTimeZone() // Note '-' before 'utcOffsetMin'. The timezone ID's sign convention // is that a timezone ahead of UTC is Etc/GMT- and a timezone // behind UTC is Etc/GMT+. - int ret = snprintf(gmtOffsetTz, UPRV_LENGTHOF(gmtOffsetTz), "Etc/GMT%+d", -utcOffsetMins / 60); + int ret = snprintf(gmtOffsetTz, UPRV_LENGTHOF(gmtOffsetTz), "Etc/GMT%+ld", -utcOffsetMins / 60); if (ret > 0 && ret < UPRV_LENGTHOF(gmtOffsetTz)) { return uprv_strdup(gmtOffsetTz); } } } - // If DST is NOT disabled, but we have an empty TimeZoneKeyName, then it is unclear - // what we should do as this should not happen. + // If DST is NOT disabled, but the TimeZoneKeyName field of the struct is NULL, then we may be dealing with a + // RDP/terminal services session where the 'Time Zone Redirection' feature is enabled. However, either the RDP + // client sent the server incomplete info (some 3rd party RDP clients only send the StandardName and DaylightName, + // but do not send the important TimeZoneKeyName), or if the RDP server has not appropriately populated the struct correctly. + // + // In this case we unfortunately have no choice but to fallback to using the pre-Vista method of determining the + // time zone, which requires examining the registry directly. + // + // Note that this can however still fail though if the client and server are using different languages, as the StandardName + // that is sent by client is *localized* in the client's language. However, we must compare this to the names that are + // on the server, which are *localized* in registry using the server's language. + // + // One other note is that this fallback method doesn't work for the UWP version, as we can't use the registry APIs. 
+ + // windowsTimeZoneName will point at timezoneSubKeyName if we had to fallback to using the registry, and we found a match. + WCHAR timezoneSubKeyName[WINDOWS_MAX_REG_KEY_LENGTH]; + WCHAR *windowsTimeZoneName = dynamicTZI.TimeZoneKeyName; + if (dynamicTZI.TimeZoneKeyName[0] == 0) { + +// We can't use the registry APIs in the UWP version. +#if U_PLATFORM_HAS_WINUWP_API == 1 + (void)timezoneSubKeyName; // suppress unused variable warnings. return nullptr; +#else + // Open the path to the time zones in the Windows registry. + LONG ret; + HKEY hKeyAllTimeZones = nullptr; + ret = RegOpenKeyExW(HKEY_LOCAL_MACHINE, WINDOWS_TIMEZONES_REG_KEY_PATH, 0, KEY_READ, + reinterpret_cast(&hKeyAllTimeZones)); + + if (ret != ERROR_SUCCESS) { + // If we can't open the key, then we can't do much, so fail. + return nullptr; + } + + // Read the number of subkeys under the time zone registry path. + DWORD numTimeZoneSubKeys; + ret = RegQueryInfoKeyW(hKeyAllTimeZones, nullptr, nullptr, nullptr, &numTimeZoneSubKeys, + nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr); + + if (ret != ERROR_SUCCESS) { + RegCloseKey(hKeyAllTimeZones); + return nullptr; + } + + // Examine each of the subkeys to try and find a match for the localized standard name ("Std"). + // + // Note: The name of the time zone subkey itself is not localized, but the "Std" name is localized. This means + // that we could fail to find a match if the RDP client and RDP server are using different languages, but unfortunately + // there isn't much we can do about it. + HKEY hKeyTimeZoneSubKey = nullptr; + ULONG registryValueType; + WCHAR registryStandardName[WINDOWS_MAX_REG_KEY_LENGTH]; + + for (DWORD i = 0; i < numTimeZoneSubKeys; i++) { + // Note: RegEnumKeyExW wants the size of the buffer in characters. 
+ DWORD size = UPRV_LENGTHOF(timezoneSubKeyName); + ret = RegEnumKeyExW(hKeyAllTimeZones, i, timezoneSubKeyName, &size, nullptr, nullptr, nullptr, nullptr); + + if (ret != ERROR_SUCCESS) { + RegCloseKey(hKeyAllTimeZones); + return nullptr; + } + + ret = RegOpenKeyExW(hKeyAllTimeZones, timezoneSubKeyName, 0, KEY_READ, + reinterpret_cast(&hKeyTimeZoneSubKey)); + + if (ret != ERROR_SUCCESS) { + RegCloseKey(hKeyAllTimeZones); + return nullptr; + } + + // Note: RegQueryValueExW wants the size of the buffer in bytes. + size = sizeof(registryStandardName); + ret = RegQueryValueExW(hKeyTimeZoneSubKey, L"Std", nullptr, ®istryValueType, + reinterpret_cast(registryStandardName), &size); + + if (ret != ERROR_SUCCESS || registryValueType != REG_SZ) { + RegCloseKey(hKeyTimeZoneSubKey); + RegCloseKey(hKeyAllTimeZones); + return nullptr; + } + + // Note: wcscmp does an ordinal (byte) comparison. + if (wcscmp(reinterpret_cast(registryStandardName), dynamicTZI.StandardName) == 0) { + // Since we are comparing the *localized* time zone name, it's possible that some languages might use + // the same string for more than one time zone. Thus we need to examine the TZI data in the registry to + // compare the GMT offset (the bias), and the DST transition dates, to ensure it's the same time zone + // as the currently reported one. + REG_TZI_FORMAT registryTziValue; + uprv_memset(®istryTziValue, 0, sizeof(registryTziValue)); + + // Note: RegQueryValueExW wants the size of the buffer in bytes. 
+ DWORD timezoneTziValueSize = sizeof(registryTziValue); + ret = RegQueryValueExW(hKeyTimeZoneSubKey, L"TZI", nullptr, ®istryValueType, + reinterpret_cast(®istryTziValue), &timezoneTziValueSize); + + if (ret == ERROR_SUCCESS) { + if ((dynamicTZI.Bias == registryTziValue.Bias) && + (memcmp((const void *)&dynamicTZI.StandardDate, (const void *)®istryTziValue.StandardDate, sizeof(SYSTEMTIME)) == 0) && + (memcmp((const void *)&dynamicTZI.DaylightDate, (const void *)®istryTziValue.DaylightDate, sizeof(SYSTEMTIME)) == 0)) + { + // We found a matching time zone. + windowsTimeZoneName = timezoneSubKeyName; + break; + } + } + } + RegCloseKey(hKeyTimeZoneSubKey); + hKeyTimeZoneSubKey = nullptr; + } + + if (hKeyTimeZoneSubKey != nullptr) { + RegCloseKey(hKeyTimeZoneSubKey); + } + if (hKeyAllTimeZones != nullptr) { + RegCloseKey(hKeyAllTimeZones); + } +#endif // U_PLATFORM_HAS_WINUWP_API } CharString winTZ; UErrorCode status = U_ZERO_ERROR; - winTZ.appendInvariantChars(UnicodeString(TRUE, dynamicTZI.TimeZoneKeyName, -1), status); + winTZ.appendInvariantChars(UnicodeString(TRUE, windowsTimeZoneName, -1), status); // Map Windows Timezone name (non-localized) to ICU timezone ID (~ Olson timezone id). 
StackUResourceBundle winTZBundle; @@ -123,18 +280,29 @@ uprv_detectWindowsTimeZone() int regionCodeLen = GetGeoInfoW(geoId, GEO_ISO2, regionCodeW, UPRV_LENGTHOF(regionCodeW), 0); const UChar *icuTZ16 = nullptr; - int32_t tzLen; + int32_t tzListLen = 0; if (regionCodeLen != 0) { for (int i = 0; i < UPRV_LENGTHOF(regionCodeW); i++) { regionCode[i] = static_cast(regionCodeW[i]); } - icuTZ16 = ures_getStringByKey(winTZBundle.getAlias(), regionCode, &tzLen, &status); + icuTZ16 = ures_getStringByKey(winTZBundle.getAlias(), regionCode, &tzListLen, &status); } if (regionCodeLen == 0 || U_FAILURE(status)) { // fallback to default "001" (world) status = U_ZERO_ERROR; - icuTZ16 = ures_getStringByKey(winTZBundle.getAlias(), "001", &tzLen, &status); + icuTZ16 = ures_getStringByKey(winTZBundle.getAlias(), "001", &tzListLen, &status); + } + + // Note: We want the first entry in the string returned by ures_getStringByKey. + // However this string can be a space delimited list of timezones: + // Ex: "America/New_York America/Detroit America/Indiana/Petersburg ..." + // We need to stop at the first space, so we pass tzLen (instead of tzListLen) to appendInvariantChars below. 
+ int32_t tzLen = 0; + if (tzListLen > 0) { + while (!(icuTZ16[tzLen] == u'\0' || icuTZ16[tzLen] == u' ')) { + tzLen++; + } } // Note: cloneData returns nullptr if the status is a failure, so this diff --git a/deps/icu-small/source/data/in/icudt68l.dat.bz2 b/deps/icu-small/source/data/in/icudt68l.dat.bz2 index fcee4ecfa17efe..8fd32b7471d648 100644 Binary files a/deps/icu-small/source/data/in/icudt68l.dat.bz2 and b/deps/icu-small/source/data/in/icudt68l.dat.bz2 differ diff --git a/deps/icu-small/source/i18n/dtitvfmt.cpp b/deps/icu-small/source/i18n/dtitvfmt.cpp index a913dc43c808d6..d6ec501af88701 100644 --- a/deps/icu-small/source/i18n/dtitvfmt.cpp +++ b/deps/icu-small/source/i18n/dtitvfmt.cpp @@ -1422,7 +1422,11 @@ DateIntervalFormat::setIntervalPattern(UCalendarDateFields field, if ( field == UCAL_AM_PM ) { fInfo->getIntervalPattern(*bestSkeleton, UCAL_HOUR, pattern,status); if ( !pattern.isEmpty() ) { - setIntervalPattern(field, pattern); + UBool suppressDayPeriodField = fSkeleton.indexOf(CAP_J) != -1; + UnicodeString adjustIntervalPattern; + adjustFieldWidth(*skeleton, *bestSkeleton, pattern, differenceInfo, + suppressDayPeriodField, adjustIntervalPattern); + setIntervalPattern(field, adjustIntervalPattern); } return false; } @@ -1694,27 +1698,23 @@ DateIntervalFormat::adjustFieldWidth(const UnicodeString& inputSkeleton, DateIntervalInfo::parseSkeleton(inputSkeleton, inputSkeletonFieldWidth); DateIntervalInfo::parseSkeleton(bestMatchSkeleton, bestMatchSkeletonFieldWidth); if (suppressDayPeriodField) { - adjustedPtn.findAndReplace(UnicodeString(LOW_A), UnicodeString()); - adjustedPtn.findAndReplace(UnicodeString(" "), UnicodeString(" ")); + findReplaceInPattern(adjustedPtn, UnicodeString(LOW_A), UnicodeString()); + findReplaceInPattern(adjustedPtn, UnicodeString(" "), UnicodeString(" ")); adjustedPtn.trim(); } if ( differenceInfo == 2 ) { if (inputSkeleton.indexOf(LOW_Z) != -1) { - adjustedPtn.findAndReplace(UnicodeString(LOW_V), - UnicodeString(LOW_Z)); - } - 
if (inputSkeleton.indexOf(CAP_K) != -1) { - adjustedPtn.findAndReplace(UnicodeString(LOW_H), - UnicodeString(CAP_K)); - } - if (inputSkeleton.indexOf(LOW_K) != -1) { - adjustedPtn.findAndReplace(UnicodeString(CAP_H), - UnicodeString(LOW_K)); - } - if (inputSkeleton.indexOf(LOW_B) != -1) { - adjustedPtn.findAndReplace(UnicodeString(LOW_A), - UnicodeString(LOW_B)); - } + findReplaceInPattern(adjustedPtn, UnicodeString(LOW_V), UnicodeString(LOW_Z)); + } + if (inputSkeleton.indexOf(CAP_K) != -1) { + findReplaceInPattern(adjustedPtn, UnicodeString(LOW_H), UnicodeString(CAP_K)); + } + if (inputSkeleton.indexOf(LOW_K) != -1) { + findReplaceInPattern(adjustedPtn, UnicodeString(CAP_H), UnicodeString(LOW_K)); + } + if (inputSkeleton.indexOf(LOW_B) != -1) { + findReplaceInPattern(adjustedPtn, UnicodeString(LOW_A), UnicodeString(LOW_B)); + } } if (adjustedPtn.indexOf(LOW_A) != -1 && bestMatchSkeletonFieldWidth[LOW_A - PATTERN_CHAR_BASE] == 0) { bestMatchSkeletonFieldWidth[LOW_A - PATTERN_CHAR_BASE] = 1; @@ -1792,6 +1792,39 @@ DateIntervalFormat::adjustFieldWidth(const UnicodeString& inputSkeleton, } } +void +DateIntervalFormat::findReplaceInPattern(UnicodeString& targetString, + const UnicodeString& strToReplace, + const UnicodeString& strToReplaceWith) { + int32_t firstQuoteIndex = targetString.indexOf(u'\''); + if (firstQuoteIndex == -1) { + targetString.findAndReplace(strToReplace, strToReplaceWith); + } else { + UnicodeString result; + UnicodeString source = targetString; + + while (firstQuoteIndex >= 0) { + int32_t secondQuoteIndex = source.indexOf(u'\'', firstQuoteIndex + 1); + if (secondQuoteIndex == -1) { + secondQuoteIndex = source.length() - 1; + } + + UnicodeString unquotedText(source, 0, firstQuoteIndex); + UnicodeString quotedText(source, firstQuoteIndex, secondQuoteIndex - firstQuoteIndex + 1); + + unquotedText.findAndReplace(strToReplace, strToReplaceWith); + result += unquotedText; + result += quotedText; + + source.remove(0, secondQuoteIndex + 1); + 
firstQuoteIndex = source.indexOf(u'\''); + } + source.findAndReplace(strToReplace, strToReplaceWith); + result += source; + targetString = result; + } +} + void diff --git a/deps/icu-small/source/i18n/formatted_string_builder.cpp b/deps/icu-small/source/i18n/formatted_string_builder.cpp index 5aabc31cc4391b..b370f14f2ac4ff 100644 --- a/deps/icu-small/source/i18n/formatted_string_builder.cpp +++ b/deps/icu-small/source/i18n/formatted_string_builder.cpp @@ -276,6 +276,11 @@ int32_t FormattedStringBuilder::prepareForInsertHelper(int32_t index, int32_t co char16_t *oldChars = getCharPtr(); Field *oldFields = getFieldPtr(); if (fLength + count > oldCapacity) { + if ((fLength + count) > INT32_MAX / 2) { + // If we continue, then newCapacity will overflow int32_t in the next line. + status = U_INPUT_TOO_LONG_ERROR; + return -1; + } int32_t newCapacity = (fLength + count) * 2; int32_t newZero = newCapacity / 2 - (fLength + count) / 2; @@ -330,12 +335,14 @@ int32_t FormattedStringBuilder::prepareForInsertHelper(int32_t index, int32_t co fZero = newZero; fLength += count; } + U_ASSERT((fZero + index) >= 0); return fZero + index; } int32_t FormattedStringBuilder::remove(int32_t index, int32_t count) { // TODO: Reset the heap here? (If the string after removal can fit on stack?)
int32_t position = index + fZero; + U_ASSERT(position >= 0); uprv_memmove2(getCharPtr() + position, getCharPtr() + position + count, sizeof(char16_t) * (fLength - index - count)); diff --git a/deps/icu-small/source/i18n/formattedval_impl.h b/deps/icu-small/source/i18n/formattedval_impl.h index 8005b0abb4d848..1e6eb1e639f809 100644 --- a/deps/icu-small/source/i18n/formattedval_impl.h +++ b/deps/icu-small/source/i18n/formattedval_impl.h @@ -117,6 +117,12 @@ class FormattedValueFieldPositionIteratorImpl : public UMemory, public Formatted }; +// Internal struct that must be exported for MSVC +struct U_I18N_API SpanInfo { + int32_t spanValue; + int32_t length; +}; + // Export an explicit template instantiation of the MaybeStackArray that // is used as a data member of CEBuffer. // @@ -126,7 +132,7 @@ class FormattedValueFieldPositionIteratorImpl : public UMemory, public Formatted // See digitlst.h, pluralaffix.h, datefmt.h, and others for similar examples. // #if U_PF_WINDOWS <= U_PLATFORM && U_PLATFORM <= U_PF_CYGWIN -template class U_I18N_API MaybeStackArray; +template class U_I18N_API MaybeStackArray; #endif /** @@ -162,13 +168,19 @@ class U_I18N_API FormattedValueStringBuilderImpl : public UMemory, public Format return fString; } - void appendSpanIndex(int32_t index); - void prependSpanIndex(int32_t index); + /** + * Adds additional metadata used for span fields. + * + * spanValue: the index of the list item, for example. + * length: the length of the span, used to split adjacent fields. 
+ */ + void appendSpanInfo(int32_t spanValue, int32_t length, UErrorCode& status); + void prependSpanInfo(int32_t spanValue, int32_t length, UErrorCode& status); private: FormattedStringBuilder fString; FormattedStringBuilder::Field fNumericField; - MaybeStackArray spanIndices; + MaybeStackArray spanIndices; bool nextPositionImpl(ConstrainedFieldPosition& cfpos, FormattedStringBuilder::Field numericField, UErrorCode& status) const; static bool isIntOrGroup(FormattedStringBuilder::Field field); diff --git a/deps/icu-small/source/i18n/formattedval_sbimpl.cpp b/deps/icu-small/source/i18n/formattedval_sbimpl.cpp index b2ae4c34c0a66d..84c2d00666c2be 100644 --- a/deps/icu-small/source/i18n/formattedval_sbimpl.cpp +++ b/deps/icu-small/source/i18n/formattedval_sbimpl.cpp @@ -46,19 +46,19 @@ Appendable& FormattedValueStringBuilderImpl::appendTo(Appendable& appendable, UE UBool FormattedValueStringBuilderImpl::nextPosition(ConstrainedFieldPosition& cfpos, UErrorCode& status) const { // NOTE: MSVC sometimes complains when implicitly converting between bool and UBool - return nextPositionImpl(cfpos, fNumericField, status) ? TRUE : FALSE; + return nextPositionImpl(cfpos, fNumericField, status) ? 
true : false; } UBool FormattedValueStringBuilderImpl::nextFieldPosition(FieldPosition& fp, UErrorCode& status) const { int32_t rawField = fp.getField(); if (rawField == FieldPosition::DONT_CARE) { - return FALSE; + return false; } if (rawField < 0 || rawField >= UNUM_FIELD_COUNT) { status = U_ILLEGAL_ARGUMENT_ERROR; - return FALSE; + return false; } ConstrainedFieldPosition cfpos; @@ -67,7 +67,7 @@ UBool FormattedValueStringBuilderImpl::nextFieldPosition(FieldPosition& fp, UErr if (nextPositionImpl(cfpos, kUndefinedField, status)) { fp.setBeginIndex(cfpos.getStart()); fp.setEndIndex(cfpos.getLimit()); - return TRUE; + return true; } // Special case: fraction should start after integer if fraction is not present @@ -85,7 +85,7 @@ UBool FormattedValueStringBuilderImpl::nextFieldPosition(FieldPosition& fp, UErr fp.setEndIndex(i - fString.fZero); } - return FALSE; + return false; } void FormattedValueStringBuilderImpl::getAllFieldPositions(FieldPositionIteratorHandler& fpih, @@ -103,23 +103,12 @@ static constexpr Field kEndField = Field(0xf, 0xf); bool FormattedValueStringBuilderImpl::nextPositionImpl(ConstrainedFieldPosition& cfpos, Field numericField, UErrorCode& /*status*/) const { int32_t fieldStart = -1; Field currField = kUndefinedField; - UFieldCategory spanCategory = UFIELD_CATEGORY_UNDEFINED; - int32_t spanValue; for (int32_t i = fString.fZero + cfpos.getLimit(); i <= fString.fZero + fString.fLength; i++) { Field _field = (i < fString.fZero + fString.fLength) ? fString.getFieldPtr()[i] : kEndField; // Case 1: currently scanning a field. if (currField != kUndefinedField) { if (currField != _field) { int32_t end = i - fString.fZero; - // Handle span fields; don't trim them - if (spanCategory != UFIELD_CATEGORY_UNDEFINED) { - cfpos.setState( - spanCategory, - spanValue, - fieldStart, - end); - return true; - } // Grouping separators can be whitespace; don't throw them out! 
if (isTrimmable(currField)) { end = trimBack(i - fString.fZero); @@ -182,13 +171,11 @@ bool FormattedValueStringBuilderImpl::nextPositionImpl(ConstrainedFieldPosition& if (elementField == Field(UFIELD_CATEGORY_LIST, ULISTFMT_ELEMENT_FIELD) && cfpos.matchesField(elementField.getCategory(), elementField.getField()) && (cfpos.getLimit() < i - fString.fZero || cfpos.getCategory() != elementField.getCategory())) { - // Re-wind to the beginning of the field and then emit it - int32_t j = i - 1; - for (; j >= fString.fZero && fString.getFieldPtr()[j] == fString.getFieldPtr()[i-1]; j--) {} + int64_t si = cfpos.getInt64IterationContext() - 1; cfpos.setState( elementField.getCategory(), elementField.getField(), - j - fString.fZero + 1, + i - fString.fZero - spanIndices[si].length, i - fString.fZero); return true; } @@ -203,22 +190,28 @@ bool FormattedValueStringBuilderImpl::nextPositionImpl(ConstrainedFieldPosition& } // Case 3: check for field starting at this position // Case 3a: Need to add a SpanField - if (_field == Field(UFIELD_CATEGORY_LIST, ULISTFMT_ELEMENT_FIELD) - // don't return the same field twice in a row: - && (i == fString.fZero - || fString.getFieldPtr()[i-1].getCategory() != UFIELD_CATEGORY_LIST - || fString.getFieldPtr()[i-1].getField() != ULISTFMT_ELEMENT_FIELD)) { + if (_field == Field(UFIELD_CATEGORY_LIST, ULISTFMT_ELEMENT_FIELD)) { int64_t si = cfpos.getInt64IterationContext(); - spanValue = spanIndices[si]; + int32_t spanValue = spanIndices[si].spanValue; + int32_t length = spanIndices[si].length; cfpos.setInt64IterationContext(si + 1); if (cfpos.matchesField(UFIELD_CATEGORY_LIST_SPAN, spanValue)) { - spanCategory = UFIELD_CATEGORY_LIST_SPAN; + UFieldCategory spanCategory = UFIELD_CATEGORY_LIST_SPAN; fieldStart = i - fString.fZero; - currField = _field; + int32_t end = fieldStart + length; + cfpos.setState( + spanCategory, + spanValue, + fieldStart, + end); + return true; + } else { + // Failed to match; jump ahead + i += length - 1; continue; } } - 
// Case 3b: No SpanField or SpanField did not match + // Case 3b: No SpanField if (cfpos.matchesField(_field.getCategory(), _field.getField())) { fieldStart = i - fString.fZero; currField = _field; @@ -226,24 +219,40 @@ bool FormattedValueStringBuilderImpl::nextPositionImpl(ConstrainedFieldPosition& } U_ASSERT(currField == kUndefinedField); + // Always set the position to the end so that we don't revisit previous sections + cfpos.setState( + cfpos.getCategory(), + cfpos.getField(), + fString.fLength, + fString.fLength); return false; } -void FormattedValueStringBuilderImpl::appendSpanIndex(int32_t position) { - if (spanIndices.getCapacity() <= position) { - spanIndices.resize(position * 2); +void FormattedValueStringBuilderImpl::appendSpanInfo(int32_t spanValue, int32_t length, UErrorCode& status) { + if (U_FAILURE(status)) { return; } + U_ASSERT(spanIndices.getCapacity() >= spanValue); + if (spanIndices.getCapacity() == spanValue) { + if (!spanIndices.resize(spanValue * 2, spanValue)) { + status = U_MEMORY_ALLOCATION_ERROR; + return; + } } - spanIndices[position] = position; + spanIndices[spanValue] = {spanValue, length}; } -void FormattedValueStringBuilderImpl::prependSpanIndex(int32_t position) { - if (spanIndices.getCapacity() <= position) { - spanIndices.resize(position * 2); +void FormattedValueStringBuilderImpl::prependSpanInfo(int32_t spanValue, int32_t length, UErrorCode& status) { + if (U_FAILURE(status)) { return; } + U_ASSERT(spanIndices.getCapacity() >= spanValue); + if (spanIndices.getCapacity() == spanValue) { + if (!spanIndices.resize(spanValue * 2, spanValue)) { + status = U_MEMORY_ALLOCATION_ERROR; + return; + } } - for (int32_t i = 0; i < position; i++) { + for (int32_t i = spanValue - 1; i >= 0; i--) { spanIndices[i+1] = spanIndices[i]; } - spanIndices[0] = position; + spanIndices[0] = {spanValue, length}; } bool FormattedValueStringBuilderImpl::isIntOrGroup(Field field) { diff --git a/deps/icu-small/source/i18n/listformatter.cpp 
b/deps/icu-small/source/i18n/listformatter.cpp index ab04ac9080a5ab..be0d16bc7f52b3 100644 --- a/deps/icu-small/source/i18n/listformatter.cpp +++ b/deps/icu-small/source/i18n/listformatter.cpp @@ -16,6 +16,10 @@ * created by: Umesh P. Nair */ +#include "unicode/utypes.h" + +#if !UCONFIG_NO_FORMATTING + #include "cmemory.h" #include "unicode/fpositer.h" // FieldPositionIterator #include "unicode/listformatter.h" @@ -171,21 +175,21 @@ PatternHandler* createPatternHandler( UErrorCode& status) { if (uprv_strcmp(lang, "es") == 0) { // Spanish - UnicodeString spanishYStr(TRUE, spanishY, -1); + UnicodeString spanishYStr(true, spanishY, -1); bool twoIsY = two == spanishYStr; bool endIsY = end == spanishYStr; if (twoIsY || endIsY) { - UnicodeString replacement(TRUE, spanishE, -1); + UnicodeString replacement(true, spanishE, -1); return new ContextualHandler( shouldChangeToE, twoIsY ? replacement : two, two, endIsY ? replacement : end, end, status); } - UnicodeString spanishOStr(TRUE, spanishO, -1); + UnicodeString spanishOStr(true, spanishO, -1); bool twoIsO = two == spanishOStr; bool endIsO = end == spanishOStr; if (twoIsO || endIsO) { - UnicodeString replacement(TRUE, spanishU, -1); + UnicodeString replacement(true, spanishU, -1); return new ContextualHandler( shouldChangeToU, twoIsO ? replacement : two, two, @@ -193,11 +197,11 @@ PatternHandler* createPatternHandler( } } else if (uprv_strcmp(lang, "he") == 0 || uprv_strcmp(lang, "iw") == 0) { // Hebrew - UnicodeString hebrewVavStr(TRUE, hebrewVav, -1); + UnicodeString hebrewVavStr(true, hebrewVav, -1); bool twoIsVav = two == hebrewVavStr; bool endIsVav = end == hebrewVavStr; if (twoIsVav || endIsVav) { - UnicodeString replacement(TRUE, hebrewVavDash, -1); + UnicodeString replacement(true, hebrewVavDash, -1); return new ContextualHandler( shouldChangeToVavDash, twoIsVav ? 
replacement : two, two, @@ -238,7 +242,6 @@ ListFormatInternal(const ListFormatInternal &other) : }; -#if !UCONFIG_NO_FORMATTING class FormattedListData : public FormattedValueStringBuilderImpl { public: FormattedListData(UErrorCode&) : FormattedValueStringBuilderImpl(kUndefinedField) {} @@ -248,7 +251,6 @@ class FormattedListData : public FormattedValueStringBuilderImpl { FormattedListData::~FormattedListData() = default; UPRV_FORMATTED_VALUE_SUBCLASS_AUTO_IMPL(FormattedList) -#endif static Hashtable* listPatternHash = nullptr; @@ -257,7 +259,7 @@ U_CDECL_BEGIN static UBool U_CALLCONV uprv_listformatter_cleanup() { delete listPatternHash; listPatternHash = nullptr; - return TRUE; + return true; } static void U_CALLCONV @@ -350,7 +352,6 @@ const ListFormatInternal* ListFormatter::getListFormatInternal( return result; } -#if !UCONFIG_NO_FORMATTING static const char* typeWidthToStyleString(UListFormatterType type, UListFormatterWidth width) { switch (type) { case ULISTFMT_TYPE_AND: @@ -394,7 +395,6 @@ static const char* typeWidthToStyleString(UListFormatterType type, UListFormatte return nullptr; } -#endif static const UChar solidus = 0x2F; static const UChar aliasPrefix[] = { 0x6C,0x69,0x73,0x74,0x50,0x61,0x74,0x74,0x65,0x72,0x6E,0x2F }; // "listPattern/" @@ -515,14 +515,9 @@ ListFormatter* ListFormatter::createInstance(UErrorCode& errorCode) { } ListFormatter* ListFormatter::createInstance(const Locale& locale, UErrorCode& errorCode) { -#if !UCONFIG_NO_FORMATTING return createInstance(locale, ULISTFMT_TYPE_AND, ULISTFMT_WIDTH_WIDE, errorCode); -#else - return createInstance(locale, "standard", errorCode); -#endif } -#if !UCONFIG_NO_FORMATTING ListFormatter* ListFormatter::createInstance( const Locale& locale, UListFormatterType type, UListFormatterWidth width, UErrorCode& errorCode) { const char* style = typeWidthToStyleString(type, width); @@ -532,7 +527,6 @@ ListFormatter* ListFormatter::createInstance( } return createInstance(locale, style, errorCode); } -#endif 
ListFormatter* ListFormatter::createInstance(const Locale& locale, const char *style, UErrorCode& errorCode) { const ListFormatInternal* listFormatInternal = getListFormatInternal(locale, style, errorCode); @@ -573,7 +567,7 @@ class FormattedListBuilder { start, {UFIELD_CATEGORY_LIST, ULISTFMT_ELEMENT_FIELD}, status); - data->appendSpanIndex(0); + data->appendSpanInfo(0, start.length(), status); } } @@ -609,7 +603,7 @@ class FormattedListBuilder { next, {UFIELD_CATEGORY_LIST, ULISTFMT_ELEMENT_FIELD}, status); - data->appendSpanIndex(position); + data->appendSpanInfo(position, next.length(), status); data->getStringRef().append( temp.tempSubString(offsets[1]), {UFIELD_CATEGORY_LIST, ULISTFMT_LITERAL_FIELD}, @@ -628,7 +622,7 @@ class FormattedListBuilder { next, {UFIELD_CATEGORY_LIST, ULISTFMT_ELEMENT_FIELD}, status); - data->prependSpanIndex(position); + data->prependSpanInfo(position, next.length(), status); data->getStringRef().insert( 0, temp.tempSubStringBetween(0, offsets[1]), @@ -660,7 +654,6 @@ UnicodeString& ListFormatter::format( int32_t index, int32_t &offset, UErrorCode& errorCode) const { -#if !UCONFIG_NO_FORMATTING int32_t initialOffset = appendTo.length(); auto result = formatStringsToValue(items, nItems, errorCode); UnicodeStringAppendable appendable(appendTo); @@ -671,11 +664,9 @@ UnicodeString& ListFormatter::format( result.nextPosition(cfpos, errorCode); offset = initialOffset + cfpos.getStart(); } -#endif return appendTo; } -#if !UCONFIG_NO_FORMATTING FormattedList ListFormatter::formatStringsToValue( const UnicodeString items[], int32_t nItems, @@ -741,7 +732,8 @@ FormattedList ListFormatter::formatStringsToValue( return FormattedList(result.data.orphan()); } } -#endif U_NAMESPACE_END + +#endif /* #if !UCONFIG_NO_FORMATTING */ diff --git a/deps/icu-small/source/i18n/measunit.cpp b/deps/icu-small/source/i18n/measunit.cpp index dab3abb5e21ff6..ece83177625513 100644 --- a/deps/icu-small/source/i18n/measunit.cpp +++ 
b/deps/icu-small/source/i18n/measunit.cpp @@ -33,7 +33,8 @@ UOBJECT_DEFINE_RTTI_IMPLEMENTATION(MeasureUnit) // update this code, refer to: // http://site.icu-project.org/design/formatting/measureformat/updating-measure-unit // -// Start generated code for measunit.cpp +// Start generated code +// TODO(ICU-21076): improve how this generated code is produced. // Maps from Type ID to offset in gSubTypes. static const int32_t gOffsets[] = { @@ -54,15 +55,44 @@ static const int32_t gOffsets[] = { 404, 408, 423, - 424, - 430, - 440, - 444, - 448, + 426, + 432, + 442, + 446, 450, - 484 + 452, + 486 }; +// TODO: FIX CODE GENERATION - leaving this here but commented-out to make it +// clear that we no longer want this array. We needed it for only one thing: efficient checking of "currency". +// +// static const int32_t gIndexes[] = { +// 0, +// 2, +// 7, +// 17, +// 25, +// 29, +// 29, +// 40, +// 56, +// 60, +// 69, +// 71, +// 75, +// 83, +// 105, +// 109, +// 124, +// 127, +// 133, +// 143, +// 147, +// 151, +// 153, +// 187 +// }; static const int32_t kCurrencyOffset = 5; // Must be sorted alphabetically. @@ -517,7 +547,9 @@ static const char * const gSubTypes[] = { "solar-mass", "stone", "ton", - "", + "", // TODO(ICU-21076): manual edit of what should have been generated by Java. + "percent", // TODO(ICU-21076): regenerate, deal with duplication. + "permille", // TODO(ICU-21076): regenerate, deal with duplication. "gigawatt", "horsepower", "kilowatt", @@ -580,6 +612,8 @@ static const char * const gSubTypes[] = { "teaspoon" }; +// unitPerUnitToSingleUnit no longer in use! TODO: remove from code-generation code. 
+ // Shortcuts to the base unit in order to make the default constructor fast static const int32_t kBaseTypeIdx = 16; static const int32_t kBaseSubTypeIdx = 0; @@ -2056,7 +2090,7 @@ MeasureUnit MeasureUnit::getTeaspoon() { return MeasureUnit(22, 33); } -// End generated code for measunit.cpp +// End generated code static int32_t binarySearch( const char * const * array, int32_t start, int32_t end, StringPiece key) { @@ -2105,7 +2139,9 @@ MeasureUnit &MeasureUnit::operator=(const MeasureUnit &other) { if (this == &other) { return *this; } - delete fImpl; + if (fImpl != nullptr) { + delete fImpl; + } if (other.fImpl) { ErrorCode localStatus; fImpl = new MeasureUnitImpl(other.fImpl->copy(localStatus)); @@ -2126,7 +2162,9 @@ MeasureUnit &MeasureUnit::operator=(MeasureUnit &&other) noexcept { if (this == &other) { return *this; } - delete fImpl; + if (fImpl != nullptr) { + delete fImpl; + } fImpl = other.fImpl; other.fImpl = nullptr; fTypeId = other.fTypeId; @@ -2139,8 +2177,10 @@ MeasureUnit *MeasureUnit::clone() const { } MeasureUnit::~MeasureUnit() { - delete fImpl; - fImpl = nullptr; + if (fImpl != nullptr) { + delete fImpl; + fImpl = nullptr; + } } const char *MeasureUnit::getType() const { @@ -2237,11 +2277,9 @@ StringEnumeration* MeasureUnit::getAvailableTypes(UErrorCode &errorCode) { } bool MeasureUnit::findBySubType(StringPiece subType, MeasureUnit* output) { - // Sanity checking kCurrencyOffset and final entry in gOffsets - U_ASSERT(uprv_strcmp(gTypes[kCurrencyOffset], "currency") == 0); - U_ASSERT(gOffsets[UPRV_LENGTHOF(gOffsets) - 1] == UPRV_LENGTHOF(gSubTypes)); - for (int32_t t = 0; t < UPRV_LENGTHOF(gOffsets) - 1; t++) { + // Ensure kCurrencyOffset is set correctly + U_ASSERT(uprv_strcmp(gTypes[kCurrencyOffset], "currency") == 0); // Skip currency units if (t == kCurrencyOffset) { continue; @@ -2298,8 +2336,10 @@ void MeasureUnit::initCurrency(StringPiece isoCurrency) { void MeasureUnit::setTo(int32_t typeId, int32_t subTypeId) { fTypeId = typeId; 
fSubTypeId = subTypeId; - delete fImpl; - fImpl = nullptr; + if (fImpl != nullptr) { + delete fImpl; + fImpl = nullptr; + } } int32_t MeasureUnit::getOffset() const { diff --git a/deps/icu-small/source/i18n/number_skeletons.cpp b/deps/icu-small/source/i18n/number_skeletons.cpp index e6d94d27b2ba5b..028525a589db91 100644 --- a/deps/icu-small/source/i18n/number_skeletons.cpp +++ b/deps/icu-small/source/i18n/number_skeletons.cpp @@ -732,6 +732,7 @@ skeleton::parseStem(const StringSegment& segment, const UCharsTrie& stemTrie, Se case STEM_CURRENCY: CHECK_NULL(seen, unit, status); + CHECK_NULL(seen, perUnit, status); return STATE_CURRENCY_UNIT; case STEM_INTEGER_WIDTH: @@ -1500,32 +1501,33 @@ bool GeneratorHelpers::notation(const MacroProps& macros, UnicodeString& sb, UEr } bool GeneratorHelpers::unit(const MacroProps& macros, UnicodeString& sb, UErrorCode& status) { - if (utils::unitIsCurrency(macros.unit)) { + MeasureUnit unit = macros.unit; + if (!utils::unitIsBaseUnit(macros.perUnit)) { + if (utils::unitIsCurrency(macros.unit) || utils::unitIsCurrency(macros.perUnit)) { + status = U_UNSUPPORTED_ERROR; + return false; + } + unit = unit.product(macros.perUnit.reciprocal(status), status); + } + + if (utils::unitIsCurrency(unit)) { sb.append(u"currency/", -1); - CurrencyUnit currency(macros.unit, status); + CurrencyUnit currency(unit, status); if (U_FAILURE(status)) { return false; } blueprint_helpers::generateCurrencyOption(currency, sb, status); return true; - } else if (utils::unitIsBaseUnit(macros.unit)) { + } else if (utils::unitIsBaseUnit(unit)) { // Default value is not shown in normalized form return false; - } else if (utils::unitIsPercent(macros.unit)) { + } else if (utils::unitIsPercent(unit)) { sb.append(u"percent", -1); return true; - } else if (utils::unitIsPermille(macros.unit)) { + } else if (utils::unitIsPermille(unit)) { sb.append(u"permille", -1); return true; } else { - MeasureUnit unit = macros.unit; - if (utils::unitIsCurrency(macros.perUnit)) { - 
status = U_UNSUPPORTED_ERROR; - return false; - } - if (!utils::unitIsBaseUnit(macros.perUnit)) { - unit = unit.product(macros.perUnit.reciprocal(status), status); - } sb.append(u"unit/", -1); sb.append(unit.getIdentifier()); return true; diff --git a/deps/icu-small/source/i18n/unicode/dtitvfmt.h b/deps/icu-small/source/i18n/unicode/dtitvfmt.h index 3d20d8e9c44990..4a1ab801a04c9d 100644 --- a/deps/icu-small/source/i18n/unicode/dtitvfmt.h +++ b/deps/icu-small/source/i18n/unicode/dtitvfmt.h @@ -1037,6 +1037,17 @@ class U_I18N_API DateIntervalFormat : public Format { UBool suppressDayPeriodField, UnicodeString& adjustedIntervalPattern); + /** + * Does the same thing as UnicodeString::findAndReplace(), except that it won't perform + * the substitution inside quoted literal text. + * @param targetString The string to perform the find-replace operation on. + * @param strToReplace The string to search for and replace in the target string. + * @param strToReplaceWith The string to substitute in wherever `stringToReplace` was found. + */ + static void U_EXPORT2 findReplaceInPattern(UnicodeString& targetString, + const UnicodeString& strToReplace, + const UnicodeString& strToReplaceWith); + /** * Concat a single date pattern with a time interval pattern, * set it into the intervalPatterns, while field is time field. diff --git a/deps/icu-small/source/i18n/unicode/listformatter.h b/deps/icu-small/source/i18n/unicode/listformatter.h index a969a8744dcf58..eddb5dab6701b3 100644 --- a/deps/icu-small/source/i18n/unicode/listformatter.h +++ b/deps/icu-small/source/i18n/unicode/listformatter.h @@ -23,6 +23,8 @@ #if U_SHOW_CPLUSPLUS_API +#if !UCONFIG_NO_FORMATTING + #include "unicode/unistr.h" #include "unicode/locid.h" #include "unicode/formattedvalue.h" @@ -65,7 +67,6 @@ struct ListFormatData : public UMemory { */ -#if !UCONFIG_NO_FORMATTING /** * An immutable class containing the result of a list formatting operation. 
* @@ -135,7 +136,6 @@ class U_I18N_API FormattedList : public UMemory, public FormattedValue { : fData(nullptr), fErrorCode(errorCode) {} friend class ListFormatter; }; -#endif // !UCONFIG_NO_FORMATTING /** @@ -185,8 +185,6 @@ class U_I18N_API ListFormatter : public UObject{ */ static ListFormatter* createInstance(const Locale& locale, UErrorCode& errorCode); -#ifndef U_HIDE_DRAFT_API -#if !UCONFIG_NO_FORMATTING /** * Creates a ListFormatter for the given locale, list type, and style. * @@ -195,12 +193,10 @@ class U_I18N_API ListFormatter : public UObject{ * @param width The width of formatting to use. * @param errorCode ICU error code, set if no data available for the given locale. * @return A ListFormatter object created from internal data derived from CLDR data. - * @draft ICU 67 + * @stable ICU 67 */ static ListFormatter* createInstance( const Locale& locale, UListFormatterType type, UListFormatterWidth width, UErrorCode& errorCode); -#endif /* !UCONFIG_NO_FORMATTING */ -#endif /* U_HIDE_DRAFT_API */ #ifndef U_HIDE_INTERNAL_API /** @@ -239,7 +235,6 @@ class U_I18N_API ListFormatter : public UObject{ UnicodeString& format(const UnicodeString items[], int32_t n_items, UnicodeString& appendTo, UErrorCode& errorCode) const; -#if !UCONFIG_NO_FORMATTING /** * Formats a list of strings to a FormattedList, which exposes field * position information. 
The FormattedList contains more information than @@ -255,7 +250,6 @@ class U_I18N_API ListFormatter : public UObject{ const UnicodeString items[], int32_t n_items, UErrorCode& errorCode) const; -#endif // !UCONFIG_NO_FORMATTING #ifndef U_HIDE_INTERNAL_API /** @@ -296,6 +290,8 @@ class U_I18N_API ListFormatter : public UObject{ U_NAMESPACE_END +#endif /* #if !UCONFIG_NO_FORMATTING */ + #endif /* U_SHOW_CPLUSPLUS_API */ #endif // __LISTFORMATTER_H__ diff --git a/deps/icu-small/source/i18n/unicode/measunit.h b/deps/icu-small/source/i18n/unicode/measunit.h index b9f732ae99011d..ed8773c7710f3e 100644 --- a/deps/icu-small/source/i18n/unicode/measunit.h +++ b/deps/icu-small/source/i18n/unicode/measunit.h @@ -3519,6 +3519,7 @@ class U_I18N_API MeasureUnit: public UObject { */ static MeasureUnit getTeaspoon(); + // End generated createXXX methods protected: diff --git a/deps/icu-small/source/i18n/unicode/ulistformatter.h b/deps/icu-small/source/i18n/unicode/ulistformatter.h index 3dfa9f2d56171f..28a1e580370006 100644 --- a/deps/icu-small/source/i18n/unicode/ulistformatter.h +++ b/deps/icu-small/source/i18n/unicode/ulistformatter.h @@ -62,17 +62,16 @@ typedef enum UListFormatterField { ULISTFMT_ELEMENT_FIELD } UListFormatterField; -#ifndef U_HIDE_DRAFT_API /** * Type of meaning expressed by the list. * - * @draft ICU 67 + * @stable ICU 67 */ typedef enum UListFormatterType { /** * Conjunction formatting, e.g. "Alice, Bob, Charlie, and Delta". * - * @draft ICU 67 + * @stable ICU 67 */ ULISTFMT_TYPE_AND, @@ -80,14 +79,14 @@ typedef enum UListFormatterType { * Disjunction (or alternative, or simply one of) formatting, e.g. * "Alice, Bob, Charlie, or Delta". * - * @draft ICU 67 + * @stable ICU 67 */ ULISTFMT_TYPE_OR, /** * Formatting of a list of values with units, e.g. "5 pounds, 12 ounces". * - * @draft ICU 67 + * @stable ICU 67 */ ULISTFMT_TYPE_UNITS } UListFormatterType; @@ -95,29 +94,28 @@ typedef enum UListFormatterType { /** * Verbosity level of the list patterns. 
* - * @draft ICU 67 + * @stable ICU 67 */ typedef enum UListFormatterWidth { /** * Use list formatting with full words (no abbreviations) when possible. * - * @draft ICU 67 + * @stable ICU 67 */ ULISTFMT_WIDTH_WIDE, /** * Use list formatting of typical length. - * @draft ICU 67 + * @stable ICU 67 */ ULISTFMT_WIDTH_SHORT, /** * Use list formatting of the shortest possible length. - * @draft ICU 67 + * @stable ICU 67 */ ULISTFMT_WIDTH_NARROW, } UListFormatterWidth; -#endif /* U_HIDE_DRAFT_API */ /** * Open a new UListFormatter object using the rules for a given locale. @@ -141,7 +139,6 @@ U_CAPI UListFormatter* U_EXPORT2 ulistfmt_open(const char* locale, UErrorCode* status); -#ifndef U_HIDE_DRAFT_API /** * Open a new UListFormatter object appropriate for the given locale, list type, * and style. @@ -162,12 +159,11 @@ ulistfmt_open(const char* locale, * @return * A pointer to a UListFormatter object for the specified locale, * or NULL if an error occurred. - * @draft ICU 67 + * @stable ICU 67 */ U_CAPI UListFormatter* U_EXPORT2 ulistfmt_openForType(const char* locale, UListFormatterType type, UListFormatterWidth width, UErrorCode* status); -#endif /* U_HIDE_DRAFT_API */ /** * Close a UListFormatter object. Once closed it may no longer be used. diff --git a/doc/guides/maintaining-icu.md b/doc/guides/maintaining-icu.md index 4b0c413c3f9fcb..f98e129c5a86df 100644 --- a/doc/guides/maintaining-icu.md +++ b/doc/guides/maintaining-icu.md @@ -158,7 +158,7 @@ process.versions.icu; new Intl.DateTimeFormat('es', { month: 'long' }).format(new Date(9E8)); ``` -(This should print your updated ICU version number, and also `January` again.) +(This should print your updated ICU version number, and also `enero` again.) You are ready to check in the updated `deps/icu-small`. This is a big commit, so make this a separate commit from the smaller changes. 
diff --git a/tools/icu/current_ver.dep b/tools/icu/current_ver.dep index b4caab129071e1..1c7f45879621c0 100644 --- a/tools/icu/current_ver.dep +++ b/tools/icu/current_ver.dep @@ -1,6 +1,6 @@ [ { - "url": "https://github.com/unicode-org/icu/releases/download/release-68-1/icu4c-68_1-src.tgz", - "md5": "6a99b541ea01f271257b121a4433c7c0" + "url": "https://github.com/unicode-org/icu/releases/download/release-68-2/icu4c-68_2-src.tgz", + "md5": "c21cbdfe31a1e325afe765a16f907d20" } ] diff --git a/tools/icu/icu-generic.gyp b/tools/icu/icu-generic.gyp index 680f8528df1384..36339144926096 100644 --- a/tools/icu/icu-generic.gyp +++ b/tools/icu/icu-generic.gyp @@ -107,80 +107,6 @@ 'sources': [ '<@(icu_src_i18n)' ], - ## if your compiler can dead-strip, these exclusions will - ## make ZERO difference to binary size. - ## Made ICU-specific for future-proofing. - 'conditions': [ - [ 'icu_ver_major == 55', { 'sources!': [ - # alphabetic index - '<(icu_path)/source/i18n/alphaindex.cpp', - # BOCSU - # misc - '<(icu_path)/source/i18n/regexcmp.cpp', - '<(icu_path)/source/i18n/regexcmp.h', - '<(icu_path)/source/i18n/regexcst.h', - '<(icu_path)/source/i18n/regeximp.cpp', - '<(icu_path)/source/i18n/regeximp.h', - '<(icu_path)/source/i18n/regexst.cpp', - '<(icu_path)/source/i18n/regexst.h', - '<(icu_path)/source/i18n/regextxt.cpp', - '<(icu_path)/source/i18n/regextxt.h', - '<(icu_path)/source/i18n/region.cpp', - '<(icu_path)/source/i18n/region_impl.h', - '<(icu_path)/source/i18n/reldatefmt.cpp', - '<(icu_path)/source/i18n/reldatefmt.h' - '<(icu_path)/source/i18n/scientificformathelper.cpp', - '<(icu_path)/source/i18n/tmunit.cpp', - '<(icu_path)/source/i18n/tmutamt.cpp', - '<(icu_path)/source/i18n/tmutfmt.cpp', - '<(icu_path)/source/i18n/uregex.cpp', - '<(icu_path)/source/i18n/uregexc.cpp', - '<(icu_path)/source/i18n/uregion.cpp', - '<(icu_path)/source/i18n/uspoof.cpp', - '<(icu_path)/source/i18n/uspoof_build.cpp', - '<(icu_path)/source/i18n/uspoof_conf.cpp', - 
'<(icu_path)/source/i18n/uspoof_conf.h', - '<(icu_path)/source/i18n/uspoof_impl.cpp', - '<(icu_path)/source/i18n/uspoof_impl.h', - '<(icu_path)/source/i18n/uspoof_wsconf.cpp', - '<(icu_path)/source/i18n/uspoof_wsconf.h', - ]}], - [ 'icu_ver_major == 57', { 'sources!': [ - - # alphabetic index - '<(icu_path)/source/i18n/alphaindex.cpp', - # BOCSU - # misc - '<(icu_path)/source/i18n/regexcmp.cpp', - '<(icu_path)/source/i18n/regexcmp.h', - '<(icu_path)/source/i18n/regexcst.h', - '<(icu_path)/source/i18n/regeximp.cpp', - '<(icu_path)/source/i18n/regeximp.h', - '<(icu_path)/source/i18n/regexst.cpp', - '<(icu_path)/source/i18n/regexst.h', - '<(icu_path)/source/i18n/regextxt.cpp', - '<(icu_path)/source/i18n/regextxt.h', - '<(icu_path)/source/i18n/region.cpp', - '<(icu_path)/source/i18n/region_impl.h', - '<(icu_path)/source/i18n/reldatefmt.cpp', - '<(icu_path)/source/i18n/reldatefmt.h' - '<(icu_path)/source/i18n/scientificformathelper.cpp', - '<(icu_path)/source/i18n/tmunit.cpp', - '<(icu_path)/source/i18n/tmutamt.cpp', - '<(icu_path)/source/i18n/tmutfmt.cpp', - '<(icu_path)/source/i18n/uregex.cpp', - '<(icu_path)/source/i18n/uregexc.cpp', - '<(icu_path)/source/i18n/uregion.cpp', - '<(icu_path)/source/i18n/uspoof.cpp', - '<(icu_path)/source/i18n/uspoof_build.cpp', - '<(icu_path)/source/i18n/uspoof_conf.cpp', - '<(icu_path)/source/i18n/uspoof_conf.h', - '<(icu_path)/source/i18n/uspoof_impl.cpp', - '<(icu_path)/source/i18n/uspoof_impl.h', - '<(icu_path)/source/i18n/uspoof_wsconf.cpp', - '<(icu_path)/source/i18n/uspoof_wsconf.h', - ]}], - ], 'include_dirs': [ '<(icu_path)/source/i18n', ], @@ -410,35 +336,6 @@ ## make ZERO difference to binary size. ## Made ICU-specific for future-proofing. 'conditions': [ - [ 'icu_ver_major == 55', { 'sources!': [ - - # bidi- not needed (yet!) 
- '<(icu_path)/source/common/ubidi.c', - '<(icu_path)/source/common/ubidiimp.h', - '<(icu_path)/source/common/ubidiln.c', - '<(icu_path)/source/common/ubidiwrt.c', - #'<(icu_path)/source/common/ubidi_props.c', - #'<(icu_path)/source/common/ubidi_props.h', - #'<(icu_path)/source/common/ubidi_props_data.h', - # and the callers - '<(icu_path)/source/common/ushape.cpp', - ]}], - [ 'icu_ver_major == 57', { 'sources!': [ - # work around http://bugs.icu-project.org/trac/ticket/12451 - # (benign afterwards) - '<(icu_path)/source/common/cstr.cpp', - - # bidi- not needed (yet!) - '<(icu_path)/source/common/ubidi.c', - '<(icu_path)/source/common/ubidiimp.h', - '<(icu_path)/source/common/ubidiln.c', - '<(icu_path)/source/common/ubidiwrt.c', - #'<(icu_path)/source/common/ubidi_props.c', - #'<(icu_path)/source/common/ubidi_props.h', - #'<(icu_path)/source/common/ubidi_props_data.h', - # and the callers - '<(icu_path)/source/common/ushape.cpp', - ]}], [ 'OS == "solaris"', { 'defines': [ '_XOPEN_SOURCE_EXTENDED=0', ]}], diff --git a/tools/icu/patches/64/source/common/putil.cpp b/tools/icu/patches/64/source/common/putil.cpp deleted file mode 100644 index 59cf232afee6a8..00000000000000 --- a/tools/icu/patches/64/source/common/putil.cpp +++ /dev/null @@ -1,2415 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* -****************************************************************************** -* -* Copyright (C) 1997-2016, International Business Machines -* Corporation and others. All Rights Reserved. -* -****************************************************************************** -* -* FILE NAME : putil.c (previously putil.cpp and ptypes.cpp) -* -* Date Name Description -* 04/14/97 aliu Creation. -* 04/24/97 aliu Added getDefaultDataDirectory() and -* getDefaultLocaleID(). -* 04/28/97 aliu Rewritten to assume Unix and apply general methods -* for assumed case. Non-UNIX platforms must be -* special-cased. 
Rewrote numeric methods dealing -* with NaN and Infinity to be platform independent -* over all IEEE 754 platforms. -* 05/13/97 aliu Restored sign of timezone -* (semantics are hours West of GMT) -* 06/16/98 erm Added IEEE_754 stuff, cleaned up isInfinite, isNan, -* nextDouble.. -* 07/22/98 stephen Added remainder, max, min, trunc -* 08/13/98 stephen Added isNegativeInfinity, isPositiveInfinity -* 08/24/98 stephen Added longBitsFromDouble -* 09/08/98 stephen Minor changes for Mac Port -* 03/02/99 stephen Removed openFile(). Added AS400 support. -* Fixed EBCDIC tables -* 04/15/99 stephen Converted to C. -* 06/28/99 stephen Removed mutex locking in u_isBigEndian(). -* 08/04/99 jeffrey R. Added OS/2 changes -* 11/15/99 helena Integrated S/390 IEEE support. -* 04/26/01 Barry N. OS/400 support for uprv_getDefaultLocaleID -* 08/15/01 Steven H. OS/400 support for uprv_getDefaultCodepage -* 01/03/08 Steven L. Fake Time Support -****************************************************************************** -*/ - -// Defines _XOPEN_SOURCE for access to POSIX functions. -// Must be before any other #includes. -#include "uposixdefs.h" - -// First, the platform type. Need this for U_PLATFORM. -#include "unicode/platform.h" - -#if U_PLATFORM == U_PF_MINGW && defined __STRICT_ANSI__ -/* tzset isn't defined in strict ANSI on MinGW. */ -#undef __STRICT_ANSI__ -#endif - -/* - * Cygwin with GCC requires inclusion of time.h after the above disabling strict asci mode statement. - */ -#include - -#if !U_PLATFORM_USES_ONLY_WIN32_API -#include -#endif - -/* include the rest of the ICU headers */ -#include "unicode/putil.h" -#include "unicode/ustring.h" -#include "putilimp.h" -#include "uassert.h" -#include "umutex.h" -#include "cmemory.h" -#include "cstring.h" -#include "locmap.h" -#include "ucln_cmn.h" -#include "charstr.h" - -/* Include standard headers. 
*/ -#include -#include -#include -#include -#include -#include - -#ifndef U_COMMON_IMPLEMENTATION -#error U_COMMON_IMPLEMENTATION not set - must be set for all ICU source files in common/ - see http://userguide.icu-project.org/howtouseicu -#endif - - -/* include system headers */ -#if U_PLATFORM_USES_ONLY_WIN32_API - /* - * TODO: U_PLATFORM_USES_ONLY_WIN32_API includes MinGW. - * Should Cygwin be included as well (U_PLATFORM_HAS_WIN32_API) - * to use native APIs as much as possible? - */ -#ifndef WIN32_LEAN_AND_MEAN -# define WIN32_LEAN_AND_MEAN -#endif -# define VC_EXTRALEAN -# define NOUSER -# define NOSERVICE -# define NOIME -# define NOMCX -# include -# include "unicode/uloc.h" -# include "wintz.h" -#elif U_PLATFORM == U_PF_OS400 -# include -# include /* error code structure */ -# include -# include /* EPT_CALL macro - this include must be after all other "QSYSINCs" */ -# include /* For uprv_maximumPtr */ -#elif U_PLATFORM == U_PF_OS390 -# include "unicode/ucnv.h" /* Needed for UCNV_SWAP_LFNL_OPTION_STRING */ -#elif U_PLATFORM_IS_DARWIN_BASED || U_PLATFORM_IS_LINUX_BASED || U_PLATFORM == U_PF_BSD || U_PLATFORM == U_PF_SOLARIS -# include -# include -# if U_PLATFORM == U_PF_SOLARIS -# ifndef _XPG4_2 -# define _XPG4_2 -# endif -# endif -#elif U_PLATFORM == U_PF_QNX -# include -#endif - -/* - * Only include langinfo.h if we have a way to get the codeset. If we later - * depend on more feature, we can test on U_HAVE_NL_LANGINFO. - * - */ - -#if U_HAVE_NL_LANGINFO_CODESET -#include -#endif - -/** - * Simple things (presence of functions, etc) should just go in configure.in and be added to - * icucfg.h via autoheader. 
- */ -#if U_PLATFORM_IMPLEMENTS_POSIX -# if U_PLATFORM == U_PF_OS400 -# define HAVE_DLFCN_H 0 -# define HAVE_DLOPEN 0 -# else -# ifndef HAVE_DLFCN_H -# define HAVE_DLFCN_H 1 -# endif -# ifndef HAVE_DLOPEN -# define HAVE_DLOPEN 1 -# endif -# endif -# ifndef HAVE_GETTIMEOFDAY -# define HAVE_GETTIMEOFDAY 1 -# endif -#else -# define HAVE_DLFCN_H 0 -# define HAVE_DLOPEN 0 -# define HAVE_GETTIMEOFDAY 0 -#endif - -U_NAMESPACE_USE - -/* Define the extension for data files, again... */ -#define DATA_TYPE "dat" - -/* Leave this copyright notice here! */ -static const char copyright[] = U_COPYRIGHT_STRING; - -/* floating point implementations ------------------------------------------- */ - -/* We return QNAN rather than SNAN*/ -#define SIGN 0x80000000U - -/* Make it easy to define certain types of constants */ -typedef union { - int64_t i64; /* This must be defined first in order to allow the initialization to work. This is a C89 feature. */ - double d64; -} BitPatternConversion; -static const BitPatternConversion gNan = { (int64_t) INT64_C(0x7FF8000000000000) }; -static const BitPatternConversion gInf = { (int64_t) INT64_C(0x7FF0000000000000) }; - -/*--------------------------------------------------------------------------- - Platform utilities - Our general strategy is to assume we're on a POSIX platform. Platforms which - are non-POSIX must declare themselves so. The default POSIX implementation - will sometimes work for non-POSIX platforms as well (e.g., the NaN-related - functions). - ---------------------------------------------------------------------------*/ - -#if U_PLATFORM_USES_ONLY_WIN32_API || U_PLATFORM == U_PF_OS400 -# undef U_POSIX_LOCALE -#else -# define U_POSIX_LOCALE 1 -#endif - -/* - WARNING! u_topNBytesOfDouble and u_bottomNBytesOfDouble - can't be properly optimized by the gcc compiler sometimes (i.e. gcc 3.2). 
-*/ -#if !IEEE_754 -static char* -u_topNBytesOfDouble(double* d, int n) -{ -#if U_IS_BIG_ENDIAN - return (char*)d; -#else - return (char*)(d + 1) - n; -#endif -} - -static char* -u_bottomNBytesOfDouble(double* d, int n) -{ -#if U_IS_BIG_ENDIAN - return (char*)(d + 1) - n; -#else - return (char*)d; -#endif -} -#endif /* !IEEE_754 */ - -#if IEEE_754 -static UBool -u_signBit(double d) { - uint8_t hiByte; -#if U_IS_BIG_ENDIAN - hiByte = *(uint8_t *)&d; -#else - hiByte = *(((uint8_t *)&d) + sizeof(double) - 1); -#endif - return (hiByte & 0x80) != 0; -} -#endif - - - -#if defined (U_DEBUG_FAKETIME) -/* Override the clock to test things without having to move the system clock. - * Assumes POSIX gettimeofday() will function - */ -UDate fakeClock_t0 = 0; /** Time to start the clock from **/ -UDate fakeClock_dt = 0; /** Offset (fake time - real time) **/ -UBool fakeClock_set = FALSE; /** True if fake clock has spun up **/ - -static UDate getUTCtime_real() { - struct timeval posixTime; - gettimeofday(&posixTime, NULL); - return (UDate)(((int64_t)posixTime.tv_sec * U_MILLIS_PER_SECOND) + (posixTime.tv_usec/1000)); -} - -static UDate getUTCtime_fake() { - static UMutex fakeClockMutex = U_MUTEX_INTIALIZER; - umtx_lock(&fakeClockMutex); - if(!fakeClock_set) { - UDate real = getUTCtime_real(); - const char *fake_start = getenv("U_FAKETIME_START"); - if((fake_start!=NULL) && (fake_start[0]!=0)) { - sscanf(fake_start,"%lf",&fakeClock_t0); - fakeClock_dt = fakeClock_t0 - real; - fprintf(stderr,"U_DEBUG_FAKETIME was set at compile time, so the ICU clock will start at a preset value\n" - "env variable U_FAKETIME_START=%.0f (%s) for an offset of %.0f ms from the current time %.0f\n", - fakeClock_t0, fake_start, fakeClock_dt, real); - } else { - fakeClock_dt = 0; - fprintf(stderr,"U_DEBUG_FAKETIME was set at compile time, but U_FAKETIME_START was not set.\n" - "Set U_FAKETIME_START to the number of milliseconds since 1/1/1970 to set the ICU clock.\n"); - } - fakeClock_set = TRUE; - } - 
umtx_unlock(&fakeClockMutex); - - return getUTCtime_real() + fakeClock_dt; -} -#endif - -#if U_PLATFORM_USES_ONLY_WIN32_API -typedef union { - int64_t int64; - FILETIME fileTime; -} FileTimeConversion; /* This is like a ULARGE_INTEGER */ - -/* Number of 100 nanoseconds from 1/1/1601 to 1/1/1970 */ -#define EPOCH_BIAS INT64_C(116444736000000000) -#define HECTONANOSECOND_PER_MILLISECOND 10000 - -#endif - -/*--------------------------------------------------------------------------- - Universal Implementations - These are designed to work on all platforms. Try these, and if they - don't work on your platform, then special case your platform with new - implementations. ----------------------------------------------------------------------------*/ - -U_CAPI UDate U_EXPORT2 -uprv_getUTCtime() -{ -#if defined(U_DEBUG_FAKETIME) - return getUTCtime_fake(); /* Hook for overriding the clock */ -#else - return uprv_getRawUTCtime(); -#endif -} - -/* Return UTC (GMT) time measured in milliseconds since 0:00 on 1/1/70.*/ -U_CAPI UDate U_EXPORT2 -uprv_getRawUTCtime() -{ -#if U_PLATFORM_USES_ONLY_WIN32_API - - FileTimeConversion winTime; - GetSystemTimeAsFileTime(&winTime.fileTime); - return (UDate)((winTime.int64 - EPOCH_BIAS) / HECTONANOSECOND_PER_MILLISECOND); -#else - -#if HAVE_GETTIMEOFDAY - struct timeval posixTime; - gettimeofday(&posixTime, NULL); - return (UDate)(((int64_t)posixTime.tv_sec * U_MILLIS_PER_SECOND) + (posixTime.tv_usec/1000)); -#else - time_t epochtime; - time(&epochtime); - return (UDate)epochtime * U_MILLIS_PER_SECOND; -#endif - -#endif -} - -/*----------------------------------------------------------------------------- - IEEE 754 - These methods detect and return NaN and infinity values for doubles - conforming to IEEE 754. Platforms which support this standard include X86, - Mac 680x0, Mac PowerPC, AIX RS/6000, and most others. - If this doesn't work on your platform, you have non-IEEE floating-point, and - will need to code your own versions. 
A naive implementation is to return 0.0 - for getNaN and getInfinity, and false for isNaN and isInfinite. - ---------------------------------------------------------------------------*/ - -U_CAPI UBool U_EXPORT2 -uprv_isNaN(double number) -{ -#if IEEE_754 - BitPatternConversion convertedNumber; - convertedNumber.d64 = number; - /* Infinity is 0x7FF0000000000000U. Anything greater than that is a NaN */ - return (UBool)((convertedNumber.i64 & U_INT64_MAX) > gInf.i64); - -#elif U_PLATFORM == U_PF_OS390 - uint32_t highBits = *(uint32_t*)u_topNBytesOfDouble(&number, - sizeof(uint32_t)); - uint32_t lowBits = *(uint32_t*)u_bottomNBytesOfDouble(&number, - sizeof(uint32_t)); - - return ((highBits & 0x7F080000L) == 0x7F080000L) && - (lowBits == 0x00000000L); - -#else - /* If your platform doesn't support IEEE 754 but *does* have an NaN value,*/ - /* you'll need to replace this default implementation with what's correct*/ - /* for your platform.*/ - return number != number; -#endif -} - -U_CAPI UBool U_EXPORT2 -uprv_isInfinite(double number) -{ -#if IEEE_754 - BitPatternConversion convertedNumber; - convertedNumber.d64 = number; - /* Infinity is exactly 0x7FF0000000000000U. 
*/ - return (UBool)((convertedNumber.i64 & U_INT64_MAX) == gInf.i64); -#elif U_PLATFORM == U_PF_OS390 - uint32_t highBits = *(uint32_t*)u_topNBytesOfDouble(&number, - sizeof(uint32_t)); - uint32_t lowBits = *(uint32_t*)u_bottomNBytesOfDouble(&number, - sizeof(uint32_t)); - - return ((highBits & ~SIGN) == 0x70FF0000L) && (lowBits == 0x00000000L); - -#else - /* If your platform doesn't support IEEE 754 but *does* have an infinity*/ - /* value, you'll need to replace this default implementation with what's*/ - /* correct for your platform.*/ - return number == (2.0 * number); -#endif -} - -U_CAPI UBool U_EXPORT2 -uprv_isPositiveInfinity(double number) -{ -#if IEEE_754 || U_PLATFORM == U_PF_OS390 - return (UBool)(number > 0 && uprv_isInfinite(number)); -#else - return uprv_isInfinite(number); -#endif -} - -U_CAPI UBool U_EXPORT2 -uprv_isNegativeInfinity(double number) -{ -#if IEEE_754 || U_PLATFORM == U_PF_OS390 - return (UBool)(number < 0 && uprv_isInfinite(number)); - -#else - uint32_t highBits = *(uint32_t*)u_topNBytesOfDouble(&number, - sizeof(uint32_t)); - return((highBits & SIGN) && uprv_isInfinite(number)); - -#endif -} - -U_CAPI double U_EXPORT2 -uprv_getNaN() -{ -#if IEEE_754 || U_PLATFORM == U_PF_OS390 - return gNan.d64; -#else - /* If your platform doesn't support IEEE 754 but *does* have an NaN value,*/ - /* you'll need to replace this default implementation with what's correct*/ - /* for your platform.*/ - return 0.0; -#endif -} - -U_CAPI double U_EXPORT2 -uprv_getInfinity() -{ -#if IEEE_754 || U_PLATFORM == U_PF_OS390 - return gInf.d64; -#else - /* If your platform doesn't support IEEE 754 but *does* have an infinity*/ - /* value, you'll need to replace this default implementation with what's*/ - /* correct for your platform.*/ - return 0.0; -#endif -} - -U_CAPI double U_EXPORT2 -uprv_floor(double x) -{ - return floor(x); -} - -U_CAPI double U_EXPORT2 -uprv_ceil(double x) -{ - return ceil(x); -} - -U_CAPI double U_EXPORT2 -uprv_round(double x) -{ - return 
uprv_floor(x + 0.5); -} - -U_CAPI double U_EXPORT2 -uprv_fabs(double x) -{ - return fabs(x); -} - -U_CAPI double U_EXPORT2 -uprv_modf(double x, double* y) -{ - return modf(x, y); -} - -U_CAPI double U_EXPORT2 -uprv_fmod(double x, double y) -{ - return fmod(x, y); -} - -U_CAPI double U_EXPORT2 -uprv_pow(double x, double y) -{ - /* This is declared as "double pow(double x, double y)" */ - return pow(x, y); -} - -U_CAPI double U_EXPORT2 -uprv_pow10(int32_t x) -{ - return pow(10.0, (double)x); -} - -U_CAPI double U_EXPORT2 -uprv_fmax(double x, double y) -{ -#if IEEE_754 - /* first handle NaN*/ - if(uprv_isNaN(x) || uprv_isNaN(y)) - return uprv_getNaN(); - - /* check for -0 and 0*/ - if(x == 0.0 && y == 0.0 && u_signBit(x)) - return y; - -#endif - - /* this should work for all flt point w/o NaN and Inf special cases */ - return (x > y ? x : y); -} - -U_CAPI double U_EXPORT2 -uprv_fmin(double x, double y) -{ -#if IEEE_754 - /* first handle NaN*/ - if(uprv_isNaN(x) || uprv_isNaN(y)) - return uprv_getNaN(); - - /* check for -0 and 0*/ - if(x == 0.0 && y == 0.0 && u_signBit(y)) - return y; - -#endif - - /* this should work for all flt point w/o NaN and Inf special cases */ - return (x > y ? y : x); -} - -U_CAPI UBool U_EXPORT2 -uprv_add32_overflow(int32_t a, int32_t b, int32_t* res) { - // NOTE: Some compilers (GCC, Clang) have primitives available, like __builtin_add_overflow. - // This function could be optimized by calling one of those primitives. - auto a64 = static_cast(a); - auto b64 = static_cast(b); - int64_t res64 = a64 + b64; - *res = static_cast(res64); - return res64 != *res; -} - -U_CAPI UBool U_EXPORT2 -uprv_mul32_overflow(int32_t a, int32_t b, int32_t* res) { - // NOTE: Some compilers (GCC, Clang) have primitives available, like __builtin_mul_overflow. - // This function could be optimized by calling one of those primitives. 
- auto a64 = static_cast(a); - auto b64 = static_cast(b); - int64_t res64 = a64 * b64; - *res = static_cast(res64); - return res64 != *res; -} - -/** - * Truncates the given double. - * trunc(3.3) = 3.0, trunc (-3.3) = -3.0 - * This is different than calling floor() or ceil(): - * floor(3.3) = 3, floor(-3.3) = -4 - * ceil(3.3) = 4, ceil(-3.3) = -3 - */ -U_CAPI double U_EXPORT2 -uprv_trunc(double d) -{ -#if IEEE_754 - /* handle error cases*/ - if(uprv_isNaN(d)) - return uprv_getNaN(); - if(uprv_isInfinite(d)) - return uprv_getInfinity(); - - if(u_signBit(d)) /* Signbit() picks up -0.0; d<0 does not. */ - return ceil(d); - else - return floor(d); - -#else - return d >= 0 ? floor(d) : ceil(d); - -#endif -} - -/** - * Return the largest positive number that can be represented by an integer - * type of arbitrary bit length. - */ -U_CAPI double U_EXPORT2 -uprv_maxMantissa(void) -{ - return pow(2.0, DBL_MANT_DIG + 1.0) - 1.0; -} - -U_CAPI double U_EXPORT2 -uprv_log(double d) -{ - return log(d); -} - -U_CAPI void * U_EXPORT2 -uprv_maximumPtr(void * base) -{ -#if U_PLATFORM == U_PF_OS400 - /* - * With the provided function we should never be out of range of a given segment - * (a traditional/typical segment that is). Our segments have 5 bytes for the - * id and 3 bytes for the offset. The key is that the casting takes care of - * only retrieving the offset portion minus x1000. Hence, the smallest offset - * seen in a program is x001000 and when casted to an int would be 0. - * That's why we can only add 0xffefff. Otherwise, we would exceed the segment. - * - * Currently, 16MB is the current addressing limitation on i5/OS if the activation is - * non-TERASPACE. If it is TERASPACE it is 2GB - 4k(header information). 
- * This function determines the activation based on the pointer that is passed in and - * calculates the appropriate maximum available size for - * each pointer type (TERASPACE and non-TERASPACE) - * - * Unlike other operating systems, the pointer model isn't determined at - * compile time on i5/OS. - */ - if ((base != NULL) && (_TESTPTR(base, _C_TERASPACE_CHECK))) { - /* if it is a TERASPACE pointer the max is 2GB - 4k */ - return ((void *)(((char *)base)-((uint32_t)(base))+((uint32_t)0x7fffefff))); - } - /* otherwise 16MB since NULL ptr is not checkable or the ptr is not TERASPACE */ - return ((void *)(((char *)base)-((uint32_t)(base))+((uint32_t)0xffefff))); - -#else - return U_MAX_PTR(base); -#endif -} - -/*--------------------------------------------------------------------------- - Platform-specific Implementations - Try these, and if they don't work on your platform, then special case your - platform with new implementations. - ---------------------------------------------------------------------------*/ - -/* Generic time zone layer -------------------------------------------------- */ - -/* Time zone utilities */ -U_CAPI void U_EXPORT2 -uprv_tzset() -{ -#if defined(U_TZSET) - U_TZSET(); -#else - /* no initialization*/ -#endif -} - -U_CAPI int32_t U_EXPORT2 -uprv_timezone() -{ -#ifdef U_TIMEZONE - return U_TIMEZONE; -#else - time_t t, t1, t2; - struct tm tmrec; - int32_t tdiff = 0; - - time(&t); - uprv_memcpy( &tmrec, localtime(&t), sizeof(tmrec) ); -#if U_PLATFORM != U_PF_IPHONE - UBool dst_checked = (tmrec.tm_isdst != 0); /* daylight savings time is checked*/ -#endif - t1 = mktime(&tmrec); /* local time in seconds*/ - uprv_memcpy( &tmrec, gmtime(&t), sizeof(tmrec) ); - t2 = mktime(&tmrec); /* GMT (or UTC) in seconds*/ - tdiff = t2 - t1; - -#if U_PLATFORM != U_PF_IPHONE - /* imitate NT behaviour, which returns same timezone offset to GMT for - winter and summer. - This does not work on all platforms. 
For instance, on glibc on Linux - and on Mac OS 10.5, tdiff calculated above remains the same - regardless of whether DST is in effect or not. iOS is another - platform where this does not work. Linux + glibc and Mac OS 10.5 - have U_TIMEZONE defined so that this code is not reached. - */ - if (dst_checked) - tdiff += 3600; -#endif - return tdiff; -#endif -} - -/* Note that U_TZNAME does *not* have to be tzname, but if it is, - some platforms need to have it declared here. */ - -#if defined(U_TZNAME) && (U_PLATFORM == U_PF_IRIX || U_PLATFORM_IS_DARWIN_BASED) -/* RS6000 and others reject char **tzname. */ -extern U_IMPORT char *U_TZNAME[]; -#endif - -#if !UCONFIG_NO_FILE_IO && ((U_PLATFORM_IS_DARWIN_BASED && (U_PLATFORM != U_PF_IPHONE || defined(U_TIMEZONE))) || U_PLATFORM_IS_LINUX_BASED || U_PLATFORM == U_PF_BSD || U_PLATFORM == U_PF_SOLARIS) -/* These platforms are likely to use Olson timezone IDs. */ -/* common targets of the symbolic link at TZDEFAULT are: - * "/usr/share/zoneinfo/" default, older Linux distros, macOS to 10.12 - * "../usr/share/zoneinfo/" newer Linux distros: Red Hat Enterprise Linux 7, Ubuntu 16, SuSe Linux 12 - * "/usr/share/lib/zoneinfo/" Solaris - * "../usr/share/lib/zoneinfo/" Solaris - * "/var/db/timezone/zoneinfo/" macOS 10.13 - * To avoid checking lots of paths, just check that the target path - * before the ends with "/zoneinfo/", and the is valid. - */ - -#define CHECK_LOCALTIME_LINK 1 -#if U_PLATFORM_IS_DARWIN_BASED -#include -#define TZZONEINFO (TZDIR "/") -#elif U_PLATFORM == U_PF_SOLARIS -#define TZDEFAULT "/etc/localtime" -#define TZZONEINFO "/usr/share/lib/zoneinfo/" -#define TZ_ENV_CHECK "localtime" -#else -#define TZDEFAULT "/etc/localtime" -#define TZZONEINFO "/usr/share/zoneinfo/" -#endif -#define TZZONEINFOTAIL "/zoneinfo/" -#if U_HAVE_DIRENT_H -#define TZFILE_SKIP "posixrules" /* tz file to skip when searching. 
*/ -/* Some Linux distributions have 'localtime' in /usr/share/zoneinfo - symlinked to /etc/localtime, which makes searchForTZFile return - 'localtime' when it's the first match. */ -#define TZFILE_SKIP2 "localtime" -#define SEARCH_TZFILE -#include /* Needed to search through system timezone files */ -#endif -static char gTimeZoneBuffer[PATH_MAX]; -static char *gTimeZoneBufferPtr = NULL; -#endif - -#if !U_PLATFORM_USES_ONLY_WIN32_API -#define isNonDigit(ch) (ch < '0' || '9' < ch) -static UBool isValidOlsonID(const char *id) { - int32_t idx = 0; - - /* Determine if this is something like Iceland (Olson ID) - or AST4ADT (non-Olson ID) */ - while (id[idx] && isNonDigit(id[idx]) && id[idx] != ',') { - idx++; - } - - /* If we went through the whole string, then it might be okay. - The timezone is sometimes set to "CST-7CDT", "CST6CDT5,J129,J131/19:30", - "GRNLNDST3GRNLNDDT" or similar, so we cannot use it. - The rest of the time it could be an Olson ID. George */ - return (UBool)(id[idx] == 0 - || uprv_strcmp(id, "PST8PDT") == 0 - || uprv_strcmp(id, "MST7MDT") == 0 - || uprv_strcmp(id, "CST6CDT") == 0 - || uprv_strcmp(id, "EST5EDT") == 0); -} - -/* On some Unix-like OS, 'posix' subdirectory in - /usr/share/zoneinfo replicates the top-level contents. 'right' - subdirectory has the same set of files, but individual files - are different from those in the top-level directory or 'posix' - because 'right' has files for TAI (Int'l Atomic Time) while 'posix' - has files for UTC. - When the first match for /etc/localtime is in either of them - (usually in posix because 'right' has different file contents), - or TZ environment variable points to one of them, createTimeZone - fails because, say, 'posix/America/New_York' is not an Olson - timezone id ('America/New_York' is). So, we have to skip - 'posix/' and 'right/' at the beginning. 
*/ -static void skipZoneIDPrefix(const char** id) { - if (uprv_strncmp(*id, "posix/", 6) == 0 - || uprv_strncmp(*id, "right/", 6) == 0) - { - *id += 6; - } -} -#endif - -#if defined(U_TZNAME) && !U_PLATFORM_USES_ONLY_WIN32_API - -#define CONVERT_HOURS_TO_SECONDS(offset) (int32_t)(offset*3600) -typedef struct OffsetZoneMapping { - int32_t offsetSeconds; - int32_t daylightType; /* 0=U_DAYLIGHT_NONE, 1=daylight in June-U_DAYLIGHT_JUNE, 2=daylight in December=U_DAYLIGHT_DECEMBER*/ - const char *stdID; - const char *dstID; - const char *olsonID; -} OffsetZoneMapping; - -enum { U_DAYLIGHT_NONE=0,U_DAYLIGHT_JUNE=1,U_DAYLIGHT_DECEMBER=2 }; - -/* -This list tries to disambiguate a set of abbreviated timezone IDs and offsets -and maps it to an Olson ID. -Before adding anything to this list, take a look at -icu/source/tools/tzcode/tz.alias -Sometimes no daylight savings (0) is important to define due to aliases. -This list can be tested with icu/source/test/compat/tzone.pl -More values could be added to daylightType to increase precision. 
-*/ -static const struct OffsetZoneMapping OFFSET_ZONE_MAPPINGS[] = { - {-45900, 2, "CHAST", "CHADT", "Pacific/Chatham"}, - {-43200, 1, "PETT", "PETST", "Asia/Kamchatka"}, - {-43200, 2, "NZST", "NZDT", "Pacific/Auckland"}, - {-43200, 1, "ANAT", "ANAST", "Asia/Anadyr"}, - {-39600, 1, "MAGT", "MAGST", "Asia/Magadan"}, - {-37800, 2, "LHST", "LHST", "Australia/Lord_Howe"}, - {-36000, 2, "EST", "EST", "Australia/Sydney"}, - {-36000, 1, "SAKT", "SAKST", "Asia/Sakhalin"}, - {-36000, 1, "VLAT", "VLAST", "Asia/Vladivostok"}, - {-34200, 2, "CST", "CST", "Australia/South"}, - {-32400, 1, "YAKT", "YAKST", "Asia/Yakutsk"}, - {-32400, 1, "CHOT", "CHOST", "Asia/Choibalsan"}, - {-31500, 2, "CWST", "CWST", "Australia/Eucla"}, - {-28800, 1, "IRKT", "IRKST", "Asia/Irkutsk"}, - {-28800, 1, "ULAT", "ULAST", "Asia/Ulaanbaatar"}, - {-28800, 2, "WST", "WST", "Australia/West"}, - {-25200, 1, "HOVT", "HOVST", "Asia/Hovd"}, - {-25200, 1, "KRAT", "KRAST", "Asia/Krasnoyarsk"}, - {-21600, 1, "NOVT", "NOVST", "Asia/Novosibirsk"}, - {-21600, 1, "OMST", "OMSST", "Asia/Omsk"}, - {-18000, 1, "YEKT", "YEKST", "Asia/Yekaterinburg"}, - {-14400, 1, "SAMT", "SAMST", "Europe/Samara"}, - {-14400, 1, "AMT", "AMST", "Asia/Yerevan"}, - {-14400, 1, "AZT", "AZST", "Asia/Baku"}, - {-10800, 1, "AST", "ADT", "Asia/Baghdad"}, - {-10800, 1, "MSK", "MSD", "Europe/Moscow"}, - {-10800, 1, "VOLT", "VOLST", "Europe/Volgograd"}, - {-7200, 0, "EET", "CEST", "Africa/Tripoli"}, - {-7200, 1, "EET", "EEST", "Europe/Athens"}, /* Conflicts with Africa/Cairo */ - {-7200, 1, "IST", "IDT", "Asia/Jerusalem"}, - {-3600, 0, "CET", "WEST", "Africa/Algiers"}, - {-3600, 2, "WAT", "WAST", "Africa/Windhoek"}, - {0, 1, "GMT", "IST", "Europe/Dublin"}, - {0, 1, "GMT", "BST", "Europe/London"}, - {0, 0, "WET", "WEST", "Africa/Casablanca"}, - {0, 0, "WET", "WET", "Africa/El_Aaiun"}, - {3600, 1, "AZOT", "AZOST", "Atlantic/Azores"}, - {3600, 1, "EGT", "EGST", "America/Scoresbysund"}, - {10800, 1, "PMST", "PMDT", "America/Miquelon"}, - {10800, 2, 
"UYT", "UYST", "America/Montevideo"}, - {10800, 1, "WGT", "WGST", "America/Godthab"}, - {10800, 2, "BRT", "BRST", "Brazil/East"}, - {12600, 1, "NST", "NDT", "America/St_Johns"}, - {14400, 1, "AST", "ADT", "Canada/Atlantic"}, - {14400, 2, "AMT", "AMST", "America/Cuiaba"}, - {14400, 2, "CLT", "CLST", "Chile/Continental"}, - {14400, 2, "FKT", "FKST", "Atlantic/Stanley"}, - {14400, 2, "PYT", "PYST", "America/Asuncion"}, - {18000, 1, "CST", "CDT", "America/Havana"}, - {18000, 1, "EST", "EDT", "US/Eastern"}, /* Conflicts with America/Grand_Turk */ - {21600, 2, "EAST", "EASST", "Chile/EasterIsland"}, - {21600, 0, "CST", "MDT", "Canada/Saskatchewan"}, - {21600, 0, "CST", "CDT", "America/Guatemala"}, - {21600, 1, "CST", "CDT", "US/Central"}, /* Conflicts with Mexico/General */ - {25200, 1, "MST", "MDT", "US/Mountain"}, /* Conflicts with Mexico/BajaSur */ - {28800, 0, "PST", "PST", "Pacific/Pitcairn"}, - {28800, 1, "PST", "PDT", "US/Pacific"}, /* Conflicts with Mexico/BajaNorte */ - {32400, 1, "AKST", "AKDT", "US/Alaska"}, - {36000, 1, "HAST", "HADT", "US/Aleutian"} -}; - -/*#define DEBUG_TZNAME*/ - -static const char* remapShortTimeZone(const char *stdID, const char *dstID, int32_t daylightType, int32_t offset) -{ - int32_t idx; -#ifdef DEBUG_TZNAME - fprintf(stderr, "TZ=%s std=%s dst=%s daylight=%d offset=%d\n", getenv("TZ"), stdID, dstID, daylightType, offset); -#endif - for (idx = 0; idx < UPRV_LENGTHOF(OFFSET_ZONE_MAPPINGS); idx++) - { - if (offset == OFFSET_ZONE_MAPPINGS[idx].offsetSeconds - && daylightType == OFFSET_ZONE_MAPPINGS[idx].daylightType - && strcmp(OFFSET_ZONE_MAPPINGS[idx].stdID, stdID) == 0 - && strcmp(OFFSET_ZONE_MAPPINGS[idx].dstID, dstID) == 0) - { - return OFFSET_ZONE_MAPPINGS[idx].olsonID; - } - } - return NULL; -} -#endif - -#ifdef SEARCH_TZFILE -#define MAX_READ_SIZE 512 - -typedef struct DefaultTZInfo { - char* defaultTZBuffer; - int64_t defaultTZFileSize; - FILE* defaultTZFilePtr; - UBool defaultTZstatus; - int32_t defaultTZPosition; -} 
DefaultTZInfo; - -/* - * This method compares the two files given to see if they are a match. - * It is currently use to compare two TZ files. - */ -static UBool compareBinaryFiles(const char* defaultTZFileName, const char* TZFileName, DefaultTZInfo* tzInfo) { - FILE* file; - int64_t sizeFile; - int64_t sizeFileLeft; - int32_t sizeFileRead; - int32_t sizeFileToRead; - char bufferFile[MAX_READ_SIZE]; - UBool result = TRUE; - - if (tzInfo->defaultTZFilePtr == NULL) { - tzInfo->defaultTZFilePtr = fopen(defaultTZFileName, "r"); - } - file = fopen(TZFileName, "r"); - - tzInfo->defaultTZPosition = 0; /* reset position to begin search */ - - if (file != NULL && tzInfo->defaultTZFilePtr != NULL) { - /* First check that the file size are equal. */ - if (tzInfo->defaultTZFileSize == 0) { - fseek(tzInfo->defaultTZFilePtr, 0, SEEK_END); - tzInfo->defaultTZFileSize = ftell(tzInfo->defaultTZFilePtr); - } - fseek(file, 0, SEEK_END); - sizeFile = ftell(file); - sizeFileLeft = sizeFile; - - if (sizeFile != tzInfo->defaultTZFileSize) { - result = FALSE; - } else { - /* Store the data from the files in seperate buffers and - * compare each byte to determine equality. - */ - if (tzInfo->defaultTZBuffer == NULL) { - rewind(tzInfo->defaultTZFilePtr); - tzInfo->defaultTZBuffer = (char*)uprv_malloc(sizeof(char) * tzInfo->defaultTZFileSize); - sizeFileRead = fread(tzInfo->defaultTZBuffer, 1, tzInfo->defaultTZFileSize, tzInfo->defaultTZFilePtr); - } - rewind(file); - while(sizeFileLeft > 0) { - uprv_memset(bufferFile, 0, MAX_READ_SIZE); - sizeFileToRead = sizeFileLeft < MAX_READ_SIZE ? 
sizeFileLeft : MAX_READ_SIZE; - - sizeFileRead = fread(bufferFile, 1, sizeFileToRead, file); - if (memcmp(tzInfo->defaultTZBuffer + tzInfo->defaultTZPosition, bufferFile, sizeFileRead) != 0) { - result = FALSE; - break; - } - sizeFileLeft -= sizeFileRead; - tzInfo->defaultTZPosition += sizeFileRead; - } - } - } else { - result = FALSE; - } - - if (file != NULL) { - fclose(file); - } - - return result; -} - - -/* dirent also lists two entries: "." and ".." that we can safely ignore. */ -#define SKIP1 "." -#define SKIP2 ".." -static UBool U_CALLCONV putil_cleanup(void); -static CharString *gSearchTZFileResult = NULL; - -/* - * This method recursively traverses the directory given for a matching TZ file and returns the first match. - * This function is not thread safe - it uses a global, gSearchTZFileResult, to hold its results. - */ -static char* searchForTZFile(const char* path, DefaultTZInfo* tzInfo) { - DIR* dirp = NULL; - struct dirent* dirEntry = NULL; - char* result = NULL; - UErrorCode status = U_ZERO_ERROR; - - /* Save the current path */ - CharString curpath(path, -1, status); - if (U_FAILURE(status)) { - goto cleanupAndReturn; - } - - dirp = opendir(path); - if (dirp == NULL) { - goto cleanupAndReturn; - } - - if (gSearchTZFileResult == NULL) { - gSearchTZFileResult = new CharString; - if (gSearchTZFileResult == NULL) { - goto cleanupAndReturn; - } - ucln_common_registerCleanup(UCLN_COMMON_PUTIL, putil_cleanup); - } - - /* Check each entry in the directory. */ - while((dirEntry = readdir(dirp)) != NULL) { - const char* dirName = dirEntry->d_name; - if (uprv_strcmp(dirName, SKIP1) != 0 && uprv_strcmp(dirName, SKIP2) != 0 - && uprv_strcmp(TZFILE_SKIP, dirName) != 0 && uprv_strcmp(TZFILE_SKIP2, dirName) != 0) { - /* Create a newpath with the new entry to test each entry in the directory. 
*/ - CharString newpath(curpath, status); - newpath.append(dirName, -1, status); - if (U_FAILURE(status)) { - break; - } - - DIR* subDirp = NULL; - if ((subDirp = opendir(newpath.data())) != NULL) { - /* If this new path is a directory, make a recursive call with the newpath. */ - closedir(subDirp); - newpath.append('/', status); - if (U_FAILURE(status)) { - break; - } - result = searchForTZFile(newpath.data(), tzInfo); - /* - Have to get out here. Otherwise, we'd keep looking - and return the first match in the top-level directory - if there's a match in the top-level. If not, this function - would return NULL and set gTimeZoneBufferPtr to NULL in initDefault(). - It worked without this in most cases because we have a fallback of calling - localtime_r to figure out the default timezone. - */ - if (result != NULL) - break; - } else { - if(compareBinaryFiles(TZDEFAULT, newpath.data(), tzInfo)) { - int32_t amountToSkip = sizeof(TZZONEINFO) - 1; - if (amountToSkip > newpath.length()) { - amountToSkip = newpath.length(); - } - const char* zoneid = newpath.data() + amountToSkip; - skipZoneIDPrefix(&zoneid); - gSearchTZFileResult->clear(); - gSearchTZFileResult->append(zoneid, -1, status); - if (U_FAILURE(status)) { - break; - } - result = gSearchTZFileResult->data(); - /* Get out after the first one found. */ - break; - } - } - } - } - - cleanupAndReturn: - if (dirp) { - closedir(dirp); - } - return result; -} -#endif - -U_CAPI void U_EXPORT2 -uprv_tzname_clear_cache() -{ -#if defined(CHECK_LOCALTIME_LINK) && !defined(DEBUG_SKIP_LOCALTIME_LINK) - gTimeZoneBufferPtr = NULL; -#endif -} - -U_CAPI const char* U_EXPORT2 -uprv_tzname(int n) -{ - (void)n; // Avoid unreferenced parameter warning. 
- const char *tzid = NULL; -#if U_PLATFORM_USES_ONLY_WIN32_API - tzid = uprv_detectWindowsTimeZone(); - - if (tzid != NULL) { - return tzid; - } - -#ifndef U_TZNAME - // The return value is free'd in timezone.cpp on Windows because - // the other code path returns a pointer to a heap location. - // If we don't have a name already, then tzname wouldn't be any - // better, so just fall back. - return uprv_strdup(""); -#endif // !U_TZNAME - -#else - -/*#if U_PLATFORM_IS_DARWIN_BASED - int ret; - - tzid = getenv("TZFILE"); - if (tzid != NULL) { - return tzid; - } -#endif*/ - -/* This code can be temporarily disabled to test tzname resolution later on. */ -#ifndef DEBUG_TZNAME - tzid = getenv("TZ"); - if (tzid != NULL && isValidOlsonID(tzid) -#if U_PLATFORM == U_PF_SOLARIS - /* When TZ equals localtime on Solaris, check the /etc/localtime file. */ - && uprv_strcmp(tzid, TZ_ENV_CHECK) != 0 -#endif - ) { - /* The colon forces tzset() to treat the remainder as zoneinfo path */ - if (tzid[0] == ':') { - tzid++; - } - /* This might be a good Olson ID. */ - skipZoneIDPrefix(&tzid); - return tzid; - } - /* else U_TZNAME will give a better result. */ -#endif - -#if defined(CHECK_LOCALTIME_LINK) && !defined(DEBUG_SKIP_LOCALTIME_LINK) - /* Caller must handle threading issues */ - if (gTimeZoneBufferPtr == NULL) { - /* - This is a trick to look at the name of the link to get the Olson ID - because the tzfile contents is underspecified. - This isn't guaranteed to work because it may not be a symlink. 
- */ - int32_t ret = (int32_t)readlink(TZDEFAULT, gTimeZoneBuffer, sizeof(gTimeZoneBuffer)-1); - if (0 < ret) { - int32_t tzZoneInfoTailLen = uprv_strlen(TZZONEINFOTAIL); - gTimeZoneBuffer[ret] = 0; - char * tzZoneInfoTailPtr = uprv_strstr(gTimeZoneBuffer, TZZONEINFOTAIL); - - if (tzZoneInfoTailPtr != NULL - && isValidOlsonID(tzZoneInfoTailPtr + tzZoneInfoTailLen)) - { - return (gTimeZoneBufferPtr = tzZoneInfoTailPtr + tzZoneInfoTailLen); - } - } else { -#if defined(SEARCH_TZFILE) - DefaultTZInfo* tzInfo = (DefaultTZInfo*)uprv_malloc(sizeof(DefaultTZInfo)); - if (tzInfo != NULL) { - tzInfo->defaultTZBuffer = NULL; - tzInfo->defaultTZFileSize = 0; - tzInfo->defaultTZFilePtr = NULL; - tzInfo->defaultTZstatus = FALSE; - tzInfo->defaultTZPosition = 0; - - gTimeZoneBufferPtr = searchForTZFile(TZZONEINFO, tzInfo); - - /* Free previously allocated memory */ - if (tzInfo->defaultTZBuffer != NULL) { - uprv_free(tzInfo->defaultTZBuffer); - } - if (tzInfo->defaultTZFilePtr != NULL) { - fclose(tzInfo->defaultTZFilePtr); - } - uprv_free(tzInfo); - } - - if (gTimeZoneBufferPtr != NULL && isValidOlsonID(gTimeZoneBufferPtr)) { - return gTimeZoneBufferPtr; - } -#endif - } - } - else { - return gTimeZoneBufferPtr; - } -#endif -#endif - -#ifdef U_TZNAME -#if U_PLATFORM_USES_ONLY_WIN32_API - /* The return value is free'd in timezone.cpp on Windows because - * the other code path returns a pointer to a heap location. */ - return uprv_strdup(U_TZNAME[n]); -#else - /* - U_TZNAME is usually a non-unique abbreviation, which isn't normally usable. - So we remap the abbreviation to an olson ID. - - Since Windows exposes a little more timezone information, - we normally don't use this code on Windows because - uprv_detectWindowsTimeZone should have already given the correct answer. 
- */ - { - struct tm juneSol, decemberSol; - int daylightType; - static const time_t juneSolstice=1182478260; /*2007-06-21 18:11 UT*/ - static const time_t decemberSolstice=1198332540; /*2007-12-22 06:09 UT*/ - - /* This probing will tell us when daylight savings occurs. */ - localtime_r(&juneSolstice, &juneSol); - localtime_r(&decemberSolstice, &decemberSol); - if(decemberSol.tm_isdst > 0) { - daylightType = U_DAYLIGHT_DECEMBER; - } else if(juneSol.tm_isdst > 0) { - daylightType = U_DAYLIGHT_JUNE; - } else { - daylightType = U_DAYLIGHT_NONE; - } - tzid = remapShortTimeZone(U_TZNAME[0], U_TZNAME[1], daylightType, uprv_timezone()); - if (tzid != NULL) { - return tzid; - } - } - return U_TZNAME[n]; -#endif -#else - return ""; -#endif -} - -/* Get and set the ICU data directory --------------------------------------- */ - -static icu::UInitOnce gDataDirInitOnce = U_INITONCE_INITIALIZER; -static char *gDataDirectory = NULL; - -UInitOnce gTimeZoneFilesInitOnce = U_INITONCE_INITIALIZER; -static CharString *gTimeZoneFilesDirectory = NULL; - -#if U_POSIX_LOCALE || U_PLATFORM_USES_ONLY_WIN32_API - static const char *gCorrectedPOSIXLocale = NULL; /* Sometimes heap allocated */ - static bool gCorrectedPOSIXLocaleHeapAllocated = false; -#endif - -static UBool U_CALLCONV putil_cleanup(void) -{ - if (gDataDirectory && *gDataDirectory) { - uprv_free(gDataDirectory); - } - gDataDirectory = NULL; - gDataDirInitOnce.reset(); - - delete gTimeZoneFilesDirectory; - gTimeZoneFilesDirectory = NULL; - gTimeZoneFilesInitOnce.reset(); - -#ifdef SEARCH_TZFILE - delete gSearchTZFileResult; - gSearchTZFileResult = NULL; -#endif - -#if U_POSIX_LOCALE || U_PLATFORM_USES_ONLY_WIN32_API - if (gCorrectedPOSIXLocale && gCorrectedPOSIXLocaleHeapAllocated) { - uprv_free(const_cast(gCorrectedPOSIXLocale)); - gCorrectedPOSIXLocale = NULL; - gCorrectedPOSIXLocaleHeapAllocated = false; - } -#endif - return TRUE; -} - -/* - * Set the data directory. 
- * Make a copy of the passed string, and set the global data dir to point to it. - */ -U_CAPI void U_EXPORT2 -u_setDataDirectory(const char *directory) { - char *newDataDir; - int32_t length; - - if(directory==NULL || *directory==0) { - /* A small optimization to prevent the malloc and copy when the - shared library is used, and this is a way to make sure that NULL - is never returned. - */ - newDataDir = (char *)""; - } - else { - length=(int32_t)uprv_strlen(directory); - newDataDir = (char *)uprv_malloc(length + 2); - /* Exit out if newDataDir could not be created. */ - if (newDataDir == NULL) { - return; - } - uprv_strcpy(newDataDir, directory); - -#if (U_FILE_SEP_CHAR != U_FILE_ALT_SEP_CHAR) - { - char *p; - while((p = uprv_strchr(newDataDir, U_FILE_ALT_SEP_CHAR)) != NULL) { - *p = U_FILE_SEP_CHAR; - } - } -#endif - } - - if (gDataDirectory && *gDataDirectory) { - uprv_free(gDataDirectory); - } - gDataDirectory = newDataDir; - ucln_common_registerCleanup(UCLN_COMMON_PUTIL, putil_cleanup); -} - -U_CAPI UBool U_EXPORT2 -uprv_pathIsAbsolute(const char *path) -{ - if(!path || !*path) { - return FALSE; - } - - if(*path == U_FILE_SEP_CHAR) { - return TRUE; - } - -#if (U_FILE_SEP_CHAR != U_FILE_ALT_SEP_CHAR) - if(*path == U_FILE_ALT_SEP_CHAR) { - return TRUE; - } -#endif - -#if U_PLATFORM_USES_ONLY_WIN32_API - if( (((path[0] >= 'A') && (path[0] <= 'Z')) || - ((path[0] >= 'a') && (path[0] <= 'z'))) && - path[1] == ':' ) { - return TRUE; - } -#endif - - return FALSE; -} - -/* Backup setting of ICU_DATA_DIR_PREFIX_ENV_VAR - (needed for some Darwin ICU build environments) */ -#if U_PLATFORM_IS_DARWIN_BASED && TARGET_OS_SIMULATOR -# if !defined(ICU_DATA_DIR_PREFIX_ENV_VAR) -# define ICU_DATA_DIR_PREFIX_ENV_VAR "IPHONE_SIMULATOR_ROOT" -# endif -#endif - -#if U_PLATFORM_HAS_WINUWP_API != 0 -// Helper function to get the ICU Data Directory under the Windows directory location. 
-static BOOL U_CALLCONV getIcuDataDirectoryUnderWindowsDirectory(char* directoryBuffer, UINT bufferLength) -{ -#if defined(ICU_DATA_DIR_WINDOWS) - wchar_t windowsPath[MAX_PATH]; - char windowsPathUtf8[MAX_PATH]; - - UINT length = GetSystemWindowsDirectoryW(windowsPath, UPRV_LENGTHOF(windowsPath)); - if ((length > 0) && (length < (UPRV_LENGTHOF(windowsPath) - 1))) { - // Convert UTF-16 to a UTF-8 string. - UErrorCode status = U_ZERO_ERROR; - int32_t windowsPathUtf8Len = 0; - u_strToUTF8(windowsPathUtf8, static_cast(UPRV_LENGTHOF(windowsPathUtf8)), - &windowsPathUtf8Len, reinterpret_cast(windowsPath), -1, &status); - - if (U_SUCCESS(status) && (status != U_STRING_NOT_TERMINATED_WARNING) && - (windowsPathUtf8Len < (UPRV_LENGTHOF(windowsPathUtf8) - 1))) { - // Ensure it always has a separator, so we can append the ICU data path. - if (windowsPathUtf8[windowsPathUtf8Len - 1] != U_FILE_SEP_CHAR) { - windowsPathUtf8[windowsPathUtf8Len++] = U_FILE_SEP_CHAR; - windowsPathUtf8[windowsPathUtf8Len] = '\0'; - } - // Check if the concatenated string will fit. - if ((windowsPathUtf8Len + UPRV_LENGTHOF(ICU_DATA_DIR_WINDOWS)) < bufferLength) { - uprv_strcpy(directoryBuffer, windowsPathUtf8); - uprv_strcat(directoryBuffer, ICU_DATA_DIR_WINDOWS); - return TRUE; - } - } - } -#endif - - return FALSE; -} -#endif - -static void U_CALLCONV dataDirectoryInitFn() { - /* If we already have the directory, then return immediately. Will happen if user called - * u_setDataDirectory(). - */ - if (gDataDirectory) { - return; - } - - const char *path = NULL; -#if defined(ICU_DATA_DIR_PREFIX_ENV_VAR) - char datadir_path_buffer[PATH_MAX]; -#endif - - /* - When ICU_NO_USER_DATA_OVERRIDE is defined, users aren't allowed to - override ICU's data with the ICU_DATA environment variable. This prevents - problems where multiple custom copies of ICU's specific version of data - are installed on a system. 
Either the application must define the data - directory with u_setDataDirectory, define ICU_DATA_DIR when compiling - ICU, set the data with udata_setCommonData or trust that all of the - required data is contained in ICU's data library that contains - the entry point defined by U_ICUDATA_ENTRY_POINT. - - There may also be some platforms where environment variables - are not allowed. - */ -# if !defined(ICU_NO_USER_DATA_OVERRIDE) && !UCONFIG_NO_FILE_IO - /* First try to get the environment variable */ -# if U_PLATFORM_HAS_WINUWP_API == 0 // Windows UWP does not support getenv - path=getenv("ICU_DATA"); -# endif -# endif - - /* ICU_DATA_DIR may be set as a compile option. - * U_ICU_DATA_DEFAULT_DIR is provided and is set by ICU at compile time - * and is used only when data is built in archive mode eliminating the need - * for ICU_DATA_DIR to be set. U_ICU_DATA_DEFAULT_DIR is set to the installation - * directory of the data dat file. Users should use ICU_DATA_DIR if they want to - * set their own path. - */ -#if defined(ICU_DATA_DIR) || defined(U_ICU_DATA_DEFAULT_DIR) - if(path==NULL || *path==0) { -# if defined(ICU_DATA_DIR_PREFIX_ENV_VAR) - const char *prefix = getenv(ICU_DATA_DIR_PREFIX_ENV_VAR); -# endif -# ifdef ICU_DATA_DIR - path=ICU_DATA_DIR; -# else - path=U_ICU_DATA_DEFAULT_DIR; -# endif -# if defined(ICU_DATA_DIR_PREFIX_ENV_VAR) - if (prefix != NULL) { - snprintf(datadir_path_buffer, PATH_MAX, "%s%s", prefix, path); - path=datadir_path_buffer; - } -# endif - } -#endif - -#if U_PLATFORM_HAS_WINUWP_API != 0 && defined(ICU_DATA_DIR_WINDOWS) - char datadir_path_buffer[MAX_PATH]; - if (getIcuDataDirectoryUnderWindowsDirectory(datadir_path_buffer, UPRV_LENGTHOF(datadir_path_buffer))) { - path = datadir_path_buffer; - } -#endif - - if(path==NULL) { - /* It looks really bad, set it to something. 
*/ - path = ""; - } - - u_setDataDirectory(path); - return; -} - -U_CAPI const char * U_EXPORT2 -u_getDataDirectory(void) { - umtx_initOnce(gDataDirInitOnce, &dataDirectoryInitFn); - return gDataDirectory; -} - -static void setTimeZoneFilesDir(const char *path, UErrorCode &status) { - if (U_FAILURE(status)) { - return; - } - gTimeZoneFilesDirectory->clear(); - gTimeZoneFilesDirectory->append(path, status); -#if (U_FILE_SEP_CHAR != U_FILE_ALT_SEP_CHAR) - char *p = gTimeZoneFilesDirectory->data(); - while ((p = uprv_strchr(p, U_FILE_ALT_SEP_CHAR)) != NULL) { - *p = U_FILE_SEP_CHAR; - } -#endif -} - -#define TO_STRING(x) TO_STRING_2(x) -#define TO_STRING_2(x) #x - -static void U_CALLCONV TimeZoneDataDirInitFn(UErrorCode &status) { - U_ASSERT(gTimeZoneFilesDirectory == NULL); - ucln_common_registerCleanup(UCLN_COMMON_PUTIL, putil_cleanup); - gTimeZoneFilesDirectory = new CharString(); - if (gTimeZoneFilesDirectory == NULL) { - status = U_MEMORY_ALLOCATION_ERROR; - return; - } - - const char *dir = ""; - -#if U_PLATFORM_HAS_WINUWP_API != 0 - // The UWP version does not support the environment variable setting, but can possibly pick them up from the Windows directory. - char datadir_path_buffer[MAX_PATH]; - if (getIcuDataDirectoryUnderWindowsDirectory(datadir_path_buffer, UPRV_LENGTHOF(datadir_path_buffer))) { - dir = datadir_path_buffer; - } -#else - dir = getenv("ICU_TIMEZONE_FILES_DIR"); -#endif // U_PLATFORM_HAS_WINUWP_API - -#if defined(U_TIMEZONE_FILES_DIR) - if (dir == NULL) { - // Build time configuration setting. - dir = TO_STRING(U_TIMEZONE_FILES_DIR); - } -#endif - - if (dir == NULL) { - dir = ""; - } - - setTimeZoneFilesDir(dir, status); -} - - -U_CAPI const char * U_EXPORT2 -u_getTimeZoneFilesDirectory(UErrorCode *status) { - umtx_initOnce(gTimeZoneFilesInitOnce, &TimeZoneDataDirInitFn, *status); - return U_SUCCESS(*status) ? 
gTimeZoneFilesDirectory->data() : ""; -} - -U_CAPI void U_EXPORT2 -u_setTimeZoneFilesDirectory(const char *path, UErrorCode *status) { - umtx_initOnce(gTimeZoneFilesInitOnce, &TimeZoneDataDirInitFn, *status); - setTimeZoneFilesDir(path, *status); - - // Note: this function does some extra churn, first setting based on the - // environment, then immediately replacing with the value passed in. - // The logic is simpler that way, and performance shouldn't be an issue. -} - - -#if U_POSIX_LOCALE -/* A helper function used by uprv_getPOSIXIDForDefaultLocale and - * uprv_getPOSIXIDForDefaultCodepage. Returns the posix locale id for - * LC_CTYPE and LC_MESSAGES. It doesn't support other locale categories. - */ -static const char *uprv_getPOSIXIDForCategory(int category) -{ - const char* posixID = NULL; - if (category == LC_MESSAGES || category == LC_CTYPE) { - /* - * On Solaris two different calls to setlocale can result in - * different values. Only get this value once. - * - * We must check this first because an application can set this. - * - * LC_ALL can't be used because it's platform dependent. The LANG - * environment variable seems to affect LC_CTYPE variable by default. - * Here is what setlocale(LC_ALL, NULL) can return. - * HPUX can return 'C C C C C C C' - * Solaris can return /en_US/C/C/C/C/C on the second try. - * Linux can return LC_CTYPE=C;LC_NUMERIC=C;... - * - * The default codepage detection also needs to use LC_CTYPE. - * - * Do not call setlocale(LC_*, "")! Using an empty string instead - * of NULL, will modify the libc behavior. - */ - posixID = setlocale(category, NULL); - if ((posixID == 0) - || (uprv_strcmp("C", posixID) == 0) - || (uprv_strcmp("POSIX", posixID) == 0)) - { - /* Maybe we got some garbage. Try something more reasonable */ - posixID = getenv("LC_ALL"); - /* Solaris speaks POSIX - See IEEE Std 1003.1-2008 - * This is needed to properly handle empty env. 
variables - */ -#if U_PLATFORM == U_PF_SOLARIS - if ((posixID == 0) || (posixID[0] == '\0')) { - posixID = getenv(category == LC_MESSAGES ? "LC_MESSAGES" : "LC_CTYPE"); - if ((posixID == 0) || (posixID[0] == '\0')) { -#else - if (posixID == 0) { - posixID = getenv(category == LC_MESSAGES ? "LC_MESSAGES" : "LC_CTYPE"); - if (posixID == 0) { -#endif - posixID = getenv("LANG"); - } - } - } - } - if ((posixID==0) - || (uprv_strcmp("C", posixID) == 0) - || (uprv_strcmp("POSIX", posixID) == 0)) - { - /* Nothing worked. Give it a nice POSIX default value. */ - posixID = "en_US_POSIX"; - // Note: this test will not catch 'C.UTF-8', - // that will be handled in uprv_getDefaultLocaleID(). - // Leave this mapping here for the uprv_getPOSIXIDForDefaultCodepage() - // caller which expects to see "en_US_POSIX" in many branches. - } - return posixID; -} - -/* Return just the POSIX id for the default locale, whatever happens to be in - * it. It gets the value from LC_MESSAGES and indirectly from LC_ALL and LANG. - */ -static const char *uprv_getPOSIXIDForDefaultLocale(void) -{ - static const char* posixID = NULL; - if (posixID == 0) { - posixID = uprv_getPOSIXIDForCategory(LC_MESSAGES); - } - return posixID; -} - -#if !U_CHARSET_IS_UTF8 -/* Return just the POSIX id for the default codepage, whatever happens to be in - * it. It gets the value from LC_CTYPE and indirectly from LC_ALL and LANG. - */ -static const char *uprv_getPOSIXIDForDefaultCodepage(void) -{ - static const char* posixID = NULL; - if (posixID == 0) { - posixID = uprv_getPOSIXIDForCategory(LC_CTYPE); - } - return posixID; -} -#endif -#endif - -/* NOTE: The caller should handle thread safety */ -U_CAPI const char* U_EXPORT2 -uprv_getDefaultLocaleID() -{ -#if U_POSIX_LOCALE -/* - Note that: (a '!' 
means the ID is improper somehow) - LC_ALL ----> default_loc codepage --------------------------------------------------------- - ab.CD ab CD - ab@CD ab__CD - - ab@CD.EF ab__CD EF - - ab_CD.EF@GH ab_CD_GH EF - -Some 'improper' ways to do the same as above: - ! ab_CD@GH.EF ab_CD_GH EF - ! ab_CD.EF@GH.IJ ab_CD_GH EF - ! ab_CD@ZZ.EF@GH.IJ ab_CD_GH EF - - _CD@GH _CD_GH - - _CD.EF@GH _CD_GH EF - -The variant cannot have dots in it. -The 'rightmost' variant (@xxx) wins. -The leftmost codepage (.xxx) wins. -*/ - const char* posixID = uprv_getPOSIXIDForDefaultLocale(); - - /* Format: (no spaces) - ll [ _CC ] [ . MM ] [ @ VV] - - l = lang, C = ctry, M = charmap, V = variant - */ - - if (gCorrectedPOSIXLocale != nullptr) { - return gCorrectedPOSIXLocale; - } - - // Copy the ID into owned memory. - // Over-allocate in case we replace "C" with "en_US_POSIX" (+10), + null termination - char *correctedPOSIXLocale = static_cast(uprv_malloc(uprv_strlen(posixID) + 10 + 1)); - if (correctedPOSIXLocale == nullptr) { - return nullptr; - } - uprv_strcpy(correctedPOSIXLocale, posixID); - - char *limit; - if ((limit = uprv_strchr(correctedPOSIXLocale, '.')) != nullptr) { - *limit = 0; - } - if ((limit = uprv_strchr(correctedPOSIXLocale, '@')) != nullptr) { - *limit = 0; - } - - if ((uprv_strcmp("C", correctedPOSIXLocale) == 0) // no @ variant - || (uprv_strcmp("POSIX", correctedPOSIXLocale) == 0)) { - // Raw input was C.* or POSIX.*, Give it a nice POSIX default value. - // (The "C"/"POSIX" case is handled in uprv_getPOSIXIDForCategory()) - uprv_strcpy(correctedPOSIXLocale, "en_US_POSIX"); - } - - /* Note that we scan the *uncorrected* ID. */ - const char *p; - if ((p = uprv_strrchr(posixID, '@')) != nullptr) { - p++; - - /* Take care of any special cases here.. */ - if (!uprv_strcmp(p, "nynorsk")) { - p = "NY"; - /* Don't worry about no__NY. In practice, it won't appear. 
*/ - } - - if (uprv_strchr(correctedPOSIXLocale,'_') == nullptr) { - uprv_strcat(correctedPOSIXLocale, "__"); /* aa@b -> aa__b (note this can make the new locale 1 char longer) */ - } - else { - uprv_strcat(correctedPOSIXLocale, "_"); /* aa_CC@b -> aa_CC_b */ - } - - const char *q; - if ((q = uprv_strchr(p, '.')) != nullptr) { - /* How big will the resulting string be? */ - int32_t len = (int32_t)(uprv_strlen(correctedPOSIXLocale) + (q-p)); - uprv_strncat(correctedPOSIXLocale, p, q-p); // do not include charset - correctedPOSIXLocale[len] = 0; - } - else { - /* Anything following the @ sign */ - uprv_strcat(correctedPOSIXLocale, p); - } - - /* Should there be a map from 'no@nynorsk' -> no_NO_NY here? - * How about 'russian' -> 'ru'? - * Many of the other locales using ISO codes will be handled by the - * canonicalization functions in uloc_getDefault. - */ - } - - if (gCorrectedPOSIXLocale == nullptr) { - gCorrectedPOSIXLocale = correctedPOSIXLocale; - gCorrectedPOSIXLocaleHeapAllocated = true; - ucln_common_registerCleanup(UCLN_COMMON_PUTIL, putil_cleanup); - correctedPOSIXLocale = nullptr; - } - posixID = gCorrectedPOSIXLocale; - - if (correctedPOSIXLocale != nullptr) { /* Was already set - clean up. */ - uprv_free(correctedPOSIXLocale); - } - - return posixID; - -#elif U_PLATFORM_USES_ONLY_WIN32_API -#define POSIX_LOCALE_CAPACITY 64 - UErrorCode status = U_ZERO_ERROR; - char *correctedPOSIXLocale = nullptr; - - // If we have already figured this out just use the cached value - if (gCorrectedPOSIXLocale != nullptr) { - return gCorrectedPOSIXLocale; - } - - // No cached value, need to determine the current value - static WCHAR windowsLocale[LOCALE_NAME_MAX_LENGTH] = {}; - int length = GetLocaleInfoEx(LOCALE_NAME_USER_DEFAULT, LOCALE_SNAME, windowsLocale, LOCALE_NAME_MAX_LENGTH); - - // Now we should have a Windows locale name that needs converted to the POSIX style. - if (length > 0) // If length is 0, then the GetLocaleInfoEx failed. 
- { - // First we need to go from UTF-16 to char (and also convert from _ to - while we're at it.) - char modifiedWindowsLocale[LOCALE_NAME_MAX_LENGTH] = {}; - - int32_t i; - for (i = 0; i < UPRV_LENGTHOF(modifiedWindowsLocale); i++) - { - if (windowsLocale[i] == '_') - { - modifiedWindowsLocale[i] = '-'; - } - else - { - modifiedWindowsLocale[i] = static_cast(windowsLocale[i]); - } - - if (modifiedWindowsLocale[i] == '\0') - { - break; - } - } - - if (i >= UPRV_LENGTHOF(modifiedWindowsLocale)) - { - // Ran out of room, can't really happen, maybe we'll be lucky about a matching - // locale when tags are dropped - modifiedWindowsLocale[UPRV_LENGTHOF(modifiedWindowsLocale) - 1] = '\0'; - } - - // Now normalize the resulting name - correctedPOSIXLocale = static_cast(uprv_malloc(POSIX_LOCALE_CAPACITY + 1)); - /* TODO: Should we just exit on memory allocation failure? */ - if (correctedPOSIXLocale) - { - int32_t posixLen = uloc_canonicalize(modifiedWindowsLocale, correctedPOSIXLocale, POSIX_LOCALE_CAPACITY, &status); - if (U_SUCCESS(status)) - { - *(correctedPOSIXLocale + posixLen) = 0; - gCorrectedPOSIXLocale = correctedPOSIXLocale; - gCorrectedPOSIXLocaleHeapAllocated = true; - ucln_common_registerCleanup(UCLN_COMMON_PUTIL, putil_cleanup); - } - else - { - uprv_free(correctedPOSIXLocale); - } - } - } - - // If unable to find a locale we can agree upon, use en-US by default - if (gCorrectedPOSIXLocale == nullptr) { - gCorrectedPOSIXLocale = "en_US"; - } - return gCorrectedPOSIXLocale; - -#elif U_PLATFORM == U_PF_OS400 - /* locales are process scoped and are by definition thread safe */ - static char correctedLocale[64]; - const char *localeID = getenv("LC_ALL"); - char *p; - - if (localeID == NULL) - localeID = getenv("LANG"); - if (localeID == NULL) - localeID = setlocale(LC_ALL, NULL); - /* Make sure we have something... */ - if (localeID == NULL) - return "en_US_POSIX"; - - /* Extract the locale name from the path. 
*/ - if((p = uprv_strrchr(localeID, '/')) != NULL) - { - /* Increment p to start of locale name. */ - p++; - localeID = p; - } - - /* Copy to work location. */ - uprv_strcpy(correctedLocale, localeID); - - /* Strip off the '.locale' extension. */ - if((p = uprv_strchr(correctedLocale, '.')) != NULL) { - *p = 0; - } - - /* Upper case the locale name. */ - T_CString_toUpperCase(correctedLocale); - - /* See if we are using the POSIX locale. Any of the - * following are equivalent and use the same QLGPGCMA - * (POSIX) locale. - * QLGPGCMA2 means UCS2 - * QLGPGCMA_4 means UTF-32 - * QLGPGCMA_8 means UTF-8 - */ - if ((uprv_strcmp("C", correctedLocale) == 0) || - (uprv_strcmp("POSIX", correctedLocale) == 0) || - (uprv_strncmp("QLGPGCMA", correctedLocale, 8) == 0)) - { - uprv_strcpy(correctedLocale, "en_US_POSIX"); - } - else - { - int16_t LocaleLen; - - /* Lower case the lang portion. */ - for(p = correctedLocale; *p != 0 && *p != '_'; p++) - { - *p = uprv_tolower(*p); - } - - /* Adjust for Euro. After '_E' add 'URO'. */ - LocaleLen = uprv_strlen(correctedLocale); - if (correctedLocale[LocaleLen - 2] == '_' && - correctedLocale[LocaleLen - 1] == 'E') - { - uprv_strcat(correctedLocale, "URO"); - } - - /* If using Lotus-based locale then convert to - * equivalent non Lotus. - */ - else if (correctedLocale[LocaleLen - 2] == '_' && - correctedLocale[LocaleLen - 1] == 'L') - { - correctedLocale[LocaleLen - 2] = 0; - } - - /* There are separate simplified and traditional - * locales called zh_HK_S and zh_HK_T. - */ - else if (uprv_strncmp(correctedLocale, "zh_HK", 5) == 0) - { - uprv_strcpy(correctedLocale, "zh_HK"); - } - - /* A special zh_CN_GBK locale... 
- */ - else if (uprv_strcmp(correctedLocale, "zh_CN_GBK") == 0) - { - uprv_strcpy(correctedLocale, "zh_CN"); - } - - } - - return correctedLocale; -#endif - -} - -#if !U_CHARSET_IS_UTF8 -#if U_POSIX_LOCALE -/* -Due to various platform differences, one platform may specify a charset, -when they really mean a different charset. Remap the names so that they are -compatible with ICU. Only conflicting/ambiguous aliases should be resolved -here. Before adding anything to this function, please consider adding unique -names to the ICU alias table in the data directory. -*/ -static const char* -remapPlatformDependentCodepage(const char *locale, const char *name) { - if (locale != NULL && *locale == 0) { - /* Make sure that an empty locale is handled the same way. */ - locale = NULL; - } - if (name == NULL) { - return NULL; - } -#if U_PLATFORM == U_PF_AIX - if (uprv_strcmp(name, "IBM-943") == 0) { - /* Use the ASCII compatible ibm-943 */ - name = "Shift-JIS"; - } - else if (uprv_strcmp(name, "IBM-1252") == 0) { - /* Use the windows-1252 that contains the Euro */ - name = "IBM-5348"; - } -#elif U_PLATFORM == U_PF_SOLARIS - if (locale != NULL && uprv_strcmp(name, "EUC") == 0) { - /* Solaris underspecifies the "EUC" name. */ - if (uprv_strcmp(locale, "zh_CN") == 0) { - name = "EUC-CN"; - } - else if (uprv_strcmp(locale, "zh_TW") == 0) { - name = "EUC-TW"; - } - else if (uprv_strcmp(locale, "ko_KR") == 0) { - name = "EUC-KR"; - } - } - else if (uprv_strcmp(name, "eucJP") == 0) { - /* - ibm-954 is the best match. - ibm-33722 is the default for eucJP (similar to Windows). - */ - name = "eucjis"; - } - else if (uprv_strcmp(name, "646") == 0) { - /* - * The default codepage given by Solaris is 646 but the C library routines treat it as if it was - * ISO-8859-1 instead of US-ASCII(646). - */ - name = "ISO-8859-1"; - } -#elif U_PLATFORM_IS_DARWIN_BASED - if (locale == NULL && *name == 0) { - /* - No locale was specified, and an empty name was passed in. 
- This usually indicates that nl_langinfo didn't return valid information. - Mac OS X uses UTF-8 by default (especially the locale data and console). - */ - name = "UTF-8"; - } - else if (uprv_strcmp(name, "CP949") == 0) { - /* Remap CP949 to a similar codepage to avoid issues with backslash and won symbol. */ - name = "EUC-KR"; - } - else if (locale != NULL && uprv_strcmp(locale, "en_US_POSIX") != 0 && uprv_strcmp(name, "US-ASCII") == 0) { - /* - * For non C/POSIX locale, default the code page to UTF-8 instead of US-ASCII. - */ - name = "UTF-8"; - } -#elif U_PLATFORM == U_PF_BSD - if (uprv_strcmp(name, "CP949") == 0) { - /* Remap CP949 to a similar codepage to avoid issues with backslash and won symbol. */ - name = "EUC-KR"; - } -#elif U_PLATFORM == U_PF_HPUX - if (locale != NULL && uprv_strcmp(locale, "zh_HK") == 0 && uprv_strcmp(name, "big5") == 0) { - /* HP decided to extend big5 as hkbig5 even though it's not compatible :-( */ - /* zh_TW.big5 is not the same charset as zh_HK.big5! */ - name = "hkbig5"; - } - else if (uprv_strcmp(name, "eucJP") == 0) { - /* - ibm-1350 is the best match, but unavailable. - ibm-954 is mostly a superset of ibm-1350. - ibm-33722 is the default for eucJP (similar to Windows). - */ - name = "eucjis"; - } -#elif U_PLATFORM == U_PF_LINUX - if (locale != NULL && uprv_strcmp(name, "euc") == 0) { - /* Linux underspecifies the "EUC" name. */ - if (uprv_strcmp(locale, "korean") == 0) { - name = "EUC-KR"; - } - else if (uprv_strcmp(locale, "japanese") == 0) { - /* See comment below about eucJP */ - name = "eucjis"; - } - } - else if (uprv_strcmp(name, "eucjp") == 0) { - /* - ibm-1350 is the best match, but unavailable. - ibm-954 is mostly a superset of ibm-1350. - ibm-33722 is the default for eucJP (similar to Windows). 
- */ - name = "eucjis"; - } - else if (locale != NULL && uprv_strcmp(locale, "en_US_POSIX") != 0 && - (uprv_strcmp(name, "ANSI_X3.4-1968") == 0 || uprv_strcmp(name, "US-ASCII") == 0)) { - /* - * For non C/POSIX locale, default the code page to UTF-8 instead of US-ASCII. - */ - name = "UTF-8"; - } - /* - * Linux returns ANSI_X3.4-1968 for C/POSIX, but the call site takes care of - * it by falling back to 'US-ASCII' when NULL is returned from this - * function. So, we don't have to worry about it here. - */ -#endif - /* return NULL when "" is passed in */ - if (*name == 0) { - name = NULL; - } - return name; -} - -static const char* -getCodepageFromPOSIXID(const char *localeName, char * buffer, int32_t buffCapacity) -{ - char localeBuf[100]; - const char *name = NULL; - char *variant = NULL; - - if (localeName != NULL && (name = (uprv_strchr(localeName, '.'))) != NULL) { - size_t localeCapacity = uprv_min(sizeof(localeBuf), (name-localeName)+1); - uprv_strncpy(localeBuf, localeName, localeCapacity); - localeBuf[localeCapacity-1] = 0; /* ensure NULL termination */ - name = uprv_strncpy(buffer, name+1, buffCapacity); - buffer[buffCapacity-1] = 0; /* ensure NULL termination */ - if ((variant = const_cast(uprv_strchr(name, '@'))) != NULL) { - *variant = 0; - } - name = remapPlatformDependentCodepage(localeBuf, name); - } - return name; -} -#endif - -static const char* -int_getDefaultCodepage() -{ -#if U_PLATFORM == U_PF_OS400 - uint32_t ccsid = 37; /* Default to ibm-37 */ - static char codepage[64]; - Qwc_JOBI0400_t jobinfo; - Qus_EC_t error = { sizeof(Qus_EC_t) }; /* SPI error code */ - - EPT_CALL(QUSRJOBI)(&jobinfo, sizeof(jobinfo), "JOBI0400", - "* ", " ", &error); - - if (error.Bytes_Available == 0) { - if (jobinfo.Coded_Char_Set_ID != 0xFFFF) { - ccsid = (uint32_t)jobinfo.Coded_Char_Set_ID; - } - else if (jobinfo.Default_Coded_Char_Set_Id != 0xFFFF) { - ccsid = (uint32_t)jobinfo.Default_Coded_Char_Set_Id; - } - /* else use the default */ - } - 
sprintf(codepage,"ibm-%d", ccsid); - return codepage; - -#elif U_PLATFORM == U_PF_OS390 - static char codepage[64]; - - strncpy(codepage, nl_langinfo(CODESET),63-strlen(UCNV_SWAP_LFNL_OPTION_STRING)); - strcat(codepage,UCNV_SWAP_LFNL_OPTION_STRING); - codepage[63] = 0; /* NULL terminate */ - - return codepage; - -#elif U_PLATFORM_USES_ONLY_WIN32_API - static char codepage[64]; - DWORD codepageNumber = 0; - -#if U_PLATFORM_HAS_WINUWP_API > 0 - // UWP doesn't have a direct API to get the default ACP as Microsoft would rather - // have folks use Unicode than a "system" code page, however this is the same - // codepage as the system default locale codepage. (FWIW, the system locale is - // ONLY used for codepage, it should never be used for anything else) - GetLocaleInfoEx(LOCALE_NAME_SYSTEM_DEFAULT, LOCALE_IDEFAULTANSICODEPAGE | LOCALE_RETURN_NUMBER, - (LPWSTR)&codepageNumber, sizeof(codepageNumber) / sizeof(WCHAR)); -#else - // Win32 apps can call GetACP - codepageNumber = GetACP(); -#endif - // Special case for UTF-8 - if (codepageNumber == 65001) - { - return "UTF-8"; - } - // Windows codepages can look like windows-1252, so format the found number - // the numbers are eclectic, however all valid system code pages, besides UTF-8 - // are between 3 and 19999 - if (codepageNumber > 0 && codepageNumber < 20000) - { - sprintf(codepage, "windows-%ld", codepageNumber); - return codepage; - } - // If the codepage number call failed then return UTF-8 - return "UTF-8"; - -#elif U_POSIX_LOCALE - static char codesetName[100]; - const char *localeName = NULL; - const char *name = NULL; - - localeName = uprv_getPOSIXIDForDefaultCodepage(); - uprv_memset(codesetName, 0, sizeof(codesetName)); - /* On Solaris nl_langinfo returns C locale values unless setlocale - * was called earlier. - */ -#if (U_HAVE_NL_LANGINFO_CODESET && U_PLATFORM != U_PF_SOLARIS) - /* When available, check nl_langinfo first because it usually gives more - useful names. It depends on LC_CTYPE. 
- nl_langinfo may use the same buffer as setlocale. */ - { - const char *codeset = nl_langinfo(U_NL_LANGINFO_CODESET); -#if U_PLATFORM_IS_DARWIN_BASED || U_PLATFORM_IS_LINUX_BASED - /* - * On Linux and MacOSX, ensure that default codepage for non C/POSIX locale is UTF-8 - * instead of ASCII. - */ - if (uprv_strcmp(localeName, "en_US_POSIX") != 0) { - codeset = remapPlatformDependentCodepage(localeName, codeset); - } else -#endif - { - codeset = remapPlatformDependentCodepage(NULL, codeset); - } - - if (codeset != NULL) { - uprv_strncpy(codesetName, codeset, sizeof(codesetName)); - codesetName[sizeof(codesetName)-1] = 0; - return codesetName; - } - } -#endif - - /* Use setlocale in a nice way, and then check some environment variables. - Maybe the application used setlocale already. - */ - uprv_memset(codesetName, 0, sizeof(codesetName)); - name = getCodepageFromPOSIXID(localeName, codesetName, sizeof(codesetName)); - if (name) { - /* if we can find the codeset name from setlocale, return that. */ - return name; - } - - if (*codesetName == 0) - { - /* Everything failed. Return US ASCII (ISO 646). 
*/ - (void)uprv_strcpy(codesetName, "US-ASCII"); - } - return codesetName; -#else - return "US-ASCII"; -#endif -} - - -U_CAPI const char* U_EXPORT2 -uprv_getDefaultCodepage() -{ - static char const *name = NULL; - umtx_lock(NULL); - if (name == NULL) { - name = int_getDefaultCodepage(); - } - umtx_unlock(NULL); - return name; -} -#endif /* !U_CHARSET_IS_UTF8 */ - - -/* end of platform-specific implementation -------------- */ - -/* version handling --------------------------------------------------------- */ - -U_CAPI void U_EXPORT2 -u_versionFromString(UVersionInfo versionArray, const char *versionString) { - char *end; - uint16_t part=0; - - if(versionArray==NULL) { - return; - } - - if(versionString!=NULL) { - for(;;) { - versionArray[part]=(uint8_t)uprv_strtoul(versionString, &end, 10); - if(end==versionString || ++part==U_MAX_VERSION_LENGTH || *end!=U_VERSION_DELIMITER) { - break; - } - versionString=end+1; - } - } - - while(partU_MAX_VERSION_STRING_LENGTH) { - len = U_MAX_VERSION_STRING_LENGTH; - } - u_UCharsToChars(versionString, versionChars, len); - versionChars[len]=0; - u_versionFromString(versionArray, versionChars); - } -} - -U_CAPI void U_EXPORT2 -u_versionToString(const UVersionInfo versionArray, char *versionString) { - uint16_t count, part; - uint8_t field; - - if(versionString==NULL) { - return; - } - - if(versionArray==NULL) { - versionString[0]=0; - return; - } - - /* count how many fields need to be written */ - for(count=4; count>0 && versionArray[count-1]==0; --count) { - } - - if(count <= 1) { - count = 2; - } - - /* write the first part */ - /* write the decimal field value */ - field=versionArray[0]; - if(field>=100) { - *versionString++=(char)('0'+field/100); - field%=100; - } - if(field>=10) { - *versionString++=(char)('0'+field/10); - field%=10; - } - *versionString++=(char)('0'+field); - - /* write the following parts */ - for(part=1; part=100) { - *versionString++=(char)('0'+field/100); - field%=100; - } - if(field>=10) { - 
*versionString++=(char)('0'+field/10); - field%=10; - } - *versionString++=(char)('0'+field); - } - - /* NUL-terminate */ - *versionString=0; -} - -U_CAPI void U_EXPORT2 -u_getVersion(UVersionInfo versionArray) { - (void)copyright; // Suppress unused variable warning from clang. - u_versionFromString(versionArray, U_ICU_VERSION); -} - -/** - * icucfg.h dependent code - */ - -#if U_ENABLE_DYLOAD && HAVE_DLOPEN && !U_PLATFORM_USES_ONLY_WIN32_API - -#if HAVE_DLFCN_H -#ifdef __MVS__ -#ifndef __SUSV3 -#define __SUSV3 1 -#endif -#endif -#include -#endif /* HAVE_DLFCN_H */ - -U_INTERNAL void * U_EXPORT2 -uprv_dl_open(const char *libName, UErrorCode *status) { - void *ret = NULL; - if(U_FAILURE(*status)) return ret; - ret = dlopen(libName, RTLD_NOW|RTLD_GLOBAL); - if(ret==NULL) { -#ifdef U_TRACE_DYLOAD - printf("dlerror on dlopen(%s): %s\n", libName, dlerror()); -#endif - *status = U_MISSING_RESOURCE_ERROR; - } - return ret; -} - -U_INTERNAL void U_EXPORT2 -uprv_dl_close(void *lib, UErrorCode *status) { - if(U_FAILURE(*status)) return; - dlclose(lib); -} - -U_INTERNAL UVoidFunction* U_EXPORT2 -uprv_dlsym_func(void *lib, const char* sym, UErrorCode *status) { - union { - UVoidFunction *fp; - void *vp; - } uret; - uret.fp = NULL; - if(U_FAILURE(*status)) return uret.fp; - uret.vp = dlsym(lib, sym); - if(uret.vp == NULL) { -#ifdef U_TRACE_DYLOAD - printf("dlerror on dlsym(%p,%s): %s\n", lib,sym, dlerror()); -#endif - *status = U_MISSING_RESOURCE_ERROR; - } - return uret.fp; -} - -#elif U_ENABLE_DYLOAD && U_PLATFORM_USES_ONLY_WIN32_API && !U_PLATFORM_HAS_WINUWP_API - -/* Windows API implementation. */ -// Note: UWP does not expose/allow these APIs, so the UWP version gets the null implementation. 
*/ - -U_INTERNAL void * U_EXPORT2 -uprv_dl_open(const char *libName, UErrorCode *status) { - HMODULE lib = NULL; - - if(U_FAILURE(*status)) return NULL; - - lib = LoadLibraryA(libName); - - if(lib==NULL) { - *status = U_MISSING_RESOURCE_ERROR; - } - - return (void*)lib; -} - -U_INTERNAL void U_EXPORT2 -uprv_dl_close(void *lib, UErrorCode *status) { - HMODULE handle = (HMODULE)lib; - if(U_FAILURE(*status)) return; - - FreeLibrary(handle); - - return; -} - -U_INTERNAL UVoidFunction* U_EXPORT2 -uprv_dlsym_func(void *lib, const char* sym, UErrorCode *status) { - HMODULE handle = (HMODULE)lib; - UVoidFunction* addr = NULL; - - if(U_FAILURE(*status) || lib==NULL) return NULL; - - addr = (UVoidFunction*)GetProcAddress(handle, sym); - - if(addr==NULL) { - DWORD lastError = GetLastError(); - if(lastError == ERROR_PROC_NOT_FOUND) { - *status = U_MISSING_RESOURCE_ERROR; - } else { - *status = U_UNSUPPORTED_ERROR; /* other unknown error. */ - } - } - - return addr; -} - -#else - -/* No dynamic loading, null (nonexistent) implementation. 
*/ - -U_INTERNAL void * U_EXPORT2 -uprv_dl_open(const char *libName, UErrorCode *status) { - (void)libName; - if(U_FAILURE(*status)) return NULL; - *status = U_UNSUPPORTED_ERROR; - return NULL; -} - -U_INTERNAL void U_EXPORT2 -uprv_dl_close(void *lib, UErrorCode *status) { - (void)lib; - if(U_FAILURE(*status)) return; - *status = U_UNSUPPORTED_ERROR; - return; -} - -U_INTERNAL UVoidFunction* U_EXPORT2 -uprv_dlsym_func(void *lib, const char* sym, UErrorCode *status) { - (void)lib; - (void)sym; - if(U_SUCCESS(*status)) { - *status = U_UNSUPPORTED_ERROR; - } - return (UVoidFunction*)NULL; -} - -#endif - -/* - * Hey, Emacs, please set the following: - * - * Local Variables: - * indent-tabs-mode: nil - * End: - * - */ diff --git a/tools/icu/patches/64/source/i18n/dtptngen.cpp b/tools/icu/patches/64/source/i18n/dtptngen.cpp deleted file mode 100644 index eb8bcfb971f427..00000000000000 --- a/tools/icu/patches/64/source/i18n/dtptngen.cpp +++ /dev/null @@ -1,2778 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* -******************************************************************************* -* Copyright (C) 2007-2016, International Business Machines Corporation and -* others. All Rights Reserved. 
-******************************************************************************* -* -* File DTPTNGEN.CPP -* -******************************************************************************* -*/ - -#include "unicode/utypes.h" -#if !UCONFIG_NO_FORMATTING - -#include "unicode/datefmt.h" -#include "unicode/decimfmt.h" -#include "unicode/dtfmtsym.h" -#include "unicode/dtptngen.h" -#include "unicode/localpointer.h" -#include "unicode/simpleformatter.h" -#include "unicode/smpdtfmt.h" -#include "unicode/udat.h" -#include "unicode/udatpg.h" -#include "unicode/uniset.h" -#include "unicode/uloc.h" -#include "unicode/ures.h" -#include "unicode/ustring.h" -#include "unicode/rep.h" -#include "cpputils.h" -#include "mutex.h" -#include "umutex.h" -#include "cmemory.h" -#include "cstring.h" -#include "locbased.h" -#include "hash.h" -#include "uhash.h" -#include "uresimp.h" -#include "dtptngen_impl.h" -#include "ucln_in.h" -#include "charstr.h" -#include "uassert.h" - -#if U_CHARSET_FAMILY==U_EBCDIC_FAMILY -/** - * If we are on EBCDIC, use an iterator which will - * traverse the bundles in ASCII order. 
- */ -#define U_USE_ASCII_BUNDLE_ITERATOR -#define U_SORT_ASCII_BUNDLE_ITERATOR -#endif - -#if defined(U_USE_ASCII_BUNDLE_ITERATOR) - -#include "unicode/ustring.h" -#include "uarrsort.h" - -struct UResAEntry { - UChar *key; - UResourceBundle *item; -}; - -struct UResourceBundleAIterator { - UResourceBundle *bund; - UResAEntry *entries; - int32_t num; - int32_t cursor; -}; - -/* Must be C linkage to pass function pointer to the sort function */ - -U_CDECL_BEGIN - -static int32_t U_CALLCONV -ures_a_codepointSort(const void *context, const void *left, const void *right) { - //CompareContext *cmp=(CompareContext *)context; - return u_strcmp(((const UResAEntry *)left)->key, - ((const UResAEntry *)right)->key); -} - -U_CDECL_END - -static void ures_a_open(UResourceBundleAIterator *aiter, UResourceBundle *bund, UErrorCode *status) { - if(U_FAILURE(*status)) { - return; - } - aiter->bund = bund; - aiter->num = ures_getSize(aiter->bund); - aiter->cursor = 0; -#if !defined(U_SORT_ASCII_BUNDLE_ITERATOR) - aiter->entries = nullptr; -#else - aiter->entries = (UResAEntry*)uprv_malloc(sizeof(UResAEntry)*aiter->num); - for(int i=0;inum;i++) { - aiter->entries[i].item = ures_getByIndex(aiter->bund, i, nullptr, status); - const char *akey = ures_getKey(aiter->entries[i].item); - int32_t len = uprv_strlen(akey)+1; - aiter->entries[i].key = (UChar*)uprv_malloc(len*sizeof(UChar)); - u_charsToUChars(akey, aiter->entries[i].key, len); - } - uprv_sortArray(aiter->entries, aiter->num, sizeof(UResAEntry), ures_a_codepointSort, nullptr, TRUE, status); -#endif -} - -static void ures_a_close(UResourceBundleAIterator *aiter) { -#if defined(U_SORT_ASCII_BUNDLE_ITERATOR) - for(int i=0;inum;i++) { - uprv_free(aiter->entries[i].key); - ures_close(aiter->entries[i].item); - } -#endif -} - -static const UChar *ures_a_getNextString(UResourceBundleAIterator *aiter, int32_t *len, const char **key, UErrorCode *err) { -#if !defined(U_SORT_ASCII_BUNDLE_ITERATOR) - return ures_getNextString(aiter->bund, 
len, key, err); -#else - if(U_FAILURE(*err)) return nullptr; - UResourceBundle *item = aiter->entries[aiter->cursor].item; - const UChar* ret = ures_getString(item, len, err); - *key = ures_getKey(item); - aiter->cursor++; - return ret; -#endif -} - - -#endif - - -U_NAMESPACE_BEGIN - -// ***************************************************************************** -// class DateTimePatternGenerator -// ***************************************************************************** -static const UChar Canonical_Items[] = { - // GyQMwWEDFdaHmsSv - CAP_G, LOW_Y, CAP_Q, CAP_M, LOW_W, CAP_W, CAP_E, - CAP_D, CAP_F, LOW_D, LOW_A, // The UDATPG_x_FIELD constants and these fields have a different order than in ICU4J - CAP_H, LOW_M, LOW_S, CAP_S, LOW_V, 0 -}; - -static const dtTypeElem dtTypes[] = { - // patternChar, field, type, minLen, weight - {CAP_G, UDATPG_ERA_FIELD, DT_SHORT, 1, 3,}, - {CAP_G, UDATPG_ERA_FIELD, DT_LONG, 4, 0}, - {CAP_G, UDATPG_ERA_FIELD, DT_NARROW, 5, 0}, - - {LOW_Y, UDATPG_YEAR_FIELD, DT_NUMERIC, 1, 20}, - {CAP_Y, UDATPG_YEAR_FIELD, DT_NUMERIC + DT_DELTA, 1, 20}, - {LOW_U, UDATPG_YEAR_FIELD, DT_NUMERIC + 2*DT_DELTA, 1, 20}, - {LOW_R, UDATPG_YEAR_FIELD, DT_NUMERIC + 3*DT_DELTA, 1, 20}, - {CAP_U, UDATPG_YEAR_FIELD, DT_SHORT, 1, 3}, - {CAP_U, UDATPG_YEAR_FIELD, DT_LONG, 4, 0}, - {CAP_U, UDATPG_YEAR_FIELD, DT_NARROW, 5, 0}, - - {CAP_Q, UDATPG_QUARTER_FIELD, DT_NUMERIC, 1, 2}, - {CAP_Q, UDATPG_QUARTER_FIELD, DT_SHORT, 3, 0}, - {CAP_Q, UDATPG_QUARTER_FIELD, DT_LONG, 4, 0}, - {CAP_Q, UDATPG_QUARTER_FIELD, DT_NARROW, 5, 0}, - {LOW_Q, UDATPG_QUARTER_FIELD, DT_NUMERIC + DT_DELTA, 1, 2}, - {LOW_Q, UDATPG_QUARTER_FIELD, DT_SHORT - DT_DELTA, 3, 0}, - {LOW_Q, UDATPG_QUARTER_FIELD, DT_LONG - DT_DELTA, 4, 0}, - {LOW_Q, UDATPG_QUARTER_FIELD, DT_NARROW - DT_DELTA, 5, 0}, - - {CAP_M, UDATPG_MONTH_FIELD, DT_NUMERIC, 1, 2}, - {CAP_M, UDATPG_MONTH_FIELD, DT_SHORT, 3, 0}, - {CAP_M, UDATPG_MONTH_FIELD, DT_LONG, 4, 0}, - {CAP_M, UDATPG_MONTH_FIELD, DT_NARROW, 5, 0}, - {CAP_L, 
UDATPG_MONTH_FIELD, DT_NUMERIC + DT_DELTA, 1, 2}, - {CAP_L, UDATPG_MONTH_FIELD, DT_SHORT - DT_DELTA, 3, 0}, - {CAP_L, UDATPG_MONTH_FIELD, DT_LONG - DT_DELTA, 4, 0}, - {CAP_L, UDATPG_MONTH_FIELD, DT_NARROW - DT_DELTA, 5, 0}, - {LOW_L, UDATPG_MONTH_FIELD, DT_NUMERIC + DT_DELTA, 1, 1}, - - {LOW_W, UDATPG_WEEK_OF_YEAR_FIELD, DT_NUMERIC, 1, 2}, - - {CAP_W, UDATPG_WEEK_OF_MONTH_FIELD, DT_NUMERIC, 1, 0}, - - {CAP_E, UDATPG_WEEKDAY_FIELD, DT_SHORT, 1, 3}, - {CAP_E, UDATPG_WEEKDAY_FIELD, DT_LONG, 4, 0}, - {CAP_E, UDATPG_WEEKDAY_FIELD, DT_NARROW, 5, 0}, - {CAP_E, UDATPG_WEEKDAY_FIELD, DT_SHORTER, 6, 0}, - {LOW_C, UDATPG_WEEKDAY_FIELD, DT_NUMERIC + 2*DT_DELTA, 1, 2}, - {LOW_C, UDATPG_WEEKDAY_FIELD, DT_SHORT - 2*DT_DELTA, 3, 0}, - {LOW_C, UDATPG_WEEKDAY_FIELD, DT_LONG - 2*DT_DELTA, 4, 0}, - {LOW_C, UDATPG_WEEKDAY_FIELD, DT_NARROW - 2*DT_DELTA, 5, 0}, - {LOW_C, UDATPG_WEEKDAY_FIELD, DT_SHORTER - 2*DT_DELTA, 6, 0}, - {LOW_E, UDATPG_WEEKDAY_FIELD, DT_NUMERIC + DT_DELTA, 1, 2}, // LOW_E is currently not used in CLDR data, should not be canonical - {LOW_E, UDATPG_WEEKDAY_FIELD, DT_SHORT - DT_DELTA, 3, 0}, - {LOW_E, UDATPG_WEEKDAY_FIELD, DT_LONG - DT_DELTA, 4, 0}, - {LOW_E, UDATPG_WEEKDAY_FIELD, DT_NARROW - DT_DELTA, 5, 0}, - {LOW_E, UDATPG_WEEKDAY_FIELD, DT_SHORTER - DT_DELTA, 6, 0}, - - {LOW_D, UDATPG_DAY_FIELD, DT_NUMERIC, 1, 2}, - {LOW_G, UDATPG_DAY_FIELD, DT_NUMERIC + DT_DELTA, 1, 20}, // really internal use, so we don't care - - {CAP_D, UDATPG_DAY_OF_YEAR_FIELD, DT_NUMERIC, 1, 3}, - - {CAP_F, UDATPG_DAY_OF_WEEK_IN_MONTH_FIELD, DT_NUMERIC, 1, 0}, - - {LOW_A, UDATPG_DAYPERIOD_FIELD, DT_SHORT, 1, 3}, - {LOW_A, UDATPG_DAYPERIOD_FIELD, DT_LONG, 4, 0}, - {LOW_A, UDATPG_DAYPERIOD_FIELD, DT_NARROW, 5, 0}, - {LOW_B, UDATPG_DAYPERIOD_FIELD, DT_SHORT - DT_DELTA, 1, 3}, - {LOW_B, UDATPG_DAYPERIOD_FIELD, DT_LONG - DT_DELTA, 4, 0}, - {LOW_B, UDATPG_DAYPERIOD_FIELD, DT_NARROW - DT_DELTA, 5, 0}, - // b needs to be closer to a than to B, so we make this 3*DT_DELTA - {CAP_B, 
UDATPG_DAYPERIOD_FIELD, DT_SHORT - 3*DT_DELTA, 1, 3}, - {CAP_B, UDATPG_DAYPERIOD_FIELD, DT_LONG - 3*DT_DELTA, 4, 0}, - {CAP_B, UDATPG_DAYPERIOD_FIELD, DT_NARROW - 3*DT_DELTA, 5, 0}, - - {CAP_H, UDATPG_HOUR_FIELD, DT_NUMERIC + 10*DT_DELTA, 1, 2}, // 24 hour - {LOW_K, UDATPG_HOUR_FIELD, DT_NUMERIC + 11*DT_DELTA, 1, 2}, // 24 hour - {LOW_H, UDATPG_HOUR_FIELD, DT_NUMERIC, 1, 2}, // 12 hour - {CAP_K, UDATPG_HOUR_FIELD, DT_NUMERIC + DT_DELTA, 1, 2}, // 12 hour - // The C code has had versions of the following 3, keep & update. Should not need these, but... - // Without these, certain tests using e.g. staticGetSkeleton fail because j/J in patterns - // get skipped instead of mapped to the right hour chars, for example in - // DateFormatTest::TestPatternFromSkeleton - // IntlTestDateTimePatternGeneratorAPI:: testStaticGetSkeleton - // DateIntervalFormatTest::testTicket11985 - // Need to investigate better handling of jJC replacement e.g. in staticGetSkeleton. - {CAP_J, UDATPG_HOUR_FIELD, DT_NUMERIC + 5*DT_DELTA, 1, 2}, // 12/24 hour no AM/PM - {LOW_J, UDATPG_HOUR_FIELD, DT_NUMERIC + 6*DT_DELTA, 1, 6}, // 12/24 hour - {CAP_C, UDATPG_HOUR_FIELD, DT_NUMERIC + 7*DT_DELTA, 1, 6}, // 12/24 hour with preferred dayPeriods for 12 - - {LOW_M, UDATPG_MINUTE_FIELD, DT_NUMERIC, 1, 2}, - - {LOW_S, UDATPG_SECOND_FIELD, DT_NUMERIC, 1, 2}, - {CAP_A, UDATPG_SECOND_FIELD, DT_NUMERIC + DT_DELTA, 1, 1000}, - - {CAP_S, UDATPG_FRACTIONAL_SECOND_FIELD, DT_NUMERIC, 1, 1000}, - - {LOW_V, UDATPG_ZONE_FIELD, DT_SHORT - 2*DT_DELTA, 1, 0}, - {LOW_V, UDATPG_ZONE_FIELD, DT_LONG - 2*DT_DELTA, 4, 0}, - {LOW_Z, UDATPG_ZONE_FIELD, DT_SHORT, 1, 3}, - {LOW_Z, UDATPG_ZONE_FIELD, DT_LONG, 4, 0}, - {CAP_Z, UDATPG_ZONE_FIELD, DT_NARROW - DT_DELTA, 1, 3}, - {CAP_Z, UDATPG_ZONE_FIELD, DT_LONG - DT_DELTA, 4, 0}, - {CAP_Z, UDATPG_ZONE_FIELD, DT_SHORT - DT_DELTA, 5, 0}, - {CAP_O, UDATPG_ZONE_FIELD, DT_SHORT - DT_DELTA, 1, 0}, - {CAP_O, UDATPG_ZONE_FIELD, DT_LONG - DT_DELTA, 4, 0}, - {CAP_V, UDATPG_ZONE_FIELD, DT_SHORT 
- DT_DELTA, 1, 0}, - {CAP_V, UDATPG_ZONE_FIELD, DT_LONG - DT_DELTA, 2, 0}, - {CAP_V, UDATPG_ZONE_FIELD, DT_LONG-1 - DT_DELTA, 3, 0}, - {CAP_V, UDATPG_ZONE_FIELD, DT_LONG-2 - DT_DELTA, 4, 0}, - {CAP_X, UDATPG_ZONE_FIELD, DT_NARROW - DT_DELTA, 1, 0}, - {CAP_X, UDATPG_ZONE_FIELD, DT_SHORT - DT_DELTA, 2, 0}, - {CAP_X, UDATPG_ZONE_FIELD, DT_LONG - DT_DELTA, 4, 0}, - {LOW_X, UDATPG_ZONE_FIELD, DT_NARROW - DT_DELTA, 1, 0}, - {LOW_X, UDATPG_ZONE_FIELD, DT_SHORT - DT_DELTA, 2, 0}, - {LOW_X, UDATPG_ZONE_FIELD, DT_LONG - DT_DELTA, 4, 0}, - - {0, UDATPG_FIELD_COUNT, 0, 0, 0} , // last row of dtTypes[] - }; - -static const char* const CLDR_FIELD_APPEND[] = { - "Era", "Year", "Quarter", "Month", "Week", "*", "Day-Of-Week", - "*", "*", "Day", "*", // The UDATPG_x_FIELD constants and these fields have a different order than in ICU4J - "Hour", "Minute", "Second", "*", "Timezone" -}; - -static const char* const CLDR_FIELD_NAME[UDATPG_FIELD_COUNT] = { - "era", "year", "quarter", "month", "week", "weekOfMonth", "weekday", - "dayOfYear", "weekdayOfMonth", "day", "dayperiod", // The UDATPG_x_FIELD constants and these fields have a different order than in ICU4J - "hour", "minute", "second", "*", "zone" -}; - -static const char* const CLDR_FIELD_WIDTH[] = { // [UDATPG_WIDTH_COUNT] - "", "-short", "-narrow" -}; - -// TODO(ticket:13619): remove when definition uncommented in dtptngen.h. 
-static const int32_t UDATPG_WIDTH_COUNT = UDATPG_NARROW + 1; -static constexpr UDateTimePGDisplayWidth UDATPG_WIDTH_APPENDITEM = UDATPG_WIDE; -static constexpr int32_t UDATPG_FIELD_KEY_MAX = 24; // max length of CLDR field tag (type + width) - -// For appendItems -static const UChar UDATPG_ItemFormat[]= {0x7B, 0x30, 0x7D, 0x20, 0x251C, 0x7B, 0x32, 0x7D, 0x3A, - 0x20, 0x7B, 0x31, 0x7D, 0x2524, 0}; // {0} \u251C{2}: {1}\u2524 - -//static const UChar repeatedPatterns[6]={CAP_G, CAP_E, LOW_Z, LOW_V, CAP_Q, 0}; // "GEzvQ" - -static const char DT_DateTimePatternsTag[]="DateTimePatterns"; -static const char DT_DateTimeCalendarTag[]="calendar"; -static const char DT_DateTimeGregorianTag[]="gregorian"; -static const char DT_DateTimeAppendItemsTag[]="appendItems"; -static const char DT_DateTimeFieldsTag[]="fields"; -static const char DT_DateTimeAvailableFormatsTag[]="availableFormats"; -//static const UnicodeString repeatedPattern=UnicodeString(repeatedPatterns); - -UOBJECT_DEFINE_RTTI_IMPLEMENTATION(DateTimePatternGenerator) -UOBJECT_DEFINE_RTTI_IMPLEMENTATION(DTSkeletonEnumeration) -UOBJECT_DEFINE_RTTI_IMPLEMENTATION(DTRedundantEnumeration) - -DateTimePatternGenerator* U_EXPORT2 -DateTimePatternGenerator::createInstance(UErrorCode& status) { - return createInstance(Locale::getDefault(), status); -} - -DateTimePatternGenerator* U_EXPORT2 -DateTimePatternGenerator::createInstance(const Locale& locale, UErrorCode& status) { - if (U_FAILURE(status)) { - return nullptr; - } - LocalPointer result( - new DateTimePatternGenerator(locale, status), status); - return U_SUCCESS(status) ? result.orphan() : nullptr; -} - -DateTimePatternGenerator* U_EXPORT2 -DateTimePatternGenerator::createEmptyInstance(UErrorCode& status) { - if (U_FAILURE(status)) { - return nullptr; - } - LocalPointer result( - new DateTimePatternGenerator(status), status); - return U_SUCCESS(status) ? 
result.orphan() : nullptr; -} - -DateTimePatternGenerator::DateTimePatternGenerator(UErrorCode &status) : - skipMatcher(nullptr), - fAvailableFormatKeyHash(nullptr), - internalErrorCode(U_ZERO_ERROR) -{ - fp = new FormatParser(); - dtMatcher = new DateTimeMatcher(); - distanceInfo = new DistanceInfo(); - patternMap = new PatternMap(); - if (fp == nullptr || dtMatcher == nullptr || distanceInfo == nullptr || patternMap == nullptr) { - internalErrorCode = status = U_MEMORY_ALLOCATION_ERROR; - } -} - -DateTimePatternGenerator::DateTimePatternGenerator(const Locale& locale, UErrorCode &status) : - skipMatcher(nullptr), - fAvailableFormatKeyHash(nullptr), - internalErrorCode(U_ZERO_ERROR) -{ - fp = new FormatParser(); - dtMatcher = new DateTimeMatcher(); - distanceInfo = new DistanceInfo(); - patternMap = new PatternMap(); - if (fp == nullptr || dtMatcher == nullptr || distanceInfo == nullptr || patternMap == nullptr) { - internalErrorCode = status = U_MEMORY_ALLOCATION_ERROR; - } - else { - initData(locale, status); - } -} - -DateTimePatternGenerator::DateTimePatternGenerator(const DateTimePatternGenerator& other) : - UObject(), - skipMatcher(nullptr), - fAvailableFormatKeyHash(nullptr), - internalErrorCode(U_ZERO_ERROR) -{ - fp = new FormatParser(); - dtMatcher = new DateTimeMatcher(); - distanceInfo = new DistanceInfo(); - patternMap = new PatternMap(); - if (fp == nullptr || dtMatcher == nullptr || distanceInfo == nullptr || patternMap == nullptr) { - internalErrorCode = U_MEMORY_ALLOCATION_ERROR; - } - *this=other; -} - -DateTimePatternGenerator& -DateTimePatternGenerator::operator=(const DateTimePatternGenerator& other) { - // reflexive case - if (&other == this) { - return *this; - } - internalErrorCode = other.internalErrorCode; - pLocale = other.pLocale; - fDefaultHourFormatChar = other.fDefaultHourFormatChar; - *fp = *(other.fp); - dtMatcher->copyFrom(other.dtMatcher->skeleton); - *distanceInfo = *(other.distanceInfo); - dateTimeFormat = other.dateTimeFormat; 
- decimal = other.decimal; - // NUL-terminate for the C API. - dateTimeFormat.getTerminatedBuffer(); - decimal.getTerminatedBuffer(); - delete skipMatcher; - if ( other.skipMatcher == nullptr ) { - skipMatcher = nullptr; - } - else { - skipMatcher = new DateTimeMatcher(*other.skipMatcher); - if (skipMatcher == nullptr) - { - internalErrorCode = U_MEMORY_ALLOCATION_ERROR; - return *this; - } - } - for (int32_t i=0; i< UDATPG_FIELD_COUNT; ++i ) { - appendItemFormats[i] = other.appendItemFormats[i]; - appendItemFormats[i].getTerminatedBuffer(); // NUL-terminate for the C API. - for (int32_t j=0; j< UDATPG_WIDTH_COUNT; ++j ) { - fieldDisplayNames[i][j] = other.fieldDisplayNames[i][j]; - fieldDisplayNames[i][j].getTerminatedBuffer(); // NUL-terminate for the C API. - } - } - patternMap->copyFrom(*other.patternMap, internalErrorCode); - copyHashtable(other.fAvailableFormatKeyHash, internalErrorCode); - return *this; -} - - -UBool -DateTimePatternGenerator::operator==(const DateTimePatternGenerator& other) const { - if (this == &other) { - return TRUE; - } - if ((pLocale==other.pLocale) && (patternMap->equals(*other.patternMap)) && - (dateTimeFormat==other.dateTimeFormat) && (decimal==other.decimal)) { - for ( int32_t i=0 ; i list; - int32_t length = 0; - int32_t preferredFormat = ALLOWED_HOUR_FORMAT_UNKNOWN; - for (int32_t j = 0; formatList.getKeyAndValue(j, key, value); ++j) { - if (uprv_strcmp(key, "allowed") == 0) { - if (value.getType() == URES_STRING) { - length = 2; // 1 preferred to add later, 1 allowed to add now - if (list.allocateInsteadAndReset(length + 1) == nullptr) { - errorCode = U_MEMORY_ALLOCATION_ERROR; - return; - } - list[1] = getHourFormatFromUnicodeString(value.getUnicodeString(errorCode)); - } - else { - ResourceArray allowedFormats = value.getArray(errorCode); - length = allowedFormats.getSize() + 1; // 1 preferred, getSize allowed - if (list.allocateInsteadAndReset(length + 1) == nullptr) { - errorCode = U_MEMORY_ALLOCATION_ERROR; - return; - } - 
for (int32_t k = 1; k < length; ++k) { - allowedFormats.getValue(k-1, value); - list[k] = getHourFormatFromUnicodeString(value.getUnicodeString(errorCode)); - } - } - } else if (uprv_strcmp(key, "preferred") == 0) { - preferredFormat = getHourFormatFromUnicodeString(value.getUnicodeString(errorCode)); - } - } - if (length > 1) { - list[0] = (preferredFormat!=ALLOWED_HOUR_FORMAT_UNKNOWN)? preferredFormat: list[1]; - } else { - // fallback handling for missing data - length = 2; // 1 preferred, 1 allowed - if (list.allocateInsteadAndReset(length + 1) == nullptr) { - errorCode = U_MEMORY_ALLOCATION_ERROR; - return; - } - list[0] = (preferredFormat!=ALLOWED_HOUR_FORMAT_UNKNOWN)? preferredFormat: ALLOWED_HOUR_FORMAT_H; - list[1] = list[0]; - } - list[length] = ALLOWED_HOUR_FORMAT_UNKNOWN; - // At this point list[] will have at least two non-ALLOWED_HOUR_FORMAT_UNKNOWN entries, - // followed by ALLOWED_HOUR_FORMAT_UNKNOWN. - uhash_put(localeToAllowedHourFormatsMap, const_cast(regionOrLocale), list.orphan(), &errorCode); - if (U_FAILURE(errorCode)) { return; } - } - } - - AllowedHourFormat getHourFormatFromUnicodeString(const UnicodeString &s) { - if (s.length() == 1) { - if (s[0] == LOW_H) { return ALLOWED_HOUR_FORMAT_h; } - if (s[0] == CAP_H) { return ALLOWED_HOUR_FORMAT_H; } - if (s[0] == CAP_K) { return ALLOWED_HOUR_FORMAT_K; } - if (s[0] == LOW_K) { return ALLOWED_HOUR_FORMAT_k; } - } else if (s.length() == 2) { - if (s[0] == LOW_H && s[1] == LOW_B) { return ALLOWED_HOUR_FORMAT_hb; } - if (s[0] == LOW_H && s[1] == CAP_B) { return ALLOWED_HOUR_FORMAT_hB; } - if (s[0] == CAP_K && s[1] == LOW_B) { return ALLOWED_HOUR_FORMAT_Kb; } - if (s[0] == CAP_K && s[1] == CAP_B) { return ALLOWED_HOUR_FORMAT_KB; } - if (s[0] == CAP_H && s[1] == LOW_B) { return ALLOWED_HOUR_FORMAT_Hb; } - if (s[0] == CAP_H && s[1] == CAP_B) { return ALLOWED_HOUR_FORMAT_HB; } - } - - return ALLOWED_HOUR_FORMAT_UNKNOWN; - } -}; - -} // namespace - -AllowedHourFormatsSink::~AllowedHourFormatsSink() {} - 
-U_CFUNC void U_CALLCONV DateTimePatternGenerator::loadAllowedHourFormatsData(UErrorCode &status) { - if (U_FAILURE(status)) { return; } - localeToAllowedHourFormatsMap = uhash_open( - uhash_hashChars, uhash_compareChars, nullptr, &status); - if (U_FAILURE(status)) { return; } - - uhash_setValueDeleter(localeToAllowedHourFormatsMap, deleteAllowedHourFormats); - ucln_i18n_registerCleanup(UCLN_I18N_ALLOWED_HOUR_FORMATS, allowedHourFormatsCleanup); - - LocalUResourceBundlePointer rb(ures_openDirect(nullptr, "supplementalData", &status)); - if (U_FAILURE(status)) { return; } - - AllowedHourFormatsSink sink; - // TODO: Currently in the enumeration each table allocates a new array. - // Try to reduce the number of memory allocations. Consider storing a - // UVector32 with the concatenation of all of the sub-arrays, put the start index - // into the hashmap, store 6 single-value sub-arrays right at the beginning of the - // vector (at index enum*2) for easy data sharing, copy sub-arrays into runtime - // object. Remember to clean up the vector, too. 
- ures_getAllItemsWithFallback(rb.getAlias(), "timeData", sink, status); -} - -void DateTimePatternGenerator::getAllowedHourFormats(const Locale &locale, UErrorCode &status) { - if (U_FAILURE(status)) { return; } - Locale maxLocale(locale); - maxLocale.addLikelySubtags(status); - if (U_FAILURE(status)) { - return; - } - - const char *country = maxLocale.getCountry(); - if (*country == '\0') { country = "001"; } - const char *language = maxLocale.getLanguage(); - - CharString langCountry; - langCountry.append(language, static_cast(uprv_strlen(language)), status); - langCountry.append('_', status); - langCountry.append(country, static_cast(uprv_strlen(country)), status); - - int32_t *allowedFormats; - allowedFormats = (int32_t *)uhash_get(localeToAllowedHourFormatsMap, langCountry.data()); - if (allowedFormats == nullptr) { - allowedFormats = (int32_t *)uhash_get(localeToAllowedHourFormatsMap, const_cast(country)); - } - - if (allowedFormats != nullptr) { // Lookup is successful - // Here allowedFormats points to a list consisting of key for preferredFormat, - // followed by one or more keys for allowedFormats, then followed by ALLOWED_HOUR_FORMAT_UNKNOWN. 
- switch (allowedFormats[0]) { - case ALLOWED_HOUR_FORMAT_h: fDefaultHourFormatChar = LOW_H; break; - case ALLOWED_HOUR_FORMAT_H: fDefaultHourFormatChar = CAP_H; break; - case ALLOWED_HOUR_FORMAT_K: fDefaultHourFormatChar = CAP_K; break; - case ALLOWED_HOUR_FORMAT_k: fDefaultHourFormatChar = LOW_K; break; - default: fDefaultHourFormatChar = CAP_H; break; - } - for (int32_t i = 0; i < UPRV_LENGTHOF(fAllowedHourFormats); ++i) { - fAllowedHourFormats[i] = allowedFormats[i + 1]; - if (fAllowedHourFormats[i] == ALLOWED_HOUR_FORMAT_UNKNOWN) { - break; - } - } - } else { // Lookup failed, twice - fDefaultHourFormatChar = CAP_H; - fAllowedHourFormats[0] = ALLOWED_HOUR_FORMAT_H; - fAllowedHourFormats[1] = ALLOWED_HOUR_FORMAT_UNKNOWN; - } -} - -UnicodeString -DateTimePatternGenerator::getSkeleton(const UnicodeString& pattern, UErrorCode& -/*status*/) { - FormatParser fp2; - DateTimeMatcher matcher; - PtnSkeleton localSkeleton; - matcher.set(pattern, &fp2, localSkeleton); - return localSkeleton.getSkeleton(); -} - -UnicodeString -DateTimePatternGenerator::staticGetSkeleton( - const UnicodeString& pattern, UErrorCode& /*status*/) { - FormatParser fp; - DateTimeMatcher matcher; - PtnSkeleton localSkeleton; - matcher.set(pattern, &fp, localSkeleton); - return localSkeleton.getSkeleton(); -} - -UnicodeString -DateTimePatternGenerator::getBaseSkeleton(const UnicodeString& pattern, UErrorCode& /*status*/) { - FormatParser fp2; - DateTimeMatcher matcher; - PtnSkeleton localSkeleton; - matcher.set(pattern, &fp2, localSkeleton); - return localSkeleton.getBaseSkeleton(); -} - -UnicodeString -DateTimePatternGenerator::staticGetBaseSkeleton( - const UnicodeString& pattern, UErrorCode& /*status*/) { - FormatParser fp; - DateTimeMatcher matcher; - PtnSkeleton localSkeleton; - matcher.set(pattern, &fp, localSkeleton); - return localSkeleton.getBaseSkeleton(); -} - -void -DateTimePatternGenerator::addICUPatterns(const Locale& locale, UErrorCode& status) { - if (U_FAILURE(status)) { return; } 
- UnicodeString dfPattern; - UnicodeString conflictingString; - DateFormat* df; - - // Load with ICU patterns - for (int32_t i=DateFormat::kFull; i<=DateFormat::kShort; i++) { - DateFormat::EStyle style = (DateFormat::EStyle)i; - df = DateFormat::createDateInstance(style, locale); - SimpleDateFormat* sdf; - if (df != nullptr && (sdf = dynamic_cast(df)) != nullptr) { - sdf->toPattern(dfPattern); - addPattern(dfPattern, FALSE, conflictingString, status); - } - // TODO Maybe we should return an error when the date format isn't simple. - delete df; - if (U_FAILURE(status)) { return; } - - df = DateFormat::createTimeInstance(style, locale); - if (df != nullptr && (sdf = dynamic_cast(df)) != nullptr) { - sdf->toPattern(dfPattern); - addPattern(dfPattern, FALSE, conflictingString, status); - - // TODO: C++ and Java are inconsistent (see #12568). - // C++ uses MEDIUM, but Java uses SHORT. - if ( i==DateFormat::kShort && !dfPattern.isEmpty() ) { - consumeShortTimePattern(dfPattern, status); - } - } - // TODO Maybe we should return an error when the date format isn't simple. 
- delete df; - if (U_FAILURE(status)) { return; } - } -} - -void -DateTimePatternGenerator::hackTimes(const UnicodeString& hackPattern, UErrorCode& status) { - UnicodeString conflictingString; - - fp->set(hackPattern); - UnicodeString mmss; - UBool gotMm=FALSE; - for (int32_t i=0; iitemNumber; ++i) { - UnicodeString field = fp->items[i]; - if ( fp->isQuoteLiteral(field) ) { - if ( gotMm ) { - UnicodeString quoteLiteral; - fp->getQuoteLiteral(quoteLiteral, &i); - mmss += quoteLiteral; - } - } - else { - if (fp->isPatternSeparator(field) && gotMm) { - mmss+=field; - } - else { - UChar ch=field.charAt(0); - if (ch==LOW_M) { - gotMm=TRUE; - mmss+=field; - } - else { - if (ch==LOW_S) { - if (!gotMm) { - break; - } - mmss+= field; - addPattern(mmss, FALSE, conflictingString, status); - break; - } - else { - if (gotMm || ch==LOW_Z || ch==CAP_Z || ch==LOW_V || ch==CAP_V) { - break; - } - } - } - } - } - } -} - -#define ULOC_LOCALE_IDENTIFIER_CAPACITY (ULOC_FULLNAME_CAPACITY + 1 + ULOC_KEYWORD_AND_VALUES_CAPACITY) - -void -DateTimePatternGenerator::getCalendarTypeToUse(const Locale& locale, CharString& destination, UErrorCode& err) { - destination.clear().append(DT_DateTimeGregorianTag, -1, err); // initial default - if ( U_SUCCESS(err) ) { - UErrorCode localStatus = U_ZERO_ERROR; - char localeWithCalendarKey[ULOC_LOCALE_IDENTIFIER_CAPACITY]; - // obtain a locale that always has the calendar key value that should be used - ures_getFunctionalEquivalent( - localeWithCalendarKey, - ULOC_LOCALE_IDENTIFIER_CAPACITY, - nullptr, - "calendar", - "calendar", - locale.getName(), - nullptr, - FALSE, - &localStatus); - localeWithCalendarKey[ULOC_LOCALE_IDENTIFIER_CAPACITY-1] = 0; // ensure null termination - // now get the calendar key value from that locale - char calendarType[ULOC_KEYWORDS_CAPACITY]; - int32_t calendarTypeLen = uloc_getKeywordValue( - localeWithCalendarKey, - "calendar", - calendarType, - ULOC_KEYWORDS_CAPACITY, - &localStatus); - // If the input locale was invalid, 
don't fail with missing resource error, instead - // continue with default of Gregorian. - if (U_FAILURE(localStatus) && localStatus != U_MISSING_RESOURCE_ERROR) { - err = localStatus; - return; - } - if (calendarTypeLen < ULOC_KEYWORDS_CAPACITY) { - destination.clear().append(calendarType, -1, err); - if (U_FAILURE(err)) { return; } - } - } -} - -void -DateTimePatternGenerator::consumeShortTimePattern(const UnicodeString& shortTimePattern, - UErrorCode& status) { - if (U_FAILURE(status)) { return; } - // ICU-20383 No longer set fDefaultHourFormatChar to the hour format character from - // this pattern; instead it is set from localeToAllowedHourFormatsMap which now - // includes entries for both preferred and allowed formats. - - // HACK for hh:ss - hackTimes(shortTimePattern, status); -} - -struct DateTimePatternGenerator::AppendItemFormatsSink : public ResourceSink { - - // Destination for data, modified via setters. - DateTimePatternGenerator& dtpg; - - AppendItemFormatsSink(DateTimePatternGenerator& _dtpg) : dtpg(_dtpg) {} - virtual ~AppendItemFormatsSink(); - - virtual void put(const char *key, ResourceValue &value, UBool /*noFallback*/, - UErrorCode &errorCode) { - ResourceTable itemsTable = value.getTable(errorCode); - if (U_FAILURE(errorCode)) { return; } - for (int32_t i = 0; itemsTable.getKeyAndValue(i, key, value); ++i) { - UDateTimePatternField field = dtpg.getAppendFormatNumber(key); - if (field == UDATPG_FIELD_COUNT) { continue; } - const UnicodeString& valueStr = value.getUnicodeString(errorCode); - if (dtpg.getAppendItemFormat(field).isEmpty() && !valueStr.isEmpty()) { - dtpg.setAppendItemFormat(field, valueStr); - } - } - } - - void fillInMissing() { - UnicodeString defaultItemFormat(TRUE, UDATPG_ItemFormat, UPRV_LENGTHOF(UDATPG_ItemFormat)-1); // Read-only alias. 
- for (int32_t i = 0; i < UDATPG_FIELD_COUNT; i++) { - UDateTimePatternField field = (UDateTimePatternField)i; - if (dtpg.getAppendItemFormat(field).isEmpty()) { - dtpg.setAppendItemFormat(field, defaultItemFormat); - } - } - } -}; - -struct DateTimePatternGenerator::AppendItemNamesSink : public ResourceSink { - - // Destination for data, modified via setters. - DateTimePatternGenerator& dtpg; - - AppendItemNamesSink(DateTimePatternGenerator& _dtpg) : dtpg(_dtpg) {} - virtual ~AppendItemNamesSink(); - - virtual void put(const char *key, ResourceValue &value, UBool /*noFallback*/, - UErrorCode &errorCode) { - ResourceTable itemsTable = value.getTable(errorCode); - if (U_FAILURE(errorCode)) { return; } - for (int32_t i = 0; itemsTable.getKeyAndValue(i, key, value); ++i) { - UDateTimePGDisplayWidth width; - UDateTimePatternField field = dtpg.getFieldAndWidthIndices(key, &width); - if (field == UDATPG_FIELD_COUNT) { continue; } - ResourceTable detailsTable = value.getTable(errorCode); - if (U_FAILURE(errorCode)) { return; } - for (int32_t j = 0; detailsTable.getKeyAndValue(j, key, value); ++j) { - if (uprv_strcmp(key, "dn") != 0) { continue; } - const UnicodeString& valueStr = value.getUnicodeString(errorCode); - if (dtpg.getFieldDisplayName(field,width).isEmpty() && !valueStr.isEmpty()) { - dtpg.setFieldDisplayName(field,width,valueStr); - } - break; - } - } - } - - void fillInMissing() { - for (int32_t i = 0; i < UDATPG_FIELD_COUNT; i++) { - UnicodeString& valueStr = dtpg.getMutableFieldDisplayName((UDateTimePatternField)i, UDATPG_WIDE); - if (valueStr.isEmpty()) { - valueStr = CAP_F; - U_ASSERT(i < 20); - if (i < 10) { - // F0, F1, ..., F9 - valueStr += (UChar)(i+0x30); - } else { - // F10, F11, ... - valueStr += (UChar)0x31; - valueStr += (UChar)(i-10 + 0x30); - } - // NUL-terminate for the C API. 
- valueStr.getTerminatedBuffer(); - } - for (int32_t j = 1; j < UDATPG_WIDTH_COUNT; j++) { - UnicodeString& valueStr2 = dtpg.getMutableFieldDisplayName((UDateTimePatternField)i, (UDateTimePGDisplayWidth)j); - if (valueStr2.isEmpty()) { - valueStr2 = dtpg.getFieldDisplayName((UDateTimePatternField)i, (UDateTimePGDisplayWidth)(j-1)); - } - } - } - } -}; - -struct DateTimePatternGenerator::AvailableFormatsSink : public ResourceSink { - - // Destination for data, modified via setters. - DateTimePatternGenerator& dtpg; - - // Temporary variable, required for calling addPatternWithSkeleton. - UnicodeString conflictingPattern; - - AvailableFormatsSink(DateTimePatternGenerator& _dtpg) : dtpg(_dtpg) {} - virtual ~AvailableFormatsSink(); - - virtual void put(const char *key, ResourceValue &value, UBool isRoot, - UErrorCode &errorCode) { - ResourceTable itemsTable = value.getTable(errorCode); - if (U_FAILURE(errorCode)) { return; } - for (int32_t i = 0; itemsTable.getKeyAndValue(i, key, value); ++i) { - const UnicodeString formatKey(key, -1, US_INV); - if (!dtpg.isAvailableFormatSet(formatKey) ) { - dtpg.setAvailableFormat(formatKey, errorCode); - // Add pattern with its associated skeleton. Override any duplicate - // derived from std patterns, but not a previous availableFormats entry: - const UnicodeString& formatValue = value.getUnicodeString(errorCode); - conflictingPattern.remove(); - dtpg.addPatternWithSkeleton(formatValue, &formatKey, !isRoot, conflictingPattern, errorCode); - } - } - } -}; - -// Virtual destructors must be defined out of line. 
-DateTimePatternGenerator::AppendItemFormatsSink::~AppendItemFormatsSink() {} -DateTimePatternGenerator::AppendItemNamesSink::~AppendItemNamesSink() {} -DateTimePatternGenerator::AvailableFormatsSink::~AvailableFormatsSink() {} - -void -DateTimePatternGenerator::addCLDRData(const Locale& locale, UErrorCode& errorCode) { - if (U_FAILURE(errorCode)) { return; } - UnicodeString rbPattern, value, field; - CharString path; - - LocalUResourceBundlePointer rb(ures_open(nullptr, locale.getName(), &errorCode)); - if (U_FAILURE(errorCode)) { return; } - - CharString calendarTypeToUse; // to be filled in with the type to use, if all goes well - getCalendarTypeToUse(locale, calendarTypeToUse, errorCode); - if (U_FAILURE(errorCode)) { return; } - - // Local err to ignore resource not found exceptions - UErrorCode err = U_ZERO_ERROR; - - // Load append item formats. - AppendItemFormatsSink appendItemFormatsSink(*this); - path.clear() - .append(DT_DateTimeCalendarTag, errorCode) - .append('/', errorCode) - .append(calendarTypeToUse, errorCode) - .append('/', errorCode) - .append(DT_DateTimeAppendItemsTag, errorCode); // i.e., calendar/xxx/appendItems - if (U_FAILURE(errorCode)) { return; } - ures_getAllItemsWithFallback(rb.getAlias(), path.data(), appendItemFormatsSink, err); - appendItemFormatsSink.fillInMissing(); - - // Load CLDR item names. - err = U_ZERO_ERROR; - AppendItemNamesSink appendItemNamesSink(*this); - ures_getAllItemsWithFallback(rb.getAlias(), DT_DateTimeFieldsTag, appendItemNamesSink, err); - appendItemNamesSink.fillInMissing(); - - // Load the available formats from CLDR. 
- err = U_ZERO_ERROR; - initHashtable(errorCode); - if (U_FAILURE(errorCode)) { return; } - AvailableFormatsSink availableFormatsSink(*this); - path.clear() - .append(DT_DateTimeCalendarTag, errorCode) - .append('/', errorCode) - .append(calendarTypeToUse, errorCode) - .append('/', errorCode) - .append(DT_DateTimeAvailableFormatsTag, errorCode); // i.e., calendar/xxx/availableFormats - if (U_FAILURE(errorCode)) { return; } - ures_getAllItemsWithFallback(rb.getAlias(), path.data(), availableFormatsSink, err); -} - -void -DateTimePatternGenerator::initHashtable(UErrorCode& err) { - if (U_FAILURE(err)) { return; } - if (fAvailableFormatKeyHash!=nullptr) { - return; - } - LocalPointer hash(new Hashtable(FALSE, err), err); - if (U_SUCCESS(err)) { - fAvailableFormatKeyHash = hash.orphan(); - } -} - -void -DateTimePatternGenerator::setAppendItemFormat(UDateTimePatternField field, const UnicodeString& value) { - appendItemFormats[field] = value; - // NUL-terminate for the C API. - appendItemFormats[field].getTerminatedBuffer(); -} - -const UnicodeString& -DateTimePatternGenerator::getAppendItemFormat(UDateTimePatternField field) const { - return appendItemFormats[field]; -} - -void -DateTimePatternGenerator::setAppendItemName(UDateTimePatternField field, const UnicodeString& value) { - setFieldDisplayName(field, UDATPG_WIDTH_APPENDITEM, value); -} - -const UnicodeString& -DateTimePatternGenerator::getAppendItemName(UDateTimePatternField field) const { - return fieldDisplayNames[field][UDATPG_WIDTH_APPENDITEM]; -} - -void -DateTimePatternGenerator::setFieldDisplayName(UDateTimePatternField field, UDateTimePGDisplayWidth width, const UnicodeString& value) { - fieldDisplayNames[field][width] = value; - // NUL-terminate for the C API. 
- fieldDisplayNames[field][width].getTerminatedBuffer(); -} - -UnicodeString -DateTimePatternGenerator::getFieldDisplayName(UDateTimePatternField field, UDateTimePGDisplayWidth width) const { - return fieldDisplayNames[field][width]; -} - -UnicodeString& -DateTimePatternGenerator::getMutableFieldDisplayName(UDateTimePatternField field, UDateTimePGDisplayWidth width) { - return fieldDisplayNames[field][width]; -} - -void -DateTimePatternGenerator::getAppendName(UDateTimePatternField field, UnicodeString& value) { - value = SINGLE_QUOTE; - value += fieldDisplayNames[field][UDATPG_WIDTH_APPENDITEM]; - value += SINGLE_QUOTE; -} - -UnicodeString -DateTimePatternGenerator::getBestPattern(const UnicodeString& patternForm, UErrorCode& status) { - return getBestPattern(patternForm, UDATPG_MATCH_NO_OPTIONS, status); -} - -UnicodeString -DateTimePatternGenerator::getBestPattern(const UnicodeString& patternForm, UDateTimePatternMatchOptions options, UErrorCode& status) { - if (U_FAILURE(status)) { - return UnicodeString(); - } - if (U_FAILURE(internalErrorCode)) { - status = internalErrorCode; - return UnicodeString(); - } - const UnicodeString *bestPattern = nullptr; - UnicodeString dtFormat; - UnicodeString resultPattern; - int32_t flags = kDTPGNoFlags; - - int32_t dateMask=(1<set(patternFormMapped, fp); - const PtnSkeleton* specifiedSkeleton = nullptr; - bestPattern=getBestRaw(*dtMatcher, -1, distanceInfo, status, &specifiedSkeleton); - if (U_FAILURE(status)) { - return UnicodeString(); - } - - if ( distanceInfo->missingFieldMask==0 && distanceInfo->extraFieldMask==0 ) { - resultPattern = adjustFieldTypes(*bestPattern, specifiedSkeleton, flags, options); - - return resultPattern; - } - int32_t neededFields = dtMatcher->getFieldMask(); - UnicodeString datePattern=getBestAppending(neededFields & dateMask, flags, status, options); - UnicodeString timePattern=getBestAppending(neededFields & timeMask, flags, status, options); - if (U_FAILURE(status)) { - return UnicodeString(); 
- } - if (datePattern.length()==0) { - if (timePattern.length()==0) { - resultPattern.remove(); - } - else { - return timePattern; - } - } - if (timePattern.length()==0) { - return datePattern; - } - resultPattern.remove(); - status = U_ZERO_ERROR; - dtFormat=getDateTimeFormat(); - SimpleFormatter(dtFormat, 2, 2, status).format(timePattern, datePattern, resultPattern, status); - return resultPattern; -} - -/* - * Map a skeleton that may have metacharacters jJC to one without, by replacing - * the metacharacters with locale-appropriate fields of h/H/k/K and of a/b/B - * (depends on fDefaultHourFormatChar and fAllowedHourFormats being set, which in - * turn depends on initData having been run). This method also updates the flags - * as necessary. Returns the updated skeleton. - */ -UnicodeString -DateTimePatternGenerator::mapSkeletonMetacharacters(const UnicodeString& patternForm, int32_t* flags, UErrorCode& status) { - UnicodeString patternFormMapped; - patternFormMapped.remove(); - UBool inQuoted = FALSE; - int32_t patPos, patLen = patternForm.length(); - for (patPos = 0; patPos < patLen; patPos++) { - UChar patChr = patternForm.charAt(patPos); - if (patChr == SINGLE_QUOTE) { - inQuoted = !inQuoted; - } else if (!inQuoted) { - // Handle special mappings for 'j' and 'C' in which fields lengths - // 1,3,5 => hour field length 1 - // 2,4,6 => hour field length 2 - // 1,2 => abbreviated dayPeriod (field length 1..3) - // 3,4 => long dayPeriod (field length 4) - // 5,6 => narrow dayPeriod (field length 5) - if (patChr == LOW_J || patChr == CAP_C) { - int32_t extraLen = 0; // 1 less than total field length - while (patPos+1 < patLen && patternForm.charAt(patPos+1)==patChr) { - extraLen++; - patPos++; - } - int32_t hourLen = 1 + (extraLen & 1); - int32_t dayPeriodLen = (extraLen < 2)? 
1: 3 + (extraLen >> 1); - UChar hourChar = LOW_H; - UChar dayPeriodChar = LOW_A; - if (patChr == LOW_J) { - hourChar = fDefaultHourFormatChar; - } else { - AllowedHourFormat bestAllowed; - if (fAllowedHourFormats[0] != ALLOWED_HOUR_FORMAT_UNKNOWN) { - bestAllowed = (AllowedHourFormat)fAllowedHourFormats[0]; - } else { - status = U_INVALID_FORMAT_ERROR; - return UnicodeString(); - } - if (bestAllowed == ALLOWED_HOUR_FORMAT_H || bestAllowed == ALLOWED_HOUR_FORMAT_HB || bestAllowed == ALLOWED_HOUR_FORMAT_Hb) { - hourChar = CAP_H; - } else if (bestAllowed == ALLOWED_HOUR_FORMAT_K || bestAllowed == ALLOWED_HOUR_FORMAT_KB || bestAllowed == ALLOWED_HOUR_FORMAT_Kb) { - hourChar = CAP_K; - } else if (bestAllowed == ALLOWED_HOUR_FORMAT_k) { - hourChar = LOW_K; - } - // in #13183 just add b/B to skeleton, no longer need to set special flags - if (bestAllowed == ALLOWED_HOUR_FORMAT_HB || bestAllowed == ALLOWED_HOUR_FORMAT_hB || bestAllowed == ALLOWED_HOUR_FORMAT_KB) { - dayPeriodChar = CAP_B; - } else if (bestAllowed == ALLOWED_HOUR_FORMAT_Hb || bestAllowed == ALLOWED_HOUR_FORMAT_hb || bestAllowed == ALLOWED_HOUR_FORMAT_Kb) { - dayPeriodChar = LOW_B; - } - } - if (hourChar==CAP_H || hourChar==LOW_K) { - dayPeriodLen = 0; - } - while (dayPeriodLen-- > 0) { - patternFormMapped.append(dayPeriodChar); - } - while (hourLen-- > 0) { - patternFormMapped.append(hourChar); - } - } else if (patChr == CAP_J) { - // Get pattern for skeleton with H, then replace H or k - // with fDefaultHourFormatChar (if different) - patternFormMapped.append(CAP_H); - *flags |= kDTPGSkeletonUsesCapJ; - } else { - patternFormMapped.append(patChr); - } - } - } - return patternFormMapped; -} - -UnicodeString -DateTimePatternGenerator::replaceFieldTypes(const UnicodeString& pattern, - const UnicodeString& skeleton, - UErrorCode& status) { - return replaceFieldTypes(pattern, skeleton, UDATPG_MATCH_NO_OPTIONS, status); -} - -UnicodeString -DateTimePatternGenerator::replaceFieldTypes(const UnicodeString& 
pattern, - const UnicodeString& skeleton, - UDateTimePatternMatchOptions options, - UErrorCode& status) { - if (U_FAILURE(status)) { - return UnicodeString(); - } - if (U_FAILURE(internalErrorCode)) { - status = internalErrorCode; - return UnicodeString(); - } - dtMatcher->set(skeleton, fp); - UnicodeString result = adjustFieldTypes(pattern, nullptr, kDTPGNoFlags, options); - return result; -} - -void -DateTimePatternGenerator::setDecimal(const UnicodeString& newDecimal) { - this->decimal = newDecimal; - // NUL-terminate for the C API. - this->decimal.getTerminatedBuffer(); -} - -const UnicodeString& -DateTimePatternGenerator::getDecimal() const { - return decimal; -} - -void -DateTimePatternGenerator::addCanonicalItems(UErrorCode& status) { - if (U_FAILURE(status)) { return; } - UnicodeString conflictingPattern; - - for (int32_t i=0; i 0) { - addPattern(UnicodeString(Canonical_Items[i]), FALSE, conflictingPattern, status); - } - if (U_FAILURE(status)) { return; } - } -} - -void -DateTimePatternGenerator::setDateTimeFormat(const UnicodeString& dtFormat) { - dateTimeFormat = dtFormat; - // NUL-terminate for the C API. 
- dateTimeFormat.getTerminatedBuffer(); -} - -const UnicodeString& -DateTimePatternGenerator::getDateTimeFormat() const { - return dateTimeFormat; -} - -void -DateTimePatternGenerator::setDateTimeFromCalendar(const Locale& locale, UErrorCode& status) { - if (U_FAILURE(status)) { return; } - - const UChar *resStr; - int32_t resStrLen = 0; - - LocalPointer fCalendar(Calendar::createInstance(locale, status), status); - if (U_FAILURE(status)) { return; } - - LocalUResourceBundlePointer calData(ures_open(nullptr, locale.getBaseName(), &status)); - if (U_FAILURE(status)) { return; } - ures_getByKey(calData.getAlias(), DT_DateTimeCalendarTag, calData.getAlias(), &status); - if (U_FAILURE(status)) { return; } - - LocalUResourceBundlePointer dateTimePatterns; - if (fCalendar->getType() != nullptr && *fCalendar->getType() != '\0' - && uprv_strcmp(fCalendar->getType(), DT_DateTimeGregorianTag) != 0) { - dateTimePatterns.adoptInstead(ures_getByKeyWithFallback(calData.getAlias(), fCalendar->getType(), - nullptr, &status)); - ures_getByKeyWithFallback(dateTimePatterns.getAlias(), DT_DateTimePatternsTag, - dateTimePatterns.getAlias(), &status); - } - - if (dateTimePatterns.isNull() || status == U_MISSING_RESOURCE_ERROR) { - status = U_ZERO_ERROR; - dateTimePatterns.adoptInstead(ures_getByKeyWithFallback(calData.getAlias(), DT_DateTimeGregorianTag, - dateTimePatterns.orphan(), &status)); - ures_getByKeyWithFallback(dateTimePatterns.getAlias(), DT_DateTimePatternsTag, - dateTimePatterns.getAlias(), &status); - } - if (U_FAILURE(status)) { return; } - - if (ures_getSize(dateTimePatterns.getAlias()) <= DateFormat::kDateTime) - { - status = U_INVALID_FORMAT_ERROR; - return; - } - resStr = ures_getStringByIndex(dateTimePatterns.getAlias(), (int32_t)DateFormat::kDateTime, &resStrLen, &status); - setDateTimeFormat(UnicodeString(TRUE, resStr, resStrLen)); -} - -void -DateTimePatternGenerator::setDecimalSymbols(const Locale& locale, UErrorCode& status) { - DecimalFormatSymbols dfs = 
DecimalFormatSymbols(locale, status); - if(U_SUCCESS(status)) { - decimal = dfs.getSymbol(DecimalFormatSymbols::kDecimalSeparatorSymbol); - // NUL-terminate for the C API. - decimal.getTerminatedBuffer(); - } -} - -UDateTimePatternConflict -DateTimePatternGenerator::addPattern( - const UnicodeString& pattern, - UBool override, - UnicodeString &conflictingPattern, - UErrorCode& status) -{ - if (U_FAILURE(internalErrorCode)) { - status = internalErrorCode; - return UDATPG_NO_CONFLICT; - } - - return addPatternWithSkeleton(pattern, nullptr, override, conflictingPattern, status); -} - -// For DateTimePatternGenerator::addPatternWithSkeleton - -// If skeletonToUse is specified, then an availableFormats entry is being added. In this case: -// 1. We pass that skeleton to matcher.set instead of having it derive a skeleton from the pattern. -// 2. If the new entry's skeleton or basePattern does match an existing entry but that entry also had a skeleton specified -// (i.e. it was also from availableFormats), then the new entry does not override it regardless of the value of the override -// parameter. This prevents later availableFormats entries from a parent locale overriding earlier ones from the actual -// specified locale. However, availableFormats entries *should* override entries with matching skeleton whose skeleton was -// derived (i.e. entries derived from the standard date/time patters for the specified locale). -// 3. When adding the pattern (patternMap->add), we set a new boolean to indicate that the added entry had a -// specified skeleton (which sets a new field in the PtnElem in the PatternMap). 
-UDateTimePatternConflict -DateTimePatternGenerator::addPatternWithSkeleton( - const UnicodeString& pattern, - const UnicodeString* skeletonToUse, - UBool override, - UnicodeString& conflictingPattern, - UErrorCode& status) -{ - if (U_FAILURE(internalErrorCode)) { - status = internalErrorCode; - return UDATPG_NO_CONFLICT; - } - - UnicodeString basePattern; - PtnSkeleton skeleton; - UDateTimePatternConflict conflictingStatus = UDATPG_NO_CONFLICT; - - DateTimeMatcher matcher; - if ( skeletonToUse == nullptr ) { - matcher.set(pattern, fp, skeleton); - matcher.getBasePattern(basePattern); - } else { - matcher.set(*skeletonToUse, fp, skeleton); // no longer trims skeleton fields to max len 3, per #7930 - matcher.getBasePattern(basePattern); // or perhaps instead: basePattern = *skeletonToUse; - } - // We only care about base conflicts - and replacing the pattern associated with a base - if: - // 1. the conflicting previous base pattern did *not* have an explicit skeleton; in that case the previous - // base + pattern combination was derived from either (a) a canonical item, (b) a standard format, or - // (c) a pattern specified programmatically with a previous call to addPattern (which would only happen - // if we are getting here from a subsequent call to addPattern). - // 2. a skeleton is specified for the current pattern, but override=false; in that case we are checking - // availableFormats items from root, which should not override any previous entry with the same base. 
- UBool entryHadSpecifiedSkeleton; - const UnicodeString *duplicatePattern = patternMap->getPatternFromBasePattern(basePattern, entryHadSpecifiedSkeleton); - if (duplicatePattern != nullptr && (!entryHadSpecifiedSkeleton || (skeletonToUse != nullptr && !override))) { - conflictingStatus = UDATPG_BASE_CONFLICT; - conflictingPattern = *duplicatePattern; - if (!override) { - return conflictingStatus; - } - } - // The only time we get here with override=true and skeletonToUse!=null is when adding availableFormats - // items from CLDR data. In that case, we don't want an item from a parent locale to replace an item with - // same skeleton from the specified locale, so skip the current item if skeletonWasSpecified is true for - // the previously-specified conflicting item. - const PtnSkeleton* entrySpecifiedSkeleton = nullptr; - duplicatePattern = patternMap->getPatternFromSkeleton(skeleton, &entrySpecifiedSkeleton); - if (duplicatePattern != nullptr ) { - conflictingStatus = UDATPG_CONFLICT; - conflictingPattern = *duplicatePattern; - if (!override || (skeletonToUse != nullptr && entrySpecifiedSkeleton != nullptr)) { - return conflictingStatus; - } - } - patternMap->add(basePattern, skeleton, pattern, skeletonToUse != nullptr, status); - if(U_FAILURE(status)) { - return conflictingStatus; - } - - return UDATPG_NO_CONFLICT; -} - - -UDateTimePatternField -DateTimePatternGenerator::getAppendFormatNumber(const char* field) const { - for (int32_t i=0; i0; --i) { - if (uprv_strcmp(CLDR_FIELD_WIDTH[i], hyphenPtr)==0) { - *widthP=(UDateTimePGDisplayWidth)i; - break; - } - } - *hyphenPtr = 0; // now delete width portion of key - } - for (int32_t i=0; igetPatternFromSkeleton(*trial.getSkeletonPtr(), &specifiedSkeleton); - missingFields->setTo(tempInfo); - if (distance==0) { - break; - } - } - } - - // If the best raw match had a specified skeleton and that skeleton was requested by the caller, - // then return it too. 
This generally happens when the caller needs to pass that skeleton - // through to adjustFieldTypes so the latter can do a better job. - if (bestPattern && specifiedSkeletonPtr) { - *specifiedSkeletonPtr = specifiedSkeleton; - } - return bestPattern; -} - -UnicodeString -DateTimePatternGenerator::adjustFieldTypes(const UnicodeString& pattern, - const PtnSkeleton* specifiedSkeleton, - int32_t flags, - UDateTimePatternMatchOptions options) { - UnicodeString newPattern; - fp->set(pattern); - for (int32_t i=0; i < fp->itemNumber; i++) { - UnicodeString field = fp->items[i]; - if ( fp->isQuoteLiteral(field) ) { - - UnicodeString quoteLiteral; - fp->getQuoteLiteral(quoteLiteral, &i); - newPattern += quoteLiteral; - } - else { - if (fp->isPatternSeparator(field)) { - newPattern+=field; - continue; - } - int32_t canonicalIndex = fp->getCanonicalIndex(field); - if (canonicalIndex < 0) { - newPattern+=field; - continue; // don't adjust - } - const dtTypeElem *row = &dtTypes[canonicalIndex]; - int32_t typeValue = row->field; - - // handle day periods - with #13183, no longer need special handling here, integrated with normal types - - if ((flags & kDTPGFixFractionalSeconds) != 0 && typeValue == UDATPG_SECOND_FIELD) { - field += decimal; - dtMatcher->skeleton.original.appendFieldTo(UDATPG_FRACTIONAL_SECOND_FIELD, field); - } else if (dtMatcher->skeleton.type[typeValue]!=0) { - // Here: - // - "reqField" is the field from the originally requested skeleton, with length - // "reqFieldLen". - // - "field" is the field from the found pattern. - // - // The adjusted field should consist of characters from the originally requested - // skeleton, except in the case of UDATPG_HOUR_FIELD or UDATPG_MONTH_FIELD or - // UDATPG_WEEKDAY_FIELD or UDATPG_YEAR_FIELD, in which case it should consist - // of characters from the found pattern. 
- // - // The length of the adjusted field (adjFieldLen) should match that in the originally - // requested skeleton, except that in the following cases the length of the adjusted field - // should match that in the found pattern (i.e. the length of this pattern field should - // not be adjusted): - // 1. typeValue is UDATPG_HOUR_FIELD/MINUTE/SECOND and the corresponding bit in options is - // not set (ticket #7180). Note, we may want to implement a similar change for other - // numeric fields (MM, dd, etc.) so the default behavior is to get locale preference for - // field length, but options bits can be used to override this. - // 2. There is a specified skeleton for the found pattern and one of the following is true: - // a) The length of the field in the skeleton (skelFieldLen) is equal to reqFieldLen. - // b) The pattern field is numeric and the skeleton field is not, or vice versa. - - UChar reqFieldChar = dtMatcher->skeleton.original.getFieldChar(typeValue); - int32_t reqFieldLen = dtMatcher->skeleton.original.getFieldLength(typeValue); - if (reqFieldChar == CAP_E && reqFieldLen < 3) - reqFieldLen = 3; // 1-3 for E are equivalent to 3 for c,e - int32_t adjFieldLen = reqFieldLen; - if ( (typeValue==UDATPG_HOUR_FIELD && (options & UDATPG_MATCH_HOUR_FIELD_LENGTH)==0) || - (typeValue==UDATPG_MINUTE_FIELD && (options & UDATPG_MATCH_MINUTE_FIELD_LENGTH)==0) || - (typeValue==UDATPG_SECOND_FIELD && (options & UDATPG_MATCH_SECOND_FIELD_LENGTH)==0) ) { - adjFieldLen = field.length(); - } else if (specifiedSkeleton) { - int32_t skelFieldLen = specifiedSkeleton->original.getFieldLength(typeValue); - UBool patFieldIsNumeric = (row->type > 0); - UBool skelFieldIsNumeric = (specifiedSkeleton->type[typeValue] > 0); - if (skelFieldLen == reqFieldLen || (patFieldIsNumeric && !skelFieldIsNumeric) || (skelFieldIsNumeric && !patFieldIsNumeric)) { - // don't adjust the field length in the found pattern - adjFieldLen = field.length(); - } - } - UChar c = (typeValue!= 
UDATPG_HOUR_FIELD - && typeValue!= UDATPG_MONTH_FIELD - && typeValue!= UDATPG_WEEKDAY_FIELD - && (typeValue!= UDATPG_YEAR_FIELD || reqFieldChar==CAP_Y)) - ? reqFieldChar - : field.charAt(0); - if (typeValue == UDATPG_HOUR_FIELD && (flags & kDTPGSkeletonUsesCapJ) != 0) { - c = fDefaultHourFormatChar; - } - field.remove(); - for (int32_t j=adjFieldLen; j>0; --j) { - field += c; - } - } - newPattern+=field; - } - } - return newPattern; -} - -UnicodeString -DateTimePatternGenerator::getBestAppending(int32_t missingFields, int32_t flags, UErrorCode &status, UDateTimePatternMatchOptions options) { - if (U_FAILURE(status)) { - return UnicodeString(); - } - UnicodeString resultPattern, tempPattern; - const UnicodeString* tempPatternPtr; - int32_t lastMissingFieldMask=0; - if (missingFields!=0) { - resultPattern=UnicodeString(); - const PtnSkeleton* specifiedSkeleton=nullptr; - tempPatternPtr = getBestRaw(*dtMatcher, missingFields, distanceInfo, status, &specifiedSkeleton); - if (U_FAILURE(status)) { - return UnicodeString(); - } - tempPattern = *tempPatternPtr; - resultPattern = adjustFieldTypes(tempPattern, specifiedSkeleton, flags, options); - if ( distanceInfo->missingFieldMask==0 ) { - return resultPattern; - } - while (distanceInfo->missingFieldMask!=0) { // precondition: EVERY single field must work! 
- if ( lastMissingFieldMask == distanceInfo->missingFieldMask ) { - break; // cannot find the proper missing field - } - if (((distanceInfo->missingFieldMask & UDATPG_SECOND_AND_FRACTIONAL_MASK)==UDATPG_FRACTIONAL_MASK) && - ((missingFields & UDATPG_SECOND_AND_FRACTIONAL_MASK) == UDATPG_SECOND_AND_FRACTIONAL_MASK)) { - resultPattern = adjustFieldTypes(resultPattern, specifiedSkeleton, flags | kDTPGFixFractionalSeconds, options); - distanceInfo->missingFieldMask &= ~UDATPG_FRACTIONAL_MASK; - continue; - } - int32_t startingMask = distanceInfo->missingFieldMask; - tempPatternPtr = getBestRaw(*dtMatcher, distanceInfo->missingFieldMask, distanceInfo, status, &specifiedSkeleton); - if (U_FAILURE(status)) { - return UnicodeString(); - } - tempPattern = *tempPatternPtr; - tempPattern = adjustFieldTypes(tempPattern, specifiedSkeleton, flags, options); - int32_t foundMask=startingMask& ~distanceInfo->missingFieldMask; - int32_t topField=getTopBitNumber(foundMask); - - if (appendItemFormats[topField].length() != 0) { - UnicodeString appendName; - getAppendName((UDateTimePatternField)topField, appendName); - const UnicodeString *values[3] = { - &resultPattern, - &tempPattern, - &appendName - }; - SimpleFormatter(appendItemFormats[topField], 2, 3, status). 
- formatAndReplace(values, 3, resultPattern, nullptr, 0, status); - } - lastMissingFieldMask = distanceInfo->missingFieldMask; - } - } - return resultPattern; -} - -int32_t -DateTimePatternGenerator::getTopBitNumber(int32_t foundMask) const { - if ( foundMask==0 ) { - return 0; - } - int32_t i=0; - while (foundMask!=0) { - foundMask >>=1; - ++i; - } - if (i-1 >UDATPG_ZONE_FIELD) { - return UDATPG_ZONE_FIELD; - } - else - return i-1; -} - -void -DateTimePatternGenerator::setAvailableFormat(const UnicodeString &key, UErrorCode& err) -{ - fAvailableFormatKeyHash->puti(key, 1, err); -} - -UBool -DateTimePatternGenerator::isAvailableFormatSet(const UnicodeString &key) const { - return (UBool)(fAvailableFormatKeyHash->geti(key) == 1); -} - -void -DateTimePatternGenerator::copyHashtable(Hashtable *other, UErrorCode &status) { - if (other == nullptr || U_FAILURE(status)) { - return; - } - if (fAvailableFormatKeyHash != nullptr) { - delete fAvailableFormatKeyHash; - fAvailableFormatKeyHash = nullptr; - } - initHashtable(status); - if(U_FAILURE(status)){ - return; - } - int32_t pos = UHASH_FIRST; - const UHashElement* elem = nullptr; - // walk through the hash table and create a deep clone - while((elem = other->nextElement(pos))!= nullptr){ - const UHashTok otherKeyTok = elem->key; - UnicodeString* otherKey = (UnicodeString*)otherKeyTok.pointer; - fAvailableFormatKeyHash->puti(*otherKey, 1, status); - if(U_FAILURE(status)){ - return; - } - } -} - -StringEnumeration* -DateTimePatternGenerator::getSkeletons(UErrorCode& status) const { - if (U_FAILURE(status)) { - return nullptr; - } - if (U_FAILURE(internalErrorCode)) { - status = internalErrorCode; - return nullptr; - } - LocalPointer skeletonEnumerator( - new DTSkeletonEnumeration(*patternMap, DT_SKELETON, status), status); - - return U_SUCCESS(status) ? 
skeletonEnumerator.orphan() : nullptr; -} - -const UnicodeString& -DateTimePatternGenerator::getPatternForSkeleton(const UnicodeString& skeleton) const { - PtnElem *curElem; - - if (skeleton.length() ==0) { - return emptyString; - } - curElem = patternMap->getHeader(skeleton.charAt(0)); - while ( curElem != nullptr ) { - if ( curElem->skeleton->getSkeleton()==skeleton ) { - return curElem->pattern; - } - curElem = curElem->next.getAlias(); - } - return emptyString; -} - -StringEnumeration* -DateTimePatternGenerator::getBaseSkeletons(UErrorCode& status) const { - if (U_FAILURE(status)) { - return nullptr; - } - if (U_FAILURE(internalErrorCode)) { - status = internalErrorCode; - return nullptr; - } - LocalPointer baseSkeletonEnumerator( - new DTSkeletonEnumeration(*patternMap, DT_BASESKELETON, status), status); - - return U_SUCCESS(status) ? baseSkeletonEnumerator.orphan() : nullptr; -} - -StringEnumeration* -DateTimePatternGenerator::getRedundants(UErrorCode& status) { - if (U_FAILURE(status)) { return nullptr; } - if (U_FAILURE(internalErrorCode)) { - status = internalErrorCode; - return nullptr; - } - LocalPointer output(new DTRedundantEnumeration(), status); - if (U_FAILURE(status)) { return nullptr; } - const UnicodeString *pattern; - PatternMapIterator it(status); - if (U_FAILURE(status)) { return nullptr; } - - for (it.set(*patternMap); it.hasNext(); ) { - DateTimeMatcher current = it.next(); - pattern = patternMap->getPatternFromSkeleton(*(it.getSkeleton())); - if ( isCanonicalItem(*pattern) ) { - continue; - } - if ( skipMatcher == nullptr ) { - skipMatcher = new DateTimeMatcher(current); - if (skipMatcher == nullptr) { - status = U_MEMORY_ALLOCATION_ERROR; - return nullptr; - } - } - else { - *skipMatcher = current; - } - UnicodeString trial = getBestPattern(current.getPattern(), status); - if (U_FAILURE(status)) { return nullptr; } - if (trial == *pattern) { - ((DTRedundantEnumeration *)output.getAlias())->add(*pattern, status); - if (U_FAILURE(status)) { 
return nullptr; } - } - if (current.equals(skipMatcher)) { - continue; - } - } - return output.orphan(); -} - -UBool -DateTimePatternGenerator::isCanonicalItem(const UnicodeString& item) const { - if ( item.length() != 1 ) { - return FALSE; - } - for (int32_t i=0; iisDupAllowed = other.isDupAllowed; - for (int32_t bootIndex = 0; bootIndex < MAX_PATTERN_ENTRIES; ++bootIndex) { - PtnElem *curElem, *otherElem, *prevElem=nullptr; - otherElem = other.boot[bootIndex]; - while (otherElem != nullptr) { - LocalPointer newElem(new PtnElem(otherElem->basePattern, otherElem->pattern), status); - if (U_FAILURE(status)) { - return; // out of memory - } - newElem->skeleton.adoptInsteadAndCheckErrorCode(new PtnSkeleton(*(otherElem->skeleton)), status); - if (U_FAILURE(status)) { - return; // out of memory - } - newElem->skeletonWasSpecified = otherElem->skeletonWasSpecified; - - // Release ownership from the LocalPointer of the PtnElem object. - // The PtnElem will now be owned by either the boot (for the first entry in the linked-list) - // or owned by the previous PtnElem object in the linked-list. 
- curElem = newElem.orphan(); - - if (this->boot[bootIndex] == nullptr) { - this->boot[bootIndex] = curElem; - } else { - if (prevElem != nullptr) { - prevElem->next.adoptInstead(curElem); - } else { - UPRV_UNREACHABLE; - } - } - prevElem = curElem; - otherElem = otherElem->next.getAlias(); - } - - } -} - -PtnElem* -PatternMap::getHeader(UChar baseChar) const { - PtnElem* curElem; - - if ( (baseChar >= CAP_A) && (baseChar <= CAP_Z) ) { - curElem = boot[baseChar-CAP_A]; - } - else { - if ( (baseChar >=LOW_A) && (baseChar <= LOW_Z) ) { - curElem = boot[26+baseChar-LOW_A]; - } - else { - return nullptr; - } - } - return curElem; -} - -PatternMap::~PatternMap() { - for (int32_t i=0; i < MAX_PATTERN_ENTRIES; ++i ) { - if (boot[i] != nullptr ) { - delete boot[i]; - boot[i] = nullptr; - } - } -} // PatternMap destructor - -void -PatternMap::add(const UnicodeString& basePattern, - const PtnSkeleton& skeleton, - const UnicodeString& value,// mapped pattern value - UBool skeletonWasSpecified, - UErrorCode &status) { - UChar baseChar = basePattern.charAt(0); - PtnElem *curElem, *baseElem; - status = U_ZERO_ERROR; - - // the baseChar must be A-Z or a-z - if ((baseChar >= CAP_A) && (baseChar <= CAP_Z)) { - baseElem = boot[baseChar-CAP_A]; - } - else { - if ((baseChar >=LOW_A) && (baseChar <= LOW_Z)) { - baseElem = boot[26+baseChar-LOW_A]; - } - else { - status = U_ILLEGAL_CHARACTER; - return; - } - } - - if (baseElem == nullptr) { - LocalPointer newElem(new PtnElem(basePattern, value), status); - if (U_FAILURE(status)) { - return; // out of memory - } - newElem->skeleton.adoptInsteadAndCheckErrorCode(new PtnSkeleton(skeleton), status); - if (U_FAILURE(status)) { - return; // out of memory - } - newElem->skeletonWasSpecified = skeletonWasSpecified; - if (baseChar >= LOW_A) { - boot[26 + (baseChar - LOW_A)] = newElem.orphan(); // the boot array now owns the PtnElem. - } - else { - boot[baseChar - CAP_A] = newElem.orphan(); // the boot array now owns the PtnElem. 
- } - } - if ( baseElem != nullptr ) { - curElem = getDuplicateElem(basePattern, skeleton, baseElem); - - if (curElem == nullptr) { - // add new element to the list. - curElem = baseElem; - while( curElem -> next != nullptr ) - { - curElem = curElem->next.getAlias(); - } - - LocalPointer newElem(new PtnElem(basePattern, value), status); - if (U_FAILURE(status)) { - return; // out of memory - } - newElem->skeleton.adoptInsteadAndCheckErrorCode(new PtnSkeleton(skeleton), status); - if (U_FAILURE(status)) { - return; // out of memory - } - newElem->skeletonWasSpecified = skeletonWasSpecified; - curElem->next.adoptInstead(newElem.orphan()); - curElem = curElem->next.getAlias(); - } - else { - // Pattern exists in the list already. - if ( !isDupAllowed ) { - return; - } - // Overwrite the value. - curElem->pattern = value; - // It was a bug that we were not doing the following previously, - // though that bug hid other problems by making things partly work. - curElem->skeletonWasSpecified = skeletonWasSpecified; - } - } -} // PatternMap::add - -// Find the pattern from the given basePattern string. -const UnicodeString * -PatternMap::getPatternFromBasePattern(const UnicodeString& basePattern, UBool& skeletonWasSpecified) const { // key to search for - PtnElem *curElem; - - if ((curElem=getHeader(basePattern.charAt(0)))==nullptr) { - return nullptr; // no match - } - - do { - if ( basePattern.compare(curElem->basePattern)==0 ) { - skeletonWasSpecified = curElem->skeletonWasSpecified; - return &(curElem->pattern); - } - curElem = curElem->next.getAlias(); - } while (curElem != nullptr); - - return nullptr; -} // PatternMap::getFromBasePattern - - -// Find the pattern from the given skeleton. 
-// At least when this is called from getBestRaw & addPattern (in which case specifiedSkeletonPtr is non-NULL), -// the comparison should be based on skeleton.original (which is unique and tied to the distance measurement in bestRaw) -// and not skeleton.baseOriginal (which is not unique); otherwise we may pick a different skeleton than the one with the -// optimum distance value in getBestRaw. When this is called from public getRedundants (specifiedSkeletonPtr is NULL), -// for now it will continue to compare based on baseOriginal so as not to change the behavior unnecessarily. -const UnicodeString * -PatternMap::getPatternFromSkeleton(const PtnSkeleton& skeleton, const PtnSkeleton** specifiedSkeletonPtr) const { // key to search for - PtnElem *curElem; - - if (specifiedSkeletonPtr) { - *specifiedSkeletonPtr = nullptr; - } - - // find boot entry - UChar baseChar = skeleton.getFirstChar(); - if ((curElem=getHeader(baseChar))==nullptr) { - return nullptr; // no match - } - - do { - UBool equal; - if (specifiedSkeletonPtr != nullptr) { // called from DateTimePatternGenerator::getBestRaw or addPattern, use original - equal = curElem->skeleton->original == skeleton.original; - } else { // called from DateTimePatternGenerator::getRedundants, use baseOriginal - equal = curElem->skeleton->baseOriginal == skeleton.baseOriginal; - } - if (equal) { - if (specifiedSkeletonPtr && curElem->skeletonWasSpecified) { - *specifiedSkeletonPtr = curElem->skeleton.getAlias(); - } - return &(curElem->pattern); - } - curElem = curElem->next.getAlias(); - } while (curElem != nullptr); - - return nullptr; -} - -UBool -PatternMap::equals(const PatternMap& other) const { - if ( this==&other ) { - return TRUE; - } - for (int32_t bootIndex = 0; bootIndex < MAX_PATTERN_ENTRIES; ++bootIndex) { - if (boot[bootIndex] == other.boot[bootIndex]) { - continue; - } - if ((boot[bootIndex] == nullptr) || (other.boot[bootIndex] == nullptr)) { - return FALSE; - } - PtnElem *otherElem = 
other.boot[bootIndex]; - PtnElem *myElem = boot[bootIndex]; - while ((otherElem != nullptr) || (myElem != nullptr)) { - if ( myElem == otherElem ) { - break; - } - if ((otherElem == nullptr) || (myElem == nullptr)) { - return FALSE; - } - if ( (myElem->basePattern != otherElem->basePattern) || - (myElem->pattern != otherElem->pattern) ) { - return FALSE; - } - if ((myElem->skeleton.getAlias() != otherElem->skeleton.getAlias()) && - !myElem->skeleton->equals(*(otherElem->skeleton))) { - return FALSE; - } - myElem = myElem->next.getAlias(); - otherElem = otherElem->next.getAlias(); - } - } - return TRUE; -} - -// find any key existing in the mapping table already. -// return TRUE if there is an existing key, otherwise return FALSE. -PtnElem* -PatternMap::getDuplicateElem( - const UnicodeString &basePattern, - const PtnSkeleton &skeleton, - PtnElem *baseElem) { - PtnElem *curElem; - - if ( baseElem == nullptr ) { - return nullptr; - } - else { - curElem = baseElem; - } - do { - if ( basePattern.compare(curElem->basePattern)==0 ) { - UBool isEqual = TRUE; - for (int32_t i = 0; i < UDATPG_FIELD_COUNT; ++i) { - if (curElem->skeleton->type[i] != skeleton.type[i] ) { - isEqual = FALSE; - break; - } - } - if (isEqual) { - return curElem; - } - } - curElem = curElem->next.getAlias(); - } while( curElem != nullptr ); - - // end of the list - return nullptr; - -} // PatternMap::getDuplicateElem - -DateTimeMatcher::DateTimeMatcher(void) { -} - -DateTimeMatcher::~DateTimeMatcher() {} - -DateTimeMatcher::DateTimeMatcher(const DateTimeMatcher& other) { - copyFrom(other.skeleton); -} - - -void -DateTimeMatcher::set(const UnicodeString& pattern, FormatParser* fp) { - PtnSkeleton localSkeleton; - return set(pattern, fp, localSkeleton); -} - -void -DateTimeMatcher::set(const UnicodeString& pattern, FormatParser* fp, PtnSkeleton& skeletonResult) { - int32_t i; - for (i=0; iset(pattern); - for (i=0; i < fp->itemNumber; i++) { - const UnicodeString& value = fp->items[i]; - // don't skip 
'a' anymore, dayPeriod handled specially below - - if ( fp->isQuoteLiteral(value) ) { - UnicodeString quoteLiteral; - fp->getQuoteLiteral(quoteLiteral, &i); - continue; - } - int32_t canonicalIndex = fp->getCanonicalIndex(value); - if (canonicalIndex < 0) { - continue; - } - const dtTypeElem *row = &dtTypes[canonicalIndex]; - int32_t field = row->field; - skeletonResult.original.populate(field, value); - UChar repeatChar = row->patternChar; - int32_t repeatCount = row->minLen; - skeletonResult.baseOriginal.populate(field, repeatChar, repeatCount); - int16_t subField = row->type; - if (row->type > 0) { - U_ASSERT(value.length() < INT16_MAX); - subField += static_cast(value.length()); - } - skeletonResult.type[field] = subField; - } - // #13183, handle special behavior for day period characters (a, b, B) - if (!skeletonResult.original.isFieldEmpty(UDATPG_HOUR_FIELD)) { - if (skeletonResult.original.getFieldChar(UDATPG_HOUR_FIELD)==LOW_H || skeletonResult.original.getFieldChar(UDATPG_HOUR_FIELD)==CAP_K) { - // We have a skeleton with 12-hour-cycle format - if (skeletonResult.original.isFieldEmpty(UDATPG_DAYPERIOD_FIELD)) { - // But we do not have a day period in the skeleton; add the default DAYPERIOD (currently "a") - for (i = 0; dtTypes[i].patternChar != 0; i++) { - if ( dtTypes[i].field == UDATPG_DAYPERIOD_FIELD ) { - // first entry for UDATPG_DAYPERIOD_FIELD - skeletonResult.original.populate(UDATPG_DAYPERIOD_FIELD, dtTypes[i].patternChar, dtTypes[i].minLen); - skeletonResult.baseOriginal.populate(UDATPG_DAYPERIOD_FIELD, dtTypes[i].patternChar, dtTypes[i].minLen); - skeletonResult.type[UDATPG_DAYPERIOD_FIELD] = dtTypes[i].type; - skeletonResult.addedDefaultDayPeriod = TRUE; - break; - } - } - } - } else { - // Skeleton has 24-hour-cycle hour format and has dayPeriod, delete dayPeriod (i.e. 
ignore it) - skeletonResult.original.clearField(UDATPG_DAYPERIOD_FIELD); - skeletonResult.baseOriginal.clearField(UDATPG_DAYPERIOD_FIELD); - skeletonResult.type[UDATPG_DAYPERIOD_FIELD] = NONE; - } - } - copyFrom(skeletonResult); -} - -void -DateTimeMatcher::getBasePattern(UnicodeString &result ) { - result.remove(); // Reset the result first. - skeleton.baseOriginal.appendTo(result); -} - -UnicodeString -DateTimeMatcher::getPattern() { - UnicodeString result; - return skeleton.original.appendTo(result); -} - -int32_t -DateTimeMatcher::getDistance(const DateTimeMatcher& other, int32_t includeMask, DistanceInfo& distanceInfo) const { - int32_t result = 0; - distanceInfo.clear(); - for (int32_t i=0; iskeleton.original; -} - -int32_t -DateTimeMatcher::getFieldMask() const { - int32_t result = 0; - - for (int32_t i=0; i= pattern.length()) { - return DONE; - } - // check the current char is between A-Z or a-z - do { - UChar c=pattern.charAt(curLoc); - if ( (c>=CAP_A && c<=CAP_Z) || (c>=LOW_A && c<=LOW_Z) ) { - curLoc++; - } - else { - startPos = curLoc; - *len=1; - return ADD_TOKEN; - } - - if ( pattern.charAt(curLoc)!= pattern.charAt(startPos) ) { - break; // not the same token - } - } while(curLoc <= pattern.length()); - *len = curLoc-startPos; - return ADD_TOKEN; -} - -void -FormatParser::set(const UnicodeString& pattern) { - int32_t startPos = 0; - TokenStatus result = START; - int32_t len = 0; - itemNumber = 0; - - do { - result = setTokens( pattern, startPos, &len ); - if ( result == ADD_TOKEN ) - { - items[itemNumber++] = UnicodeString(pattern, startPos, len ); - startPos += len; - } - else { - break; - } - } while (result==ADD_TOKEN && itemNumber < MAX_DT_TOKEN); -} - -int32_t -FormatParser::getCanonicalIndex(const UnicodeString& s, UBool strict) { - int32_t len = s.length(); - if (len == 0) { - return -1; - } - UChar ch = s.charAt(0); - - // Verify that all are the same character. 
- for (int32_t l = 1; l < len; l++) { - if (ch != s.charAt(l)) { - return -1; - } - } - int32_t i = 0; - int32_t bestRow = -1; - while (dtTypes[i].patternChar != 0x0000) { - if ( dtTypes[i].patternChar != ch ) { - ++i; - continue; - } - bestRow = i; - if (dtTypes[i].patternChar != dtTypes[i+1].patternChar) { - return i; - } - if (dtTypes[i+1].minLen <= len) { - ++i; - continue; - } - return i; - } - return strict ? -1 : bestRow; -} - -UBool -FormatParser::isQuoteLiteral(const UnicodeString& s) { - return (UBool)(s.charAt(0) == SINGLE_QUOTE); -} - -// This function assumes the current itemIndex points to the quote literal. -// Please call isQuoteLiteral prior to this function. -void -FormatParser::getQuoteLiteral(UnicodeString& quote, int32_t *itemIndex) { - int32_t i = *itemIndex; - - quote.remove(); - if (items[i].charAt(0)==SINGLE_QUOTE) { - quote += items[i]; - ++i; - } - while ( i < itemNumber ) { - if ( items[i].charAt(0)==SINGLE_QUOTE ) { - if ( (i+1patternMap=&newPatternMap; -} - -PtnSkeleton* -PatternMapIterator::getSkeleton() const { - if ( nodePtr == nullptr ) { - return nullptr; - } - else { - return nodePtr->skeleton.getAlias(); - } -} - -UBool -PatternMapIterator::hasNext() const { - int32_t headIndex = bootIndex; - PtnElem *curPtr = nodePtr; - - if (patternMap==nullptr) { - return FALSE; - } - while ( headIndex < MAX_PATTERN_ENTRIES ) { - if ( curPtr != nullptr ) { - if ( curPtr->next != nullptr ) { - return TRUE; - } - else { - headIndex++; - curPtr=nullptr; - continue; - } - } - else { - if ( patternMap->boot[headIndex] != nullptr ) { - return TRUE; - } - else { - headIndex++; - continue; - } - } - } - return FALSE; -} - -DateTimeMatcher& -PatternMapIterator::next() { - while ( bootIndex < MAX_PATTERN_ENTRIES ) { - if ( nodePtr != nullptr ) { - if ( nodePtr->next != nullptr ) { - nodePtr = nodePtr->next.getAlias(); - break; - } - else { - bootIndex++; - nodePtr=nullptr; - continue; - } - } - else { - if ( patternMap->boot[bootIndex] != nullptr ) { - 
nodePtr = patternMap->boot[bootIndex]; - break; - } - else { - bootIndex++; - continue; - } - } - } - if (nodePtr!=nullptr) { - matcher->copyFrom(*nodePtr->skeleton); - } - else { - matcher->copyFrom(); - } - return *matcher; -} - - -SkeletonFields::SkeletonFields() { - // Set initial values to zero - clear(); -} - -void SkeletonFields::clear() { - uprv_memset(chars, 0, sizeof(chars)); - uprv_memset(lengths, 0, sizeof(lengths)); -} - -void SkeletonFields::copyFrom(const SkeletonFields& other) { - uprv_memcpy(chars, other.chars, sizeof(chars)); - uprv_memcpy(lengths, other.lengths, sizeof(lengths)); -} - -void SkeletonFields::clearField(int32_t field) { - chars[field] = 0; - lengths[field] = 0; -} - -UChar SkeletonFields::getFieldChar(int32_t field) const { - return chars[field]; -} - -int32_t SkeletonFields::getFieldLength(int32_t field) const { - return lengths[field]; -} - -void SkeletonFields::populate(int32_t field, const UnicodeString& value) { - populate(field, value.charAt(0), value.length()); -} - -void SkeletonFields::populate(int32_t field, UChar ch, int32_t length) { - chars[field] = (int8_t) ch; - lengths[field] = (int8_t) length; -} - -UBool SkeletonFields::isFieldEmpty(int32_t field) const { - return lengths[field] == 0; -} - -UnicodeString& SkeletonFields::appendTo(UnicodeString& string) const { - for (int32_t i = 0; i < UDATPG_FIELD_COUNT; ++i) { - appendFieldTo(i, string); - } - return string; -} - -UnicodeString& SkeletonFields::appendFieldTo(int32_t field, UnicodeString& string) const { - UChar ch(chars[field]); - int32_t length = (int32_t) lengths[field]; - - for (int32_t i=0; i= 0) { - // for backward compatibility: if DateTimeMatcher.set added a single 'a' that - // was not in the provided skeleton, remove it here before returning skeleton. 
- result.remove(pos, 1); - } - return result; -} - -UnicodeString -PtnSkeleton::getBaseSkeleton() const { - UnicodeString result; - result = baseOriginal.appendTo(result); - int32_t pos; - if (addedDefaultDayPeriod && (pos = result.indexOf(LOW_A)) >= 0) { - // for backward compatibility: if DateTimeMatcher.set added a single 'a' that - // was not in the provided skeleton, remove it here before returning skeleton. - result.remove(pos, 1); - } - return result; -} - -UChar -PtnSkeleton::getFirstChar() const { - return baseOriginal.getFirstChar(); -} - -PtnSkeleton::~PtnSkeleton() { -} - -PtnElem::PtnElem(const UnicodeString &basePat, const UnicodeString &pat) : - basePattern(basePat), skeleton(nullptr), pattern(pat), next(nullptr) -{ -} - -PtnElem::~PtnElem() { -} - -DTSkeletonEnumeration::DTSkeletonEnumeration(PatternMap& patternMap, dtStrEnum type, UErrorCode& status) : fSkeletons(nullptr) { - PtnElem *curElem; - PtnSkeleton *curSkeleton; - UnicodeString s; - int32_t bootIndex; - - pos=0; - fSkeletons.adoptInsteadAndCheckErrorCode(new UVector(status), status); - if (U_FAILURE(status)) { - return; - } - - for (bootIndex=0; bootIndexbasePattern; - break; - case DT_PATTERN: - s=curElem->pattern; - break; - case DT_SKELETON: - curSkeleton=curElem->skeleton.getAlias(); - s=curSkeleton->getSkeleton(); - break; - } - if ( !isCanonicalItem(s) ) { - LocalPointer newElem(new UnicodeString(s), status); - if (U_FAILURE(status)) { - return; - } - fSkeletons->addElement(newElem.getAlias(), status); - if (U_FAILURE(status)) { - fSkeletons.adoptInstead(nullptr); - return; - } - newElem.orphan(); // fSkeletons vector now owns the UnicodeString. 
- } - curElem = curElem->next.getAlias(); - } - } - if ((bootIndex==MAX_PATTERN_ENTRIES) && (curElem!=nullptr) ) { - status = U_BUFFER_OVERFLOW_ERROR; - } -} - -const UnicodeString* -DTSkeletonEnumeration::snext(UErrorCode& status) { - if (U_SUCCESS(status) && fSkeletons.isValid() && pos < fSkeletons->size()) { - return (const UnicodeString*)fSkeletons->elementAt(pos++); - } - return nullptr; -} - -void -DTSkeletonEnumeration::reset(UErrorCode& /*status*/) { - pos=0; -} - -int32_t -DTSkeletonEnumeration::count(UErrorCode& /*status*/) const { - return (fSkeletons.isNull()) ? 0 : fSkeletons->size(); -} - -UBool -DTSkeletonEnumeration::isCanonicalItem(const UnicodeString& item) { - if ( item.length() != 1 ) { - return FALSE; - } - for (int32_t i=0; isize(); ++i) { - if ((s = (UnicodeString *)fSkeletons->elementAt(i)) != nullptr) { - delete s; - } - } - } -} - -DTRedundantEnumeration::DTRedundantEnumeration() : pos(0), fPatterns(nullptr) { -} - -void -DTRedundantEnumeration::add(const UnicodeString& pattern, UErrorCode& status) { - if (U_FAILURE(status)) { return; } - if (fPatterns.isNull()) { - fPatterns.adoptInsteadAndCheckErrorCode(new UVector(status), status); - if (U_FAILURE(status)) { - return; - } - } - LocalPointer newElem(new UnicodeString(pattern), status); - if (U_FAILURE(status)) { - return; - } - fPatterns->addElement(newElem.getAlias(), status); - if (U_FAILURE(status)) { - fPatterns.adoptInstead(nullptr); - return; - } - newElem.orphan(); // fPatterns now owns the string. -} - -const UnicodeString* -DTRedundantEnumeration::snext(UErrorCode& status) { - if (U_SUCCESS(status) && fPatterns.isValid() && pos < fPatterns->size()) { - return (const UnicodeString*)fPatterns->elementAt(pos++); - } - return nullptr; -} - -void -DTRedundantEnumeration::reset(UErrorCode& /*status*/) { - pos=0; -} - -int32_t -DTRedundantEnumeration::count(UErrorCode& /*status*/) const { - return (fPatterns.isNull()) ? 
0 : fPatterns->size(); -} - -UBool -DTRedundantEnumeration::isCanonicalItem(const UnicodeString& item) const { - if ( item.length() != 1 ) { - return FALSE; - } - for (int32_t i=0; isize(); ++i) { - if ((s = (UnicodeString *)fPatterns->elementAt(i)) != nullptr) { - delete s; - } - } - } -} - -U_NAMESPACE_END - - -#endif /* #if !UCONFIG_NO_FORMATTING */ - -//eof