3rdparty: Update simpleini to 4.25

Signed-off-by: SternXD <stern@sidestore.io>
This commit is contained in:
SternXD 2025-11-26 15:25:52 -05:00 committed by Ty
parent 3d2ecafb01
commit 775f381685
4 changed files with 577 additions and 500 deletions

View File

@ -1,6 +1,6 @@
The MIT License (MIT) The MIT License (MIT)
Copyright (c) 2006-2022 Brodie Thiesfield Copyright (c) 2006-2024 Brodie Thiesfield
Permission is hereby granted, free of charge, to any person obtaining a copy of Permission is hereby granted, free of charge, to any person obtaining a copy of
this software and associated documentation files (the "Software"), to deal in this software and associated documentation files (the "Software"), to deal in

View File

@ -1,4 +1,6 @@
/* /*
* https://web.archive.org/web/20090529064329/http://www.unicode.org:80/Public/PROGRAMS/CVTUTF/
*
* Copyright 2001-2004 Unicode, Inc. * Copyright 2001-2004 Unicode, Inc.
* *
* Disclaimer * Disclaimer
@ -24,7 +26,7 @@
Conversions between UTF32, UTF-16, and UTF-8. Header file. Conversions between UTF32, UTF-16, and UTF-8. Header file.
Several functions are included here, forming a complete set of Several functions are included here, forming a complete set of
conversions between the three formats. UTF-7 is not included conversions between the three formats. UTF-7 is not included
here, but is handled in a separate source file. here, but is handled in a separate source file.
@ -87,7 +89,7 @@
bit mask & shift operations. bit mask & shift operations.
------------------------------------------------------------------------ */ ------------------------------------------------------------------------ */
typedef unsigned int UTF32; /* at least 32 bits */ typedef unsigned long UTF32; /* at least 32 bits */
typedef unsigned short UTF16; /* at least 16 bits */ typedef unsigned short UTF16; /* at least 16 bits */
typedef unsigned char UTF8; /* typically 8 bits */ typedef unsigned char UTF8; /* typically 8 bits */
typedef unsigned char Boolean; /* 0 or 1 */ typedef unsigned char Boolean; /* 0 or 1 */
@ -102,7 +104,7 @@ typedef unsigned char Boolean; /* 0 or 1 */
typedef enum { typedef enum {
conversionOK, /* conversion successful */ conversionOK, /* conversion successful */
sourceExhausted, /* partial character in source, but hit end */ sourceExhausted, /* partial character in source, but hit end */
targetExhausted, /* insufficient room in target for conversion */ targetExhausted, /* insuff. room in target for conversion */
sourceIllegal /* source sequence is illegal/malformed */ sourceIllegal /* source sequence is illegal/malformed */
} ConversionResult; } ConversionResult;

View File

@ -5,7 +5,7 @@
<tr><th>File <td>SimpleIni.h <tr><th>File <td>SimpleIni.h
<tr><th>Author <td>Brodie Thiesfield <tr><th>Author <td>Brodie Thiesfield
<tr><th>Source <td>https://github.com/brofield/simpleini <tr><th>Source <td>https://github.com/brofield/simpleini
<tr><th>Version <td>4.22 <tr><th>Version <td>4.25
</table> </table>
Jump to the @link CSimpleIniTempl CSimpleIni @endlink interface documentation. Jump to the @link CSimpleIniTempl CSimpleIni @endlink interface documentation.
@ -53,7 +53,7 @@
-# If you will only be using straight utf8 files and access the data via the -# If you will only be using straight utf8 files and access the data via the
char interface, then you do not need any conversion library and could define char interface, then you do not need any conversion library and could define
SI_NO_CONVERSION. Note that no conversion also means no validation of the data. SI_NO_CONVERSION. Note that no conversion also means no validation of the data.
If no converter is specified then the default converter is SI_CONVERT_GENERIC If no converter is specified then the default converter is SI_NO_CONVERSION
on Mac/Linux and SI_CONVERT_WIN32 on Windows. If you need widechar support on on Mac/Linux and SI_CONVERT_WIN32 on Windows. If you need widechar support on
Mac/Linux then use either SI_CONVERT_GENERIC or SI_CONVERT_ICU. These are also Mac/Linux then use either SI_CONVERT_GENERIC or SI_CONVERT_ICU. These are also
supported on all platforms. supported on all platforms.
@ -161,6 +161,9 @@
@section notes NOTES @section notes NOTES
- The maximum supported file size is 1 GiB (SI_MAX_FILE_SIZE). Files larger
than this will be rejected with SI_FILE error to prevent excessive memory
allocation and potential denial of service attacks.
- To load UTF-8 data on Windows 95, you need to use Microsoft Layer for - To load UTF-8 data on Windows 95, you need to use Microsoft Layer for
Unicode, or SI_CONVERT_GENERIC, or SI_CONVERT_ICU. Unicode, or SI_CONVERT_GENERIC, or SI_CONVERT_ICU.
- When using SI_CONVERT_GENERIC, ConvertUTF.c must be compiled and linked. - When using SI_CONVERT_GENERIC, ConvertUTF.c must be compiled and linked.
@ -261,6 +264,10 @@ constexpr int SI_FAIL = -1; //!< Generic failure
constexpr int SI_NOMEM = -2; //!< Out of memory error constexpr int SI_NOMEM = -2; //!< Out of memory error
constexpr int SI_FILE = -3; //!< File error (see errno for detail error) constexpr int SI_FILE = -3; //!< File error (see errno for detail error)
//! Maximum supported file size (1 GiB). Files larger than this will be rejected
//! to prevent excessive memory allocation and potential denial of service.
constexpr size_t SI_MAX_FILE_SIZE = 1024ULL * 1024ULL * 1024ULL;
#define SI_UTF8_SIGNATURE "\xEF\xBB\xBF" #define SI_UTF8_SIGNATURE "\xEF\xBB\xBF"
#ifdef _WIN32 #ifdef _WIN32
@ -357,7 +364,7 @@ public:
if (lhs.nOrder != rhs.nOrder) { if (lhs.nOrder != rhs.nOrder) {
return lhs.nOrder < rhs.nOrder; return lhs.nOrder < rhs.nOrder;
} }
return KeyOrder()(lhs.pItem, rhs.pItem); return KeyOrder()(lhs, rhs);
} }
}; };
}; };
@ -1463,8 +1470,13 @@ CSimpleIniTempl<SI_CHAR,SI_STRLESS,SI_CONVERTER>::LoadFile(
return SI_OK; return SI_OK;
} }
// check file size is within supported limits (SI_MAX_FILE_SIZE)
if (static_cast<size_t>(lSize) > SI_MAX_FILE_SIZE) {
return SI_FILE;
}
// allocate and ensure NULL terminated // allocate and ensure NULL terminated
char * pData = new(std::nothrow) char[lSize+static_cast<size_t>(1)]; char * pData = new(std::nothrow) char[static_cast<size_t>(lSize) + 1];
if (!pData) { if (!pData) {
return SI_NOMEM; return SI_NOMEM;
} }
@ -1516,6 +1528,11 @@ CSimpleIniTempl<SI_CHAR,SI_STRLESS,SI_CONVERTER>::LoadData(
return SI_FAIL; return SI_FAIL;
} }
// check converted data size is within supported limits (SI_MAX_FILE_SIZE)
if (uLen >= (SI_MAX_FILE_SIZE / sizeof(SI_CHAR))) {
return SI_FILE;
}
// allocate memory for the data, ensure that there is a NULL // allocate memory for the data, ensure that there is a NULL
// terminator wherever the converted data ends // terminator wherever the converted data ends
SI_CHAR * pData = new(std::nothrow) SI_CHAR[uLen + 1]; SI_CHAR * pData = new(std::nothrow) SI_CHAR[uLen + 1];
@ -1800,6 +1817,7 @@ CSimpleIniTempl<SI_CHAR,SI_STRLESS,SI_CONVERTER>::IsMultiLineData(
} }
// embedded newlines // embedded newlines
const SI_CHAR * pStart = a_pData;
while (*a_pData) { while (*a_pData) {
if (IsNewLineChar(*a_pData)) { if (IsNewLineChar(*a_pData)) {
return true; return true;
@ -1807,8 +1825,8 @@ CSimpleIniTempl<SI_CHAR,SI_STRLESS,SI_CONVERTER>::IsMultiLineData(
++a_pData; ++a_pData;
} }
// check for suffix // check for suffix (ensure we don't go before start of string)
if (IsSpace(*--a_pData)) { if (a_pData > pStart && IsSpace(*(a_pData - 1))) {
return true; return true;
} }
@ -1835,6 +1853,7 @@ CSimpleIniTempl<SI_CHAR, SI_STRLESS, SI_CONVERTER>::IsSingleLineQuotedValue(
} }
// embedded newlines // embedded newlines
const SI_CHAR * pStart = a_pData;
while (*a_pData) { while (*a_pData) {
if (IsNewLineChar(*a_pData)) { if (IsNewLineChar(*a_pData)) {
return false; return false;
@ -1842,8 +1861,8 @@ CSimpleIniTempl<SI_CHAR, SI_STRLESS, SI_CONVERTER>::IsSingleLineQuotedValue(
++a_pData; ++a_pData;
} }
// check for suffix // check for suffix (ensure we don't go before start of string)
if (IsSpace(*--a_pData)) { if (a_pData > pStart && IsSpace(*(a_pData - 1))) {
return true; return true;
} }
@ -2088,7 +2107,8 @@ CSimpleIniTempl<SI_CHAR,SI_STRLESS,SI_CONVERTER>::AddEntry(
if (pComment) { if (pComment) {
DeleteString(a_pComment); DeleteString(a_pComment);
a_pComment = pComment; a_pComment = pComment;
CopyString(a_pComment); rc = CopyString(a_pComment);
if (rc < 0) return rc;
} }
Delete(a_pSection, a_pKey); Delete(a_pSection, a_pKey);
iKey = keyval.end(); iKey = keyval.end();
@ -2257,11 +2277,12 @@ CSimpleIniTempl<SI_CHAR,SI_STRLESS,SI_CONVERTER>::GetDoubleValue(
return a_nDefault; return a_nDefault;
} }
char * pszSuffix = NULL; char * pszSuffix = szValue;
double nValue = strtod(szValue, &pszSuffix); double nValue = strtod(szValue, &pszSuffix);
// any invalid strings will return the default value // any invalid strings will return the default value
if (!pszSuffix || *pszSuffix) { // check if no conversion was performed or if there are trailing characters
if (pszSuffix == szValue || *pszSuffix) {
return a_nDefault; return a_nDefault;
} }
@ -2421,7 +2442,7 @@ CSimpleIniTempl<SI_CHAR,SI_STRLESS,SI_CONVERTER>::GetSectionSize(
int nCount = 0; int nCount = 0;
const SI_CHAR * pLastKey = NULL; const SI_CHAR * pLastKey = NULL;
typename TKeyVal::const_iterator iKeyVal = section.begin(); typename TKeyVal::const_iterator iKeyVal = section.begin();
for (int n = 0; iKeyVal != section.end(); ++iKeyVal, ++n) { for (; iKeyVal != section.end(); ++iKeyVal) {
if (!pLastKey || IsLess(pLastKey, iKeyVal->first.pItem)) { if (!pLastKey || IsLess(pLastKey, iKeyVal->first.pItem)) {
++nCount; ++nCount;
pLastKey = iKeyVal->first.pItem; pLastKey = iKeyVal->first.pItem;
@ -2464,7 +2485,7 @@ CSimpleIniTempl<SI_CHAR,SI_STRLESS,SI_CONVERTER>::GetAllSections(
{ {
a_names.clear(); a_names.clear();
typename TSection::const_iterator i = m_data.begin(); typename TSection::const_iterator i = m_data.begin();
for (int n = 0; i != m_data.end(); ++i, ++n ) { for (; i != m_data.end(); ++i) {
a_names.push_back(i->first); a_names.push_back(i->first);
} }
} }
@ -2490,7 +2511,7 @@ CSimpleIniTempl<SI_CHAR,SI_STRLESS,SI_CONVERTER>::GetAllKeys(
const TKeyVal & section = iSection->second; const TKeyVal & section = iSection->second;
const SI_CHAR * pLastKey = NULL; const SI_CHAR * pLastKey = NULL;
typename TKeyVal::const_iterator iKeyVal = section.begin(); typename TKeyVal::const_iterator iKeyVal = section.begin();
for (int n = 0; iKeyVal != section.end(); ++iKeyVal, ++n ) { for (; iKeyVal != section.end(); ++iKeyVal) {
if (!pLastKey || IsLess(pLastKey, iKeyVal->first.pItem)) { if (!pLastKey || IsLess(pLastKey, iKeyVal->first.pItem)) {
a_names.push_back(iKeyVal->first); a_names.push_back(iKeyVal->first);
pLastKey = iKeyVal->first.pItem; pLastKey = iKeyVal->first.pItem;
@ -2828,7 +2849,7 @@ CSimpleIniTempl<SI_CHAR,SI_STRLESS,SI_CONVERTER>::DeleteString(
// strings may exist either inside the data block, or they will be // strings may exist either inside the data block, or they will be
// individually allocated and stored in m_strings. We only physically // individually allocated and stored in m_strings. We only physically
// delete those stored in m_strings. // delete those stored in m_strings.
if (a_pString < m_pData || a_pString >= m_pData + m_uDataLen) { if (!m_pData || a_pString < m_pData || a_pString >= m_pData + m_uDataLen) {
typename TNamesDepend::iterator i = m_strings.begin(); typename TNamesDepend::iterator i = m_strings.begin();
for (;i != m_strings.end(); ++i) { for (;i != m_strings.end(); ++i) {
if (a_pString == i->pItem) { if (a_pString == i->pItem) {
@ -2850,17 +2871,19 @@ CSimpleIniTempl<SI_CHAR,SI_STRLESS,SI_CONVERTER>::DeleteString(
// //
// SI_NO_CONVERSION Do not make the "W" wide character version of the // SI_NO_CONVERSION Do not make the "W" wide character version of the
// library available. Only CSimpleIniA etc is defined. // library available. Only CSimpleIniA etc is defined.
// Default on Linux/MacOS/etc.
// SI_CONVERT_WIN32 Use the Win32 API functions for conversion.
// Default on Windows.
// SI_CONVERT_GENERIC Use the Unicode reference conversion library in // SI_CONVERT_GENERIC Use the Unicode reference conversion library in
// the accompanying files ConvertUTF.h/c // the accompanying files ConvertUTF.h/c
// SI_CONVERT_ICU Use the IBM ICU conversion library. Requires // SI_CONVERT_ICU Use the IBM ICU conversion library. Requires
// ICU headers on include path and icuuc.lib // ICU headers on include path and icuuc.lib
// SI_CONVERT_WIN32 Use the Win32 API functions for conversion.
#if !defined(SI_NO_CONVERSION) && !defined(SI_CONVERT_GENERIC) && !defined(SI_CONVERT_WIN32) && !defined(SI_CONVERT_ICU) #if !defined(SI_NO_CONVERSION) && !defined(SI_CONVERT_GENERIC) && !defined(SI_CONVERT_WIN32) && !defined(SI_CONVERT_ICU)
# ifdef _WIN32 # ifdef _WIN32
# define SI_CONVERT_WIN32 # define SI_CONVERT_WIN32
# else # else
# define SI_CONVERT_GENERIC # define SI_NO_CONVERSION
# endif # endif
#endif #endif
@ -3079,14 +3102,18 @@ public:
return a_uInputDataLen; return a_uInputDataLen;
} }
#if defined(SI_NO_MBSTOWCS_NULL) || (!defined(_MSC_VER) && !defined(_linux)) // get the required buffer size
#if defined(_MSC_VER)
size_t uBufSiz;
errno_t e = mbstowcs_s(&uBufSiz, NULL, 0, a_pInputData, a_uInputDataLen);
return (e == 0) ? uBufSiz : (size_t) -1;
#elif !defined(SI_NO_MBSTOWCS_NULL)
return mbstowcs(NULL, a_pInputData, a_uInputDataLen);
#else
// fall back processing for platforms that don't support a NULL dest to mbstowcs // fall back processing for platforms that don't support a NULL dest to mbstowcs
// worst case scenario is 1:1, this will be a sufficient buffer size // worst case scenario is 1:1, this will be a sufficient buffer size
(void)a_pInputData; (void)a_pInputData;
return a_uInputDataLen; return a_uInputDataLen;
#else
// get the actual required buffer size
return mbstowcs(NULL, a_pInputData, a_uInputDataLen);
#endif #endif
} }
@ -3135,9 +3162,18 @@ public:
} }
// convert to wchar_t // convert to wchar_t
#if defined(_MSC_VER)
size_t uBufSiz;
errno_t e = mbstowcs_s(&uBufSiz,
a_pOutputData, a_uOutputDataSize,
a_pInputData, a_uInputDataLen);
(void)uBufSiz;
return (e == 0);
#else
size_t retval = mbstowcs(a_pOutputData, size_t retval = mbstowcs(a_pOutputData,
a_pInputData, a_uOutputDataSize); a_pInputData, a_uOutputDataSize);
return retval != (size_t)(-1); return retval != (size_t)(-1);
#endif
} }
/** Calculate the number of char required by the storage format of this /** Calculate the number of char required by the storage format of this

View File

@ -1,4 +1,6 @@
/* /*
* https://web.archive.org/web/20090529064329/http://www.unicode.org:80/Public/PROGRAMS/CVTUTF/
*
* Copyright 2001-2004 Unicode, Inc. * Copyright 2001-2004 Unicode, Inc.
* *
* Disclaimer * Disclaimer
@ -77,19 +79,24 @@ ConversionResult ConvertUTF32toUTF16 (
--source; /* return to the illegal value itself */ --source; /* return to the illegal value itself */
result = sourceIllegal; result = sourceIllegal;
break; break;
} else { }
else {
*target++ = UNI_REPLACEMENT_CHAR; *target++ = UNI_REPLACEMENT_CHAR;
} }
} else { }
else {
*target++ = (UTF16)ch; /* normal case */ *target++ = (UTF16)ch; /* normal case */
} }
} else if (ch > UNI_MAX_LEGAL_UTF32) { }
else if (ch > UNI_MAX_LEGAL_UTF32) {
if (flags == strictConversion) { if (flags == strictConversion) {
result = sourceIllegal; result = sourceIllegal;
} else { }
else {
*target++ = UNI_REPLACEMENT_CHAR; *target++ = UNI_REPLACEMENT_CHAR;
} }
} else { }
else {
/* target is a character in range 0xFFFF - 0x10FFFF. */ /* target is a character in range 0xFFFF - 0x10FFFF. */
if (target + 1 >= targetEnd) { if (target + 1 >= targetEnd) {
--source; /* Back up source pointer! */ --source; /* Back up source pointer! */
@ -127,17 +134,20 @@ ConversionResult ConvertUTF16toUTF32 (
ch = ((ch - UNI_SUR_HIGH_START) << halfShift) ch = ((ch - UNI_SUR_HIGH_START) << halfShift)
+ (ch2 - UNI_SUR_LOW_START) + halfBase; + (ch2 - UNI_SUR_LOW_START) + halfBase;
++source; ++source;
} else if (flags == strictConversion) { /* it's an unpaired high surrogate */ }
else if (flags == strictConversion) { /* it's an unpaired high surrogate */
--source; /* return to the illegal value itself */ --source; /* return to the illegal value itself */
result = sourceIllegal; result = sourceIllegal;
break; break;
} }
} else { /* We don't have the 16 bits following the high surrogate. */ }
else { /* We don't have the 16 bits following the high surrogate. */
--source; /* return to the high surrogate */ --source; /* return to the high surrogate */
result = sourceExhausted; result = sourceExhausted;
break; break;
} }
} else if (flags == strictConversion) { }
else if (flags == strictConversion) {
/* UTF-16 surrogate values are illegal in UTF-32 */ /* UTF-16 surrogate values are illegal in UTF-32 */
if (ch >= UNI_SUR_LOW_START && ch <= UNI_SUR_LOW_END) { if (ch >= UNI_SUR_LOW_START && ch <= UNI_SUR_LOW_END) {
--source; /* return to the illegal value itself */ --source; /* return to the illegal value itself */
@ -194,7 +204,7 @@ static const UTF32 offsetsFromUTF8[6] = { 0x00000000UL, 0x00003080UL, 0x000E2080
* Once the bits are split out into bytes of UTF-8, this is a mask OR-ed * Once the bits are split out into bytes of UTF-8, this is a mask OR-ed
* into the first byte, depending on how many bytes follow. There are * into the first byte, depending on how many bytes follow. There are
* as many entries in this table as there are UTF-8 sequence types. * as many entries in this table as there are UTF-8 sequence types.
* (I.e., one byte sequence, two byte... etc.). Remember that sequences * (I.e., one byte sequence, two byte... etc.). Remember that sequences
* for *legal* UTF-8 will be 4 or fewer bytes total. * for *legal* UTF-8 will be 4 or fewer bytes total.
*/ */
static const UTF8 firstByteMark[7] = { 0x00, 0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC }; static const UTF8 firstByteMark[7] = { 0x00, 0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC };
@ -234,17 +244,20 @@ ConversionResult ConvertUTF16toUTF8 (
ch = ((ch - UNI_SUR_HIGH_START) << halfShift) ch = ((ch - UNI_SUR_HIGH_START) << halfShift)
+ (ch2 - UNI_SUR_LOW_START) + halfBase; + (ch2 - UNI_SUR_LOW_START) + halfBase;
++source; ++source;
} else if (flags == strictConversion) { /* it's an unpaired high surrogate */ }
else if (flags == strictConversion) { /* it's an unpaired high surrogate */
--source; /* return to the illegal value itself */ --source; /* return to the illegal value itself */
result = sourceIllegal; result = sourceIllegal;
break; break;
} }
} else { /* We don't have the 16 bits following the high surrogate. */ }
else { /* We don't have the 16 bits following the high surrogate. */
--source; /* return to the high surrogate */ --source; /* return to the high surrogate */
result = sourceExhausted; result = sourceExhausted;
break; break;
} }
} else if (flags == strictConversion) { }
else if (flags == strictConversion) {
/* UTF-16 surrogate values are illegal in UTF-32 */ /* UTF-16 surrogate values are illegal in UTF-32 */
if (ch >= UNI_SUR_LOW_START && ch <= UNI_SUR_LOW_END) { if (ch >= UNI_SUR_LOW_START && ch <= UNI_SUR_LOW_END) {
--source; /* return to the illegal value itself */ --source; /* return to the illegal value itself */
@ -253,11 +266,20 @@ ConversionResult ConvertUTF16toUTF8 (
} }
} }
/* Figure out how many bytes the result will require */ /* Figure out how many bytes the result will require */
if (ch < (UTF32)0x80) { bytesToWrite = 1; if (ch < (UTF32)0x80) {
} else if (ch < (UTF32)0x800) { bytesToWrite = 2; bytesToWrite = 1;
} else if (ch < (UTF32)0x10000) { bytesToWrite = 3; }
} else if (ch < (UTF32)0x110000) { bytesToWrite = 4; else if (ch < (UTF32)0x800) {
} else { bytesToWrite = 3; bytesToWrite = 2;
}
else if (ch < (UTF32)0x10000) {
bytesToWrite = 3;
}
else if (ch < (UTF32)0x110000) {
bytesToWrite = 4;
}
else {
bytesToWrite = 3;
ch = UNI_REPLACEMENT_CHAR; ch = UNI_REPLACEMENT_CHAR;
} }
@ -374,21 +396,26 @@ ConversionResult ConvertUTF8toUTF16 (
source -= (extraBytesToRead + 1); /* return to the illegal value itself */ source -= (extraBytesToRead + 1); /* return to the illegal value itself */
result = sourceIllegal; result = sourceIllegal;
break; break;
} else { }
else {
*target++ = UNI_REPLACEMENT_CHAR; *target++ = UNI_REPLACEMENT_CHAR;
} }
} else { }
else {
*target++ = (UTF16)ch; /* normal case */ *target++ = (UTF16)ch; /* normal case */
} }
} else if (ch > UNI_MAX_UTF16) { }
else if (ch > UNI_MAX_UTF16) {
if (flags == strictConversion) { if (flags == strictConversion) {
result = sourceIllegal; result = sourceIllegal;
source -= (extraBytesToRead + 1); /* return to the start */ source -= (extraBytesToRead + 1); /* return to the start */
break; /* Bail out; shouldn't continue */ break; /* Bail out; shouldn't continue */
} else { }
else {
*target++ = UNI_REPLACEMENT_CHAR; *target++ = UNI_REPLACEMENT_CHAR;
} }
} else { }
else {
/* target is a character in range 0xFFFF - 0x10FFFF. */ /* target is a character in range 0xFFFF - 0x10FFFF. */
if (target + 1 >= targetEnd) { if (target + 1 >= targetEnd) {
source -= (extraBytesToRead + 1); /* Back up source pointer! */ source -= (extraBytesToRead + 1); /* Back up source pointer! */
@ -430,11 +457,20 @@ ConversionResult ConvertUTF32toUTF8 (
* Figure out how many bytes the result will require. Turn any * Figure out how many bytes the result will require. Turn any
* illegally large UTF32 things (> Plane 17) into replacement chars. * illegally large UTF32 things (> Plane 17) into replacement chars.
*/ */
if (ch < (UTF32)0x80) { bytesToWrite = 1; if (ch < (UTF32)0x80) {
} else if (ch < (UTF32)0x800) { bytesToWrite = 2; bytesToWrite = 1;
} else if (ch < (UTF32)0x10000) { bytesToWrite = 3; }
} else if (ch <= UNI_MAX_LEGAL_UTF32) { bytesToWrite = 4; else if (ch < (UTF32)0x800) {
} else { bytesToWrite = 3; bytesToWrite = 2;
}
else if (ch < (UTF32)0x10000) {
bytesToWrite = 3;
}
else if (ch <= UNI_MAX_LEGAL_UTF32) {
bytesToWrite = 4;
}
else {
bytesToWrite = 3;
ch = UNI_REPLACEMENT_CHAR; ch = UNI_REPLACEMENT_CHAR;
result = sourceIllegal; result = sourceIllegal;
} }
@ -503,13 +539,16 @@ ConversionResult ConvertUTF8toUTF32 (
source -= (extraBytesToRead + 1); /* return to the illegal value itself */ source -= (extraBytesToRead + 1); /* return to the illegal value itself */
result = sourceIllegal; result = sourceIllegal;
break; break;
} else { }
else {
*target++ = UNI_REPLACEMENT_CHAR; *target++ = UNI_REPLACEMENT_CHAR;
} }
} else { }
else {
*target++ = ch; *target++ = ch;
} }
} else { /* i.e., ch > UNI_MAX_LEGAL_UTF32 */ }
else { /* i.e., ch > UNI_MAX_LEGAL_UTF32 */
result = sourceIllegal; result = sourceIllegal;
*target++ = UNI_REPLACEMENT_CHAR; *target++ = UNI_REPLACEMENT_CHAR;
} }