Import Tcl 8.6.10

This commit is contained in:
Steve Dower
2020-09-24 22:53:56 +01:00
parent 0343d03b22
commit 3bb8e3e086
1005 changed files with 593700 additions and 41637 deletions

View File

@@ -69,11 +69,11 @@ static const unsigned char totalBytes[256] = {
2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,
#if TCL_UTF_MAX > 3
4,4,4,4,4,4,4,4,
4,4,4,4,4,
#else
1,1,1,1,1,1,1,1,
3,3,3,3,3, /* Tcl_UtfCharComplete() only checks TCL_UTF_MAX bytes */
#endif
1,1,1,1,1,1,1,1
1,1,1,1,1,1,1,1,1,1,1
};
/*
@@ -154,20 +154,26 @@ Tcl_UniCharToUtf(
return 2;
}
if (ch <= 0xFFFF) {
#if TCL_UTF_MAX == 4
#if TCL_UTF_MAX > 3
if ((ch & 0xF800) == 0xD800) {
if (ch & 0x0400) {
/* Low surrogate */
buf[3] = (char) ((ch | 0x80) & 0xBF);
buf[2] |= (char) (((ch >> 6) | 0x80) & 0x8F);
return 4;
if (((buf[0] & 0xC0) == 0x80) && ((buf[1] & 0xCF) == 0)) {
/* Previous Tcl_UniChar was a high surrogate, so combine */
buf[2] = (char) ((ch & 0x3F) | 0x80);
buf[1] |= (char) (((ch >> 6) & 0x0F) | 0x80);
return 3;
}
/* Previous Tcl_UniChar was not a high surrogate, so just output */
} else {
/* High surrogate */
ch += 0x40;
buf[2] = (char) (((ch << 4) | 0x80) & 0xB0);
buf[1] = (char) (((ch >> 2) | 0x80) & 0xBF);
buf[0] = (char) (((ch >> 8) | 0xF0) & 0xF7);
return 0;
/* Fill buffer with specific 3-byte (invalid) byte combination,
so following low surrogate can recognize it and combine */
buf[2] = (char) ((ch << 4) & 0x30);
buf[1] = (char) (((ch >> 2) & 0x3F) | 0x80);
buf[0] = (char) (((ch >> 8) & 0x07) | 0xF0);
return 1;
}
}
#endif
@@ -182,6 +188,16 @@ Tcl_UniCharToUtf(
buf[0] = (char) ((ch >> 18) | 0xF0);
return 4;
}
} else if (ch == -1) {
if (((buf[0] & 0xC0) == 0x80) && ((buf[1] & 0xCF) == 0)
&& ((buf[-1] & 0xF8) == 0xF0)) {
ch = 0xD7C0 + ((buf[-1] & 0x07) << 8) + ((buf[0] & 0x3F) << 2)
+ ((buf[1] & 0x30) >> 4);
buf[1] = (char) ((ch | 0x80) & 0xBF);
buf[0] = (char) (((ch >> 6) | 0x80) & 0xBF);
buf[-1] = (char) ((ch >> 12) | 0xE0);
return 2;
}
#endif
}
@@ -229,7 +245,7 @@ Tcl_UniCharToUtfDString(
*/
oldLength = Tcl_DStringLength(dsPtr);
Tcl_DStringSetLength(dsPtr, (oldLength + uniLength + 1) * TCL_UTF_MAX);
Tcl_DStringSetLength(dsPtr, oldLength + (uniLength + 1) * TCL_UTF_MAX);
string = Tcl_DStringValue(dsPtr) + oldLength;
p = string;
@@ -259,7 +275,7 @@ Tcl_UniCharToUtfDString(
* Tcl_UtfCharComplete() before calling this routine to ensure that
* enough bytes remain in the string.
*
* If TCL_UTF_MAX == 4, special handling of Surrogate pairs is done:
* If TCL_UTF_MAX <= 4, special handling of Surrogate pairs is done:
* For any UTF-8 string containing a character outside of the BMP, the
* first call to this function will fill *chPtr with the high surrogate
* and generate a return value of 0. Calling Tcl_UtfToUniChar again
@@ -284,7 +300,7 @@ Tcl_UtfToUniChar(
register Tcl_UniChar *chPtr)/* Filled with the Tcl_UniChar represented by
* the UTF-8 string. */
{
register int byte;
Tcl_UniChar byte;
/*
* Unroll 1 to 3 (or 4) byte UTF-8 sequences.
@@ -298,7 +314,21 @@ Tcl_UtfToUniChar(
* characters representing themselves.
*/
*chPtr = (Tcl_UniChar) byte;
#if TCL_UTF_MAX <= 4
/* If *chPtr contains a high surrogate (produced by a previous
* Tcl_UtfToUniChar() call) and the next 3 bytes are UTF-8 continuation
* bytes, then we must produce a follow-up low surrogate. We only
* do that if the high surrogate matches the bits we encounter.
*/
if ((byte >= 0x80)
&& (((((byte - 0x10) << 2) & 0xFC) | 0xD800) == (*chPtr & 0xFCFC))
&& ((src[1] & 0xF0) == (((*chPtr << 4) & 0x30) | 0x80))
&& ((src[2] & 0xC0) == 0x80)) {
*chPtr = ((src[1] & 0x0F) << 6) + (src[2] & 0x3F) + 0xDC00;
return 3;
}
#endif
*chPtr = byte;
return 1;
} else if (byte < 0xE0) {
if ((src[1] & 0xC0) == 0x80) {
@@ -306,7 +336,7 @@ Tcl_UtfToUniChar(
* Two-byte-character lead-byte followed by a trail-byte.
*/
*chPtr = (Tcl_UniChar) (((byte & 0x1F) << 6) | (src[1] & 0x3F));
*chPtr = (((byte & 0x1F) << 6) | (src[1] & 0x3F));
if ((unsigned)(*chPtr - 1) >= (UNICODE_SELF - 1)) {
return 2;
}
@@ -322,7 +352,7 @@ Tcl_UtfToUniChar(
* Three-byte-character lead byte followed by two trail bytes.
*/
*chPtr = (Tcl_UniChar) (((byte & 0x0F) << 12)
*chPtr = (((byte & 0x0F) << 12)
| ((src[1] & 0x3F) << 6) | (src[2] & 0x3F));
if (*chPtr > 0x7FF) {
return 3;
@@ -334,31 +364,23 @@ Tcl_UtfToUniChar(
* represents itself.
*/
}
#if TCL_UTF_MAX > 3
else if (byte < 0xF8) {
if (((src[1] & 0xC0) == 0x80) && ((src[2] & 0xC0) == 0x80) && ((src[3] & 0xC0) == 0x80)) {
/*
* Four-byte-character lead byte followed by three trail bytes.
*/
#if TCL_UTF_MAX == 4
Tcl_UniChar surrogate;
byte = (((byte & 0x07) << 18) | ((src[1] & 0x3F) << 12)
| ((src[2] & 0x3F) << 6) | (src[3] & 0x3F)) - 0x10000;
surrogate = (Tcl_UniChar) (0xD800 + (byte >> 10));
if (byte & 0x100000) {
#if TCL_UTF_MAX <= 4
Tcl_UniChar high = (((byte & 0x07) << 8) | ((src[1] & 0x3F) << 2)
| ((src[2] & 0x3F) >> 4)) - 0x40;
if (high >= 0x400) {
/* out of range, < 0x10000 or > 0x10ffff */
} else if (*chPtr != surrogate) {
/* produce high surrogate, but don't advance source pointer */
*chPtr = surrogate;
return 0;
} else {
/* produce low surrogate, and advance source pointer */
*chPtr = (Tcl_UniChar) (0xDC00 | (byte & 0x3FF));
return 4;
/* produce high surrogate, advance source pointer */
*chPtr = 0xD800 + high;
return 1;
}
#else
*chPtr = (Tcl_UniChar) (((byte & 0x07) << 18) | ((src[1] & 0x3F) << 12)
*chPtr = (((byte & 0x07) << 18) | ((src[1] & 0x3F) << 12)
| ((src[2] & 0x3F) << 6) | (src[3] & 0x3F));
if ((unsigned)(*chPtr - 0x10000) <= 0xFFFFF) {
return 4;
@@ -367,13 +389,12 @@ Tcl_UtfToUniChar(
}
/*
* A four-byte-character lead-byte not followed by two trail-bytes
* A four-byte-character lead-byte not followed by three trail-bytes
* represents itself.
*/
}
#endif
*chPtr = (Tcl_UniChar) byte;
*chPtr = byte;
return 1;
}
@@ -418,17 +439,27 @@ Tcl_UtfToUniCharDString(
*/
oldLength = Tcl_DStringLength(dsPtr);
/* TODO: fix overreach! */
Tcl_DStringSetLength(dsPtr,
(int) ((oldLength + length + 1) * sizeof(Tcl_UniChar)));
oldLength + (int) ((length + 1) * sizeof(Tcl_UniChar)));
wString = (Tcl_UniChar *) (Tcl_DStringValue(dsPtr) + oldLength);
w = wString;
end = src + length;
for (p = src; p < end; ) {
p = src;
end = src + length - TCL_UTF_MAX;
while (p < end) {
p += TclUtfToUniChar(p, &ch);
*w++ = ch;
}
end += TCL_UTF_MAX;
while (p < end) {
if (Tcl_UtfCharComplete(p, end-p)) {
p += TclUtfToUniChar(p, &ch);
} else {
ch = UCHAR(*p++);
}
*w++ = ch;
}
*w = '\0';
Tcl_DStringSetLength(dsPtr,
(oldLength + ((char *) w - (char *) wString)));
@@ -553,9 +584,9 @@ Tcl_UtfFindFirst(
while (1) {
len = TclUtfToUniChar(src, &find);
fullchar = find;
#if TCL_UTF_MAX == 4
if (!len) {
len += TclUtfToUniChar(src, &find);
#if TCL_UTF_MAX <= 4
if ((fullchar != ch) && (find >= 0xD800) && (len < 3)) {
len += TclUtfToUniChar(src + len, &find);
fullchar = (((fullchar & 0x3ff) << 10) | (find & 0x3ff)) + 0x10000;
}
#endif
@@ -601,9 +632,9 @@ Tcl_UtfFindLast(
while (1) {
len = TclUtfToUniChar(src, &find);
fullchar = find;
#if TCL_UTF_MAX == 4
if (!len) {
len += TclUtfToUniChar(src, &find);
#if TCL_UTF_MAX <= 4
if ((fullchar != ch) && (find >= 0xD800) && (len < 3)) {
len += TclUtfToUniChar(src + len, &find);
fullchar = (((fullchar & 0x3ff) << 10) | (find & 0x3ff)) + 0x10000;
}
#endif
@@ -644,9 +675,9 @@ Tcl_UtfNext(
Tcl_UniChar ch = 0;
int len = TclUtfToUniChar(src, &ch);
#if TCL_UTF_MAX == 4
if (len == 0) {
len = TclUtfToUniChar(src, &ch);
#if TCL_UTF_MAX <= 4
if ((ch >= 0xD800) && (len < 3)) {
len += TclUtfToUniChar(src + len, &ch);
}
#endif
return src + len;
@@ -683,7 +714,7 @@ Tcl_UtfPrev(
int i, byte;
look = --src;
for (i = 0; i < TCL_UTF_MAX; i++) {
for (i = 0; i < 4; i++) {
if (look < start) {
if (src < start) {
src = start;
@@ -726,8 +757,7 @@ Tcl_UniCharAtIndex(
{
Tcl_UniChar ch = 0;
while (index >= 0) {
index--;
while (index-- >= 0) {
src += TclUtfToUniChar(src, &ch);
}
return ch;
@@ -756,11 +786,18 @@ Tcl_UtfAtIndex(
register int index) /* The position of the desired character. */
{
Tcl_UniChar ch = 0;
int len = 0;
while (index > 0) {
index--;
while (index-- > 0) {
len = TclUtfToUniChar(src, &ch);
src += len;
}
#if TCL_UTF_MAX == 4
if ((ch >= 0xD800) && (len < 3)) {
/* Index points at character following high Surrogate */
src += TclUtfToUniChar(src, &ch);
}
#endif
return src;
}
@@ -841,7 +878,7 @@ Tcl_UtfToUpper(
{
Tcl_UniChar ch = 0, upChar;
char *src, *dst;
int bytes;
int len;
/*
* Iterate over the string until we hit the terminating null.
@@ -849,7 +886,7 @@ Tcl_UtfToUpper(
src = dst = str;
while (*src) {
bytes = TclUtfToUniChar(src, &ch);
len = TclUtfToUniChar(src, &ch);
upChar = Tcl_UniCharToUpper(ch);
/*
@@ -858,13 +895,13 @@ Tcl_UtfToUpper(
* char to dst if its size is <= the original char.
*/
if (bytes < UtfCount(upChar)) {
memcpy(dst, src, (size_t) bytes);
dst += bytes;
if (len < UtfCount(upChar)) {
memmove(dst, src, len);
dst += len;
} else {
dst += Tcl_UniCharToUtf(upChar, dst);
}
src += bytes;
src += len;
}
*dst = '\0';
return (dst - str);
@@ -894,7 +931,7 @@ Tcl_UtfToLower(
{
Tcl_UniChar ch = 0, lowChar;
char *src, *dst;
int bytes;
int len;
/*
* Iterate over the string until we hit the terminating null.
@@ -902,7 +939,7 @@ Tcl_UtfToLower(
src = dst = str;
while (*src) {
bytes = TclUtfToUniChar(src, &ch);
len = TclUtfToUniChar(src, &ch);
lowChar = Tcl_UniCharToLower(ch);
/*
@@ -911,13 +948,13 @@ Tcl_UtfToLower(
* char to dst if its size is <= the original char.
*/
if (bytes < UtfCount(lowChar)) {
memcpy(dst, src, (size_t) bytes);
dst += bytes;
if (len < UtfCount(lowChar)) {
memmove(dst, src, len);
dst += len;
} else {
dst += Tcl_UniCharToUtf(lowChar, dst);
}
src += bytes;
src += len;
}
*dst = '\0';
return (dst - str);
@@ -948,7 +985,7 @@ Tcl_UtfToTitle(
{
Tcl_UniChar ch = 0, titleChar, lowChar;
char *src, *dst;
int bytes;
int len;
/*
* Capitalize the first character and then lowercase the rest of the
@@ -958,28 +995,32 @@ Tcl_UtfToTitle(
src = dst = str;
if (*src) {
bytes = TclUtfToUniChar(src, &ch);
len = TclUtfToUniChar(src, &ch);
titleChar = Tcl_UniCharToTitle(ch);
if (bytes < UtfCount(titleChar)) {
memcpy(dst, src, (size_t) bytes);
dst += bytes;
if (len < UtfCount(titleChar)) {
memmove(dst, src, len);
dst += len;
} else {
dst += Tcl_UniCharToUtf(titleChar, dst);
}
src += bytes;
src += len;
}
while (*src) {
bytes = TclUtfToUniChar(src, &ch);
lowChar = Tcl_UniCharToLower(ch);
len = TclUtfToUniChar(src, &ch);
lowChar = ch;
/* Special exception for Georgian Asomtavruli chars, no titlecase. */
if ((unsigned)(lowChar - 0x1C90) >= 0x30) {
lowChar = Tcl_UniCharToLower(lowChar);
}
if (bytes < UtfCount(lowChar)) {
memcpy(dst, src, (size_t) bytes);
dst += bytes;
if (len < UtfCount(lowChar)) {
memmove(dst, src, len);
dst += len;
} else {
dst += Tcl_UniCharToUtf(lowChar, dst);
}
src += bytes;
src += len;
}
*dst = '\0';
return (dst - str);
@@ -1072,16 +1113,17 @@ Tcl_UtfNcmp(
cs += TclUtfToUniChar(cs, &ch1);
ct += TclUtfToUniChar(ct, &ch2);
#if TCL_UTF_MAX == 4
/* map high surrogate characters to values > 0xffff */
if ((ch1 & 0xFC00) == 0xD800) {
ch1 += 0x4000;
}
if ((ch2 & 0xFC00) == 0xD800) {
ch2 += 0x4000;
}
#endif
if (ch1 != ch2) {
#if TCL_UTF_MAX == 4
/* Surrogates always report higher than non-surrogates */
if (((ch1 & 0xFC00) == 0xD800)) {
if ((ch2 & 0xFC00) != 0xD800) {
return ch1;
}
} else if ((ch2 & 0xFC00) == 0xD800) {
return -ch2;
}
#endif
return (ch1 - ch2);
}
}
@@ -1122,16 +1164,17 @@ Tcl_UtfNcasecmp(
*/
cs += TclUtfToUniChar(cs, &ch1);
ct += TclUtfToUniChar(ct, &ch2);
#if TCL_UTF_MAX == 4
/* map high surrogate characters to values > 0xffff */
if ((ch1 & 0xFC00) == 0xD800) {
ch1 += 0x4000;
}
if ((ch2 & 0xFC00) == 0xD800) {
ch2 += 0x4000;
}
#endif
if (ch1 != ch2) {
#if TCL_UTF_MAX == 4
/* Surrogates always report higher than non-surrogates */
if (((ch1 & 0xFC00) == 0xD800)) {
if ((ch2 & 0xFC00) != 0xD800) {
return ch1;
}
} else if ((ch2 & 0xFC00) == 0xD800) {
return -ch2;
}
#endif
ch1 = Tcl_UniCharToLower(ch1);
ch2 = Tcl_UniCharToLower(ch2);
if (ch1 != ch2) {
@@ -1170,16 +1213,17 @@ TclUtfCasecmp(
while (*cs && *ct) {
cs += TclUtfToUniChar(cs, &ch1);
ct += TclUtfToUniChar(ct, &ch2);
#if TCL_UTF_MAX == 4
/* map high surrogate characters to values > 0xffff */
if ((ch1 & 0xFC00) == 0xD800) {
ch1 += 0x4000;
}
if ((ch2 & 0xFC00) == 0xD800) {
ch2 += 0x4000;
}
#endif
if (ch1 != ch2) {
#if TCL_UTF_MAX == 4
/* Surrogates always report higher than non-surrogates */
if (((ch1 & 0xFC00) == 0xD800)) {
if ((ch2 & 0xFC00) != 0xD800) {
return ch1;
}
} else if ((ch2 & 0xFC00) == 0xD800) {
return -ch2;
}
#endif
ch1 = Tcl_UniCharToLower(ch1);
ch2 = Tcl_UniCharToLower(ch2);
if (ch1 != ch2) {
@@ -1240,8 +1284,9 @@ Tcl_UniCharToLower(
int ch) /* Unicode character to convert. */
{
int info = GetUniCharInfo(ch);
int mode = GetCaseType(info);
if (GetCaseType(info) & 0x02) {
if ((mode & 0x02) && (mode != 0x7)) {
ch += GetDelta(info);
}
return (Tcl_UniChar) ch;
@@ -1275,7 +1320,9 @@ Tcl_UniCharToTitle(
* Subtract or add one depending on the original case.
*/
ch += ((mode & 0x4) ? -1 : 1);
if (mode != 0x7) {
ch += ((mode & 0x4) ? -1 : 1);
}
} else if (mode == 0x4) {
ch -= GetDelta(info);
}