Import Tcl 8.6.12

This commit is contained in:
Steve Dower
2021-11-08 17:30:58 +00:00
parent 1aadb2455c
commit 674867e7e6
608 changed files with 78089 additions and 60360 deletions

View File

@@ -90,6 +90,8 @@ static const unsigned char complete[256] = {
#if TCL_UTF_MAX > 3
4,4,4,4,4,
#else
/* Tcl_UtfToUniChar() accesses src[1] and src[2] to check whether
* the UTF-8 sequence is valid, so we cannot use 1 here. */
3,3,3,3,3,
#endif
1,1,1,1,1,1,1,1,1,1,1
@@ -536,7 +538,7 @@ Tcl_UtfToUniCharDString(
w = wString;
p = src;
endPtr = src + length;
optPtr = endPtr - TCL_UTF_MAX;
optPtr = endPtr - ((TCL_UTF_MAX > 3) ? 4 : 3) ;
while (p <= optPtr) {
p += TclUtfToUniChar(p, &ch);
*w++ = ch;
@@ -623,7 +625,7 @@ Tcl_NumUtfChars(
/* Pointer to the end of string. Never read endPtr[0] */
const char *endPtr = src + length;
/* Pointer to last byte where optimization still can be used */
const char *optPtr = endPtr - TCL_UTF_MAX;
const char *optPtr = endPtr - ((TCL_UTF_MAX > 3) ? 4 : 3);
/*
* Optimize away the call in this loop. Justified because...
@@ -759,6 +761,19 @@ Tcl_UtfNext(
int left;
const char *next;
#if TCL_UTF_MAX > 3
if (((*src) & 0xC0) == 0x80) {
/* Continuation byte, so we start 'inside' a (possible valid) UTF-8
* sequence. Since we are not allowed to access src[-1], we cannot
* check if the sequence is actually valid, the best we can do is
* just assume it is valid and locate the end. */
if ((((*++src) & 0xC0) == 0x80) && (((*++src) & 0xC0) == 0x80)) {
++src;
}
return src;
}
#endif
left = totalBytes[UCHAR(*src)];
next = src + 1;
while (--left) {
@@ -888,14 +903,13 @@ Tcl_UtfPrev(
/* Continue the search backwards... */
look--;
} while (trailBytesSeen < 3);
} while (trailBytesSeen < (TCL_UTF_MAX < 4 ? 3 : 4));
/*
* We've seen 3 trail bytes, so we know there will not be a
* properly formed byte sequence to find, and we can stop looking,
* accepting the fallback.
*/
return fallback;
}
@@ -1282,11 +1296,11 @@ Tcl_UtfNcmp(
if (ch1 != ch2) {
#if TCL_UTF_MAX == 4
/* Surrogates always report higher than non-surrogates */
if (((ch1 & ~0x3FF) == 0xD800)) {
if ((ch2 & ~0x3FF) != 0xD800) {
if (((ch1 & 0xFC00) == 0xD800)) {
if ((ch2 & 0xFC00) != 0xD800) {
return ch1;
}
} else if ((ch2 & ~0x3FF) == 0xD800) {
} else if ((ch2 & 0xFC00) == 0xD800) {
return -ch2;
}
#endif
@@ -1578,7 +1592,7 @@ Tcl_UniCharNcmp(
const Tcl_UniChar *uct, /* Unicode string ucs is compared to. */
unsigned long numChars) /* Number of unichars to compare. */
{
#ifdef WORDS_BIGENDIAN
#if defined(WORDS_BIGENDIAN) && (TCL_UTF_MAX != 4)
/*
* We are definitely on a big-endian machine; memcmp() is safe
*/
@@ -1592,6 +1606,14 @@ Tcl_UniCharNcmp(
for ( ; numChars != 0; ucs++, uct++, numChars--) {
if (*ucs != *uct) {
#if TCL_UTF_MAX == 4
/* special case for handling upper surrogates */
if (((*ucs & 0xFC00) == 0xD800) && ((*uct & 0xFC00) != 0xD800)) {
return 1;
} else if (((*uct & 0xFC00) == 0xD800)) {
return -1;
}
#endif
return (*ucs - *uct);
}
}
@@ -1629,6 +1651,14 @@ Tcl_UniCharNcasecmp(
Tcl_UniChar lct = Tcl_UniCharToLower(*uct);
if (lcs != lct) {
#if TCL_UTF_MAX == 4
/* special case for handling upper surrogates */
if (((lcs & 0xFC00) == 0xD800) && ((lct & 0xFC00) != 0xD800)) {
return 1;
} else if (((lct & 0xFC00) == 0xD800)) {
return -1;
}
#endif
return (lcs - lct);
}
}