Import Tcl 8.6.12
This commit is contained in:
@@ -90,6 +90,8 @@ static const unsigned char complete[256] = {
|
||||
#if TCL_UTF_MAX > 3
|
||||
4,4,4,4,4,
|
||||
#else
|
||||
/* Tcl_UtfToUniChar() accesses src[1] and src[2] to check whether
|
||||
* the UTF-8 sequence is valid, so we cannot use 1 here. */
|
||||
3,3,3,3,3,
|
||||
#endif
|
||||
1,1,1,1,1,1,1,1,1,1,1
|
||||
@@ -536,7 +538,7 @@ Tcl_UtfToUniCharDString(
|
||||
w = wString;
|
||||
p = src;
|
||||
endPtr = src + length;
|
||||
optPtr = endPtr - TCL_UTF_MAX;
|
||||
optPtr = endPtr - ((TCL_UTF_MAX > 3) ? 4 : 3) ;
|
||||
while (p <= optPtr) {
|
||||
p += TclUtfToUniChar(p, &ch);
|
||||
*w++ = ch;
|
||||
@@ -623,7 +625,7 @@ Tcl_NumUtfChars(
|
||||
/* Pointer to the end of string. Never read endPtr[0] */
|
||||
const char *endPtr = src + length;
|
||||
/* Pointer to last byte where optimization still can be used */
|
||||
const char *optPtr = endPtr - TCL_UTF_MAX;
|
||||
const char *optPtr = endPtr - ((TCL_UTF_MAX > 3) ? 4 : 3);
|
||||
|
||||
/*
|
||||
* Optimize away the call in this loop. Justified because...
|
||||
@@ -759,6 +761,19 @@ Tcl_UtfNext(
|
||||
int left;
|
||||
const char *next;
|
||||
|
||||
#if TCL_UTF_MAX > 3
|
||||
if (((*src) & 0xC0) == 0x80) {
|
||||
/* Continuation byte, so we start 'inside' a (possible valid) UTF-8
|
||||
* sequence. Since we are not allowed to access src[-1], we cannot
|
||||
* check if the sequence is actually valid, the best we can do is
|
||||
* just assume it is valid and locate the end. */
|
||||
if ((((*++src) & 0xC0) == 0x80) && (((*++src) & 0xC0) == 0x80)) {
|
||||
++src;
|
||||
}
|
||||
return src;
|
||||
}
|
||||
#endif
|
||||
|
||||
left = totalBytes[UCHAR(*src)];
|
||||
next = src + 1;
|
||||
while (--left) {
|
||||
@@ -888,14 +903,13 @@ Tcl_UtfPrev(
|
||||
|
||||
/* Continue the search backwards... */
|
||||
look--;
|
||||
} while (trailBytesSeen < 3);
|
||||
} while (trailBytesSeen < (TCL_UTF_MAX < 4 ? 3 : 4));
|
||||
|
||||
/*
|
||||
* We've seen 3 trail bytes, so we know there will not be a
|
||||
* properly formed byte sequence to find, and we can stop looking,
|
||||
* accepting the fallback.
|
||||
*/
|
||||
|
||||
return fallback;
|
||||
}
|
||||
|
||||
@@ -1282,11 +1296,11 @@ Tcl_UtfNcmp(
|
||||
if (ch1 != ch2) {
|
||||
#if TCL_UTF_MAX == 4
|
||||
/* Surrogates always report higher than non-surrogates */
|
||||
if (((ch1 & ~0x3FF) == 0xD800)) {
|
||||
if ((ch2 & ~0x3FF) != 0xD800) {
|
||||
if (((ch1 & 0xFC00) == 0xD800)) {
|
||||
if ((ch2 & 0xFC00) != 0xD800) {
|
||||
return ch1;
|
||||
}
|
||||
} else if ((ch2 & ~0x3FF) == 0xD800) {
|
||||
} else if ((ch2 & 0xFC00) == 0xD800) {
|
||||
return -ch2;
|
||||
}
|
||||
#endif
|
||||
@@ -1578,7 +1592,7 @@ Tcl_UniCharNcmp(
|
||||
const Tcl_UniChar *uct, /* Unicode string ucs is compared to. */
|
||||
unsigned long numChars) /* Number of unichars to compare. */
|
||||
{
|
||||
#ifdef WORDS_BIGENDIAN
|
||||
#if defined(WORDS_BIGENDIAN) && (TCL_UTF_MAX != 4)
|
||||
/*
|
||||
* We are definitely on a big-endian machine; memcmp() is safe
|
||||
*/
|
||||
@@ -1592,6 +1606,14 @@ Tcl_UniCharNcmp(
|
||||
|
||||
for ( ; numChars != 0; ucs++, uct++, numChars--) {
|
||||
if (*ucs != *uct) {
|
||||
#if TCL_UTF_MAX == 4
|
||||
/* special case for handling upper surrogates */
|
||||
if (((*ucs & 0xFC00) == 0xD800) && ((*uct & 0xFC00) != 0xD800)) {
|
||||
return 1;
|
||||
} else if (((*uct & 0xFC00) == 0xD800)) {
|
||||
return -1;
|
||||
}
|
||||
#endif
|
||||
return (*ucs - *uct);
|
||||
}
|
||||
}
|
||||
@@ -1629,6 +1651,14 @@ Tcl_UniCharNcasecmp(
|
||||
Tcl_UniChar lct = Tcl_UniCharToLower(*uct);
|
||||
|
||||
if (lcs != lct) {
|
||||
#if TCL_UTF_MAX == 4
|
||||
/* special case for handling upper surrogates */
|
||||
if (((lcs & 0xFC00) == 0xD800) && ((lct & 0xFC00) != 0xD800)) {
|
||||
return 1;
|
||||
} else if (((lct & 0xFC00) == 0xD800)) {
|
||||
return -1;
|
||||
}
|
||||
#endif
|
||||
return (lcs - lct);
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user