aarch64: Unify scalar fp and hfa handling
Since an HFA of a single element is passed exactly the same way as a scalar fp value, handling both through a single predicate (is_vfp_type) tidies things up a bit.
This commit is contained in:
committed by
Richard Henderson
parent
12cf89ee04
commit
4a3cbcaa4f
@@ -71,16 +71,7 @@ ffi_clear_cache (void *start, void *end)
|
|||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Test for an FFI floating point representation. */
|
/* A subroutine of is_vfp_type. Given a structure type, return the type code
|
||||||
|
|
||||||
static unsigned
|
|
||||||
is_floating_type (unsigned short type)
|
|
||||||
{
|
|
||||||
return (type == FFI_TYPE_FLOAT || type == FFI_TYPE_DOUBLE
|
|
||||||
|| type == FFI_TYPE_LONGDOUBLE);
|
|
||||||
}
|
|
||||||
|
|
||||||
/* A subroutine of is_hfa. Given a structure type, return the type code
|
|
||||||
of the first non-structure element. Recurse for structure elements.
|
of the first non-structure element. Recurse for structure elements.
|
||||||
Return -1 if the structure is in fact empty, i.e. no nested elements. */
|
Return -1 if the structure is in fact empty, i.e. no nested elements. */
|
||||||
|
|
||||||
@@ -106,7 +97,7 @@ is_hfa0 (const ffi_type *ty)
|
|||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* A subroutine of is_hfa. Given a structure type, return true if all
|
/* A subroutine of is_vfp_type. Given a structure type, return true if all
|
||||||
of the non-structure elements are the same as CANDIDATE. */
|
of the non-structure elements are the same as CANDIDATE. */
|
||||||
|
|
||||||
static int
|
static int
|
||||||
@@ -131,23 +122,35 @@ is_hfa1 (const ffi_type *ty, int candidate)
|
|||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Determine if TY is an homogenous floating point aggregate (HFA).
|
/* Determine if TY may be allocated to the FP registers. This is both an
|
||||||
|
fp scalar type as well as a homogeneous floating point aggregate (HFA).
|
||||||
That is, a structure consisting of 1 to 4 members of all the same type,
|
That is, a structure consisting of 1 to 4 members of all the same type,
|
||||||
where that type is a floating point scalar.
|
where that type is an fp scalar.
|
||||||
|
|
||||||
Returns non-zero iff TY is an HFA. The result is an encoded value where
|
Returns non-zero iff TY is an fp scalar or an HFA. The result is the AARCH64_RET_*
|
||||||
bits 0-7 contain the type code, and bits 8-10 contain the element count. */
|
constant for the type. */
|
||||||
|
|
||||||
static int
|
static int
|
||||||
is_hfa(const ffi_type *ty)
|
is_vfp_type (const ffi_type *ty)
|
||||||
{
|
{
|
||||||
ffi_type **elements;
|
ffi_type **elements;
|
||||||
int candidate, i;
|
int candidate, i;
|
||||||
size_t size, ele_count;
|
size_t size, ele_count;
|
||||||
|
|
||||||
/* Quickest tests first. */
|
/* Quickest tests first. */
|
||||||
if (ty->type != FFI_TYPE_STRUCT)
|
switch (ty->type)
|
||||||
return 0;
|
{
|
||||||
|
default:
|
||||||
|
return 0;
|
||||||
|
case FFI_TYPE_FLOAT:
|
||||||
|
return AARCH64_RET_S1;
|
||||||
|
case FFI_TYPE_DOUBLE:
|
||||||
|
return AARCH64_RET_D1;
|
||||||
|
case FFI_TYPE_LONGDOUBLE:
|
||||||
|
return AARCH64_RET_Q1;
|
||||||
|
case FFI_TYPE_STRUCT:
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
/* No HFA types are smaller than 4 bytes, or larger than 64 bytes. */
|
/* No HFA types are smaller than 4 bytes, or larger than 64 bytes. */
|
||||||
size = ty->size;
|
size = ty->size;
|
||||||
@@ -205,17 +208,7 @@ is_hfa(const ffi_type *ty)
|
|||||||
}
|
}
|
||||||
|
|
||||||
/* All tests succeeded. Encode the result. */
|
/* All tests succeeded. Encode the result. */
|
||||||
return (ele_count << 8) | candidate;
|
return candidate * 4 + (4 - ele_count);
|
||||||
}
|
|
||||||
|
|
||||||
/* Test if an ffi_type argument or result is a candidate for a vector
|
|
||||||
register. */
|
|
||||||
|
|
||||||
static int
|
|
||||||
is_v_register_candidate (ffi_type *ty)
|
|
||||||
{
|
|
||||||
return is_floating_type (ty->type)
|
|
||||||
|| (ty->type == FFI_TYPE_STRUCT && is_hfa (ty));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Representation of the procedure call argument marshalling
|
/* Representation of the procedure call argument marshalling
|
||||||
@@ -302,9 +295,7 @@ extend_integer_type (void *source, int type)
|
|||||||
static void
|
static void
|
||||||
extend_hfa_type (void *dest, void *src, int h)
|
extend_hfa_type (void *dest, void *src, int h)
|
||||||
{
|
{
|
||||||
int n = (h >> 8);
|
int f = h - AARCH64_RET_S4;
|
||||||
int t = h & 0xff;
|
|
||||||
int f = (t - FFI_TYPE_FLOAT) * 4 + 4 - n;
|
|
||||||
void *x0;
|
void *x0;
|
||||||
|
|
||||||
asm volatile (
|
asm volatile (
|
||||||
@@ -358,82 +349,68 @@ extend_hfa_type (void *dest, void *src, int h)
|
|||||||
static void *
|
static void *
|
||||||
compress_hfa_type (void *dest, void *reg, int h)
|
compress_hfa_type (void *dest, void *reg, int h)
|
||||||
{
|
{
|
||||||
int n = h >> 8;
|
switch (h)
|
||||||
switch (h & 0xff)
|
|
||||||
{
|
{
|
||||||
case FFI_TYPE_FLOAT:
|
case AARCH64_RET_S1:
|
||||||
switch (n)
|
if (dest == reg)
|
||||||
{
|
{
|
||||||
default:
|
|
||||||
if (dest == reg)
|
|
||||||
{
|
|
||||||
#ifdef __AARCH64EB__
|
#ifdef __AARCH64EB__
|
||||||
dest += 12;
|
dest += 12;
|
||||||
#endif
|
#endif
|
||||||
}
|
|
||||||
else
|
|
||||||
*(float *)dest = *(float *)reg;
|
|
||||||
break;
|
|
||||||
case 2:
|
|
||||||
asm("ldp q16, q17, [%1]\n\t"
|
|
||||||
"st2 { v16.s, v17.s }[0], [%0]"
|
|
||||||
: : "r"(dest), "r"(reg) : "memory", "v16", "v17");
|
|
||||||
break;
|
|
||||||
case 3:
|
|
||||||
asm("ldp q16, q17, [%1]\n\t"
|
|
||||||
"ldr q18, [%1, #32]\n\t"
|
|
||||||
"st3 { v16.s, v17.s, v18.s }[0], [%0]"
|
|
||||||
: : "r"(dest), "r"(reg) : "memory", "v16", "v17", "v18");
|
|
||||||
break;
|
|
||||||
case 4:
|
|
||||||
asm("ldp q16, q17, [%1]\n\t"
|
|
||||||
"ldp q18, q19, [%1, #32]\n\t"
|
|
||||||
"st4 { v16.s, v17.s, v18.s, v19.s }[0], [%0]"
|
|
||||||
: : "r"(dest), "r"(reg) : "memory", "v16", "v17", "v18", "v19");
|
|
||||||
break;
|
|
||||||
}
|
}
|
||||||
|
else
|
||||||
|
*(float *)dest = *(float *)reg;
|
||||||
|
break;
|
||||||
|
case AARCH64_RET_S2:
|
||||||
|
asm ("ldp q16, q17, [%1]\n\t"
|
||||||
|
"st2 { v16.s, v17.s }[0], [%0]"
|
||||||
|
: : "r"(dest), "r"(reg) : "memory", "v16", "v17");
|
||||||
|
break;
|
||||||
|
case AARCH64_RET_S3:
|
||||||
|
asm ("ldp q16, q17, [%1]\n\t"
|
||||||
|
"ldr q18, [%1, #32]\n\t"
|
||||||
|
"st3 { v16.s, v17.s, v18.s }[0], [%0]"
|
||||||
|
: : "r"(dest), "r"(reg) : "memory", "v16", "v17", "v18");
|
||||||
|
break;
|
||||||
|
case AARCH64_RET_S4:
|
||||||
|
asm ("ldp q16, q17, [%1]\n\t"
|
||||||
|
"ldp q18, q19, [%1, #32]\n\t"
|
||||||
|
"st4 { v16.s, v17.s, v18.s, v19.s }[0], [%0]"
|
||||||
|
: : "r"(dest), "r"(reg) : "memory", "v16", "v17", "v18", "v19");
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case FFI_TYPE_DOUBLE:
|
case AARCH64_RET_D1:
|
||||||
switch (n)
|
if (dest == reg)
|
||||||
{
|
{
|
||||||
default:
|
|
||||||
if (dest == reg)
|
|
||||||
{
|
|
||||||
#ifdef __AARCH64EB__
|
#ifdef __AARCH64EB__
|
||||||
dest += 8;
|
dest += 8;
|
||||||
#endif
|
#endif
|
||||||
}
|
|
||||||
else
|
|
||||||
*(double *)dest = *(double *)reg;
|
|
||||||
break;
|
|
||||||
case 2:
|
|
||||||
asm("ldp q16, q17, [%1]\n\t"
|
|
||||||
"st2 { v16.d, v17.d }[0], [%0]"
|
|
||||||
: : "r"(dest), "r"(reg) : "memory", "v16", "v17");
|
|
||||||
break;
|
|
||||||
case 3:
|
|
||||||
asm("ldp q16, q17, [%1]\n\t"
|
|
||||||
"ldr q18, [%1, #32]\n\t"
|
|
||||||
"st3 { v16.d, v17.d, v18.d }[0], [%0]"
|
|
||||||
: : "r"(dest), "r"(reg) : "memory", "v16", "v17", "v18");
|
|
||||||
break;
|
|
||||||
case 4:
|
|
||||||
asm("ldp q16, q17, [%1]\n\t"
|
|
||||||
"ldp q18, q19, [%1, #32]\n\t"
|
|
||||||
"st4 { v16.d, v17.d, v18.d, v19.d }[0], [%0]"
|
|
||||||
: : "r"(dest), "r"(reg) : "memory", "v16", "v17", "v18", "v19");
|
|
||||||
break;
|
|
||||||
}
|
}
|
||||||
|
else
|
||||||
|
*(double *)dest = *(double *)reg;
|
||||||
break;
|
break;
|
||||||
|
case AARCH64_RET_D2:
|
||||||
case FFI_TYPE_LONGDOUBLE:
|
asm ("ldp q16, q17, [%1]\n\t"
|
||||||
if (dest != reg)
|
"st2 { v16.d, v17.d }[0], [%0]"
|
||||||
return memcpy (dest, reg, 16 * n);
|
: : "r"(dest), "r"(reg) : "memory", "v16", "v17");
|
||||||
|
break;
|
||||||
|
case AARCH64_RET_D3:
|
||||||
|
asm ("ldp q16, q17, [%1]\n\t"
|
||||||
|
"ldr q18, [%1, #32]\n\t"
|
||||||
|
"st3 { v16.d, v17.d, v18.d }[0], [%0]"
|
||||||
|
: : "r"(dest), "r"(reg) : "memory", "v16", "v17", "v18");
|
||||||
|
break;
|
||||||
|
case AARCH64_RET_D4:
|
||||||
|
asm ("ldp q16, q17, [%1]\n\t"
|
||||||
|
"ldp q18, q19, [%1, #32]\n\t"
|
||||||
|
"st4 { v16.d, v17.d, v18.d, v19.d }[0], [%0]"
|
||||||
|
: : "r"(dest), "r"(reg) : "memory", "v16", "v17", "v18", "v19");
|
||||||
break;
|
break;
|
||||||
|
|
||||||
default:
|
default:
|
||||||
FFI_ASSERT (0);
|
if (dest != reg)
|
||||||
|
return memcpy (dest, reg, 16 * (4 - (h & 3)));
|
||||||
|
break;
|
||||||
}
|
}
|
||||||
return dest;
|
return dest;
|
||||||
}
|
}
|
||||||
@@ -494,34 +471,25 @@ ffi_prep_cif_machdep (ffi_cif *cif)
|
|||||||
break;
|
break;
|
||||||
|
|
||||||
case FFI_TYPE_FLOAT:
|
case FFI_TYPE_FLOAT:
|
||||||
flags = AARCH64_RET_S1;
|
|
||||||
break;
|
|
||||||
case FFI_TYPE_DOUBLE:
|
case FFI_TYPE_DOUBLE:
|
||||||
flags = AARCH64_RET_D1;
|
|
||||||
break;
|
|
||||||
case FFI_TYPE_LONGDOUBLE:
|
case FFI_TYPE_LONGDOUBLE:
|
||||||
flags = AARCH64_RET_Q1;
|
|
||||||
break;
|
|
||||||
|
|
||||||
case FFI_TYPE_STRUCT:
|
case FFI_TYPE_STRUCT:
|
||||||
{
|
flags = is_vfp_type (rtype);
|
||||||
int h = is_hfa (rtype);
|
if (flags == 0)
|
||||||
size_t s = rtype->size;
|
{
|
||||||
|
size_t s = rtype->size;
|
||||||
if (h)
|
if (s > 16)
|
||||||
flags = (h & 0xff) * 4 + 4 - (h >> 8);
|
{
|
||||||
else if (s > 16)
|
flags = AARCH64_RET_VOID | AARCH64_RET_IN_MEM;
|
||||||
{
|
bytes += 8;
|
||||||
flags = AARCH64_RET_VOID | AARCH64_RET_IN_MEM;
|
}
|
||||||
bytes += 8;
|
else if (s == 16)
|
||||||
}
|
flags = AARCH64_RET_INT128;
|
||||||
else if (s == 16)
|
else if (s == 8)
|
||||||
flags = AARCH64_RET_INT128;
|
flags = AARCH64_RET_INT64;
|
||||||
else if (s == 8)
|
else
|
||||||
flags = AARCH64_RET_INT64;
|
flags = AARCH64_RET_INT128 | AARCH64_RET_NEED_COPY;
|
||||||
else
|
}
|
||||||
flags = AARCH64_RET_INT128 | AARCH64_RET_NEED_COPY;
|
|
||||||
}
|
|
||||||
break;
|
break;
|
||||||
|
|
||||||
default:
|
default:
|
||||||
@@ -530,7 +498,7 @@ ffi_prep_cif_machdep (ffi_cif *cif)
|
|||||||
|
|
||||||
aarch64_flags = 0;
|
aarch64_flags = 0;
|
||||||
for (i = 0, n = cif->nargs; i < n; i++)
|
for (i = 0, n = cif->nargs; i < n; i++)
|
||||||
if (is_v_register_candidate (cif->arg_types[i]))
|
if (is_vfp_type (cif->arg_types[i]))
|
||||||
{
|
{
|
||||||
aarch64_flags = AARCH64_FLAG_ARG_V;
|
aarch64_flags = AARCH64_FLAG_ARG_V;
|
||||||
flags |= AARCH64_FLAG_ARG_V;
|
flags |= AARCH64_FLAG_ARG_V;
|
||||||
@@ -652,20 +620,14 @@ ffi_call (ffi_cif *cif, void (*fn)(void), void *orig_rvalue, void **avalue)
|
|||||||
case FFI_TYPE_FLOAT:
|
case FFI_TYPE_FLOAT:
|
||||||
case FFI_TYPE_DOUBLE:
|
case FFI_TYPE_DOUBLE:
|
||||||
case FFI_TYPE_LONGDOUBLE:
|
case FFI_TYPE_LONGDOUBLE:
|
||||||
/* Scalar float is a degenerate case of HFA. */
|
|
||||||
h = t + 0x100;
|
|
||||||
goto do_hfa;
|
|
||||||
|
|
||||||
case FFI_TYPE_STRUCT:
|
case FFI_TYPE_STRUCT:
|
||||||
{
|
{
|
||||||
void *dest;
|
void *dest;
|
||||||
int elems;
|
|
||||||
|
|
||||||
h = is_hfa (ty);
|
h = is_vfp_type (ty);
|
||||||
if (h)
|
if (h)
|
||||||
{
|
{
|
||||||
do_hfa:
|
int elems = 4 - (h & 3);
|
||||||
elems = h >> 8;
|
|
||||||
if (state.nsrn + elems <= N_V_ARG_REG)
|
if (state.nsrn + elems <= N_V_ARG_REG)
|
||||||
{
|
{
|
||||||
dest = &context->v[state.nsrn];
|
dest = &context->v[state.nsrn];
|
||||||
@@ -828,16 +790,11 @@ ffi_closure_SYSV_inner (ffi_cif *cif,
|
|||||||
case FFI_TYPE_FLOAT:
|
case FFI_TYPE_FLOAT:
|
||||||
case FFI_TYPE_DOUBLE:
|
case FFI_TYPE_DOUBLE:
|
||||||
case FFI_TYPE_LONGDOUBLE:
|
case FFI_TYPE_LONGDOUBLE:
|
||||||
/* Scalar float is a degenerate case of HFA. */
|
|
||||||
h = t + 0x100;
|
|
||||||
goto do_hfa;
|
|
||||||
|
|
||||||
case FFI_TYPE_STRUCT:
|
case FFI_TYPE_STRUCT:
|
||||||
h = is_hfa (ty);
|
h = is_vfp_type (ty);
|
||||||
if (h)
|
if (h)
|
||||||
{
|
{
|
||||||
do_hfa:
|
n = 4 - (h & 3);
|
||||||
n = h >> 8;
|
|
||||||
if (state.nsrn + n <= N_V_ARG_REG)
|
if (state.nsrn + n <= N_V_ARG_REG)
|
||||||
{
|
{
|
||||||
void *reg = &context->v[state.nsrn];
|
void *reg = &context->v[state.nsrn];
|
||||||
|
|||||||
Reference in New Issue
Block a user