aarch64: Move return value handling into ffi_call_SYSV

This lets us pass return data directly to the caller of ffi_call
in most cases, rather than staging it in temporary storage first.
Author:    Richard Henderson
Committer: Richard Henderson
Date:      2014-10-22 17:06:19 -04:00
Commit:    4fe1aea121 (parent 325471ea6a)
3 changed files with 260 additions and 116 deletions
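
In outline: ffi_prep_cif_machdep now encodes the return disposition in a
flags word, and ffi_call_SYSV decodes that word itself through a jump
table, storing straight through the caller's rvalue pointer. A hedged C
rendering of what that table does (a simplified sketch under assumed
struct shapes, not commit code; the real dispatch is the assembly table
in sysv.S below):

    #include <ffi.h>
    #include <string.h>

    /* Sketch only: x[] stands in for the returned x0/x1, v[] for the
       returned vector registers v0-v3.  */
    struct ret_context { unsigned long x[2]; unsigned char v[4][16]; };

    static void
    store_return (int flags, void *rvalue, const struct ret_context *ctx)
    {
      switch (flags & AARCH64_RET_MASK)
        {
        case AARCH64_RET_VOID:              /* nothing to store */
          break;
        case AARCH64_RET_INT64:             /* x0 */
          memcpy (rvalue, &ctx->x[0], 8);
          break;
        case AARCH64_RET_INT128:            /* x0:x1 */
          memcpy (rvalue, ctx->x, 16);
          break;
        case AARCH64_RET_SINT8:             /* sign-extend into an ffi_arg */
          *(ffi_arg *) rvalue = (signed char) ctx->x[0];
          break;
        /* ... and so on: S1-S4, D1-D4, Q1-Q4 copy 1-4 vector regs.  */
        }
    }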


@@ -523,30 +523,90 @@ allocate_int_to_reg_or_stack (struct call_context *context,
ffi_status
ffi_prep_cif_machdep (ffi_cif *cif)
{
ffi_type *rtype = cif->rtype;
size_t bytes = cif->bytes;
int flags, aarch64_flags, i, n;
switch (rtype->type)
{
case FFI_TYPE_VOID:
flags = AARCH64_RET_VOID;
break;
case FFI_TYPE_UINT8:
flags = AARCH64_RET_UINT8;
break;
case FFI_TYPE_UINT16:
flags = AARCH64_RET_UINT16;
break;
case FFI_TYPE_UINT32:
flags = AARCH64_RET_UINT32;
break;
case FFI_TYPE_SINT8:
flags = AARCH64_RET_SINT8;
break;
case FFI_TYPE_SINT16:
flags = AARCH64_RET_SINT16;
break;
case FFI_TYPE_INT:
case FFI_TYPE_SINT32:
flags = AARCH64_RET_SINT32;
break;
case FFI_TYPE_SINT64:
case FFI_TYPE_UINT64:
flags = AARCH64_RET_INT64;
break;
case FFI_TYPE_POINTER:
flags = (sizeof(void *) == 4 ? AARCH64_RET_UINT32 : AARCH64_RET_INT64);
break;
case FFI_TYPE_FLOAT:
flags = AARCH64_RET_S1;
break;
case FFI_TYPE_DOUBLE:
flags = AARCH64_RET_D1;
break;
case FFI_TYPE_LONGDOUBLE:
flags = AARCH64_RET_Q1;
break;
case FFI_TYPE_STRUCT:
{
int h = is_hfa (rtype);
size_t s = rtype->size;
if (h)
flags = (h & 0xff) * 4 + 4 - (h >> 8);
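/* Worked example (illustration, assuming is_hfa packs the element
count in bits 8 and up and the element FFI type in the low byte):
a struct of two doubles gives h == (2 << 8) | FFI_TYPE_DOUBLE, so
flags == 3*4 + 4 - 2 == 14 == AARCH64_RET_D2. */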
else if (s > 16)
{
flags = AARCH64_RET_VOID | AARCH64_RET_IN_MEM;
bytes += 8;
}
else if (s == 16)
flags = AARCH64_RET_INT128;
else if (s == 8)
flags = AARCH64_RET_INT64;
else
flags = AARCH64_RET_INT128 | AARCH64_RET_NEED_COPY;
}
break;
default:
abort();
}
aarch64_flags = 0;
for (i = 0, n = cif->nargs; i < n; i++)
if (is_v_register_candidate (cif->arg_types[i]))
{
aarch64_flags = AARCH64_FLAG_ARG_V;
flags |= AARCH64_FLAG_ARG_V;
break;
}
/* Round the stack up to a multiple of the stack alignment requirement. */
cif->bytes = ALIGN(cif->bytes, 16);
/* Initialize our flags. We want to know whether this CIF will touch
a vector register; if so, we enable context save and load for those
registers, otherwise not. This is intended to be friendly to lazy
float context switching in the kernel. */
cif->aarch64_flags = 0;
if (is_v_register_candidate (cif->rtype))
{
cif->aarch64_flags |= AARCH64_FLAG_ARG_V;
}
else
{
int i;
for (i = 0; i < cif->nargs; i++)
if (is_v_register_candidate (cif->arg_types[i]))
{
cif->aarch64_flags |= AARCH64_FLAG_ARG_V;
break;
}
}
cif->bytes = ALIGN(bytes, 16);
cif->flags = flags;
cif->aarch64_flags = aarch64_flags;
#if defined (__APPLE__)
cif->aarch64_nfixedargs = 0;
#endif
@@ -555,51 +615,65 @@ ffi_prep_cif_machdep (ffi_cif *cif)
}
#if defined (__APPLE__)
/* Perform Apple-specific cif processing for variadic calls */
ffi_status ffi_prep_cif_machdep_var(ffi_cif *cif,
unsigned int nfixedargs,
unsigned int ntotalargs)
{
ffi_status status;
status = ffi_prep_cif_machdep (cif);
ffi_status status = ffi_prep_cif_machdep (cif);
cif->aarch64_nfixedargs = nfixedargs;
return status;
}
#endif /* __APPLE__ */
#endif
extern void ffi_call_SYSV (void *stack, void *frame,
void (*fn)(void), int flags) FFI_HIDDEN;
extern void ffi_call_SYSV (struct call_context *context, void *frame,
void (*fn)(void), void *rvalue, int flags)
FFI_HIDDEN;
/* Call a function with the provided arguments and capture the return
value. */
void
ffi_call (ffi_cif *cif, void (*fn)(void), void *rvalue, void **avalue)
ffi_call (ffi_cif *cif, void (*fn)(void), void *orig_rvalue, void **avalue)
{
struct call_context *context;
void *stack, *frame;
void *stack, *frame, *rvalue;
struct arg_state state;
size_t stack_bytes;
int i, nargs = cif->nargs;
int h, t;
size_t stack_bytes, rtype_size, rsize;
int i, nargs, flags;
ffi_type *rtype;
/* Allocate consecutive stack for everything we'll need. */
flags = cif->flags;
rtype = cif->rtype;
rtype_size = rtype->size;
stack_bytes = cif->bytes;
stack = alloca (stack_bytes + 32 + sizeof(struct call_context));
/* If the target function returns a structure via hidden pointer,
then we cannot allow a null rvalue. Otherwise, mash a null
rvalue to void return type. */
rsize = 0;
if (flags & AARCH64_RET_IN_MEM)
{
if (orig_rvalue == NULL)
rsize = rtype_size;
}
else if (orig_rvalue == NULL)
flags &= AARCH64_FLAG_ARG_V;
else if (flags & AARCH64_RET_NEED_COPY)
rsize = 16;
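/* (Illustrative note, not commit text: INT128|NEED_COPY means the
value comes back in x0/x1 and the stub stores all 16 bytes with an
stp, which could overrun a caller buffer smaller than 16 bytes;
hence the 16-byte bounce buffer, copied out below via rtype_size.) */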
/* Allocate consecutive stack for everything we'll need. */
context = alloca (sizeof(struct call_context) + stack_bytes + 32 + rsize);
stack = context + 1;
frame = stack + stack_bytes;
context = frame + 32;
rvalue = (rsize ? frame + 32 : orig_rvalue);
arg_init (&state);
for (i = 0; i < nargs; i++)
for (i = 0, nargs = cif->nargs; i < nargs; i++)
{
ffi_type *ty = cif->arg_types[i];
size_t s = ty->size;
void *a = avalue[i];
int h, t;
t = ty->type;
switch (t)
@@ -717,54 +791,10 @@ ffi_call (ffi_cif *cif, void (*fn)(void), void *rvalue, void **avalue)
#endif
}
rtype = cif->rtype;
if (is_register_candidate (rtype))
{
ffi_call_SYSV (stack, frame, fn, cif->aarch64_flags);
ffi_call_SYSV (context, frame, fn, rvalue, flags);
t = rtype->type;
switch (t)
{
case FFI_TYPE_INT:
case FFI_TYPE_UINT8:
case FFI_TYPE_SINT8:
case FFI_TYPE_UINT16:
case FFI_TYPE_SINT16:
case FFI_TYPE_UINT32:
case FFI_TYPE_SINT32:
case FFI_TYPE_POINTER:
case FFI_TYPE_UINT64:
case FFI_TYPE_SINT64:
*(ffi_arg *)rvalue = extend_integer_type (&context->x[0], t);
break;
case FFI_TYPE_FLOAT:
case FFI_TYPE_DOUBLE:
case FFI_TYPE_LONGDOUBLE:
compress_hfa_type (rvalue, &context->v[0], 0x100 + t);
break;
case FFI_TYPE_STRUCT:
h = is_hfa (cif->rtype);
if (h)
compress_hfa_type (rvalue, &context->v[0], h);
else
{
FFI_ASSERT (rtype->size <= 16);
memcpy (rvalue, &context->x[0], rtype->size);
}
break;
default:
FFI_ASSERT (0);
break;
}
}
else
{
context->x8 = (uintptr_t)rvalue;
ffi_call_SYSV (stack, frame, fn, cif->aarch64_flags);
}
if (flags & AARCH64_RET_NEED_COPY)
memcpy (orig_rvalue, rvalue, rtype_size);
}
static unsigned char trampoline [] =
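
An illustrative caller (hedged example using the standard libffi API,
not part of this commit): the ffi_call contract is unchanged, and a
small two-float HFA return like this one is now stored directly by the
assembly stub rather than copied after the fact.

    #include <ffi.h>
    #include <stdio.h>

    typedef struct { float x, y; } point;   /* an HFA of two floats */

    static point mkpoint (float x, float y)
    {
      point p = { x, y };
      return p;
    }

    int main (void)
    {
      ffi_cif cif;
      ffi_type *elts[] = { &ffi_type_float, &ffi_type_float, NULL };
      ffi_type point_type = { 0, 0, FFI_TYPE_STRUCT, elts };
      ffi_type *args[] = { &ffi_type_float, &ffi_type_float };
      float x = 1.0f, y = 2.0f;
      void *avalue[] = { &x, &y };
      point ret;

      if (ffi_prep_cif (&cif, FFI_DEFAULT_ABI, 2, &point_type, args) == FFI_OK)
        ffi_call (&cif, FFI_FN (mkpoint), &ret, avalue);
      printf ("%g %g\n", ret.x, ret.y);
      return 0;
    }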


@@ -18,7 +18,48 @@ CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */
#define AARCH64_FLAG_ARG_V_BIT 0
#define AARCH64_RET_VOID 0
#define AARCH64_RET_INT64 1
#define AARCH64_RET_INT128 2
#define AARCH64_RET_UNUSED3 3
#define AARCH64_RET_UNUSED4 4
#define AARCH64_RET_UNUSED5 5
#define AARCH64_RET_UNUSED6 6
#define AARCH64_RET_UNUSED7 7
/* Note that FFI_TYPE_FLOAT == 2, _DOUBLE == 3, _LONGDOUBLE == 4,
so _S4 through _Q1 are laid out as (TYPE * 4) + (4 - COUNT). */
#define AARCH64_RET_S4 8
#define AARCH64_RET_S3 9
#define AARCH64_RET_S2 10
#define AARCH64_RET_S1 11
#define AARCH64_RET_D4 12
#define AARCH64_RET_D3 13
#define AARCH64_RET_D2 14
#define AARCH64_RET_D1 15
#define AARCH64_RET_Q4 16
#define AARCH64_RET_Q3 17
#define AARCH64_RET_Q2 18
#define AARCH64_RET_Q1 19
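/* Spot-check of the layout note above: FLOAT == 2 gives
S2 == 2*4 + (4-2) == 10; DOUBLE == 3 gives D1 == 3*4 + (4-1) == 15;
LONGDOUBLE == 4 gives Q4 == 4*4 + (4-4) == 16. */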
/* Note that each of the sub-64-bit integers gets two entries. */
#define AARCH64_RET_UINT8 20
#define AARCH64_RET_UINT16 22
#define AARCH64_RET_UINT32 24
#define AARCH64_RET_SINT8 26
#define AARCH64_RET_SINT16 28
#define AARCH64_RET_SINT32 30
#define AARCH64_RET_MASK 31
#define AARCH64_RET_IN_MEM (1 << 5)
#define AARCH64_RET_NEED_COPY (1 << 6)
#define AARCH64_FLAG_ARG_V_BIT 7
#define AARCH64_FLAG_ARG_V (1 << AARCH64_FLAG_ARG_V_BIT)
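/* Composition example (illustrative): a struct returned via the x8
pointer, in a call that also passes vector args, carries
AARCH64_RET_VOID | AARCH64_RET_IN_MEM | AARCH64_FLAG_ARG_V == 0xa0. */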
#define N_X_ARG_REG 8


@@ -40,9 +40,9 @@ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */
#endif
.text
.align 2
.align 4
.globl CNAME(ffi_call_SYSV)
#ifdef __ELF__
.type CNAME(ffi_call_SYSV), #function
.hidden CNAME(ffi_call_SYSV)
@@ -50,14 +50,15 @@ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */
/* ffi_call_SYSV
extern void ffi_call_SYSV (void *stack, void *frame,
void (*fn)(void), int flags);
void (*fn)(void), void *rvalue, int flags);
Therefore on entry we have:
x0 stack
x1 frame
x2 fn
x3 flags
x3 rvalue
x4 flags
*/
cfi_startproc
@@ -71,43 +72,111 @@ CNAME(ffi_call_SYSV):
cfi_rel_offset (x29, 0)
cfi_rel_offset (x30, 8)
str w3, [x29, #16] /* save flags */
mov x9, x2 /* save fn */
mov x8, x3 /* install structure return */
stp x3, x4, [x29, #16] /* save rvalue and flags */
/* Load the vector argument passing registers, if necessary. */
tbz w3, #AARCH64_FLAG_ARG_V_BIT, 1f
ldp q0, q1, [x29, #32 + 0]
ldp q2, q3, [x29, #32 + 32]
ldp q4, q5, [x29, #32 + 64]
ldp q6, q7, [x29, #32 + 96]
tbz w4, #AARCH64_FLAG_ARG_V_BIT, 1f
ldp q0, q1, [sp, #0]
ldp q2, q3, [sp, #32]
ldp q4, q5, [sp, #64]
ldp q6, q7, [sp, #96]
1:
/* Load the core argument passing registers, including
the structure return pointer. */
ldp x0, x1, [x29, #32 + 16*N_V_ARG_REG + 0]
ldp x2, x3, [x29, #32 + 16*N_V_ARG_REG + 16]
ldp x4, x5, [x29, #32 + 16*N_V_ARG_REG + 32]
ldp x6, x7, [x29, #32 + 16*N_V_ARG_REG + 48]
ldr x8, [x29, #32 + 16*N_V_ARG_REG + 64]
ldp x0, x1, [sp, #16*N_V_ARG_REG + 0]
ldp x2, x3, [sp, #16*N_V_ARG_REG + 16]
ldp x4, x5, [sp, #16*N_V_ARG_REG + 32]
ldp x6, x7, [sp, #16*N_V_ARG_REG + 48]
/* Deallocate the context, leaving the stacked arguments. */
add sp, sp, #CALL_CONTEXT_SIZE
blr x9 /* call fn */
ldr w3, [x29, #16] /* reload flags */
ldp x3, x4, [x29, #16] /* reload rvalue and flags */
/* Partially deconstruct the stack frame. */
mov sp, x29
cfi_def_cfa_register (sp)
ldp x29, x30, [x29]
/* Save the core return registers. */
stp x0, x1, [sp, #32 + 16*N_V_ARG_REG]
/* Save the return value as directed. */
adr x5, 0f
and w4, w4, #AARCH64_RET_MASK
add x5, x5, x4, lsl #3
br x5
/* Save the vector return registers, if necessary. */
tbz w3, #AARCH64_FLAG_ARG_V_BIT, 1f
stp q0, q1, [sp, #32 + 0]
stp q2, q3, [sp, #32 + 32]
1:
/* All done. */
/* Note that each table entry is 2 insns, and thus 8 bytes. For
integer data, we are storing into an ffi_arg and therefore want to
extend to 64 bits; those types have two consecutive entries
allocated for them. */
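/* Example (illustrative, not commit text): flags & AARCH64_RET_MASK
== 15 selects AARCH64_RET_D1, branching to 0f + 15*8, the "str d0"
entry below. */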
.align 4
0: ret /* VOID */
nop
1: str x0, [x3] /* INT64 */
ret
2: stp x0, x1, [x3] /* INT128 */
ret
3: brk #1000 /* UNUSED */
ret
4: brk #1000 /* UNUSED */
ret
5: brk #1000 /* UNUSED */
ret
6: brk #1000 /* UNUSED */
ret
7: brk #1000 /* UNUSED */
ret
8: st4 { v0.s-v3.s }[0], [x3] /* S4 */
ret
9: st3 { v0.s-v2.s }[0], [x3] /* S3 */
ret
10: stp s0, s1, [x3] /* S2 */
ret
11: str s0, [x3] /* S1 */
ret
12: st4 { v0.d-v3.d }[0], [x3] /* D4 */
ret
13: st3 { v0.d-v2.d }[0], [x3] /* D3 */
ret
14: stp d0, d1, [x3] /* D2 */
ret
15: str d0, [x3] /* D1 */
ret
16: str q3, [x3, #48] /* Q4 */
nop
17: str q2, [x3, #32] /* Q3 */
nop
18: stp q0, q1, [x3] /* Q2 */
ret
19: str q0, [x3] /* Q1 */
ret
20: uxtb w0, w0 /* UINT8 */
str x0, [x3]
21: ret /* reserved */
nop
22: uxth w0, w0 /* UINT16 */
str x0, [x3]
23: ret /* reserved */
nop
24: mov w0, w0 /* UINT32 */
str x0, [x3]
25: ret /* reserved */
nop
26: sxtb x0, w0 /* SINT8 */
str x0, [x3]
27: ret /* reserved */
nop
28: sxth x0, w0 /* SINT16 */
str x0, [x3]
29: ret /* reserved */
nop
30: sxtw x0, w0 /* SINT32 */
str x0, [x3]
31: ret /* reserved */
nop
cfi_endproc
#ifdef __ELF__
@@ -154,9 +223,13 @@ CNAME(ffi_call_SYSV):
Voila! */
.text
.align 2
.align 4
.globl CNAME(ffi_closure_SYSV)
#ifdef __ELF__
.type CNAME(ffi_closure_SYSV), #function
.hidden CNAME(ffi_closure_SYSV)
#endif
cfi_startproc
CNAME(ffi_closure_SYSV):
stp x29, x30, [sp, #-16]!