aarch64: Move return value handling into ffi_call_SYSV
This lets us pass return data directly to the caller of ffi_call in most cases, rather than storing it into temporary storage first.
This commit is contained in:
committed by
Richard Henderson
parent
325471ea6a
commit
4fe1aea121
@@ -523,30 +523,90 @@ allocate_int_to_reg_or_stack (struct call_context *context,
|
||||
ffi_status
|
||||
ffi_prep_cif_machdep (ffi_cif *cif)
|
||||
{
|
||||
ffi_type *rtype = cif->rtype;
|
||||
size_t bytes = cif->bytes;
|
||||
int flags, aarch64_flags, i, n;
|
||||
|
||||
switch (rtype->type)
|
||||
{
|
||||
case FFI_TYPE_VOID:
|
||||
flags = AARCH64_RET_VOID;
|
||||
break;
|
||||
case FFI_TYPE_UINT8:
|
||||
flags = AARCH64_RET_UINT8;
|
||||
break;
|
||||
case FFI_TYPE_UINT16:
|
||||
flags = AARCH64_RET_UINT16;
|
||||
break;
|
||||
case FFI_TYPE_UINT32:
|
||||
flags = AARCH64_RET_UINT32;
|
||||
break;
|
||||
case FFI_TYPE_SINT8:
|
||||
flags = AARCH64_RET_SINT8;
|
||||
break;
|
||||
case FFI_TYPE_SINT16:
|
||||
flags = AARCH64_RET_SINT16;
|
||||
break;
|
||||
case FFI_TYPE_INT:
|
||||
case FFI_TYPE_SINT32:
|
||||
flags = AARCH64_RET_SINT32;
|
||||
break;
|
||||
case FFI_TYPE_SINT64:
|
||||
case FFI_TYPE_UINT64:
|
||||
flags = AARCH64_RET_INT64;
|
||||
break;
|
||||
case FFI_TYPE_POINTER:
|
||||
flags = (sizeof(void *) == 4 ? AARCH64_RET_UINT32 : AARCH64_RET_INT64);
|
||||
break;
|
||||
|
||||
case FFI_TYPE_FLOAT:
|
||||
flags = AARCH64_RET_S1;
|
||||
break;
|
||||
case FFI_TYPE_DOUBLE:
|
||||
flags = AARCH64_RET_D1;
|
||||
break;
|
||||
case FFI_TYPE_LONGDOUBLE:
|
||||
flags = AARCH64_RET_Q1;
|
||||
break;
|
||||
|
||||
case FFI_TYPE_STRUCT:
|
||||
{
|
||||
int h = is_hfa (rtype);
|
||||
size_t s = rtype->size;
|
||||
|
||||
if (h)
|
||||
flags = (h & 0xff) * 4 + 4 - (h >> 8);
|
||||
else if (s > 16)
|
||||
{
|
||||
flags = AARCH64_RET_VOID | AARCH64_RET_IN_MEM;
|
||||
bytes += 8;
|
||||
}
|
||||
else if (s == 16)
|
||||
flags = AARCH64_RET_INT128;
|
||||
else if (s == 8)
|
||||
flags = AARCH64_RET_INT64;
|
||||
else
|
||||
flags = AARCH64_RET_INT128 | AARCH64_RET_NEED_COPY;
|
||||
}
|
||||
break;
|
||||
|
||||
default:
|
||||
abort();
|
||||
}
|
||||
|
||||
aarch64_flags = 0;
|
||||
for (i = 0, n = cif->nargs; i < n; i++)
|
||||
if (is_v_register_candidate (cif->arg_types[i]))
|
||||
{
|
||||
aarch64_flags = AARCH64_FLAG_ARG_V;
|
||||
flags |= AARCH64_FLAG_ARG_V;
|
||||
break;
|
||||
}
|
||||
|
||||
/* Round the stack up to a multiple of the stack alignment requirement. */
|
||||
cif->bytes = ALIGN(cif->bytes, 16);
|
||||
|
||||
/* Initialize our flags. We are interested in whether this CIF will touch a
|
||||
vector register; if so, we will enable context save and load to
|
||||
those registers, otherwise not. This is intended to be friendly
|
||||
to lazy float context switching in the kernel. */
|
||||
cif->aarch64_flags = 0;
|
||||
|
||||
if (is_v_register_candidate (cif->rtype))
|
||||
{
|
||||
cif->aarch64_flags |= AARCH64_FLAG_ARG_V;
|
||||
}
|
||||
else
|
||||
{
|
||||
int i;
|
||||
for (i = 0; i < cif->nargs; i++)
|
||||
if (is_v_register_candidate (cif->arg_types[i]))
|
||||
{
|
||||
cif->aarch64_flags |= AARCH64_FLAG_ARG_V;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
cif->bytes = ALIGN(bytes, 16);
|
||||
cif->flags = flags;
|
||||
cif->aarch64_flags = aarch64_flags;
|
||||
#if defined (__APPLE__)
|
||||
cif->aarch64_nfixedargs = 0;
|
||||
#endif
|
||||
@@ -555,51 +615,65 @@ ffi_prep_cif_machdep (ffi_cif *cif)
|
||||
}
|
||||
|
||||
#if defined (__APPLE__)
|
||||
|
||||
/* Perform Apple-specific cif processing for variadic calls */
|
||||
ffi_status ffi_prep_cif_machdep_var(ffi_cif *cif,
|
||||
unsigned int nfixedargs,
|
||||
unsigned int ntotalargs)
|
||||
{
|
||||
ffi_status status;
|
||||
|
||||
status = ffi_prep_cif_machdep (cif);
|
||||
|
||||
ffi_status status = ffi_prep_cif_machdep (cif);
|
||||
cif->aarch64_nfixedargs = nfixedargs;
|
||||
|
||||
return status;
|
||||
}
|
||||
#endif /* __APPLE__ */
|
||||
|
||||
#endif
|
||||
|
||||
extern void ffi_call_SYSV (void *stack, void *frame,
|
||||
void (*fn)(void), int flags) FFI_HIDDEN;
|
||||
extern void ffi_call_SYSV (struct call_context *context, void *frame,
|
||||
void (*fn)(void), void *rvalue, int flags)
|
||||
FFI_HIDDEN;
|
||||
|
||||
/* Call a function with the provided arguments and capture the return
|
||||
value. */
|
||||
void
|
||||
ffi_call (ffi_cif *cif, void (*fn)(void), void *rvalue, void **avalue)
|
||||
ffi_call (ffi_cif *cif, void (*fn)(void), void *orig_rvalue, void **avalue)
|
||||
{
|
||||
struct call_context *context;
|
||||
void *stack, *frame;
|
||||
void *stack, *frame, *rvalue;
|
||||
struct arg_state state;
|
||||
size_t stack_bytes;
|
||||
int i, nargs = cif->nargs;
|
||||
int h, t;
|
||||
size_t stack_bytes, rtype_size, rsize;
|
||||
int i, nargs, flags;
|
||||
ffi_type *rtype;
|
||||
|
||||
/* Allocate consecutive stack for everything we'll need. */
|
||||
flags = cif->flags;
|
||||
rtype = cif->rtype;
|
||||
rtype_size = rtype->size;
|
||||
stack_bytes = cif->bytes;
|
||||
stack = alloca (stack_bytes + 32 + sizeof(struct call_context));
|
||||
|
||||
/* If the target function returns a structure via hidden pointer,
|
||||
then we cannot allow a null rvalue. Otherwise, mash a null
|
||||
rvalue to void return type. */
|
||||
rsize = 0;
|
||||
if (flags & AARCH64_RET_IN_MEM)
|
||||
{
|
||||
if (orig_rvalue == NULL)
|
||||
rsize = rtype_size;
|
||||
}
|
||||
else if (orig_rvalue == NULL)
|
||||
flags &= AARCH64_FLAG_ARG_V;
|
||||
else if (flags & AARCH64_RET_NEED_COPY)
|
||||
rsize = 16;
|
||||
|
||||
/* Allocate consecutive stack for everything we'll need. */
|
||||
context = alloca (sizeof(struct call_context) + stack_bytes + 32 + rsize);
|
||||
stack = context + 1;
|
||||
frame = stack + stack_bytes;
|
||||
context = frame + 32;
|
||||
rvalue = (rsize ? frame + 32 : orig_rvalue);
|
||||
|
||||
arg_init (&state);
|
||||
for (i = 0; i < nargs; i++)
|
||||
for (i = 0, nargs = cif->nargs; i < nargs; i++)
|
||||
{
|
||||
ffi_type *ty = cif->arg_types[i];
|
||||
size_t s = ty->size;
|
||||
void *a = avalue[i];
|
||||
int h, t;
|
||||
|
||||
t = ty->type;
|
||||
switch (t)
|
||||
@@ -717,54 +791,10 @@ ffi_call (ffi_cif *cif, void (*fn)(void), void *rvalue, void **avalue)
|
||||
#endif
|
||||
}
|
||||
|
||||
rtype = cif->rtype;
|
||||
if (is_register_candidate (rtype))
|
||||
{
|
||||
ffi_call_SYSV (stack, frame, fn, cif->aarch64_flags);
|
||||
ffi_call_SYSV (context, frame, fn, rvalue, flags);
|
||||
|
||||
t = rtype->type;
|
||||
switch (t)
|
||||
{
|
||||
case FFI_TYPE_INT:
|
||||
case FFI_TYPE_UINT8:
|
||||
case FFI_TYPE_SINT8:
|
||||
case FFI_TYPE_UINT16:
|
||||
case FFI_TYPE_SINT16:
|
||||
case FFI_TYPE_UINT32:
|
||||
case FFI_TYPE_SINT32:
|
||||
case FFI_TYPE_POINTER:
|
||||
case FFI_TYPE_UINT64:
|
||||
case FFI_TYPE_SINT64:
|
||||
*(ffi_arg *)rvalue = extend_integer_type (&context->x[0], t);
|
||||
break;
|
||||
|
||||
case FFI_TYPE_FLOAT:
|
||||
case FFI_TYPE_DOUBLE:
|
||||
case FFI_TYPE_LONGDOUBLE:
|
||||
compress_hfa_type (rvalue, &context->v[0], 0x100 + t);
|
||||
break;
|
||||
|
||||
case FFI_TYPE_STRUCT:
|
||||
h = is_hfa (cif->rtype);
|
||||
if (h)
|
||||
compress_hfa_type (rvalue, &context->v[0], h);
|
||||
else
|
||||
{
|
||||
FFI_ASSERT (rtype->size <= 16);
|
||||
memcpy (rvalue, &context->x[0], rtype->size);
|
||||
}
|
||||
break;
|
||||
|
||||
default:
|
||||
FFI_ASSERT (0);
|
||||
break;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
context->x8 = (uintptr_t)rvalue;
|
||||
ffi_call_SYSV (stack, frame, fn, cif->aarch64_flags);
|
||||
}
|
||||
if (flags & AARCH64_RET_NEED_COPY)
|
||||
memcpy (orig_rvalue, rvalue, rtype_size);
|
||||
}
|
||||
|
||||
static unsigned char trampoline [] =
|
||||
|
||||
@@ -18,7 +18,48 @@ CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */
|
||||
|
||||
#define AARCH64_FLAG_ARG_V_BIT 0
|
||||
#define AARCH64_RET_VOID 0
|
||||
#define AARCH64_RET_INT64 1
|
||||
#define AARCH64_RET_INT128 2
|
||||
|
||||
#define AARCH64_RET_UNUSED3 3
|
||||
#define AARCH64_RET_UNUSED4 4
|
||||
#define AARCH64_RET_UNUSED5 5
|
||||
#define AARCH64_RET_UNUSED6 6
|
||||
#define AARCH64_RET_UNUSED7 7
|
||||
|
||||
/* Note that FFI_TYPE_FLOAT == 2, _DOUBLE == 3, _LONGDOUBLE == 4,
|
||||
so _S4 through _Q1 are laid out as (TYPE * 4) + (4 - COUNT). */
|
||||
#define AARCH64_RET_S4 8
|
||||
#define AARCH64_RET_S3 9
|
||||
#define AARCH64_RET_S2 10
|
||||
#define AARCH64_RET_S1 11
|
||||
|
||||
#define AARCH64_RET_D4 12
|
||||
#define AARCH64_RET_D3 13
|
||||
#define AARCH64_RET_D2 14
|
||||
#define AARCH64_RET_D1 15
|
||||
|
||||
#define AARCH64_RET_Q4 16
|
||||
#define AARCH64_RET_Q3 17
|
||||
#define AARCH64_RET_Q2 18
|
||||
#define AARCH64_RET_Q1 19
|
||||
|
||||
/* Note that each of the sub-64-bit integers gets two entries. */
|
||||
#define AARCH64_RET_UINT8 20
|
||||
#define AARCH64_RET_UINT16 22
|
||||
#define AARCH64_RET_UINT32 24
|
||||
|
||||
#define AARCH64_RET_SINT8 26
|
||||
#define AARCH64_RET_SINT16 28
|
||||
#define AARCH64_RET_SINT32 30
|
||||
|
||||
#define AARCH64_RET_MASK 31
|
||||
|
||||
#define AARCH64_RET_IN_MEM (1 << 5)
|
||||
#define AARCH64_RET_NEED_COPY (1 << 6)
|
||||
|
||||
#define AARCH64_FLAG_ARG_V_BIT 7
|
||||
#define AARCH64_FLAG_ARG_V (1 << AARCH64_FLAG_ARG_V_BIT)
|
||||
|
||||
#define N_X_ARG_REG 8
|
||||
|
||||
@@ -40,9 +40,9 @@ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */
|
||||
#endif
|
||||
|
||||
.text
|
||||
.align 2
|
||||
.align 4
|
||||
|
||||
.globl CNAME(ffi_call_SYSV)
|
||||
.globl CNAME(ffi_call_SYSV)
|
||||
#ifdef __ELF__
|
||||
.type CNAME(ffi_call_SYSV), #function
|
||||
.hidden CNAME(ffi_call_SYSV)
|
||||
@@ -50,14 +50,15 @@ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */
|
||||
|
||||
/* ffi_call_SYSV
|
||||
extern void ffi_call_SYSV (void *stack, void *frame,
|
||||
void (*fn)(void), int flags);
|
||||
void (*fn)(void), void *rvalue, int flags);
|
||||
|
||||
Therefore on entry we have:
|
||||
|
||||
x0 stack
|
||||
x1 frame
|
||||
x2 fn
|
||||
x3 flags
|
||||
x3 rvalue
|
||||
x4 flags
|
||||
*/
|
||||
|
||||
cfi_startproc
|
||||
@@ -71,43 +72,111 @@ CNAME(ffi_call_SYSV):
|
||||
cfi_rel_offset (x29, 0)
|
||||
cfi_rel_offset (x30, 8)
|
||||
|
||||
str w3, [x29, #16] /* save flags */
|
||||
mov x9, x2 /* save fn */
|
||||
mov x8, x3 /* install structure return */
|
||||
stp x3, x4, [x29, #16] /* save rvalue and flags */
|
||||
|
||||
/* Load the vector argument passing registers, if necessary. */
|
||||
tbz w3, #AARCH64_FLAG_ARG_V_BIT, 1f
|
||||
ldp q0, q1, [x29, #32 + 0]
|
||||
ldp q2, q3, [x29, #32 + 32]
|
||||
ldp q4, q5, [x29, #32 + 64]
|
||||
ldp q6, q7, [x29, #32 + 96]
|
||||
tbz w4, #AARCH64_FLAG_ARG_V_BIT, 1f
|
||||
ldp q0, q1, [sp, #0]
|
||||
ldp q2, q3, [sp, #32]
|
||||
ldp q4, q5, [sp, #64]
|
||||
ldp q6, q7, [sp, #96]
|
||||
1:
|
||||
/* Load the core argument passing registers, including
|
||||
the structure return pointer. */
|
||||
ldp x0, x1, [x29, #32 + 16*N_V_ARG_REG + 0]
|
||||
ldp x2, x3, [x29, #32 + 16*N_V_ARG_REG + 16]
|
||||
ldp x4, x5, [x29, #32 + 16*N_V_ARG_REG + 32]
|
||||
ldp x6, x7, [x29, #32 + 16*N_V_ARG_REG + 48]
|
||||
ldr x8, [x29, #32 + 16*N_V_ARG_REG + 64]
|
||||
ldp x0, x1, [sp, #16*N_V_ARG_REG + 0]
|
||||
ldp x2, x3, [sp, #16*N_V_ARG_REG + 16]
|
||||
ldp x4, x5, [sp, #16*N_V_ARG_REG + 32]
|
||||
ldp x6, x7, [sp, #16*N_V_ARG_REG + 48]
|
||||
|
||||
/* Deallocate the context, leaving the stacked arguments. */
|
||||
add sp, sp, #CALL_CONTEXT_SIZE
|
||||
|
||||
blr x9 /* call fn */
|
||||
|
||||
ldr w3, [x29, #16] /* reload flags */
|
||||
ldp x3, x4, [x29, #16] /* reload rvalue and flags */
|
||||
|
||||
/* Partially deconstruct the stack frame. */
|
||||
mov sp, x29
|
||||
cfi_def_cfa_register (sp)
|
||||
ldp x29, x30, [x29]
|
||||
|
||||
/* Save the core return registers. */
|
||||
stp x0, x1, [sp, #32 + 16*N_V_ARG_REG]
|
||||
/* Save the return value as directed. */
|
||||
adr x5, 0f
|
||||
and w4, w4, #AARCH64_RET_MASK
|
||||
add x5, x5, x4, lsl #3
|
||||
br x5
|
||||
|
||||
/* Save the vector return registers, if necessary. */
|
||||
tbz w3, #AARCH64_FLAG_ARG_V_BIT, 1f
|
||||
stp q0, q1, [sp, #32 + 0]
|
||||
stp q2, q3, [sp, #32 + 32]
|
||||
1:
|
||||
/* All done. */
|
||||
/* Note that each table entry is 2 insns, and thus 8 bytes.
|
||||
For integer data, note that we're storing into ffi_arg
|
||||
and therefore we want to extend to 64 bits; these types
|
||||
have two consecutive entries allocated for them. */
|
||||
.align 4
|
||||
0: ret /* VOID */
|
||||
nop
|
||||
1: str x0, [x3] /* INT64 */
|
||||
ret
|
||||
2: stp x0, x1, [x3] /* INT128 */
|
||||
ret
|
||||
3: brk #1000 /* UNUSED */
|
||||
ret
|
||||
4: brk #1000 /* UNUSED */
|
||||
ret
|
||||
5: brk #1000 /* UNUSED */
|
||||
ret
|
||||
6: brk #1000 /* UNUSED */
|
||||
ret
|
||||
7: brk #1000 /* UNUSED */
|
||||
ret
|
||||
8: st4 { v0.s-v3.s }[0], [x3] /* S4 */
|
||||
ret
|
||||
9: st3 { v0.s-v2.s }[0], [x3] /* S3 */
|
||||
ret
|
||||
10: stp s0, s1, [x3] /* S2 */
|
||||
ret
|
||||
11: str s0, [x3] /* S1 */
|
||||
ret
|
||||
12: st4 { v0.d-v3.d }[0], [x3] /* D4 */
|
||||
ret
|
||||
13: st3 { v0.d-v2.d }[0], [x3] /* D3 */
|
||||
ret
|
||||
14: stp d0, d1, [x3] /* D2 */
|
||||
ret
|
||||
15: str d0, [x3] /* D1 */
|
||||
ret
|
||||
16: str q3, [x3, #48] /* Q4 */
|
||||
nop
|
||||
17: str q2, [x3, #32] /* Q3 */
|
||||
nop
|
||||
18: stp q0, q1, [x3] /* Q2 */
|
||||
ret
|
||||
19: str q0, [x3] /* Q1 */
|
||||
ret
|
||||
20: uxtb w0, w0 /* UINT8 */
|
||||
str x0, [x3]
|
||||
21: ret /* reserved */
|
||||
nop
|
||||
22: uxth w0, w0 /* UINT16 */
|
||||
str x0, [x3]
|
||||
23: ret /* reserved */
|
||||
nop
|
||||
24: mov w0, w0 /* UINT32 */
|
||||
str x0, [x3]
|
||||
25: ret /* reserved */
|
||||
nop
|
||||
26: sxtb x0, w0 /* SINT8 */
|
||||
str x0, [x3]
|
||||
27: ret /* reserved */
|
||||
nop
|
||||
28: sxth x0, w0 /* SINT16 */
|
||||
str x0, [x3]
|
||||
29: ret /* reserved */
|
||||
nop
|
||||
30: sxtw x0, w0 /* SINT32 */
|
||||
str x0, [x3]
|
||||
31: ret /* reserved */
|
||||
nop
|
||||
|
||||
cfi_endproc
|
||||
#ifdef __ELF__
|
||||
@@ -154,9 +223,13 @@ CNAME(ffi_call_SYSV):
|
||||
Voila! */
|
||||
|
||||
.text
|
||||
.align 2
|
||||
.align 4
|
||||
|
||||
.globl CNAME(ffi_closure_SYSV)
|
||||
.globl CNAME(ffi_closure_SYSV)
|
||||
#ifdef __ELF__
|
||||
.type CNAME(ffi_closure_SYSV), #function
|
||||
.hidden CNAME(ffi_closure_SYSV)
|
||||
#endif
|
||||
cfi_startproc
|
||||
CNAME(ffi_closure_SYSV):
|
||||
stp x29, x30, [sp, #-16]!
|
||||
|
||||
Reference in New Issue
Block a user