aarch64: Move return value handling into ffi_closure_SYSV
As with the change to ffi_call_SYSV, this avoids copying data into a temporary buffer.
This commit is contained in:
committed by
Richard Henderson
parent
4fe1aea121
commit
12cf89ee04
@@ -71,9 +71,6 @@ ffi_clear_cache (void *start, void *end)
|
||||
#endif
|
||||
}
|
||||
|
||||
extern void
|
||||
ffi_closure_SYSV (ffi_closure *);
|
||||
|
||||
/* Test for an FFI floating point representation. */
|
||||
|
||||
static unsigned
|
||||
@@ -211,69 +208,6 @@ is_hfa(const ffi_type *ty)
|
||||
return (ele_count << 8) | candidate;
|
||||
}
|
||||
|
||||
/* Test if an ffi_type is a candidate for passing in a register.
|
||||
|
||||
This test does not check that sufficient registers of the
|
||||
appropriate class are actually available, merely that IFF
|
||||
sufficient registers are available then the argument will be passed
|
||||
in register(s).
|
||||
|
||||
Note that an ffi_type that is deemed to be a register candidate
|
||||
will always be returned in registers.
|
||||
|
||||
Returns 1 if a register candidate else 0. */
|
||||
|
||||
static int
|
||||
is_register_candidate (ffi_type *ty)
|
||||
{
|
||||
switch (ty->type)
|
||||
{
|
||||
case FFI_TYPE_VOID:
|
||||
return 0;
|
||||
case FFI_TYPE_FLOAT:
|
||||
case FFI_TYPE_DOUBLE:
|
||||
case FFI_TYPE_LONGDOUBLE:
|
||||
case FFI_TYPE_UINT8:
|
||||
case FFI_TYPE_UINT16:
|
||||
case FFI_TYPE_UINT32:
|
||||
case FFI_TYPE_UINT64:
|
||||
case FFI_TYPE_POINTER:
|
||||
case FFI_TYPE_SINT8:
|
||||
case FFI_TYPE_SINT16:
|
||||
case FFI_TYPE_SINT32:
|
||||
case FFI_TYPE_INT:
|
||||
case FFI_TYPE_SINT64:
|
||||
return 1;
|
||||
|
||||
case FFI_TYPE_STRUCT:
|
||||
if (is_hfa (ty))
|
||||
{
|
||||
return 1;
|
||||
}
|
||||
else if (ty->size > 16)
|
||||
{
|
||||
/* Too large. Will be replaced with a pointer to memory. The
|
||||
pointer MAY be passed in a register, but the value will
|
||||
not. This test specifically fails since the argument will
|
||||
never be passed by value in registers. */
|
||||
return 0;
|
||||
}
|
||||
else
|
||||
{
|
||||
/* Might be passed in registers depending on the number of
|
||||
registers required. */
|
||||
return (ty->size + 7) / 8 < N_X_ARG_REG;
|
||||
}
|
||||
break;
|
||||
|
||||
default:
|
||||
FFI_ASSERT (0);
|
||||
break;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Test if an ffi_type argument or result is a candidate for a vector
|
||||
register. */
|
||||
|
||||
@@ -797,25 +731,10 @@ ffi_call (ffi_cif *cif, void (*fn)(void), void *orig_rvalue, void **avalue)
|
||||
memcpy (orig_rvalue, rvalue, rtype_size);
|
||||
}
|
||||
|
||||
static unsigned char trampoline [] =
|
||||
{ 0x70, 0x00, 0x00, 0x58, /* ldr x16, 1f */
|
||||
0x91, 0x00, 0x00, 0x10, /* adr x17, 2f */
|
||||
0x00, 0x02, 0x1f, 0xd6 /* br x16 */
|
||||
};
|
||||
|
||||
/* Build a trampoline. */
|
||||
|
||||
#define FFI_INIT_TRAMPOLINE(TRAMP,FUN,CTX,FLAGS) \
|
||||
({unsigned char *__tramp = (unsigned char*)(TRAMP); \
|
||||
UINT64 __fun = (UINT64)(FUN); \
|
||||
UINT64 __ctx = (UINT64)(CTX); \
|
||||
UINT64 __flags = (UINT64)(FLAGS); \
|
||||
memcpy (__tramp, trampoline, sizeof (trampoline)); \
|
||||
memcpy (__tramp + 12, &__fun, sizeof (__fun)); \
|
||||
memcpy (__tramp + 20, &__ctx, sizeof (__ctx)); \
|
||||
memcpy (__tramp + 28, &__flags, sizeof (__flags)); \
|
||||
ffi_clear_cache(__tramp, __tramp + FFI_TRAMPOLINE_SIZE); \
|
||||
})
|
||||
extern void ffi_closure_SYSV (void) FFI_HIDDEN;
|
||||
extern void ffi_closure_SYSV_V (void) FFI_HIDDEN;
|
||||
|
||||
ffi_status
|
||||
ffi_prep_closure_loc (ffi_closure *closure,
|
||||
@@ -824,15 +743,30 @@ ffi_prep_closure_loc (ffi_closure* closure,
|
||||
void *user_data,
|
||||
void *codeloc)
|
||||
{
|
||||
static const unsigned char trampoline[16] = {
|
||||
0x90, 0x00, 0x00, 0x58, /* ldr x16, tramp+16 */
|
||||
0xf1, 0xff, 0xff, 0x10, /* adr x17, tramp+0 */
|
||||
0x00, 0x02, 0x1f, 0xd6 /* br x16 */
|
||||
};
|
||||
char *tramp = closure->tramp;
|
||||
void (*start)(void);
|
||||
|
||||
if (cif->abi != FFI_SYSV)
|
||||
return FFI_BAD_ABI;
|
||||
|
||||
FFI_INIT_TRAMPOLINE (&closure->tramp[0], &ffi_closure_SYSV, codeloc,
|
||||
cif->aarch64_flags);
|
||||
|
||||
closure->cif = cif;
|
||||
closure->user_data = user_data;
|
||||
closure->fun = fun;
|
||||
closure->user_data = user_data;
|
||||
|
||||
memcpy (tramp, trampoline, sizeof(trampoline));
|
||||
|
||||
if (cif->flags & AARCH64_FLAG_ARG_V)
|
||||
start = ffi_closure_SYSV_V;
|
||||
else
|
||||
start = ffi_closure_SYSV;
|
||||
*(UINT64 *)(tramp + 16) = (uintptr_t)start;
|
||||
|
||||
ffi_clear_cache(tramp, tramp + FFI_TRAMPOLINE_SIZE);
|
||||
|
||||
return FFI_OK;
|
||||
}
|
||||
@@ -853,20 +787,20 @@ ffi_prep_closure_loc (ffi_closure* closure,
|
||||
descriptors, invokes the wrapped function, then marshalls the return
|
||||
value back into the call context. */
|
||||
|
||||
void FFI_HIDDEN
|
||||
ffi_closure_SYSV_inner (ffi_closure *closure, struct call_context *context,
|
||||
void *stack)
|
||||
int FFI_HIDDEN
|
||||
ffi_closure_SYSV_inner (ffi_cif *cif,
|
||||
void (*fun)(ffi_cif*,void*,void**,void*),
|
||||
void *user_data,
|
||||
struct call_context *context,
|
||||
void *stack, void *rvalue)
|
||||
{
|
||||
ffi_cif *cif = closure->cif;
|
||||
void **avalue = (void**) alloca (cif->nargs * sizeof (void*));
|
||||
void *rvalue = NULL;
|
||||
int i, h, nargs = cif->nargs;
|
||||
int i, h, nargs, flags;
|
||||
struct arg_state state;
|
||||
ffi_type *rtype;
|
||||
|
||||
arg_init (&state);
|
||||
|
||||
for (i = 0; i < nargs; i++)
|
||||
for (i = 0, nargs = cif->nargs; i < nargs; i++)
|
||||
{
|
||||
ffi_type *ty = cif->arg_types[i];
|
||||
int t = ty->type;
|
||||
@@ -955,69 +889,11 @@ ffi_closure_SYSV_inner (ffi_closure *closure, struct call_context *context,
|
||||
}
|
||||
}
|
||||
|
||||
/* Figure out where the return value will be passed, either in registers
|
||||
or in a memory block allocated by the caller and passed in x8. */
|
||||
rtype = cif->rtype;
|
||||
if (is_register_candidate (rtype))
|
||||
{
|
||||
size_t s = rtype->size;
|
||||
int t;
|
||||
|
||||
/* Register candidates are *always* returned in registers. */
|
||||
|
||||
/* Allocate a scratchpad for the return value, we will let the
|
||||
callee scribble the result into the scratch pad then move the
|
||||
contents into the appropriate return value location for the
|
||||
call convention. */
|
||||
rvalue = alloca (s);
|
||||
(closure->fun) (cif, rvalue, avalue, closure->user_data);
|
||||
|
||||
/* Copy the return value into the call context so that it is returned
|
||||
as expected to our caller. */
|
||||
t = rtype->type;
|
||||
switch (t)
|
||||
{
|
||||
case FFI_TYPE_VOID:
|
||||
break;
|
||||
|
||||
case FFI_TYPE_INT:
|
||||
case FFI_TYPE_UINT8:
|
||||
case FFI_TYPE_UINT16:
|
||||
case FFI_TYPE_UINT32:
|
||||
case FFI_TYPE_UINT64:
|
||||
case FFI_TYPE_SINT8:
|
||||
case FFI_TYPE_SINT16:
|
||||
case FFI_TYPE_SINT32:
|
||||
case FFI_TYPE_SINT64:
|
||||
case FFI_TYPE_POINTER:
|
||||
context->x[0] = extend_integer_type (rvalue, t);
|
||||
break;
|
||||
|
||||
case FFI_TYPE_FLOAT:
|
||||
case FFI_TYPE_DOUBLE:
|
||||
case FFI_TYPE_LONGDOUBLE:
|
||||
extend_hfa_type (&context->v[0], rvalue, 0x100 + t);
|
||||
break;
|
||||
|
||||
case FFI_TYPE_STRUCT:
|
||||
h = is_hfa (cif->rtype);
|
||||
if (h)
|
||||
extend_hfa_type (&context->v[0], rvalue, h);
|
||||
else
|
||||
{
|
||||
FFI_ASSERT (s <= 16);
|
||||
memcpy (&context->x[0], rvalue, s);
|
||||
}
|
||||
break;
|
||||
|
||||
default:
|
||||
abort();
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
flags = cif->flags;
|
||||
if (flags & AARCH64_RET_IN_MEM)
|
||||
rvalue = (void *)(uintptr_t)context->x8;
|
||||
(closure->fun) (cif, rvalue, avalue, closure->user_data);
|
||||
}
|
||||
}
|
||||
|
||||
fun (cif, rvalue, avalue, user_data);
|
||||
|
||||
return flags;
|
||||
}
|
||||
|
||||
@@ -42,7 +42,7 @@ typedef enum ffi_abi
|
||||
/* ---- Definitions for closures ----------------------------------------- */
|
||||
|
||||
#define FFI_CLOSURES 1
|
||||
#define FFI_TRAMPOLINE_SIZE 36
|
||||
#define FFI_TRAMPOLINE_SIZE 24
|
||||
#define FFI_NATIVE_RAW_API 0
|
||||
|
||||
/* ---- Internal ---- */
|
||||
|
||||
@@ -37,17 +37,17 @@ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */
|
||||
#else
|
||||
#define CNAME(x) x
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifdef __AARCH64EB__
|
||||
# define BE(X) X
|
||||
#else
|
||||
# define BE(X) 0
|
||||
#endif
|
||||
|
||||
.text
|
||||
.align 4
|
||||
|
||||
.globl CNAME(ffi_call_SYSV)
|
||||
#ifdef __ELF__
|
||||
.type CNAME(ffi_call_SYSV), #function
|
||||
.hidden CNAME(ffi_call_SYSV)
|
||||
#endif
|
||||
|
||||
/* ffi_call_SYSV
|
||||
extern void ffi_call_SYSV (void *stack, void *frame,
|
||||
void (*fn)(void), void *rvalue, int flags);
|
||||
@@ -179,131 +179,160 @@ CNAME(ffi_call_SYSV):
|
||||
nop
|
||||
|
||||
cfi_endproc
|
||||
|
||||
.globl CNAME(ffi_call_SYSV)
|
||||
#ifdef __ELF__
|
||||
.type CNAME(ffi_call_SYSV), #function
|
||||
.hidden CNAME(ffi_call_SYSV)
|
||||
.size CNAME(ffi_call_SYSV), .-CNAME(ffi_call_SYSV)
|
||||
#endif
|
||||
|
||||
#define ffi_closure_SYSV_FS (8 * 2 + CALL_CONTEXT_SIZE)
|
||||
|
||||
/* ffi_closure_SYSV
|
||||
|
||||
Closure invocation glue. This is the low level code invoked directly by
|
||||
the closure trampoline to setup and call a closure.
|
||||
|
||||
On entry x17 points to a struct trampoline_data, x16 has been clobbered
|
||||
On entry x17 points to a struct ffi_closure, x16 has been clobbered
|
||||
all other registers are preserved.
|
||||
|
||||
We allocate a call context and save the argument passing registers,
|
||||
then invoke the generic C ffi_closure_SYSV_inner() function to do all
|
||||
the real work, on return we load the result passing registers back from
|
||||
the call context.
|
||||
*/
|
||||
|
||||
On entry
|
||||
#define ffi_closure_SYSV_FS (8*2 + CALL_CONTEXT_SIZE + 64)
|
||||
|
||||
extern void
|
||||
ffi_closure_SYSV (struct trampoline_data *);
|
||||
|
||||
struct trampoline_data
|
||||
{
|
||||
UINT64 *ffi_closure;
|
||||
UINT64 flags;
|
||||
};
|
||||
|
||||
This function uses the following stack frame layout:
|
||||
|
||||
==
|
||||
saved x30(lr)
|
||||
x29(fp)-> saved x29(fp)
|
||||
saved x22
|
||||
saved x21
|
||||
...
|
||||
sp -> call_context
|
||||
==
|
||||
|
||||
Voila! */
|
||||
|
||||
.text
|
||||
.align 4
|
||||
CNAME(ffi_closure_SYSV_V):
|
||||
cfi_startproc
|
||||
stp x29, x30, [sp, #-ffi_closure_SYSV_FS]!
|
||||
cfi_adjust_cfa_offset (ffi_closure_SYSV_FS)
|
||||
cfi_rel_offset (x29, 0)
|
||||
cfi_rel_offset (x30, 8)
|
||||
|
||||
/* Save the argument passing vector registers. */
|
||||
stp q0, q1, [sp, #16 + 0]
|
||||
stp q2, q3, [sp, #16 + 32]
|
||||
stp q4, q5, [sp, #16 + 64]
|
||||
stp q6, q7, [sp, #16 + 96]
|
||||
b 0f
|
||||
cfi_endproc
|
||||
|
||||
.globl CNAME(ffi_closure_SYSV_V)
|
||||
#ifdef __ELF__
|
||||
.type CNAME(ffi_closure_SYSV_V), #function
|
||||
.hidden CNAME(ffi_closure_SYSV_V)
|
||||
.size CNAME(ffi_closure_SYSV_V), . - CNAME(ffi_closure_SYSV_V)
|
||||
#endif
|
||||
|
||||
.align 4
|
||||
cfi_startproc
|
||||
CNAME(ffi_closure_SYSV):
|
||||
stp x29, x30, [sp, #-ffi_closure_SYSV_FS]!
|
||||
cfi_adjust_cfa_offset (ffi_closure_SYSV_FS)
|
||||
cfi_rel_offset (x29, 0)
|
||||
cfi_rel_offset (x30, 8)
|
||||
0:
|
||||
mov x29, sp
|
||||
|
||||
/* Save the argument passing core registers. */
|
||||
stp x0, x1, [sp, #16 + 16*N_V_ARG_REG + 0]
|
||||
stp x2, x3, [sp, #16 + 16*N_V_ARG_REG + 16]
|
||||
stp x4, x5, [sp, #16 + 16*N_V_ARG_REG + 32]
|
||||
stp x6, x7, [sp, #16 + 16*N_V_ARG_REG + 48]
|
||||
str x8, [sp, #16 + 16*N_V_ARG_REG + 64]
|
||||
|
||||
/* Load ffi_closure_inner arguments. */
|
||||
ldp x0, x1, [x17, #FFI_TRAMPOLINE_SIZE] /* load cif, fn */
|
||||
ldr x2, [x17, #FFI_TRAMPOLINE_SIZE+16] /* load user_data */
|
||||
add x3, sp, #16 /* load context */
|
||||
add x4, sp, #ffi_closure_SYSV_FS /* load stack */
|
||||
add x5, sp, #16+CALL_CONTEXT_SIZE /* load rvalue */
|
||||
bl CNAME(ffi_closure_SYSV_inner)
|
||||
|
||||
/* Load the return value as directed. */
|
||||
adr x1, 0f
|
||||
and w0, w0, #AARCH64_RET_MASK
|
||||
add x1, x1, x0, lsl #3
|
||||
add x3, sp, #16+CALL_CONTEXT_SIZE
|
||||
br x1
|
||||
|
||||
/* Note that each table entry is 2 insns, and thus 8 bytes. */
|
||||
.align 4
|
||||
0: b 99f /* VOID */
|
||||
nop
|
||||
1: ldr x0, [x3] /* INT64 */
|
||||
b 99f
|
||||
2: ldp x0, x1, [x3] /* INT128 */
|
||||
b 99f
|
||||
3: brk #1000 /* UNUSED */
|
||||
nop
|
||||
4: brk #1000 /* UNUSED */
|
||||
nop
|
||||
5: brk #1000 /* UNUSED */
|
||||
nop
|
||||
6: brk #1000 /* UNUSED */
|
||||
nop
|
||||
7: brk #1000 /* UNUSED */
|
||||
nop
|
||||
8: ldr s3, [x3, #12] /* S4 */
|
||||
nop
|
||||
/* S3: load the third float from the rvalue buffer.  The buffer base is
   x3 (set to sp + 16 + CALL_CONTEXT_SIZE before the dispatch branch),
   as in every other table entry; x2 held user_data for the call to
   ffi_closure_SYSV_inner and is not a valid base here.  */
9: ldr s2, [x3, #8] /* S3 */
|
||||
nop
|
||||
10: ldp s0, s1, [x3] /* S2 */
|
||||
b 99f
|
||||
11: ldr s0, [x3] /* S1 */
|
||||
b 99f
|
||||
12: ldr d3, [x3, #24] /* D4 */
|
||||
nop
|
||||
13: ldr d2, [x3, #16] /* D3 */
|
||||
nop
|
||||
14: ldp d0, d1, [x3] /* D2 */
|
||||
b 99f
|
||||
15: ldr d0, [x3] /* D1 */
|
||||
b 99f
|
||||
16: ldr q3, [x3, #48] /* Q4 */
|
||||
nop
|
||||
17: ldr q2, [x3, #32] /* Q3 */
|
||||
nop
|
||||
18: ldp q0, q1, [x3] /* Q2 */
|
||||
b 99f
|
||||
19: ldr q0, [x3] /* Q1 */
|
||||
b 99f
|
||||
20: ldrb w0, [x3, #BE(7)] /* UINT8 */
|
||||
b 99f
|
||||
21: brk #1000 /* reserved */
|
||||
nop
|
||||
22: ldrh w0, [x3, #BE(6)] /* UINT16 */
|
||||
b 99f
|
||||
23: brk #1000 /* reserved */
|
||||
nop
|
||||
24: ldr w0, [x3, #BE(4)] /* UINT32 */
|
||||
b 99f
|
||||
25: brk #1000 /* reserved */
|
||||
nop
|
||||
26: ldrsb x0, [x3, #BE(7)] /* SINT8 */
|
||||
b 99f
|
||||
27: brk #1000 /* reserved */
|
||||
nop
|
||||
28: ldrsh x0, [x3, #BE(6)] /* SINT16 */
|
||||
b 99f
|
||||
29: brk #1000 /* reserved */
|
||||
nop
|
||||
30: ldrsw x0, [x3, #BE(4)] /* SINT32 */
|
||||
nop
|
||||
31: /* reserved */
|
||||
99: ldp x29, x30, [sp], #ffi_closure_SYSV_FS
|
||||
cfi_adjust_cfa_offset (-ffi_closure_SYSV_FS)
|
||||
cfi_restore (x29)
|
||||
cfi_restore (x30)
|
||||
ret
|
||||
cfi_endproc
|
||||
|
||||
.globl CNAME(ffi_closure_SYSV)
|
||||
#ifdef __ELF__
|
||||
.type CNAME(ffi_closure_SYSV), #function
|
||||
.hidden CNAME(ffi_closure_SYSV)
|
||||
#endif
|
||||
cfi_startproc
|
||||
CNAME(ffi_closure_SYSV):
|
||||
stp x29, x30, [sp, #-16]!
|
||||
cfi_adjust_cfa_offset (16)
|
||||
cfi_rel_offset (x29, 0)
|
||||
cfi_rel_offset (x30, 8)
|
||||
|
||||
mov x29, sp
|
||||
cfi_def_cfa_register (x29)
|
||||
|
||||
sub sp, sp, #ffi_closure_SYSV_FS
|
||||
|
||||
stp x21, x22, [x29, #-16]
|
||||
cfi_rel_offset (x21, -16)
|
||||
cfi_rel_offset (x22, -8)
|
||||
|
||||
/* Load x21 with &call_context. */
|
||||
mov x21, sp
|
||||
/* Preserve our struct trampoline_data * */
|
||||
mov x22, x17
|
||||
|
||||
/* Save the rest of the argument passing registers, including
|
||||
the structure return pointer. */
|
||||
stp x0, x1, [x21, #16*N_V_ARG_REG + 0]
|
||||
stp x2, x3, [x21, #16*N_V_ARG_REG + 16]
|
||||
stp x4, x5, [x21, #16*N_V_ARG_REG + 32]
|
||||
stp x6, x7, [x21, #16*N_V_ARG_REG + 48]
|
||||
str x8, [x21, #16*N_V_ARG_REG + 64]
|
||||
|
||||
/* Figure out if we should touch the vector registers. */
|
||||
ldr x0, [x22, #8]
|
||||
tbz x0, #AARCH64_FLAG_ARG_V_BIT, 1f
|
||||
|
||||
/* Save the argument passing vector registers. */
|
||||
stp q0, q1, [x21, #0]
|
||||
stp q2, q3, [x21, #32]
|
||||
stp q4, q5, [x21, #64]
|
||||
stp q6, q7, [x21, #96]
|
||||
1:
|
||||
/* Load &ffi_closure.. */
|
||||
ldr x0, [x22, #0]
|
||||
mov x1, x21
|
||||
/* Compute the location of the stack at the point that the
|
||||
trampoline was called. */
|
||||
add x2, x29, #16
|
||||
|
||||
bl CNAME(ffi_closure_SYSV_inner)
|
||||
|
||||
/* Figure out if we should touch the vector registers. */
|
||||
ldr x0, [x22, #8]
|
||||
tbz x0, #AARCH64_FLAG_ARG_V_BIT, 1f
|
||||
|
||||
/* Load the result passing vector registers. */
|
||||
ldp q0, q1, [x21, #0]
|
||||
ldp q2, q3, [x21, #32]
|
||||
1:
|
||||
/* Load the result passing core registers. */
|
||||
ldp x0, x1, [x21, #16*N_V_ARG_REG + 0]
|
||||
|
||||
/* We are done, unwind our frame. */
|
||||
ldp x21, x22, [x29, #-16]
|
||||
cfi_restore (x21)
|
||||
cfi_restore (x22)
|
||||
|
||||
mov sp, x29
|
||||
cfi_def_cfa_register (sp)
|
||||
|
||||
ldp x29, x30, [sp], #16
|
||||
cfi_adjust_cfa_offset (-16)
|
||||
cfi_restore (x29)
|
||||
cfi_restore (x30)
|
||||
|
||||
ret
|
||||
cfi_endproc
|
||||
#ifdef __ELF__
|
||||
.size CNAME(ffi_closure_SYSV), . - CNAME(ffi_closure_SYSV)
|
||||
#endif
|
||||
|
||||
Reference in New Issue
Block a user