aarch64: Merge prep_args with ffi_call

Use the trick to allocate the stack frame for ffi_call_SYSV
within ffi_call itself.
Author:    Richard Henderson
Date:      2014-10-22 13:58:59 -04:00
Committer: Richard Henderson
parent 8c8161cb62
commit 325471ea6a
2 changed files with 143 additions and 240 deletions
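The "trick" is visible in the first hunks below: instead of handing the assembly a prep-args callback plus a stack size to alloca for itself, ffi_call now performs a single alloca covering the callee's outgoing stack arguments, a small fixed frame, and the register context, and passes the pieces to ffi_call_SYSV. A minimal sketch of that layout, using the names from the diff (the 32-byte frame holding x29, x30 and the flags word is this commit's convention):

size_t stack_bytes = cif->bytes;   /* already 16-aligned by ffi_prep_cif_machdep */
void *stack = alloca (stack_bytes + 32 + sizeof (struct call_context));
void *frame = stack + stack_bytes;            /* saved x29, x30, flags word */
struct call_context *context = frame + 32;    /* v0-v7, x0-x7, x8 save area */
ffi_call_SYSV (stack, frame, fn, cif->aarch64_flags);  /* sp = stack, x29 = frame */

(Arithmetic on void * is the GNU C extension libffi already relies on.)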

src/aarch64/ffi.c

@@ -71,14 +71,6 @@ ffi_clear_cache (void *start, void *end)
#endif
}
extern void
ffi_call_SYSV (unsigned (*)(struct call_context *context, unsigned char *,
extended_cif *),
struct call_context *context,
extended_cif *,
size_t,
void (*fn)(void));
extern void
ffi_closure_SYSV (ffi_closure *);
@@ -311,12 +303,11 @@ struct arg_state
/* Initialize a procedure call argument marshalling state. */
static void
arg_init (struct arg_state *state, size_t call_frame_size)
arg_init (struct arg_state *state)
{
state->ngrn = 0;
state->nsrn = 0;
state->nsaa = 0;
#if defined (__APPLE__)
state->allocating_variadic = 0;
#endif
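For reference, the counters reset here are the AAPCS64 argument-allocation variables. The struct behind them looks like the following (a sketch; the field types are assumptions):

struct arg_state
{
  unsigned ngrn;   /* Next General-purpose Register Number (x0-x7).  */
  unsigned nsrn;   /* Next SIMD/FP Register Number (v0-v7).  */
  size_t nsaa;     /* Next Stacked Argument Address (byte offset).  */
#if defined (__APPLE__)
  unsigned allocating_variadic;  /* Apple ABI: variadic args go on the stack.  */
#endif
};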
@@ -529,27 +520,88 @@ allocate_int_to_reg_or_stack (struct call_context *context,
return allocate_to_stack (state, stack, size, size);
}
/* Marshall the arguments from FFI representation to procedure call
context and stack. */
static unsigned
aarch64_prep_args (struct call_context *context, unsigned char *stack,
extended_cif *ecif)
ffi_status
ffi_prep_cif_machdep (ffi_cif *cif)
{
ffi_cif *cif = ecif->cif;
void **avalue = ecif->avalue;
int i, nargs = cif->nargs;
/* Round the stack up to a multiple of the stack alignment requirement. */
cif->bytes = ALIGN(cif->bytes, 16);
/* Initialize our flags. We are interested in whether this CIF
will touch a vector register; if so, we enable saving and
restoring of the vector register context, otherwise not. This
is intended to be friendly to lazy float context switching in
the kernel. */
cif->aarch64_flags = 0;
if (is_v_register_candidate (cif->rtype))
{
cif->aarch64_flags |= AARCH64_FLAG_ARG_V;
}
else
{
int i;
for (i = 0; i < cif->nargs; i++)
if (is_v_register_candidate (cif->arg_types[i]))
{
cif->aarch64_flags |= AARCH64_FLAG_ARG_V;
break;
}
}
#if defined (__APPLE__)
cif->aarch64_nfixedargs = 0;
#endif
return FFI_OK;
}
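A hedged sketch of the predicate is_v_register_candidate implements (the real function is defined earlier in ffi.c; this version is illustrative only, reusing the is_hfa helper the file already has):

static int
v_candidate_sketch (ffi_type *ty)
{
  switch (ty->type)
    {
    case FFI_TYPE_FLOAT:
    case FFI_TYPE_DOUBLE:
    case FFI_TYPE_LONGDOUBLE:
      return 1;                  /* scalar FP travels in v-registers */
    case FFI_TYPE_STRUCT:
      return is_hfa (ty) != 0;   /* homogeneous FP aggregates do too */
    default:
      return 0;
    }
}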
#if defined (__APPLE__)
/* Perform Apple-specific cif processing for variadic calls */
ffi_status ffi_prep_cif_machdep_var(ffi_cif *cif,
unsigned int nfixedargs,
unsigned int ntotalargs)
{
ffi_status status;
status = ffi_prep_cif_machdep (cif);
cif->aarch64_nfixedargs = nfixedargs;
return status;
}
#endif
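Callers reach ffi_prep_cif_machdep_var through the public ffi_prep_cif_var entry point; for example, preparing a call to printf("%d\n", 42) records one fixed argument out of two total:

ffi_cif cif;
ffi_type *argtypes[2] = { &ffi_type_pointer, &ffi_type_sint };
/* 1 fixed argument (the format string), 2 arguments in total.  */
ffi_prep_cif_var (&cif, FFI_DEFAULT_ABI, 1, 2, &ffi_type_sint, argtypes);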
extern void ffi_call_SYSV (void *stack, void *frame,
void (*fn)(void), int flags) FFI_HIDDEN;
/* Call a function with the provided arguments and capture the return
value. */
void
ffi_call (ffi_cif *cif, void (*fn)(void), void *rvalue, void **avalue)
{
struct call_context *context;
void *stack, *frame;
struct arg_state state;
size_t stack_bytes;
int i, nargs = cif->nargs;
int h, t;
ffi_type *rtype;
arg_init (&state, cif->bytes);
/* Allocate consecutive stack space for everything we'll need. */
stack_bytes = cif->bytes;
stack = alloca (stack_bytes + 32 + sizeof(struct call_context));
frame = stack + stack_bytes;
context = frame + 32;
arg_init (&state);
for (i = 0; i < nargs; i++)
{
ffi_type *ty = cif->arg_types[i];
size_t s = ty->size;
int h, t = ty->type;
void *a = avalue[i];
t = ty->type;
switch (t)
{
case FFI_TYPE_VOID:
@@ -665,83 +717,12 @@ aarch64_prep_args (struct call_context *context, unsigned char *stack,
#endif
}
return cif->aarch64_flags;
}
ffi_status
ffi_prep_cif_machdep (ffi_cif *cif)
{
/* Round the stack up to a multiple of the stack alignment requirement. */
cif->bytes = ALIGN(cif->bytes, 16);
/* Initialize our flags. We are interested in whether this CIF
will touch a vector register; if so, we enable saving and
restoring of the vector register context, otherwise not. This
is intended to be friendly to lazy float context switching in
the kernel. */
cif->aarch64_flags = 0;
if (is_v_register_candidate (cif->rtype))
rtype = cif->rtype;
if (is_register_candidate (rtype))
{
cif->aarch64_flags |= AARCH64_FLAG_ARG_V;
}
else
{
int i;
for (i = 0; i < cif->nargs; i++)
if (is_v_register_candidate (cif->arg_types[i]))
{
cif->aarch64_flags |= AARCH64_FLAG_ARG_V;
break;
}
}
ffi_call_SYSV (stack, frame, fn, cif->aarch64_flags);
#if defined (__APPLE__)
cif->aarch64_nfixedargs = 0;
#endif
return FFI_OK;
}
#if defined (__APPLE__)
/* Perform Apple-specific cif processing for variadic calls */
ffi_status ffi_prep_cif_machdep_var(ffi_cif *cif,
unsigned int nfixedargs,
unsigned int ntotalargs)
{
ffi_status status;
status = ffi_prep_cif_machdep (cif);
cif->aarch64_nfixedargs = nfixedargs;
return status;
}
#endif
/* Call a function with the provided arguments and capture the return
value. */
void
ffi_call (ffi_cif *cif, void (*fn)(void), void *rvalue, void **avalue)
{
extended_cif ecif;
struct call_context context;
size_t stack_bytes;
int h, t;
ecif.cif = cif;
ecif.avalue = avalue;
ecif.rvalue = rvalue;
stack_bytes = cif->bytes;
memset (&context, 0, sizeof (context));
if (is_register_candidate (cif->rtype))
{
ffi_call_SYSV (aarch64_prep_args, &context, &ecif, stack_bytes, fn);
t = cif->rtype->type;
t = rtype->type;
switch (t)
{
case FFI_TYPE_INT:
@@ -754,33 +735,35 @@ ffi_call (ffi_cif *cif, void (*fn)(void), void *rvalue, void **avalue)
case FFI_TYPE_POINTER:
case FFI_TYPE_UINT64:
case FFI_TYPE_SINT64:
*(ffi_arg *)rvalue = extend_integer_type (&context.x[0], t);
*(ffi_arg *)rvalue = extend_integer_type (&context->x[0], t);
break;
case FFI_TYPE_FLOAT:
case FFI_TYPE_DOUBLE:
case FFI_TYPE_LONGDOUBLE:
compress_hfa_type (rvalue, &context.v[0], 0x100 + t);
compress_hfa_type (rvalue, &context->v[0], 0x100 + t);
break;
case FFI_TYPE_STRUCT:
h = is_hfa (cif->rtype);
if (h)
compress_hfa_type (rvalue, &context.v[0], h);
else if ((cif->rtype->size + 7) / 8 < N_X_ARG_REG)
memcpy (rvalue, &context.x[0], cif->rtype->size);
compress_hfa_type (rvalue, &context->v[0], h);
else
abort();
{
FFI_ASSERT (rtype->size <= 16);
memcpy (rvalue, &context->x[0], rtype->size);
}
break;
default:
abort();
FFI_ASSERT (0);
break;
}
}
else
{
context.x8 = (uintptr_t)rvalue;
ffi_call_SYSV (aarch64_prep_args, &context, &ecif, stack_bytes, fn);
context->x8 = (uintptr_t)rvalue;
ffi_call_SYSV (stack, frame, fn, cif->aarch64_flags);
}
}
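An end-to-end usage sketch of the interface above (standard libffi API; hypot from libm is picked as an arbitrary two-double callee):

#include <ffi.h>
#include <math.h>
#include <stdio.h>

int
main (void)
{
  ffi_cif cif;
  ffi_type *argtypes[2] = { &ffi_type_double, &ffi_type_double };
  double a = 3.0, b = 4.0, result;
  void *avalue[2] = { &a, &b };

  if (ffi_prep_cif (&cif, FFI_DEFAULT_ABI, 2, &ffi_type_double, argtypes)
      == FFI_OK)
    {
      /* d0/d1 are loaded from the context, hypot is called, and the
         result is compressed back out of v[0] into `result`.  */
      ffi_call (&cif, (void (*)(void)) hypot, &result, avalue);
      printf ("%f\n", result);  /* prints 5.000000 */
    }
  return 0;
}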
@@ -851,7 +834,7 @@ ffi_closure_SYSV_inner (ffi_closure *closure, struct call_context *context,
struct arg_state state;
ffi_type *rtype;
arg_init (&state, ALIGN(cif->bytes, 16));
arg_init (&state);
for (i = 0; i < nargs; i++)
{

src/aarch64/sysv.S

@@ -22,6 +22,7 @@ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */
#define LIBFFI_ASM
#include <fficonfig.h>
#include <ffi.h>
#include <ffi_cfi.h>
#include "internal.h"
#ifdef HAVE_MACHINE_ASM_H
@@ -38,158 +39,77 @@ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */
#endif
#endif
#define cfi_adjust_cfa_offset(off) .cfi_adjust_cfa_offset off
#define cfi_rel_offset(reg, off) .cfi_rel_offset reg, off
#define cfi_restore(reg) .cfi_restore reg
#define cfi_def_cfa_register(reg) .cfi_def_cfa_register reg
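These per-file wrappers are superseded by the new ffi_cfi.h include above; presumably that header centralizes the same macros for every port, expanding to real .cfi_* directives only when the assembler supports them, along these lines:

#ifdef HAVE_AS_CFI_PSEUDO_OP
# define cfi_startproc              .cfi_startproc
# define cfi_endproc                .cfi_endproc
# define cfi_def_cfa(reg, off)      .cfi_def_cfa reg, off
# define cfi_def_cfa_register(reg)  .cfi_def_cfa_register reg
# define cfi_adjust_cfa_offset(off) .cfi_adjust_cfa_offset off
# define cfi_rel_offset(reg, off)   .cfi_rel_offset reg, off
# define cfi_restore(reg)           .cfi_restore reg
#else
# define cfi_startproc
# define cfi_endproc
# define cfi_def_cfa(reg, off)
# define cfi_def_cfa_register(reg)
# define cfi_adjust_cfa_offset(off)
# define cfi_rel_offset(reg, off)
# define cfi_restore(reg)
#endif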
.text
.align 2
.globl CNAME(ffi_call_SYSV)
#ifdef __ELF__
.type CNAME(ffi_call_SYSV), #function
.hidden CNAME(ffi_call_SYSV)
#endif
/* ffi_call_SYSV()
Create a stack frame, set up an argument context, call the callee
and extract the result.
The maximum required argument stack size is provided,
ffi_call_SYSV() allocates that stack space then calls the
prepare_fn to populate register context and stack. The
argument passing registers are loaded from the register
context and the callee is called; on return, the result
registers are saved back to the context. Our caller will
extract the return value from the final state of the saved
register context.
Prototype:
extern unsigned
ffi_call_SYSV (void (*)(struct call_context *context, unsigned char *,
extended_cif *),
struct call_context *context,
extended_cif *,
size_t required_stack_size,
void (*fn)(void));
/* ffi_call_SYSV
extern void ffi_call_SYSV (void *stack, void *frame,
void (*fn)(void), int flags);
Therefore on entry we have:
x0 prepare_fn
x1 &context
x2 &ecif
x3 bytes
x4 fn
x0 stack
x1 frame
x2 fn
x3 flags
*/
This function uses the following stack frame layout:
==
                saved x30(lr)
   x29(fp) ->   saved x29(fp)
                saved x24
                saved x23
                saved x22
   sp'     ->   saved x21
                ...
   sp      ->   (constructed callee stack arguments)
==

Voila! */
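For contrast, the replacement scheme needs no callee-saved temporaries at all; the caller-allocated block (see the ffi.c hunk above) is laid out, from low to high addresses, as:

sp      ->  outgoing callee stack arguments   (cif->bytes)
x29     ->  saved x29, saved x30, flags word  (32-byte frame)
x29+32  ->  struct call_context               (v0-v7, x0-x7, x8)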
#define ffi_call_SYSV_FS (8 * 4)
.cfi_startproc
cfi_startproc
CNAME(ffi_call_SYSV):
stp x29, x30, [sp, #-16]!
cfi_adjust_cfa_offset (16)
cfi_rel_offset (x29, 0)
cfi_rel_offset (x30, 8)
/* Use a stack frame allocated by our caller. */
cfi_def_cfa(x1, 32);
stp x29, x30, [x1]
mov x29, x1
mov sp, x0
cfi_def_cfa_register(x29)
cfi_rel_offset (x29, 0)
cfi_rel_offset (x30, 8)
mov x29, sp
cfi_def_cfa_register (x29)
sub sp, sp, #ffi_call_SYSV_FS
str w3, [x29, #16] /* save flags */
mov x9, x2 /* save fn */
stp x21, x22, [sp, #0]
cfi_rel_offset (x21, 0 - ffi_call_SYSV_FS)
cfi_rel_offset (x22, 8 - ffi_call_SYSV_FS)
stp x23, x24, [sp, #16]
cfi_rel_offset (x23, 16 - ffi_call_SYSV_FS)
cfi_rel_offset (x24, 24 - ffi_call_SYSV_FS)
mov x21, x1
mov x22, x2
mov x24, x4
/* Allocate the stack space for the actual arguments; many
arguments will be passed in registers, but we assume the
worst case and allocate sufficient stack for ALL of
the arguments. */
sub sp, sp, x3
/* unsigned (*prepare_fn) (struct call_context *context,
unsigned char *stack, extended_cif *ecif);
*/
mov x23, x0
mov x0, x1
mov x1, sp
/* x2 already in place */
blr x23
/* Preserve the flags returned. */
mov x23, x0
/* Figure out if we should touch the vector registers. */
tbz x23, #AARCH64_FLAG_ARG_V_BIT, 1f
/* Load the vector argument passing registers. */
ldp q0, q1, [x21, #0]
ldp q2, q3, [x21, #32]
ldp q4, q5, [x21, #64]
ldp q6, q7, [x21, #96]
/* Load the vector argument passing registers, if necessary. */
tbz w3, #AARCH64_FLAG_ARG_V_BIT, 1f
ldp q0, q1, [x29, #32 + 0]
ldp q2, q3, [x29, #32 + 32]
ldp q4, q5, [x29, #32 + 64]
ldp q6, q7, [x29, #32 + 96]
1:
/* Load the core argument passing registers, including
the structure return pointer. */
ldp x0, x1, [x21, #16*N_V_ARG_REG + 0]
ldp x2, x3, [x21, #16*N_V_ARG_REG + 16]
ldp x4, x5, [x21, #16*N_V_ARG_REG + 32]
ldp x6, x7, [x21, #16*N_V_ARG_REG + 48]
ldr x8, [x21, #16*N_V_ARG_REG + 64]
ldp x0, x1, [x29, #32 + 16*N_V_ARG_REG + 0]
ldp x2, x3, [x29, #32 + 16*N_V_ARG_REG + 16]
ldp x4, x5, [x29, #32 + 16*N_V_ARG_REG + 32]
ldp x6, x7, [x29, #32 + 16*N_V_ARG_REG + 48]
ldr x8, [x29, #32 + 16*N_V_ARG_REG + 64]
blr x24
blr x9 /* call fn */
/* Save the core return registers. */
stp x0, x1, [x21, #16*N_V_ARG_REG]
ldr w3, [x29, #16] /* reload flags */
/* Figure out if we should touch the vector registers. */
tbz x23, #AARCH64_FLAG_ARG_V_BIT, 1f
/* Save the vector return registers. */
stp q0, q1, [x21, #0]
stp q2, q3, [x21, #32]
1:
/* All done, unwind our stack frame. */
ldp x21, x22, [x29, # - ffi_call_SYSV_FS]
cfi_restore (x21)
cfi_restore (x22)
ldp x23, x24, [x29, # - ffi_call_SYSV_FS + 16]
cfi_restore (x23)
cfi_restore (x24)
mov sp, x29
/* Partially deconstruct the stack frame. */
mov sp, x29
cfi_def_cfa_register (sp)
ldp x29, x30, [x29]
ldp x29, x30, [sp], #16
cfi_adjust_cfa_offset (-16)
cfi_restore (x29)
cfi_restore (x30)
/* Save the core return registers. */
stp x0, x1, [sp, #32 + 16*N_V_ARG_REG]
ret
/* Save the vector return registers, if necessary. */
tbz w3, #AARCH64_FLAG_ARG_V_BIT, 1f
stp q0, q1, [sp, #32 + 0]
stp q2, q3, [sp, #32 + 32]
1:
/* All done. */
ret
.cfi_endproc
cfi_endproc
#ifdef __ELF__
.size CNAME(ffi_call_SYSV), .-CNAME(ffi_call_SYSV)
#endif
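The bit tested by the tbz instructions above corresponds to the flag computed in ffi_prep_cif_machdep; the presumed relation (the definitions live in the port's headers, exact values assumed) is:

#define AARCH64_FLAG_ARG_V_BIT  0
#define AARCH64_FLAG_ARG_V      (1 << AARCH64_FLAG_ARG_V_BIT)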
@@ -237,7 +157,7 @@ CNAME(ffi_call_SYSV):
.align 2
.globl CNAME(ffi_closure_SYSV)
.cfi_startproc
cfi_startproc
CNAME(ffi_closure_SYSV):
stp x29, x30, [sp, #-16]!
cfi_adjust_cfa_offset (16)
@@ -310,7 +230,7 @@ CNAME(ffi_closure_SYSV):
cfi_restore (x30)
ret
.cfi_endproc
cfi_endproc
#ifdef __ELF__
.size CNAME(ffi_closure_SYSV), .-CNAME(ffi_closure_SYSV)
#endif
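For completeness, user code reaches ffi_closure_SYSV through libffi's public closure API. A minimal sketch, assuming a cif already prepared for int(int):

static void
add_one (ffi_cif *cif, void *ret, void **args, void *user_data)
{
  *(int *) ret = *(int *) args[0] + 1;
}

/* ... in the caller: */
void *code;
ffi_closure *closure = ffi_closure_alloc (sizeof (ffi_closure), &code);
if (closure != NULL
    && ffi_prep_closure_loc (closure, &cif, add_one, NULL, code) == FFI_OK)
  {
    int (*fn) (int) = (int (*)(int)) code;
    int r = fn (41);   /* r == 42; control enters via ffi_closure_SYSV */
  }
ffi_closure_free (closure);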