aarch64: Merge prep_args with ffi_call
Use the trick to allocate the stack frame for ffi_call_SYSV within ffi_call itself.
This commit is contained in:
committed by
Richard Henderson
parent
8c8161cb62
commit
325471ea6a
@@ -71,14 +71,6 @@ ffi_clear_cache (void *start, void *end)
|
||||
#endif
|
||||
}
|
||||
|
||||
extern void
|
||||
ffi_call_SYSV (unsigned (*)(struct call_context *context, unsigned char *,
|
||||
extended_cif *),
|
||||
struct call_context *context,
|
||||
extended_cif *,
|
||||
size_t,
|
||||
void (*fn)(void));
|
||||
|
||||
extern void
|
||||
ffi_closure_SYSV (ffi_closure *);
|
||||
|
||||
@@ -311,12 +303,11 @@ struct arg_state
|
||||
|
||||
/* Initialize a procedure call argument marshalling state. */
|
||||
static void
|
||||
arg_init (struct arg_state *state, size_t call_frame_size)
|
||||
arg_init (struct arg_state *state)
|
||||
{
|
||||
state->ngrn = 0;
|
||||
state->nsrn = 0;
|
||||
state->nsaa = 0;
|
||||
|
||||
#if defined (__APPLE__)
|
||||
state->allocating_variadic = 0;
|
||||
#endif
|
||||
@@ -529,27 +520,88 @@ allocate_int_to_reg_or_stack (struct call_context *context,
|
||||
return allocate_to_stack (state, stack, size, size);
|
||||
}
|
||||
|
||||
/* Marshall the arguments from FFI representation to procedure call
|
||||
context and stack. */
|
||||
|
||||
static unsigned
|
||||
aarch64_prep_args (struct call_context *context, unsigned char *stack,
|
||||
extended_cif *ecif)
|
||||
ffi_status
|
||||
ffi_prep_cif_machdep (ffi_cif *cif)
|
||||
{
|
||||
ffi_cif *cif = ecif->cif;
|
||||
void **avalue = ecif->avalue;
|
||||
int i, nargs = cif->nargs;
|
||||
/* Round the stack up to a multiple of the stack alignment requirement. */
|
||||
cif->bytes = ALIGN(cif->bytes, 16);
|
||||
|
||||
/* Initialize our flags. We are interested if this CIF will touch a
|
||||
vector register, if so we will enable context save and load to
|
||||
those registers, otherwise not. This is intended to be friendly
|
||||
to lazy float context switching in the kernel. */
|
||||
cif->aarch64_flags = 0;
|
||||
|
||||
if (is_v_register_candidate (cif->rtype))
|
||||
{
|
||||
cif->aarch64_flags |= AARCH64_FLAG_ARG_V;
|
||||
}
|
||||
else
|
||||
{
|
||||
int i;
|
||||
for (i = 0; i < cif->nargs; i++)
|
||||
if (is_v_register_candidate (cif->arg_types[i]))
|
||||
{
|
||||
cif->aarch64_flags |= AARCH64_FLAG_ARG_V;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
#if defined (__APPLE__)
|
||||
cif->aarch64_nfixedargs = 0;
|
||||
#endif
|
||||
|
||||
return FFI_OK;
|
||||
}
|
||||
|
||||
#if defined (__APPLE__)
|
||||
|
||||
/* Perform Apple-specific cif processing for variadic calls */
|
||||
ffi_status ffi_prep_cif_machdep_var(ffi_cif *cif,
|
||||
unsigned int nfixedargs,
|
||||
unsigned int ntotalargs)
|
||||
{
|
||||
ffi_status status;
|
||||
|
||||
status = ffi_prep_cif_machdep (cif);
|
||||
|
||||
cif->aarch64_nfixedargs = nfixedargs;
|
||||
|
||||
return status;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
extern void ffi_call_SYSV (void *stack, void *frame,
|
||||
void (*fn)(void), int flags) FFI_HIDDEN;
|
||||
|
||||
/* Call a function with the provided arguments and capture the return
|
||||
value. */
|
||||
void
|
||||
ffi_call (ffi_cif *cif, void (*fn)(void), void *rvalue, void **avalue)
|
||||
{
|
||||
struct call_context *context;
|
||||
void *stack, *frame;
|
||||
struct arg_state state;
|
||||
size_t stack_bytes;
|
||||
int i, nargs = cif->nargs;
|
||||
int h, t;
|
||||
ffi_type *rtype;
|
||||
|
||||
arg_init (&state, cif->bytes);
|
||||
/* Allocate consecutive stack for everything we'll need. */
|
||||
stack_bytes = cif->bytes;
|
||||
stack = alloca (stack_bytes + 32 + sizeof(struct call_context));
|
||||
frame = stack + stack_bytes;
|
||||
context = frame + 32;
|
||||
|
||||
arg_init (&state);
|
||||
for (i = 0; i < nargs; i++)
|
||||
{
|
||||
ffi_type *ty = cif->arg_types[i];
|
||||
size_t s = ty->size;
|
||||
int h, t = ty->type;
|
||||
void *a = avalue[i];
|
||||
|
||||
t = ty->type;
|
||||
switch (t)
|
||||
{
|
||||
case FFI_TYPE_VOID:
|
||||
@@ -665,83 +717,12 @@ aarch64_prep_args (struct call_context *context, unsigned char *stack,
|
||||
#endif
|
||||
}
|
||||
|
||||
return cif->aarch64_flags;
|
||||
}
|
||||
|
||||
ffi_status
|
||||
ffi_prep_cif_machdep (ffi_cif *cif)
|
||||
{
|
||||
/* Round the stack up to a multiple of the stack alignment requirement. */
|
||||
cif->bytes = ALIGN(cif->bytes, 16);
|
||||
|
||||
/* Initialize our flags. We are interested if this CIF will touch a
|
||||
vector register, if so we will enable context save and load to
|
||||
those registers, otherwise not. This is intended to be friendly
|
||||
to lazy float context switching in the kernel. */
|
||||
cif->aarch64_flags = 0;
|
||||
|
||||
if (is_v_register_candidate (cif->rtype))
|
||||
rtype = cif->rtype;
|
||||
if (is_register_candidate (rtype))
|
||||
{
|
||||
cif->aarch64_flags |= AARCH64_FLAG_ARG_V;
|
||||
}
|
||||
else
|
||||
{
|
||||
int i;
|
||||
for (i = 0; i < cif->nargs; i++)
|
||||
if (is_v_register_candidate (cif->arg_types[i]))
|
||||
{
|
||||
cif->aarch64_flags |= AARCH64_FLAG_ARG_V;
|
||||
break;
|
||||
}
|
||||
}
|
||||
ffi_call_SYSV (stack, frame, fn, cif->aarch64_flags);
|
||||
|
||||
#if defined (__APPLE__)
|
||||
cif->aarch64_nfixedargs = 0;
|
||||
#endif
|
||||
|
||||
return FFI_OK;
|
||||
}
|
||||
|
||||
#if defined (__APPLE__)
|
||||
|
||||
/* Perform Apple-specific cif processing for variadic calls */
|
||||
ffi_status ffi_prep_cif_machdep_var(ffi_cif *cif,
|
||||
unsigned int nfixedargs,
|
||||
unsigned int ntotalargs)
|
||||
{
|
||||
ffi_status status;
|
||||
|
||||
status = ffi_prep_cif_machdep (cif);
|
||||
|
||||
cif->aarch64_nfixedargs = nfixedargs;
|
||||
|
||||
return status;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
/* Call a function with the provided arguments and capture the return
|
||||
value. */
|
||||
void
|
||||
ffi_call (ffi_cif *cif, void (*fn)(void), void *rvalue, void **avalue)
|
||||
{
|
||||
extended_cif ecif;
|
||||
struct call_context context;
|
||||
size_t stack_bytes;
|
||||
int h, t;
|
||||
|
||||
ecif.cif = cif;
|
||||
ecif.avalue = avalue;
|
||||
ecif.rvalue = rvalue;
|
||||
|
||||
stack_bytes = cif->bytes;
|
||||
|
||||
memset (&context, 0, sizeof (context));
|
||||
if (is_register_candidate (cif->rtype))
|
||||
{
|
||||
ffi_call_SYSV (aarch64_prep_args, &context, &ecif, stack_bytes, fn);
|
||||
|
||||
t = cif->rtype->type;
|
||||
t = rtype->type;
|
||||
switch (t)
|
||||
{
|
||||
case FFI_TYPE_INT:
|
||||
@@ -754,33 +735,35 @@ ffi_call (ffi_cif *cif, void (*fn)(void), void *rvalue, void **avalue)
|
||||
case FFI_TYPE_POINTER:
|
||||
case FFI_TYPE_UINT64:
|
||||
case FFI_TYPE_SINT64:
|
||||
*(ffi_arg *)rvalue = extend_integer_type (&context.x[0], t);
|
||||
*(ffi_arg *)rvalue = extend_integer_type (&context->x[0], t);
|
||||
break;
|
||||
|
||||
case FFI_TYPE_FLOAT:
|
||||
case FFI_TYPE_DOUBLE:
|
||||
case FFI_TYPE_LONGDOUBLE:
|
||||
compress_hfa_type (rvalue, &context.v[0], 0x100 + t);
|
||||
compress_hfa_type (rvalue, &context->v[0], 0x100 + t);
|
||||
break;
|
||||
|
||||
case FFI_TYPE_STRUCT:
|
||||
h = is_hfa (cif->rtype);
|
||||
if (h)
|
||||
compress_hfa_type (rvalue, &context.v[0], h);
|
||||
else if ((cif->rtype->size + 7) / 8 < N_X_ARG_REG)
|
||||
memcpy (rvalue, &context.x[0], cif->rtype->size);
|
||||
compress_hfa_type (rvalue, &context->v[0], h);
|
||||
else
|
||||
abort();
|
||||
{
|
||||
FFI_ASSERT (rtype->size <= 16);
|
||||
memcpy (rvalue, &context->x[0], rtype->size);
|
||||
}
|
||||
break;
|
||||
|
||||
default:
|
||||
abort();
|
||||
FFI_ASSERT (0);
|
||||
break;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
context.x8 = (uintptr_t)rvalue;
|
||||
ffi_call_SYSV (aarch64_prep_args, &context, &ecif, stack_bytes, fn);
|
||||
context->x8 = (uintptr_t)rvalue;
|
||||
ffi_call_SYSV (stack, frame, fn, cif->aarch64_flags);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -851,7 +834,7 @@ ffi_closure_SYSV_inner (ffi_closure *closure, struct call_context *context,
|
||||
struct arg_state state;
|
||||
ffi_type *rtype;
|
||||
|
||||
arg_init (&state, ALIGN(cif->bytes, 16));
|
||||
arg_init (&state);
|
||||
|
||||
for (i = 0; i < nargs; i++)
|
||||
{
|
||||
|
||||
@@ -22,6 +22,7 @@ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */
|
||||
#define LIBFFI_ASM
|
||||
#include <fficonfig.h>
|
||||
#include <ffi.h>
|
||||
#include <ffi_cfi.h>
|
||||
#include "internal.h"
|
||||
|
||||
#ifdef HAVE_MACHINE_ASM_H
|
||||
@@ -38,158 +39,77 @@ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#define cfi_adjust_cfa_offset(off) .cfi_adjust_cfa_offset off
|
||||
#define cfi_rel_offset(reg, off) .cfi_rel_offset reg, off
|
||||
#define cfi_restore(reg) .cfi_restore reg
|
||||
#define cfi_def_cfa_register(reg) .cfi_def_cfa_register reg
|
||||
|
||||
.text
|
||||
.align 2
|
||||
|
||||
.globl CNAME(ffi_call_SYSV)
|
||||
#ifdef __ELF__
|
||||
.type CNAME(ffi_call_SYSV), #function
|
||||
.hidden CNAME(ffi_call_SYSV)
|
||||
#endif
|
||||
|
||||
/* ffi_call_SYSV()
|
||||
|
||||
Create a stack frame, setup an argument context, call the callee
|
||||
and extract the result.
|
||||
|
||||
The maximum required argument stack size is provided,
|
||||
ffi_call_SYSV() allocates that stack space then calls the
|
||||
prepare_fn to populate register context and stack. The
|
||||
argument passing registers are loaded from the register
|
||||
context and the callee called, on return the argument passing
|
||||
registers are saved back to the context. Our caller will
|
||||
extract the return value from the final state of the saved
|
||||
register context.
|
||||
|
||||
Prototype:
|
||||
|
||||
extern unsigned
|
||||
ffi_call_SYSV (void (*)(struct call_context *context, unsigned char *,
|
||||
extended_cif *),
|
||||
struct call_context *context,
|
||||
extended_cif *,
|
||||
size_t required_stack_size,
|
||||
void (*fn)(void));
|
||||
/* ffi_call_SYSV
|
||||
extern void ffi_call_SYSV (void *stack, void *frame,
|
||||
void (*fn)(void), int flags);
|
||||
|
||||
Therefore on entry we have:
|
||||
|
||||
x0 prepare_fn
|
||||
x1 &context
|
||||
x2 &ecif
|
||||
x3 bytes
|
||||
x4 fn
|
||||
x0 stack
|
||||
x1 frame
|
||||
x2 fn
|
||||
x3 flags
|
||||
*/
|
||||
|
||||
This function uses the following stack frame layout:
|
||||
|
||||
==
|
||||
saved x30(lr)
|
||||
x29(fp)-> saved x29(fp)
|
||||
saved x24
|
||||
saved x23
|
||||
saved x22
|
||||
sp' -> saved x21
|
||||
...
|
||||
sp -> (constructed callee stack arguments)
|
||||
==
|
||||
|
||||
Voila! */
|
||||
|
||||
#define ffi_call_SYSV_FS (8 * 4)
|
||||
|
||||
.cfi_startproc
|
||||
cfi_startproc
|
||||
CNAME(ffi_call_SYSV):
|
||||
stp x29, x30, [sp, #-16]!
|
||||
cfi_adjust_cfa_offset (16)
|
||||
/* Use a stack frame allocated by our caller. */
|
||||
cfi_def_cfa(x1, 32);
|
||||
stp x29, x30, [x1]
|
||||
mov x29, x1
|
||||
mov sp, x0
|
||||
cfi_def_cfa_register(x29)
|
||||
cfi_rel_offset (x29, 0)
|
||||
cfi_rel_offset (x30, 8)
|
||||
|
||||
mov x29, sp
|
||||
cfi_def_cfa_register (x29)
|
||||
sub sp, sp, #ffi_call_SYSV_FS
|
||||
str w3, [x29, #16] /* save flags */
|
||||
mov x9, x2 /* save fn */
|
||||
|
||||
stp x21, x22, [sp, #0]
|
||||
cfi_rel_offset (x21, 0 - ffi_call_SYSV_FS)
|
||||
cfi_rel_offset (x22, 8 - ffi_call_SYSV_FS)
|
||||
|
||||
stp x23, x24, [sp, #16]
|
||||
cfi_rel_offset (x23, 16 - ffi_call_SYSV_FS)
|
||||
cfi_rel_offset (x24, 24 - ffi_call_SYSV_FS)
|
||||
|
||||
mov x21, x1
|
||||
mov x22, x2
|
||||
mov x24, x4
|
||||
|
||||
/* Allocate the stack space for the actual arguments, many
|
||||
arguments will be passed in registers, but we assume
|
||||
worst case and allocate sufficient stack for ALL of
|
||||
the arguments. */
|
||||
sub sp, sp, x3
|
||||
|
||||
/* unsigned (*prepare_fn) (struct call_context *context,
|
||||
unsigned char *stack, extended_cif *ecif);
|
||||
*/
|
||||
mov x23, x0
|
||||
mov x0, x1
|
||||
mov x1, sp
|
||||
/* x2 already in place */
|
||||
blr x23
|
||||
|
||||
/* Preserve the flags returned. */
|
||||
mov x23, x0
|
||||
|
||||
/* Figure out if we should touch the vector registers. */
|
||||
tbz x23, #AARCH64_FLAG_ARG_V_BIT, 1f
|
||||
|
||||
/* Load the vector argument passing registers. */
|
||||
ldp q0, q1, [x21, #0]
|
||||
ldp q2, q3, [x21, #32]
|
||||
ldp q4, q5, [x21, #64]
|
||||
ldp q6, q7, [x21, #96]
|
||||
/* Load the vector argument passing registers, if necessary. */
|
||||
tbz w3, #AARCH64_FLAG_ARG_V_BIT, 1f
|
||||
ldp q0, q1, [x29, #32 + 0]
|
||||
ldp q2, q3, [x29, #32 + 32]
|
||||
ldp q4, q5, [x29, #32 + 64]
|
||||
ldp q6, q7, [x29, #32 + 96]
|
||||
1:
|
||||
/* Load the core argument passing registers, including
|
||||
the structure return pointer. */
|
||||
ldp x0, x1, [x21, #16*N_V_ARG_REG + 0]
|
||||
ldp x2, x3, [x21, #16*N_V_ARG_REG + 16]
|
||||
ldp x4, x5, [x21, #16*N_V_ARG_REG + 32]
|
||||
ldp x6, x7, [x21, #16*N_V_ARG_REG + 48]
|
||||
ldr x8, [x21, #16*N_V_ARG_REG + 64]
|
||||
ldp x0, x1, [x29, #32 + 16*N_V_ARG_REG + 0]
|
||||
ldp x2, x3, [x29, #32 + 16*N_V_ARG_REG + 16]
|
||||
ldp x4, x5, [x29, #32 + 16*N_V_ARG_REG + 32]
|
||||
ldp x6, x7, [x29, #32 + 16*N_V_ARG_REG + 48]
|
||||
ldr x8, [x29, #32 + 16*N_V_ARG_REG + 64]
|
||||
|
||||
blr x24
|
||||
blr x9 /* call fn */
|
||||
|
||||
/* Save the core return registers. */
|
||||
stp x0, x1, [x21, #16*N_V_ARG_REG]
|
||||
|
||||
/* Figure out if we should touch the vector registers. */
|
||||
tbz x23, #AARCH64_FLAG_ARG_V_BIT, 1f
|
||||
|
||||
/* Save the vector return registers. */
|
||||
stp q0, q1, [x21, #0]
|
||||
stp q2, q3, [x21, #32]
|
||||
1:
|
||||
/* All done, unwind our stack frame. */
|
||||
ldp x21, x22, [x29, # - ffi_call_SYSV_FS]
|
||||
cfi_restore (x21)
|
||||
cfi_restore (x22)
|
||||
|
||||
ldp x23, x24, [x29, # - ffi_call_SYSV_FS + 16]
|
||||
cfi_restore (x23)
|
||||
cfi_restore (x24)
|
||||
ldr w3, [x29, #16] /* reload flags */
|
||||
|
||||
/* Partially deconstruct the stack frame. */
|
||||
mov sp, x29
|
||||
cfi_def_cfa_register (sp)
|
||||
ldp x29, x30, [x29]
|
||||
|
||||
ldp x29, x30, [sp], #16
|
||||
cfi_adjust_cfa_offset (-16)
|
||||
cfi_restore (x29)
|
||||
cfi_restore (x30)
|
||||
/* Save the core return registers. */
|
||||
stp x0, x1, [sp, #32 + 16*N_V_ARG_REG]
|
||||
|
||||
/* Save the vector return registers, if necessary. */
|
||||
tbz w3, #AARCH64_FLAG_ARG_V_BIT, 1f
|
||||
stp q0, q1, [sp, #32 + 0]
|
||||
stp q2, q3, [sp, #32 + 32]
|
||||
1:
|
||||
/* All done. */
|
||||
ret
|
||||
|
||||
.cfi_endproc
|
||||
cfi_endproc
|
||||
#ifdef __ELF__
|
||||
.size CNAME(ffi_call_SYSV), .-CNAME(ffi_call_SYSV)
|
||||
#endif
|
||||
@@ -237,7 +157,7 @@ CNAME(ffi_call_SYSV):
|
||||
.align 2
|
||||
|
||||
.globl CNAME(ffi_closure_SYSV)
|
||||
.cfi_startproc
|
||||
cfi_startproc
|
||||
CNAME(ffi_closure_SYSV):
|
||||
stp x29, x30, [sp, #-16]!
|
||||
cfi_adjust_cfa_offset (16)
|
||||
@@ -310,7 +230,7 @@ CNAME(ffi_closure_SYSV):
|
||||
cfi_restore (x30)
|
||||
|
||||
ret
|
||||
.cfi_endproc
|
||||
cfi_endproc
|
||||
#ifdef __ELF__
|
||||
.size CNAME(ffi_closure_SYSV), .-CNAME(ffi_closure_SYSV)
|
||||
#endif
|
||||
|
||||
Reference in New Issue
Block a user