arm: Rewrite ffi_closure

Move the push of the argument registers into ffi_closure_SYSV,
reducing the size of the trampoline.
This commit is contained in:
Richard Henderson
2014-10-17 02:07:32 -04:00
parent e7f15f60e8
commit a4b785ea69
6 changed files with 187 additions and 4833 deletions

View File

@@ -48,9 +48,9 @@ EXTRA_DIST = LICENSE ChangeLog.v1 ChangeLog.libgcj \
src/xtensa/ffitarget.h src/xtensa/ffi.c src/xtensa/sysv.S \
ChangeLog.libffi m4/libtool.m4 m4/lt~obsolete.m4 \
m4/ltoptions.m4 m4/ltsugar.m4 m4/ltversion.m4 \
m4/ltversion.m4 src/arm/gentramp.sh src/debug.c msvcc.sh \
m4/ltversion.m4 src/debug.c msvcc.sh \
generate-darwin-source-and-headers.py \
libffi.xcodeproj/project.pbxproj src/arm/trampoline.S \
libffi.xcodeproj/project.pbxproj \
libtool-ldflags ChangeLog.libffi-3.1
info_TEXINFOS = doc/libffi.texi
@@ -190,9 +190,6 @@ nodist_libffi_la_SOURCES += src/arc/arcompact.S src/arc/ffi.c
endif
if ARM
nodist_libffi_la_SOURCES += src/arm/sysv.S src/arm/ffi.c
if FFI_EXEC_TRAMPOLINE_TABLE
nodist_libffi_la_SOURCES += src/arm/trampoline.S
endif
endif
if AVR32
nodist_libffi_la_SOURCES += src/avr32/sysv.S src/avr32/ffi.c

View File

@@ -369,152 +369,82 @@ ffi_call (ffi_cif * cif, void (*fn) (void), void *rvalue, void **avalue)
memcpy (rvalue, new_rvalue, rtype->size);
}
/** private members **/
static void ffi_prep_incoming_args_SYSV (char *stack, void **ret,
void **args, ffi_cif *cif,
float *vfp_stack);
static void ffi_prep_incoming_args_VFP (char *stack, void **ret,
void **args, ffi_cif *cif,
float *vfp_stack);
void ffi_closure_SYSV (ffi_closure *);
void ffi_closure_VFP (ffi_closure *);
/* This function is jumped to by the trampoline */
unsigned int FFI_HIDDEN
ffi_closure_inner (ffi_closure *closure,
void **respp, void *args, void *vfp_args)
static void *
ffi_prep_incoming_args_SYSV (ffi_cif *cif, void *rvalue,
char *argp, void **avalue)
{
// our various things...
ffi_cif *cif;
void **arg_area;
ffi_type **arg_types = cif->arg_types;
int i, n;
cif = closure->cif;
arg_area = (void **) alloca (cif->nargs * sizeof (void *));
/* this call will initialize ARG_AREA, such that each
* element in that array points to the corresponding
* value on the stack; and if the function returns
* a structure, it will re-set RESP to point to the
* structure return address. */
if (cif->abi == FFI_VFP)
ffi_prep_incoming_args_VFP (args, respp, arg_area, cif, vfp_args);
else
ffi_prep_incoming_args_SYSV (args, respp, arg_area, cif, vfp_args);
(closure->fun) (cif, *respp, arg_area, closure->user_data);
return cif->flags;
}
/*@-exportheader@*/
static void
ffi_prep_incoming_args_SYSV (char *stack, void **rvalue,
void **avalue, ffi_cif *cif,
/* Used only under VFP hard-float ABI. */
float *vfp_stack)
/*@=exportheader@*/
if (cif->flags == ARM_TYPE_STRUCT)
{
register unsigned int i;
register void **p_argv;
register char *argp;
register ffi_type **p_arg;
argp = stack;
if (cif->flags == FFI_TYPE_STRUCT)
{
*rvalue = *(void **) argp;
rvalue = *(void **) argp;
argp += 4;
}
p_argv = avalue;
for (i = cif->nargs, p_arg = cif->arg_types; (i != 0); i--, p_arg++)
for (i = 0, n = cif->nargs; i < n; i++)
{
size_t z;
ffi_type *ty = arg_types[i];
size_t z = ty->size;
argp = ffi_align (*p_arg, argp);
z = (*p_arg)->size;
/* because we're little endian, this is what it turns into. */
*p_argv = (void *) argp;
p_argv++;
argp = ffi_align (ty, argp);
avalue[i] = (void *) argp;
argp += z;
}
return;
return rvalue;
}
/*@-exportheader@*/
static void
ffi_prep_incoming_args_VFP (char *stack, void **rvalue,
void **avalue, ffi_cif * cif,
/* Used only under VFP hard-float ABI. */
float *vfp_stack)
/*@=exportheader@*/
static void *
ffi_prep_incoming_args_VFP (ffi_cif *cif, void *rvalue, char *stack,
char *vfp_space, void **avalue)
{
register unsigned int i, vi = 0;
register void **p_argv;
register char *argp, *regp, *eo_regp;
register ffi_type **p_arg;
ffi_type **arg_types = cif->arg_types;
int i, n, vi = 0;
char *argp, *regp, *eo_regp;
char done_with_regs = 0;
char stack_used = 0;
FFI_ASSERT (cif->abi == FFI_VFP);
regp = stack;
eo_regp = argp = regp + 16;
if (cif->flags == FFI_TYPE_STRUCT)
if (cif->flags == ARM_TYPE_STRUCT)
{
*rvalue = *(void **) regp;
rvalue = *(void **) regp;
regp += 4;
}
p_argv = avalue;
for (i = cif->nargs, p_arg = cif->arg_types; (i != 0); i--, p_arg++)
for (i = 0, n = cif->nargs; i < n; i++)
{
int is_vfp_type = vfp_type_p (*p_arg);
size_t z;
ffi_type *ty = arg_types[i];
int is_vfp_type = vfp_type_p (ty);
size_t z = ty->size;
if (vi < cif->vfp_nargs && is_vfp_type)
{
*p_argv++ = (void *) (vfp_stack + cif->vfp_args[vi++]);
avalue[i] = vfp_space + cif->vfp_args[vi++] * 4;
continue;
}
else if (!done_with_regs && !is_vfp_type)
{
char *tregp = ffi_align (*p_arg, regp);
char *tregp = ffi_align (ty, regp);
z = (*p_arg)->size;
z = (z < 4) ? 4 : z; // pad
/* if the arguments either fits into the registers or uses registers
* and stack, while we haven't read other things from the stack */
/* If the arguments either fits into the registers or uses registers
and stack, while we haven't read other things from the stack */
if (tregp + z <= eo_regp || !stack_used)
{
/* because we're little endian, this is what it turns into. */
*p_argv = (void *) tregp;
p_argv++;
/* Because we're little endian, this is what it turns into. */
avalue[i] = (void *) tregp;
regp = tregp + z;
// if we read past the last core register, make sure we have not read
// from the stack before and continue reading after regp
/* If we read past the last core register, make sure we
have not read from the stack before and continue
reading after regp. */
if (regp > eo_regp)
{
if (stack_used)
{
abort (); // we should never read past the end of the register
// area if the stack is already in use
}
FFI_ASSERT (!stack_used);
argp = regp;
}
if (regp >= eo_regp)
@@ -525,26 +455,41 @@ ffi_prep_incoming_args_VFP (char *stack, void **rvalue,
continue;
}
}
stack_used = 1;
argp = ffi_align (*p_arg, argp);
z = (*p_arg)->size;
/* because we're little endian, this is what it turns into. */
*p_argv = (void *) argp;
p_argv++;
argp = ffi_align (ty, argp);
avalue[i] = (void *) argp;
argp += z;
}
return;
return rvalue;
}
/* How to make a trampoline. */
/* C-level worker for the SYSV closure entry point.  The assembly stub
   ffi_closure_SYSV calls this with the closure, a pointer to its
   scratch return-value area (RVALUE) and a pointer to the saved
   incoming arguments (ARGP).  Returns cif->flags, which the stub uses
   to select how the result is loaded back into registers.  */
int FFI_HIDDEN
ffi_closure_inner_SYSV (ffi_closure *closure, void *rvalue, char *argp)
{
ffi_cif *cif = closure->cif;
/* One pointer slot per declared argument; filled in below.  */
void **avalue = (void **) alloca (cif->nargs * sizeof (void *));
extern unsigned int ffi_arm_trampoline[3];
/* Build the argument-pointer array.  The helper hands back the return
   buffer to use, which may differ from the RVALUE passed in when the
   function returns a structure in memory.  */
rvalue = ffi_prep_incoming_args_SYSV (cif, rvalue, argp, avalue);
/* Run the user's handler.  */
closure->fun (cif, rvalue, avalue, closure->user_data);
return cif->flags;
}
/* C-level worker for the VFP (hard-float) closure entry point.

   Called from the ffi_closure_VFP assembly stub with:
     CLOSURE    the closure being invoked;
     RVALUE     scratch space for the return value;
     ARGP       the saved core argument registers, followed by the
                caller's stack arguments;
     VFP_SPACE  the saved VFP argument registers.

   Returns cif->flags, which the stub uses to select how the result is
   loaded back into registers.  */
int FFI_HIDDEN
ffi_closure_inner_VFP (ffi_closure *closure, void *rvalue,
                       char *argp, char *vfp_space)
{
  ffi_cif *desc = closure->cif;

  /* One pointer slot per declared argument.  This must be allocated in
     this frame so that it stays live across the handler call.  */
  void **arg_ptrs = alloca (desc->nargs * sizeof *arg_ptrs);

  /* Locate every argument; the helper hands back the return buffer to
     use, which may differ from RVALUE for structure returns.  */
  void *result = ffi_prep_incoming_args_VFP (desc, rvalue, argp,
                                             vfp_space, arg_ptrs);

  /* Run the user's handler.  */
  closure->fun (desc, result, arg_ptrs, closure->user_data);

  return desc->flags;
}
void ffi_closure_SYSV (void) FFI_HIDDEN;
void ffi_closure_VFP (void) FFI_HIDDEN;
#if FFI_EXEC_TRAMPOLINE_TABLE
@@ -788,19 +733,7 @@ ffi_closure_free (void *ptr)
#else
#define FFI_INIT_TRAMPOLINE(TRAMP,FUN,CTX) \
({ unsigned char *__tramp = (unsigned char*)(TRAMP); \
unsigned int __fun = (unsigned int)(FUN); \
unsigned int __ctx = (unsigned int)(CTX); \
unsigned char *insns = (unsigned char *)(CTX); \
memcpy (__tramp, ffi_arm_trampoline, sizeof ffi_arm_trampoline); \
*(unsigned int*) &__tramp[12] = __ctx; \
*(unsigned int*) &__tramp[16] = __fun; \
__clear_cache((&__tramp[0]), (&__tramp[19])); /* Clear data mapping. */ \
__clear_cache(insns, insns + 3 * sizeof (unsigned int)); \
/* Clear instruction \
mapping. */ \
})
extern unsigned int ffi_arm_trampoline[2] FFI_HIDDEN;
#endif
@@ -812,15 +745,15 @@ ffi_prep_closure_loc (ffi_closure * closure,
void (*fun) (ffi_cif *, void *, void **, void *),
void *user_data, void *codeloc)
{
void (*closure_func) (ffi_closure *) = NULL;
void (*closure_func) (void) = ffi_closure_SYSV;
if (cif->abi == FFI_SYSV)
closure_func = &ffi_closure_SYSV;
#ifdef __ARM_EABI__
else if (cif->abi == FFI_VFP)
closure_func = &ffi_closure_VFP;
#endif
else
if (cif->abi == FFI_VFP)
{
/* We only need take the vfp path if there are vfp arguments. */
if (cif->vfp_used)
closure_func = ffi_closure_VFP;
}
else if (cif->abi != FFI_SYSV)
return FFI_BAD_ABI;
#if FFI_EXEC_TRAMPOLINE_TABLE
@@ -828,12 +761,15 @@ ffi_prep_closure_loc (ffi_closure * closure,
config[0] = closure;
config[1] = closure_func;
#else
FFI_INIT_TRAMPOLINE (&closure->tramp[0], closure_func, codeloc);
memcpy (closure->tramp, ffi_arm_trampoline, 8);
__clear_cache(closure->tramp, closure->tramp + 8); /* clear data map */
__clear_cache(codeloc, codeloc + 8); /* clear insn map */
*(void (**)(void))(closure->tramp + 8) = closure_func;
#endif
closure->cif = cif;
closure->user_data = user_data;
closure->fun = fun;
closure->user_data = user_data;
return FFI_OK;
}

View File

@@ -65,7 +65,7 @@ typedef enum ffi_abi {
/* ---- Definitions for closures ----------------------------------------- */
#define FFI_CLOSURES 1
#define FFI_TRAMPOLINE_SIZE 20
#define FFI_TRAMPOLINE_SIZE 12
#define FFI_NATIVE_RAW_API 0
#endif

View File

@@ -1,118 +0,0 @@
#!/bin/sh
# -----------------------------------------------------------------------
# gentramp.sh - Copyright (c) 2010, Plausible Labs Cooperative, Inc.
#
# ARM Trampoline Page Generator
#
# Permission is hereby granted, free of charge, to any person obtaining
# a copy of this software and associated documentation files (the
# ``Software''), to deal in the Software without restriction, including
# without limitation the rights to use, copy, modify, merge, publish,
# distribute, sublicense, and/or sell copies of the Software, and to
# permit persons to whom the Software is furnished to do so, subject to
# the following conditions:
#
# The above copyright notice and this permission notice shall be included
# in all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND,
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
# DEALINGS IN THE SOFTWARE.
# -----------------------------------------------------------------------
# Name this script was invoked as; echoed into the generated file's banner.
PROGNAME=$0
# Each trampoline is exactly 3 instructions, or 12 bytes. If any of these
# values change, the entire ARM trampoline implementation must be updated
# to match, too.
# Size of an individual trampoline, in bytes
TRAMPOLINE_SIZE=12
# Page size, in bytes
PAGE_SIZE=4096
# Compute the size of the reachable config page; the first 16 bytes of the
# config page are unreachable due to our maximum pc-relative ldr offset.
PAGE_AVAIL=`expr $PAGE_SIZE - 16`
# Compute the number of available trampolines.
TRAMPOLINE_COUNT=`expr $PAGE_AVAIL / $TRAMPOLINE_SIZE`
# Emit the preamble of the generated assembly file: a "generated code"
# banner naming this script, the license text, and the directives that
# open the page-aligned trampoline table.
header () {
    echo "# GENERATED CODE - DO NOT EDIT"
    echo "# This file was generated by $PROGNAME"
    echo ""
    # Write out the license header.  The delimiter is quoted so that the
    # text is copied verbatim: with an unquoted delimiter the shell treats
    # the doubled backquotes in the license text as empty command
    # substitutions and silently drops them from the output.
    cat << 'EOF'
# Copyright (c) 2010, Plausible Labs Cooperative, Inc.
#
# Permission is hereby granted, free of charge, to any person obtaining
# a copy of this software and associated documentation files (the
# ``Software''), to deal in the Software without restriction, including
# without limitation the rights to use, copy, modify, merge, publish,
# distribute, sublicense, and/or sell copies of the Software, and to
# permit persons to whom the Software is furnished to do so, subject to
# the following conditions:
#
# The above copyright notice and this permission notice shall be included
# in all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND,
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
# DEALINGS IN THE SOFTWARE.
# -----------------------------------------------------------------------
EOF
    # Write out the trampoline table, aligned to the page boundary
    echo ".text"
    echo ".align 12"
    echo ".globl _ffi_closure_trampoline_table_page"
    echo "_ffi_closure_trampoline_table_page:"
}
# WARNING - Don't modify the trampoline code size without also updating the relevant libffi code
trampoline () {
# Emit the assembly text for one trampoline (three instructions).  The
# lines between the here-document markers are written to the output; the
# "//" lines among them are comments in the generated assembly, not in
# this script.
cat << END
// trampoline
// Save to stack
stmfd sp!, {r0-r3}
// Load the context argument from the config page.
// This places the first usable config value at _ffi_closure_trampoline_table-4080
// This accounts for the above 4-byte stmfd instruction, plus 8 bytes constant when loading from pc.
ldr r0, [pc, #-4092]
// Load the jump address from the config page.
ldr pc, [pc, #-4092]
END
}
# Entry point: emit the file header followed by TRAMPOLINE_COUNT copies of
# the trampoline.
main () {
    # Write out the header
    header

    # Write out the trampolines.  The counter is a plain variable advanced
    # with POSIX arithmetic expansion: "local" is not part of POSIX sh (the
    # script runs under #!/bin/sh), and this also avoids spawning expr on
    # every iteration.
    i=0
    while [ "$i" -lt "$TRAMPOLINE_COUNT" ]; do
        trampoline
        i=$((i + 1))
    done
}
main

View File

@@ -189,139 +189,128 @@ ARM_FUNC_END(ffi_call_SYSV)
ARM_FUNC_START(ffi_closure_SYSV, 1)
UNWIND .fnstart
UNWIND .pad #16
add ip, sp, #16
stmfd sp!, {ip, lr}
UNWIND .save {r0, lr}
add r2, sp, #8
UNWIND .pad #16
sub sp, sp, #16
str sp, [sp, #8]
add r1, sp, #8
bl CNAME(ffi_closure_inner)
cmp r0, #FFI_TYPE_INT
beq .Lretint
cfi_startproc
stmdb sp!, {r0-r3} @ save argument regs
cfi_adjust_cfa_offset(16)
mov r0, ip @ load closure
add ip, sp, #16 @ compute entry sp
sub sp, sp, #32 @ allocate rvalue space
stmdb sp!, {sp,lr}
cmp r0, #FFI_TYPE_FLOAT
#if defined(__SOFTFP__) || defined(__ARM_EABI__)
beq .Lretint
#else
beq .Lretfloat
#endif
/* Remember that EABI unwind info only applies at call sites.
We need do nothing except note the save of the stack pointer
and the link registers. */
UNWIND .save {sp,lr}
cfi_adjust_cfa_offset(8)
cfi_rel_offset(lr, 4)
cmp r0, #FFI_TYPE_DOUBLE
#if defined(__SOFTFP__) || defined(__ARM_EABI__)
beq .Lretlonglong
#else
beq .Lretdouble
#endif
add r1, sp, #8 @ load respp
add r2, sp, #8+32 @ load args
mov r3, #0 @ load vfp_args
cmp r0, #FFI_TYPE_LONGDOUBLE
#if defined(__SOFTFP__) || defined(__ARM_EABI__)
beq .Lretlonglong
#else
beq .Lretlongdouble
#endif
bl CNAME(ffi_closure_inner_SYSV)
cmp r0, #FFI_TYPE_SINT64
beq .Lretlonglong
.Lclosure_epilogue:
add sp, sp, #16
ldmfd sp, {sp, pc}
.Lretint:
ldr r0, [sp]
b .Lclosure_epilogue
.Lretlonglong:
ldr r0, [sp]
ldr r1, [sp, #4]
b .Lclosure_epilogue
#if !defined(__SOFTFP__) && !defined(__ARM_EABI__)
.Lretfloat:
ldfs f0, [sp]
b .Lclosure_epilogue
.Lretdouble:
ldfd f0, [sp]
b .Lclosure_epilogue
.Lretlongdouble:
ldfd f0, [sp]
b .Lclosure_epilogue
#endif
@ Load values returned in registers.
add r2, sp, #8 @ load respp
adr r3, CNAME(ffi_closure_ret)
add pc, r3, r0, lsl #3
cfi_endproc
UNWIND .fnend
ARM_FUNC_END(ffi_closure_SYSV)
/* Below are VFP hard-float ABI call and closure implementations.
Add VFP FPU directive here. This is only compiled into the library
under EABI. */
#ifdef __ARM_EABI__
ARM_FUNC_START(ffi_closure_VFP, 1)
UNWIND .fnstart
fstmfdd sp!, {d0-d7}
@ r0-r3, then d0-d7
UNWIND .pad #80
add ip, sp, #80
stmfd sp!, {ip, lr}
UNWIND .save {r0, lr}
add r2, sp, #72
add r3, sp, #8
UNWIND .pad #72
sub sp, sp, #72
str sp, [sp, #64]
add r1, sp, #64
bl CNAME(ffi_closure_inner)
cfi_startproc
stmdb sp!, {r0-r3} @ save argument regs
cfi_adjust_cfa_offset(16)
sub sp, sp, #64+32 @ allocate vfp+rvalue space
cfi_adjust_cfa_offset(64+32)
stc p11, cr0, [sp], {16} @ vstm sp, {d0-d7}
mov r0, ip @ load closure
add ip, sp, #16+64+32 @ compute entry sp
stmdb sp!, {ip,lr}
cmp r0, #FFI_TYPE_INT
beq .Lretint_vfp
/* See above. */
UNWIND .save {sp,lr}
cfi_adjust_cfa_offset(8)
cfi_rel_offset(sp, 0)
cfi_rel_offset(lr, 4)
cmp r0, #FFI_TYPE_FLOAT
beq .Lretfloat_vfp
add r1, sp, #8+64 @ load respp
add r2, sp, #8+64+32 @ load args
add r3, sp, #8 @ load vfp_args
cmp r0, #FFI_TYPE_DOUBLE
cmpne r0, #FFI_TYPE_LONGDOUBLE
beq .Lretdouble_vfp
bl CNAME(ffi_closure_inner_VFP)
cmp r0, #FFI_TYPE_SINT64
beq .Lretlonglong_vfp
cmp r0, #FFI_TYPE_STRUCT_VFP_FLOAT
beq .Lretfloat_struct_vfp
cmp r0, #FFI_TYPE_STRUCT_VFP_DOUBLE
beq .Lretdouble_struct_vfp
.Lclosure_epilogue_vfp:
add sp, sp, #72
ldmfd sp, {sp, pc}
.Lretfloat_vfp:
flds s0, [sp]
b .Lclosure_epilogue_vfp
.Lretdouble_vfp:
fldd d0, [sp]
b .Lclosure_epilogue_vfp
.Lretint_vfp:
ldr r0, [sp]
b .Lclosure_epilogue_vfp
.Lretlonglong_vfp:
ldmia sp, {r0, r1}
b .Lclosure_epilogue_vfp
.Lretfloat_struct_vfp:
fldmiad sp, {d0-d1}
b .Lclosure_epilogue_vfp
.Lretdouble_struct_vfp:
fldmiad sp, {d0-d3}
b .Lclosure_epilogue_vfp
@ Load values returned in registers.
add r2, sp, #8+64 @ load respp
adr r3, CNAME(ffi_closure_ret)
add pc, r3, r0, lsl #3
cfi_endproc
UNWIND .fnend
ARM_FUNC_END(ffi_closure_VFP)
#endif
/* Load values returned in registers for both closure entry points.
Note that we use LDM with SP in the register set. This is deprecated
by ARM, but not yet unpredictable. */
ARM_FUNC_START(ffi_closure_ret, 0)
cfi_startproc
cfi_rel_offset(sp, 0)
cfi_rel_offset(lr, 4)
@ Dispatch table.  Both closure entry points jump into it with
@ "add pc, r3, r0, lsl #3", where r3 holds the address of
@ ffi_closure_ret, r0 is the value returned by the inner C function,
@ and r2 points at the return-value buffer.  Each entry is therefore
@ expected to occupy 8 bytes (two instructions).
@ NOTE(review): the E macro is defined outside this view; presumably it
@ positions the following entry at 0b + 8 * its argument -- confirm.
@ "ldm sp, {sp,pc}" reloads sp and pc from the two words at the top of
@ the stack, which the entry stubs pushed as their last prologue step.
0:
E ARM_TYPE_VFP_S
ldc p10, cr0, [r2] @ vldr s0, [r2]
ldm sp, {sp,pc}
E ARM_TYPE_VFP_D
ldc p11, cr0, [r2] @ vldr d0, [r2]
ldm sp, {sp,pc}
E ARM_TYPE_VFP_N
ldc p11, cr0, [r2], {8} @ vldm r2, {d0-d3}
ldm sp, {sp,pc}
E ARM_TYPE_INT64
@ Load the second word into r1; this entry has no return of its own and
@ runs on into the next one to load r0 and return.
@ NOTE(review): relies on the INT entry directly following -- confirm
@ against the ARM_TYPE_* values.
ldr r1, [r2, #4]
nop
E ARM_TYPE_INT
ldr r0, [r2]
ldm sp, {sp,pc}
E ARM_TYPE_VOID
@ Nothing to load; the nop follows the return and only fills the entry.
ldm sp, {sp,pc}
nop
E ARM_TYPE_STRUCT
@ No register load for this type either; just return.
ldm sp, {sp,pc}
cfi_endproc
ARM_FUNC_END(ffi_closure_ret)
#if FFI_EXEC_TRAMPOLINE_TABLE
/* ??? The iOS support should be updated. The first insn used to
be STMFD, but that's been moved into ffi_closure_SYSV. If the
writable page is put after this one we can make use of the
pc+8 feature of the architecture. We can also reduce the size
of the thunk to 8 and pack more of these into the page.
In the meantime, simply replace the STMFD with a NOP so as to
keep all the magic numbers the same within ffi.c. */
.align 12
ARM_FUNC_START(ffi_closure_trampoline_table_page)
.rept 4096 / 12
nop
ldr ip, [pc, #-4092]
ldr pc, [pc, #-4092]
.endr
#else
ARM_FUNC_START(ffi_arm_trampoline, 1)
stmfd sp!, {r0-r3}
ldr r0, [pc]
ldr pc, [pc]
0: adr ip, 0b
ldr pc, 1f
1: .long 0
ARM_FUNC_END(ffi_arm_trampoline)
#endif /* FFI_EXEC_TRAMPOLINE_TABLE */
#if defined __ELF__ && defined __linux__
.section .note.GNU-stack,"",%progbits
#endif

File diff suppressed because it is too large Load Diff