aarch64: Reduce the size of register_context
We don't need to store all 32 general and 32 vector registers. Only 8 of each are used for parameter passing, plus x8, which holds the structure return pointer.
This commit is contained in:
Committed by: Richard Henderson
Parent: 77c4cddca6
Commit: 95a04af134
@@ -21,8 +21,10 @@ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <stdint.h>
|
||||
#include <ffi.h>
|
||||
#include <ffi_common.h>
|
||||
#include "internal.h"
|
||||
|
||||
/* Force FFI_TYPE_LONGDOUBLE to be different than FFI_TYPE_DOUBLE;
|
||||
all further uses in this file will refer to the 128-bit type. */
|
||||
@@ -35,29 +37,26 @@ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */
|
||||
# define FFI_TYPE_LONGDOUBLE 4
|
||||
#endif
|
||||
|
||||
#define N_X_ARG_REG 8
|
||||
#define N_V_ARG_REG 8
|
||||
|
||||
#define AARCH64_FFI_WITH_V (1 << AARCH64_FFI_WITH_V_BIT)
|
||||
|
||||
union _d
|
||||
{
|
||||
UINT64 d;
|
||||
UINT32 s[2];
|
||||
};
|
||||
|
||||
struct _v
|
||||
{
|
||||
union _d d[2] __attribute__((aligned(16)));
|
||||
};
|
||||
|
||||
struct call_context
|
||||
{
|
||||
UINT64 x [AARCH64_N_XREG];
|
||||
struct
|
||||
{
|
||||
union _d d[2];
|
||||
} v [AARCH64_N_VREG];
|
||||
struct _v v[N_V_ARG_REG];
|
||||
UINT64 x[N_X_ARG_REG];
|
||||
UINT64 x8;
|
||||
};
|
||||
|
||||
#if defined (__clang__) && defined (__APPLE__)
|
||||
extern void
|
||||
sys_icache_invalidate (void *start, size_t len);
|
||||
extern void sys_icache_invalidate (void *start, size_t len);
|
||||
#endif
|
||||
|
||||
static inline void
|
||||
@@ -802,7 +801,7 @@ ffi_prep_cif_machdep (ffi_cif *cif)
|
||||
|
||||
if (is_v_register_candidate (cif->rtype))
|
||||
{
|
||||
cif->aarch64_flags |= AARCH64_FFI_WITH_V;
|
||||
cif->aarch64_flags |= AARCH64_FLAG_ARG_V;
|
||||
}
|
||||
else
|
||||
{
|
||||
@@ -810,7 +809,7 @@ ffi_prep_cif_machdep (ffi_cif *cif)
|
||||
for (i = 0; i < cif->nargs; i++)
|
||||
if (is_v_register_candidate (cif->arg_types[i]))
|
||||
{
|
||||
cif->aarch64_flags |= AARCH64_FFI_WITH_V;
|
||||
cif->aarch64_flags |= AARCH64_FLAG_ARG_V;
|
||||
break;
|
||||
}
|
||||
}
|
||||
@@ -924,7 +923,7 @@ ffi_call (ffi_cif *cif, void (*fn)(void), void *rvalue, void **avalue)
|
||||
}
|
||||
else
|
||||
{
|
||||
memcpy (get_x_addr (&context, 8), &rvalue, sizeof (UINT64));
|
||||
context.x8 = (uintptr_t)rvalue;
|
||||
ffi_call_SYSV (aarch64_prep_args, &context, &ecif,
|
||||
stack_bytes, fn);
|
||||
}
|
||||
@@ -1201,7 +1200,7 @@ ffi_closure_SYSV_inner (ffi_closure *closure, struct call_context *context,
|
||||
}
|
||||
else
|
||||
{
|
||||
memcpy (&rvalue, get_x_addr (context, 8), sizeof (UINT64));
|
||||
rvalue = (void *)(uintptr_t)context->x8;
|
||||
(closure->fun) (cif, rvalue, avalue, closure->user_data);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -54,10 +54,4 @@ typedef enum ffi_abi
|
||||
#define FFI_EXTRA_CIF_FIELDS unsigned aarch64_flags
|
||||
#endif
|
||||
|
||||
#define AARCH64_FFI_WITH_V_BIT 0
|
||||
|
||||
#define AARCH64_N_XREG 32
|
||||
#define AARCH64_N_VREG 32
|
||||
#define AARCH64_CALL_CONTEXT_SIZE (AARCH64_N_XREG * 8 + AARCH64_N_VREG * 16)
|
||||
|
||||
#endif
|
||||
|
||||
New file: src/aarch64/internal.h (26 lines)
@@ -0,0 +1,26 @@
|
||||
/*
|
||||
Permission is hereby granted, free of charge, to any person obtaining
|
||||
a copy of this software and associated documentation files (the
|
||||
``Software''), to deal in the Software without restriction, including
|
||||
without limitation the rights to use, copy, modify, merge, publish,
|
||||
distribute, sublicense, and/or sell copies of the Software, and to
|
||||
permit persons to whom the Software is furnished to do so, subject to
|
||||
the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be
|
||||
included in all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND,
|
||||
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
||||
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */
|
||||
|
||||
#define AARCH64_FLAG_ARG_V_BIT 0
|
||||
#define AARCH64_FLAG_ARG_V (1 << AARCH64_FLAG_ARG_V_BIT)
|
||||
|
||||
#define N_X_ARG_REG 8
|
||||
#define N_V_ARG_REG 8
|
||||
#define CALL_CONTEXT_SIZE (N_V_ARG_REG * 16 + N_X_ARG_REG * 8 + 16)
|
||||
@@ -22,6 +22,7 @@ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */
|
||||
#define LIBFFI_ASM
|
||||
#include <fficonfig.h>
|
||||
#include <ffi.h>
|
||||
#include "internal.h"
|
||||
|
||||
#ifdef HAVE_MACHINE_ASM_H
|
||||
#include <machine/asm.h>
|
||||
@@ -43,13 +44,12 @@ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */
|
||||
#define cfi_def_cfa_register(reg) .cfi_def_cfa_register reg
|
||||
|
||||
.text
|
||||
.align 2
|
||||
|
||||
.globl CNAME(ffi_call_SYSV)
|
||||
#ifdef __ELF__
|
||||
.type CNAME(ffi_call_SYSV), #function
|
||||
#endif
|
||||
#ifdef __APPLE__
|
||||
.align 2
|
||||
#endif
|
||||
|
||||
/* ffi_call_SYSV()
|
||||
|
||||
@@ -142,42 +142,40 @@ CNAME(ffi_call_SYSV):
|
||||
mov x23, x0
|
||||
|
||||
/* Figure out if we should touch the vector registers. */
|
||||
tbz x23, #AARCH64_FFI_WITH_V_BIT, 1f
|
||||
tbz x23, #AARCH64_FLAG_ARG_V_BIT, 1f
|
||||
|
||||
/* Load the vector argument passing registers. */
|
||||
ldp q0, q1, [x21, #8*32 + 0]
|
||||
ldp q2, q3, [x21, #8*32 + 32]
|
||||
ldp q4, q5, [x21, #8*32 + 64]
|
||||
ldp q6, q7, [x21, #8*32 + 96]
|
||||
ldp q0, q1, [x21, #0]
|
||||
ldp q2, q3, [x21, #32]
|
||||
ldp q4, q5, [x21, #64]
|
||||
ldp q6, q7, [x21, #96]
|
||||
1:
|
||||
/* Load the core argument passing registers. */
|
||||
ldp x0, x1, [x21, #0]
|
||||
ldp x2, x3, [x21, #16]
|
||||
ldp x4, x5, [x21, #32]
|
||||
ldp x6, x7, [x21, #48]
|
||||
|
||||
/* Don't forget x8 which may be holding the address of a return buffer.
|
||||
*/
|
||||
ldr x8, [x21, #8*8]
|
||||
/* Load the core argument passing registers, including
|
||||
the structure return pointer. */
|
||||
ldp x0, x1, [x21, #16*N_V_ARG_REG + 0]
|
||||
ldp x2, x3, [x21, #16*N_V_ARG_REG + 16]
|
||||
ldp x4, x5, [x21, #16*N_V_ARG_REG + 32]
|
||||
ldp x6, x7, [x21, #16*N_V_ARG_REG + 48]
|
||||
ldr x8, [x21, #16*N_V_ARG_REG + 64]
|
||||
|
||||
blr x24
|
||||
|
||||
/* Save the core argument passing registers. */
|
||||
stp x0, x1, [x21, #0]
|
||||
stp x2, x3, [x21, #16]
|
||||
stp x4, x5, [x21, #32]
|
||||
stp x6, x7, [x21, #48]
|
||||
stp x0, x1, [x21, #16*N_V_ARG_REG + 0]
|
||||
stp x2, x3, [x21, #16*N_V_ARG_REG + 16]
|
||||
stp x4, x5, [x21, #16*N_V_ARG_REG + 32]
|
||||
stp x6, x7, [x21, #16*N_V_ARG_REG + 48]
|
||||
|
||||
/* Note nothing useful ever comes back in x8! */
|
||||
|
||||
/* Figure out if we should touch the vector registers. */
|
||||
tbz x23, #AARCH64_FFI_WITH_V_BIT, 1f
|
||||
tbz x23, #AARCH64_FLAG_ARG_V_BIT, 1f
|
||||
|
||||
/* Save the vector argument passing registers. */
|
||||
stp q0, q1, [x21, #8*32 + 0]
|
||||
stp q2, q3, [x21, #8*32 + 32]
|
||||
stp q4, q5, [x21, #8*32 + 64]
|
||||
stp q6, q7, [x21, #8*32 + 96]
|
||||
stp q0, q1, [x21, #0]
|
||||
stp q2, q3, [x21, #32]
|
||||
stp q4, q5, [x21, #64]
|
||||
stp q6, q7, [x21, #96]
|
||||
1:
|
||||
/* All done, unwind our stack frame. */
|
||||
ldp x21, x22, [x29, # - ffi_call_SYSV_FS]
|
||||
@@ -203,7 +201,7 @@ CNAME(ffi_call_SYSV):
|
||||
.size CNAME(ffi_call_SYSV), .-CNAME(ffi_call_SYSV)
|
||||
#endif
|
||||
|
||||
#define ffi_closure_SYSV_FS (8 * 2 + AARCH64_CALL_CONTEXT_SIZE)
|
||||
#define ffi_closure_SYSV_FS (8 * 2 + CALL_CONTEXT_SIZE)
|
||||
|
||||
/* ffi_closure_SYSV
|
||||
|
||||
@@ -243,10 +241,9 @@ CNAME(ffi_call_SYSV):
|
||||
Voila! */
|
||||
|
||||
.text
|
||||
.globl CNAME(ffi_closure_SYSV)
|
||||
#ifdef __APPLE__
|
||||
.align 2
|
||||
#endif
|
||||
|
||||
.globl CNAME(ffi_closure_SYSV)
|
||||
.cfi_startproc
|
||||
CNAME(ffi_closure_SYSV):
|
||||
stp x29, x30, [sp, #-16]!
|
||||
@@ -268,24 +265,23 @@ CNAME(ffi_closure_SYSV):
|
||||
/* Preserve our struct trampoline_data * */
|
||||
mov x22, x17
|
||||
|
||||
/* Save the rest of the argument passing registers. */
|
||||
stp x0, x1, [x21, #0]
|
||||
stp x2, x3, [x21, #16]
|
||||
stp x4, x5, [x21, #32]
|
||||
stp x6, x7, [x21, #48]
|
||||
/* Don't forget we may have been given a result scratch pad address.
|
||||
*/
|
||||
str x8, [x21, #64]
|
||||
/* Save the rest of the argument passing registers, including
|
||||
the structure return pointer. */
|
||||
stp x0, x1, [x21, #16*N_V_ARG_REG + 0]
|
||||
stp x2, x3, [x21, #16*N_V_ARG_REG + 16]
|
||||
stp x4, x5, [x21, #16*N_V_ARG_REG + 32]
|
||||
stp x6, x7, [x21, #16*N_V_ARG_REG + 48]
|
||||
str x8, [x21, #16*N_V_ARG_REG + 64]
|
||||
|
||||
/* Figure out if we should touch the vector registers. */
|
||||
ldr x0, [x22, #8]
|
||||
tbz x0, #AARCH64_FFI_WITH_V_BIT, 1f
|
||||
tbz x0, #AARCH64_FLAG_ARG_V_BIT, 1f
|
||||
|
||||
/* Save the argument passing vector registers. */
|
||||
stp q0, q1, [x21, #8*32 + 0]
|
||||
stp q2, q3, [x21, #8*32 + 32]
|
||||
stp q4, q5, [x21, #8*32 + 64]
|
||||
stp q6, q7, [x21, #8*32 + 96]
|
||||
stp q0, q1, [x21, #0]
|
||||
stp q2, q3, [x21, #32]
|
||||
stp q4, q5, [x21, #64]
|
||||
stp q6, q7, [x21, #96]
|
||||
1:
|
||||
/* Load &ffi_closure.. */
|
||||
ldr x0, [x22, #0]
|
||||
@@ -298,19 +294,19 @@ CNAME(ffi_closure_SYSV):
|
||||
|
||||
/* Figure out if we should touch the vector registers. */
|
||||
ldr x0, [x22, #8]
|
||||
tbz x0, #AARCH64_FFI_WITH_V_BIT, 1f
|
||||
tbz x0, #AARCH64_FLAG_ARG_V_BIT, 1f
|
||||
|
||||
/* Load the result passing vector registers. */
|
||||
ldp q0, q1, [x21, #8*32 + 0]
|
||||
ldp q2, q3, [x21, #8*32 + 32]
|
||||
ldp q4, q5, [x21, #8*32 + 64]
|
||||
ldp q6, q7, [x21, #8*32 + 96]
|
||||
ldp q0, q1, [x21, #0]
|
||||
ldp q2, q3, [x21, #32]
|
||||
ldp q4, q5, [x21, #64]
|
||||
ldp q6, q7, [x21, #96]
|
||||
1:
|
||||
/* Load the result passing core registers. */
|
||||
ldp x0, x1, [x21, #0]
|
||||
ldp x2, x3, [x21, #16]
|
||||
ldp x4, x5, [x21, #32]
|
||||
ldp x6, x7, [x21, #48]
|
||||
ldp x0, x1, [x21, #16*N_V_ARG_REG + 0]
|
||||
ldp x2, x3, [x21, #16*N_V_ARG_REG + 16]
|
||||
ldp x4, x5, [x21, #16*N_V_ARG_REG + 32]
|
||||
ldp x6, x7, [x21, #16*N_V_ARG_REG + 48]
|
||||
/* Note nothing useful is returned in x8. */
|
||||
|
||||
/* We are done, unwind our frame. */
|
||||
|
||||
Reference in New Issue
Block a user