aarch64: Reduce the size of struct call_context
We don't need to store all 32 general and all 32 vector registers. Only 8 of each are used for parameter passing; x8, which holds the structure return pointer, is kept in its own slot.
This commit is contained in:
committed by
Richard Henderson
parent
77c4cddca6
commit
95a04af134
@@ -21,8 +21,10 @@ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */
|
|||||||
|
|
||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
#include <stdlib.h>
|
#include <stdlib.h>
|
||||||
|
#include <stdint.h>
|
||||||
#include <ffi.h>
|
#include <ffi.h>
|
||||||
#include <ffi_common.h>
|
#include <ffi_common.h>
|
||||||
|
#include "internal.h"
|
||||||
|
|
||||||
/* Force FFI_TYPE_LONGDOUBLE to be different than FFI_TYPE_DOUBLE;
|
/* Force FFI_TYPE_LONGDOUBLE to be different than FFI_TYPE_DOUBLE;
|
||||||
all further uses in this file will refer to the 128-bit type. */
|
all further uses in this file will refer to the 128-bit type. */
|
||||||
@@ -35,38 +37,35 @@ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */
|
|||||||
# define FFI_TYPE_LONGDOUBLE 4
|
# define FFI_TYPE_LONGDOUBLE 4
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#define N_X_ARG_REG 8
|
|
||||||
#define N_V_ARG_REG 8
|
|
||||||
|
|
||||||
#define AARCH64_FFI_WITH_V (1 << AARCH64_FFI_WITH_V_BIT)
|
|
||||||
|
|
||||||
union _d
|
union _d
|
||||||
{
|
{
|
||||||
UINT64 d;
|
UINT64 d;
|
||||||
UINT32 s[2];
|
UINT32 s[2];
|
||||||
};
|
};
|
||||||
|
|
||||||
|
struct _v
|
||||||
|
{
|
||||||
|
union _d d[2] __attribute__((aligned(16)));
|
||||||
|
};
|
||||||
|
|
||||||
struct call_context
|
struct call_context
|
||||||
{
|
{
|
||||||
UINT64 x [AARCH64_N_XREG];
|
struct _v v[N_V_ARG_REG];
|
||||||
struct
|
UINT64 x[N_X_ARG_REG];
|
||||||
{
|
UINT64 x8;
|
||||||
union _d d[2];
|
|
||||||
} v [AARCH64_N_VREG];
|
|
||||||
};
|
};
|
||||||
|
|
||||||
#if defined (__clang__) && defined (__APPLE__)
|
#if defined (__clang__) && defined (__APPLE__)
|
||||||
extern void
|
extern void sys_icache_invalidate (void *start, size_t len);
|
||||||
sys_icache_invalidate (void *start, size_t len);
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
static inline void
|
static inline void
|
||||||
ffi_clear_cache (void *start, void *end)
|
ffi_clear_cache (void *start, void *end)
|
||||||
{
|
{
|
||||||
#if defined (__clang__) && defined (__APPLE__)
|
#if defined (__clang__) && defined (__APPLE__)
|
||||||
sys_icache_invalidate (start, (char *)end - (char *)start);
|
sys_icache_invalidate (start, (char *)end - (char *)start);
|
||||||
#elif defined (__GNUC__)
|
#elif defined (__GNUC__)
|
||||||
__builtin___clear_cache (start, end);
|
__builtin___clear_cache (start, end);
|
||||||
#else
|
#else
|
||||||
#error "Missing builtin to flush instruction cache"
|
#error "Missing builtin to flush instruction cache"
|
||||||
#endif
|
#endif
|
||||||
@@ -802,7 +801,7 @@ ffi_prep_cif_machdep (ffi_cif *cif)
|
|||||||
|
|
||||||
if (is_v_register_candidate (cif->rtype))
|
if (is_v_register_candidate (cif->rtype))
|
||||||
{
|
{
|
||||||
cif->aarch64_flags |= AARCH64_FFI_WITH_V;
|
cif->aarch64_flags |= AARCH64_FLAG_ARG_V;
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
@@ -810,7 +809,7 @@ ffi_prep_cif_machdep (ffi_cif *cif)
|
|||||||
for (i = 0; i < cif->nargs; i++)
|
for (i = 0; i < cif->nargs; i++)
|
||||||
if (is_v_register_candidate (cif->arg_types[i]))
|
if (is_v_register_candidate (cif->arg_types[i]))
|
||||||
{
|
{
|
||||||
cif->aarch64_flags |= AARCH64_FFI_WITH_V;
|
cif->aarch64_flags |= AARCH64_FLAG_ARG_V;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -924,7 +923,7 @@ ffi_call (ffi_cif *cif, void (*fn)(void), void *rvalue, void **avalue)
|
|||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
memcpy (get_x_addr (&context, 8), &rvalue, sizeof (UINT64));
|
context.x8 = (uintptr_t)rvalue;
|
||||||
ffi_call_SYSV (aarch64_prep_args, &context, &ecif,
|
ffi_call_SYSV (aarch64_prep_args, &context, &ecif,
|
||||||
stack_bytes, fn);
|
stack_bytes, fn);
|
||||||
}
|
}
|
||||||
@@ -1201,7 +1200,7 @@ ffi_closure_SYSV_inner (ffi_closure *closure, struct call_context *context,
|
|||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
memcpy (&rvalue, get_x_addr (context, 8), sizeof (UINT64));
|
rvalue = (void *)(uintptr_t)context->x8;
|
||||||
(closure->fun) (cif, rvalue, avalue, closure->user_data);
|
(closure->fun) (cif, rvalue, avalue, closure->user_data);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -54,10 +54,4 @@ typedef enum ffi_abi
|
|||||||
#define FFI_EXTRA_CIF_FIELDS unsigned aarch64_flags
|
#define FFI_EXTRA_CIF_FIELDS unsigned aarch64_flags
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#define AARCH64_FFI_WITH_V_BIT 0
|
|
||||||
|
|
||||||
#define AARCH64_N_XREG 32
|
|
||||||
#define AARCH64_N_VREG 32
|
|
||||||
#define AARCH64_CALL_CONTEXT_SIZE (AARCH64_N_XREG * 8 + AARCH64_N_VREG * 16)
|
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|||||||
26
src/aarch64/internal.h
Normal file
26
src/aarch64/internal.h
Normal file
@@ -0,0 +1,26 @@
|
|||||||
|
/*
|
||||||
|
Permission is hereby granted, free of charge, to any person obtaining
|
||||||
|
a copy of this software and associated documentation files (the
|
||||||
|
``Software''), to deal in the Software without restriction, including
|
||||||
|
without limitation the rights to use, copy, modify, merge, publish,
|
||||||
|
distribute, sublicense, and/or sell copies of the Software, and to
|
||||||
|
permit persons to whom the Software is furnished to do so, subject to
|
||||||
|
the following conditions:
|
||||||
|
|
||||||
|
The above copyright notice and this permission notice shall be
|
||||||
|
included in all copies or substantial portions of the Software.
|
||||||
|
|
||||||
|
THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND,
|
||||||
|
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||||
|
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||||
|
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
||||||
|
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||||
|
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||||
|
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */
|
||||||
|
|
||||||
|
#define AARCH64_FLAG_ARG_V_BIT 0
|
||||||
|
#define AARCH64_FLAG_ARG_V (1 << AARCH64_FLAG_ARG_V_BIT)
|
||||||
|
|
||||||
|
#define N_X_ARG_REG 8
|
||||||
|
#define N_V_ARG_REG 8
|
||||||
|
#define CALL_CONTEXT_SIZE (N_V_ARG_REG * 16 + N_X_ARG_REG * 8 + 16)
|
||||||
@@ -22,6 +22,7 @@ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */
|
|||||||
#define LIBFFI_ASM
|
#define LIBFFI_ASM
|
||||||
#include <fficonfig.h>
|
#include <fficonfig.h>
|
||||||
#include <ffi.h>
|
#include <ffi.h>
|
||||||
|
#include "internal.h"
|
||||||
|
|
||||||
#ifdef HAVE_MACHINE_ASM_H
|
#ifdef HAVE_MACHINE_ASM_H
|
||||||
#include <machine/asm.h>
|
#include <machine/asm.h>
|
||||||
@@ -43,13 +44,12 @@ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */
|
|||||||
#define cfi_def_cfa_register(reg) .cfi_def_cfa_register reg
|
#define cfi_def_cfa_register(reg) .cfi_def_cfa_register reg
|
||||||
|
|
||||||
.text
|
.text
|
||||||
|
.align 2
|
||||||
|
|
||||||
.globl CNAME(ffi_call_SYSV)
|
.globl CNAME(ffi_call_SYSV)
|
||||||
#ifdef __ELF__
|
#ifdef __ELF__
|
||||||
.type CNAME(ffi_call_SYSV), #function
|
.type CNAME(ffi_call_SYSV), #function
|
||||||
#endif
|
#endif
|
||||||
#ifdef __APPLE__
|
|
||||||
.align 2
|
|
||||||
#endif
|
|
||||||
|
|
||||||
/* ffi_call_SYSV()
|
/* ffi_call_SYSV()
|
||||||
|
|
||||||
@@ -142,42 +142,40 @@ CNAME(ffi_call_SYSV):
|
|||||||
mov x23, x0
|
mov x23, x0
|
||||||
|
|
||||||
/* Figure out if we should touch the vector registers. */
|
/* Figure out if we should touch the vector registers. */
|
||||||
tbz x23, #AARCH64_FFI_WITH_V_BIT, 1f
|
tbz x23, #AARCH64_FLAG_ARG_V_BIT, 1f
|
||||||
|
|
||||||
/* Load the vector argument passing registers. */
|
/* Load the vector argument passing registers. */
|
||||||
ldp q0, q1, [x21, #8*32 + 0]
|
ldp q0, q1, [x21, #0]
|
||||||
ldp q2, q3, [x21, #8*32 + 32]
|
ldp q2, q3, [x21, #32]
|
||||||
ldp q4, q5, [x21, #8*32 + 64]
|
ldp q4, q5, [x21, #64]
|
||||||
ldp q6, q7, [x21, #8*32 + 96]
|
ldp q6, q7, [x21, #96]
|
||||||
1:
|
1:
|
||||||
/* Load the core argument passing registers. */
|
/* Load the core argument passing registers, including
|
||||||
ldp x0, x1, [x21, #0]
|
the structure return pointer. */
|
||||||
ldp x2, x3, [x21, #16]
|
ldp x0, x1, [x21, #16*N_V_ARG_REG + 0]
|
||||||
ldp x4, x5, [x21, #32]
|
ldp x2, x3, [x21, #16*N_V_ARG_REG + 16]
|
||||||
ldp x6, x7, [x21, #48]
|
ldp x4, x5, [x21, #16*N_V_ARG_REG + 32]
|
||||||
|
ldp x6, x7, [x21, #16*N_V_ARG_REG + 48]
|
||||||
/* Don't forget x8 which may be holding the address of a return buffer.
|
ldr x8, [x21, #16*N_V_ARG_REG + 64]
|
||||||
*/
|
|
||||||
ldr x8, [x21, #8*8]
|
|
||||||
|
|
||||||
blr x24
|
blr x24
|
||||||
|
|
||||||
/* Save the core argument passing registers. */
|
/* Save the core argument passing registers. */
|
||||||
stp x0, x1, [x21, #0]
|
stp x0, x1, [x21, #16*N_V_ARG_REG + 0]
|
||||||
stp x2, x3, [x21, #16]
|
stp x2, x3, [x21, #16*N_V_ARG_REG + 16]
|
||||||
stp x4, x5, [x21, #32]
|
stp x4, x5, [x21, #16*N_V_ARG_REG + 32]
|
||||||
stp x6, x7, [x21, #48]
|
stp x6, x7, [x21, #16*N_V_ARG_REG + 48]
|
||||||
|
|
||||||
/* Note nothing useful ever comes back in x8! */
|
/* Note nothing useful ever comes back in x8! */
|
||||||
|
|
||||||
/* Figure out if we should touch the vector registers. */
|
/* Figure out if we should touch the vector registers. */
|
||||||
tbz x23, #AARCH64_FFI_WITH_V_BIT, 1f
|
tbz x23, #AARCH64_FLAG_ARG_V_BIT, 1f
|
||||||
|
|
||||||
/* Save the vector argument passing registers. */
|
/* Save the vector argument passing registers. */
|
||||||
stp q0, q1, [x21, #8*32 + 0]
|
stp q0, q1, [x21, #0]
|
||||||
stp q2, q3, [x21, #8*32 + 32]
|
stp q2, q3, [x21, #32]
|
||||||
stp q4, q5, [x21, #8*32 + 64]
|
stp q4, q5, [x21, #64]
|
||||||
stp q6, q7, [x21, #8*32 + 96]
|
stp q6, q7, [x21, #96]
|
||||||
1:
|
1:
|
||||||
/* All done, unwind our stack frame. */
|
/* All done, unwind our stack frame. */
|
||||||
ldp x21, x22, [x29, # - ffi_call_SYSV_FS]
|
ldp x21, x22, [x29, # - ffi_call_SYSV_FS]
|
||||||
@@ -203,7 +201,7 @@ CNAME(ffi_call_SYSV):
|
|||||||
.size CNAME(ffi_call_SYSV), .-CNAME(ffi_call_SYSV)
|
.size CNAME(ffi_call_SYSV), .-CNAME(ffi_call_SYSV)
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#define ffi_closure_SYSV_FS (8 * 2 + AARCH64_CALL_CONTEXT_SIZE)
|
#define ffi_closure_SYSV_FS (8 * 2 + CALL_CONTEXT_SIZE)
|
||||||
|
|
||||||
/* ffi_closure_SYSV
|
/* ffi_closure_SYSV
|
||||||
|
|
||||||
@@ -243,10 +241,9 @@ CNAME(ffi_call_SYSV):
|
|||||||
Voila! */
|
Voila! */
|
||||||
|
|
||||||
.text
|
.text
|
||||||
.globl CNAME(ffi_closure_SYSV)
|
|
||||||
#ifdef __APPLE__
|
|
||||||
.align 2
|
.align 2
|
||||||
#endif
|
|
||||||
|
.globl CNAME(ffi_closure_SYSV)
|
||||||
.cfi_startproc
|
.cfi_startproc
|
||||||
CNAME(ffi_closure_SYSV):
|
CNAME(ffi_closure_SYSV):
|
||||||
stp x29, x30, [sp, #-16]!
|
stp x29, x30, [sp, #-16]!
|
||||||
@@ -268,24 +265,23 @@ CNAME(ffi_closure_SYSV):
|
|||||||
/* Preserve our struct trampoline_data * */
|
/* Preserve our struct trampoline_data * */
|
||||||
mov x22, x17
|
mov x22, x17
|
||||||
|
|
||||||
/* Save the rest of the argument passing registers. */
|
/* Save the rest of the argument passing registers, including
|
||||||
stp x0, x1, [x21, #0]
|
the structure return pointer. */
|
||||||
stp x2, x3, [x21, #16]
|
stp x0, x1, [x21, #16*N_V_ARG_REG + 0]
|
||||||
stp x4, x5, [x21, #32]
|
stp x2, x3, [x21, #16*N_V_ARG_REG + 16]
|
||||||
stp x6, x7, [x21, #48]
|
stp x4, x5, [x21, #16*N_V_ARG_REG + 32]
|
||||||
/* Don't forget we may have been given a result scratch pad address.
|
stp x6, x7, [x21, #16*N_V_ARG_REG + 48]
|
||||||
*/
|
str x8, [x21, #16*N_V_ARG_REG + 64]
|
||||||
str x8, [x21, #64]
|
|
||||||
|
|
||||||
/* Figure out if we should touch the vector registers. */
|
/* Figure out if we should touch the vector registers. */
|
||||||
ldr x0, [x22, #8]
|
ldr x0, [x22, #8]
|
||||||
tbz x0, #AARCH64_FFI_WITH_V_BIT, 1f
|
tbz x0, #AARCH64_FLAG_ARG_V_BIT, 1f
|
||||||
|
|
||||||
/* Save the argument passing vector registers. */
|
/* Save the argument passing vector registers. */
|
||||||
stp q0, q1, [x21, #8*32 + 0]
|
stp q0, q1, [x21, #0]
|
||||||
stp q2, q3, [x21, #8*32 + 32]
|
stp q2, q3, [x21, #32]
|
||||||
stp q4, q5, [x21, #8*32 + 64]
|
stp q4, q5, [x21, #64]
|
||||||
stp q6, q7, [x21, #8*32 + 96]
|
stp q6, q7, [x21, #96]
|
||||||
1:
|
1:
|
||||||
/* Load &ffi_closure.. */
|
/* Load &ffi_closure.. */
|
||||||
ldr x0, [x22, #0]
|
ldr x0, [x22, #0]
|
||||||
@@ -298,19 +294,19 @@ CNAME(ffi_closure_SYSV):
|
|||||||
|
|
||||||
/* Figure out if we should touch the vector registers. */
|
/* Figure out if we should touch the vector registers. */
|
||||||
ldr x0, [x22, #8]
|
ldr x0, [x22, #8]
|
||||||
tbz x0, #AARCH64_FFI_WITH_V_BIT, 1f
|
tbz x0, #AARCH64_FLAG_ARG_V_BIT, 1f
|
||||||
|
|
||||||
/* Load the result passing vector registers. */
|
/* Load the result passing vector registers. */
|
||||||
ldp q0, q1, [x21, #8*32 + 0]
|
ldp q0, q1, [x21, #0]
|
||||||
ldp q2, q3, [x21, #8*32 + 32]
|
ldp q2, q3, [x21, #32]
|
||||||
ldp q4, q5, [x21, #8*32 + 64]
|
ldp q4, q5, [x21, #64]
|
||||||
ldp q6, q7, [x21, #8*32 + 96]
|
ldp q6, q7, [x21, #96]
|
||||||
1:
|
1:
|
||||||
/* Load the result passing core registers. */
|
/* Load the result passing core registers. */
|
||||||
ldp x0, x1, [x21, #0]
|
ldp x0, x1, [x21, #16*N_V_ARG_REG + 0]
|
||||||
ldp x2, x3, [x21, #16]
|
ldp x2, x3, [x21, #16*N_V_ARG_REG + 16]
|
||||||
ldp x4, x5, [x21, #32]
|
ldp x4, x5, [x21, #16*N_V_ARG_REG + 32]
|
||||||
ldp x6, x7, [x21, #48]
|
ldp x6, x7, [x21, #16*N_V_ARG_REG + 48]
|
||||||
/* Note nothing useful is returned in x8. */
|
/* Note nothing useful is returned in x8. */
|
||||||
|
|
||||||
/* We are done, unwind our frame. */
|
/* We are done, unwind our frame. */
|
||||||
|
|||||||
Reference in New Issue
Block a user