/* x86-64 assembly in GNU as (AT&T) syntax, run through the C
   preprocessor.  Defines ffi_call_win64, ffi_go_closure_win64 and
   ffi_closure_win64.  */
#define LIBFFI_ASM
|
|
#include <fficonfig.h>
|
|
#include <ffi.h>
|
|
#include <ffi_cfi.h>
|
|
#include "asmnames.h"
|
|
|
|
/* When the assembler understands the CFI pseudo-ops, ask it to place
   the unwind tables in .debug_frame.  */
#ifdef HAVE_AS_CFI_PSEUDO_OP
        .cfi_sections   .debug_frame
#endif
|
|
|
|
/* Calling-convention glue.

   SEH(...) passes its arguments through when X86_WIN64 is defined, so
   the .seh_* unwind directives are emitted only in that configuration;
   otherwise it expands to nothing.

   arg0..arg3 name the registers in which this file's own entry points
   receive their first four integer arguments: %rcx, %rdx, %r8, %r9
   with X86_WIN64, and %rdi, %rsi, %rdx, %rcx without it.  */
#if defined(X86_WIN64)
# define SEH(...)       __VA_ARGS__
# define arg0           %rcx
# define arg1           %rdx
# define arg2           %r8
# define arg3           %r9
#else
# define SEH(...)
# define arg0           %rdi
# define arg1           %rsi
# define arg2           %rdx
# define arg3           %rcx
#endif
|
|
|
|
/* E(BASE, X) opens slot X of a jump table that has no separate array
   of addresses: the code for slot X is placed directly at
   BASE + X * 8, so every slot has exactly 8 bytes available.

   .balign moves to the next slot boundary; .org then asserts that we
   are at the expected location (an over-long slot would require .org
   to move backwards, which the assembler rejects).

   X is parenthesized so that an expression argument such as N + 1 is
   scaled as a whole instead of having only its last term multiplied.  */
/* ??? The clang assembler doesn't handle .org with symbolic expressions,
   so on the toolchains below only the alignment is emitted and the
   slot position is not checked.  */
#if defined(__clang__) || defined(__APPLE__) || (defined (__sun__) && defined(__svr4__))
# define E(BASE, X)     .balign 8
#else
# define E(BASE, X)     .balign 8; .org BASE + (X) * 8
#endif
|
|
|
|
.text
|
|
|
|
/* ffi_call_win64 (void *stack, struct win64_call_frame *frame, void *r10)

   Calls the target function and stores its return value.

   In:   arg0 = stack  becomes %rsp for the call; its first four 8-byte
                       slots are also loaded into the argument
                       registers.
         arg1 = frame  base of this function's stack frame.  Offsets
                       used below:
                          0  saved %rbp           (written here)
                          8  our return address   (written here)
                         16  address of the function to call
                         24  32-bit return-type code (FFI_TYPE_*)
                         32  address that receives the return value
         arg2 = r10    copied to %r10 before the call.

   A bit of trickiness here -- FRAME is the base of the stack frame
   for this function.  This has been allocated by ffi_call.  We also
   deallocate some of the stack that has been alloca'd: the epilogue
   reloads %rsp from %rbp (= FRAME), which discards everything that
   lies below FRAME.  */

        .align  8
        .globl  C(ffi_call_win64)

        SEH(.seh_proc ffi_call_win64)
C(ffi_call_win64):
        cfi_startproc
        /* Set up the local stack frame and install it in rbp/rsp.  */
        movq    (%rsp), %rax            /* our return address */
        movq    %rbp, (arg1)            /* frame+0 = caller's %rbp */
        movq    %rax, 8(arg1)           /* frame+8 = return address */
        movq    arg1, %rbp
        cfi_def_cfa(%rbp, 16)
        cfi_rel_offset(%rbp, 0)
        SEH(.seh_pushreg %rbp)
        SEH(.seh_setframe %rbp, 0)
        SEH(.seh_endprologue)
        movq    arg0, %rsp

        /* Must happen before the loads below, which overwrite the
           registers that arg2 may live in.  */
        movq    arg2, %r10

        /* Load all slots into both general and xmm registers.  */
        movq    (%rsp), %rcx
        movsd   (%rsp), %xmm0
        movq    8(%rsp), %rdx
        movsd   8(%rsp), %xmm1
        movq    16(%rsp), %r8
        movsd   16(%rsp), %xmm2
        movq    24(%rsp), %r9
        movsd   24(%rsp), %xmm3

        call    *16(%rbp)

        /* Dispatch on the return-type code: jump to 0f + code * 8.
           Codes above FFI_TYPE_SMALL_STRUCT_4B (unsigned compare) have
           no table slot and go to the abort at 99.  */
        movl    24(%rbp), %ecx
        movq    32(%rbp), %r8           /* %r8 = where to store the result */
        leaq    0f(%rip), %r10
        cmpl    $FFI_TYPE_SMALL_STRUCT_4B, %ecx
        leaq    (%r10, %rcx, 8), %r10   /* lea leaves the cmpl flags intact */
        ja      99f
        jmp     *%r10

/* Below, we're space constrained most of the time.  Thus we eschew the
   modern "mov, pop, ret" sequence (5 bytes) for "leave, ret" (2 bytes).
   leaveq reloads %rsp from %rbp and pops the %rbp saved at frame+0, so
   ret then consumes the return address stored at frame+8.  The CFI state
   is saved and restored around the ret so that the code following each
   expansion is still described relative to %rbp.  */
.macro epilogue
        leaveq
        cfi_remember_state
        cfi_def_cfa(%rsp, 8)
        cfi_restore(%rbp)
        ret
        cfi_restore_state
.endm

/* Return-value jump table.  On entry to a slot the callee's result is
   in %rax or %xmm0 and %r8 holds the address to store it to.  Every
   slot must fit in 8 bytes (see E).  */
        .align  8
0:
E(0b, FFI_TYPE_VOID)
        epilogue
E(0b, FFI_TYPE_INT)
        movslq  %eax, %rax              /* sign-extend, store all 8 bytes */
        movq    %rax, (%r8)
        epilogue
E(0b, FFI_TYPE_FLOAT)
        movss   %xmm0, (%r8)
        epilogue
E(0b, FFI_TYPE_DOUBLE)
        movsd   %xmm0, (%r8)
        epilogue
/* The slot after FFI_TYPE_DOUBLE is not a supported return type.  It
   is addressed as FFI_TYPE_DOUBLE + 1 rather than FFI_TYPE_LONGDOUBLE:
   if the headers define FFI_TYPE_LONGDOUBLE as an alias of
   FFI_TYPE_DOUBLE (NOTE(review): believed to happen when long double
   is not a distinct type -- confirm against ffi.h), E would aim .org
   back at the previous slot.  The argument carries its own parentheses
   so that it is scaled as a unit.  */
E(0b, (FFI_TYPE_DOUBLE + 1))
        call    PLT(C(abort))
E(0b, FFI_TYPE_UINT8)
        movzbl  %al, %eax
        movq    %rax, (%r8)
        epilogue
E(0b, FFI_TYPE_SINT8)
        /* Shares the store and epilogue at 98 to stay within the slot.  */
        movsbq  %al, %rax
        jmp     98f
E(0b, FFI_TYPE_UINT16)
        movzwl  %ax, %eax
        movq    %rax, (%r8)
        epilogue
E(0b, FFI_TYPE_SINT16)
        /* Shares the store and epilogue at 98 to stay within the slot.  */
        movswq  %ax, %rax
        jmp     98f
E(0b, FFI_TYPE_UINT32)
        movl    %eax, %eax              /* zero-extend to 64 bits */
        movq    %rax, (%r8)
        epilogue
E(0b, FFI_TYPE_SINT32)
        movslq  %eax, %rax
        movq    %rax, (%r8)
        epilogue
E(0b, FFI_TYPE_UINT64)
98:     movq    %rax, (%r8)
        epilogue
E(0b, FFI_TYPE_SINT64)
        movq    %rax, (%r8)
        epilogue
E(0b, FFI_TYPE_STRUCT)
        /* Nothing is stored for this code.  */
        epilogue
E(0b, FFI_TYPE_POINTER)
        movq    %rax, (%r8)
        epilogue
E(0b, FFI_TYPE_COMPLEX)
        call    PLT(C(abort))
E(0b, FFI_TYPE_SMALL_STRUCT_1B)
        movb    %al, (%r8)
        epilogue
E(0b, FFI_TYPE_SMALL_STRUCT_2B)
        movw    %ax, (%r8)
        epilogue
E(0b, FFI_TYPE_SMALL_STRUCT_4B)
        movl    %eax, (%r8)
        epilogue

        /* Return-type code outside the table.  */
        .align  8
99:     call    PLT(C(abort))

        epilogue

        cfi_endproc
        SEH(.seh_endproc)
|
|
|
|
|
|
/* Stack frame of the closure entry points, as byte offsets from %rsp
   once the frame has been allocated:

       0 .. 31   outgoing register stack space
      32 .. 39   alignment padding
      40 .. 55   result                      (ffi_clo_OFF_R)
      56 .. 87   xmm registers               (ffi_clo_OFF_X)

   ffi_clo_FS is the size of the whole frame.  */
#define ffi_clo_OFF_R   (32+8)
#define ffi_clo_OFF_X   (ffi_clo_OFF_R+16)
#define ffi_clo_FS      (ffi_clo_OFF_X+32)
|
|
|
|
/* ffi_go_closure_win64

   Closure entry point that finds its closure object through %r10.
   It reads cif from 8(%r10) and fun from 16(%r10), passes the closure
   object itself as user_data, and continues at label 0 inside
   ffi_closure_win64, which does the rest of the work.

   In:   %rcx, %rdx, %r8, %r9   incoming integer arguments
         %r10                   closure object
   At the jump:  %rcx = cif, %rdx = fun, %r8 = user_data.  */
        .align  8
        .globl  C(ffi_go_closure_win64)

        SEH(.seh_proc ffi_go_closure_win64)
C(ffi_go_closure_win64):
        cfi_startproc
        /* Spill the four integer argument registers into the incoming
           register stack space just above the return address.  Each
           register is saved before it is reused below.  */
        movq    %r9, 32(%rsp)
        movq    %r8, 24(%rsp)
        movq    %rdx, 16(%rsp)
        movq    %rcx, 8(%rsp)

        movq    %r10, %r8               /* closure is user_data */
        movq    16(%r10), %rdx          /* load fun */
        movq    8(%r10), %rcx           /* load cif */
        jmp     0f                      /* shared tail in ffi_closure_win64 */
        cfi_endproc
        SEH(.seh_endproc)
|
|
|
|
/* ffi_closure_win64

   Closure entry point.  %r10 addresses the closure object; cif, fun
   and user_data are read from the three 8-byte fields that start
   FFI_TRAMPOLINE_SIZE bytes into it.

   Label 0 is also entered from ffi_go_closure_win64, with
   %rcx = cif, %rdx = fun, %r8 = user_data already loaded and the
   integer arguments already spilled.

   ffi_closure_win64_inner is called with those three values plus, in
   %r9, the address of the result area.  Counting upwards from that
   address the stack holds: 16 bytes of result, xmm0-xmm3, the return
   address, and the four spilled integer arguments.

   Out:  %rax and %xmm0 are both loaded from the result area.  */
        .align  8
        .globl  C(ffi_closure_win64)

        SEH(.seh_proc ffi_closure_win64)
C(ffi_closure_win64):
        cfi_startproc
        /* Spill the four integer argument registers into the incoming
           register stack space just above the return address.  Each
           register is saved before it is reused below.  */
        movq    %r9, 32(%rsp)
        movq    %r8, 24(%rsp)
        movq    %rdx, 16(%rsp)
        movq    %rcx, 8(%rsp)

        movq    FFI_TRAMPOLINE_SIZE+16(%r10), %r8       /* load user_data */
        movq    FFI_TRAMPOLINE_SIZE+8(%r10), %rdx       /* load fun */
        movq    FFI_TRAMPOLINE_SIZE(%r10), %rcx         /* load cif */
0:
        /* Allocate the frame described by the ffi_clo_* constants.  */
        subq    $ffi_clo_FS, %rsp
        cfi_adjust_cfa_offset(ffi_clo_FS)
        SEH(.seh_stackalloc ffi_clo_FS)
        SEH(.seh_endprologue)

        /* Store the low 8 bytes of each sse argument register in the
           frame.  */
        movsd   %xmm3, ffi_clo_OFF_X+24(%rsp)
        movsd   %xmm2, ffi_clo_OFF_X+16(%rsp)
        movsd   %xmm1, ffi_clo_OFF_X+8(%rsp)
        movsd   %xmm0, ffi_clo_OFF_X(%rsp)

        leaq    ffi_clo_OFF_R(%rsp), %r9        /* fourth argument: result area */
        call    PLT(C(ffi_closure_win64_inner))

        /* The result may be an integer or a floating-point value, so
           return it in both possible result registers.  */
        movsd   ffi_clo_OFF_R(%rsp), %xmm0
        movq    ffi_clo_OFF_R(%rsp), %rax

        addq    $ffi_clo_FS, %rsp
        cfi_adjust_cfa_offset(-ffi_clo_FS)
        ret

        cfi_endproc
        SEH(.seh_endproc)
|
|
|
|
/* On ELF/Linux, emit an empty .note.GNU-stack section so that this
   object does not request an executable stack.  */
#if defined(__ELF__) && defined(__linux__)
        .section .note.GNU-stack,"",@progbits
#endif
|