From d06751979bf0e4c9caabf0bca531d74de8cb9152 Mon Sep 17 00:00:00 2001
From: Richard Henderson
Date: Mon, 7 Mar 2016 12:14:22 -0500
Subject: [PATCH 1/2] x86: Copy fix for clang .org from unix64.S

Clang doesn't understand .org with symbolic operands.
---
 src/x86/win64.S | 51 +++++++++++++++++++++++++++----------------------
 1 file changed, 28 insertions(+), 23 deletions(-)

diff --git a/src/x86/win64.S b/src/x86/win64.S
index 9d4f8b95..09b9854d 100644
--- a/src/x86/win64.S
+++ b/src/x86/win64.S
@@ -22,10 +22,15 @@
 #define arg3	%rcx
 #endif
 
-.macro E which
-	.align	8
-	.org	0b + \which * 8
-.endm
+/* This macro allows the safe creation of jump tables without an
+   actual table. The entry points into the table are all 8 bytes.
+   The use of ORG asserts that we're at the correct location. */
+/* ??? The clang assembler doesn't handle .org with symbolic expressions. */
+#if defined(__clang__) || defined(__APPLE__) || (defined (__sun__) && defined(__svr4__))
+# define E(BASE, X)	.balign 8
+#else
+# define E(BASE, X)	.balign 8; .org BASE + X * 8
+#endif
 
 	.text
 
@@ -88,62 +93,62 @@ ffi_call_win64:
 
 	.align	8
 0:
-E FFI_TYPE_VOID
+E(0b, FFI_TYPE_VOID)
 	epilogue
-E FFI_TYPE_INT
+E(0b, FFI_TYPE_INT)
 	movslq	%eax, %rax
 	movq	%rax, (%r8)
 	epilogue
-E FFI_TYPE_FLOAT
+E(0b, FFI_TYPE_FLOAT)
 	movss	%xmm0, (%r8)
 	epilogue
-E FFI_TYPE_DOUBLE
+E(0b, FFI_TYPE_DOUBLE)
 	movsd	%xmm0, (%r8)
 	epilogue
-E FFI_TYPE_LONGDOUBLE
+E(0b, FFI_TYPE_LONGDOUBLE)
 	call	PLT(C(abort))
-E FFI_TYPE_UINT8
+E(0b, FFI_TYPE_UINT8)
 	movzbl	%al, %eax
 	movq	%rax, (%r8)
 	epilogue
-E FFI_TYPE_SINT8
+E(0b, FFI_TYPE_SINT8)
 	movsbq	%al, %rax
 	jmp	98f
-E FFI_TYPE_UINT16
+E(0b, FFI_TYPE_UINT16)
 	movzwl	%ax, %eax
 	movq	%rax, (%r8)
 	epilogue
-E FFI_TYPE_SINT16
+E(0b, FFI_TYPE_SINT16)
 	movswq	%ax, %rax
 	jmp	98f
-E FFI_TYPE_UINT32
+E(0b, FFI_TYPE_UINT32)
 	movl	%eax, %eax
 	movq	%rax, (%r8)
 	epilogue
-E FFI_TYPE_SINT32
+E(0b, FFI_TYPE_SINT32)
 	movslq	%eax, %rax
 	movq	%rax, (%r8)
 	epilogue
-E FFI_TYPE_UINT64
+E(0b, FFI_TYPE_UINT64)
 98:	movq	%rax, (%r8)
 	epilogue
-E FFI_TYPE_SINT64
+E(0b, FFI_TYPE_SINT64)
 	movq	%rax, (%r8)
 	epilogue
-E FFI_TYPE_STRUCT
+E(0b, FFI_TYPE_STRUCT)
 	epilogue
-E FFI_TYPE_POINTER
+E(0b, FFI_TYPE_POINTER)
 	movq	%rax, (%r8)
 	epilogue
-E FFI_TYPE_COMPLEX
+E(0b, FFI_TYPE_COMPLEX)
 	call	PLT(C(abort))
-E FFI_TYPE_SMALL_STRUCT_1B
+E(0b, FFI_TYPE_SMALL_STRUCT_1B)
 	movb	%al, (%r8)
 	epilogue
-E FFI_TYPE_SMALL_STRUCT_2B
+E(0b, FFI_TYPE_SMALL_STRUCT_2B)
 	movw	%ax, (%r8)
 	epilogue
-E FFI_TYPE_SMALL_STRUCT_4B
+E(0b, FFI_TYPE_SMALL_STRUCT_4B)
 	movl	%eax, (%r8)
 	epilogue
 
From e5843a3a09976f9d8fa77671e9d6c188c890199d Mon Sep 17 00:00:00 2001
From: Richard Henderson
Date: Fri, 15 Apr 2016 16:10:08 -0700
Subject: [PATCH 2/2] x86: Fix calling convention for ffi_closure_win64_inner

Also enable testing for the cross-abi calls.
---
 src/x86/ffiw64.c                |  6 +++++-
 src/x86/win64.S                 | 14 +++++++-------
 testsuite/lib/libffi.exp        |  5 +++++
 testsuite/libffi.call/ffitest.h |  1 +
 4 files changed, 18 insertions(+), 8 deletions(-)

diff --git a/src/x86/ffiw64.c b/src/x86/ffiw64.c
index 0029be02..fd47c587 100644
--- a/src/x86/ffiw64.c
+++ b/src/x86/ffiw64.c
@@ -231,7 +231,11 @@ struct win64_closure_frame
   UINT64 args[];
 };
 
-int FFI_HIDDEN
+/* Force the inner function to use the MS ABI. When compiling on win64
+   this is a nop. When compiling on unix, this simplifies the assembly,
+   and places the burden of saving the extra call-saved registers on
+   the compiler. */
+int FFI_HIDDEN __attribute__((ms_abi))
 ffi_closure_win64_inner(ffi_cif *cif,
 			void (*fun)(ffi_cif*, void*, void**, void*),
 			void *user_data,
diff --git a/src/x86/win64.S b/src/x86/win64.S
index 09b9854d..1f82a3e5 100644
--- a/src/x86/win64.S
+++ b/src/x86/win64.S
@@ -179,9 +179,9 @@ ffi_go_closure_win64:
 	movq	%r8, 24(%rsp)
 	movq	%r9, 32(%rsp)
 
-	movq	8(%r10), arg0			/* load cif */
-	movq	16(%r10), arg1			/* load fun */
-	movq	%r10, arg2			/* closure is user_data */
+	movq	8(%r10), %rcx			/* load cif */
+	movq	16(%r10), %rdx			/* load fun */
+	movq	%r10, %r8			/* closure is user_data */
 	jmp	0f
 	cfi_endproc
 	SEH(.seh_endproc)
@@ -198,9 +198,9 @@ ffi_closure_win64:
 	movq	%r8, 24(%rsp)
 	movq	%r9, 32(%rsp)
 
-	movq	FFI_TRAMPOLINE_SIZE(%r10), arg0		/* load cif */
-	movq	FFI_TRAMPOLINE_SIZE+8(%r10), arg1	/* load fun */
-	movq	FFI_TRAMPOLINE_SIZE+16(%r10), arg2	/* load user_data */
+	movq	FFI_TRAMPOLINE_SIZE(%r10), %rcx		/* load cif */
+	movq	FFI_TRAMPOLINE_SIZE+8(%r10), %rdx	/* load fun */
+	movq	FFI_TRAMPOLINE_SIZE+16(%r10), %r8	/* load user_data */
 0:
 	subq	$ffi_clo_FS, %rsp
 	cfi_adjust_cfa_offset(ffi_clo_FS)
@@ -213,7 +213,7 @@ ffi_closure_win64:
 	movsd	%xmm2, ffi_clo_OFF_X+16(%rsp)
 	movsd	%xmm3, ffi_clo_OFF_X+24(%rsp)
 
-	leaq	ffi_clo_OFF_R(%rsp), arg3
+	leaq	ffi_clo_OFF_R(%rsp), %r9
 	call	ffi_closure_win64_inner
 
 	/* Load the result into both possible result registers. */
diff --git a/testsuite/lib/libffi.exp b/testsuite/lib/libffi.exp
index 0d746272..6d19393f 100644
--- a/testsuite/lib/libffi.exp
+++ b/testsuite/lib/libffi.exp
@@ -315,6 +315,11 @@ proc run-many-tests { testcases extra_flags } {
                 "-DABI_NUM=FFI_THISCALL -DABI_ATTR=__THISCALL__"
                 "-DABI_NUM=FFI_FASTCALL -DABI_ATTR=__FASTCALL__"
             }
+        } elseif [istarget "x86_64-*-*"] {
+            set targetabis {
+                ""
+                "-DABI_NUM=FFI_WIN64 -DABI_ATTR=__MSABI__"
+            }
         }
     }
 
diff --git a/testsuite/libffi.call/ffitest.h b/testsuite/libffi.call/ffitest.h
index 15d5e441..5e194511 100644
--- a/testsuite/libffi.call/ffitest.h
+++ b/testsuite/libffi.call/ffitest.h
@@ -24,6 +24,7 @@
 #define __STDCALL__ __attribute__((stdcall))
 #define __THISCALL__ __attribute__((thiscall))
 #define __FASTCALL__ __attribute__((fastcall))
+#define __MSABI__ __attribute__((ms_abi))
 #else
 #define __UNUSED__
 #define __STDCALL__ __stdcall