From b5fed601948237037513a9b7f967c8fc6c9ff1f6 Mon Sep 17 00:00:00 2001 From: Josh Triplett Date: Sat, 5 Apr 2014 17:33:42 -0700 Subject: [PATCH] Fix ABI on 32-bit non-Windows x86: go back to trampoline size 10 The trampoline size is part of the ABI, so it cannot change. Move the logic from the stdcall and thiscall trampolines to the functions they call, to reduce them both to 10 bytes. This drops the previously added support for raw THISCALL closures on non-Windows. (Non-raw THISCALL closures still work.) --- src/x86/ffi.c | 29 ++++++++++----------- src/x86/ffitarget.h | 12 +++++++-- src/x86/win32.S | 61 ++++++++++++++++++++++++++++++++------------- 3 files changed, 67 insertions(+), 35 deletions(-) diff --git a/src/x86/ffi.c b/src/x86/ffi.c index 79407ae2..72bed067 100644 --- a/src/x86/ffi.c +++ b/src/x86/ffi.c @@ -439,9 +439,11 @@ unsigned int FFI_HIDDEN ffi_closure_SYSV_inner (ffi_closure *, void **, void *) __attribute__ ((regparm(1))); void FFI_HIDDEN ffi_closure_raw_SYSV (ffi_raw_closure *) __attribute__ ((regparm(1))); -#ifndef X86_WIN64 +#ifdef X86_WIN32 void FFI_HIDDEN ffi_closure_raw_THISCALL (ffi_raw_closure *) __attribute__ ((regparm(1))); +#endif +#ifndef X86_WIN64 void FFI_HIDDEN ffi_closure_STDCALL (ffi_closure *) __attribute__ ((regparm(1))); void FFI_HIDDEN ffi_closure_THISCALL (ffi_closure *) @@ -605,7 +607,7 @@ ffi_prep_incoming_args_SYSV(char *stack, void **rvalue, void **avalue, *(unsigned int*) &__tramp[6] = __dis; /* jmp __fun */ \ } -#define FFI_INIT_TRAMPOLINE_THISCALL(TRAMP,FUN,CTX,SIZE) \ +#define FFI_INIT_TRAMPOLINE_RAW_THISCALL(TRAMP,FUN,CTX,SIZE) \ { unsigned char *__tramp = (unsigned char*)(TRAMP); \ unsigned int __fun = (unsigned int)(FUN); \ unsigned int __ctx = (unsigned int)(CTX); \ @@ -632,18 +634,15 @@ ffi_prep_incoming_args_SYSV(char *stack, void **rvalue, void **avalue, *(unsigned short*) &__tramp[50] = (__size + 8); /* ret (__size + 8) */ \ } -#define FFI_INIT_TRAMPOLINE_STDCALL(TRAMP,FUN,CTX,SIZE) \ +#define 
FFI_INIT_TRAMPOLINE_STDCALL(TRAMP,FUN,CTX) \ { unsigned char *__tramp = (unsigned char*)(TRAMP); \ unsigned int __fun = (unsigned int)(FUN); \ unsigned int __ctx = (unsigned int)(CTX); \ unsigned int __dis = __fun - (__ctx + 10); \ - unsigned short __size = (unsigned short)(SIZE); \ *(unsigned char*) &__tramp[0] = 0xb8; \ *(unsigned int*) &__tramp[1] = __ctx; /* movl __ctx, %eax */ \ *(unsigned char *) &__tramp[5] = 0xe8; \ *(unsigned int*) &__tramp[6] = __dis; /* call __fun */ \ - *(unsigned char *) &__tramp[10] = 0xc2; \ - *(unsigned short*) &__tramp[11] = __size; /* ret __size */ \ } /* the cif must already be prep'ed */ @@ -675,16 +674,15 @@ ffi_prep_closure_loc (ffi_closure* closure, } else if (cif->abi == FFI_THISCALL) { - FFI_INIT_TRAMPOLINE_THISCALL (&closure->tramp[0], - &ffi_closure_THISCALL, - (void*)codeloc, - cif->bytes); + FFI_INIT_TRAMPOLINE_STDCALL (&closure->tramp[0], + &ffi_closure_THISCALL, + (void*)codeloc); } else if (cif->abi == FFI_STDCALL) { FFI_INIT_TRAMPOLINE_STDCALL (&closure->tramp[0], &ffi_closure_STDCALL, - (void*)codeloc, cif->bytes); + (void*)codeloc); } #ifdef X86_WIN32 else if (cif->abi == FFI_MS_CDECL) @@ -721,7 +719,7 @@ ffi_prep_raw_closure_loc (ffi_raw_closure* closure, int i; if (cif->abi != FFI_SYSV -#ifndef X86_WIN64 +#ifdef X86_WIN32 && cif->abi != FFI_THISCALL #endif ) @@ -738,18 +736,17 @@ ffi_prep_raw_closure_loc (ffi_raw_closure* closure, FFI_ASSERT (cif->arg_types[i]->type != FFI_TYPE_LONGDOUBLE); } -#ifndef X86_WIN64 +#ifdef X86_WIN32 if (cif->abi == FFI_SYSV) { #endif FFI_INIT_TRAMPOLINE (&closure->tramp[0], &ffi_closure_raw_SYSV, codeloc); -#ifndef X86_WIN64 +#ifdef X86_WIN32 } else if (cif->abi == FFI_THISCALL) { - FFI_INIT_TRAMPOLINE_THISCALL (&closure->tramp[0], &ffi_closure_raw_THISCALL, - codeloc, cif->bytes); + FFI_INIT_TRAMPOLINE_RAW_THISCALL (&closure->tramp[0], &ffi_closure_raw_THISCALL, codeloc, cif->bytes); } #endif closure->cif = cif; diff --git a/src/x86/ffitarget.h b/src/x86/ffitarget.h index d2aaf9d8..b2afe911 
100644 --- a/src/x86/ffitarget.h +++ b/src/x86/ffitarget.h @@ -122,14 +122,22 @@ typedef enum ffi_abi { #if defined (X86_64) || (defined (__x86_64__) && defined (X86_DARWIN)) #define FFI_TRAMPOLINE_SIZE 24 #define FFI_NATIVE_RAW_API 0 -#elif defined(X86_WIN64) +#else +#ifdef X86_WIN32 +#define FFI_TRAMPOLINE_SIZE 52 +#else +#ifdef X86_WIN64 #define FFI_TRAMPOLINE_SIZE 29 #define FFI_NATIVE_RAW_API 0 #define FFI_NO_RAW_API 1 #else -#define FFI_TRAMPOLINE_SIZE 52 +#define FFI_TRAMPOLINE_SIZE 10 +#endif +#endif +#ifndef X86_WIN64 #define FFI_NATIVE_RAW_API 1 /* x86 has native raw api support */ #endif +#endif #endif diff --git a/src/x86/win32.S b/src/x86/win32.S index d71c8b86..0a655c47 100644 --- a/src/x86/win32.S +++ b/src/x86/win32.S @@ -33,8 +33,13 @@ #include #include +#define CIF_ABI_OFFSET 0 +#define CIF_BYTES_OFFSET 16 + #ifdef _MSC_VER +#define CLOSURE_CIF_OFFSET ((FFI_TRAMPOLINE_SIZE + 3) AND NOT 3) + .386 .MODEL FLAT, C @@ -188,12 +193,12 @@ ca_epilogue: ret ffi_call_win32 ENDP -ffi_closure_THISCALL PROC NEAR FORCEFRAME - sub esp, 40 - lea edx, [ebp -24] - mov [ebp - 12], edx /* resp */ - lea edx, [ebp + 12] /* account for stub return address on stack */ - jmp stub +ffi_closure_THISCALL PROC NEAR + ;; Insert the register argument on the stack as the first argument + xchg DWORD PTR [esp+4], ecx + xchg DWORD PTR [esp], ecx + push ecx + jmp ffi_closure_STDCALL ffi_closure_THISCALL ENDP ffi_closure_SYSV PROC NEAR FORCEFRAME @@ -464,8 +469,18 @@ cd_retlongdouble: jmp cd_epilogue cd_epilogue: - ;; Epilogue code is autogenerated. 
- ret + mov esp, ebp + pop ebp + pop ecx + mov ecx, DWORD PTR [ecx + (CLOSURE_CIF_OFFSET-10)] + cmp DWORD PTR [ecx + CIF_ABI_OFFSET], 3 + mov ecx, DWORD PTR [ecx + CIF_BYTES_OFFSET] + jne cd_not_thiscall + add ecx, 4 +cd_not_thiscall: + pop edx + add esp, ecx + jmp edx ffi_closure_STDCALL ENDP _TEXT ENDS @@ -473,6 +488,8 @@ END #else +#define CLOSURE_CIF_OFFSET ((FFI_TRAMPOLINE_SIZE + 3) & ~3) + #if defined(SYMBOL_UNDERSCORE) #define USCORE_SYMBOL(x) _##x #else @@ -657,13 +674,11 @@ FFI_HIDDEN(ffi_closure_THISCALL) .def _ffi_closure_THISCALL; .scl 2; .type 32; .endef #endif USCORE_SYMBOL(ffi_closure_THISCALL): - pushl %ebp - movl %esp, %ebp - subl $40, %esp - leal -24(%ebp), %edx - movl %edx, -12(%ebp) /* resp */ - leal 12(%ebp), %edx /* account for stub return address on stack */ - jmp .stub + /* Insert the register argument on the stack as the first argument */ + xchg %ecx, 4(%esp) + xchg %ecx, (%esp) + push %ecx + jmp .ffi_closure_STDCALL_internal .LFE1: # This assumes we are using gas. @@ -685,7 +700,6 @@ USCORE_SYMBOL(ffi_closure_SYSV): leal -24(%ebp), %edx movl %edx, -12(%ebp) /* resp */ leal 8(%ebp), %edx -.stub: movl %edx, 4(%esp) /* args = __builtin_dwarf_cfa () */ leal -12(%ebp), %edx movl %edx, (%esp) /* &resp */ @@ -811,6 +825,8 @@ USCORE_SYMBOL(ffi_closure_SYSV): #define RAW_CLOSURE_FUN_OFFSET (RAW_CLOSURE_CIF_OFFSET + 4) #define RAW_CLOSURE_USER_DATA_OFFSET (RAW_CLOSURE_FUN_OFFSET + 4) #define CIF_FLAGS_OFFSET 20 + +#ifdef X86_WIN32 .balign 16 FFI_HIDDEN(ffi_closure_raw_THISCALL) .globl USCORE_SYMBOL(ffi_closure_raw_THISCALL) @@ -827,6 +843,8 @@ USCORE_SYMBOL(ffi_closure_raw_THISCALL): movl %edx, 12(%esp) /* user_data */ leal 12(%ebp), %edx /* __builtin_dwarf_cfa () */ jmp .stubraw +#endif /* X86_WIN32 */ + # This assumes we are using gas. 
.balign 16 #if defined(X86_WIN32) @@ -958,6 +976,7 @@ FFI_HIDDEN(ffi_closure_STDCALL) .def _ffi_closure_STDCALL; .scl 2; .type 32; .endef #endif USCORE_SYMBOL(ffi_closure_STDCALL): +.ffi_closure_STDCALL_internal: .LFB5: pushl %ebp .LCFI9: @@ -1070,7 +1089,15 @@ USCORE_SYMBOL(ffi_closure_STDCALL): .Lscls_epilogue: movl %ebp, %esp popl %ebp - ret + popl %ecx + movl (CLOSURE_CIF_OFFSET-10)(%ecx), %ecx + cmpl $3, CIF_ABI_OFFSET(%ecx) /* FFI_THISCALL */ + movl CIF_BYTES_OFFSET(%ecx), %ecx + jne 1f + addl $4, %ecx +1: popl %edx + addl %ecx, %esp + jmp *%edx .ffi_closure_STDCALL_end: .LFE5: