From 098dca6b31e42dd0256ed7d966729e31961f1461 Mon Sep 17 00:00:00 2001
From: nielsAD
Date: Sat, 23 Aug 2014 00:18:47 +0200
Subject: [PATCH 1/3] Support for calling functions with PASCAL and REGISTER
 calling conventions on x86 Windows/Linux.

ffi_prep_args and ffi_prep_args_raw now return the number of arguments
that have to be passed in registers.  ffi_call_win32 uses that count
(instead of the ABI number) to pop the leading stack slots into ECX,
ECX/EDX or ECX/EDX/EAX; for FFI_REGISTER the first register argument is
then exchanged into EAX.  FFI_PASCAL and FFI_REGISTER walk the argument
list from the last argument to the first.

Also changed indentation to be more consistent throughout the
(adjusted) files.
---
 src/x86/ffi.c       | 247 ++++++------
 src/x86/ffitarget.h |   4 +
 src/x86/win32.S     | 958 +++++++++++++++++++++++---------------------
 3 files changed, 611 insertions(+), 598 deletions(-)

diff --git a/src/x86/ffi.c b/src/x86/ffi.c
index 64b19ecf..1dd00eb9 100644
--- a/src/x86/ffi.c
+++ b/src/x86/ffi.c
@@ -43,19 +43,21 @@
-/* ffi_prep_args is called by the assembly routine once stack space
-   has been allocated for the function's arguments */
+/* Called by the assembly routine once stack space has been allocated for
+   the arguments; returns how many of them are to be passed in registers */
 
-void ffi_prep_args(char *stack, extended_cif *ecif);
-void ffi_prep_args(char *stack, extended_cif *ecif)
+unsigned int ffi_prep_args(char *stack, extended_cif *ecif);
+unsigned int ffi_prep_args(char *stack, extended_cif *ecif)
 {
   register unsigned int i;
   register void **p_argv;
   register char *argp;
   register ffi_type **p_arg;
 #ifndef X86_WIN64
-  size_t p_stack_args[2];
-  void *p_stack_data[2];
+  void *p_stack_data[3];
   char *argp2 = stack;
-  int stack_args_count = 0;
-  int cabi = ecif->cif->abi;
+  unsigned int stack_args_count = 0;
+  const int cabi = ecif->cif->abi;
+  const int dir = (cabi == FFI_PASCAL || cabi == FFI_REGISTER) ? -1 : +1;
+#else
+  #define dir 1
 #endif
 
   argp = stack;
@@ -63,42 +65,46 @@ void ffi_prep_args(char *stack, extended_cif *ecif)
   if ((ecif->cif->flags == FFI_TYPE_STRUCT
        || ecif->cif->flags == FFI_TYPE_MS_STRUCT)
 #ifdef X86_WIN64
       && (ecif->cif->rtype->size != 1 && ecif->cif->rtype->size != 2
           && ecif->cif->rtype->size != 4 && ecif->cif->rtype->size != 8)
 #endif
       )
     {
       *(void **) argp = ecif->rvalue;
 #ifndef X86_WIN64
-      /* For fastcall/thiscall this is first register-passed
+      /* For fastcall/thiscall/register this is first register-passed
          argument.  */
-      if (cabi == FFI_THISCALL || cabi == FFI_FASTCALL)
-	{
-	  p_stack_args[stack_args_count] = sizeof (void*);
-	  p_stack_data[stack_args_count] = argp;
-	  ++stack_args_count;
-	}
+      if (cabi == FFI_THISCALL || cabi == FFI_FASTCALL || cabi == FFI_REGISTER)
+        {
+          p_stack_data[stack_args_count] = argp;
+          ++stack_args_count;
+        }
 #endif
       argp += sizeof(void*);
     }
 
+  p_arg  = ecif->cif->arg_types;
   p_argv = ecif->avalue;
-
-  for (i = ecif->cif->nargs, p_arg = ecif->cif->arg_types;
-       i != 0;
-       i--, p_arg++)
+  if (dir < 0 && ecif->cif->nargs > 1)
     {
-      size_t z;
+      /* Right-to-left conventions start at the last argument; nargs > 1
+         also keeps nargs - 1 from wrapping when there are no arguments.  */
+      const unsigned int last = ecif->cif->nargs - 1;
+      p_arg  += last;
+      p_argv += last;
+    }
+  for (i = ecif->cif->nargs; i != 0; i--)
+    {
 
       /* Align if necessary */
       if ((sizeof(void*) - 1) & (size_t) argp)
 	argp = (char *) ALIGN(argp, sizeof(void*));
 
-      z = (*p_arg)->size;
+      size_t z = (*p_arg)->size;
 #ifdef X86_WIN64
       if (z > sizeof(ffi_arg)
           || ((*p_arg)->type == FFI_TYPE_STRUCT
               && (z != 1 && z != 2 && z != 4 && z != 8))
 #if FFI_TYPE_DOUBLE != FFI_TYPE_LONGDOUBLE
           || ((*p_arg)->type == FFI_TYPE_LONGDOUBLE)
 #endif
@@ -156,23 +162,33 @@ void ffi_prep_args(char *stack, extended_cif *ecif)
 	}
 
 #ifndef X86_WIN64
-      /* For thiscall/fastcall convention register-passed arguments
-         are the first two none-floating-point arguments with a size
-         smaller or equal to sizeof (void*).  */
-      if ((cabi == FFI_THISCALL && stack_args_count < 1)
-          || (cabi == FFI_FASTCALL && stack_args_count < 2))
+      /* Register-passed arguments are the first non-floating-point,
+         non-struct arguments that fill exactly one ffi_arg slot: at most
+         one for thiscall, two for fastcall and three for register.  */
+      if ((z == sizeof(ffi_arg))
+          && ((cabi == FFI_REGISTER)
+            || (cabi == FFI_THISCALL && stack_args_count < 1)
+            || (cabi == FFI_FASTCALL && stack_args_count < 2))
+          && ((*p_arg)->type != FFI_TYPE_FLOAT && (*p_arg)->type != FFI_TYPE_STRUCT)
+         )
         {
-	  if (z <= 4
-	      && ((*p_arg)->type != FFI_TYPE_FLOAT
-	          && (*p_arg)->type != FFI_TYPE_STRUCT))
-	    {
-	      p_stack_args[stack_args_count] = z;
-	      p_stack_data[stack_args_count] = argp;
-	      ++stack_args_count;
-	    }
+          if (dir < 0 && stack_args_count > 2)
+            {
+              /* Iterating arguments backwards, so first register-passed argument
+                 will be passed last. Shift temporary values to make place. */
+              p_stack_data[0] = p_stack_data[1];
+              p_stack_data[1] = p_stack_data[2];
+              stack_args_count = 2;
+            }
+
+          p_stack_data[stack_args_count] = argp;
+          ++stack_args_count;
         }
 #endif
-      p_argv++;
+
+      p_arg  += dir;
+      p_argv += dir;
+
 #ifdef X86_WIN64
       argp += (z + sizeof(void*) - 1) & ~(sizeof(void*) - 1);
 #else
@@ -181,44 +197,35 @@ void ffi_prep_args(char *stack, extended_cif *ecif)
     }
 
 #ifndef X86_WIN64
-  /* We need to move the register-passed arguments for thiscall/fastcall
-     on top of stack, so that those can be moved to registers ecx/edx by
-     call-handler.  */
+  /* We need to move the register-passed arguments for thiscall/fastcall/register
+     on top of stack, so that those can be moved to registers by call-handler.  */
   if (stack_args_count > 0)
     {
-      size_t zz = (p_stack_args[0] + 3) & ~3;
-      char *h;
+      unsigned int n;
+      if (dir < 0 && stack_args_count > 1)
+        {
+          /* Reverse order if iterating arguments backwards */
+          ffi_arg tmp = *(ffi_arg*) p_stack_data[0];
+          *(ffi_arg*) p_stack_data[0] = *(ffi_arg*) p_stack_data[stack_args_count - 1];
+          *(ffi_arg*) p_stack_data[stack_args_count - 1] = tmp;
+        }
+
+      for (n = 0; n < stack_args_count; n++)
+        {
+          if (p_stack_data[n] != argp2)
+            {
+              ffi_arg tmp = *(ffi_arg*) p_stack_data[n];
+              memmove (argp2 + sizeof(ffi_arg), argp2, (size_t) ((char*) p_stack_data[n] - (char*)argp2));
+              *(ffi_arg *) argp2 = tmp;
+            }
 
-      /* Move first argument to top-stack position.  */
-      if (p_stack_data[0] != argp2)
-	{
-	  h = alloca (zz + 1);
-	  memcpy (h, p_stack_data[0], zz);
-	  memmove (argp2 + zz, argp2,
-	           (size_t) ((char *) p_stack_data[0] - (char*)argp2));
-	  memcpy (argp2, h, zz);
-	}
-
-      argp2 += zz;
-      --stack_args_count;
-      if (zz > 4)
-	stack_args_count = 0;
-
-      /* If we have a second argument, then move it on top
-         after the first one.  */
-      if (stack_args_count > 0 && p_stack_data[1] != argp2)
-	{
-	  zz = p_stack_args[1];
-	  zz = (zz + 3) & ~3;
-	  h = alloca (zz + 1);
-	  h = alloca (zz + 1);
-	  memcpy (h, p_stack_data[1], zz);
-	  memmove (argp2 + zz, argp2, (size_t) ((char*) p_stack_data[1] - (char*)argp2));
-	  memcpy (argp2, h, zz);
-	}
+          argp2 += sizeof(ffi_arg);
+        }
     }
+
+  return stack_args_count;
 #endif
-  return;
+  return 0;
 }
 
 /* Perform machine dependent cif processing */
@@ -387,37 +394,12 @@ void ffi_call(ffi_cif *cif, void (*fn)(void), void *rvalue, void **avalue)
     case FFI_MS_CDECL:
 #endif
     case FFI_STDCALL:
-      ffi_call_win32(ffi_prep_args, &ecif, cif->abi, cif->bytes, cif->flags,
-		     ecif.rvalue, fn);
-      break;
     case FFI_THISCALL:
     case FFI_FASTCALL:
-      {
-	unsigned int abi = cif->abi;
-	unsigned int i, passed_regs = 0;
-
-	if (cif->flags == FFI_TYPE_STRUCT)
-	  ++passed_regs;
-
-	for (i=0; i < cif->nargs && passed_regs < 2;i++)
-	  {
-	    size_t sz;
-
-	    if (cif->arg_types[i]->type == FFI_TYPE_FLOAT
-	        || cif->arg_types[i]->type == FFI_TYPE_STRUCT)
-	      continue;
-	    sz = (cif->arg_types[i]->size + 3) & ~3;
-	    if (sz == 0 || sz > 4)
-	      continue;
-	    ++passed_regs;
-	  }
-	if (passed_regs < 2 && abi == FFI_FASTCALL)
-	  abi = FFI_THISCALL;
-	if (passed_regs < 1 && abi == FFI_THISCALL)
-	  abi = FFI_STDCALL;
-        ffi_call_win32(ffi_prep_args, &ecif, abi, cif->bytes, cif->flags,
-                       ecif.rvalue, fn);
-      }
+    case FFI_PASCAL:
+    case FFI_REGISTER:
+      ffi_call_win32(ffi_prep_args, &ecif, cif->abi, cif->bytes, cif->flags,
+                     ecif.rvalue, fn);
       break;
 #endif
     default:
@@ -677,14 +659,14 @@ ffi_prep_closure_loc (ffi_closure* closure,
   else if (cif->abi == FFI_FASTCALL)
     {
       FFI_INIT_TRAMPOLINE_STDCALL (&closure->tramp[0],
-				   &ffi_closure_FASTCALL,
-				   (void*)codeloc);
+                                   &ffi_closure_FASTCALL,
+                                   (void*)codeloc);
     }
   else if (cif->abi == FFI_THISCALL)
     {
       FFI_INIT_TRAMPOLINE_STDCALL (&closure->tramp[0],
-				   &ffi_closure_THISCALL,
-				   (void*)codeloc);
+                                   &ffi_closure_THISCALL,
+                                   (void*)codeloc);
     }
   else if (cif->abi == FFI_STDCALL)
     {
@@ -764,10 +746,36 @@ ffi_prep_raw_closure_loc (ffi_raw_closure* closure,
   return FFI_OK;
 }
 
-static void
+/* Copy the raw arguments to the stack; return how many leading slots
+   ffi_call_win32 must load into registers (capped by the ABI).  */
+static unsigned int
 ffi_prep_args_raw(char *stack, extended_cif *ecif)
 {
-  memcpy (stack, ecif->avalue, ecif->cif->bytes);
+  const ffi_cif *cif = ecif->cif;
+  const unsigned int abi = cif->abi;
+  const unsigned int max = (abi == FFI_THISCALL) ? 1
+                         : (abi == FFI_FASTCALL) ? 2
+                         : (abi == FFI_REGISTER) ? 3
+                         : 0;
+  unsigned int i, passed_regs = 0;
+
+  if (max > 0 && cif->flags == FFI_TYPE_STRUCT)
+    ++passed_regs;
+
+  for (i = 0; i < cif->nargs && passed_regs < max; i++)
+    {
+      if (cif->arg_types[i]->type == FFI_TYPE_FLOAT
+          || cif->arg_types[i]->type == FFI_TYPE_STRUCT)
+        continue;
+
+      size_t sz = (cif->arg_types[i]->size + 3) & ~3;
+      if (sz == 0 || sz > 4)
+        continue;
+      ++passed_regs;
+    }
+
+  memcpy (stack, ecif->avalue, cif->bytes);
+  return passed_regs;
 }
 
 /* we borrow this routine from libffi (it must be changed, though, to
@@ -810,37 +818,12 @@ ffi_raw_call(ffi_cif *cif, void (*fn)(void), void *rvalue, ffi_raw *fake_avalue)
 #endif
 #ifndef X86_WIN64
     case FFI_STDCALL:
-      ffi_call_win32(ffi_prep_args_raw, &ecif, cif->abi, cif->bytes, cif->flags,
-		     ecif.rvalue, fn);
-      break;
     case FFI_THISCALL:
     case FFI_FASTCALL:
-      {
-	unsigned int abi = cif->abi;
-	unsigned int i, passed_regs = 0;
-
-	if (cif->flags == FFI_TYPE_STRUCT)
-	  ++passed_regs;
-
-	for (i=0; i < cif->nargs && passed_regs < 2;i++)
-	  {
-	    size_t sz;
-
-	    if (cif->arg_types[i]->type == FFI_TYPE_FLOAT
-	        || cif->arg_types[i]->type == FFI_TYPE_STRUCT)
-	      continue;
-	    sz = (cif->arg_types[i]->size + 3) & ~3;
-	    if (sz == 0 || sz > 4)
-	      continue;
-	    ++passed_regs;
-	  }
-	if (passed_regs < 2 && abi == FFI_FASTCALL)
-	  cif->abi = abi = FFI_THISCALL;
-	if (passed_regs < 1 && abi == FFI_THISCALL)
-	  cif->abi = abi = FFI_STDCALL;
-        ffi_call_win32(ffi_prep_args_raw, &ecif, abi, cif->bytes, cif->flags,
-                       ecif.rvalue, fn);
-      }
+    case FFI_PASCAL:
+    case FFI_REGISTER:
+      ffi_call_win32(ffi_prep_args_raw, &ecif, cif->abi, cif->bytes, cif->flags,
+                     ecif.rvalue, fn);
       break;
 #endif
     default:
diff --git a/src/x86/ffitarget.h b/src/x86/ffitarget.h
index b2afe911..c7161ca4 100644
--- a/src/x86/ffitarget.h
+++ b/src/x86/ffitarget.h
@@ -82,6 +82,8 @@ typedef enum ffi_abi {
   FFI_THISCALL,
   FFI_FASTCALL,
   FFI_MS_CDECL,
+  FFI_PASCAL,
+  FFI_REGISTER,
   FFI_LAST_ABI,
 #ifdef _MSC_VER
   FFI_DEFAULT_ABI = FFI_MS_CDECL
@@ -101,6 +103,8 @@ typedef enum ffi_abi {
   FFI_THISCALL,
   FFI_FASTCALL,
   FFI_STDCALL,
+  FFI_PASCAL,
+  FFI_REGISTER,
   FFI_LAST_ABI,
 #if defined(__i386__) || defined(__i386)
   FFI_DEFAULT_ABI = FFI_SYSV
diff --git a/src/x86/win32.S b/src/x86/win32.S
index e42baf26..1104ead8 100644
--- a/src/x86/win32.S
+++ b/src/x86/win32.S
@@ -1,10 +1,10 @@
 /* -----------------------------------------------------------------------
    win32.S - Copyright (c) 2014  Anthony Green
-	     Copyright (c) 1996, 1998, 2001, 2002, 2009  Red Hat, Inc.
-	     Copyright (c) 2001  John Beniton
-	     Copyright (c) 2002  Ranjit Mathew
-	     Copyright (c) 2009  Daniel Witte
-
+             Copyright (c) 1996, 1998, 2001, 2002, 2009  Red Hat, Inc.
+ Copyright (c) 2001 John Beniton + Copyright (c) 2002 Ranjit Mathew + Copyright (c) 2009 Daniel Witte + X86 Foreign Function Interface @@ -63,32 +63,44 @@ ffi_call_win32 PROC NEAR, mov eax, esp - ;; Place all of the ffi_prep_args in position + ;; Call ffi_prep_args push ecif push eax call ffi_prep_args - - ;; Return stack to previous state and call the function add esp, 8 - ;; Handle thiscall and fastcall - cmp cif_abi, 3 ;; FFI_THISCALL - jz do_thiscall - cmp cif_abi, 4 ;; FFI_FASTCALL - jnz do_stdcall - mov ecx, DWORD PTR [esp] - mov edx, DWORD PTR [esp+4] - add esp, 8 - jmp do_stdcall -do_thiscall: - mov ecx, DWORD PTR [esp] - add esp, 4 -do_stdcall: - call fn + ;; Prepare registers + ;; EAX stores the number of register arguments + cmp eax, 0 + je fun + cmp eax, 3 + jl prepr_two_cmp + + mov ecx, esp + add esp, 12 + mov eax, DWORD PTR [ecx+8] + jmp prepr_two +prepr_two_cmp: + cmp eax, 2 + jl prepr_one_prep + mov ecx, esp + add esp, 8 +prepr_two: + mov edx, DWORD PTR [ecx+4] + jmp prepr_one +prepr_one_prep: + mov ecx, esp + add esp, 4 +prepr_one: + mov ecx, DWORD PTR [ecx] + cmp cif_abi, 7 ;; FFI_REGISTER + jne fun - ;; cdecl: we restore esp in the epilogue, so there's no need to - ;; remove the space we pushed for the args. - ;; stdcall: the callee has already cleaned the stack. 
+ xchg ecx, eax + +fun: + ;; Call function + call fn ;; Load ecx with the return type code mov ecx, cif_flags @@ -195,20 +207,20 @@ ca_epilogue: ffi_call_win32 ENDP ffi_closure_THISCALL PROC NEAR - ;; Insert the register argument on the stack as the first argument - xchg DWORD PTR [esp+4], ecx - xchg DWORD PTR [esp], ecx - push ecx - jmp ffi_closure_STDCALL + ;; Insert the register argument on the stack as the first argument + xchg DWORD PTR [esp+4], ecx + xchg DWORD PTR [esp], ecx + push ecx + jmp ffi_closure_STDCALL ffi_closure_THISCALL ENDP ffi_closure_FASTCALL PROC NEAR - ;; Insert the register argument on the stack as the first argument - xchg DWORD PTR [esp+4], edx - xchg DWORD PTR [esp], ecx - push edx - push ecx - jmp ffi_closure_STDCALL + ;; Insert the register argument on the stack as the first argument + xchg DWORD PTR [esp+4], edx + xchg DWORD PTR [esp], ecx + push edx + push ecx + jmp ffi_closure_STDCALL ffi_closure_FASTCALL ENDP ffi_closure_SYSV PROC NEAR FORCEFRAME @@ -311,12 +323,12 @@ ffi_closure_SYSV ENDP #define CIF_FLAGS_OFFSET 20 ffi_closure_raw_THISCALL PROC NEAR USES esi FORCEFRAME - sub esp, 36 - mov esi, [eax + RAW_CLOSURE_CIF_OFFSET] ;; closure->cif - mov edx, [eax + RAW_CLOSURE_USER_DATA_OFFSET] ;; closure->user_data - mov [esp + 12], edx - lea edx, [ebp + 12] - jmp stubraw + sub esp, 36 + mov esi, [eax + RAW_CLOSURE_CIF_OFFSET] ;; closure->cif + mov edx, [eax + RAW_CLOSURE_USER_DATA_OFFSET] ;; closure->user_data + mov [esp + 12], edx + lea edx, [ebp + 12] + jmp stubraw ffi_closure_raw_THISCALL ENDP ffi_closure_raw_SYSV PROC NEAR USES esi FORCEFRAME @@ -510,14 +522,14 @@ END #else #define USCORE_SYMBOL(x) x #endif - .text + .text # This assumes we are using gas. 
.balign 16 FFI_HIDDEN(ffi_call_win32) - .globl USCORE_SYMBOL(ffi_call_win32) + .globl USCORE_SYMBOL(ffi_call_win32) #if defined(X86_WIN32) && !defined(__OS2__) - .def _ffi_call_win32; .scl 2; .type 32; .endef + .def _ffi_call_win32; .scl 2; .type 32; .endef #endif USCORE_SYMBOL(ffi_call_win32): .LFB1: @@ -531,32 +543,46 @@ USCORE_SYMBOL(ffi_call_win32): movl %esp,%eax - # Place all of the ffi_prep_args in position + # Call ffi_prep_args pushl 12(%ebp) pushl %eax call *8(%ebp) - - # Return stack to previous state and call the function addl $8,%esp - # Handle fastcall and thiscall - cmpl $3, 16(%ebp) # FFI_THISCALL - jz .do_thiscall - cmpl $4, 16(%ebp) # FFI_FASTCALL - jnz .do_fncall - movl (%esp), %ecx - movl 4(%esp), %edx - addl $8, %esp - jmp .do_fncall -.do_thiscall: - movl (%esp), %ecx - addl $4, %esp + # Prepare registers + # EAX stores the number of register arguments + cmpl $0, %eax + je .fun + cmpl $3, %eax + jl .prepr_two_cmp + + movl %esp, %ecx + addl $12, %esp + movl 8(%ecx), %eax + jmp .prepr_two +.prepr_two_cmp: + cmpl $2, %eax + jl .prepr_one_prep + movl %esp, %ecx + addl $8, %esp +.prepr_two: + movl 4(%ecx), %edx + jmp .prepr_one +.prepr_one_prep: + movl %esp, %ecx + addl $4, %esp +.prepr_one: + movl (%ecx), %ecx + cmpl $7, 16(%ebp) # FFI_REGISTER + jne .fun -.do_fncall: - + xchgl %eax, %ecx + +.fun: # FIXME: Align the stack to a 128-bit boundary to avoid # potential performance hits. + # Call function call *32(%ebp) # stdcall functions pop arguments off the stack themselves @@ -577,52 +603,52 @@ USCORE_SYMBOL(ffi_call_win32): jmp .Lepilogue 0: - call 1f - # Do not insert anything here between the call and the jump table. + call 1f + # Do not insert anything here between the call and the jump table. 
.Lstore_table: - .long .Lnoretval-.Lstore_table /* FFI_TYPE_VOID */ - .long .Lretint-.Lstore_table /* FFI_TYPE_INT */ - .long .Lretfloat-.Lstore_table /* FFI_TYPE_FLOAT */ - .long .Lretdouble-.Lstore_table /* FFI_TYPE_DOUBLE */ - .long .Lretlongdouble-.Lstore_table /* FFI_TYPE_LONGDOUBLE */ - .long .Lretuint8-.Lstore_table /* FFI_TYPE_UINT8 */ - .long .Lretsint8-.Lstore_table /* FFI_TYPE_SINT8 */ - .long .Lretuint16-.Lstore_table /* FFI_TYPE_UINT16 */ - .long .Lretsint16-.Lstore_table /* FFI_TYPE_SINT16 */ - .long .Lretint-.Lstore_table /* FFI_TYPE_UINT32 */ - .long .Lretint-.Lstore_table /* FFI_TYPE_SINT32 */ - .long .Lretint64-.Lstore_table /* FFI_TYPE_UINT64 */ - .long .Lretint64-.Lstore_table /* FFI_TYPE_SINT64 */ - .long .Lretstruct-.Lstore_table /* FFI_TYPE_STRUCT */ - .long .Lretint-.Lstore_table /* FFI_TYPE_POINTER */ - .long .Lretstruct1b-.Lstore_table /* FFI_TYPE_SMALL_STRUCT_1B */ - .long .Lretstruct2b-.Lstore_table /* FFI_TYPE_SMALL_STRUCT_2B */ - .long .Lretstruct4b-.Lstore_table /* FFI_TYPE_SMALL_STRUCT_4B */ - .long .Lretstruct-.Lstore_table /* FFI_TYPE_MS_STRUCT */ + .long .Lnoretval-.Lstore_table /* FFI_TYPE_VOID */ + .long .Lretint-.Lstore_table /* FFI_TYPE_INT */ + .long .Lretfloat-.Lstore_table /* FFI_TYPE_FLOAT */ + .long .Lretdouble-.Lstore_table /* FFI_TYPE_DOUBLE */ + .long .Lretlongdouble-.Lstore_table /* FFI_TYPE_LONGDOUBLE */ + .long .Lretuint8-.Lstore_table /* FFI_TYPE_UINT8 */ + .long .Lretsint8-.Lstore_table /* FFI_TYPE_SINT8 */ + .long .Lretuint16-.Lstore_table /* FFI_TYPE_UINT16 */ + .long .Lretsint16-.Lstore_table /* FFI_TYPE_SINT16 */ + .long .Lretint-.Lstore_table /* FFI_TYPE_UINT32 */ + .long .Lretint-.Lstore_table /* FFI_TYPE_SINT32 */ + .long .Lretint64-.Lstore_table /* FFI_TYPE_UINT64 */ + .long .Lretint64-.Lstore_table /* FFI_TYPE_SINT64 */ + .long .Lretstruct-.Lstore_table /* FFI_TYPE_STRUCT */ + .long .Lretint-.Lstore_table /* FFI_TYPE_POINTER */ + .long .Lretstruct1b-.Lstore_table /* FFI_TYPE_SMALL_STRUCT_1B */ + .long 
.Lretstruct2b-.Lstore_table /* FFI_TYPE_SMALL_STRUCT_2B */ + .long .Lretstruct4b-.Lstore_table /* FFI_TYPE_SMALL_STRUCT_4B */ + .long .Lretstruct-.Lstore_table /* FFI_TYPE_MS_STRUCT */ 1: - shl $2, %ecx - add (%esp),%ecx - mov (%ecx),%ecx - add (%esp),%ecx - add $4, %esp - jmp *%ecx + shl $2, %ecx + add (%esp),%ecx + mov (%ecx),%ecx + add (%esp),%ecx + add $4, %esp + jmp *%ecx - /* Sign/zero extend as appropriate. */ + /* Sign/zero extend as appropriate. */ .Lretsint8: - movsbl %al, %eax - jmp .Lretint + movsbl %al, %eax + jmp .Lretint .Lretsint16: - movswl %ax, %eax - jmp .Lretint + movswl %ax, %eax + jmp .Lretint .Lretuint8: - movzbl %al, %eax - jmp .Lretint + movzbl %al, %eax + jmp .Lretint .Lretuint16: - movzwl %ax, %eax - jmp .Lretint + movzwl %ax, %eax + jmp .Lretint .Lretint: # Load %ecx with the pointer to storage for the return value @@ -653,7 +679,7 @@ USCORE_SYMBOL(ffi_call_win32): movl 28(%ebp),%ecx movl %eax,0(%ecx) movl %edx,4(%ecx) - jmp .Lepilogue + jmp .Lepilogue .Lretstruct1b: # Load %ecx with the pointer to storage for the return value @@ -684,167 +710,167 @@ USCORE_SYMBOL(ffi_call_win32): .ffi_call_win32_end: .balign 16 FFI_HIDDEN(ffi_closure_THISCALL) - .globl USCORE_SYMBOL(ffi_closure_THISCALL) + .globl USCORE_SYMBOL(ffi_closure_THISCALL) #if defined(X86_WIN32) && !defined(__OS2__) - .def _ffi_closure_THISCALL; .scl 2; .type 32; .endef + .def _ffi_closure_THISCALL; .scl 2; .type 32; .endef #endif USCORE_SYMBOL(ffi_closure_THISCALL): - /* Insert the register argument on the stack as the first argument */ - xchg %ecx, 4(%esp) - xchg %ecx, (%esp) - push %ecx - jmp .ffi_closure_STDCALL_internal + /* Insert the register argument on the stack as the first argument */ + xchg %ecx, 4(%esp) + xchg %ecx, (%esp) + push %ecx + jmp .ffi_closure_STDCALL_internal .balign 16 FFI_HIDDEN(ffi_closure_FASTCALL) - .globl USCORE_SYMBOL(ffi_closure_FASTCALL) + .globl USCORE_SYMBOL(ffi_closure_FASTCALL) #if defined(X86_WIN32) && !defined(__OS2__) - .def 
_ffi_closure_FASTCALL; .scl 2; .type 32; .endef + .def _ffi_closure_FASTCALL; .scl 2; .type 32; .endef #endif USCORE_SYMBOL(ffi_closure_FASTCALL): - /* Insert the register arguments on the stack as the first two arguments */ - xchg %edx, 4(%esp) - xchg %ecx, (%esp) - push %edx - push %ecx - jmp .ffi_closure_STDCALL_internal + /* Insert the register arguments on the stack as the first two arguments */ + xchg %edx, 4(%esp) + xchg %ecx, (%esp) + push %edx + push %ecx + jmp .ffi_closure_STDCALL_internal .LFE1: # This assumes we are using gas. .balign 16 FFI_HIDDEN(ffi_closure_SYSV) #if defined(X86_WIN32) - .globl USCORE_SYMBOL(ffi_closure_SYSV) + .globl USCORE_SYMBOL(ffi_closure_SYSV) #if defined(X86_WIN32) && !defined(__OS2__) - .def _ffi_closure_SYSV; .scl 2; .type 32; .endef + .def _ffi_closure_SYSV; .scl 2; .type 32; .endef #endif USCORE_SYMBOL(ffi_closure_SYSV): #endif .LFB3: - pushl %ebp + pushl %ebp .LCFI4: - movl %esp, %ebp + movl %esp, %ebp .LCFI5: - subl $40, %esp - leal -24(%ebp), %edx - movl %edx, -12(%ebp) /* resp */ - leal 8(%ebp), %edx - movl %edx, 4(%esp) /* args = __builtin_dwarf_cfa () */ - leal -12(%ebp), %edx - movl %edx, (%esp) /* &resp */ + subl $40, %esp + leal -24(%ebp), %edx + movl %edx, -12(%ebp) /* resp */ + leal 8(%ebp), %edx + movl %edx, 4(%esp) /* args = __builtin_dwarf_cfa () */ + leal -12(%ebp), %edx + movl %edx, (%esp) /* &resp */ #if defined(HAVE_HIDDEN_VISIBILITY_ATTRIBUTE) || !defined(__PIC__) - call USCORE_SYMBOL(ffi_closure_SYSV_inner) + call USCORE_SYMBOL(ffi_closure_SYSV_inner) #elif defined(X86_DARWIN) - calll L_ffi_closure_SYSV_inner$stub + calll L_ffi_closure_SYSV_inner$stub #else - movl %ebx, 8(%esp) - call 1f + movl %ebx, 8(%esp) + call 1f 1: popl %ebx - addl $_GLOBAL_OFFSET_TABLE_+[.-1b], %ebx - call ffi_closure_SYSV_inner@PLT - movl 8(%esp), %ebx + addl $_GLOBAL_OFFSET_TABLE_+[.-1b], %ebx + call ffi_closure_SYSV_inner@PLT + movl 8(%esp), %ebx #endif - movl -12(%ebp), %ecx + movl -12(%ebp), %ecx 0: - call 1f - # Do not 
insert anything here between the call and the jump table. + call 1f + # Do not insert anything here between the call and the jump table. .Lcls_store_table: - .long .Lcls_noretval-.Lcls_store_table /* FFI_TYPE_VOID */ - .long .Lcls_retint-.Lcls_store_table /* FFI_TYPE_INT */ - .long .Lcls_retfloat-.Lcls_store_table /* FFI_TYPE_FLOAT */ - .long .Lcls_retdouble-.Lcls_store_table /* FFI_TYPE_DOUBLE */ - .long .Lcls_retldouble-.Lcls_store_table /* FFI_TYPE_LONGDOUBLE */ - .long .Lcls_retuint8-.Lcls_store_table /* FFI_TYPE_UINT8 */ - .long .Lcls_retsint8-.Lcls_store_table /* FFI_TYPE_SINT8 */ - .long .Lcls_retuint16-.Lcls_store_table /* FFI_TYPE_UINT16 */ - .long .Lcls_retsint16-.Lcls_store_table /* FFI_TYPE_SINT16 */ - .long .Lcls_retint-.Lcls_store_table /* FFI_TYPE_UINT32 */ - .long .Lcls_retint-.Lcls_store_table /* FFI_TYPE_SINT32 */ - .long .Lcls_retllong-.Lcls_store_table /* FFI_TYPE_UINT64 */ - .long .Lcls_retllong-.Lcls_store_table /* FFI_TYPE_SINT64 */ - .long .Lcls_retstruct-.Lcls_store_table /* FFI_TYPE_STRUCT */ - .long .Lcls_retint-.Lcls_store_table /* FFI_TYPE_POINTER */ - .long .Lcls_retstruct1-.Lcls_store_table /* FFI_TYPE_SMALL_STRUCT_1B */ - .long .Lcls_retstruct2-.Lcls_store_table /* FFI_TYPE_SMALL_STRUCT_2B */ - .long .Lcls_retstruct4-.Lcls_store_table /* FFI_TYPE_SMALL_STRUCT_4B */ - .long .Lcls_retmsstruct-.Lcls_store_table /* FFI_TYPE_MS_STRUCT */ + .long .Lcls_noretval-.Lcls_store_table /* FFI_TYPE_VOID */ + .long .Lcls_retint-.Lcls_store_table /* FFI_TYPE_INT */ + .long .Lcls_retfloat-.Lcls_store_table /* FFI_TYPE_FLOAT */ + .long .Lcls_retdouble-.Lcls_store_table /* FFI_TYPE_DOUBLE */ + .long .Lcls_retldouble-.Lcls_store_table /* FFI_TYPE_LONGDOUBLE */ + .long .Lcls_retuint8-.Lcls_store_table /* FFI_TYPE_UINT8 */ + .long .Lcls_retsint8-.Lcls_store_table /* FFI_TYPE_SINT8 */ + .long .Lcls_retuint16-.Lcls_store_table /* FFI_TYPE_UINT16 */ + .long .Lcls_retsint16-.Lcls_store_table /* FFI_TYPE_SINT16 */ + .long .Lcls_retint-.Lcls_store_table /* 
FFI_TYPE_UINT32 */ + .long .Lcls_retint-.Lcls_store_table /* FFI_TYPE_SINT32 */ + .long .Lcls_retllong-.Lcls_store_table /* FFI_TYPE_UINT64 */ + .long .Lcls_retllong-.Lcls_store_table /* FFI_TYPE_SINT64 */ + .long .Lcls_retstruct-.Lcls_store_table /* FFI_TYPE_STRUCT */ + .long .Lcls_retint-.Lcls_store_table /* FFI_TYPE_POINTER */ + .long .Lcls_retstruct1-.Lcls_store_table /* FFI_TYPE_SMALL_STRUCT_1B */ + .long .Lcls_retstruct2-.Lcls_store_table /* FFI_TYPE_SMALL_STRUCT_2B */ + .long .Lcls_retstruct4-.Lcls_store_table /* FFI_TYPE_SMALL_STRUCT_4B */ + .long .Lcls_retmsstruct-.Lcls_store_table /* FFI_TYPE_MS_STRUCT */ 1: - shl $2, %eax - add (%esp),%eax - mov (%eax),%eax - add (%esp),%eax - add $4, %esp - jmp *%eax + shl $2, %eax + add (%esp),%eax + mov (%eax),%eax + add (%esp),%eax + add $4, %esp + jmp *%eax - /* Sign/zero extend as appropriate. */ + /* Sign/zero extend as appropriate. */ .Lcls_retsint8: - movsbl (%ecx), %eax - jmp .Lcls_epilogue + movsbl (%ecx), %eax + jmp .Lcls_epilogue .Lcls_retsint16: - movswl (%ecx), %eax - jmp .Lcls_epilogue + movswl (%ecx), %eax + jmp .Lcls_epilogue .Lcls_retuint8: - movzbl (%ecx), %eax - jmp .Lcls_epilogue + movzbl (%ecx), %eax + jmp .Lcls_epilogue .Lcls_retuint16: - movzwl (%ecx), %eax - jmp .Lcls_epilogue + movzwl (%ecx), %eax + jmp .Lcls_epilogue .Lcls_retint: - movl (%ecx), %eax - jmp .Lcls_epilogue + movl (%ecx), %eax + jmp .Lcls_epilogue .Lcls_retfloat: - flds (%ecx) - jmp .Lcls_epilogue + flds (%ecx) + jmp .Lcls_epilogue .Lcls_retdouble: - fldl (%ecx) - jmp .Lcls_epilogue + fldl (%ecx) + jmp .Lcls_epilogue .Lcls_retldouble: - fldt (%ecx) - jmp .Lcls_epilogue + fldt (%ecx) + jmp .Lcls_epilogue .Lcls_retllong: - movl (%ecx), %eax - movl 4(%ecx), %edx - jmp .Lcls_epilogue + movl (%ecx), %eax + movl 4(%ecx), %edx + jmp .Lcls_epilogue .Lcls_retstruct1: - movsbl (%ecx), %eax - jmp .Lcls_epilogue + movsbl (%ecx), %eax + jmp .Lcls_epilogue .Lcls_retstruct2: - movswl (%ecx), %eax - jmp .Lcls_epilogue + movswl (%ecx), %eax + jmp 
.Lcls_epilogue .Lcls_retstruct4: - movl (%ecx), %eax - jmp .Lcls_epilogue + movl (%ecx), %eax + jmp .Lcls_epilogue .Lcls_retstruct: # Caller expects us to pop struct return value pointer hidden arg. - movl %ebp, %esp - popl %ebp - ret $0x4 + movl %ebp, %esp + popl %ebp + ret $0x4 .Lcls_retmsstruct: - # Caller expects us to return a pointer to the real return value. - mov %ecx, %eax - # Caller doesn't expects us to pop struct return value pointer hidden arg. - jmp .Lcls_epilogue + # Caller expects us to return a pointer to the real return value. + mov %ecx, %eax + # Caller doesn't expects us to pop struct return value pointer hidden arg. + jmp .Lcls_epilogue .Lcls_noretval: .Lcls_epilogue: - movl %ebp, %esp - popl %ebp - ret + movl %ebp, %esp + popl %ebp + ret .ffi_closure_SYSV_end: .LFE3: @@ -858,277 +884,277 @@ USCORE_SYMBOL(ffi_closure_SYSV): #ifdef X86_WIN32 .balign 16 FFI_HIDDEN(ffi_closure_raw_THISCALL) - .globl USCORE_SYMBOL(ffi_closure_raw_THISCALL) + .globl USCORE_SYMBOL(ffi_closure_raw_THISCALL) #if defined(X86_WIN32) && !defined(__OS2__) - .def _ffi_closure_raw_THISCALL; .scl 2; .type 32; .endef + .def _ffi_closure_raw_THISCALL; .scl 2; .type 32; .endef #endif USCORE_SYMBOL(ffi_closure_raw_THISCALL): - pushl %ebp - movl %esp, %ebp - pushl %esi - subl $36, %esp - movl RAW_CLOSURE_CIF_OFFSET(%eax), %esi /* closure->cif */ - movl RAW_CLOSURE_USER_DATA_OFFSET(%eax), %edx /* closure->user_data */ - movl %edx, 12(%esp) /* user_data */ - leal 12(%ebp), %edx /* __builtin_dwarf_cfa () */ - jmp .stubraw + pushl %ebp + movl %esp, %ebp + pushl %esi + subl $36, %esp + movl RAW_CLOSURE_CIF_OFFSET(%eax), %esi /* closure->cif */ + movl RAW_CLOSURE_USER_DATA_OFFSET(%eax), %edx /* closure->user_data */ + movl %edx, 12(%esp) /* user_data */ + leal 12(%ebp), %edx /* __builtin_dwarf_cfa () */ + jmp .stubraw #endif /* X86_WIN32 */ # This assumes we are using gas. 
.balign 16 #if defined(X86_WIN32) - .globl USCORE_SYMBOL(ffi_closure_raw_SYSV) + .globl USCORE_SYMBOL(ffi_closure_raw_SYSV) #if defined(X86_WIN32) && !defined(__OS2__) - .def _ffi_closure_raw_SYSV; .scl 2; .type 32; .endef + .def _ffi_closure_raw_SYSV; .scl 2; .type 32; .endef #endif USCORE_SYMBOL(ffi_closure_raw_SYSV): #endif /* defined(X86_WIN32) */ .LFB4: - pushl %ebp + pushl %ebp .LCFI6: - movl %esp, %ebp + movl %esp, %ebp .LCFI7: - pushl %esi + pushl %esi .LCFI8: - subl $36, %esp - movl RAW_CLOSURE_CIF_OFFSET(%eax), %esi /* closure->cif */ - movl RAW_CLOSURE_USER_DATA_OFFSET(%eax), %edx /* closure->user_data */ - movl %edx, 12(%esp) /* user_data */ - leal 8(%ebp), %edx /* __builtin_dwarf_cfa () */ + subl $36, %esp + movl RAW_CLOSURE_CIF_OFFSET(%eax), %esi /* closure->cif */ + movl RAW_CLOSURE_USER_DATA_OFFSET(%eax), %edx /* closure->user_data */ + movl %edx, 12(%esp) /* user_data */ + leal 8(%ebp), %edx /* __builtin_dwarf_cfa () */ .stubraw: - movl %edx, 8(%esp) /* raw_args */ - leal -24(%ebp), %edx - movl %edx, 4(%esp) /* &res */ - movl %esi, (%esp) /* cif */ - call *RAW_CLOSURE_FUN_OFFSET(%eax) /* closure->fun */ - movl CIF_FLAGS_OFFSET(%esi), %eax /* rtype */ + movl %edx, 8(%esp) /* raw_args */ + leal -24(%ebp), %edx + movl %edx, 4(%esp) /* &res */ + movl %esi, (%esp) /* cif */ + call *RAW_CLOSURE_FUN_OFFSET(%eax) /* closure->fun */ + movl CIF_FLAGS_OFFSET(%esi), %eax /* rtype */ 0: - call 1f - # Do not insert anything here between the call and the jump table. + call 1f + # Do not insert anything here between the call and the jump table. 
.Lrcls_store_table: - .long .Lrcls_noretval-.Lrcls_store_table /* FFI_TYPE_VOID */ - .long .Lrcls_retint-.Lrcls_store_table /* FFI_TYPE_INT */ - .long .Lrcls_retfloat-.Lrcls_store_table /* FFI_TYPE_FLOAT */ - .long .Lrcls_retdouble-.Lrcls_store_table /* FFI_TYPE_DOUBLE */ - .long .Lrcls_retldouble-.Lrcls_store_table /* FFI_TYPE_LONGDOUBLE */ - .long .Lrcls_retuint8-.Lrcls_store_table /* FFI_TYPE_UINT8 */ - .long .Lrcls_retsint8-.Lrcls_store_table /* FFI_TYPE_SINT8 */ - .long .Lrcls_retuint16-.Lrcls_store_table /* FFI_TYPE_UINT16 */ - .long .Lrcls_retsint16-.Lrcls_store_table /* FFI_TYPE_SINT16 */ - .long .Lrcls_retint-.Lrcls_store_table /* FFI_TYPE_UINT32 */ - .long .Lrcls_retint-.Lrcls_store_table /* FFI_TYPE_SINT32 */ - .long .Lrcls_retllong-.Lrcls_store_table /* FFI_TYPE_UINT64 */ - .long .Lrcls_retllong-.Lrcls_store_table /* FFI_TYPE_SINT64 */ - .long .Lrcls_retstruct-.Lrcls_store_table /* FFI_TYPE_STRUCT */ - .long .Lrcls_retint-.Lrcls_store_table /* FFI_TYPE_POINTER */ - .long .Lrcls_retstruct1-.Lrcls_store_table /* FFI_TYPE_SMALL_STRUCT_1B */ - .long .Lrcls_retstruct2-.Lrcls_store_table /* FFI_TYPE_SMALL_STRUCT_2B */ - .long .Lrcls_retstruct4-.Lrcls_store_table /* FFI_TYPE_SMALL_STRUCT_4B */ - .long .Lrcls_retstruct-.Lrcls_store_table /* FFI_TYPE_MS_STRUCT */ + .long .Lrcls_noretval-.Lrcls_store_table /* FFI_TYPE_VOID */ + .long .Lrcls_retint-.Lrcls_store_table /* FFI_TYPE_INT */ + .long .Lrcls_retfloat-.Lrcls_store_table /* FFI_TYPE_FLOAT */ + .long .Lrcls_retdouble-.Lrcls_store_table /* FFI_TYPE_DOUBLE */ + .long .Lrcls_retldouble-.Lrcls_store_table /* FFI_TYPE_LONGDOUBLE */ + .long .Lrcls_retuint8-.Lrcls_store_table /* FFI_TYPE_UINT8 */ + .long .Lrcls_retsint8-.Lrcls_store_table /* FFI_TYPE_SINT8 */ + .long .Lrcls_retuint16-.Lrcls_store_table /* FFI_TYPE_UINT16 */ + .long .Lrcls_retsint16-.Lrcls_store_table /* FFI_TYPE_SINT16 */ + .long .Lrcls_retint-.Lrcls_store_table /* FFI_TYPE_UINT32 */ + .long .Lrcls_retint-.Lrcls_store_table /* FFI_TYPE_SINT32 */ + 
.long .Lrcls_retllong-.Lrcls_store_table /* FFI_TYPE_UINT64 */ + .long .Lrcls_retllong-.Lrcls_store_table /* FFI_TYPE_SINT64 */ + .long .Lrcls_retstruct-.Lrcls_store_table /* FFI_TYPE_STRUCT */ + .long .Lrcls_retint-.Lrcls_store_table /* FFI_TYPE_POINTER */ + .long .Lrcls_retstruct1-.Lrcls_store_table /* FFI_TYPE_SMALL_STRUCT_1B */ + .long .Lrcls_retstruct2-.Lrcls_store_table /* FFI_TYPE_SMALL_STRUCT_2B */ + .long .Lrcls_retstruct4-.Lrcls_store_table /* FFI_TYPE_SMALL_STRUCT_4B */ + .long .Lrcls_retstruct-.Lrcls_store_table /* FFI_TYPE_MS_STRUCT */ 1: - shl $2, %eax - add (%esp),%eax - mov (%eax),%eax - add (%esp),%eax - add $4, %esp - jmp *%eax + shl $2, %eax + add (%esp),%eax + mov (%eax),%eax + add (%esp),%eax + add $4, %esp + jmp *%eax - /* Sign/zero extend as appropriate. */ + /* Sign/zero extend as appropriate. */ .Lrcls_retsint8: - movsbl -24(%ebp), %eax - jmp .Lrcls_epilogue + movsbl -24(%ebp), %eax + jmp .Lrcls_epilogue .Lrcls_retsint16: - movswl -24(%ebp), %eax - jmp .Lrcls_epilogue + movswl -24(%ebp), %eax + jmp .Lrcls_epilogue .Lrcls_retuint8: - movzbl -24(%ebp), %eax - jmp .Lrcls_epilogue + movzbl -24(%ebp), %eax + jmp .Lrcls_epilogue .Lrcls_retuint16: - movzwl -24(%ebp), %eax - jmp .Lrcls_epilogue + movzwl -24(%ebp), %eax + jmp .Lrcls_epilogue .Lrcls_retint: - movl -24(%ebp), %eax - jmp .Lrcls_epilogue + movl -24(%ebp), %eax + jmp .Lrcls_epilogue .Lrcls_retfloat: - flds -24(%ebp) - jmp .Lrcls_epilogue + flds -24(%ebp) + jmp .Lrcls_epilogue .Lrcls_retdouble: - fldl -24(%ebp) - jmp .Lrcls_epilogue + fldl -24(%ebp) + jmp .Lrcls_epilogue .Lrcls_retldouble: - fldt -24(%ebp) - jmp .Lrcls_epilogue + fldt -24(%ebp) + jmp .Lrcls_epilogue .Lrcls_retllong: - movl -24(%ebp), %eax - movl -20(%ebp), %edx - jmp .Lrcls_epilogue + movl -24(%ebp), %eax + movl -20(%ebp), %edx + jmp .Lrcls_epilogue .Lrcls_retstruct1: - movsbl -24(%ebp), %eax - jmp .Lrcls_epilogue + movsbl -24(%ebp), %eax + jmp .Lrcls_epilogue .Lrcls_retstruct2: - movswl -24(%ebp), %eax - jmp 
.Lrcls_epilogue + movswl -24(%ebp), %eax + jmp .Lrcls_epilogue .Lrcls_retstruct4: - movl -24(%ebp), %eax - jmp .Lrcls_epilogue + movl -24(%ebp), %eax + jmp .Lrcls_epilogue .Lrcls_retstruct: - # Nothing to do! + # Nothing to do! .Lrcls_noretval: .Lrcls_epilogue: - addl $36, %esp - popl %esi - popl %ebp - ret + addl $36, %esp + popl %esi + popl %ebp + ret .ffi_closure_raw_SYSV_end: .LFE4: #endif /* !FFI_NO_RAW_API */ # This assumes we are using gas. - .balign 16 + .balign 16 FFI_HIDDEN(ffi_closure_STDCALL) - .globl USCORE_SYMBOL(ffi_closure_STDCALL) + .globl USCORE_SYMBOL(ffi_closure_STDCALL) #if defined(X86_WIN32) && !defined(__OS2__) - .def _ffi_closure_STDCALL; .scl 2; .type 32; .endef + .def _ffi_closure_STDCALL; .scl 2; .type 32; .endef #endif USCORE_SYMBOL(ffi_closure_STDCALL): .ffi_closure_STDCALL_internal: .LFB5: - pushl %ebp + pushl %ebp .LCFI9: - movl %esp, %ebp + movl %esp, %ebp .LCFI10: - subl $40, %esp - leal -24(%ebp), %edx - movl %edx, -12(%ebp) /* resp */ - leal 12(%ebp), %edx /* account for stub return address on stack */ - movl %edx, 4(%esp) /* args */ - leal -12(%ebp), %edx - movl %edx, (%esp) /* &resp */ + subl $40, %esp + leal -24(%ebp), %edx + movl %edx, -12(%ebp) /* resp */ + leal 12(%ebp), %edx /* account for stub return address on stack */ + movl %edx, 4(%esp) /* args */ + leal -12(%ebp), %edx + movl %edx, (%esp) /* &resp */ #if defined(HAVE_HIDDEN_VISIBILITY_ATTRIBUTE) || !defined(__PIC__) - call USCORE_SYMBOL(ffi_closure_SYSV_inner) + call USCORE_SYMBOL(ffi_closure_SYSV_inner) #elif defined(X86_DARWIN) - calll L_ffi_closure_SYSV_inner$stub + calll L_ffi_closure_SYSV_inner$stub #else - movl %ebx, 8(%esp) - call 1f + movl %ebx, 8(%esp) + call 1f 1: popl %ebx - addl $_GLOBAL_OFFSET_TABLE_+[.-1b], %ebx - call ffi_closure_SYSV_inner@PLT - movl 8(%esp), %ebx + addl $_GLOBAL_OFFSET_TABLE_+[.-1b], %ebx + call ffi_closure_SYSV_inner@PLT + movl 8(%esp), %ebx #endif - movl -12(%ebp), %ecx + movl -12(%ebp), %ecx 0: - call 1f - # Do not insert anything 
here between the call and the jump table. + call 1f + # Do not insert anything here between the call and the jump table. .Lscls_store_table: - .long .Lscls_noretval-.Lscls_store_table /* FFI_TYPE_VOID */ - .long .Lscls_retint-.Lscls_store_table /* FFI_TYPE_INT */ - .long .Lscls_retfloat-.Lscls_store_table /* FFI_TYPE_FLOAT */ - .long .Lscls_retdouble-.Lscls_store_table /* FFI_TYPE_DOUBLE */ - .long .Lscls_retldouble-.Lscls_store_table /* FFI_TYPE_LONGDOUBLE */ - .long .Lscls_retuint8-.Lscls_store_table /* FFI_TYPE_UINT8 */ - .long .Lscls_retsint8-.Lscls_store_table /* FFI_TYPE_SINT8 */ - .long .Lscls_retuint16-.Lscls_store_table /* FFI_TYPE_UINT16 */ - .long .Lscls_retsint16-.Lscls_store_table /* FFI_TYPE_SINT16 */ - .long .Lscls_retint-.Lscls_store_table /* FFI_TYPE_UINT32 */ - .long .Lscls_retint-.Lscls_store_table /* FFI_TYPE_SINT32 */ - .long .Lscls_retllong-.Lscls_store_table /* FFI_TYPE_UINT64 */ - .long .Lscls_retllong-.Lscls_store_table /* FFI_TYPE_SINT64 */ - .long .Lscls_retstruct-.Lscls_store_table /* FFI_TYPE_STRUCT */ - .long .Lscls_retint-.Lscls_store_table /* FFI_TYPE_POINTER */ - .long .Lscls_retstruct1-.Lscls_store_table /* FFI_TYPE_SMALL_STRUCT_1B */ - .long .Lscls_retstruct2-.Lscls_store_table /* FFI_TYPE_SMALL_STRUCT_2B */ - .long .Lscls_retstruct4-.Lscls_store_table /* FFI_TYPE_SMALL_STRUCT_4B */ + .long .Lscls_noretval-.Lscls_store_table /* FFI_TYPE_VOID */ + .long .Lscls_retint-.Lscls_store_table /* FFI_TYPE_INT */ + .long .Lscls_retfloat-.Lscls_store_table /* FFI_TYPE_FLOAT */ + .long .Lscls_retdouble-.Lscls_store_table /* FFI_TYPE_DOUBLE */ + .long .Lscls_retldouble-.Lscls_store_table /* FFI_TYPE_LONGDOUBLE */ + .long .Lscls_retuint8-.Lscls_store_table /* FFI_TYPE_UINT8 */ + .long .Lscls_retsint8-.Lscls_store_table /* FFI_TYPE_SINT8 */ + .long .Lscls_retuint16-.Lscls_store_table /* FFI_TYPE_UINT16 */ + .long .Lscls_retsint16-.Lscls_store_table /* FFI_TYPE_SINT16 */ + .long .Lscls_retint-.Lscls_store_table /* FFI_TYPE_UINT32 */ + .long 
.Lscls_retint-.Lscls_store_table /* FFI_TYPE_SINT32 */ + .long .Lscls_retllong-.Lscls_store_table /* FFI_TYPE_UINT64 */ + .long .Lscls_retllong-.Lscls_store_table /* FFI_TYPE_SINT64 */ + .long .Lscls_retstruct-.Lscls_store_table /* FFI_TYPE_STRUCT */ + .long .Lscls_retint-.Lscls_store_table /* FFI_TYPE_POINTER */ + .long .Lscls_retstruct1-.Lscls_store_table /* FFI_TYPE_SMALL_STRUCT_1B */ + .long .Lscls_retstruct2-.Lscls_store_table /* FFI_TYPE_SMALL_STRUCT_2B */ + .long .Lscls_retstruct4-.Lscls_store_table /* FFI_TYPE_SMALL_STRUCT_4B */ 1: - shl $2, %eax - add (%esp),%eax - mov (%eax),%eax - add (%esp),%eax - add $4, %esp - jmp *%eax + shl $2, %eax + add (%esp),%eax + mov (%eax),%eax + add (%esp),%eax + add $4, %esp + jmp *%eax - /* Sign/zero extend as appropriate. */ + /* Sign/zero extend as appropriate. */ .Lscls_retsint8: - movsbl (%ecx), %eax - jmp .Lscls_epilogue + movsbl (%ecx), %eax + jmp .Lscls_epilogue .Lscls_retsint16: - movswl (%ecx), %eax - jmp .Lscls_epilogue + movswl (%ecx), %eax + jmp .Lscls_epilogue .Lscls_retuint8: - movzbl (%ecx), %eax - jmp .Lscls_epilogue + movzbl (%ecx), %eax + jmp .Lscls_epilogue .Lscls_retuint16: - movzwl (%ecx), %eax - jmp .Lscls_epilogue + movzwl (%ecx), %eax + jmp .Lscls_epilogue .Lscls_retint: - movl (%ecx), %eax - jmp .Lscls_epilogue + movl (%ecx), %eax + jmp .Lscls_epilogue .Lscls_retfloat: - flds (%ecx) - jmp .Lscls_epilogue + flds (%ecx) + jmp .Lscls_epilogue .Lscls_retdouble: - fldl (%ecx) - jmp .Lscls_epilogue + fldl (%ecx) + jmp .Lscls_epilogue .Lscls_retldouble: - fldt (%ecx) - jmp .Lscls_epilogue + fldt (%ecx) + jmp .Lscls_epilogue .Lscls_retllong: - movl (%ecx), %eax - movl 4(%ecx), %edx - jmp .Lscls_epilogue + movl (%ecx), %eax + movl 4(%ecx), %edx + jmp .Lscls_epilogue .Lscls_retstruct1: - movsbl (%ecx), %eax - jmp .Lscls_epilogue + movsbl (%ecx), %eax + jmp .Lscls_epilogue .Lscls_retstruct2: - movswl (%ecx), %eax - jmp .Lscls_epilogue + movswl (%ecx), %eax + jmp .Lscls_epilogue .Lscls_retstruct4: - movl 
(%ecx), %eax - jmp .Lscls_epilogue + movl (%ecx), %eax + jmp .Lscls_epilogue .Lscls_retstruct: - # Nothing to do! + # Nothing to do! .Lscls_noretval: .Lscls_epilogue: - movl %ebp, %esp - popl %ebp - popl %ecx - popl %edx - movl (CLOSURE_CIF_OFFSET-10)(%ecx), %ecx - addl CIF_BYTES_OFFSET(%ecx), %esp - movl CIF_ABI_OFFSET(%ecx), %ecx - cmpl $3, %ecx /* FFI_THISCALL */ - je 1f - cmpl $4, %ecx /* FFI_FASTCALL */ - jne 2f + movl %ebp, %esp + popl %ebp + popl %ecx + popl %edx + movl (CLOSURE_CIF_OFFSET-10)(%ecx), %ecx + addl CIF_BYTES_OFFSET(%ecx), %esp + movl CIF_ABI_OFFSET(%ecx), %ecx + cmpl $3, %ecx /* FFI_THISCALL */ + je 1f + cmpl $4, %ecx /* FFI_FASTCALL */ + jne 2f - addl $4, %esp + addl $4, %esp 1: addl $4, %esp 2: jmp *%edx .ffi_closure_STDCALL_end: @@ -1137,174 +1163,174 @@ USCORE_SYMBOL(ffi_closure_STDCALL): #if defined(X86_DARWIN) .section __IMPORT,__jump_table,symbol_stubs,self_modifying_code+pure_instructions,5 L_ffi_closure_SYSV_inner$stub: - .indirect_symbol _ffi_closure_SYSV_inner - hlt ; hlt ; hlt ; hlt ; hlt + .indirect_symbol _ffi_closure_SYSV_inner + hlt ; hlt ; hlt ; hlt ; hlt #endif #if defined(X86_WIN32) && !defined(__OS2__) - .section .eh_frame,"w" + .section .eh_frame,"w" #endif .Lframe1: .LSCIE1: - .long .LECIE1-.LASCIE1 /* Length of Common Information Entry */ + .long .LECIE1-.LASCIE1 /* Length of Common Information Entry */ .LASCIE1: - .long 0x0 /* CIE Identifier Tag */ - .byte 0x1 /* CIE Version */ + .long 0x0 /* CIE Identifier Tag */ + .byte 0x1 /* CIE Version */ #ifdef __PIC__ - .ascii "zR\0" /* CIE Augmentation */ + .ascii "zR\0" /* CIE Augmentation */ #else - .ascii "\0" /* CIE Augmentation */ + .ascii "\0" /* CIE Augmentation */ #endif - .byte 0x1 /* .uleb128 0x1; CIE Code Alignment Factor */ - .byte 0x7c /* .sleb128 -4; CIE Data Alignment Factor */ - .byte 0x8 /* CIE RA Column */ + .byte 0x1 /* .uleb128 0x1; CIE Code Alignment Factor */ + .byte 0x7c /* .sleb128 -4; CIE Data Alignment Factor */ + .byte 0x8 /* CIE RA Column */ #ifdef 
__PIC__ - .byte 0x1 /* .uleb128 0x1; Augmentation size */ - .byte 0x1b /* FDE Encoding (pcrel sdata4) */ + .byte 0x1 /* .uleb128 0x1; Augmentation size */ + .byte 0x1b /* FDE Encoding (pcrel sdata4) */ #endif - .byte 0xc /* DW_CFA_def_cfa CFA = r4 + 4 = 4(%esp) */ - .byte 0x4 /* .uleb128 0x4 */ - .byte 0x4 /* .uleb128 0x4 */ - .byte 0x88 /* DW_CFA_offset, column 0x8 %eip at CFA + 1 * -4 */ - .byte 0x1 /* .uleb128 0x1 */ - .align 4 + .byte 0xc /* DW_CFA_def_cfa CFA = r4 + 4 = 4(%esp) */ + .byte 0x4 /* .uleb128 0x4 */ + .byte 0x4 /* .uleb128 0x4 */ + .byte 0x88 /* DW_CFA_offset, column 0x8 %eip at CFA + 1 * -4 */ + .byte 0x1 /* .uleb128 0x1 */ + .align 4 .LECIE1: .LSFDE1: - .long .LEFDE1-.LASFDE1 /* FDE Length */ + .long .LEFDE1-.LASFDE1 /* FDE Length */ .LASFDE1: - .long .LASFDE1-.Lframe1 /* FDE CIE offset */ + .long .LASFDE1-.Lframe1 /* FDE CIE offset */ #if defined __PIC__ && defined HAVE_AS_X86_PCREL - .long .LFB1-. /* FDE initial location */ + .long .LFB1-. /* FDE initial location */ #else - .long .LFB1 + .long .LFB1 #endif - .long .LFE1-.LFB1 /* FDE address range */ + .long .LFE1-.LFB1 /* FDE address range */ #ifdef __PIC__ - .byte 0x0 /* .uleb128 0x0; Augmentation size */ + .byte 0x0 /* .uleb128 0x0; Augmentation size */ #endif - /* DW_CFA_xxx CFI instructions go here. */ + /* DW_CFA_xxx CFI instructions go here. 
*/ - .byte 0x4 /* DW_CFA_advance_loc4 */ - .long .LCFI0-.LFB1 - .byte 0xe /* DW_CFA_def_cfa_offset CFA = r4 + 8 = 8(%esp) */ - .byte 0x8 /* .uleb128 0x8 */ - .byte 0x85 /* DW_CFA_offset, column 0x5 %ebp at CFA + 2 * -4 */ - .byte 0x2 /* .uleb128 0x2 */ + .byte 0x4 /* DW_CFA_advance_loc4 */ + .long .LCFI0-.LFB1 + .byte 0xe /* DW_CFA_def_cfa_offset CFA = r4 + 8 = 8(%esp) */ + .byte 0x8 /* .uleb128 0x8 */ + .byte 0x85 /* DW_CFA_offset, column 0x5 %ebp at CFA + 2 * -4 */ + .byte 0x2 /* .uleb128 0x2 */ - .byte 0x4 /* DW_CFA_advance_loc4 */ - .long .LCFI1-.LCFI0 - .byte 0xd /* DW_CFA_def_cfa_register CFA = r5 = %ebp */ - .byte 0x5 /* .uleb128 0x5 */ + .byte 0x4 /* DW_CFA_advance_loc4 */ + .long .LCFI1-.LCFI0 + .byte 0xd /* DW_CFA_def_cfa_register CFA = r5 = %ebp */ + .byte 0x5 /* .uleb128 0x5 */ - /* End of DW_CFA_xxx CFI instructions. */ - .align 4 + /* End of DW_CFA_xxx CFI instructions. */ + .align 4 .LEFDE1: .LSFDE3: - .long .LEFDE3-.LASFDE3 /* FDE Length */ + .long .LEFDE3-.LASFDE3 /* FDE Length */ .LASFDE3: - .long .LASFDE3-.Lframe1 /* FDE CIE offset */ + .long .LASFDE3-.Lframe1 /* FDE CIE offset */ #if defined __PIC__ && defined HAVE_AS_X86_PCREL - .long .LFB3-. /* FDE initial location */ + .long .LFB3-. /* FDE initial location */ #else - .long .LFB3 + .long .LFB3 #endif - .long .LFE3-.LFB3 /* FDE address range */ + .long .LFE3-.LFB3 /* FDE address range */ #ifdef __PIC__ - .byte 0x0 /* .uleb128 0x0; Augmentation size */ + .byte 0x0 /* .uleb128 0x0; Augmentation size */ #endif - /* DW_CFA_xxx CFI instructions go here. */ + /* DW_CFA_xxx CFI instructions go here. 
*/ - .byte 0x4 /* DW_CFA_advance_loc4 */ - .long .LCFI4-.LFB3 - .byte 0xe /* DW_CFA_def_cfa_offset CFA = r4 + 8 = 8(%esp) */ - .byte 0x8 /* .uleb128 0x8 */ - .byte 0x85 /* DW_CFA_offset, column 0x5 %ebp at CFA + 2 * -4 */ - .byte 0x2 /* .uleb128 0x2 */ + .byte 0x4 /* DW_CFA_advance_loc4 */ + .long .LCFI4-.LFB3 + .byte 0xe /* DW_CFA_def_cfa_offset CFA = r4 + 8 = 8(%esp) */ + .byte 0x8 /* .uleb128 0x8 */ + .byte 0x85 /* DW_CFA_offset, column 0x5 %ebp at CFA + 2 * -4 */ + .byte 0x2 /* .uleb128 0x2 */ - .byte 0x4 /* DW_CFA_advance_loc4 */ - .long .LCFI5-.LCFI4 - .byte 0xd /* DW_CFA_def_cfa_register CFA = r5 = %ebp */ - .byte 0x5 /* .uleb128 0x5 */ + .byte 0x4 /* DW_CFA_advance_loc4 */ + .long .LCFI5-.LCFI4 + .byte 0xd /* DW_CFA_def_cfa_register CFA = r5 = %ebp */ + .byte 0x5 /* .uleb128 0x5 */ - /* End of DW_CFA_xxx CFI instructions. */ - .align 4 + /* End of DW_CFA_xxx CFI instructions. */ + .align 4 .LEFDE3: #if !FFI_NO_RAW_API .LSFDE4: - .long .LEFDE4-.LASFDE4 /* FDE Length */ + .long .LEFDE4-.LASFDE4 /* FDE Length */ .LASFDE4: - .long .LASFDE4-.Lframe1 /* FDE CIE offset */ + .long .LASFDE4-.Lframe1 /* FDE CIE offset */ #if defined __PIC__ && defined HAVE_AS_X86_PCREL - .long .LFB4-. /* FDE initial location */ + .long .LFB4-. /* FDE initial location */ #else - .long .LFB4 + .long .LFB4 #endif - .long .LFE4-.LFB4 /* FDE address range */ + .long .LFE4-.LFB4 /* FDE address range */ #ifdef __PIC__ - .byte 0x0 /* .uleb128 0x0; Augmentation size */ + .byte 0x0 /* .uleb128 0x0; Augmentation size */ #endif - /* DW_CFA_xxx CFI instructions go here. */ + /* DW_CFA_xxx CFI instructions go here. 
*/ - .byte 0x4 /* DW_CFA_advance_loc4 */ - .long .LCFI6-.LFB4 - .byte 0xe /* DW_CFA_def_cfa_offset CFA = r4 + 8 = 8(%esp) */ - .byte 0x8 /* .uleb128 0x8 */ - .byte 0x85 /* DW_CFA_offset, column 0x5 %ebp at CFA + 2 * -4 */ - .byte 0x2 /* .uleb128 0x2 */ + .byte 0x4 /* DW_CFA_advance_loc4 */ + .long .LCFI6-.LFB4 + .byte 0xe /* DW_CFA_def_cfa_offset CFA = r4 + 8 = 8(%esp) */ + .byte 0x8 /* .uleb128 0x8 */ + .byte 0x85 /* DW_CFA_offset, column 0x5 %ebp at CFA + 2 * -4 */ + .byte 0x2 /* .uleb128 0x2 */ - .byte 0x4 /* DW_CFA_advance_loc4 */ - .long .LCFI7-.LCFI6 - .byte 0xd /* DW_CFA_def_cfa_register CFA = r5 = %ebp */ - .byte 0x5 /* .uleb128 0x5 */ + .byte 0x4 /* DW_CFA_advance_loc4 */ + .long .LCFI7-.LCFI6 + .byte 0xd /* DW_CFA_def_cfa_register CFA = r5 = %ebp */ + .byte 0x5 /* .uleb128 0x5 */ - .byte 0x4 /* DW_CFA_advance_loc4 */ - .long .LCFI8-.LCFI7 - .byte 0x86 /* DW_CFA_offset, column 0x6 %esi at CFA + 3 * -4 */ - .byte 0x3 /* .uleb128 0x3 */ + .byte 0x4 /* DW_CFA_advance_loc4 */ + .long .LCFI8-.LCFI7 + .byte 0x86 /* DW_CFA_offset, column 0x6 %esi at CFA + 3 * -4 */ + .byte 0x3 /* .uleb128 0x3 */ - /* End of DW_CFA_xxx CFI instructions. */ - .align 4 + /* End of DW_CFA_xxx CFI instructions. */ + .align 4 .LEFDE4: #endif /* !FFI_NO_RAW_API */ .LSFDE5: - .long .LEFDE5-.LASFDE5 /* FDE Length */ + .long .LEFDE5-.LASFDE5 /* FDE Length */ .LASFDE5: - .long .LASFDE5-.Lframe1 /* FDE CIE offset */ + .long .LASFDE5-.Lframe1 /* FDE CIE offset */ #if defined __PIC__ && defined HAVE_AS_X86_PCREL - .long .LFB5-. /* FDE initial location */ + .long .LFB5-. /* FDE initial location */ #else - .long .LFB5 + .long .LFB5 #endif - .long .LFE5-.LFB5 /* FDE address range */ + .long .LFE5-.LFB5 /* FDE address range */ #ifdef __PIC__ - .byte 0x0 /* .uleb128 0x0; Augmentation size */ + .byte 0x0 /* .uleb128 0x0; Augmentation size */ #endif - /* DW_CFA_xxx CFI instructions go here. */ + /* DW_CFA_xxx CFI instructions go here. 
*/ - .byte 0x4 /* DW_CFA_advance_loc4 */ - .long .LCFI9-.LFB5 - .byte 0xe /* DW_CFA_def_cfa_offset CFA = r4 + 8 = 8(%esp) */ - .byte 0x8 /* .uleb128 0x8 */ - .byte 0x85 /* DW_CFA_offset, column 0x5 %ebp at CFA + 2 * -4 */ - .byte 0x2 /* .uleb128 0x2 */ + .byte 0x4 /* DW_CFA_advance_loc4 */ + .long .LCFI9-.LFB5 + .byte 0xe /* DW_CFA_def_cfa_offset CFA = r4 + 8 = 8(%esp) */ + .byte 0x8 /* .uleb128 0x8 */ + .byte 0x85 /* DW_CFA_offset, column 0x5 %ebp at CFA + 2 * -4 */ + .byte 0x2 /* .uleb128 0x2 */ - .byte 0x4 /* DW_CFA_advance_loc4 */ - .long .LCFI10-.LCFI9 - .byte 0xd /* DW_CFA_def_cfa_register CFA = r5 = %ebp */ - .byte 0x5 /* .uleb128 0x5 */ + .byte 0x4 /* DW_CFA_advance_loc4 */ + .long .LCFI10-.LCFI9 + .byte 0xd /* DW_CFA_def_cfa_register CFA = r5 = %ebp */ + .byte 0x5 /* .uleb128 0x5 */ - /* End of DW_CFA_xxx CFI instructions. */ - .align 4 + /* End of DW_CFA_xxx CFI instructions. */ + .align 4 .LEFDE5: #endif /* !_MSC_VER */ #if defined __ELF__ && defined __linux__ - .section .note.GNU-stack,"",@progbits + .section .note.GNU-stack,"",@progbits #endif From 6e346487b879b4b056a847268e381ae6efec4c21 Mon Sep 17 00:00:00 2001 From: nielsAD Date: Mon, 25 Aug 2014 12:23:29 +0200 Subject: [PATCH 2/3] Fixed THISCALL/FASTCALL closures and added basic support for PASCAL/REGISTER closures. 
--- src/x86/ffi.c | 61 ++++++++++++++++++++++--------- src/x86/win32.S | 95 ++++++++++++++++++++++++++++--------------------- 2 files changed, 100 insertions(+), 56 deletions(-) diff --git a/src/x86/ffi.c b/src/x86/ffi.c index 1dd00eb9..cb5f634a 100644 --- a/src/x86/ffi.c +++ b/src/x86/ffi.c @@ -419,6 +419,8 @@ void FFI_HIDDEN ffi_closure_SYSV (ffi_closure *) __attribute__ ((regparm(1))); unsigned int FFI_HIDDEN ffi_closure_SYSV_inner (ffi_closure *, void **, void *) __attribute__ ((regparm(1))); +unsigned int FFI_HIDDEN ffi_closure_WIN32_inner (ffi_closure *, void **, void *) + __attribute__ ((regparm(1))); void FFI_HIDDEN ffi_closure_raw_SYSV (ffi_raw_closure *) __attribute__ ((regparm(1))); #ifdef X86_WIN32 @@ -426,12 +428,10 @@ void FFI_HIDDEN ffi_closure_raw_THISCALL (ffi_raw_closure *) __attribute__ ((regparm(1))); #endif #ifndef X86_WIN64 -void FFI_HIDDEN ffi_closure_STDCALL (ffi_closure *) - __attribute__ ((regparm(1))); -void FFI_HIDDEN ffi_closure_THISCALL (ffi_closure *) - __attribute__ ((regparm(1))); -void FFI_HIDDEN ffi_closure_FASTCALL (ffi_closure *) - __attribute__ ((regparm(1))); +void FFI_HIDDEN ffi_closure_STDCALL (ffi_closure *); +void FFI_HIDDEN ffi_closure_THISCALL (ffi_closure *); +void FFI_HIDDEN ffi_closure_FASTCALL (ffi_closure *); +void FFI_HIDDEN ffi_closure_REGISTER (ffi_closure *); #else void FFI_HIDDEN ffi_closure_win64 (ffi_closure *); #endif @@ -490,6 +490,29 @@ ffi_closure_SYSV_inner (ffi_closure *closure, void **respp, void *args) return cif->flags; } + +unsigned int FFI_HIDDEN __attribute__ ((regparm(1))) +ffi_closure_WIN32_inner (ffi_closure *closure, void **respp, void *args) +{ + /* our various things... 
*/ + ffi_cif *cif; + void **arg_area; + + cif = closure->cif; + arg_area = (void**) alloca (cif->nargs * sizeof (void*)); + + /* this call will initialize ARG_AREA, such that each + * element in that array points to the corresponding + * value on the stack; and if the function returns + * a structure, it will change RESP to point to the + * structure return address. */ + + ffi_prep_incoming_args_SYSV(args, respp, arg_area, cif); + + (closure->fun) (cif, *respp, arg_area, closure->user_data); + + return cif->bytes; +} #endif /* !X86_WIN64 */ static void @@ -587,7 +610,7 @@ ffi_prep_incoming_args_SYSV(char *stack, void **rvalue, void **avalue, unsigned int __dis = __fun - (__ctx + 10); \ *(unsigned char*) &__tramp[0] = 0xb8; \ *(unsigned int*) &__tramp[1] = __ctx; /* movl __ctx, %eax */ \ - *(unsigned char *) &__tramp[5] = 0xe9; \ + *(unsigned char*) &__tramp[5] = 0xe9; \ *(unsigned int*) &__tramp[6] = __dis; /* jmp __fun */ \ } @@ -618,15 +641,15 @@ ffi_prep_incoming_args_SYSV(char *stack, void **rvalue, void **avalue, *(unsigned short*) &__tramp[50] = (__size + 8); /* ret (__size + 8) */ \ } -#define FFI_INIT_TRAMPOLINE_STDCALL(TRAMP,FUN,CTX) \ +#define FFI_INIT_TRAMPOLINE_WIN32(TRAMP,FUN,CTX) \ { unsigned char *__tramp = (unsigned char*)(TRAMP); \ unsigned int __fun = (unsigned int)(FUN); \ unsigned int __ctx = (unsigned int)(CTX); \ unsigned int __dis = __fun - (__ctx + 10); \ - *(unsigned char*) &__tramp[0] = 0xb8; \ - *(unsigned int*) &__tramp[1] = __ctx; /* movl __ctx, %eax */ \ - *(unsigned char *) &__tramp[5] = 0xe8; \ - *(unsigned int*) &__tramp[6] = __dis; /* call __fun */ \ + *(unsigned char*) &__tramp[0] = 0x68; \ + *(unsigned int*) &__tramp[1] = __ctx; /* push __ctx */ \ + *(unsigned char*) &__tramp[5] = 0xe9; \ + *(unsigned int*) &__tramp[6] = __dis; /* jmp __fun */ \ } /* the cif must already be prep'ed */ @@ -656,21 +679,27 @@ ffi_prep_closure_loc (ffi_closure* closure, &ffi_closure_SYSV, (void*)codeloc); } + else if (cif->abi == FFI_REGISTER) + { + 
FFI_INIT_TRAMPOLINE_WIN32 (&closure->tramp[0], + &ffi_closure_REGISTER, + (void*)codeloc); + } else if (cif->abi == FFI_FASTCALL) { - FFI_INIT_TRAMPOLINE_STDCALL (&closure->tramp[0], + FFI_INIT_TRAMPOLINE_WIN32 (&closure->tramp[0], &ffi_closure_FASTCALL, (void*)codeloc); } else if (cif->abi == FFI_THISCALL) { - FFI_INIT_TRAMPOLINE_STDCALL (&closure->tramp[0], + FFI_INIT_TRAMPOLINE_WIN32 (&closure->tramp[0], &ffi_closure_THISCALL, (void*)codeloc); } - else if (cif->abi == FFI_STDCALL) + else if (cif->abi == FFI_STDCALL || cif->abi == FFI_PASCAL) { - FFI_INIT_TRAMPOLINE_STDCALL (&closure->tramp[0], + FFI_INIT_TRAMPOLINE_WIN32 (&closure->tramp[0], &ffi_closure_STDCALL, (void*)codeloc); } diff --git a/src/x86/win32.S b/src/x86/win32.S index 1104ead8..96e27df2 100644 --- a/src/x86/win32.S +++ b/src/x86/win32.S @@ -34,8 +34,8 @@ #include #include -#define CIF_ABI_OFFSET 0 #define CIF_BYTES_OFFSET 16 +#define CIF_FLAGS_OFFSET 20 #ifdef _MSC_VER @@ -45,6 +45,7 @@ .MODEL FLAT, C EXTRN ffi_closure_SYSV_inner:NEAR +EXTRN ffi_closure_WIN32_inner:NEAR _TEXT SEGMENT @@ -215,7 +216,7 @@ ffi_closure_THISCALL PROC NEAR ffi_closure_THISCALL ENDP ffi_closure_FASTCALL PROC NEAR - ;; Insert the register argument on the stack as the first argument + ;; Insert the 2 register arguments on the stack as the first two arguments xchg DWORD PTR [esp+4], edx xchg DWORD PTR [esp], ecx push edx @@ -223,6 +224,16 @@ ffi_closure_FASTCALL PROC NEAR jmp ffi_closure_STDCALL ffi_closure_FASTCALL ENDP +ffi_closure_REGISTER PROC NEAR + ;; Insert the 3 register arguments on the stack as the first three arguments + push eax + xchg DWORD PTR [esp+8], ecx + xchg DWORD PTR [esp+4], edx + push ecx + push edx + jmp ffi_closure_STDCALL +ffi_closure_REGISTER ENDP + ffi_closure_SYSV PROC NEAR FORCEFRAME ;; the ffi_closure ctx is passed in eax by the trampoline. 
@@ -320,7 +331,6 @@ ffi_closure_SYSV ENDP #define RAW_CLOSURE_CIF_OFFSET ((FFI_TRAMPOLINE_SIZE + 3) AND NOT 3) #define RAW_CLOSURE_FUN_OFFSET (RAW_CLOSURE_CIF_OFFSET + 4) #define RAW_CLOSURE_USER_DATA_OFFSET (RAW_CLOSURE_FUN_OFFSET + 4) -#define CIF_FLAGS_OFFSET 20 ffi_closure_raw_THISCALL PROC NEAR USES esi FORCEFRAME sub esp, 36 @@ -417,7 +427,7 @@ ffi_closure_raw_SYSV ENDP #endif /* !FFI_NO_RAW_API */ ffi_closure_STDCALL PROC NEAR FORCEFRAME - ;; the ffi_closure ctx is passed in eax by the trampoline. + mov eax, [ebp + 4] ;; the ffi_closure ctx pushed by the trampoline; it sits above the ebp saved by the FORCEFRAME prologue. sub esp, 40 lea edx, [ebp - 24] @@ -427,9 +437,13 @@ ffi_closure_STDCALL PROC NEAR FORCEFRAME lea edx, [ebp - 12] mov [esp + 4], edx ;; &resp mov [esp], eax ;; closure - call ffi_closure_SYSV_inner + call ffi_closure_WIN32_inner mov ecx, [ebp - 12] + xchg [ebp + 4], eax ;;xchg size of stack parameters and ffi_closure ctx + mov eax, DWORD PTR [eax + CLOSURE_CIF_OFFSET] + mov eax, DWORD PTR [eax + CIF_FLAGS_OFFSET] + cd_jumptable: jmp [cd_jumpdata + 4 * eax] cd_jumpdata: @@ -493,21 +507,10 @@ cd_retlongdouble: cd_epilogue: mov esp, ebp pop ebp - pop ecx - pop edx - mov ecx, DWORD PTR [ecx + (CLOSURE_CIF_OFFSET-10)] - add esp, DWORD PTR [ecx + CIF_BYTES_OFFSET] - mov ecx, DWORD PTR [ecx + CIF_ABI_OFFSET] - cmp ecx, 3 - je cd_thiscall - cmp ecx, 4 - jne cd_not_fastcall - - add esp, 4 -cd_thiscall: - add esp, 4 -cd_not_fastcall: - jmp edx + mov ecx, [esp + 4] ;; Return address + add esp, [esp] ;; Parameters stack size + add esp, 8 + jmp ecx ffi_closure_STDCALL ENDP _TEXT ENDS @@ -728,14 +731,27 @@ FFI_HIDDEN(ffi_closure_FASTCALL) .def _ffi_closure_FASTCALL; .scl 2; .type 32; .endef #endif USCORE_SYMBOL(ffi_closure_FASTCALL): - /* Insert the register arguments on the stack as the first two arguments */ + /* Insert the 2 register arguments on the stack as the first two arguments */ xchg %edx, 4(%esp) xchg %ecx, (%esp) push %edx push %ecx jmp .ffi_closure_STDCALL_internal +FFI_HIDDEN(ffi_closure_REGISTER) + 
.globl USCORE_SYMBOL(ffi_closure_REGISTER) +#if defined(X86_WIN32) && !defined(__OS2__) + .def _ffi_closure_REGISTER; .scl 2; .type 32; .endef +#endif +USCORE_SYMBOL(ffi_closure_REGISTER): + /* Insert the 3 register arguments on the stack as the first three arguments */ + push %eax + xchg %ecx, 8(%esp) + xchg %edx, 4(%esp) + push %ecx + push %edx + jmp .ffi_closure_STDCALL_internal + .LFE1: - # This assumes we are using gas. .balign 16 FFI_HIDDEN(ffi_closure_SYSV) @@ -879,7 +895,6 @@ USCORE_SYMBOL(ffi_closure_SYSV): #define RAW_CLOSURE_CIF_OFFSET ((FFI_TRAMPOLINE_SIZE + 3) & ~3) #define RAW_CLOSURE_FUN_OFFSET (RAW_CLOSURE_CIF_OFFSET + 4) #define RAW_CLOSURE_USER_DATA_OFFSET (RAW_CLOSURE_FUN_OFFSET + 4) -#define CIF_FLAGS_OFFSET 20 #ifdef X86_WIN32 .balign 16 @@ -1032,6 +1047,8 @@ FFI_HIDDEN(ffi_closure_STDCALL) #endif USCORE_SYMBOL(ffi_closure_STDCALL): .ffi_closure_STDCALL_internal: + /* ffi_closure ctx is at top of the stack */ + movl (%esp), %eax .LFB5: pushl %ebp .LCFI9: @@ -1045,19 +1062,23 @@ USCORE_SYMBOL(ffi_closure_STDCALL): leal -12(%ebp), %edx movl %edx, (%esp) /* &resp */ #if defined(HAVE_HIDDEN_VISIBILITY_ATTRIBUTE) || !defined(__PIC__) - call USCORE_SYMBOL(ffi_closure_SYSV_inner) + call USCORE_SYMBOL(ffi_closure_WIN32_inner) #elif defined(X86_DARWIN) - calll L_ffi_closure_SYSV_inner$stub + calll L_ffi_closure_WIN32_inner$stub #else movl %ebx, 8(%esp) call 1f -1: popl %ebx +1: popl %ebx addl $_GLOBAL_OFFSET_TABLE_+[.-1b], %ebx - call ffi_closure_SYSV_inner@PLT + call ffi_closure_WIN32_inner@PLT movl 8(%esp), %ebx #endif movl -12(%ebp), %ecx 0: + xchgl 4(%ebp), %eax /* xchg size of stack parameters and ffi_closure ctx */ + movl CLOSURE_CIF_OFFSET(%eax), %eax + movl CIF_FLAGS_OFFSET(%eax), %eax + call 1f # Do not insert anything here between the call and the jump table. 
.Lscls_store_table: @@ -1144,19 +1165,10 @@ USCORE_SYMBOL(ffi_closure_STDCALL): .Lscls_epilogue: movl %ebp, %esp popl %ebp - popl %ecx - popl %edx - movl (CLOSURE_CIF_OFFSET-10)(%ecx), %ecx - addl CIF_BYTES_OFFSET(%ecx), %esp - movl CIF_ABI_OFFSET(%ecx), %ecx - cmpl $3, %ecx /* FFI_THISCALL */ - je 1f - cmpl $4, %ecx /* FFI_FASTCALL */ - jne 2f - - addl $4, %esp -1: addl $4, %esp -2: jmp *%edx + movl 4(%esp), %ecx /* Return address */ + addl (%esp), %esp /* Parameters stack size */ + addl $8, %esp + jmp *%ecx .ffi_closure_STDCALL_end: .LFE5: @@ -1165,6 +1177,9 @@ USCORE_SYMBOL(ffi_closure_STDCALL): L_ffi_closure_SYSV_inner$stub: .indirect_symbol _ffi_closure_SYSV_inner hlt ; hlt ; hlt ; hlt ; hlt +L_ffi_closure_WIN32_inner$stub: + .indirect_symbol _ffi_closure_WIN32_inner + hlt ; hlt ; hlt ; hlt ; hlt #endif #if defined(X86_WIN32) && !defined(__OS2__) From 5d6340ef2cf81432da79ac8b0b1b1218ab391438 Mon Sep 17 00:00:00 2001 From: nielsAD Date: Mon, 25 Aug 2014 17:29:44 +0200 Subject: [PATCH 3/3] Determine whether register arguments (THISCALL/FASTCALL/REGISTER) are really passed via register to closures. Use stack if not. --- src/x86/ffi.c | 288 +++++++++++++++++++++++++++----------------- src/x86/ffitarget.h | 2 +- src/x86/win32.S | 76 ++++++------ 3 files changed, 215 insertions(+), 151 deletions(-) diff --git a/src/x86/ffi.c b/src/x86/ffi.c index cb5f634a..680456b9 100644 --- a/src/x86/ffi.c +++ b/src/x86/ffi.c @@ -51,11 +51,11 @@ unsigned int ffi_prep_args(char *stack, extended_cif *ecif) register char *argp; register ffi_type **p_arg; #ifndef X86_WIN64 - void *p_stack_data[3]; - char *argp2 = stack; - unsigned int stack_args_count = 0; const int cabi = ecif->cif->abi; const int dir = (cabi == FFI_PASCAL || cabi == FFI_REGISTER) ? 
-1 : +1; + unsigned int stack_args_count = 0; + void *p_stack_data[3]; + char *argp2 = stack; #else #define dir 1 #endif @@ -69,7 +69,6 @@ unsigned int ffi_prep_args(char *stack, extended_cif *ecif) #endif ) { - *(void **) argp = ecif->rvalue; #ifndef X86_WIN64 /* For fastcall/thiscall/register this is first register-passed argument. */ @@ -79,14 +78,16 @@ unsigned int ffi_prep_args(char *stack, extended_cif *ecif) ++stack_args_count; } #endif + + *(void **) argp = ecif->rvalue; argp += sizeof(void*); } - p_arg = ecif->cif->arg_types; + p_arg = ecif->cif->arg_types; p_argv = ecif->avalue; if (dir < 0) { - const unsigned int nargs = ecif->cif->nargs - 1; + const int nargs = ecif->cif->nargs - 1; if (nargs > 0) { p_arg += nargs; @@ -94,15 +95,18 @@ unsigned int ffi_prep_args(char *stack, extended_cif *ecif) } } - for (i = ecif->cif->nargs; i != 0; i--) - { + for (i = ecif->cif->nargs; + i != 0; + i--, p_arg += dir, p_argv += dir) + { /* Align if necessary */ if ((sizeof(void*) - 1) & (size_t) argp) argp = (char *) ALIGN(argp, sizeof(void*)); - + size_t z = (*p_arg)->size; + #ifdef X86_WIN64 - if (z > sizeof(ffi_arg) + if (z > FFI_SIZEOF_ARG || ((*p_arg)->type == FFI_TYPE_STRUCT && (z & (1 | 2 | 4 | 8)) == 0) #if FFI_TYPE_DOUBLE != FFI_TYPE_LONGDOUBLE @@ -110,7 +114,7 @@ unsigned int ffi_prep_args(char *stack, extended_cif *ecif) #endif ) { - z = sizeof(ffi_arg); + z = FFI_SIZEOF_ARG; *(void **)argp = *p_argv; } else if ((*p_arg)->type == FFI_TYPE_FLOAT) @@ -119,9 +123,9 @@ unsigned int ffi_prep_args(char *stack, extended_cif *ecif) } else #endif - if (z < sizeof(ffi_arg)) + if (z < FFI_SIZEOF_ARG) { - z = sizeof(ffi_arg); + z = FFI_SIZEOF_ARG; switch ((*p_arg)->type) { case FFI_TYPE_SINT8: @@ -165,7 +169,7 @@ unsigned int ffi_prep_args(char *stack, extended_cif *ecif) /* For thiscall/fastcall/register convention register-passed arguments are the first two none-floating-point arguments with a size smaller or equal to sizeof (void*). 
*/ - if ((z == sizeof(ffi_arg)) + if ((z == FFI_SIZEOF_ARG) && ((cabi == FFI_REGISTER) || (cabi == FFI_THISCALL && stack_args_count < 1) || (cabi == FFI_FASTCALL && stack_args_count < 2)) @@ -176,7 +180,7 @@ unsigned int ffi_prep_args(char *stack, extended_cif *ecif) { /* Iterating arguments backwards, so first register-passed argument will be passed last. Shift temporary values to make place. */ - p_stack_data[0] = p_stack_data[1]; + p_stack_data[0] = p_stack_data[1]; p_stack_data[1] = p_stack_data[2]; stack_args_count = 2; } @@ -186,9 +190,6 @@ unsigned int ffi_prep_args(char *stack, extended_cif *ecif) } #endif - p_arg += dir; - p_argv += dir; - #ifdef X86_WIN64 argp += (z + sizeof(void*) - 1) & ~(sizeof(void*) - 1); #else @@ -201,25 +202,25 @@ unsigned int ffi_prep_args(char *stack, extended_cif *ecif) on top of stack, so that those can be moved to registers by call-handler. */ if (stack_args_count > 0) { - int i; - if (dir < 0 && stack_args_count > 1) - { - /* Reverse order if iterating arguments backwards */ - ffi_arg tmp = *(ffi_arg*) p_stack_data[0]; - *(ffi_arg*) p_stack_data[0] = *(ffi_arg*) p_stack_data[stack_args_count - 1]; - *(ffi_arg*) p_stack_data[stack_args_count - 1] = tmp; - } + if (dir < 0 && stack_args_count > 1) + { + /* Reverse order if iterating arguments backwards */ + ffi_arg tmp = *(ffi_arg*) p_stack_data[0]; + *(ffi_arg*) p_stack_data[0] = *(ffi_arg*) p_stack_data[stack_args_count - 1]; + *(ffi_arg*) p_stack_data[stack_args_count - 1] = tmp; + } + int i; for (i = 0; i < stack_args_count; i++) { if (p_stack_data[i] != argp2) { ffi_arg tmp = *(ffi_arg*) p_stack_data[i]; - memmove (argp2 + sizeof(ffi_arg), argp2, (size_t) ((char*) p_stack_data[i] - (char*)argp2)); - *(ffi_arg *) argp2 = tmp; + memmove (argp2 + FFI_SIZEOF_ARG, argp2, (size_t) ((char*) p_stack_data[i] - (char*)argp2)); + *(ffi_arg *) argp2 = tmp; } - argp2 += sizeof(ffi_arg); + argp2 += FFI_SIZEOF_ARG; } } @@ -321,12 +322,12 @@ ffi_status ffi_prep_cif_machdep(ffi_cif *cif) 
#ifdef X86_WIN64 /* ensure space for storing four registers */ - cif->bytes += 4 * sizeof(ffi_arg); + cif->bytes += 4 * FFI_SIZEOF_ARG; #endif #ifndef X86_WIN32 #ifndef X86_WIN64 - if (cif->abi != FFI_STDCALL && cif->abi != FFI_THISCALL && cif->abi != FFI_FASTCALL) + if (cif->abi == FFI_SYSV || cif->abi == FFI_UNIX64) #endif cif->bytes = (cif->bytes + 15) & ~0xF; #endif @@ -336,11 +337,11 @@ ffi_status ffi_prep_cif_machdep(ffi_cif *cif) #ifdef X86_WIN64 extern int -ffi_call_win64(void (*)(char *, extended_cif *), extended_cif *, +ffi_call_win64(unsigned int (*)(char *, extended_cif *), extended_cif *, unsigned, unsigned, unsigned *, void (*fn)(void)); #else extern void -ffi_call_win32(void (*)(char *, extended_cif *), extended_cif *, +ffi_call_win32(unsigned int (*)(char *, extended_cif *), extended_cif *, unsigned, unsigned, unsigned, unsigned *, void (*fn)(void)); extern void ffi_call_SYSV(void (*)(char *, extended_cif *), extended_cif *, unsigned, unsigned, unsigned *, void (*fn)(void)); @@ -359,8 +360,7 @@ void ffi_call(ffi_cif *cif, void (*fn)(void), void *rvalue, void **avalue) #ifdef X86_WIN64 if (rvalue == NULL && cif->flags == FFI_TYPE_STRUCT - && cif->rtype->size != 1 && cif->rtype->size != 2 - && cif->rtype->size != 4 && cif->rtype->size != 8) + && ((cif->rtype->size & (1 | 2 | 4 | 8)) == 0)) { ecif.rvalue = alloca((cif->rtype->size + 0xF) & ~0xF); } @@ -413,8 +413,8 @@ void ffi_call(ffi_cif *cif, void (*fn)(void), void *rvalue, void **avalue) /* The following __attribute__((regparm(1))) decorations will have no effect on MSVC or SUNPRO_C -- standard conventions apply. 
*/ -static void ffi_prep_incoming_args_SYSV (char *stack, void **ret, - void** args, ffi_cif* cif); +static unsigned int ffi_prep_incoming_args (char *stack, void **ret, + void** args, ffi_cif* cif); void FFI_HIDDEN ffi_closure_SYSV (ffi_closure *) __attribute__ ((regparm(1))); unsigned int FFI_HIDDEN ffi_closure_SYSV_inner (ffi_closure *, void **, void *) @@ -455,7 +455,7 @@ ffi_closure_win64_inner (ffi_closure *closure, void *args) { * a structure, it will change RESP to point to the * structure return address. */ - ffi_prep_incoming_args_SYSV(args, &resp, arg_area, cif); + ffi_prep_incoming_args(args, &resp, arg_area, cif); (closure->fun) (cif, resp, arg_area, closure->user_data); @@ -484,7 +484,7 @@ ffi_closure_SYSV_inner (ffi_closure *closure, void **respp, void *args) * a structure, it will change RESP to point to the * structure return address. */ - ffi_prep_incoming_args_SYSV(args, respp, arg_area, cif); + ffi_prep_incoming_args(args, respp, arg_area, cif); (closure->fun) (cif, *respp, arg_area, closure->user_data); @@ -497,6 +497,7 @@ ffi_closure_WIN32_inner (ffi_closure *closure, void **respp, void *args) /* our various things... */ ffi_cif *cif; void **arg_area; + unsigned int ret; cif = closure->cif; arg_area = (void**) alloca (cif->nargs * sizeof (void*)); @@ -507,80 +508,141 @@ ffi_closure_WIN32_inner (ffi_closure *closure, void **respp, void *args) * a structure, it will change RESP to point to the * structure return address. 
*/ - ffi_prep_incoming_args_SYSV(args, respp, arg_area, cif); + ret = ffi_prep_incoming_args(args, respp, arg_area, cif); (closure->fun) (cif, *respp, arg_area, closure->user_data); - return cif->bytes; + return ret; } #endif /* !X86_WIN64 */ -static void -ffi_prep_incoming_args_SYSV(char *stack, void **rvalue, void **avalue, - ffi_cif *cif) +static unsigned int +ffi_prep_incoming_args(char *stack, void **rvalue, void **avalue, + ffi_cif *cif) { register unsigned int i; register void **p_argv; register char *argp; register ffi_type **p_arg; - - argp = stack; - -#ifdef X86_WIN64 - if (cif->rtype->size > sizeof(ffi_arg) - || (cif->flags == FFI_TYPE_STRUCT - && (cif->rtype->size != 1 && cif->rtype->size != 2 - && cif->rtype->size != 4 && cif->rtype->size != 8))) { - *rvalue = *(void **) argp; - argp += sizeof(void *); - } +#ifndef X86_WIN64 + const int cabi = cif->abi; + const int dir = (cabi == FFI_PASCAL || cabi == FFI_REGISTER) ? -1 : +1; + const unsigned int max_stack_count = (cabi == FFI_THISCALL) ? 1 + : (cabi == FFI_FASTCALL) ? 2 + : (cabi == FFI_REGISTER) ? 
3 + : 0; + unsigned int passed_regs = 0; + void *p_stack_data[3] = { stack - 1 }; #else - if ( cif->flags == FFI_TYPE_STRUCT - || cif->flags == FFI_TYPE_MS_STRUCT ) { - *rvalue = *(void **) argp; - argp += sizeof(void *); - } + #define dir 1 #endif - p_argv = avalue; - - for (i = cif->nargs, p_arg = cif->arg_types; (i != 0); i--, p_arg++) - { - size_t z; - - /* Align if necessary */ - if ((sizeof(void*) - 1) & (size_t) argp) { - argp = (char *) ALIGN(argp, sizeof(void*)); - } + argp = stack; +#ifndef X86_WIN64 + argp += max_stack_count * FFI_SIZEOF_ARG; +#endif + if ((cif->flags == FFI_TYPE_STRUCT + || cif->flags == FFI_TYPE_MS_STRUCT) #ifdef X86_WIN64 - if ((*p_arg)->size > sizeof(ffi_arg) - || ((*p_arg)->type == FFI_TYPE_STRUCT - && ((*p_arg)->size != 1 && (*p_arg)->size != 2 - && (*p_arg)->size != 4 && (*p_arg)->size != 8))) + && ((cif->rtype->size & (1 | 2 | 4 | 8)) == 0) +#endif + ) + { +#ifndef X86_WIN64 + if (passed_regs < max_stack_count) { - z = sizeof(void *); - *p_argv = *(void **)argp; + *rvalue = *(void**) (stack + (passed_regs*FFI_SIZEOF_ARG)); + ++passed_regs; + } + else +#endif + { + *rvalue = *(void **) argp; + argp += sizeof(void *); + } + } + +#ifndef X86_WIN64 + /* Do register arguments first */ + for (i = 0, p_arg = cif->arg_types; + i < cif->nargs && passed_regs < max_stack_count; + i++, p_arg++) + { + if ((*p_arg)->type == FFI_TYPE_FLOAT + || (*p_arg)->type == FFI_TYPE_STRUCT) + continue; + + size_t sz = (*p_arg)->size; + if(sz == 0 || sz > FFI_SIZEOF_ARG) + continue; + + p_stack_data[passed_regs] = avalue + i; + avalue[i] = stack + (passed_regs*FFI_SIZEOF_ARG); + ++passed_regs; + } +#endif + + p_arg = cif->arg_types; + p_argv = avalue; + if (dir < 0) + { + const int nargs = cif->nargs - 1; + if (nargs > 0) + { + p_arg += nargs; + p_argv += nargs; + } + } + + for (i = cif->nargs; + i != 0; + i--, p_arg += dir, p_argv += dir) + { + /* Align if necessary */ + if ((sizeof(void*) - 1) & (size_t) argp) + argp = (char *) ALIGN(argp, sizeof(void*)); 
+ + size_t z = (*p_arg)->size; + +#ifdef X86_WIN64 + if (z > FFI_SIZEOF_ARG + || ((*p_arg)->type == FFI_TYPE_STRUCT + && (z & (1 | 2 | 4 | 8)) == 0) +#if FFI_TYPE_DOUBLE != FFI_TYPE_LONGDOUBLE + || ((*p_arg)->type == FFI_TYPE_LONGDOUBLE) +#endif + ) + { + z = FFI_SIZEOF_ARG; + *p_argv = *(void **)argp; + } + else +#else + if (passed_regs > 0 + && z <= FFI_SIZEOF_ARG + && (p_argv == p_stack_data[0] + || p_argv == p_stack_data[1] + || p_argv == p_stack_data[2])) + { + /* Already assigned a register value */ + continue; } else #endif { - z = (*p_arg)->size; - /* because we're little endian, this is what it turns into. */ - *p_argv = (void*) argp; } - - p_argv++; + #ifdef X86_WIN64 argp += (z + sizeof(void*) - 1) & ~(sizeof(void*) - 1); #else argp += z; #endif } - - return; + + return (size_t)argp - (size_t)stack; } #define FFI_INIT_TRAMPOLINE_WIN64(TRAMP,FUN,CTX,MASK) \ @@ -620,24 +682,24 @@ ffi_prep_incoming_args_SYSV(char *stack, void **rvalue, void **avalue, unsigned int __ctx = (unsigned int)(CTX); \ unsigned int __dis = __fun - (__ctx + 49); \ unsigned short __size = (unsigned short)(SIZE); \ - *(unsigned int *) &__tramp[0] = 0x8324048b; /* mov (%esp), %eax */ \ - *(unsigned int *) &__tramp[4] = 0x4c890cec; /* sub $12, %esp */ \ - *(unsigned int *) &__tramp[8] = 0x04890424; /* mov %ecx, 4(%esp) */ \ - *(unsigned char*) &__tramp[12] = 0x24; /* mov %eax, (%esp) */ \ + *(unsigned int *) &__tramp[0] = 0x8324048b; /* mov (%esp), %eax */ \ + *(unsigned int *) &__tramp[4] = 0x4c890cec; /* sub $12, %esp */ \ + *(unsigned int *) &__tramp[8] = 0x04890424; /* mov %ecx, 4(%esp) */ \ + *(unsigned char*) &__tramp[12] = 0x24; /* mov %eax, (%esp) */ \ *(unsigned char*) &__tramp[13] = 0xb8; \ - *(unsigned int *) &__tramp[14] = __size; /* mov __size, %eax */ \ - *(unsigned int *) &__tramp[18] = 0x08244c8d; /* lea 8(%esp), %ecx */ \ - *(unsigned int *) &__tramp[22] = 0x4802e8c1; /* shr $2, %eax ; dec %eax */ \ - *(unsigned short*) &__tramp[26] = 0x0b74; /* jz 1f */ \ - *(unsigned 
int *) &__tramp[28] = 0x8908518b; /* 2b: mov 8(%ecx), %edx */ \ - *(unsigned int *) &__tramp[32] = 0x04c18311; /* mov %edx, (%ecx) ; add $4, %ecx */ \ - *(unsigned char*) &__tramp[36] = 0x48; /* dec %eax */ \ - *(unsigned short*) &__tramp[37] = 0xf575; /* jnz 2b ; 1f: */ \ + *(unsigned int *) &__tramp[14] = __size; /* mov __size, %eax */ \ + *(unsigned int *) &__tramp[18] = 0x08244c8d; /* lea 8(%esp), %ecx */ \ + *(unsigned int *) &__tramp[22] = 0x4802e8c1; /* shr $2, %eax ; dec %eax */ \ + *(unsigned short*) &__tramp[26] = 0x0b74; /* jz 1f */ \ + *(unsigned int *) &__tramp[28] = 0x8908518b; /* 2b: mov 8(%ecx), %edx */ \ + *(unsigned int *) &__tramp[32] = 0x04c18311; /* mov %edx, (%ecx) ; add $4, %ecx */ \ + *(unsigned char*) &__tramp[36] = 0x48; /* dec %eax */ \ + *(unsigned short*) &__tramp[37] = 0xf575; /* jnz 2b ; 1f: */ \ *(unsigned char*) &__tramp[39] = 0xb8; \ - *(unsigned int*) &__tramp[40] = __ctx; /* movl __ctx, %eax */ \ + *(unsigned int*) &__tramp[40] = __ctx; /* movl __ctx, %eax */ \ *(unsigned char *) &__tramp[44] = 0xe8; \ - *(unsigned int*) &__tramp[45] = __dis; /* call __fun */ \ - *(unsigned char*) &__tramp[49] = 0xc2; /* ret */ \ + *(unsigned int*) &__tramp[45] = __dis; /* call __fun */ \ + *(unsigned char*) &__tramp[49] = 0xc2; /* ret */ \ *(unsigned short*) &__tramp[50] = (__size + 8); /* ret (__size + 8) */ \ } @@ -779,29 +841,31 @@ static unsigned int ffi_prep_args_raw(char *stack, extended_cif *ecif) { const ffi_cif *cif = ecif->cif; - const unsigned int abi = cif->abi; - const unsigned int max = (abi == FFI_THISCALL) ? 1 - : (abi == FFI_FASTCALL) ? 2 - : (abi == FFI_REGISTER) ? 3 - : 0; - unsigned int i, passed_regs = 0; + +#ifndef X86_WIN64 + const unsigned int abi = cif->abi; + const unsigned int max_regs = (abi == FFI_THISCALL) ? 1 + : (abi == FFI_FASTCALL) ? 2 + : (abi == FFI_REGISTER) ? 
3 + : 0; if (cif->flags == FFI_TYPE_STRUCT) ++passed_regs; - for (i = 0; i < cif->nargs && passed_regs <= max; i++) + for (i = 0; i < cif->nargs && passed_regs <= max_regs; i++) { - if (cif->arg_types[i]->type == FFI_TYPE_FLOAT - || cif->arg_types[i]->type == FFI_TYPE_STRUCT) - continue; + if (cif->arg_types[i]->type == FFI_TYPE_FLOAT + || cif->arg_types[i]->type == FFI_TYPE_STRUCT) + continue; - size_t sz = (cif->arg_types[i]->size + 3) & ~3; - if (sz == 0 || sz > 4) - continue; + size_t sz = cif->arg_types[i]->size; + if (sz == 0 || sz > FFI_SIZEOF_ARG) + continue; - ++passed_regs; + ++passed_regs; } +#endif memcpy (stack, ecif->avalue, cif->bytes); return passed_regs; diff --git a/src/x86/ffitarget.h b/src/x86/ffitarget.h index c7161ca4..e52111eb 100644 --- a/src/x86/ffitarget.h +++ b/src/x86/ffitarget.h @@ -139,7 +139,7 @@ typedef enum ffi_abi { #endif #endif #ifndef X86_WIN64 -#define FFI_NATIVE_RAW_API 1 /* x86 has native raw api support */ +#define FFI_NATIVE_RAW_API 1 /* x86 has native raw api support */ #endif #endif diff --git a/src/x86/win32.S b/src/x86/win32.S index 96e27df2..d523eb01 100644 --- a/src/x86/win32.S +++ b/src/x86/win32.S @@ -4,8 +4,8 @@ Copyright (c) 2001 John Beniton Copyright (c) 2002 Ranjit Mathew Copyright (c) 2009 Daniel Witte - - + + X86 Foreign Function Interface Permission is hereby granted, free of charge, to any person obtaining @@ -100,7 +100,7 @@ prepr_one: xchg ecx, eax fun: - ;; Call function + ;; Call function call fn ;; Load ecx with the return type code @@ -443,7 +443,7 @@ ffi_closure_STDCALL PROC NEAR FORCEFRAME xchg [ebp + 4], eax ;;xchg size of stack parameters and ffi_closure ctx mov eax, DWORD PTR [eax + CLOSURE_CIF_OFFSET] mov eax, DWORD PTR [eax + CIF_FLAGS_OFFSET] - + cd_jumptable: jmp [cd_jumpdata + 4 * eax] cd_jumpdata: @@ -509,7 +509,7 @@ cd_epilogue: pop ebp mov ecx, [esp + 4] ;; Return address add esp, [esp] ;; Parameters stack size - add esp, 8 + add esp, 8 jmp ecx ffi_closure_STDCALL ENDP @@ -552,40 +552,40 
@@ USCORE_SYMBOL(ffi_call_win32): call *8(%ebp) addl $8,%esp - # Prepare registers - # EAX stores the number of register arguments - cmpl $0, %eax - je .fun - cmpl $3, %eax - jl .prepr_two_cmp - - movl %esp, %ecx - addl $12, %esp - movl 8(%ecx), %eax - jmp .prepr_two + # Prepare registers + # EAX stores the number of register arguments + cmpl $0, %eax + je .fun + cmpl $3, %eax + jl .prepr_two_cmp + + movl %esp, %ecx + addl $12, %esp + movl 8(%ecx), %eax + jmp .prepr_two .prepr_two_cmp: - cmpl $2, %eax - jl .prepr_one_prep - movl %esp, %ecx - addl $8, %esp + cmpl $2, %eax + jl .prepr_one_prep + movl %esp, %ecx + addl $8, %esp .prepr_two: - movl 4(%ecx), %edx - jmp .prepr_one + movl 4(%ecx), %edx + jmp .prepr_one .prepr_one_prep: - movl %esp, %ecx - addl $4, %esp + movl %esp, %ecx + addl $4, %esp .prepr_one: - movl (%ecx), %ecx - cmpl $7, 16(%ebp) # FFI_REGISTER - jne .fun + movl (%ecx), %ecx + cmpl $7, 16(%ebp) # FFI_REGISTER + jne .fun - xchgl %eax, %ecx - + xchgl %eax, %ecx + .fun: # FIXME: Align the stack to a 128-bit boundary to avoid # potential performance hits. - # Call function + # Call function call *32(%ebp) # stdcall functions pop arguments off the stack themselves @@ -606,7 +606,7 @@ USCORE_SYMBOL(ffi_call_win32): jmp .Lepilogue 0: - call 1f + call 1f # Do not insert anything here between the call and the jump table. .Lstore_table: .long .Lnoretval-.Lstore_table /* FFI_TYPE_VOID */ @@ -750,7 +750,7 @@ USCORE_SYMBOL(ffi_closure_REGISTER): push %ecx push %edx jmp .ffi_closure_STDCALL_internal - + .LFE1: # This assumes we are using gas. 
.balign 16 @@ -781,7 +781,7 @@ USCORE_SYMBOL(ffi_closure_SYSV): #else movl %ebx, 8(%esp) call 1f -1: popl %ebx +1: popl %ebx addl $_GLOBAL_OFFSET_TABLE_+[.-1b], %ebx call ffi_closure_SYSV_inner@PLT movl 8(%esp), %ebx @@ -1075,10 +1075,10 @@ USCORE_SYMBOL(ffi_closure_STDCALL): #endif movl -12(%ebp), %ecx 0: - xchgl 4(%ebp), %eax /* xchg size of stack parameters and ffi_closure ctx */ + xchgl 4(%ebp), %eax /* xchg size of stack parameters and ffi_closure ctx */ movl CLOSURE_CIF_OFFSET(%eax), %eax movl CIF_FLAGS_OFFSET(%eax), %eax - + call 1f # Do not insert anything here between the call and the jump table. .Lscls_store_table: @@ -1165,9 +1165,9 @@ USCORE_SYMBOL(ffi_closure_STDCALL): .Lscls_epilogue: movl %ebp, %esp popl %ebp - movl 4(%esp), %ecx /* Return address */ - addl (%esp), %esp /* Parameters stack size */ - addl $8, %esp + movl 4(%esp), %ecx /* Return address */ + addl (%esp), %esp /* Parameters stack size */ + addl $8, %esp jmp *%ecx .ffi_closure_STDCALL_end: .LFE5: