From 3fa5d70cbb18b39a5e44f1c7984dedf73446bf6c Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Mon, 5 Jan 2015 13:03:06 -0800 Subject: [PATCH] x86: Avoid fastcall when building with pcc Apparently, PCC doesn't support the fastcall calling convention. Nor does it issue a warning or error for the attribute that it does not understand. --- src/x86/ffi.c | 10 ++++- src/x86/internal.h | 6 +++ src/x86/sysv.S | 94 ++++++++++++++++++++++++++++------------------ 3 files changed, 71 insertions(+), 39 deletions(-) diff --git a/src/x86/ffi.c b/src/x86/ffi.c index 1d474e32..3885e399 100644 --- a/src/x86/ffi.c +++ b/src/x86/ffi.c @@ -235,7 +235,10 @@ static const struct abi_params abi_params[FFI_LAST_ABI] = { }; extern void ffi_call_i386(struct call_frame *, char *) - FFI_HIDDEN __declspec(fastcall); +#if HAVE_FASTCALL + __declspec(fastcall) +#endif + FFI_HIDDEN; static void ffi_call_int (ffi_cif *cif, void (*fn)(void), void *rvalue, @@ -392,7 +395,10 @@ struct closure_frame void *user_data; /* 36 */ }; -int FFI_HIDDEN __declspec(fastcall) +int FFI_HIDDEN +#if HAVE_FASTCALL +__declspec(fastcall) +#endif ffi_closure_inner (struct closure_frame *frame, char *stack) { ffi_cif *cif = frame->cif; diff --git a/src/x86/internal.h b/src/x86/internal.h index 480c1d03..09771ba8 100644 --- a/src/x86/internal.h +++ b/src/x86/internal.h @@ -21,3 +21,9 @@ #define R_EAX 0 #define R_EDX 1 #define R_ECX 2 + +#ifdef __PCC__ +# define HAVE_FASTCALL 0 +#else +# define HAVE_FASTCALL 1 +#endif diff --git a/src/x86/sysv.S b/src/x86/sysv.S index 36e73b2c..ebbea5d1 100644 --- a/src/x86/sysv.S +++ b/src/x86/sysv.S @@ -90,6 +90,10 @@ ffi_call_i386: L(UW0): # cfi_startproc +#if !HAVE_FASTCALL + movl 4(%esp), %ecx + movl 8(%esp), %edx +#endif movl (%esp), %eax /* move the return address */ movl %ebp, (%ecx) /* store %ebp into local frame */ movl %eax, 4(%ecx) /* store retaddr into local frame */ @@ -210,29 +214,46 @@ ENDF(ffi_call_i386) /* Macros to help setting up the closure_data structure. */ -#define closure_FS (16 + 3*4 + 3*4 + 4) +#if HAVE_FASTCALL +# define closure_FS (40 + 4) +# define closure_CF 0 +#else +# define closure_FS (8 + 40 + 12) +# define closure_CF 8 +#endif #define FFI_CLOSURE_SAVE_REGS \ - movl %eax, 16+R_EAX*4(%esp); \ - movl %edx, 16+R_EDX*4(%esp); \ - movl %ecx, 16+R_ECX*4(%esp) + movl %eax, closure_CF+16+R_EAX*4(%esp); \ + movl %edx, closure_CF+16+R_EDX*4(%esp); \ + movl %ecx, closure_CF+16+R_ECX*4(%esp) #define FFI_CLOSURE_COPY_TRAMP_DATA \ movl FFI_TRAMPOLINE_SIZE(%eax), %edx; /* copy cif */ \ movl FFI_TRAMPOLINE_SIZE+4(%eax), %ecx; /* copy fun */ \ movl FFI_TRAMPOLINE_SIZE+8(%eax), %eax; /* copy user_data */ \ - movl %edx, 28(%esp); \ - movl %ecx, 32(%esp); \ - movl %eax, 36(%esp) + movl %edx, closure_CF+28(%esp); \ + movl %ecx, closure_CF+32(%esp); \ + movl %eax, closure_CF+36(%esp) -# define FFI_CLOSURE_CALL_INNER(UW) \ +#if HAVE_FASTCALL +# define FFI_CLOSURE_PREP_CALL \ movl %esp, %ecx; /* load closure_data */ \ + leal closure_FS+4(%esp), %edx; /* load incoming stack */ +#else +# define FFI_CLOSURE_PREP_CALL \ + leal closure_CF(%esp), %ecx; /* load closure_data */ \ leal closure_FS+4(%esp), %edx; /* load incoming stack */ \ + movl %ecx, (%esp); \ + movl %edx, 4(%esp) +#endif + +#define FFI_CLOSURE_CALL_INNER(UWN) \ call ffi_closure_inner + #define FFI_CLOSURE_MASK_AND_JUMP(N, UW) \ andl $X86_RET_TYPE_MASK, %eax; \ leal L(C1(load_table,N))(, %eax, 8), %edx; \ - movl (%esp), %eax; /* optimiztic load */ \ + movl closure_CF(%esp), %eax; /* optimiztic load */ \ jmp *%edx #ifdef __PIC__ @@ -243,14 +264,12 @@ ENDF(ffi_call_i386) call C(__x86.get_pc_thunk.dx); \ L(C1(pc,N)): \ leal L(C1(load_table,N))-L(C1(pc,N))(%edx, %eax, 8), %edx; \ - movl (%esp), %eax; /* optimiztic load */ \ + movl closure_CF(%esp), %eax; /* optimiztic load */ \ jmp *%edx # else # define FFI_CLOSURE_CALL_INNER_SAVE_EBX # undef FFI_CLOSURE_CALL_INNER # define FFI_CLOSURE_CALL_INNER(UWN) \ - movl %esp, %ecx; /* load closure_data */ \ - leal closure_FS+4(%esp), %edx; /* load incoming stack */ \ movl %ebx, 40(%esp); /* save ebx */ \ L(C1(UW,UWN)): \ # cfi_rel_offset(%ebx, 40); \ @@ -264,7 +283,7 @@ L(C1(UW,UWN)): \ movl 40(%esp), %ebx; /* restore ebx */ \ L(C1(UW,UWN)): \ # cfi_restore(%ebx); \ - movl (%esp), %eax; /* optimiztic load */ \ + movl closure_CF(%esp), %eax; /* optimiztic load */ \ jmp *%edx # endif /* DARWIN || HIDDEN */ #endif /* __PIC__ */ @@ -279,11 +298,11 @@ L(UW6): L(UW7): # cfi_def_cfa_offset(closure_FS + 4) FFI_CLOSURE_SAVE_REGS - movl 4(%eax), %edx /* copy cif */ - movl 8(%eax), %ecx /* copy fun */ - movl %edx, 28(%esp) - movl %ecx, 32(%esp) - movl %eax, 36(%esp) /* closure is user_data */ + movl 4(%eax), %edx /* copy cif */ + movl 8(%eax), %ecx /* copy fun */ + movl %edx, closure_CF+28(%esp) + movl %ecx, closure_CF+32(%esp) + movl %eax, closure_CF+36(%esp) /* closure is user_data */ jmp L(do_closure_i386) L(UW8): # cfi_endproc @@ -299,11 +318,11 @@ L(UW9): L(UW10): # cfi_def_cfa_offset(closure_FS + 4) FFI_CLOSURE_SAVE_REGS - movl 4(%ecx), %edx /* copy cif */ - movl 8(%ecx), %eax /* copy fun */ - movl %edx, 28(%esp) - movl %eax, 32(%esp) - movl %ecx, 36(%esp) /* closure is user_data */ + movl 4(%ecx), %edx /* copy cif */ + movl 8(%ecx), %eax /* copy fun */ + movl %edx, closure_CF+28(%esp) + movl %eax, closure_CF+32(%esp) + movl %ecx, closure_CF+36(%esp) /* closure is user_data */ jmp L(do_closure_i386) L(UW11): # cfi_endproc @@ -329,19 +348,20 @@ L(UW13): /* Entry point from preceeding Go closures. */ L(do_closure_i386): + FFI_CLOSURE_PREP_CALL FFI_CLOSURE_CALL_INNER(14) FFI_CLOSURE_MASK_AND_JUMP(2, 15) .balign 8 L(load_table2): E(L(load_table2), X86_RET_FLOAT) - flds (%esp) + flds closure_CF(%esp) jmp L(e2) E(L(load_table2), X86_RET_DOUBLE) - fldl (%esp) + fldl closure_CF(%esp) jmp L(e2) E(L(load_table2), X86_RET_LDOUBLE) - fldt (%esp) + fldt closure_CF(%esp) jmp L(e2) E(L(load_table2), X86_RET_SINT8) movsbl %al, %eax @@ -356,7 +376,7 @@ E(L(load_table2), X86_RET_UINT16) movzwl %ax, %eax jmp L(e2) E(L(load_table2), X86_RET_INT64) - movl 4(%esp), %edx + movl closure_CF+4(%esp), %edx jmp L(e2) E(L(load_table2), X86_RET_INT32) nop @@ -405,11 +425,11 @@ L(UW21): L(UW22): # cfi_def_cfa_offset(closure_FS + 4) FFI_CLOSURE_SAVE_REGS - movl 4(%ecx), %edx /* copy cif */ - movl 8(%ecx), %eax /* copy fun */ - movl %edx, 28(%esp) - movl %eax, 32(%esp) - movl %ecx, 36(%esp) /* closure is user_data */ + movl 4(%ecx), %edx /* copy cif */ + movl 8(%ecx), %eax /* copy fun */ + movl %edx, closure_CF+28(%esp) + movl %eax, closure_CF+32(%esp) + movl %ecx, closure_CF+36(%esp) /* closure is user_data */ jmp L(do_closure_STDCALL) L(UW23): # cfi_endproc @@ -462,6 +482,7 @@ L(do_closure_REGISTER): /* Entry point from preceeding Go closure. */ L(do_closure_STDCALL): + FFI_CLOSURE_PREP_CALL FFI_CLOSURE_CALL_INNER(29) movl %eax, %ecx @@ -481,15 +502,15 @@ L(do_closure_STDCALL): .balign 8 L(load_table3): E(L(load_table3), X86_RET_FLOAT) - flds (%esp) + flds closure_CF(%esp) movl %ecx, %esp ret E(L(load_table3), X86_RET_DOUBLE) - fldl (%esp) + fldl closure_CF(%esp) movl %ecx, %esp ret E(L(load_table3), X86_RET_LDOUBLE) - fldt (%esp) + fldt closure_CF(%esp) movl %ecx, %esp ret E(L(load_table3), X86_RET_SINT8) @@ -509,8 +530,7 @@ E(L(load_table3), X86_RET_UINT16) movl %ecx, %esp ret E(L(load_table3), X86_RET_INT64) - popl %eax - popl %edx + movl closure_CF+4(%esp), %edx movl %ecx, %esp ret E(L(load_table3), X86_RET_INT32)