Merge pull request #132 from nielsAD/master

Pascal and Register calling convention support on x86
This commit is contained in:
Anthony Green
2014-09-18 19:10:54 -04:00
3 changed files with 836 additions and 715 deletions

View File

@@ -43,19 +43,21 @@
/* ffi_prep_args is called by the assembly routine once stack space /* ffi_prep_args is called by the assembly routine once stack space
has been allocated for the function's arguments */ has been allocated for the function's arguments */
void ffi_prep_args(char *stack, extended_cif *ecif); unsigned int ffi_prep_args(char *stack, extended_cif *ecif);
void ffi_prep_args(char *stack, extended_cif *ecif) unsigned int ffi_prep_args(char *stack, extended_cif *ecif)
{ {
register unsigned int i; register unsigned int i;
register void **p_argv; register void **p_argv;
register char *argp; register char *argp;
register ffi_type **p_arg; register ffi_type **p_arg;
#ifndef X86_WIN64 #ifndef X86_WIN64
size_t p_stack_args[2]; const int cabi = ecif->cif->abi;
void *p_stack_data[2]; const int dir = (cabi == FFI_PASCAL || cabi == FFI_REGISTER) ? -1 : +1;
unsigned int stack_args_count = 0;
void *p_stack_data[3];
char *argp2 = stack; char *argp2 = stack;
int stack_args_count = 0; #else
int cabi = ecif->cif->abi; #define dir 1
#endif #endif
argp = stack; argp = stack;
@@ -63,48 +65,56 @@ void ffi_prep_args(char *stack, extended_cif *ecif)
if ((ecif->cif->flags == FFI_TYPE_STRUCT if ((ecif->cif->flags == FFI_TYPE_STRUCT
|| ecif->cif->flags == FFI_TYPE_MS_STRUCT) || ecif->cif->flags == FFI_TYPE_MS_STRUCT)
#ifdef X86_WIN64 #ifdef X86_WIN64
&& (ecif->cif->rtype->size != 1 && ecif->cif->rtype->size != 2 && ((ecif->cif->rtype->size & (1 | 2 | 4 | 8)) == 0)
&& ecif->cif->rtype->size != 4 && ecif->cif->rtype->size != 8)
#endif #endif
) )
{ {
*(void **) argp = ecif->rvalue;
#ifndef X86_WIN64 #ifndef X86_WIN64
/* For fastcall/thiscall this is first register-passed /* For fastcall/thiscall/register this is first register-passed
argument. */ argument. */
if (cabi == FFI_THISCALL || cabi == FFI_FASTCALL) if (cabi == FFI_THISCALL || cabi == FFI_FASTCALL || cabi == FFI_REGISTER)
{ {
p_stack_args[stack_args_count] = sizeof (void*);
p_stack_data[stack_args_count] = argp; p_stack_data[stack_args_count] = argp;
++stack_args_count; ++stack_args_count;
} }
#endif #endif
*(void **) argp = ecif->rvalue;
argp += sizeof(void*); argp += sizeof(void*);
} }
p_arg = ecif->cif->arg_types;
p_argv = ecif->avalue; p_argv = ecif->avalue;
if (dir < 0)
for (i = ecif->cif->nargs, p_arg = ecif->cif->arg_types;
i != 0;
i--, p_arg++)
{ {
size_t z; const int nargs = ecif->cif->nargs - 1;
if (nargs > 0)
{
p_arg += nargs;
p_argv += nargs;
}
}
for (i = ecif->cif->nargs;
i != 0;
i--, p_arg += dir, p_argv += dir)
{
/* Align if necessary */ /* Align if necessary */
if ((sizeof(void*) - 1) & (size_t) argp) if ((sizeof(void*) - 1) & (size_t) argp)
argp = (char *) ALIGN(argp, sizeof(void*)); argp = (char *) ALIGN(argp, sizeof(void*));
z = (*p_arg)->size; size_t z = (*p_arg)->size;
#ifdef X86_WIN64 #ifdef X86_WIN64
if (z > sizeof(ffi_arg) if (z > FFI_SIZEOF_ARG
|| ((*p_arg)->type == FFI_TYPE_STRUCT || ((*p_arg)->type == FFI_TYPE_STRUCT
&& (z != 1 && z != 2 && z != 4 && z != 8)) && (z & (1 | 2 | 4 | 8)) == 0)
#if FFI_TYPE_DOUBLE != FFI_TYPE_LONGDOUBLE #if FFI_TYPE_DOUBLE != FFI_TYPE_LONGDOUBLE
|| ((*p_arg)->type == FFI_TYPE_LONGDOUBLE) || ((*p_arg)->type == FFI_TYPE_LONGDOUBLE)
#endif #endif
) )
{ {
z = sizeof(ffi_arg); z = FFI_SIZEOF_ARG;
*(void **)argp = *p_argv; *(void **)argp = *p_argv;
} }
else if ((*p_arg)->type == FFI_TYPE_FLOAT) else if ((*p_arg)->type == FFI_TYPE_FLOAT)
@@ -113,9 +123,9 @@ void ffi_prep_args(char *stack, extended_cif *ecif)
} }
else else
#endif #endif
if (z < sizeof(ffi_arg)) if (z < FFI_SIZEOF_ARG)
{ {
z = sizeof(ffi_arg); z = FFI_SIZEOF_ARG;
switch ((*p_arg)->type) switch ((*p_arg)->type)
{ {
case FFI_TYPE_SINT8: case FFI_TYPE_SINT8:
@@ -156,23 +166,30 @@ void ffi_prep_args(char *stack, extended_cif *ecif)
} }
#ifndef X86_WIN64 #ifndef X86_WIN64
/* For thiscall/fastcall convention register-passed arguments /* For thiscall/fastcall/register convention register-passed arguments
are the first two none-floating-point arguments with a size are the first two none-floating-point arguments with a size
smaller or equal to sizeof (void*). */ smaller or equal to sizeof (void*). */
if ((cabi == FFI_THISCALL && stack_args_count < 1) if ((z == FFI_SIZEOF_ARG)
&& ((cabi == FFI_REGISTER)
|| (cabi == FFI_THISCALL && stack_args_count < 1)
|| (cabi == FFI_FASTCALL && stack_args_count < 2)) || (cabi == FFI_FASTCALL && stack_args_count < 2))
&& ((*p_arg)->type != FFI_TYPE_FLOAT && (*p_arg)->type != FFI_TYPE_STRUCT)
)
{ {
if (z <= 4 if (dir < 0 && stack_args_count > 2)
&& ((*p_arg)->type != FFI_TYPE_FLOAT
&& (*p_arg)->type != FFI_TYPE_STRUCT))
{ {
p_stack_args[stack_args_count] = z; /* Iterating arguments backwards, so first register-passed argument
will be passed last. Shift temporary values to make place. */
p_stack_data[0] = p_stack_data[1];
p_stack_data[1] = p_stack_data[2];
stack_args_count = 2;
}
p_stack_data[stack_args_count] = argp; p_stack_data[stack_args_count] = argp;
++stack_args_count; ++stack_args_count;
} }
}
#endif #endif
p_argv++;
#ifdef X86_WIN64 #ifdef X86_WIN64
argp += (z + sizeof(void*) - 1) & ~(sizeof(void*) - 1); argp += (z + sizeof(void*) - 1) & ~(sizeof(void*) - 1);
#else #else
@@ -181,44 +198,35 @@ void ffi_prep_args(char *stack, extended_cif *ecif)
} }
#ifndef X86_WIN64 #ifndef X86_WIN64
/* We need to move the register-passed arguments for thiscall/fastcall /* We need to move the register-passed arguments for thiscall/fastcall/register
on top of stack, so that those can be moved to registers ecx/edx by on top of stack, so that those can be moved to registers by call-handler. */
call-handler. */
if (stack_args_count > 0) if (stack_args_count > 0)
{ {
size_t zz = (p_stack_args[0] + 3) & ~3; if (dir < 0 && stack_args_count > 1)
char *h;
/* Move first argument to top-stack position. */
if (p_stack_data[0] != argp2)
{ {
h = alloca (zz + 1); /* Reverse order if iterating arguments backwards */
memcpy (h, p_stack_data[0], zz); ffi_arg tmp = *(ffi_arg*) p_stack_data[0];
memmove (argp2 + zz, argp2, *(ffi_arg*) p_stack_data[0] = *(ffi_arg*) p_stack_data[stack_args_count - 1];
(size_t) ((char *) p_stack_data[0] - (char*)argp2)); *(ffi_arg*) p_stack_data[stack_args_count - 1] = tmp;
memcpy (argp2, h, zz);
} }
argp2 += zz; int i;
--stack_args_count; for (i = 0; i < stack_args_count; i++)
if (zz > 4)
stack_args_count = 0;
/* If we have a second argument, then move it on top
after the first one. */
if (stack_args_count > 0 && p_stack_data[1] != argp2)
{ {
zz = p_stack_args[1]; if (p_stack_data[i] != argp2)
zz = (zz + 3) & ~3; {
h = alloca (zz + 1); ffi_arg tmp = *(ffi_arg*) p_stack_data[i];
h = alloca (zz + 1); memmove (argp2 + FFI_SIZEOF_ARG, argp2, (size_t) ((char*) p_stack_data[i] - (char*)argp2));
memcpy (h, p_stack_data[1], zz); *(ffi_arg *) argp2 = tmp;
memmove (argp2 + zz, argp2, (size_t) ((char*) p_stack_data[1] - (char*)argp2)); }
memcpy (argp2, h, zz);
argp2 += FFI_SIZEOF_ARG;
} }
} }
return stack_args_count;
#endif #endif
return; return 0;
} }
/* Perform machine dependent cif processing */ /* Perform machine dependent cif processing */
@@ -314,12 +322,12 @@ ffi_status ffi_prep_cif_machdep(ffi_cif *cif)
#ifdef X86_WIN64 #ifdef X86_WIN64
/* ensure space for storing four registers */ /* ensure space for storing four registers */
cif->bytes += 4 * sizeof(ffi_arg); cif->bytes += 4 * FFI_SIZEOF_ARG;
#endif #endif
#ifndef X86_WIN32 #ifndef X86_WIN32
#ifndef X86_WIN64 #ifndef X86_WIN64
if (cif->abi != FFI_STDCALL && cif->abi != FFI_THISCALL && cif->abi != FFI_FASTCALL) if (cif->abi == FFI_SYSV || cif->abi == FFI_UNIX64)
#endif #endif
cif->bytes = (cif->bytes + 15) & ~0xF; cif->bytes = (cif->bytes + 15) & ~0xF;
#endif #endif
@@ -329,11 +337,11 @@ ffi_status ffi_prep_cif_machdep(ffi_cif *cif)
#ifdef X86_WIN64 #ifdef X86_WIN64
extern int extern int
ffi_call_win64(void (*)(char *, extended_cif *), extended_cif *, ffi_call_win64(unsigned int (*)(char *, extended_cif *), extended_cif *,
unsigned, unsigned, unsigned *, void (*fn)(void)); unsigned, unsigned, unsigned *, void (*fn)(void));
#else #else
extern void extern void
ffi_call_win32(void (*)(char *, extended_cif *), extended_cif *, ffi_call_win32(unsigned int (*)(char *, extended_cif *), extended_cif *,
unsigned, unsigned, unsigned, unsigned *, void (*fn)(void)); unsigned, unsigned, unsigned, unsigned *, void (*fn)(void));
extern void ffi_call_SYSV(void (*)(char *, extended_cif *), extended_cif *, extern void ffi_call_SYSV(void (*)(char *, extended_cif *), extended_cif *,
unsigned, unsigned, unsigned *, void (*fn)(void)); unsigned, unsigned, unsigned *, void (*fn)(void));
@@ -352,8 +360,7 @@ void ffi_call(ffi_cif *cif, void (*fn)(void), void *rvalue, void **avalue)
#ifdef X86_WIN64 #ifdef X86_WIN64
if (rvalue == NULL if (rvalue == NULL
&& cif->flags == FFI_TYPE_STRUCT && cif->flags == FFI_TYPE_STRUCT
&& cif->rtype->size != 1 && cif->rtype->size != 2 && ((cif->rtype->size & (1 | 2 | 4 | 8)) == 0))
&& cif->rtype->size != 4 && cif->rtype->size != 8)
{ {
ecif.rvalue = alloca((cif->rtype->size + 0xF) & ~0xF); ecif.rvalue = alloca((cif->rtype->size + 0xF) & ~0xF);
} }
@@ -387,37 +394,12 @@ void ffi_call(ffi_cif *cif, void (*fn)(void), void *rvalue, void **avalue)
case FFI_MS_CDECL: case FFI_MS_CDECL:
#endif #endif
case FFI_STDCALL: case FFI_STDCALL:
ffi_call_win32(ffi_prep_args, &ecif, cif->abi, cif->bytes, cif->flags,
ecif.rvalue, fn);
break;
case FFI_THISCALL: case FFI_THISCALL:
case FFI_FASTCALL: case FFI_FASTCALL:
{ case FFI_PASCAL:
unsigned int abi = cif->abi; case FFI_REGISTER:
unsigned int i, passed_regs = 0; ffi_call_win32(ffi_prep_args, &ecif, cif->abi, cif->bytes, cif->flags,
if (cif->flags == FFI_TYPE_STRUCT)
++passed_regs;
for (i=0; i < cif->nargs && passed_regs < 2;i++)
{
size_t sz;
if (cif->arg_types[i]->type == FFI_TYPE_FLOAT
|| cif->arg_types[i]->type == FFI_TYPE_STRUCT)
continue;
sz = (cif->arg_types[i]->size + 3) & ~3;
if (sz == 0 || sz > 4)
continue;
++passed_regs;
}
if (passed_regs < 2 && abi == FFI_FASTCALL)
abi = FFI_THISCALL;
if (passed_regs < 1 && abi == FFI_THISCALL)
abi = FFI_STDCALL;
ffi_call_win32(ffi_prep_args, &ecif, abi, cif->bytes, cif->flags,
ecif.rvalue, fn); ecif.rvalue, fn);
}
break; break;
#endif #endif
default: default:
@@ -431,12 +413,14 @@ void ffi_call(ffi_cif *cif, void (*fn)(void), void *rvalue, void **avalue)
/* The following __attribute__((regparm(1))) decorations will have no effect /* The following __attribute__((regparm(1))) decorations will have no effect
on MSVC or SUNPRO_C -- standard conventions apply. */ on MSVC or SUNPRO_C -- standard conventions apply. */
static void ffi_prep_incoming_args_SYSV (char *stack, void **ret, static unsigned int ffi_prep_incoming_args (char *stack, void **ret,
void** args, ffi_cif* cif); void** args, ffi_cif* cif);
void FFI_HIDDEN ffi_closure_SYSV (ffi_closure *) void FFI_HIDDEN ffi_closure_SYSV (ffi_closure *)
__attribute__ ((regparm(1))); __attribute__ ((regparm(1)));
unsigned int FFI_HIDDEN ffi_closure_SYSV_inner (ffi_closure *, void **, void *) unsigned int FFI_HIDDEN ffi_closure_SYSV_inner (ffi_closure *, void **, void *)
__attribute__ ((regparm(1))); __attribute__ ((regparm(1)));
unsigned int FFI_HIDDEN ffi_closure_WIN32_inner (ffi_closure *, void **, void *)
__attribute__ ((regparm(1)));
void FFI_HIDDEN ffi_closure_raw_SYSV (ffi_raw_closure *) void FFI_HIDDEN ffi_closure_raw_SYSV (ffi_raw_closure *)
__attribute__ ((regparm(1))); __attribute__ ((regparm(1)));
#ifdef X86_WIN32 #ifdef X86_WIN32
@@ -444,12 +428,10 @@ void FFI_HIDDEN ffi_closure_raw_THISCALL (ffi_raw_closure *)
__attribute__ ((regparm(1))); __attribute__ ((regparm(1)));
#endif #endif
#ifndef X86_WIN64 #ifndef X86_WIN64
void FFI_HIDDEN ffi_closure_STDCALL (ffi_closure *) void FFI_HIDDEN ffi_closure_STDCALL (ffi_closure *);
__attribute__ ((regparm(1))); void FFI_HIDDEN ffi_closure_THISCALL (ffi_closure *);
void FFI_HIDDEN ffi_closure_THISCALL (ffi_closure *) void FFI_HIDDEN ffi_closure_FASTCALL (ffi_closure *);
__attribute__ ((regparm(1))); void FFI_HIDDEN ffi_closure_REGISTER (ffi_closure *);
void FFI_HIDDEN ffi_closure_FASTCALL (ffi_closure *)
__attribute__ ((regparm(1)));
#else #else
void FFI_HIDDEN ffi_closure_win64 (ffi_closure *); void FFI_HIDDEN ffi_closure_win64 (ffi_closure *);
#endif #endif
@@ -473,7 +455,7 @@ ffi_closure_win64_inner (ffi_closure *closure, void *args) {
* a structure, it will change RESP to point to the * a structure, it will change RESP to point to the
* structure return address. */ * structure return address. */
ffi_prep_incoming_args_SYSV(args, &resp, arg_area, cif); ffi_prep_incoming_args(args, &resp, arg_area, cif);
(closure->fun) (cif, resp, arg_area, closure->user_data); (closure->fun) (cif, resp, arg_area, closure->user_data);
@@ -502,72 +484,157 @@ ffi_closure_SYSV_inner (ffi_closure *closure, void **respp, void *args)
* a structure, it will change RESP to point to the * a structure, it will change RESP to point to the
* structure return address. */ * structure return address. */
ffi_prep_incoming_args_SYSV(args, respp, arg_area, cif); ffi_prep_incoming_args(args, respp, arg_area, cif);
(closure->fun) (cif, *respp, arg_area, closure->user_data); (closure->fun) (cif, *respp, arg_area, closure->user_data);
return cif->flags; return cif->flags;
} }
/* Inner closure handler used by the WIN32 closure entry code.

   CLOSURE is the closure being invoked, RESPP points at the slot that
   holds the result address, and ARGS is the raw incoming argument area.

   Returns the value produced by ffi_prep_incoming_args (the number of
   bytes of the argument area it walked over), not cif->flags as
   ffi_closure_SYSV_inner does.  */
unsigned int FFI_HIDDEN __attribute__ ((regparm(1)))
ffi_closure_WIN32_inner (ffi_closure *closure, void **respp, void *args)
{
  ffi_cif *const cif = closure->cif;

  /* One pointer slot per declared argument, taken from this frame.  */
  void **const arg_area = (void **) alloca (cif->nargs * sizeof (void *));

  /* Point every ARG_AREA element at its value in the incoming argument
     area.  For a structure return this also redirects *RESPP to the
     structure return address.  */
  const unsigned int stack_bytes
    = ffi_prep_incoming_args (args, respp, arg_area, cif);

  /* Hand control to the user-supplied callback.  */
  (closure->fun) (cif, *respp, arg_area, closure->user_data);

  return stack_bytes;
}
#endif /* !X86_WIN64 */ #endif /* !X86_WIN64 */
static void static unsigned int
ffi_prep_incoming_args_SYSV(char *stack, void **rvalue, void **avalue, ffi_prep_incoming_args(char *stack, void **rvalue, void **avalue,
ffi_cif *cif) ffi_cif *cif)
{ {
register unsigned int i; register unsigned int i;
register void **p_argv; register void **p_argv;
register char *argp; register char *argp;
register ffi_type **p_arg; register ffi_type **p_arg;
#ifndef X86_WIN64
argp = stack; const int cabi = cif->abi;
const int dir = (cabi == FFI_PASCAL || cabi == FFI_REGISTER) ? -1 : +1;
#ifdef X86_WIN64 const unsigned int max_stack_count = (cabi == FFI_THISCALL) ? 1
if (cif->rtype->size > sizeof(ffi_arg) : (cabi == FFI_FASTCALL) ? 2
|| (cif->flags == FFI_TYPE_STRUCT : (cabi == FFI_REGISTER) ? 3
&& (cif->rtype->size != 1 && cif->rtype->size != 2 : 0;
&& cif->rtype->size != 4 && cif->rtype->size != 8))) { unsigned int passed_regs = 0;
*rvalue = *(void **) argp; void *p_stack_data[3] = { stack - 1 };
argp += sizeof(void *);
}
#else #else
if ( cif->flags == FFI_TYPE_STRUCT #define dir 1
|| cif->flags == FFI_TYPE_MS_STRUCT ) {
*rvalue = *(void **) argp;
argp += sizeof(void *);
}
#endif #endif
p_argv = avalue; argp = stack;
#ifndef X86_WIN64
for (i = cif->nargs, p_arg = cif->arg_types; (i != 0); i--, p_arg++) argp += max_stack_count * FFI_SIZEOF_ARG;
{ #endif
size_t z;
/* Align if necessary */
if ((sizeof(void*) - 1) & (size_t) argp) {
argp = (char *) ALIGN(argp, sizeof(void*));
}
if ((cif->flags == FFI_TYPE_STRUCT
|| cif->flags == FFI_TYPE_MS_STRUCT)
#ifdef X86_WIN64 #ifdef X86_WIN64
if ((*p_arg)->size > sizeof(ffi_arg) && ((cif->rtype->size & (1 | 2 | 4 | 8)) == 0)
|| ((*p_arg)->type == FFI_TYPE_STRUCT #endif
&& ((*p_arg)->size != 1 && (*p_arg)->size != 2 )
&& (*p_arg)->size != 4 && (*p_arg)->size != 8)))
{ {
z = sizeof(void *); #ifndef X86_WIN64
*p_argv = *(void **)argp; if (passed_regs < max_stack_count)
{
*rvalue = *(void**) (stack + (passed_regs*FFI_SIZEOF_ARG));
++passed_regs;
} }
else else
#endif #endif
{ {
z = (*p_arg)->size; *rvalue = *(void **) argp;
argp += sizeof(void *);
}
}
#ifndef X86_WIN64
/* Do register arguments first */
for (i = 0, p_arg = cif->arg_types;
i < cif->nargs && passed_regs < max_stack_count;
i++, p_arg++)
{
if ((*p_arg)->type == FFI_TYPE_FLOAT
|| (*p_arg)->type == FFI_TYPE_STRUCT)
continue;
size_t sz = (*p_arg)->size;
if(sz == 0 || sz > FFI_SIZEOF_ARG)
continue;
p_stack_data[passed_regs] = avalue + i;
avalue[i] = stack + (passed_regs*FFI_SIZEOF_ARG);
++passed_regs;
}
#endif
p_arg = cif->arg_types;
p_argv = avalue;
if (dir < 0)
{
const int nargs = cif->nargs - 1;
if (nargs > 0)
{
p_arg += nargs;
p_argv += nargs;
}
}
for (i = cif->nargs;
i != 0;
i--, p_arg += dir, p_argv += dir)
{
/* Align if necessary */
if ((sizeof(void*) - 1) & (size_t) argp)
argp = (char *) ALIGN(argp, sizeof(void*));
size_t z = (*p_arg)->size;
#ifdef X86_WIN64
if (z > FFI_SIZEOF_ARG
|| ((*p_arg)->type == FFI_TYPE_STRUCT
&& (z & (1 | 2 | 4 | 8)) == 0)
#if FFI_TYPE_DOUBLE != FFI_TYPE_LONGDOUBLE
|| ((*p_arg)->type == FFI_TYPE_LONGDOUBLE)
#endif
)
{
z = FFI_SIZEOF_ARG;
*p_argv = *(void **)argp;
}
else
#else
if (passed_regs > 0
&& z <= FFI_SIZEOF_ARG
&& (p_argv == p_stack_data[0]
|| p_argv == p_stack_data[1]
|| p_argv == p_stack_data[2]))
{
/* Already assigned a register value */
continue;
}
else
#endif
{
/* because we're little endian, this is what it turns into. */ /* because we're little endian, this is what it turns into. */
*p_argv = (void*) argp; *p_argv = (void*) argp;
} }
p_argv++;
#ifdef X86_WIN64 #ifdef X86_WIN64
argp += (z + sizeof(void*) - 1) & ~(sizeof(void*) - 1); argp += (z + sizeof(void*) - 1) & ~(sizeof(void*) - 1);
#else #else
@@ -575,7 +642,7 @@ ffi_prep_incoming_args_SYSV(char *stack, void **rvalue, void **avalue,
#endif #endif
} }
return; return (size_t)argp - (size_t)stack;
} }
#define FFI_INIT_TRAMPOLINE_WIN64(TRAMP,FUN,CTX,MASK) \ #define FFI_INIT_TRAMPOLINE_WIN64(TRAMP,FUN,CTX,MASK) \
@@ -636,15 +703,15 @@ ffi_prep_incoming_args_SYSV(char *stack, void **rvalue, void **avalue,
*(unsigned short*) &__tramp[50] = (__size + 8); /* ret (__size + 8) */ \ *(unsigned short*) &__tramp[50] = (__size + 8); /* ret (__size + 8) */ \
} }
#define FFI_INIT_TRAMPOLINE_STDCALL(TRAMP,FUN,CTX) \ #define FFI_INIT_TRAMPOLINE_WIN32(TRAMP,FUN,CTX) \
{ unsigned char *__tramp = (unsigned char*)(TRAMP); \ { unsigned char *__tramp = (unsigned char*)(TRAMP); \
unsigned int __fun = (unsigned int)(FUN); \ unsigned int __fun = (unsigned int)(FUN); \
unsigned int __ctx = (unsigned int)(CTX); \ unsigned int __ctx = (unsigned int)(CTX); \
unsigned int __dis = __fun - (__ctx + 10); \ unsigned int __dis = __fun - (__ctx + 10); \
*(unsigned char*) &__tramp[0] = 0xb8; \ *(unsigned char*) &__tramp[0] = 0x68; \
*(unsigned int*) &__tramp[1] = __ctx; /* movl __ctx, %eax */ \ *(unsigned int*) &__tramp[1] = __ctx; /* push __ctx */ \
*(unsigned char *) &__tramp[5] = 0xe8; \ *(unsigned char*) &__tramp[5] = 0xe9; \
*(unsigned int*) &__tramp[6] = __dis; /* call __fun */ \ *(unsigned int*) &__tramp[6] = __dis; /* jmp __fun */ \
} }
/* the cif must already be prep'ed */ /* the cif must already be prep'ed */
@@ -674,21 +741,27 @@ ffi_prep_closure_loc (ffi_closure* closure,
&ffi_closure_SYSV, &ffi_closure_SYSV,
(void*)codeloc); (void*)codeloc);
} }
else if (cif->abi == FFI_REGISTER)
{
FFI_INIT_TRAMPOLINE_WIN32 (&closure->tramp[0],
&ffi_closure_REGISTER,
(void*)codeloc);
}
else if (cif->abi == FFI_FASTCALL) else if (cif->abi == FFI_FASTCALL)
{ {
FFI_INIT_TRAMPOLINE_STDCALL (&closure->tramp[0], FFI_INIT_TRAMPOLINE_WIN32 (&closure->tramp[0],
&ffi_closure_FASTCALL, &ffi_closure_FASTCALL,
(void*)codeloc); (void*)codeloc);
} }
else if (cif->abi == FFI_THISCALL) else if (cif->abi == FFI_THISCALL)
{ {
FFI_INIT_TRAMPOLINE_STDCALL (&closure->tramp[0], FFI_INIT_TRAMPOLINE_WIN32 (&closure->tramp[0],
&ffi_closure_THISCALL, &ffi_closure_THISCALL,
(void*)codeloc); (void*)codeloc);
} }
else if (cif->abi == FFI_STDCALL) else if (cif->abi == FFI_STDCALL || cif->abi == FFI_PASCAL)
{ {
FFI_INIT_TRAMPOLINE_STDCALL (&closure->tramp[0], FFI_INIT_TRAMPOLINE_WIN32 (&closure->tramp[0],
&ffi_closure_STDCALL, &ffi_closure_STDCALL,
(void*)codeloc); (void*)codeloc);
} }
@@ -764,10 +837,38 @@ ffi_prep_raw_closure_loc (ffi_raw_closure* closure,
return FFI_OK; return FFI_OK;
} }
static void static unsigned int
ffi_prep_args_raw(char *stack, extended_cif *ecif) ffi_prep_args_raw(char *stack, extended_cif *ecif)
{ {
memcpy (stack, ecif->avalue, ecif->cif->bytes); const ffi_cif *cif = ecif->cif;
unsigned int i, passed_regs = 0;
#ifndef X86_WIN64
const unsigned int abi = cif->abi;
const unsigned int max_regs = (abi == FFI_THISCALL) ? 1
: (abi == FFI_FASTCALL) ? 2
: (abi == FFI_REGISTER) ? 3
: 0;
if (cif->flags == FFI_TYPE_STRUCT)
++passed_regs;
for (i = 0; i < cif->nargs && passed_regs <= max_regs; i++)
{
if (cif->arg_types[i]->type == FFI_TYPE_FLOAT
|| cif->arg_types[i]->type == FFI_TYPE_STRUCT)
continue;
size_t sz = cif->arg_types[i]->size;
if (sz == 0 || sz > FFI_SIZEOF_ARG)
continue;
++passed_regs;
}
#endif
memcpy (stack, ecif->avalue, cif->bytes);
return passed_regs;
} }
/* we borrow this routine from libffi (it must be changed, though, to /* we borrow this routine from libffi (it must be changed, though, to
@@ -810,37 +911,12 @@ ffi_raw_call(ffi_cif *cif, void (*fn)(void), void *rvalue, ffi_raw *fake_avalue)
#endif #endif
#ifndef X86_WIN64 #ifndef X86_WIN64
case FFI_STDCALL: case FFI_STDCALL:
ffi_call_win32(ffi_prep_args_raw, &ecif, cif->abi, cif->bytes, cif->flags,
ecif.rvalue, fn);
break;
case FFI_THISCALL: case FFI_THISCALL:
case FFI_FASTCALL: case FFI_FASTCALL:
{ case FFI_PASCAL:
unsigned int abi = cif->abi; case FFI_REGISTER:
unsigned int i, passed_regs = 0; ffi_call_win32(ffi_prep_args_raw, &ecif, cif->abi, cif->bytes, cif->flags,
if (cif->flags == FFI_TYPE_STRUCT)
++passed_regs;
for (i=0; i < cif->nargs && passed_regs < 2;i++)
{
size_t sz;
if (cif->arg_types[i]->type == FFI_TYPE_FLOAT
|| cif->arg_types[i]->type == FFI_TYPE_STRUCT)
continue;
sz = (cif->arg_types[i]->size + 3) & ~3;
if (sz == 0 || sz > 4)
continue;
++passed_regs;
}
if (passed_regs < 2 && abi == FFI_FASTCALL)
cif->abi = abi = FFI_THISCALL;
if (passed_regs < 1 && abi == FFI_THISCALL)
cif->abi = abi = FFI_STDCALL;
ffi_call_win32(ffi_prep_args_raw, &ecif, abi, cif->bytes, cif->flags,
ecif.rvalue, fn); ecif.rvalue, fn);
}
break; break;
#endif #endif
default: default:

View File

@@ -82,6 +82,8 @@ typedef enum ffi_abi {
FFI_THISCALL, FFI_THISCALL,
FFI_FASTCALL, FFI_FASTCALL,
FFI_MS_CDECL, FFI_MS_CDECL,
FFI_PASCAL,
FFI_REGISTER,
FFI_LAST_ABI, FFI_LAST_ABI,
#ifdef _MSC_VER #ifdef _MSC_VER
FFI_DEFAULT_ABI = FFI_MS_CDECL FFI_DEFAULT_ABI = FFI_MS_CDECL
@@ -101,6 +103,8 @@ typedef enum ffi_abi {
FFI_THISCALL, FFI_THISCALL,
FFI_FASTCALL, FFI_FASTCALL,
FFI_STDCALL, FFI_STDCALL,
FFI_PASCAL,
FFI_REGISTER,
FFI_LAST_ABI, FFI_LAST_ABI,
#if defined(__i386__) || defined(__i386) #if defined(__i386__) || defined(__i386)
FFI_DEFAULT_ABI = FFI_SYSV FFI_DEFAULT_ABI = FFI_SYSV

View File

@@ -34,8 +34,8 @@
#include <fficonfig.h> #include <fficonfig.h>
#include <ffi.h> #include <ffi.h>
#define CIF_ABI_OFFSET 0
#define CIF_BYTES_OFFSET 16 #define CIF_BYTES_OFFSET 16
#define CIF_FLAGS_OFFSET 20
#ifdef _MSC_VER #ifdef _MSC_VER
@@ -45,6 +45,7 @@
.MODEL FLAT, C .MODEL FLAT, C
EXTRN ffi_closure_SYSV_inner:NEAR EXTRN ffi_closure_SYSV_inner:NEAR
EXTRN ffi_closure_WIN32_inner:NEAR
_TEXT SEGMENT _TEXT SEGMENT
@@ -63,32 +64,44 @@ ffi_call_win32 PROC NEAR,
mov eax, esp mov eax, esp
;; Place all of the ffi_prep_args in position ;; Call ffi_prep_args
push ecif push ecif
push eax push eax
call ffi_prep_args call ffi_prep_args
;; Return stack to previous state and call the function
add esp, 8 add esp, 8
;; Handle thiscall and fastcall ;; Prepare registers
cmp cif_abi, 3 ;; FFI_THISCALL ;; EAX stores the number of register arguments
jz do_thiscall cmp eax, 0
cmp cif_abi, 4 ;; FFI_FASTCALL je fun
jnz do_stdcall cmp eax, 3
mov ecx, DWORD PTR [esp] jl prepr_two_cmp
mov edx, DWORD PTR [esp+4]
mov ecx, esp
add esp, 12
mov eax, DWORD PTR [ecx+8]
jmp prepr_two
prepr_two_cmp:
cmp eax, 2
jl prepr_one_prep
mov ecx, esp
add esp, 8 add esp, 8
jmp do_stdcall prepr_two:
do_thiscall: mov edx, DWORD PTR [ecx+4]
mov ecx, DWORD PTR [esp] jmp prepr_one
prepr_one_prep:
mov ecx, esp
add esp, 4 add esp, 4
do_stdcall: prepr_one:
call fn mov ecx, DWORD PTR [ecx]
cmp cif_abi, 7 ;; FFI_REGISTER
jne fun
;; cdecl: we restore esp in the epilogue, so there's no need to xchg ecx, eax
;; remove the space we pushed for the args.
;; stdcall: the callee has already cleaned the stack. fun:
;; Call function
call fn
;; Load ecx with the return type code ;; Load ecx with the return type code
mov ecx, cif_flags mov ecx, cif_flags
@@ -203,7 +216,7 @@ ffi_closure_THISCALL PROC NEAR
ffi_closure_THISCALL ENDP ffi_closure_THISCALL ENDP
ffi_closure_FASTCALL PROC NEAR ffi_closure_FASTCALL PROC NEAR
;; Insert the register argument on the stack as the first argument ;; Insert the 2 register arguments on the stack as the first argument
xchg DWORD PTR [esp+4], edx xchg DWORD PTR [esp+4], edx
xchg DWORD PTR [esp], ecx xchg DWORD PTR [esp], ecx
push edx push edx
@@ -211,6 +224,16 @@ ffi_closure_FASTCALL PROC NEAR
jmp ffi_closure_STDCALL jmp ffi_closure_STDCALL
ffi_closure_FASTCALL ENDP ffi_closure_FASTCALL ENDP
ffi_closure_REGISTER PROC NEAR
        ;; Closure entry point for FFI_REGISTER.
        ;; On entry (after the trampoline's `push ctx; jmp`):
        ;;   [esp]   = ffi_closure ctx
        ;;   [esp+4] = return address
        ;;   eax, edx, ecx = the three register-passed arguments
        ;; Insert the 3 register arguments on the stack as the first
        ;; three arguments, keeping ctx and the return address on top.
        push    eax                     ;; 1st register argument
        xchg    DWORD PTR [esp+8], ecx  ;; store 3rd argument, ecx = return address
        xchg    DWORD PTR [esp+4], edx  ;; store 2nd argument, edx = ctx
        push    ecx                     ;; re-push the return address
        push    edx                     ;; re-push ctx so it is on top again
        ;; Stack now: ctx, return address, eax, edx, ecx, original stack args.
        jmp     ffi_closure_STDCALL
ffi_closure_REGISTER ENDP
ffi_closure_SYSV PROC NEAR FORCEFRAME ffi_closure_SYSV PROC NEAR FORCEFRAME
;; the ffi_closure ctx is passed in eax by the trampoline. ;; the ffi_closure ctx is passed in eax by the trampoline.
@@ -308,7 +331,6 @@ ffi_closure_SYSV ENDP
#define RAW_CLOSURE_CIF_OFFSET ((FFI_TRAMPOLINE_SIZE + 3) AND NOT 3) #define RAW_CLOSURE_CIF_OFFSET ((FFI_TRAMPOLINE_SIZE + 3) AND NOT 3)
#define RAW_CLOSURE_FUN_OFFSET (RAW_CLOSURE_CIF_OFFSET + 4) #define RAW_CLOSURE_FUN_OFFSET (RAW_CLOSURE_CIF_OFFSET + 4)
#define RAW_CLOSURE_USER_DATA_OFFSET (RAW_CLOSURE_FUN_OFFSET + 4) #define RAW_CLOSURE_USER_DATA_OFFSET (RAW_CLOSURE_FUN_OFFSET + 4)
#define CIF_FLAGS_OFFSET 20
ffi_closure_raw_THISCALL PROC NEAR USES esi FORCEFRAME ffi_closure_raw_THISCALL PROC NEAR USES esi FORCEFRAME
sub esp, 36 sub esp, 36
@@ -405,7 +427,7 @@ ffi_closure_raw_SYSV ENDP
#endif /* !FFI_NO_RAW_API */ #endif /* !FFI_NO_RAW_API */
ffi_closure_STDCALL PROC NEAR FORCEFRAME ffi_closure_STDCALL PROC NEAR FORCEFRAME
;; the ffi_closure ctx is passed in eax by the trampoline. mov eax, [esp] ;; the ffi_closure ctx passed by the trampoline.
sub esp, 40 sub esp, 40
lea edx, [ebp - 24] lea edx, [ebp - 24]
@@ -415,9 +437,13 @@ ffi_closure_STDCALL PROC NEAR FORCEFRAME
lea edx, [ebp - 12] lea edx, [ebp - 12]
mov [esp + 4], edx ;; &resp mov [esp + 4], edx ;; &resp
mov [esp], eax ;; closure mov [esp], eax ;; closure
call ffi_closure_SYSV_inner call ffi_closure_WIN32_inner
mov ecx, [ebp - 12] mov ecx, [ebp - 12]
xchg [ebp + 4], eax ;;xchg size of stack parameters and ffi_closure ctx
mov eax, DWORD PTR [eax + CLOSURE_CIF_OFFSET]
mov eax, DWORD PTR [eax + CIF_FLAGS_OFFSET]
cd_jumptable: cd_jumptable:
jmp [cd_jumpdata + 4 * eax] jmp [cd_jumpdata + 4 * eax]
cd_jumpdata: cd_jumpdata:
@@ -481,21 +507,10 @@ cd_retlongdouble:
cd_epilogue: cd_epilogue:
mov esp, ebp mov esp, ebp
pop ebp pop ebp
pop ecx mov ecx, [esp + 4] ;; Return address
pop edx add esp, [esp] ;; Parameters stack size
mov ecx, DWORD PTR [ecx + (CLOSURE_CIF_OFFSET-10)] add esp, 8
add esp, DWORD PTR [ecx + CIF_BYTES_OFFSET] jmp ecx
mov ecx, DWORD PTR [ecx + CIF_ABI_OFFSET]
cmp ecx, 3
je cd_thiscall
cmp ecx, 4
jne cd_not_fastcall
add esp, 4
cd_thiscall:
add esp, 4
cd_not_fastcall:
jmp edx
ffi_closure_STDCALL ENDP ffi_closure_STDCALL ENDP
_TEXT ENDS _TEXT ENDS
@@ -531,32 +546,46 @@ USCORE_SYMBOL(ffi_call_win32):
movl %esp,%eax movl %esp,%eax
# Place all of the ffi_prep_args in position # Call ffi_prep_args
pushl 12(%ebp) pushl 12(%ebp)
pushl %eax pushl %eax
call *8(%ebp) call *8(%ebp)
# Return stack to previous state and call the function
addl $8,%esp addl $8,%esp
# Handle fastcall and thiscall # Prepare registers
cmpl $3, 16(%ebp) # FFI_THISCALL # EAX stores the number of register arguments
jz .do_thiscall cmpl $0, %eax
cmpl $4, 16(%ebp) # FFI_FASTCALL je .fun
jnz .do_fncall cmpl $3, %eax
movl (%esp), %ecx jl .prepr_two_cmp
movl 4(%esp), %edx
movl %esp, %ecx
addl $12, %esp
movl 8(%ecx), %eax
jmp .prepr_two
.prepr_two_cmp:
cmpl $2, %eax
jl .prepr_one_prep
movl %esp, %ecx
addl $8, %esp addl $8, %esp
jmp .do_fncall .prepr_two:
.do_thiscall: movl 4(%ecx), %edx
movl (%esp), %ecx jmp .prepr_one
.prepr_one_prep:
movl %esp, %ecx
addl $4, %esp addl $4, %esp
.prepr_one:
movl (%ecx), %ecx
cmpl $7, 16(%ebp) # FFI_REGISTER
jne .fun
.do_fncall: xchgl %eax, %ecx
.fun:
# FIXME: Align the stack to a 128-bit boundary to avoid # FIXME: Align the stack to a 128-bit boundary to avoid
# potential performance hits. # potential performance hits.
# Call function
call *32(%ebp) call *32(%ebp)
# stdcall functions pop arguments off the stack themselves # stdcall functions pop arguments off the stack themselves
@@ -702,14 +731,27 @@ FFI_HIDDEN(ffi_closure_FASTCALL)
.def _ffi_closure_FASTCALL; .scl 2; .type 32; .endef .def _ffi_closure_FASTCALL; .scl 2; .type 32; .endef
#endif #endif
USCORE_SYMBOL(ffi_closure_FASTCALL): USCORE_SYMBOL(ffi_closure_FASTCALL):
/* Insert the register arguments on the stack as the first two arguments */ /* Insert the 2 register arguments on the stack as the first two arguments */
xchg %edx, 4(%esp) xchg %edx, 4(%esp)
xchg %ecx, (%esp) xchg %ecx, (%esp)
push %edx push %edx
push %ecx push %ecx
jmp .ffi_closure_STDCALL_internal jmp .ffi_closure_STDCALL_internal
.LFE1: FFI_HIDDEN(ffi_closure_REGISTER)
.globl USCORE_SYMBOL(ffi_closure_REGISTER)
#if defined(X86_WIN32) && !defined(__OS2__)
.def _ffi_closure_REGISTER; .scl 2; .type 32; .endef
#endif
USCORE_SYMBOL(ffi_closure_REGISTER):
/* Insert the 3 register arguments on the stack as the first three arguments */
push %eax
xchg %ecx, 8(%esp)
xchg %edx, 4(%esp)
push %ecx
push %edx
jmp .ffi_closure_STDCALL_internal
.LFE1:
# This assumes we are using gas. # This assumes we are using gas.
.balign 16 .balign 16
FFI_HIDDEN(ffi_closure_SYSV) FFI_HIDDEN(ffi_closure_SYSV)
@@ -853,7 +895,6 @@ USCORE_SYMBOL(ffi_closure_SYSV):
#define RAW_CLOSURE_CIF_OFFSET ((FFI_TRAMPOLINE_SIZE + 3) & ~3) #define RAW_CLOSURE_CIF_OFFSET ((FFI_TRAMPOLINE_SIZE + 3) & ~3)
#define RAW_CLOSURE_FUN_OFFSET (RAW_CLOSURE_CIF_OFFSET + 4) #define RAW_CLOSURE_FUN_OFFSET (RAW_CLOSURE_CIF_OFFSET + 4)
#define RAW_CLOSURE_USER_DATA_OFFSET (RAW_CLOSURE_FUN_OFFSET + 4) #define RAW_CLOSURE_USER_DATA_OFFSET (RAW_CLOSURE_FUN_OFFSET + 4)
#define CIF_FLAGS_OFFSET 20
#ifdef X86_WIN32 #ifdef X86_WIN32
.balign 16 .balign 16
@@ -1006,6 +1047,8 @@ FFI_HIDDEN(ffi_closure_STDCALL)
#endif #endif
USCORE_SYMBOL(ffi_closure_STDCALL): USCORE_SYMBOL(ffi_closure_STDCALL):
.ffi_closure_STDCALL_internal: .ffi_closure_STDCALL_internal:
/* ffi_closure ctx is at top of the stack */
movl (%esp), %eax
.LFB5: .LFB5:
pushl %ebp pushl %ebp
.LCFI9: .LCFI9:
@@ -1019,19 +1062,23 @@ USCORE_SYMBOL(ffi_closure_STDCALL):
leal -12(%ebp), %edx leal -12(%ebp), %edx
movl %edx, (%esp) /* &resp */ movl %edx, (%esp) /* &resp */
#if defined(HAVE_HIDDEN_VISIBILITY_ATTRIBUTE) || !defined(__PIC__) #if defined(HAVE_HIDDEN_VISIBILITY_ATTRIBUTE) || !defined(__PIC__)
call USCORE_SYMBOL(ffi_closure_SYSV_inner) call USCORE_SYMBOL(ffi_closure_WIN32_inner)
#elif defined(X86_DARWIN) #elif defined(X86_DARWIN)
calll L_ffi_closure_SYSV_inner$stub calll L_ffi_closure_WIN32_inner$stub
#else #else
movl %ebx, 8(%esp) movl %ebx, 8(%esp)
call 1f call 1f
1: popl %ebx 1: popl %ebx
addl $_GLOBAL_OFFSET_TABLE_+[.-1b], %ebx addl $_GLOBAL_OFFSET_TABLE_+[.-1b], %ebx
call ffi_closure_SYSV_inner@PLT call ffi_closure_WIN32_inner@PLT
movl 8(%esp), %ebx movl 8(%esp), %ebx
#endif #endif
movl -12(%ebp), %ecx movl -12(%ebp), %ecx
0: 0:
xchgl 4(%ebp), %eax /* xchg size of stack parameters and ffi_closure ctx */
movl CLOSURE_CIF_OFFSET(%eax), %eax
movl CIF_FLAGS_OFFSET(%eax), %eax
call 1f call 1f
# Do not insert anything here between the call and the jump table. # Do not insert anything here between the call and the jump table.
.Lscls_store_table: .Lscls_store_table:
@@ -1118,19 +1165,10 @@ USCORE_SYMBOL(ffi_closure_STDCALL):
.Lscls_epilogue: .Lscls_epilogue:
movl %ebp, %esp movl %ebp, %esp
popl %ebp popl %ebp
popl %ecx movl 4(%esp), %ecx /* Return address */
popl %edx addl (%esp), %esp /* Parameters stack size */
movl (CLOSURE_CIF_OFFSET-10)(%ecx), %ecx addl $8, %esp
addl CIF_BYTES_OFFSET(%ecx), %esp jmp *%ecx
movl CIF_ABI_OFFSET(%ecx), %ecx
cmpl $3, %ecx /* FFI_THISCALL */
je 1f
cmpl $4, %ecx /* FFI_FASTCALL */
jne 2f
addl $4, %esp
1: addl $4, %esp
2: jmp *%edx
.ffi_closure_STDCALL_end: .ffi_closure_STDCALL_end:
.LFE5: .LFE5:
@@ -1139,6 +1177,9 @@ USCORE_SYMBOL(ffi_closure_STDCALL):
L_ffi_closure_SYSV_inner$stub: L_ffi_closure_SYSV_inner$stub:
.indirect_symbol _ffi_closure_SYSV_inner .indirect_symbol _ffi_closure_SYSV_inner
hlt ; hlt ; hlt ; hlt ; hlt hlt ; hlt ; hlt ; hlt ; hlt
L_ffi_closure_WIN32_inner$stub:
.indirect_symbol _ffi_closure_WIN32_inner
hlt ; hlt ; hlt ; hlt ; hlt
#endif #endif
#if defined(X86_WIN32) && !defined(__OS2__) #if defined(X86_WIN32) && !defined(__OS2__)