x86_64: Decouple return types from FFI_TYPE constants

This lets us better support structure returns, and is
preparation for complex types.
commit 32c5683163
parent 2e9dc16556
Author: Richard Henderson
Date:   2014-10-28 11:17:35 -07:00
3 changed files with 203 additions and 197 deletions
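
The patch replaces the raw FFI_TYPE_* value that used to be stashed in cif->flags with a dedicated UNIX64_RET_* encoding (defined in the new src/x86/internal64.h): the low byte selects the return kind, bit 10 marks a return in memory, bit 11 marks that SSE argument registers are in use, and bits 12 and up carry the true size of a struct return. A minimal decoding sketch, illustration only (describe_flags is not part of the patch):

/* Illustration only: how a cif->flags word produced by the new
   ffi_prep_cif_machdep can be decoded.  Constants as in internal64.h.  */
#include <stdio.h>

#define UNIX64_FLAG_RET_IN_MEM (1 << 10)
#define UNIX64_FLAG_XMM_ARGS   (1 << 11)
#define UNIX64_SIZE_SHIFT      12

static void describe_flags (int flags)
{
  int kind = flags & 0xff;                                /* one of UNIX64_RET_* */
  int in_mem = (flags & UNIX64_FLAG_RET_IN_MEM) != 0;     /* return via hidden pointer */
  int xmm_args = (flags & UNIX64_FLAG_XMM_ARGS) != 0;     /* SSE registers carry arguments */
  unsigned size = (unsigned) flags >> UNIX64_SIZE_SHIFT;  /* struct size, if any */

  printf ("ret kind=%d in_mem=%d xmm_args=%d struct_size=%u\n",
          kind, in_mem, xmm_args, size);
}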

src/x86/ffi64.c

@@ -33,6 +33,7 @@
 #include <stdlib.h>
 #include <stdarg.h>
 #include <stdint.h>
+#include "internal64.h"
 
 #ifdef __x86_64__
@@ -191,7 +192,7 @@ classify_argument (ffi_type *type, enum x86_64_reg_class classes[],
         }
       else if (size <= 16)
         {
-          classes[0] = classes[1] = X86_64_INTEGERSI_CLASS;
+          classes[0] = classes[1] = X86_64_INTEGER_CLASS;
           return 2;
         }
       else
@@ -360,15 +361,55 @@ ffi_prep_cif_machdep (ffi_cif *cif)
   int gprcount, ssecount, i, avn, ngpr, nsse, flags;
   enum x86_64_reg_class classes[MAX_CLASSES];
   size_t bytes, n;
+  ffi_type *rtype;
 
   if (cif->abi != FFI_UNIX64)
     return FFI_BAD_ABI;
 
   gprcount = ssecount = 0;
 
-  flags = cif->rtype->type;
-  if (flags != FFI_TYPE_VOID)
+  rtype = cif->rtype;
+  switch (rtype->type)
     {
+    case FFI_TYPE_VOID:
+      flags = UNIX64_RET_VOID;
+      break;
+    case FFI_TYPE_UINT8:
+      flags = UNIX64_RET_UINT8;
+      break;
+    case FFI_TYPE_SINT8:
+      flags = UNIX64_RET_SINT8;
+      break;
+    case FFI_TYPE_UINT16:
+      flags = UNIX64_RET_UINT16;
+      break;
+    case FFI_TYPE_SINT16:
+      flags = UNIX64_RET_SINT16;
+      break;
+    case FFI_TYPE_UINT32:
+      flags = UNIX64_RET_UINT32;
+      break;
+    case FFI_TYPE_INT:
+    case FFI_TYPE_SINT32:
+      flags = UNIX64_RET_SINT32;
+      break;
+    case FFI_TYPE_UINT64:
+    case FFI_TYPE_SINT64:
+      flags = UNIX64_RET_INT64;
+      break;
+    case FFI_TYPE_POINTER:
+      flags = (sizeof(void *) == 4 ? UNIX64_RET_UINT32 : UNIX64_RET_INT64);
+      break;
+    case FFI_TYPE_FLOAT:
+      flags = UNIX64_RET_XMM32;
+      break;
+    case FFI_TYPE_DOUBLE:
+      flags = UNIX64_RET_XMM64;
+      break;
+    case FFI_TYPE_LONGDOUBLE:
+      flags = UNIX64_RET_X87;
+      break;
+    case FFI_TYPE_STRUCT:
       n = examine_argument (cif->rtype, classes, 1, &ngpr, &nsse);
       if (n == 0)
         {
@@ -376,22 +417,24 @@ ffi_prep_cif_machdep (ffi_cif *cif)
              memory is the first argument.  Allocate a register for it.  */
           gprcount++;
           /* We don't have to do anything in asm for the return.  */
-          flags = FFI_TYPE_VOID;
+          flags = UNIX64_RET_VOID | UNIX64_FLAG_RET_IN_MEM;
         }
-      else if (flags == FFI_TYPE_STRUCT)
+      else
         {
           /* Mark which registers the result appears in.  */
           _Bool sse0 = SSE_CLASS_P (classes[0]);
           _Bool sse1 = n == 2 && SSE_CLASS_P (classes[1]);
-          if (sse0 && !sse1)
-            flags |= 1 << 8;
-          else if (!sse0 && sse1)
-            flags |= 1 << 9;
-          else if (sse0 && sse1)
-            flags |= 1 << 10;
+          if (sse0)
+            flags = (sse1 ? UNIX64_RET_ST_XMM0_XMM1 : UNIX64_RET_ST_XMM0_RAX);
+          else
+            flags = (sse1 ? UNIX64_RET_ST_RAX_XMM0 : UNIX64_RET_ST_RAX_RDX);
           /* Mark the true size of the structure.  */
-          flags |= cif->rtype->size << 12;
+          flags |= rtype->size << UNIX64_SIZE_SHIFT;
         }
+      break;
+    default:
+      return FFI_BAD_TYPEDEF;
     }
 
   /* Go over all arguments and determine the way they should be passed.
@@ -418,9 +461,10 @@ ffi_prep_cif_machdep (ffi_cif *cif)
         }
     }
   if (ssecount)
-    flags |= 1 << 11;
+    flags |= UNIX64_FLAG_XMM_ARGS;
   cif->flags = flags;
-  cif->bytes = (unsigned)ALIGN (bytes, 8);
+  cif->bytes = ALIGN (bytes, 8);
 
   return FFI_OK;
 }
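
For the two-eightbyte struct returns above, the selection reduces to a four-way table. A restatement in plain C, illustration only (struct_ret_kind is a hypothetical helper, not part of the patch; the constants come from the new internal64.h):

/* Pick the UNIX64_RET_ST_* kind from the SSE-ness of the two eightbytes.  */
#include "internal64.h"

static int
struct_ret_kind (_Bool sse0, _Bool sse1)
{
  if (sse0)
    return sse1 ? UNIX64_RET_ST_XMM0_XMM1 : UNIX64_RET_ST_XMM0_RAX;
  return sse1 ? UNIX64_RET_ST_RAX_XMM0 : UNIX64_RET_ST_RAX_RDX;
}

/* Example: struct { double d; long l; } classifies as SSE then INTEGER,
   so sse0=1, sse1=0 and the result is UNIX64_RET_ST_XMM0_RAX.  */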
@@ -432,20 +476,22 @@ ffi_call_int (ffi_cif *cif, void (*fn)(void), void *rvalue,
   enum x86_64_reg_class classes[MAX_CLASSES];
   char *stack, *argp;
   ffi_type **arg_types;
-  int gprcount, ssecount, ngpr, nsse, i, avn;
-  _Bool ret_in_memory;
+  int gprcount, ssecount, ngpr, nsse, i, avn, flags;
   struct register_args *reg_args;
 
   /* Can't call 32-bit mode from 64-bit mode.  */
   FFI_ASSERT (cif->abi == FFI_UNIX64);
 
   /* If the return value is a struct and we don't have a return value
-     address then we need to make one.  Note the setting of flags to
-     VOID above in ffi_prep_cif_machdep.  */
-  ret_in_memory = (cif->rtype->type == FFI_TYPE_STRUCT
-                   && (cif->flags & 0xff) == FFI_TYPE_VOID);
-  if (rvalue == NULL && ret_in_memory)
-    rvalue = alloca (cif->rtype->size);
+     address then we need to make one.  Otherwise we can ignore it.  */
+  flags = cif->flags;
+  if (rvalue == NULL)
+    {
+      if (flags & UNIX64_FLAG_RET_IN_MEM)
+        rvalue = alloca (cif->rtype->size);
+      else
+        flags = UNIX64_RET_VOID;
+    }
 
   /* Allocate the space for the arguments, plus 4 words of temp space.  */
   stack = alloca (sizeof (struct register_args) + cif->bytes + 4*8);
@@ -458,7 +504,7 @@ ffi_call_int (ffi_cif *cif, void (*fn)(void), void *rvalue,
 
   /* If the return value is passed in memory, add the pointer as the
      first integer argument.  */
-  if (ret_in_memory)
+  if (flags & UNIX64_FLAG_RET_IN_MEM)
     reg_args->gpr[gprcount++] = (unsigned long) rvalue;
 
   avn = cif->nargs;
@@ -503,17 +549,17 @@ ffi_call_int (ffi_cif *cif, void (*fn)(void), void *rvalue,
           switch (arg_types[i]->type)
             {
             case FFI_TYPE_SINT8:
-              *(SINT64 *)&reg_args->gpr[gprcount] = (SINT64) *((SINT8 *) a);
+              reg_args->gpr[gprcount] = (SINT64) *((SINT8 *) a);
               break;
             case FFI_TYPE_SINT16:
-              *(SINT64 *)&reg_args->gpr[gprcount] = (SINT64) *((SINT16 *) a);
+              reg_args->gpr[gprcount] = (SINT64) *((SINT16 *) a);
               break;
             case FFI_TYPE_SINT32:
-              *(SINT64 *)&reg_args->gpr[gprcount] = (SINT64) *((SINT32 *) a);
+              reg_args->gpr[gprcount] = (SINT64) *((SINT32 *) a);
               break;
             default:
               reg_args->gpr[gprcount] = 0;
-              memcpy (&reg_args->gpr[gprcount], a, size < 8 ? size : 8);
+              memcpy (&reg_args->gpr[gprcount], a, size);
             }
           gprcount++;
           break;
@@ -533,7 +579,7 @@ ffi_call_int (ffi_cif *cif, void (*fn)(void), void *rvalue,
   reg_args->rax = ssecount;
 
   ffi_call_unix64 (stack, cif->bytes + sizeof (struct register_args),
-                   cif->flags, rvalue, fn);
+                   flags, rvalue, fn);
 }
 
 void
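
A caller-side example, illustration only, that exercises this two-register struct-return path through the public API (make_pair and the pair type are made up for the example; they are not part of libffi or of this patch):

#include <ffi.h>
#include <stdio.h>

/* A 16-byte struct whose first eightbyte is SSE and second is INTEGER:
   returned in xmm0:rax, i.e. UNIX64_RET_ST_XMM0_RAX internally.  */
typedef struct { double d; long l; } pair;

static pair make_pair (double d, long l) { pair p = { d, l }; return p; }

int main (void)
{
  ffi_cif cif;
  ffi_type *elems[3] = { &ffi_type_double, &ffi_type_slong, NULL };
  ffi_type pair_type = { 0, 0, FFI_TYPE_STRUCT, elems };
  ffi_type *args[2] = { &ffi_type_double, &ffi_type_slong };
  double d = 1.5; long l = 42;
  void *values[2] = { &d, &l };
  pair result;

  if (ffi_prep_cif (&cif, FFI_DEFAULT_ABI, 2, &pair_type, args) == FFI_OK)
    {
      ffi_call (&cif, FFI_FN (make_pair), &result, values);
      printf ("%g %ld\n", result.d, result.l);   /* prints 1.5 42 */
    }
  return 0;
}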
@@ -573,7 +619,7 @@ ffi_prep_closure_loc (ffi_closure* closure,
   if (cif->abi != FFI_UNIX64)
     return FFI_BAD_ABI;
 
-  if (cif->flags & (1 << 11))
+  if (cif->flags & UNIX64_FLAG_XMM_ARGS)
     dest = ffi_closure_unix64_sse;
   else
     dest = ffi_closure_unix64;
@@ -600,39 +646,17 @@ ffi_closure_unix64_inner(ffi_cif *cif,
   ffi_type **arg_types;
   long i, avn;
   int gprcount, ssecount, ngpr, nsse;
-  int ret;
-
-  avalue = alloca(cif->nargs * sizeof(void *));
-  gprcount = ssecount = 0;
-
-  ret = cif->rtype->type;
-  if (ret != FFI_TYPE_VOID)
-    {
-      enum x86_64_reg_class classes[MAX_CLASSES];
-      size_t n = examine_argument (cif->rtype, classes, 1, &ngpr, &nsse);
-      if (n == 0)
-        {
-          /* The return value goes in memory.  Arrange for the closure
-             return value to go directly back to the original caller.  */
-          rvalue = (void *) (unsigned long) reg_args->gpr[gprcount++];
-          /* We don't have to do anything in asm for the return.  */
-          ret = FFI_TYPE_VOID;
-        }
-      else if (ret == FFI_TYPE_STRUCT && n == 2)
-        {
-          /* Mark which register the second word of the structure goes in.  */
-          _Bool sse0 = SSE_CLASS_P (classes[0]);
-          _Bool sse1 = SSE_CLASS_P (classes[1]);
-          if (!sse0 && sse1)
-            ret |= 1 << 8;
-          else if (sse0 && !sse1)
-            ret |= 1 << 9;
-        }
-    }
+  int flags;
 
   avn = cif->nargs;
-  arg_types = cif->arg_types;
+  flags = cif->flags;
+  avalue = alloca(avn * sizeof(void *));
+  gprcount = ssecount = 0;
+
+  if (flags & UNIX64_FLAG_RET_IN_MEM)
+    rvalue = (void *)(uintptr_t)reg_args->gpr[gprcount++];
 
+  arg_types = cif->arg_types;
   for (i = 0; i < avn; ++i)
     {
       enum x86_64_reg_class classes[MAX_CLASSES];
@@ -693,7 +717,7 @@ ffi_closure_unix64_inner(ffi_cif *cif,
   fun (cif, rvalue, avalue, user_data);
 
   /* Tell assembly how to perform return type promotions.  */
-  return ret;
+  return flags;
 }
 
 extern void ffi_go_closure_unix64(void) FFI_HIDDEN;
@@ -706,7 +730,7 @@ ffi_prep_go_closure (ffi_go_closure* closure, ffi_cif* cif,
   if (cif->abi != FFI_UNIX64)
     return FFI_BAD_ABI;
 
-  closure->tramp = (cif->flags & (1 << 11)
+  closure->tramp = (cif->flags & UNIX64_FLAG_XMM_ARGS
                     ? ffi_go_closure_unix64_sse
                     : ffi_go_closure_unix64);
   closure->cif = cif;
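
A closure example, illustration only, built entirely on the public API: with float arguments, ffi_prep_cif_machdep sets UNIX64_FLAG_XMM_ARGS, so ffi_prep_closure_loc picks ffi_closure_unix64_sse, and the float return comes back through the UNIX64_RET_XMM32 path (add_f and main are made up for the example):

#include <ffi.h>
#include <stdio.h>

/* The closure target: receives the cif, a slot for the return value,
   pointers to the arguments, and the user data.  */
static void
add_f (ffi_cif *cif, void *ret, void **args, void *userdata)
{
  (void) cif; (void) userdata;
  *(float *) ret = *(float *) args[0] + *(float *) args[1];
}

int main (void)
{
  ffi_cif cif;
  ffi_type *args[2] = { &ffi_type_float, &ffi_type_float };
  void *code;
  ffi_closure *closure = ffi_closure_alloc (sizeof (ffi_closure), &code);
  float (*fn) (float, float);

  if (closure == NULL)
    return 1;
  if (ffi_prep_cif (&cif, FFI_DEFAULT_ABI, 2, &ffi_type_float, args) == FFI_OK
      && ffi_prep_closure_loc (closure, &cif, add_f, NULL, code) == FFI_OK)
    {
      fn = (float (*)(float, float)) code;
      printf ("%f\n", fn (1.25f, 2.5f));   /* prints 3.750000 */
    }
  ffi_closure_free (closure);
  return 0;
}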

src/x86/internal64.h (new file, 20 lines)

@@ -0,0 +1,20 @@
+#define UNIX64_RET_VOID		0
+#define UNIX64_RET_UINT8	1
+#define UNIX64_RET_UINT16	2
+#define UNIX64_RET_UINT32	3
+#define UNIX64_RET_SINT8	4
+#define UNIX64_RET_SINT16	5
+#define UNIX64_RET_SINT32	6
+#define UNIX64_RET_INT64	7
+#define UNIX64_RET_XMM32	8
+#define UNIX64_RET_XMM64	9
+#define UNIX64_RET_X87		10
+#define UNIX64_RET_ST_RAX_RDX	11
+#define UNIX64_RET_ST_XMM0_RAX	12
+#define UNIX64_RET_ST_RAX_XMM0	13
+#define UNIX64_RET_ST_XMM0_XMM1	14
+#define UNIX64_RET_LAST		14
+
+#define UNIX64_FLAG_RET_IN_MEM	(1 << 10)
+#define UNIX64_FLAG_XMM_ARGS	(1 << 11)
+#define UNIX64_SIZE_SHIFT	12
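
These constants double as indices into the dispatch tables in unix64.S: the new E macro pads every table entry to exactly 8 bytes (padding with 0x90, i.e. nop), so the assembly can jump to table + kind * 8 after checking the kind against UNIX64_RET_LAST. A rough C rendering of the store-side dispatch, illustration only (store_result is hypothetical and deliberately incomplete):

#include <stdlib.h>
#include "internal64.h"

/* Sketch: what the table dispatch in ffi_call_unix64 amounts to.
   Only the low byte of the flags word selects the routine.  */
static void
store_result (int flags, void *rvalue, long rax, double xmm0)
{
  int kind = flags & 0xff;
  if (kind > UNIX64_RET_LAST)
    abort ();                 /* mirrors the new "ja 9f ... call abort" path */
  switch (kind)
    {
    case UNIX64_RET_VOID:
      break;
    case UNIX64_RET_SINT8:
      *(long *) rvalue = (signed char) rax;   /* sign-extend, store 8 bytes */
      break;
    case UNIX64_RET_XMM64:
      *(double *) rvalue = xmm0;
      break;
    /* ...the remaining UNIX64_RET_* kinds follow the same pattern...  */
    }
}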

src/x86/unix64.S

@@ -31,9 +31,15 @@
 #include <fficonfig.h>
 #include <ffi.h>
 #include <ffi_cfi.h>
+#include "internal64.h"
 
 	.text
 
+	.macro E index
+	.align	8
+	.org	0b + \index * 8, 0x90
+	.endm
+
 /* ffi_call_unix64 (void *args, unsigned long bytes, unsigned flags,
 	    void *raddr, void (*fnaddr)(void));
@@ -41,7 +47,7 @@
    for this function.  This has been allocated by ffi_call.  We also
    deallocate some of the stack that has been alloca'd.  */
 
-	.align	2
+	.align	8
 	.globl	ffi_call_unix64
 	.type	ffi_call_unix64,@function
 	FFI_HIDDEN(ffi_call_unix64)
@@ -100,108 +106,80 @@ ffi_call_unix64:
 	cfi_restore(%rbp)
 
 	/* The first byte of the flags contains the FFI_TYPE.  */
+	cmpb	$UNIX64_RET_LAST, %cl
 	movzbl	%cl, %r10d
-	leaq	.Lstore_table(%rip), %r11
-	movslq	(%r11, %r10, 4), %r10
-	addq	%r11, %r10
+	leaq	0f(%rip), %r11
+	ja	9f
+	leaq	(%r11, %r10, 8), %r10
+
+	/* Prep for the structure cases: scratch area in redzone.  */
+	leaq	-20(%rsp), %rsi
 	jmp	*%r10
 
-	.section .rodata
-	.align	2
-.Lstore_table:
-	.long	.Lst_void-.Lstore_table	/* FFI_TYPE_VOID */
-	.long	.Lst_sint32-.Lstore_table	/* FFI_TYPE_INT */
-	.long	.Lst_float-.Lstore_table	/* FFI_TYPE_FLOAT */
-	.long	.Lst_double-.Lstore_table	/* FFI_TYPE_DOUBLE */
-	.long	.Lst_ldouble-.Lstore_table	/* FFI_TYPE_LONGDOUBLE */
-	.long	.Lst_uint8-.Lstore_table	/* FFI_TYPE_UINT8 */
-	.long	.Lst_sint8-.Lstore_table	/* FFI_TYPE_SINT8 */
-	.long	.Lst_uint16-.Lstore_table	/* FFI_TYPE_UINT16 */
-	.long	.Lst_sint16-.Lstore_table	/* FFI_TYPE_SINT16 */
-	.long	.Lst_uint32-.Lstore_table	/* FFI_TYPE_UINT32 */
-	.long	.Lst_sint32-.Lstore_table	/* FFI_TYPE_SINT32 */
-	.long	.Lst_int64-.Lstore_table	/* FFI_TYPE_UINT64 */
-	.long	.Lst_int64-.Lstore_table	/* FFI_TYPE_SINT64 */
-	.long	.Lst_struct-.Lstore_table	/* FFI_TYPE_STRUCT */
-	.long	.Lst_int64-.Lstore_table	/* FFI_TYPE_POINTER */
-	.previous
-
-	.align	2
-.Lst_void:
+	.align	8
+0:
+E UNIX64_RET_VOID
 	ret
-	.align	2
-
-.Lst_uint8:
-	movzbq	%al, %rax
+E UNIX64_RET_UINT8
+	movzbl	%al, %eax
 	movq	%rax, (%rdi)
 	ret
-	.align	2
-.Lst_sint8:
+E UNIX64_RET_UINT16
+	movzwl	%ax, %eax
+	movq	%rax, (%rdi)
+	ret
+E UNIX64_RET_UINT32
+	movl	%eax, %eax
+	movq	%rax, (%rdi)
+	ret
+E UNIX64_RET_SINT8
 	movsbq	%al, %rax
 	movq	%rax, (%rdi)
 	ret
-	.align	2
-.Lst_uint16:
-	movzwq	%ax, %rax
-	movq	%rax, (%rdi)
-	.align	2
-.Lst_sint16:
+E UNIX64_RET_SINT16
 	movswq	%ax, %rax
 	movq	%rax, (%rdi)
 	ret
-	.align	2
-.Lst_uint32:
-	movl	%eax, %eax
-	movq	%rax, (%rdi)
-	.align	2
-.Lst_sint32:
+E UNIX64_RET_SINT32
 	cltq
 	movq	%rax, (%rdi)
 	ret
-	.align	2
-.Lst_int64:
+E UNIX64_RET_INT64
 	movq	%rax, (%rdi)
 	ret
-
-	.align	2
-.Lst_float:
-	movss	%xmm0, (%rdi)
+E UNIX64_RET_XMM32
+	movd	%xmm0, (%rdi)
 	ret
-	.align	2
-.Lst_double:
-	movsd	%xmm0, (%rdi)
+E UNIX64_RET_XMM64
+	movq	%xmm0, (%rdi)
 	ret
-.Lst_ldouble:
+E UNIX64_RET_X87
 	fstpt	(%rdi)
 	ret
-
-	.align	2
-.Lst_struct:
-	leaq	-20(%rsp), %rsi		/* Scratch area in redzone.  */
-
-	/* We have to locate the values now, and since we don't want to
-	   write too much data into the user's return value, we spill the
-	   value to a 16 byte scratch area first.  Bits 8, 9, and 10
-	   control where the values are located.  Only one of the three
-	   bits will be set; see ffi_prep_cif_machdep for the pattern.  */
-	movd	%xmm0, %r10
-	movd	%xmm1, %r11
-	testl	$0x100, %ecx
-	cmovnz	%rax, %rdx
-	cmovnz	%r10, %rax
-	testl	$0x200, %ecx
-	cmovnz	%r10, %rdx
-	testl	$0x400, %ecx
-	cmovnz	%r10, %rax
-	cmovnz	%r11, %rdx
-	movq	%rax, (%rsi)
+E UNIX64_RET_ST_RAX_RDX
 	movq	%rdx, 8(%rsi)
-
-	/* Bits 12-31 contain the true size of the structure.  Copy from
-	   the scratch area to the true destination.  */
-	shrl	$12, %ecx
+	jmp	2f
+E UNIX64_RET_ST_XMM0_RAX
+	movq	%rax, 8(%rsi)
+	jmp	3f
+E UNIX64_RET_ST_RAX_XMM0
+	movq	%xmm0, 8(%rsi)
+	jmp	2f
+E UNIX64_RET_ST_XMM0_XMM1
+	movq	%xmm1, 8(%rsi)
+
+	.align	8
+3:	movq	%xmm0, (%rsi)
+	shrl	$UNIX64_SIZE_SHIFT, %ecx
 	rep movsb
 	ret
+	.align	8
+2:	movq	%rax, (%rsi)
+	shrl	$UNIX64_SIZE_SHIFT, %ecx
+	rep movsb
+	ret
+9:	call	abort@PLT
 
 /* Many times we can avoid loading any SSE registers at all.
    It's not worth an indirect jump to load the exact set of
@@ -292,83 +270,67 @@ ffi_closure_unix64:
 	cfi_adjust_cfa_offset(-ffi_closure_FS)
 
 	/* The first byte of the return value contains the FFI_TYPE.  */
+	cmpb	$UNIX64_RET_LAST, %al
 	movzbl	%al, %r10d
-	leaq	.Lload_table(%rip), %r11
-	movslq	(%r11, %r10, 4), %r10
-	addq	%r11, %r10
+	leaq	0f(%rip), %r11
+	ja	9f
+	leaq	(%r11, %r10, 8), %r10
 	jmp	*%r10
 
-	.section .rodata
-	.align	2
-.Lload_table:
-	.long	.Lld_void-.Lload_table		/* FFI_TYPE_VOID */
-	.long	.Lld_int32-.Lload_table		/* FFI_TYPE_INT */
-	.long	.Lld_float-.Lload_table		/* FFI_TYPE_FLOAT */
-	.long	.Lld_double-.Lload_table	/* FFI_TYPE_DOUBLE */
-	.long	.Lld_ldouble-.Lload_table	/* FFI_TYPE_LONGDOUBLE */
-	.long	.Lld_int8-.Lload_table		/* FFI_TYPE_UINT8 */
-	.long	.Lld_int8-.Lload_table		/* FFI_TYPE_SINT8 */
-	.long	.Lld_int16-.Lload_table		/* FFI_TYPE_UINT16 */
-	.long	.Lld_int16-.Lload_table		/* FFI_TYPE_SINT16 */
-	.long	.Lld_int32-.Lload_table		/* FFI_TYPE_UINT32 */
-	.long	.Lld_int32-.Lload_table		/* FFI_TYPE_SINT32 */
-	.long	.Lld_int64-.Lload_table		/* FFI_TYPE_UINT64 */
-	.long	.Lld_int64-.Lload_table		/* FFI_TYPE_SINT64 */
-	.long	.Lld_struct-.Lload_table	/* FFI_TYPE_STRUCT */
-	.long	.Lld_int64-.Lload_table		/* FFI_TYPE_POINTER */
-	.previous
-
-	.align	2
-.Lld_void:
+	.align	8
+0:
+E UNIX64_RET_VOID
 	ret
-
-	.align	2
-.Lld_int8:
+E UNIX64_RET_UINT8
 	movzbl	ffi_closure_RED_RVALUE(%rsp), %eax
 	ret
-	.align	2
-.Lld_int16:
+E UNIX64_RET_UINT16
 	movzwl	ffi_closure_RED_RVALUE(%rsp), %eax
 	ret
-	.align	2
-.Lld_int32:
+E UNIX64_RET_UINT32
 	movl	ffi_closure_RED_RVALUE(%rsp), %eax
 	ret
-	.align	2
-.Lld_int64:
+E UNIX64_RET_SINT8
+	movsbl	ffi_closure_RED_RVALUE(%rsp), %eax
+	ret
+E UNIX64_RET_SINT16
+	movswl	ffi_closure_RED_RVALUE(%rsp), %eax
+	ret
+E UNIX64_RET_SINT32
+	movl	ffi_closure_RED_RVALUE(%rsp), %eax
+	ret
+E UNIX64_RET_INT64
 	movq	ffi_closure_RED_RVALUE(%rsp), %rax
 	ret
-
-	.align	2
-.Lld_float:
-	movss	ffi_closure_RED_RVALUE(%rsp), %xmm0
+E UNIX64_RET_XMM32
+	movd	ffi_closure_RED_RVALUE(%rsp), %xmm0
 	ret
-	.align	2
-.Lld_double:
-	movsd	ffi_closure_RED_RVALUE(%rsp), %xmm0
+E UNIX64_RET_XMM64
+	movq	ffi_closure_RED_RVALUE(%rsp), %xmm0
 	ret
-	.align	2
-.Lld_ldouble:
+E UNIX64_RET_X87
 	fldt	ffi_closure_RED_RVALUE(%rsp)
 	ret
-
-	.align	2
-.Lld_struct:
-	/* There are four possibilities here, %rax/%rdx, %xmm0/%rax,
-	   %rax/%xmm0, %xmm0/%xmm1.  We collapse two by always loading
-	   both rdx and xmm1 with the second word.  For the remaining,
-	   bit 8 set means xmm0 gets the second word, and bit 9 means
-	   that rax gets the second word.  */
-	movq	ffi_closure_RED_RVALUE(%rsp), %rcx
+E UNIX64_RET_ST_RAX_RDX
 	movq	ffi_closure_RED_RVALUE+8(%rsp), %rdx
+	jmp	2f
+E UNIX64_RET_ST_XMM0_RAX
+	movq	ffi_closure_RED_RVALUE+8(%rsp), %rax
+	jmp	3f
+E UNIX64_RET_ST_RAX_XMM0
+	movq	ffi_closure_RED_RVALUE+8(%rsp), %xmm0
+	jmp	2f
+E UNIX64_RET_ST_XMM0_XMM1
 	movq	ffi_closure_RED_RVALUE+8(%rsp), %xmm1
-	testl	$0x100, %eax
-	cmovnz	%rdx, %rcx
-	movd	%rcx, %xmm0
-	testl	$0x200, %eax
-	movq	ffi_closure_RED_RVALUE(%rsp), %rax
-	cmovnz	%rdx, %rax
+
+	.align	8
+3:	movq	ffi_closure_RED_RVALUE(%rsp), %xmm0
 	ret
+	.align	8
+2:	movq	ffi_closure_RED_RVALUE(%rsp), %rax
+	ret
+9:	call	abort@PLT
 
 	cfi_endproc
 	.size	ffi_closure_unix64,.-ffi_closure_unix64