x86: Avoid using gas local labels

These are unsupported by the Darwin cctools assembler.
Thankfully this doesn't uglify the source too much.
Author: Richard Henderson
Date:   2014-11-24 16:26:50 +01:00
Commit: 5f35e0ffcc (parent: ed1ca2777c)

2 changed files with 247 additions and 221 deletions
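Background for the relabeling below: gas numeric local labels ("1:", "jmp 1b", "0f") and the ELF ".L" prefix are what the patch removes, funneling every label through an L() macro instead so the Darwin cctools assembler sees a plain "L"-prefixed local symbol. A minimal sketch of the mechanism, assuming only the C1/C2 token-pasting helpers that these files already define; file and symbol names here are illustrative, not part of the patch:

/* sketch.S -- illustrative only; build with "gcc -c sketch.S" so the
   C preprocessor runs before the assembler.  */
#define C2(X, Y)  X ## Y
#define C1(X, Y)  C2(X, Y)

#ifdef __APPLE__
# define L(X)  C1(L, X)		/* L(e1) expands to Le1  (cctools local symbol) */
#else
# define L(X)  C1(.L, X)	/* L(e1) expands to .Le1 (ELF/gas local symbol) */
#endif

	.text
	.globl	demo
demo:
	xorl	%eax, %eax
	jmp	L(e1)		/* a named label replaces "jmp 9f" */
L(e1):
	ret

The same source then assembles with either toolchain, at the cost of spelling out a unique name wherever a throwaway numeric label used to do.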

Changed file 1 of 2:

@@ -41,6 +41,12 @@
 # define C(X) X
 #endif
 
+#ifdef X86_DARWIN
+# define L(X) C1(L, X)
+#else
+# define L(X) C1(.L, X)
+#endif
+
 #ifdef __ELF__
 # define ENDF(X) .type X,@function; .size X, . - X
 #else
@@ -61,9 +67,9 @@
    The use of ORG asserts that we're at the correct location.  */
 /* ??? The clang assembler doesn't handle .org with symbolic expressions. */
 #if defined(__clang__) || defined(__APPLE__)
-# define E(X) .balign 8
+# define E(BASE, X) .balign 8
 #else
-# define E(X) .balign 8; .org 0b + X * 8
+# define E(BASE, X) .balign 8; .org BASE + X * 8
 #endif
 
 	.text
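Since the numeric label "0:" that anchored the old one-argument E() no longer exists, the table base is now passed explicitly, and on assemblers that accept .org with a symbolic expression it still asserts that every dispatch-table entry occupies exactly 8 bytes. A hedged, self-contained sketch of how that assertion catches an oversized entry (label names are illustrative, not from the patch):

/* Illustrative only: with the .org form, an entry that grows past its
   8-byte slot fails to assemble instead of silently shifting the table. */
#define E(BASE, X)	.balign 8; .org BASE + X * 8

	.text
	.balign	8
table:
E(table, 0)
	movl	$0, %eax	/* entry 0: 6 bytes of code, padded to 8 */
	ret
E(table, 1)
	ud2			/* entry 1: asserted to start at table + 8 */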
@@ -113,48 +119,50 @@ ffi_call_i386:
 	andl $X86_RET_TYPE_MASK, %ecx
 #ifdef __PIC__
 	call C(__x86.get_pc_thunk.bx)
-1:	leal 0f-1b(%ebx, %ecx, 8), %ebx
+L(pc1):
+	leal L(store_table)-L(pc1)(%ebx, %ecx, 8), %ebx
 #else
-	leal 0f(,%ecx, 8), %ebx
+	leal L(store_table)(,%ecx, 8), %ebx
 #endif
 	movl 16(%ebp), %ecx		/* load result address */
 	jmp *%ebx
 
 	.balign 8
-0:
+L(store_table):
-E(X86_RET_FLOAT)
+E(L(store_table), X86_RET_FLOAT)
 	fstps (%ecx)
-	jmp 9f
+	jmp L(e1)
-E(X86_RET_DOUBLE)
+E(L(store_table), X86_RET_DOUBLE)
 	fstpl (%ecx)
-	jmp 9f
+	jmp L(e1)
-E(X86_RET_LDOUBLE)
+E(L(store_table), X86_RET_LDOUBLE)
 	fstpt (%ecx)
-	jmp 9f
+	jmp L(e1)
-E(X86_RET_SINT8)
+E(L(store_table), X86_RET_SINT8)
 	movsbl %al, %eax
 	mov %eax, (%ecx)
-	jmp 9f
+	jmp L(e1)
-E(X86_RET_SINT16)
+E(L(store_table), X86_RET_SINT16)
 	movswl %ax, %eax
 	mov %eax, (%ecx)
-	jmp 9f
+	jmp L(e1)
-E(X86_RET_UINT8)
+E(L(store_table), X86_RET_UINT8)
 	movzbl %al, %eax
 	mov %eax, (%ecx)
-	jmp 9f
+	jmp L(e1)
-E(X86_RET_UINT16)
+E(L(store_table), X86_RET_UINT16)
 	movzwl %ax, %eax
 	mov %eax, (%ecx)
-	jmp 9f
+	jmp L(e1)
-E(X86_RET_INT64)
+E(L(store_table), X86_RET_INT64)
 	movl %edx, 4(%ecx)
 	/* fallthru */
-E(X86_RET_INT32)
+E(L(store_table), X86_RET_INT32)
 	movl %eax, (%ecx)
 	/* fallthru */
-E(X86_RET_VOID)
+E(L(store_table), X86_RET_VOID)
-9:	movl 8(%ebp), %ebx
+L(e1):
+	movl 8(%ebp), %ebx
 	movl %ebp, %esp
 	popl %ebp
 	cfi_remember_state
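The relabeled PIC sequence above is worth spelling out once: the get-pc thunk leaves the address of the instruction after the call in %ebx, and the constant displacement L(store_table)-L(pc1) turns that into the absolute table address, indexed by 8-byte entries. A self-contained sketch of the same idiom, reusing the L() convention from earlier (the thunk is written out by hand here purely for illustration; the compiler normally provides __x86.get_pc_thunk.bx):

/* Illustrative i386 PIC jump-table dispatch, not the patch itself.
   %ecx holds the table index; each table entry is 8 bytes. */
	.text
dispatch:
	call	get_pc
L(pc):
	/* %ebx = address of L(pc); adding the link-time constant
	   L(table)-L(pc) plus the scaled index yields the entry. */
	leal	L(table)-L(pc)(%ebx, %ecx, 8), %ebx
	jmp	*%ebx

get_pc:
	movl	(%esp), %ebx	/* return address == L(pc) */
	ret

	.balign	8
L(table):
	ret			/* index 0 */
	.balign	8
	ud2			/* index 1: trap on unexpected values */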
@@ -164,21 +172,21 @@ E(X86_RET_VOID)
 	ret
 	cfi_restore_state
 
-E(X86_RET_STRUCTPOP)
+E(L(store_table), X86_RET_STRUCTPOP)
-	jmp 9b
+	jmp L(e1)
-E(X86_RET_STRUCTARG)
+E(L(store_table), X86_RET_STRUCTARG)
-	jmp 9b
+	jmp L(e1)
-E(X86_RET_STRUCT_1B)
+E(L(store_table), X86_RET_STRUCT_1B)
 	movb %al, (%ecx)
-	jmp 9b
+	jmp L(e1)
-E(X86_RET_STRUCT_2B)
+E(L(store_table), X86_RET_STRUCT_2B)
 	movw %ax, (%ecx)
-	jmp 9b
+	jmp L(e1)
 
 	/* Fill out the table so that bad values are predictable.  */
-E(X86_RET_UNUSED14)
+E(L(store_table), X86_RET_UNUSED14)
 	ud2
-E(X86_RET_UNUSED15)
+E(L(store_table), X86_RET_UNUSED15)
 	ud2
 
 	cfi_endproc
@@ -216,18 +224,19 @@ ENDF(ffi_call_i386)
 	movl %esp, %ecx;		/* load closure_data */	\
 	leal closure_FS+4(%esp), %edx;	/* load incoming stack */ \
 	call ffi_closure_inner
-#define FFI_CLOSURE_MASK_AND_JUMP \
+#define FFI_CLOSURE_MASK_AND_JUMP(N) \
 	andl $X86_RET_TYPE_MASK, %eax; \
-	leal 0f(, %eax, 8), %eax; \
+	leal L(C1(load_table,N))(, %eax, 8), %eax; \
 	jmp *%eax
 
 #ifdef __PIC__
 # if defined X86_DARWIN || defined HAVE_HIDDEN_VISIBILITY_ATTRIBUTE
 #  undef FFI_CLOSURE_MASK_AND_JUMP
-#  define FFI_CLOSURE_MASK_AND_JUMP \
+#  define FFI_CLOSURE_MASK_AND_JUMP(N) \
 	andl $X86_RET_TYPE_MASK, %eax; \
 	call C(__x86.get_pc_thunk.dx); \
-1:	leal 0f-1b(%edx, %eax, 8), %eax; \
+L(C1(pc,N)): \
+	leal L(C1(load_table,N))-L(C1(pc,N))(%edx, %eax, 8), %eax; \
 	jmp *%eax
 # else
 #  undef FFI_CLOSURE_CALL_INNER
@@ -237,19 +246,19 @@ ENDF(ffi_call_i386)
 	movl %ebx, 40(%esp);		/* save ebx */	\
 	cfi_rel_offset(%ebx, 40); \
 	call C(__x86.get_pc_thunk.bx);	/* load got register */ \
-1:	addl $C(_GLOBAL_OFFSET_TABLE_), %ebx; \
+	addl $C(_GLOBAL_OFFSET_TABLE_), %ebx; \
 	call ffi_closure_inner@PLT
 #  undef FFI_CLOSURE_MASK_AND_JUMP
-#  define FFI_CLOSURE_MASK_AND_JUMP \
+#  define FFI_CLOSURE_MASK_AND_JUMP(N) \
 	andl $X86_RET_TYPE_MASK, %eax; \
-	leal 0f@GOTOFF(%ebx, %eax, 8), %eax; \
+	leal L(C1(load_table,N))@GOTOFF(%ebx, %eax, 8), %eax; \
 	movl 40(%esp), %ebx;		/* restore ebx */ \
 	cfi_restore(%ebx); \
 	jmp *%eax
 # endif /* DARWIN || HIDDEN */
 #endif /* __PIC__ */
 
-#define FFI_GO_CLOSURE(suffix, chain, t1, t2) \
+#define FFI_GO_CLOSURE(suffix, chain, t1, t2, entry) \
 	.balign 16; \
 	.globl C(C1(ffi_go_closure_,suffix)); \
 	FFI_HIDDEN(C(C1(ffi_go_closure_,suffix))); \
@@ -264,12 +273,12 @@ C(C1(ffi_go_closure_,suffix)): \
 	movl t1, 28(%esp); \
 	movl t2, 32(%esp); \
 	movl chain, 36(%esp);		/* closure is user_data */ \
-	jmp 88f; \
+	jmp entry; \
 	cfi_endproc; \
 ENDF(C(C1(ffi_go_closure_,suffix)))
 
-FFI_GO_CLOSURE(EAX, %eax, %edx, %ecx)
+FFI_GO_CLOSURE(EAX, %eax, %edx, %ecx, L(do_closure_i386))
-FFI_GO_CLOSURE(ECX, %ecx, %edx, %eax)
+FFI_GO_CLOSURE(ECX, %ecx, %edx, %eax, L(do_closure_i386))
 
 /* The closure entry points are reached from the ffi_closure trampoline.
    On entry, %eax contains the address of the ffi_closure.  */
@@ -287,70 +296,72 @@ C(ffi_closure_i386):
 	FFI_CLOSURE_SAVE_REGS
 	FFI_CLOSURE_COPY_TRAMP_DATA
 
-88:	/* Entry point from preceeding Go closures.  */
+	/* Entry point from preceeding Go closures.  */
+L(do_closure_i386):
 
 	FFI_CLOSURE_CALL_INNER
 	FFI_CLOSURE_MASK_AND_JUMP
-	FFI_CLOSURE_MASK_AND_JUMP
+	FFI_CLOSURE_MASK_AND_JUMP(2)
 
 	.balign 8
-0:
+L(load_table2):
-E(X86_RET_FLOAT)
+E(L(load_table2), X86_RET_FLOAT)
 	flds (%esp)
-	jmp 9f
+	jmp L(e2)
-E(X86_RET_DOUBLE)
+E(L(load_table2), X86_RET_DOUBLE)
 	fldl (%esp)
-	jmp 9f
+	jmp L(e2)
-E(X86_RET_LDOUBLE)
+E(L(load_table2), X86_RET_LDOUBLE)
 	fldt (%esp)
-	jmp 9f
+	jmp L(e2)
-E(X86_RET_SINT8)
+E(L(load_table2), X86_RET_SINT8)
 	movsbl (%esp), %eax
-	jmp 9f
+	jmp L(e2)
-E(X86_RET_SINT16)
+E(L(load_table2), X86_RET_SINT16)
 	movswl (%esp), %eax
-	jmp 9f
+	jmp L(e2)
-E(X86_RET_UINT8)
+E(L(load_table2), X86_RET_UINT8)
 	movzbl (%esp), %eax
-	jmp 9f
+	jmp L(e2)
-E(X86_RET_UINT16)
+E(L(load_table2), X86_RET_UINT16)
 	movzwl (%esp), %eax
-	jmp 9f
+	jmp L(e2)
-E(X86_RET_INT64)
+E(L(load_table2), X86_RET_INT64)
 	movl 4(%esp), %edx
 	/* fallthru */
-E(X86_RET_INT32)
+E(L(load_table2), X86_RET_INT32)
 	movl (%esp), %eax
 	/* fallthru */
-E(X86_RET_VOID)
+E(L(load_table2), X86_RET_VOID)
-9:	addl $closure_FS, %esp
+L(e2):
+	addl $closure_FS, %esp
 	cfi_adjust_cfa_offset(-closure_FS)
 	ret
 	cfi_adjust_cfa_offset(closure_FS)
-E(X86_RET_STRUCTPOP)
+E(L(load_table2), X86_RET_STRUCTPOP)
 	addl $closure_FS, %esp
 	cfi_adjust_cfa_offset(-closure_FS)
 	ret $4
 	cfi_adjust_cfa_offset(closure_FS)
-E(X86_RET_STRUCTARG)
+E(L(load_table2), X86_RET_STRUCTARG)
 	movl (%esp), %eax
-	jmp 9b
+	jmp L(e2)
-E(X86_RET_STRUCT_1B)
+E(L(load_table2), X86_RET_STRUCT_1B)
 	movzbl (%esp), %eax
-	jmp 9b
+	jmp L(e2)
-E(X86_RET_STRUCT_2B)
+E(L(load_table2), X86_RET_STRUCT_2B)
 	movzwl (%esp), %eax
-	jmp 9b
+	jmp L(e2)
 
 	/* Fill out the table so that bad values are predictable.  */
-E(X86_RET_UNUSED14)
+E(L(load_table2), X86_RET_UNUSED14)
 	ud2
-E(X86_RET_UNUSED15)
+E(L(load_table2), X86_RET_UNUSED15)
 	ud2
 
 	cfi_endproc
 ENDF(C(ffi_closure_i386))
 
-FFI_GO_CLOSURE(STDCALL, %ecx, %edx, %eax)
+FFI_GO_CLOSURE(STDCALL, %ecx, %edx, %eax, L(do_closure_STDCALL))
 
 /* For REGISTER, we have no available parameter registers, and so we
    enter here having pushed the closure onto the stack.  */
@@ -371,7 +382,7 @@ C(ffi_closure_REGISTER):
 	movl closure_FS-4(%esp), %ecx	/* load retaddr */
 	movl closure_FS(%esp), %eax	/* load closure */
 	movl %ecx, closure_FS(%esp)	/* move retaddr */
-	jmp 0f
+	jmp L(do_closure_REGISTER)
 
 	cfi_endproc
 ENDF(C(ffi_closure_REGISTER))
@@ -391,11 +402,13 @@ C(ffi_closure_STDCALL):
 	FFI_CLOSURE_SAVE_REGS
 
-0:	/* Entry point from ffi_closure_REGISTER.  */
+	/* Entry point from ffi_closure_REGISTER.  */
+L(do_closure_REGISTER):
 
 	FFI_CLOSURE_COPY_TRAMP_DATA
 
-88:	/* Entry point from preceeding Go closure.  */
+	/* Entry point from preceeding Go closure.  */
+L(do_closure_STDCALL):
 
 	FFI_CLOSURE_CALL_INNER
@@ -411,70 +424,70 @@ C(ffi_closure_STDCALL):
 	   there is always a window between the mov and the ret which
 	   will be wrong from one point of view or another.  */
-	FFI_CLOSURE_MASK_AND_JUMP
+	FFI_CLOSURE_MASK_AND_JUMP(3)
 
 	.balign 8
-0:
+L(load_table3):
-E(X86_RET_FLOAT)
+E(L(load_table3), X86_RET_FLOAT)
 	flds (%esp)
 	movl %ecx, %esp
 	ret
-E(X86_RET_DOUBLE)
+E(L(load_table3), X86_RET_DOUBLE)
 	fldl (%esp)
 	movl %ecx, %esp
 	ret
-E(X86_RET_LDOUBLE)
+E(L(load_table3), X86_RET_LDOUBLE)
 	fldt (%esp)
 	movl %ecx, %esp
 	ret
-E(X86_RET_SINT8)
+E(L(load_table3), X86_RET_SINT8)
 	movsbl (%esp), %eax
 	movl %ecx, %esp
 	ret
-E(X86_RET_SINT16)
+E(L(load_table3), X86_RET_SINT16)
 	movswl (%esp), %eax
 	movl %ecx, %esp
 	ret
-E(X86_RET_UINT8)
+E(L(load_table3), X86_RET_UINT8)
 	movzbl (%esp), %eax
 	movl %ecx, %esp
 	ret
-E(X86_RET_UINT16)
+E(L(load_table3), X86_RET_UINT16)
 	movzwl (%esp), %eax
 	movl %ecx, %esp
 	ret
-E(X86_RET_INT64)
+E(L(load_table3), X86_RET_INT64)
 	popl %eax
 	popl %edx
 	movl %ecx, %esp
 	ret
-E(X86_RET_INT32)
+E(L(load_table3), X86_RET_INT32)
 	movl (%esp), %eax
 	movl %ecx, %esp
 	ret
-E(X86_RET_VOID)
+E(L(load_table3), X86_RET_VOID)
 	movl %ecx, %esp
 	ret
-E(X86_RET_STRUCTPOP)
+E(L(load_table3), X86_RET_STRUCTPOP)
 	movl %ecx, %esp
 	ret
-E(X86_RET_STRUCTARG)
+E(L(load_table3), X86_RET_STRUCTARG)
 	movl (%esp), %eax
 	movl %ecx, %esp
 	ret
-E(X86_RET_STRUCT_1B)
+E(L(load_table3), X86_RET_STRUCT_1B)
 	movzbl (%esp), %eax
 	movl %ecx, %esp
 	ret
-E(X86_RET_STRUCT_2B)
+E(L(load_table3), X86_RET_STRUCT_2B)
 	movzwl (%esp), %eax
 	movl %ecx, %esp
 	ret
 
 	/* Fill out the table so that bad values are predictable.  */
-E(X86_RET_UNUSED14)
+E(L(load_table3), X86_RET_UNUSED14)
 	ud2
-E(X86_RET_UNUSED15)
+E(L(load_table3), X86_RET_UNUSED15)
 	ud2
 
 	cfi_endproc
@@ -509,67 +522,69 @@ C(ffi_closure_raw_SYSV):
 	andl $X86_RET_TYPE_MASK, %eax
 #ifdef __PIC__
 	call C(__x86.get_pc_thunk.bx)
-1:	leal 0f-1b(%ebx, %eax, 8), %eax
+L(pc4):
+	leal L(load_table4)-L(pc4)(%ebx, %eax, 8), %eax
 #else
-	leal 0f(,%eax, 8), %eax
+	leal L(load_table4)(,%eax, 8), %eax
 #endif
 	movl raw_closure_S_FS-4(%esp), %ebx
 	cfi_restore(%ebx)
 	jmp *%eax
 
 	.balign 8
-0:
+L(load_table4):
-E(X86_RET_FLOAT)
+E(L(load_table4), X86_RET_FLOAT)
 	flds 16(%esp)
-	jmp 9f
+	jmp L(e4)
-E(X86_RET_DOUBLE)
+E(L(load_table4), X86_RET_DOUBLE)
 	fldl 16(%esp)
-	jmp 9f
+	jmp L(e4)
-E(X86_RET_LDOUBLE)
+E(L(load_table4), X86_RET_LDOUBLE)
 	fldt 16(%esp)
-	jmp 9f
+	jmp L(e4)
-E(X86_RET_SINT8)
+E(L(load_table4), X86_RET_SINT8)
 	movsbl 16(%esp), %eax
-	jmp 9f
+	jmp L(e4)
-E(X86_RET_SINT16)
+E(L(load_table4), X86_RET_SINT16)
 	movswl 16(%esp), %eax
-	jmp 9f
+	jmp L(e4)
-E(X86_RET_UINT8)
+E(L(load_table4), X86_RET_UINT8)
 	movzbl 16(%esp), %eax
-	jmp 9f
+	jmp L(e4)
-E(X86_RET_UINT16)
+E(L(load_table4), X86_RET_UINT16)
 	movzwl 16(%esp), %eax
-	jmp 9f
+	jmp L(e4)
-E(X86_RET_INT64)
+E(L(load_table4), X86_RET_INT64)
 	movl 16+4(%esp), %edx
 	/* fallthru */
-E(X86_RET_INT32)
+E(L(load_table4), X86_RET_INT32)
 	movl 16(%esp), %eax
 	/* fallthru */
-E(X86_RET_VOID)
+E(L(load_table4), X86_RET_VOID)
-9:	addl $raw_closure_S_FS, %esp
+L(e4):
+	addl $raw_closure_S_FS, %esp
 	cfi_adjust_cfa_offset(-raw_closure_S_FS)
 	ret
 	cfi_adjust_cfa_offset(raw_closure_S_FS)
-E(X86_RET_STRUCTPOP)
+E(L(load_table4), X86_RET_STRUCTPOP)
 	addl $raw_closure_S_FS, %esp
 	cfi_adjust_cfa_offset(-raw_closure_S_FS)
 	ret $4
 	cfi_adjust_cfa_offset(raw_closure_S_FS)
-E(X86_RET_STRUCTARG)
+E(L(load_table4), X86_RET_STRUCTARG)
 	movl 16(%esp), %eax
-	jmp 9b
+	jmp L(e4)
-E(X86_RET_STRUCT_1B)
+E(L(load_table4), X86_RET_STRUCT_1B)
 	movzbl 16(%esp), %eax
-	jmp 9b
+	jmp L(e4)
-E(X86_RET_STRUCT_2B)
+E(L(load_table4), X86_RET_STRUCT_2B)
 	movzwl 16(%esp), %eax
-	jmp 9b
+	jmp L(e4)
 
 	/* Fill out the table so that bad values are predictable.  */
-E(X86_RET_UNUSED14)
+E(L(load_table4), X86_RET_UNUSED14)
 	ud2
-E(X86_RET_UNUSED15)
+E(L(load_table4), X86_RET_UNUSED15)
 	ud2
 
 	cfi_endproc
@@ -613,68 +628,70 @@ C(ffi_closure_raw_THISCALL):
 	andl $X86_RET_TYPE_MASK, %eax
 #ifdef __PIC__
 	call C(__x86.get_pc_thunk.bx)
-1:	leal 0f-1b(%ebx, %eax, 8), %eax
+L(pc5):
+	leal L(load_table5)-L(pc5)(%ebx, %eax, 8), %eax
 #else
-	leal 0f(,%eax, 8), %eax
+	leal L(load_table5)(,%eax, 8), %eax
 #endif
 	movl raw_closure_T_FS-4(%esp), %ebx
 	cfi_restore(%ebx)
 	jmp *%eax
 
 	.balign 8
-0:
+L(load_table5):
-E(X86_RET_FLOAT)
+E(L(load_table5), X86_RET_FLOAT)
 	flds 16(%esp)
-	jmp 9f
+	jmp L(e5)
-E(X86_RET_DOUBLE)
+E(L(load_table5), X86_RET_DOUBLE)
 	fldl 16(%esp)
-	jmp 9f
+	jmp L(e5)
-E(X86_RET_LDOUBLE)
+E(L(load_table5), X86_RET_LDOUBLE)
 	fldt 16(%esp)
-	jmp 9f
+	jmp L(e5)
-E(X86_RET_SINT8)
+E(L(load_table5), X86_RET_SINT8)
 	movsbl 16(%esp), %eax
-	jmp 9f
+	jmp L(e5)
-E(X86_RET_SINT16)
+E(L(load_table5), X86_RET_SINT16)
 	movswl 16(%esp), %eax
-	jmp 9f
+	jmp L(e5)
-E(X86_RET_UINT8)
+E(L(load_table5), X86_RET_UINT8)
 	movzbl 16(%esp), %eax
-	jmp 9f
+	jmp L(e5)
-E(X86_RET_UINT16)
+E(L(load_table5), X86_RET_UINT16)
 	movzwl 16(%esp), %eax
-	jmp 9f
+	jmp L(e5)
-E(X86_RET_INT64)
+E(L(load_table5), X86_RET_INT64)
 	movl 16+4(%esp), %edx
 	/* fallthru */
-E(X86_RET_INT32)
+E(L(load_table5), X86_RET_INT32)
 	movl 16(%esp), %eax
 	/* fallthru */
-E(X86_RET_VOID)
+E(L(load_table5), X86_RET_VOID)
-9:	addl $raw_closure_T_FS, %esp
+L(e5):
+	addl $raw_closure_T_FS, %esp
 	cfi_adjust_cfa_offset(-raw_closure_T_FS)
 	/* Remove the extra %ecx argument we pushed.  */
 	ret $4
 	cfi_adjust_cfa_offset(raw_closure_T_FS)
-E(X86_RET_STRUCTPOP)
+E(L(load_table5), X86_RET_STRUCTPOP)
 	addl $raw_closure_T_FS, %esp
 	cfi_adjust_cfa_offset(-raw_closure_T_FS)
 	ret $8
 	cfi_adjust_cfa_offset(raw_closure_T_FS)
-E(X86_RET_STRUCTARG)
+E(L(load_table5), X86_RET_STRUCTARG)
 	movl 16(%esp), %eax
-	jmp 9b
+	jmp L(e5)
-E(X86_RET_STRUCT_1B)
+E(L(load_table5), X86_RET_STRUCT_1B)
 	movzbl 16(%esp), %eax
-	jmp 9b
+	jmp L(e5)
-E(X86_RET_STRUCT_2B)
+E(L(load_table5), X86_RET_STRUCT_2B)
 	movzwl 16(%esp), %eax
-	jmp 9b
+	jmp L(e5)
 
 	/* Fill out the table so that bad values are predictable.  */
-E(X86_RET_UNUSED14)
+E(L(load_table5), X86_RET_UNUSED14)
 	ud2
-E(X86_RET_UNUSED15)
+E(L(load_table5), X86_RET_UNUSED15)
 	ud2
 
 	cfi_endproc

Changed file 2 of 2:

@@ -43,6 +43,12 @@
 # define C(X) X
 #endif
 
+#ifdef __APPLE__
+# define L(X) C1(L, X)
+#else
+# define L(X) C1(.L, X)
+#endif
+
 #ifdef __ELF__
 # define PLT(X) X@PLT
 # define ENDF(X) .type X,@function; .size X, . - X
@@ -55,12 +61,11 @@
    actual table.  The entry points into the table are all 8 bytes.
    The use of ORG asserts that we're at the correct location.  */
 /* ??? The clang assembler doesn't handle .org with symbolic expressions. */
-.macro E index
-	.balign 8
-#if !defined(__clang__) && !defined(__APPLE__)
-	.org 0b + \index * 8, 0x90
+#if defined(__clang__) || defined(__APPLE__)
+# define E(BASE, X) .balign 8
+#else
+# define E(BASE, X) .balign 8; .org BASE + X * 8
 #endif
-.endm
 
 /* ffi_call_unix64 (void *args, unsigned long bytes, unsigned flags,
 	    void *raddr, void (*fnaddr)(void));
@@ -106,8 +111,8 @@ C(ffi_call_unix64):
 	movq 0x28(%r10), %r9
 	movl 0xb0(%r10), %eax
 	testl %eax, %eax
-	jnz .Lload_sse
+	jnz L(load_sse)
-.Lret_from_load_sse:
+L(ret_from_load_sse):
 
 	/* Deallocate the reg arg area, except for r10, then load via pop.  */
 	leaq 0xb8(%r10), %rsp
@@ -129,8 +134,8 @@ C(ffi_call_unix64):
 	/* The first byte of the flags contains the FFI_TYPE.  */
 	cmpb $UNIX64_RET_LAST, %cl
 	movzbl %cl, %r10d
-	leaq 0f(%rip), %r11
+	leaq L(store_table)(%rip), %r11
-	ja 9f
+	ja L(sa)
 	leaq (%r11, %r10, 8), %r10
 
 	/* Prep for the structure cases: scratch area in redzone.  */
@@ -138,78 +143,80 @@ C(ffi_call_unix64):
 	jmp *%r10
 
 	.balign 8
-0:
+L(store_table):
-E UNIX64_RET_VOID
+E(L(store_table), UNIX64_RET_VOID)
 	ret
-E UNIX64_RET_UINT8
+E(L(store_table), UNIX64_RET_UINT8)
 	movzbl %al, %eax
 	movq %rax, (%rdi)
 	ret
-E UNIX64_RET_UINT16
+E(L(store_table), UNIX64_RET_UINT16)
 	movzwl %ax, %eax
 	movq %rax, (%rdi)
 	ret
-E UNIX64_RET_UINT32
+E(L(store_table), UNIX64_RET_UINT32)
 	movl %eax, %eax
 	movq %rax, (%rdi)
 	ret
-E UNIX64_RET_SINT8
+E(L(store_table), UNIX64_RET_SINT8)
 	movsbq %al, %rax
 	movq %rax, (%rdi)
 	ret
-E UNIX64_RET_SINT16
+E(L(store_table), UNIX64_RET_SINT16)
 	movswq %ax, %rax
 	movq %rax, (%rdi)
 	ret
-E UNIX64_RET_SINT32
+E(L(store_table), UNIX64_RET_SINT32)
 	cltq
 	movq %rax, (%rdi)
 	ret
-E UNIX64_RET_INT64
+E(L(store_table), UNIX64_RET_INT64)
 	movq %rax, (%rdi)
 	ret
-E UNIX64_RET_XMM32
+E(L(store_table), UNIX64_RET_XMM32)
 	movd %xmm0, (%rdi)
 	ret
-E UNIX64_RET_XMM64
+E(L(store_table), UNIX64_RET_XMM64)
 	movq %xmm0, (%rdi)
 	ret
-E UNIX64_RET_X87
+E(L(store_table), UNIX64_RET_X87)
 	fstpt (%rdi)
 	ret
-E UNIX64_RET_X87_2
+E(L(store_table), UNIX64_RET_X87_2)
 	fstpt (%rdi)
 	fstpt 16(%rdi)
 	ret
-E UNIX64_RET_ST_XMM0_RAX
+E(L(store_table), UNIX64_RET_ST_XMM0_RAX)
 	movq %rax, 8(%rsi)
-	jmp 3f
+	jmp L(s3)
-E UNIX64_RET_ST_RAX_XMM0
+E(L(store_table), UNIX64_RET_ST_RAX_XMM0)
 	movq %xmm0, 8(%rsi)
-	jmp 2f
+	jmp L(s2)
-E UNIX64_RET_ST_XMM0_XMM1
+E(L(store_table), UNIX64_RET_ST_XMM0_XMM1)
 	movq %xmm1, 8(%rsi)
-	jmp 3f
+	jmp L(s3)
-E UNIX64_RET_ST_RAX_RDX
+E(L(store_table), UNIX64_RET_ST_RAX_RDX)
 	movq %rdx, 8(%rsi)
-2:	movq %rax, (%rsi)
+L(s2):
+	movq %rax, (%rsi)
 	shrl $UNIX64_SIZE_SHIFT, %ecx
 	rep movsb
 	ret
 	.balign 8
-3:	movq %xmm0, (%rsi)
+L(s3):
+	movq %xmm0, (%rsi)
 	shrl $UNIX64_SIZE_SHIFT, %ecx
 	rep movsb
 	ret
-9:	call PLT(C(abort))
+L(sa):	call PLT(C(abort))
 
 	/* Many times we can avoid loading any SSE registers at all.
 	   It's not worth an indirect jump to load the exact set of
 	   SSE registers needed; zero or all is a good compromise.  */
 	.balign 2
 	cfi_restore_state
-.Lload_sse:
+L(load_sse):
 	movdqa 0x30(%r10), %xmm0
 	movdqa 0x40(%r10), %xmm1
 	movdqa 0x50(%r10), %xmm2
@@ -218,7 +225,7 @@ E UNIX64_RET_ST_RAX_RDX
 	movdqa 0x80(%r10), %xmm5
 	movdqa 0x90(%r10), %xmm6
 	movdqa 0xa0(%r10), %xmm7
-	jmp .Lret_from_load_sse
+	jmp L(ret_from_load_sse)
 
 	cfi_endproc
 ENDF(C(ffi_call_unix64))
@@ -251,7 +258,7 @@ C(ffi_closure_unix64_sse):
 	movdqa %xmm5, ffi_closure_OFS_V+0x50(%rsp)
 	movdqa %xmm6, ffi_closure_OFS_V+0x60(%rsp)
 	movdqa %xmm7, ffi_closure_OFS_V+0x70(%rsp)
-	jmp 0f
+	jmp L(sse_entry1)
 
 	cfi_endproc
 ENDF(C(ffi_closure_unix64_sse))
@@ -265,7 +272,7 @@ C(ffi_closure_unix64):
 	subq $ffi_closure_FS, %rsp
 	/* Note clang bug 21515: adjust_cfa_offset error across endproc.  */
 	cfi_def_cfa_offset(ffi_closure_FS + 8)
-0:
+L(sse_entry1):
 	movq %rdi, ffi_closure_OFS_G+0x00(%rsp)
 	movq %rsi, ffi_closure_OFS_G+0x08(%rsp)
 	movq %rdx, ffi_closure_OFS_G+0x10(%rsp)
@@ -282,7 +289,7 @@ C(ffi_closure_unix64):
 	movq FFI_TRAMPOLINE_SIZE+8(%r10), %rsi		/* Load fun */
 	movq FFI_TRAMPOLINE_SIZE+16(%r10), %rdx		/* Load user_data */
 #endif
-.Ldo_closure:
+L(do_closure):
 	leaq ffi_closure_OFS_RVALUE(%rsp), %rcx		/* Load rvalue */
 	movq %rsp, %r8					/* Load reg_args */
 	leaq ffi_closure_FS+8(%rsp), %r9		/* Load argp */
@@ -295,68 +302,70 @@ C(ffi_closure_unix64):
 	/* The first byte of the return value contains the FFI_TYPE.  */
 	cmpb $UNIX64_RET_LAST, %al
 	movzbl %al, %r10d
-	leaq 0f(%rip), %r11
+	leaq L(load_table)(%rip), %r11
-	ja 9f
+	ja L(la)
 	leaq (%r11, %r10, 8), %r10
 	leaq ffi_closure_RED_RVALUE(%rsp), %rsi
 	jmp *%r10
 
 	.balign 8
-0:
+L(load_table):
-E UNIX64_RET_VOID
+E(L(load_table), UNIX64_RET_VOID)
 	ret
-E UNIX64_RET_UINT8
+E(L(load_table), UNIX64_RET_UINT8)
 	movzbl (%rsi), %eax
 	ret
-E UNIX64_RET_UINT16
+E(L(load_table), UNIX64_RET_UINT16)
 	movzwl (%rsi), %eax
 	ret
-E UNIX64_RET_UINT32
+E(L(load_table), UNIX64_RET_UINT32)
 	movl (%rsi), %eax
 	ret
-E UNIX64_RET_SINT8
+E(L(load_table), UNIX64_RET_SINT8)
 	movsbl (%rsi), %eax
 	ret
-E UNIX64_RET_SINT16
+E(L(load_table), UNIX64_RET_SINT16)
 	movswl (%rsi), %eax
 	ret
-E UNIX64_RET_SINT32
+E(L(load_table), UNIX64_RET_SINT32)
 	movl (%rsi), %eax
 	ret
-E UNIX64_RET_INT64
+E(L(load_table), UNIX64_RET_INT64)
 	movq (%rsi), %rax
 	ret
-E UNIX64_RET_XMM32
+E(L(load_table), UNIX64_RET_XMM32)
 	movd (%rsi), %xmm0
 	ret
-E UNIX64_RET_XMM64
+E(L(load_table), UNIX64_RET_XMM64)
 	movq (%rsi), %xmm0
 	ret
-E UNIX64_RET_X87
+E(L(load_table), UNIX64_RET_X87)
 	fldt (%rsi)
 	ret
-E UNIX64_RET_X87_2
+E(L(load_table), UNIX64_RET_X87_2)
 	fldt 16(%rsi)
 	fldt (%rsi)
 	ret
-E UNIX64_RET_ST_XMM0_RAX
+E(L(load_table), UNIX64_RET_ST_XMM0_RAX)
 	movq 8(%rsi), %rax
-	jmp 3f
+	jmp L(l3)
-E UNIX64_RET_ST_RAX_XMM0
+E(L(load_table), UNIX64_RET_ST_RAX_XMM0)
 	movq 8(%rsi), %xmm0
-	jmp 2f
+	jmp L(l2)
-E UNIX64_RET_ST_XMM0_XMM1
+E(L(load_table), UNIX64_RET_ST_XMM0_XMM1)
 	movq 8(%rsi), %xmm1
-	jmp 3f
+	jmp L(l3)
-E UNIX64_RET_ST_RAX_RDX
+E(L(load_table), UNIX64_RET_ST_RAX_RDX)
 	movq 8(%rsi), %rdx
-2:	movq (%rsi), %rax
+L(l2):
+	movq (%rsi), %rax
 	ret
 	.balign 8
-3:	movq (%rsi), %xmm0
+L(l3):
+	movq (%rsi), %xmm0
 	ret
-9:	call PLT(C(abort))
+L(la):	call PLT(C(abort))
 
 	cfi_endproc
 ENDF(C(ffi_closure_unix64))
@@ -379,7 +388,7 @@ C(ffi_go_closure_unix64_sse):
 	movdqa %xmm5, ffi_closure_OFS_V+0x50(%rsp)
 	movdqa %xmm6, ffi_closure_OFS_V+0x60(%rsp)
 	movdqa %xmm7, ffi_closure_OFS_V+0x70(%rsp)
-	jmp 0f
+	jmp L(sse_entry2)
 
 	cfi_endproc
 ENDF(C(ffi_go_closure_unix64_sse))
@@ -393,7 +402,7 @@ C(ffi_go_closure_unix64):
 	subq $ffi_closure_FS, %rsp
 	/* Note clang bug 21515: adjust_cfa_offset error across endproc.  */
 	cfi_def_cfa_offset(ffi_closure_FS + 8)
-0:
+L(sse_entry2):
 	movq %rdi, ffi_closure_OFS_G+0x00(%rsp)
 	movq %rsi, ffi_closure_OFS_G+0x08(%rsp)
 	movq %rdx, ffi_closure_OFS_G+0x10(%rsp)
@@ -410,7 +419,7 @@ C(ffi_go_closure_unix64):
 	movq 16(%r10), %rsi		/* Load fun */
 	movq %r10, %rdx			/* Load closure (user_data) */
 #endif
-	jmp .Ldo_closure
+	jmp L(do_closure)
 
 	cfi_endproc
 ENDF(C(ffi_go_closure_unix64))