TILE-Gx/TILEPro support
This commit is contained in:
360
src/tile/tile.S
Normal file
360
src/tile/tile.S
Normal file
@@ -0,0 +1,360 @@
|
||||
/* -----------------------------------------------------------------------
|
||||
tile.S - Copyright (c) 2011 Tilera Corp.
|
||||
|
||||
Tilera TILEPro and TILE-Gx Foreign Function Interface
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining
|
||||
a copy of this software and associated documentation files (the
|
||||
``Software''), to deal in the Software without restriction, including
|
||||
without limitation the rights to use, copy, modify, merge, publish,
|
||||
distribute, sublicense, and/or sell copies of the Software, and to
|
||||
permit persons to whom the Software is furnished to do so, subject to
|
||||
the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included
|
||||
in all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND,
|
||||
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
DEALINGS IN THE SOFTWARE.
|
||||
----------------------------------------------------------------------- */
|
||||
|
||||
#define LIBFFI_ASM
|
||||
#include <fficonfig.h>
|
||||
#include <ffi.h>
|
||||
|
||||
/* Number of bytes in a register. */
|
||||
#define REG_SIZE FFI_SIZEOF_ARG
|
||||
|
||||
/* Number of bytes in stack linkage area for backtracing.
|
||||
|
||||
A note about the ABI: on entry to a procedure, sp points to a stack
|
||||
slot where it must spill the return address if it's not a leaf.
|
||||
REG_SIZE bytes beyond that is a slot owned by the caller which
|
||||
contains the sp value that the caller had when it was originally
|
||||
entered (i.e. the caller's frame pointer). */
|
||||
#define LINKAGE_SIZE (2 * REG_SIZE)
|
||||
|
||||
/* The first 10 registers are used to pass arguments and return values. */
|
||||
#define NUM_ARG_REGS 10
|
||||
|
||||
#ifdef __tilegx__
|
||||
#define SW st
|
||||
#define LW ld
|
||||
#define BGZT bgtzt
|
||||
#else
|
||||
#define SW sw
|
||||
#define LW lw
|
||||
#define BGZT bgzt
|
||||
#endif
|
||||
|
||||
|
||||
/* void ffi_call_tile (int_reg_t reg_args[NUM_ARG_REGS],
|
||||
const int_reg_t *stack_args,
|
||||
unsigned long stack_args_bytes,
|
||||
void (*fnaddr)(void));
|
||||
|
||||
On entry, REG_ARGS contain the outgoing register values,
|
||||
and STACK_ARGS containts STACK_ARG_BYTES of additional values
|
||||
to be passed on the stack. If STACK_ARG_BYTES is zero, then
|
||||
STACK_ARGS is ignored.
|
||||
|
||||
When the invoked function returns, the values of r0-r9 are
|
||||
blindly stored back into REG_ARGS for the caller to examine. */
|
||||
|
||||
.section .text.ffi_call_tile, "ax", @progbits
|
||||
.align 8
|
||||
.globl ffi_call_tile
|
||||
FFI_HIDDEN(ffi_call_tile)
|
||||
ffi_call_tile:
|
||||
|
||||
/* Incoming arguments. */
|
||||
#define REG_ARGS r0
|
||||
#define INCOMING_STACK_ARGS r1
|
||||
#define STACK_ARG_BYTES r2
|
||||
#define ORIG_FNADDR r3
|
||||
|
||||
/* Temporary values. */
|
||||
#define FRAME_SIZE r10
|
||||
#define TMP r11
|
||||
#define TMP2 r12
|
||||
#define OUTGOING_STACK_ARGS r13
|
||||
#define REG_ADDR_PTR r14
|
||||
#define RETURN_REG_ADDR r15
|
||||
#define FNADDR r16
|
||||
|
||||
.cfi_startproc
|
||||
{
|
||||
/* Save return address. */
|
||||
SW sp, lr
|
||||
.cfi_offset lr, 0
|
||||
/* Prepare to spill incoming r52. */
|
||||
addi TMP, sp, -REG_SIZE
|
||||
/* Increase frame size to have room to spill r52 and REG_ARGS.
|
||||
The +7 is to round up mod 8. */
|
||||
addi FRAME_SIZE, STACK_ARG_BYTES, \
|
||||
REG_SIZE + REG_SIZE + LINKAGE_SIZE + 7
|
||||
}
|
||||
{
|
||||
/* Round stack frame size to a multiple of 8 to satisfy ABI. */
|
||||
andi FRAME_SIZE, FRAME_SIZE, -8
|
||||
/* Compute where to spill REG_ARGS value. */
|
||||
addi TMP2, sp, -(REG_SIZE * 2)
|
||||
}
|
||||
{
|
||||
/* Spill incoming r52. */
|
||||
SW TMP, r52
|
||||
.cfi_offset r52, -REG_SIZE
|
||||
/* Set up our frame pointer. */
|
||||
move r52, sp
|
||||
.cfi_def_cfa_register r52
|
||||
/* Push stack frame. */
|
||||
sub sp, sp, FRAME_SIZE
|
||||
}
|
||||
{
|
||||
/* Prepare to set up stack linkage. */
|
||||
addi TMP, sp, REG_SIZE
|
||||
/* Prepare to memcpy stack args. */
|
||||
addi OUTGOING_STACK_ARGS, sp, LINKAGE_SIZE
|
||||
/* Save REG_ARGS which we will need after we call the subroutine. */
|
||||
SW TMP2, REG_ARGS
|
||||
}
|
||||
{
|
||||
/* Set up linkage info to hold incoming stack pointer. */
|
||||
SW TMP, r52
|
||||
}
|
||||
{
|
||||
/* Skip stack args memcpy if we don't have any stack args (common). */
|
||||
blezt STACK_ARG_BYTES, .Ldone_stack_args_memcpy
|
||||
}
|
||||
|
||||
.Lmemcpy_stack_args:
|
||||
{
|
||||
/* Load incoming argument from stack_args. */
|
||||
LW TMP, INCOMING_STACK_ARGS
|
||||
addi INCOMING_STACK_ARGS, INCOMING_STACK_ARGS, REG_SIZE
|
||||
}
|
||||
{
|
||||
/* Store stack argument into outgoing stack argument area. */
|
||||
SW OUTGOING_STACK_ARGS, TMP
|
||||
addi OUTGOING_STACK_ARGS, OUTGOING_STACK_ARGS, REG_SIZE
|
||||
addi STACK_ARG_BYTES, STACK_ARG_BYTES, -REG_SIZE
|
||||
}
|
||||
{
|
||||
BGZT STACK_ARG_BYTES, .Lmemcpy_stack_args
|
||||
}
|
||||
.Ldone_stack_args_memcpy:
|
||||
|
||||
{
|
||||
/* Copy aside ORIG_FNADDR so we can overwrite its register. */
|
||||
move FNADDR, ORIG_FNADDR
|
||||
/* Prepare to load argument registers. */
|
||||
addi REG_ADDR_PTR, r0, REG_SIZE
|
||||
/* Load outgoing r0. */
|
||||
LW r0, r0
|
||||
}
|
||||
|
||||
/* Load up argument registers from the REG_ARGS array. */
|
||||
#define LOAD_REG(REG, PTR) \
|
||||
{ \
|
||||
LW REG, PTR ; \
|
||||
addi PTR, PTR, REG_SIZE \
|
||||
}
|
||||
|
||||
LOAD_REG(r1, REG_ADDR_PTR)
|
||||
LOAD_REG(r2, REG_ADDR_PTR)
|
||||
LOAD_REG(r3, REG_ADDR_PTR)
|
||||
LOAD_REG(r4, REG_ADDR_PTR)
|
||||
LOAD_REG(r5, REG_ADDR_PTR)
|
||||
LOAD_REG(r6, REG_ADDR_PTR)
|
||||
LOAD_REG(r7, REG_ADDR_PTR)
|
||||
LOAD_REG(r8, REG_ADDR_PTR)
|
||||
LOAD_REG(r9, REG_ADDR_PTR)
|
||||
|
||||
{
|
||||
/* Call the subroutine. */
|
||||
jalr FNADDR
|
||||
}
|
||||
|
||||
{
|
||||
/* Restore original lr. */
|
||||
LW lr, r52
|
||||
/* Prepare to recover ARGS, which we spilled earlier. */
|
||||
addi TMP, r52, -(2 * REG_SIZE)
|
||||
}
|
||||
{
|
||||
/* Restore ARGS, so we can fill it in with the return regs r0-r9. */
|
||||
LW RETURN_REG_ADDR, TMP
|
||||
/* Prepare to restore original r52. */
|
||||
addi TMP, r52, -REG_SIZE
|
||||
}
|
||||
|
||||
{
|
||||
/* Pop stack frame. */
|
||||
move sp, r52
|
||||
/* Restore original r52. */
|
||||
LW r52, TMP
|
||||
}
|
||||
|
||||
#define STORE_REG(REG, PTR) \
|
||||
{ \
|
||||
SW PTR, REG ; \
|
||||
addi PTR, PTR, REG_SIZE \
|
||||
}
|
||||
|
||||
/* Return all register values by reference. */
|
||||
STORE_REG(r0, RETURN_REG_ADDR)
|
||||
STORE_REG(r1, RETURN_REG_ADDR)
|
||||
STORE_REG(r2, RETURN_REG_ADDR)
|
||||
STORE_REG(r3, RETURN_REG_ADDR)
|
||||
STORE_REG(r4, RETURN_REG_ADDR)
|
||||
STORE_REG(r5, RETURN_REG_ADDR)
|
||||
STORE_REG(r6, RETURN_REG_ADDR)
|
||||
STORE_REG(r7, RETURN_REG_ADDR)
|
||||
STORE_REG(r8, RETURN_REG_ADDR)
|
||||
STORE_REG(r9, RETURN_REG_ADDR)
|
||||
|
||||
{
|
||||
jrp lr
|
||||
}
|
||||
|
||||
.cfi_endproc
|
||||
.size ffi_call_tile, .-ffi_call_tile
|
||||
|
||||
/* ffi_closure_tile(...)
|
||||
|
||||
On entry, lr points to the closure plus 8 bytes, and r10
|
||||
contains the actual return address.
|
||||
|
||||
This function simply dumps all register parameters into a stack array
|
||||
and passes the closure, the registers array, and the stack arguments
|
||||
to C code that does all of the actual closure processing. */
|
||||
|
||||
.section .text.ffi_closure_tile, "ax", @progbits
|
||||
.align 8
|
||||
.globl ffi_closure_tile
|
||||
FFI_HIDDEN(ffi_closure_tile)
|
||||
|
||||
.cfi_startproc
|
||||
/* Room to spill all NUM_ARG_REGS incoming registers, plus frame linkage. */
|
||||
#define CLOSURE_FRAME_SIZE (((NUM_ARG_REGS * REG_SIZE * 2 + LINKAGE_SIZE) + 7) & -8)
|
||||
ffi_closure_tile:
|
||||
{
|
||||
#ifdef __tilegx__
|
||||
st sp, lr
|
||||
.cfi_offset lr, 0
|
||||
#else
|
||||
/* Save return address (in r10 due to closure stub wrapper). */
|
||||
SW sp, r10
|
||||
.cfi_return_column r10
|
||||
.cfi_offset r10, 0
|
||||
#endif
|
||||
/* Compute address for stack frame linkage. */
|
||||
addli r10, sp, -(CLOSURE_FRAME_SIZE - REG_SIZE)
|
||||
}
|
||||
{
|
||||
/* Save incoming stack pointer in linkage area. */
|
||||
SW r10, sp
|
||||
.cfi_offset sp, -(CLOSURE_FRAME_SIZE - REG_SIZE)
|
||||
/* Push a new stack frame. */
|
||||
addli sp, sp, -CLOSURE_FRAME_SIZE
|
||||
.cfi_adjust_cfa_offset CLOSURE_FRAME_SIZE
|
||||
}
|
||||
|
||||
{
|
||||
/* Create pointer to where to start spilling registers. */
|
||||
addi r10, sp, LINKAGE_SIZE
|
||||
}
|
||||
|
||||
/* Spill all the incoming registers. */
|
||||
STORE_REG(r0, r10)
|
||||
STORE_REG(r1, r10)
|
||||
STORE_REG(r2, r10)
|
||||
STORE_REG(r3, r10)
|
||||
STORE_REG(r4, r10)
|
||||
STORE_REG(r5, r10)
|
||||
STORE_REG(r6, r10)
|
||||
STORE_REG(r7, r10)
|
||||
STORE_REG(r8, r10)
|
||||
{
|
||||
/* Save r9. */
|
||||
SW r10, r9
|
||||
#ifdef __tilegx__
|
||||
/* Pointer to closure is passed in r11. */
|
||||
move r0, r11
|
||||
#else
|
||||
/* Compute pointer to the closure object. Because the closure
|
||||
starts with a "jal ffi_closure_tile", we can just take the
|
||||
value of lr (a phony return address pointing into the closure)
|
||||
and subtract 8. */
|
||||
addi r0, lr, -8
|
||||
#endif
|
||||
/* Compute a pointer to the register arguments we just spilled. */
|
||||
addi r1, sp, LINKAGE_SIZE
|
||||
}
|
||||
{
|
||||
/* Compute a pointer to the extra stack arguments (if any). */
|
||||
addli r2, sp, CLOSURE_FRAME_SIZE + LINKAGE_SIZE
|
||||
/* Call C code to deal with all of the grotty details. */
|
||||
jal ffi_closure_tile_inner
|
||||
}
|
||||
{
|
||||
addli r10, sp, CLOSURE_FRAME_SIZE
|
||||
}
|
||||
{
|
||||
/* Restore the return address. */
|
||||
LW lr, r10
|
||||
/* Compute pointer to registers array. */
|
||||
addli r10, sp, LINKAGE_SIZE + (NUM_ARG_REGS * REG_SIZE)
|
||||
}
|
||||
/* Return all the register values, which C code may have set. */
|
||||
LOAD_REG(r0, r10)
|
||||
LOAD_REG(r1, r10)
|
||||
LOAD_REG(r2, r10)
|
||||
LOAD_REG(r3, r10)
|
||||
LOAD_REG(r4, r10)
|
||||
LOAD_REG(r5, r10)
|
||||
LOAD_REG(r6, r10)
|
||||
LOAD_REG(r7, r10)
|
||||
LOAD_REG(r8, r10)
|
||||
LOAD_REG(r9, r10)
|
||||
{
|
||||
/* Pop the frame. */
|
||||
addli sp, sp, CLOSURE_FRAME_SIZE
|
||||
jrp lr
|
||||
}
|
||||
|
||||
.cfi_endproc
|
||||
.size ffi_closure_tile, . - ffi_closure_tile
|
||||
|
||||
|
||||
/* What follows are code template instructions that get copied to the
|
||||
closure trampoline by ffi_prep_closure_loc. The zeroed operands
|
||||
get replaced by their proper values at runtime. */
|
||||
|
||||
.section .text.ffi_template_tramp_tile, "ax", @progbits
|
||||
.align 8
|
||||
.globl ffi_template_tramp_tile
|
||||
FFI_HIDDEN(ffi_template_tramp_tile)
|
||||
ffi_template_tramp_tile:
|
||||
#ifdef __tilegx__
|
||||
{
|
||||
moveli r11, 0 /* backpatched to address of containing closure. */
|
||||
moveli r10, 0 /* backpatched to ffi_closure_tile. */
|
||||
}
|
||||
/* Note: the following bundle gets generated multiple times
|
||||
depending on the pointer value (esp. useful for -m32 mode). */
|
||||
{ shl16insli r11, r11, 0 ; shl16insli r10, r10, 0 }
|
||||
{ info 2+8 /* for backtracer: -> pc in lr, frame size 0 */ ; jr r10 }
|
||||
#else
|
||||
/* 'jal .' yields a PC-relative offset of zero so we can OR in the
|
||||
right offset at runtime. */
|
||||
{ move r10, lr ; jal . /* ffi_closure_tile */ }
|
||||
#endif
|
||||
|
||||
.size ffi_template_tramp_tile, . - ffi_template_tramp_tile
|
||||
Reference in New Issue
Block a user