sparc: Preprocess floating point struct return

We can eliminate recursion and speed structure return
by flattening a nested structure tree into a bitmask.
This commit is contained in:
Richard Henderson
2014-10-25 14:52:45 -07:00
parent 2b27890ba7
commit 0686c2e71c
3 changed files with 106 additions and 33 deletions

View File

@@ -42,41 +42,103 @@
#endif #endif
#ifdef SPARC64 #ifdef SPARC64
/* Perform machine dependent cif processing */
int FFI_HIDDEN /* Flatten the contents of a structure to the parts that are passed in
ffi_v9_layout_struct (ffi_type *arg, int off, void *d, void *si, void *sf) floating point registers. The return is a bit mask wherein bit N
set means bytes [4*N, 4*N+3] are passed in %fN.
We encode both the (running) size (maximum 32) and mask (maximum 255)
into one integer. The size is placed in the low byte, so that align
and addition work correctly. The mask is placed in the second byte. */
static int
ffi_struct_float_mask (ffi_type *struct_type, int size_mask)
{ {
ffi_type **elts, *t; ffi_type **elts, *t;
for (elts = arg->elements; (t = *elts) != NULL; elts++) for (elts = struct_type->elements; (t = *elts) != NULL; elts++)
{ {
size_t z = t->size; size_t z = t->size;
void *src = si; int o, m;
off = ALIGN(off, t->alignment); size_mask = ALIGN(size_mask, t->alignment);
switch (t->type) switch (t->type)
{ {
case FFI_TYPE_STRUCT: case FFI_TYPE_STRUCT:
off = ffi_v9_layout_struct(t, off, d, si, sf); size_mask = ffi_struct_float_mask (t, size_mask);
off = ALIGN(off, FFI_SIZEOF_ARG); size_mask = ALIGN(size_mask, FFI_SIZEOF_ARG);
continue; continue;
case FFI_TYPE_FLOAT: case FFI_TYPE_FLOAT:
case FFI_TYPE_DOUBLE: case FFI_TYPE_DOUBLE:
case FFI_TYPE_LONGDOUBLE: case FFI_TYPE_LONGDOUBLE:
/* Note that closures start with the argument offset, m = (1 << (z / 4)) - 1; /* compute mask for type */
so that we know when to stop looking at fp regs. */ o = (size_mask >> 2) & 0x3f; /* extract word offset */
if (off < 128) size_mask |= m << (o + 8); /* insert mask into place */
src = sf;
break; break;
} }
memcpy(d + off, src + off, z); size_mask += z;
off += z;
} }
return off; size_mask = ALIGN(size_mask, struct_type->alignment);
FFI_ASSERT ((size_mask & 0xff) == struct_type->size);
return size_mask;
} }
/* Combine the floating point words of a structure with its integer image.

   SIZE_MASK holds the structure size in bytes in its low byte; the bits
   above that form a mask in which bit I set selects 4-byte word I of VF.

   Returns VI untouched when no word is selected, VF untouched when the
   mask selects every word of the structure, and otherwise VI after the
   selected words have been overwritten in place with those from VF.  */
static void *
ffi_struct_float_merge (int size_mask, void *vi, void *vf)
{
  int nbytes = size_mask & 0xff;
  int fp_words = size_mask >> 8;
  int nwords = nbytes >> 2;
  unsigned int *ints = vi;
  unsigned int *floats = vf;
  int w;

  /* Nothing lives in the fp image: the integer image is already right.  */
  if (fp_words == 0)
    return vi;

  /* Everything lives in the fp image: hand it back without copying.  */
  if (fp_words == (1 << nwords) - 1)
    return vf;

  /* Mixed structure: patch the fp words into the integer image.  */
  for (w = 0; w < nwords; w++)
    {
      if ((fp_words >> w) & 1)
        ints[w] = floats[w];
    }

  return vi;
}
/* Assemble a structure into VD from its integer image VI and its floating
   point image VF.

   SIZE_MASK holds the structure size in bytes in its low byte; the bits
   above that form a mask in which bit I set selects 4-byte word I of VF,
   and bit I clear selects the same word of VI.

   When the mask is empty the whole structure is copied from VI; when the
   mask selects every word it is copied from VF; otherwise VD is filled
   one 4-byte word at a time from whichever image the mask selects.  */
void FFI_HIDDEN
ffi_struct_float_copy (int size_mask, void *vd, void *vi, void *vf)
{
  int nbytes = size_mask & 0xff;
  int fp_words = size_mask >> 8;
  int nwords = nbytes >> 2;
  const void *whole = vi;

  if (fp_words != 0)
    {
      if (fp_words != (1 << nwords) - 1)
        {
          /* Mixed structure: pick each word from the image that owns it.  */
          unsigned int *out = vd;
          const unsigned int *ints = vi;
          const unsigned int *floats = vf;
          int w;

          for (w = 0; w < nwords; w++)
            {
              if ((fp_words >> w) & 1)
                out[w] = floats[w];
              else
                out[w] = ints[w];
            }
          return;
        }

      /* Entirely floating point: bulk copy from the fp image instead.  */
      whole = vf;
    }

  memcpy (vd, whole, nbytes);
}
/* Perform machine dependent cif processing */
ffi_status FFI_HIDDEN ffi_status FFI_HIDDEN
ffi_prep_cif_machdep(ffi_cif *cif) ffi_prep_cif_machdep(ffi_cif *cif)
{ {
@@ -108,7 +170,10 @@ ffi_prep_cif_machdep(ffi_cif *cif)
bytes = 8; bytes = 8;
} }
else else
flags = SPARC_RET_STRUCT; {
flags = ffi_struct_float_mask (rtype, 0) << SPARC_FLTMASK_SHIFT;
flags |= SPARC_RET_STRUCT;
}
break; break;
case FFI_TYPE_SINT8: case FFI_TYPE_SINT8:
@@ -343,7 +408,7 @@ ffi_closure_sparc_inner_v9(ffi_closure *closure, void *rvalue,
ffi_cif *cif; ffi_cif *cif;
ffi_type **arg_types; ffi_type **arg_types;
void **avalue; void **avalue;
int i, argn, nargs, flags; int i, argn, argx, nargs, flags;
cif = closure->cif; cif = closure->cif;
arg_types = cif->arg_types; arg_types = cif->arg_types;
@@ -364,12 +429,13 @@ ffi_closure_sparc_inner_v9(ffi_closure *closure, void *rvalue,
argn = 0; argn = 0;
/* Grab the addresses of the arguments from the stack frame. */ /* Grab the addresses of the arguments from the stack frame. */
for (i = 0; i < nargs; i++) for (i = 0; i < nargs; i++, argn = argx)
{ {
ffi_type *ty = arg_types[i]; ffi_type *ty = arg_types[i];
void *a = &gpr[argn++]; void *a = &gpr[argn];
size_t z; size_t z;
argx = argn + 1;
switch (ty->type) switch (ty->type)
{ {
case FFI_TYPE_STRUCT: case FFI_TYPE_STRUCT:
@@ -378,25 +444,31 @@ ffi_closure_sparc_inner_v9(ffi_closure *closure, void *rvalue,
a = *(void **)a; a = *(void **)a;
else else
{ {
if (--argn < 16) argx = argn + ALIGN (z, 8) / 8;
ffi_v9_layout_struct(arg_types[i], 8*argn, gpr, gpr, fpr); if (argn < 16)
argn += ALIGN (z, 8) / 8; {
int size_mask = ffi_struct_float_mask (ty, 0);
int argn_mask = (0xffff00 >> argn) & 0xff00;
/* Eliminate fp registers off the end. */
size_mask = (size_mask & 0xff) | (size_mask & argn_mask);
a = ffi_struct_float_merge (size_mask, gpr+argn, fpr+argn);
}
} }
break; break;
case FFI_TYPE_LONGDOUBLE: case FFI_TYPE_LONGDOUBLE:
if (--argn & 1) argn = ALIGN (argn, 2);
argn++;
a = (argn < 16 ? fpr : gpr) + argn; a = (argn < 16 ? fpr : gpr) + argn;
argn += 2; argx = argn + 2;
break; break;
case FFI_TYPE_DOUBLE: case FFI_TYPE_DOUBLE:
if (argn <= 16) if (argn <= 16)
a = fpr + argn - 1; a = fpr + argn;
break; break;
case FFI_TYPE_FLOAT: case FFI_TYPE_FLOAT:
if (argn <= 16) if (argn <= 16)
a = fpr + argn - 1; a = fpr + argn;
a += 4; a += 4;
break; break;

View File

@@ -16,3 +16,5 @@
#define SPARC_FLAG_RET_MASK 15 #define SPARC_FLAG_RET_MASK 15
#define SPARC_FLAG_RET_IN_MEM 32 #define SPARC_FLAG_RET_IN_MEM 32
#define SPARC_FLAG_FP_ARGS 64 #define SPARC_FLAG_FP_ARGS 64
#define SPARC_FLTMASK_SHIFT 8

View File

@@ -177,12 +177,11 @@ E 15
std %f6, [%l2+56] std %f6, [%l2+56]
! Copy the structure into place. ! Copy the structure into place.
ldx [%i0+16], %o0 ! load rtype from cif srl %l0, SPARC_FLTMASK_SHIFT, %o0 ! load size_mask
mov 0, %o1 ! load off mov %i2, %o1 ! load dst
mov %i2, %o2 ! load dst mov %l2, %o2 ! load src_gp
mov %l2, %o3 ! load src_int call C(ffi_struct_float_copy)
call C(ffi_v9_layout_struct) add %l2, 32, %o3 ! load src_fp
add %l2, 32, %o4 ! load src_fp
return %i7+8 return %i7+8
nop nop