sparc: Preprocess float point struct return
We can eliminate recursion and speed structure return by flattening a nested structure tree into a bitmask.
This commit is contained in:
@@ -42,41 +42,103 @@
|
|||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifdef SPARC64
|
#ifdef SPARC64
|
||||||
/* Perform machine dependent cif processing */
|
|
||||||
|
|
||||||
int FFI_HIDDEN
|
/* Flatten the contents of a structure to the parts that are passed in
|
||||||
ffi_v9_layout_struct (ffi_type *arg, int off, void *d, void *si, void *sf)
|
floating point registers. The return is a bit mask wherein bit N
|
||||||
|
set means bytes [4*N, 4*N+3] are passed in %fN.
|
||||||
|
|
||||||
|
We encode both the (running) size (maximum 32) and mask (maximum 255)
|
||||||
|
into one integer. The size is placed in the low byte, so that align
|
||||||
|
and addition work correctly. The mask is placed in the second byte. */
|
||||||
|
|
||||||
|
static int
|
||||||
|
ffi_struct_float_mask (ffi_type *struct_type, int size_mask)
|
||||||
{
|
{
|
||||||
ffi_type **elts, *t;
|
ffi_type **elts, *t;
|
||||||
|
|
||||||
for (elts = arg->elements; (t = *elts) != NULL; elts++)
|
for (elts = struct_type->elements; (t = *elts) != NULL; elts++)
|
||||||
{
|
{
|
||||||
size_t z = t->size;
|
size_t z = t->size;
|
||||||
void *src = si;
|
int o, m;
|
||||||
|
|
||||||
off = ALIGN(off, t->alignment);
|
size_mask = ALIGN(size_mask, t->alignment);
|
||||||
switch (t->type)
|
switch (t->type)
|
||||||
{
|
{
|
||||||
case FFI_TYPE_STRUCT:
|
case FFI_TYPE_STRUCT:
|
||||||
off = ffi_v9_layout_struct(t, off, d, si, sf);
|
size_mask = ffi_struct_float_mask (t, size_mask);
|
||||||
off = ALIGN(off, FFI_SIZEOF_ARG);
|
size_mask = ALIGN(size_mask, FFI_SIZEOF_ARG);
|
||||||
continue;
|
continue;
|
||||||
case FFI_TYPE_FLOAT:
|
case FFI_TYPE_FLOAT:
|
||||||
case FFI_TYPE_DOUBLE:
|
case FFI_TYPE_DOUBLE:
|
||||||
case FFI_TYPE_LONGDOUBLE:
|
case FFI_TYPE_LONGDOUBLE:
|
||||||
/* Note that closures start with the argument offset,
|
m = (1 << (z / 4)) - 1; /* compute mask for type */
|
||||||
so that we know when to stop looking at fp regs. */
|
o = (size_mask >> 2) & 0x3f; /* extract word offset */
|
||||||
if (off < 128)
|
size_mask |= m << (o + 8); /* insert mask into place */
|
||||||
src = sf;
|
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
memcpy(d + off, src + off, z);
|
size_mask += z;
|
||||||
off += z;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return off;
|
size_mask = ALIGN(size_mask, struct_type->alignment);
|
||||||
|
FFI_ASSERT ((size_mask & 0xff) == struct_type->size);
|
||||||
|
|
||||||
|
return size_mask;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Merge floating point data into integer data. If the structure is
|
||||||
|
entirely floating point, simply return a pointer to the fp data. */
|
||||||
|
|
||||||
|
static void *
|
||||||
|
ffi_struct_float_merge (int size_mask, void *vi, void *vf)
|
||||||
|
{
|
||||||
|
int size = size_mask & 0xff;
|
||||||
|
int mask = size_mask >> 8;
|
||||||
|
int n = size >> 2;
|
||||||
|
|
||||||
|
if (mask == 0)
|
||||||
|
return vi;
|
||||||
|
else if (mask == (1 << n) - 1)
|
||||||
|
return vf;
|
||||||
|
else
|
||||||
|
{
|
||||||
|
unsigned int *wi = vi, *wf = vf;
|
||||||
|
int i;
|
||||||
|
|
||||||
|
for (i = 0; i < n; ++i)
|
||||||
|
if ((mask >> i) & 1)
|
||||||
|
wi[i] = wf[i];
|
||||||
|
|
||||||
|
return vi;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Similar to ffi_struct_float_merge, but always store the merged result into VD. */
|
||||||
|
|
||||||
|
void FFI_HIDDEN
|
||||||
|
ffi_struct_float_copy (int size_mask, void *vd, void *vi, void *vf)
|
||||||
|
{
|
||||||
|
int size = size_mask & 0xff;
|
||||||
|
int mask = size_mask >> 8;
|
||||||
|
int n = size >> 2;
|
||||||
|
|
||||||
|
if (mask == 0)
|
||||||
|
;
|
||||||
|
else if (mask == (1 << n) - 1)
|
||||||
|
vi = vf;
|
||||||
|
else
|
||||||
|
{
|
||||||
|
unsigned int *wd = vd, *wi = vi, *wf = vf;
|
||||||
|
int i;
|
||||||
|
|
||||||
|
for (i = 0; i < n; ++i)
|
||||||
|
wd[i] = ((mask >> i) & 1 ? wf : wi)[i];
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
memcpy (vd, vi, size);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Perform machine dependent cif processing */
|
||||||
|
|
||||||
ffi_status FFI_HIDDEN
|
ffi_status FFI_HIDDEN
|
||||||
ffi_prep_cif_machdep(ffi_cif *cif)
|
ffi_prep_cif_machdep(ffi_cif *cif)
|
||||||
{
|
{
|
||||||
@@ -108,7 +170,10 @@ ffi_prep_cif_machdep(ffi_cif *cif)
|
|||||||
bytes = 8;
|
bytes = 8;
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
flags = SPARC_RET_STRUCT;
|
{
|
||||||
|
flags = ffi_struct_float_mask (rtype, 0) << SPARC_FLTMASK_SHIFT;
|
||||||
|
flags |= SPARC_RET_STRUCT;
|
||||||
|
}
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case FFI_TYPE_SINT8:
|
case FFI_TYPE_SINT8:
|
||||||
@@ -343,7 +408,7 @@ ffi_closure_sparc_inner_v9(ffi_closure *closure, void *rvalue,
|
|||||||
ffi_cif *cif;
|
ffi_cif *cif;
|
||||||
ffi_type **arg_types;
|
ffi_type **arg_types;
|
||||||
void **avalue;
|
void **avalue;
|
||||||
int i, argn, nargs, flags;
|
int i, argn, argx, nargs, flags;
|
||||||
|
|
||||||
cif = closure->cif;
|
cif = closure->cif;
|
||||||
arg_types = cif->arg_types;
|
arg_types = cif->arg_types;
|
||||||
@@ -364,12 +429,13 @@ ffi_closure_sparc_inner_v9(ffi_closure *closure, void *rvalue,
|
|||||||
argn = 0;
|
argn = 0;
|
||||||
|
|
||||||
/* Grab the addresses of the arguments from the stack frame. */
|
/* Grab the addresses of the arguments from the stack frame. */
|
||||||
for (i = 0; i < nargs; i++)
|
for (i = 0; i < nargs; i++, argn = argx)
|
||||||
{
|
{
|
||||||
ffi_type *ty = arg_types[i];
|
ffi_type *ty = arg_types[i];
|
||||||
void *a = &gpr[argn++];
|
void *a = &gpr[argn];
|
||||||
size_t z;
|
size_t z;
|
||||||
|
|
||||||
|
argx = argn + 1;
|
||||||
switch (ty->type)
|
switch (ty->type)
|
||||||
{
|
{
|
||||||
case FFI_TYPE_STRUCT:
|
case FFI_TYPE_STRUCT:
|
||||||
@@ -378,25 +444,31 @@ ffi_closure_sparc_inner_v9(ffi_closure *closure, void *rvalue,
|
|||||||
a = *(void **)a;
|
a = *(void **)a;
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
if (--argn < 16)
|
argx = argn + ALIGN (z, 8) / 8;
|
||||||
ffi_v9_layout_struct(arg_types[i], 8*argn, gpr, gpr, fpr);
|
if (argn < 16)
|
||||||
argn += ALIGN (z, 8) / 8;
|
{
|
||||||
|
int size_mask = ffi_struct_float_mask (ty, 0);
|
||||||
|
int argn_mask = (0xffff00 >> argn) & 0xff00;
|
||||||
|
|
||||||
|
/* Eliminate fp registers off the end. */
|
||||||
|
size_mask = (size_mask & 0xff) | (size_mask & argn_mask);
|
||||||
|
a = ffi_struct_float_merge (size_mask, gpr+argn, fpr+argn);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case FFI_TYPE_LONGDOUBLE:
|
case FFI_TYPE_LONGDOUBLE:
|
||||||
if (--argn & 1)
|
argn = ALIGN (argn, 2);
|
||||||
argn++;
|
|
||||||
a = (argn < 16 ? fpr : gpr) + argn;
|
a = (argn < 16 ? fpr : gpr) + argn;
|
||||||
argn += 2;
|
argx = argn + 2;
|
||||||
break;
|
break;
|
||||||
case FFI_TYPE_DOUBLE:
|
case FFI_TYPE_DOUBLE:
|
||||||
if (argn <= 16)
|
if (argn <= 16)
|
||||||
a = fpr + argn - 1;
|
a = fpr + argn;
|
||||||
break;
|
break;
|
||||||
case FFI_TYPE_FLOAT:
|
case FFI_TYPE_FLOAT:
|
||||||
if (argn <= 16)
|
if (argn <= 16)
|
||||||
a = fpr + argn - 1;
|
a = fpr + argn;
|
||||||
a += 4;
|
a += 4;
|
||||||
break;
|
break;
|
||||||
|
|
||||||
|
|||||||
@@ -16,3 +16,5 @@
|
|||||||
#define SPARC_FLAG_RET_MASK 15
|
#define SPARC_FLAG_RET_MASK 15
|
||||||
#define SPARC_FLAG_RET_IN_MEM 32
|
#define SPARC_FLAG_RET_IN_MEM 32
|
||||||
#define SPARC_FLAG_FP_ARGS 64
|
#define SPARC_FLAG_FP_ARGS 64
|
||||||
|
|
||||||
|
#define SPARC_FLTMASK_SHIFT 8
|
||||||
|
|||||||
@@ -177,12 +177,11 @@ E 15
|
|||||||
std %f6, [%l2+56]
|
std %f6, [%l2+56]
|
||||||
|
|
||||||
! Copy the structure into place.
|
! Copy the structure into place.
|
||||||
ldx [%i0+16], %o0 ! load rtype from cif
|
srl %l0, SPARC_FLTMASK_SHIFT, %o0 ! load size_mask
|
||||||
mov 0, %o1 ! load off
|
mov %i2, %o1 ! load dst
|
||||||
mov %i2, %o2 ! load dst
|
mov %l2, %o2 ! load src_gp
|
||||||
mov %l2, %o3 ! load src_int
|
call C(ffi_struct_float_copy)
|
||||||
call C(ffi_v9_layout_struct)
|
add %l2, 32, %o3 ! load src_fp
|
||||||
add %l2, 32, %o4 ! load src_fp
|
|
||||||
|
|
||||||
return %i7+8
|
return %i7+8
|
||||||
nop
|
nop
|
||||||
|
|||||||
Reference in New Issue
Block a user