Commit 49061652 authored by Leo P. Singer
Browse files

Rewrite loop so that compiler sees opportunity for vector FMA

Original: 277dfbb4a53eada9605dbdff034ffa63d00b6bc1
parent e354e515
......@@ -193,7 +193,7 @@ void bicubic_interp_free(bicubic_interp *interp)
double bicubic_interp_eval(const bicubic_interp *interp, double s, double t)
{
const double (*a)[4];
-double b[4];
+double b[4] __attribute__ ((aligned));
double is, it;
if (isnan(s) || isnan(t))
......@@ -202,6 +202,10 @@ double bicubic_interp_eval(const bicubic_interp *interp, double s, double t)
cubic_interp_index(interp->ft, interp->t0, interp->tlength, &t, &it);
a = interp->a[(int) (is * interp->slength + it)];
for (int i = 0; i < 4; i ++)
-b[i] = cubic_eval(a[i], s);
+b[i] = a[0][i] * s + a[1][i];
+for (int i = 0; i < 4; i ++)
+b[i] = b[i] * s + a[2][i];
+for (int i = 0; i < 4; i ++)
+b[i] = b[i] * s + a[3][i];
return cubic_eval(b, t);
}
......@@ -38,12 +38,12 @@
typedef struct {
double f, t0, length;
-double a[][4];
+double a[][4] __attribute__ ((aligned));
} cubic_interp;
typedef struct {
double fs, ft, s0, t0, slength, tlength;
-double a[][4][4];
+double a[][4][4] __attribute__ ((aligned));
} bicubic_interp;
cubic_interp *cubic_interp_init(
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment