Commit ad88e6d9 authored by Daniel Brown's avatar Daniel Brown
Browse files

Adding superlu multithreaded sparse solver for testing

parent 261ad750
......@@ -2,11 +2,15 @@
#
# Library Makefile for Finesse
.PHONY: clean NICSLU sparse formulc libkatnet libMNet KLUsparse Cuba-3.0
.PHONY: clean SuperLU_MT_2.1 NICSLU sparse formulc libkatnet libMNet KLUsparse Cuba-3.0
default : NICSLU sparse formulc libkatnet libMNet KLUsparse Cuba-3.0
default : SuperLU_MT_2.1 NICSLU sparse formulc libkatnet libMNet KLUsparse Cuba-3.0
# build the sparse package
SuperLU_MT_2.1:
$(MAKE) --directory=$@/CBLAS ARCH=$(ARCH)
$(MAKE) --directory=$@ ARCH=$(ARCH)
NICSLU:
$(MAKE) --directory=$@/lib ARCH=$(ARCH)
......@@ -33,7 +37,7 @@ Cuba-3.0:
cp ./Cuba-3.0/cuba.h ../src/
# cleanup targets
clean:
for d in sparse formulc libkatnet libMNet KLUsparse Cuba-3.0; \
for d in sparse formulc libkatnet libMNet KLUsparse Cuba-3.0 SuperLU_MT_2.1 NICSLU; \
do \
$(MAKE) --directory=$$d clean; \
done
......
include ../make.inc
HEADER = ../SRC
#######################################################################
# This is the makefile to create a library for C-BLAS.
# The files are organized as follows:
#
# SBLAS1 -- Single precision real BLAS routines
# CBLAS1 -- Single precision complex BLAS routines
# DBLAS1 -- Double precision real BLAS routines
# ZBLAS1 -- Double precision complex BLAS routines
#
# CB1AUX -- Real BLAS routines called by complex routines
# ZB1AUX -- D.P. real BLAS routines called by d.p. complex
# routines
#
# ALLBLAS -- Auxiliary routines for Level 2 and 3 BLAS
#
# SBLAS2 -- Single precision real BLAS2 routines
# CBLAS2 -- Single precision complex BLAS2 routines
# DBLAS2 -- Double precision real BLAS2 routines
# ZBLAS2 -- Double precision complex BLAS2 routines
#
# SBLAS3 -- Single precision real BLAS3 routines
# CBLAS3 -- Single precision complex BLAS3 routines
# DBLAS3 -- Double precision real BLAS3 routines
# ZBLAS3 -- Double precision complex BLAS3 routines
#
# The library can be set up to include routines for any combination
# of the four precisions. To create or add to the library, enter make
# followed by one or more of the precisions desired. Some examples:
# make single
# make single complex
# make single double complex complex16
# Alternatively, the command
# make
# without any arguments creates a library of all four precisions.
# The library is called
# blas.a
# and is created at the next higher directory level.
#
# To remove the object files after the library is created, enter
# make clean
#
#######################################################################
SBLAS1 = isamax.o sasum.o saxpy.o scopy.o sdot.o snrm2.o \
srot.o sscal.o
SBLAS2 = sgemv.o ssymv.o strsv.o sger.o ssyr2.o
DBLAS1 = idamax.o dasum.o daxpy.o dcopy.o ddot.o dnrm2.o \
drot.o dscal.o
DBLAS2 = dgemv.o dsymv.o dtrsv.o dger.o dsyr2.o
CBLAS1 = icamax.o scasum.o caxpy.o ccopy.o scnrm2.o \
cscal.o
CBLAS2 = cgemv.o chemv.o ctrsv.o cgerc.o cher2.o
ZBLAS1 = izamax.o dzasum.o zaxpy.o zcopy.o dznrm2.o \
zscal.o dcabs1.o
ZBLAS2 = zgemv.o zhemv.o ztrsv.o zgerc.o zher2.o
all: single double complex complex16
single: $(SBLAS1) $(SBLAS2) $(SBLAS3)
$(ARCHIVER) $(ARCHIVERFLAGS) $(BLASLIB) $(SBLAS1) $(ALLBLAS) \
$(SBLAS2) $(SBLAS3)
$(RANLIB) $(BLASLIB)
double: $(DBLAS1) $(DBLAS2) $(DBLAS3)
$(ARCHIVER) $(ARCHIVERFLAGS) $(BLASLIB) $(DBLAS1) $(ALLBLAS) \
$(DBLAS2) $(DBLAS3)
$(RANLIB) $(BLASLIB)
complex: $(CBLAS1) $(CBLAS2) $(CBLAS3)
$(ARCHIVER) $(ARCHIVERFLAGS) $(BLASLIB) $(CBLAS1) $(ALLBLAS) \
$(CBLAS2) $(CBLAS3)
$(RANLIB) $(BLASLIB)
complex16: $(ZBLAS1) $(ZBLAS2) $(ZBLAS3)
$(ARCHIVER) $(ARCHIVERFLAGS) $(BLASLIB) $(ZBLAS1) $(ALLBLAS) \
$(ZBLAS2) $(ZBLAS3)
$(RANLIB) $(BLASLIB)
.c.o:
$(CC) $(CFLAGS) $(CDEFS) -I$(HEADER) -c $< $(VERBOSE)
clean:
rm -f *.o
/* -- translated by f2c (version 19940927).
You must link the resulting object file with the libraries:
-lf2c -lm (in that order)
*/
#include "f2c.h"
/* Subroutine */ int caxpy_(integer *n, complex *ca, complex *cx, integer *
incx, complex *cy, integer *incy)
{
/* System generated locals */
integer i__1, i__2, i__3, i__4;
real r__1, r__2;
complex q__1, q__2;
/* Builtin functions */
double r_imag(complex *);
/* Local variables */
static integer i, ix, iy;
/* constant times a vector plus a vector.
jack dongarra, linpack, 3/11/78.
modified 12/3/93, array(1) declarations changed to array(*)
Parameter adjustments
Function Body */
#define CY(I) cy[(I)-1]
#define CX(I) cx[(I)-1]
if (*n <= 0) {
return 0;
}
if ((r__1 = ca->r, dabs(r__1)) + (r__2 = r_imag(ca), dabs(r__2)) == 0.f) {
return 0;
}
if (*incx == 1 && *incy == 1) {
goto L20;
}
/* code for unequal increments or equal increments
not equal to 1 */
ix = 1;
iy = 1;
if (*incx < 0) {
ix = (-(*n) + 1) * *incx + 1;
}
if (*incy < 0) {
iy = (-(*n) + 1) * *incy + 1;
}
i__1 = *n;
for (i = 1; i <= *n; ++i) {
i__2 = iy;
i__3 = iy;
i__4 = ix;
q__2.r = ca->r * CX(ix).r - ca->i * CX(ix).i, q__2.i = ca->r * CX(
ix).i + ca->i * CX(ix).r;
q__1.r = CY(iy).r + q__2.r, q__1.i = CY(iy).i + q__2.i;
CY(iy).r = q__1.r, CY(iy).i = q__1.i;
ix += *incx;
iy += *incy;
/* L10: */
}
return 0;
/* code for both increments equal to 1 */
L20:
i__1 = *n;
for (i = 1; i <= *n; ++i) {
i__2 = i;
i__3 = i;
i__4 = i;
q__2.r = ca->r * CX(i).r - ca->i * CX(i).i, q__2.i = ca->r * CX(
i).i + ca->i * CX(i).r;
q__1.r = CY(i).r + q__2.r, q__1.i = CY(i).i + q__2.i;
CY(i).r = q__1.r, CY(i).i = q__1.i;
/* L30: */
}
return 0;
} /* caxpy_ */
/* -- translated by f2c (version 19940927).
You must link the resulting object file with the libraries:
-lf2c -lm (in that order)
*/
#include "f2c.h"
/* Subroutine */ int ccopy_(integer *n, complex *cx, integer *incx, complex *
cy, integer *incy)
{
/* System generated locals */
integer i__1, i__2, i__3;
/* Local variables */
static integer i, ix, iy;
/* copies a vector, x, to a vector, y.
jack dongarra, linpack, 3/11/78.
modified 12/3/93, array(1) declarations changed to array(*)
Parameter adjustments
Function Body */
#define CY(I) cy[(I)-1]
#define CX(I) cx[(I)-1]
if (*n <= 0) {
return 0;
}
if (*incx == 1 && *incy == 1) {
goto L20;
}
/* code for unequal increments or equal increments
not equal to 1 */
ix = 1;
iy = 1;
if (*incx < 0) {
ix = (-(*n) + 1) * *incx + 1;
}
if (*incy < 0) {
iy = (-(*n) + 1) * *incy + 1;
}
i__1 = *n;
for (i = 1; i <= *n; ++i) {
i__2 = iy;
i__3 = ix;
CY(iy).r = CX(ix).r, CY(iy).i = CX(ix).i;
ix += *incx;
iy += *incy;
/* L10: */
}
return 0;
/* code for both increments equal to 1 */
L20:
i__1 = *n;
for (i = 1; i <= *n; ++i) {
i__2 = i;
i__3 = i;
CY(i).r = CX(i).r, CY(i).i = CX(i).i;
/* L30: */
}
return 0;
} /* ccopy_ */
/* -- translated by f2c (version 19940927).
You must link the resulting object file with the libraries:
-lf2c -lm (in that order)
*/
#include "f2c.h"
/* Complex */ VOID cdotc_(complex * ret_val, integer *n, complex *cx, integer
*incx, complex *cy, integer *incy)
{
/* System generated locals */
integer i__1, i__2;
complex q__1, q__2, q__3;
/* Builtin functions */
void r_cnjg(complex *, complex *);
/* Local variables */
static integer i;
static complex ctemp;
static integer ix, iy;
/* forms the dot product of two vectors, conjugating the first
vector.
jack dongarra, linpack, 3/11/78.
modified 12/3/93, array(1) declarations changed to array(*)
Parameter adjustments */
--cy;
--cx;
/* Function Body */
ctemp.r = 0.f, ctemp.i = 0.f;
ret_val->r = 0.f, ret_val->i = 0.f;
if (*n <= 0) {
return ;
}
if (*incx == 1 && *incy == 1) {
goto L20;
}
/* code for unequal increments or equal increments
not equal to 1 */
ix = 1;
iy = 1;
if (*incx < 0) {
ix = (-(*n) + 1) * *incx + 1;
}
if (*incy < 0) {
iy = (-(*n) + 1) * *incy + 1;
}
i__1 = *n;
for (i = 1; i <= *n; ++i) {
r_cnjg(&q__3, &cx[ix]);
i__2 = iy;
q__2.r = q__3.r * cy[iy].r - q__3.i * cy[iy].i, q__2.i = q__3.r *
cy[iy].i + q__3.i * cy[iy].r;
q__1.r = ctemp.r + q__2.r, q__1.i = ctemp.i + q__2.i;
ctemp.r = q__1.r, ctemp.i = q__1.i;
ix += *incx;
iy += *incy;
/* L10: */
}
ret_val->r = ctemp.r, ret_val->i = ctemp.i;
return ;
/* code for both increments equal to 1 */
L20:
i__1 = *n;
for (i = 1; i <= *n; ++i) {
r_cnjg(&q__3, &cx[i]);
i__2 = i;
q__2.r = q__3.r * cy[i].r - q__3.i * cy[i].i, q__2.i = q__3.r *
cy[i].i + q__3.i * cy[i].r;
q__1.r = ctemp.r + q__2.r, q__1.i = ctemp.i + q__2.i;
ctemp.r = q__1.r, ctemp.i = q__1.i;
/* L30: */
}
ret_val->r = ctemp.r, ret_val->i = ctemp.i;
return ;
} /* cdotc_ */
/* -- translated by f2c (version 19940927).
You must link the resulting object file with the libraries:
-lf2c -lm (in that order)
*/
#include "f2c.h"
/* Subroutine */ int cgemv_(char *trans, integer *m, integer *n, complex *
alpha, complex *a, integer *lda, complex *x, integer *incx, complex *
beta, complex *y, integer *incy)
{
/* System generated locals */
integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5;
complex q__1, q__2, q__3;
/* Builtin functions */
void r_cnjg(complex *, complex *);
/* Local variables */
static integer info;
static complex temp;
static integer lenx, leny, i, j;
extern logical lsame_(char *, char *);
static integer ix, iy, jx, jy, kx, ky;
extern /* Subroutine */ int xerbla_(char *, integer *);
static logical noconj;
/* Purpose
=======
CGEMV performs one of the matrix-vector operations
y := alpha*A*x + beta*y, or y := alpha*A'*x + beta*y, or
y := alpha*conjg( A' )*x + beta*y,
where alpha and beta are scalars, x and y are vectors and A is an
m by n matrix.
Parameters
==========
TRANS - CHARACTER*1.
On entry, TRANS specifies the operation to be performed as
follows:
TRANS = 'N' or 'n' y := alpha*A*x + beta*y.
TRANS = 'T' or 't' y := alpha*A'*x + beta*y.
TRANS = 'C' or 'c' y := alpha*conjg( A' )*x + beta*y.
Unchanged on exit.
M - INTEGER.
On entry, M specifies the number of rows of the matrix A.
M must be at least zero.
Unchanged on exit.
N - INTEGER.
On entry, N specifies the number of columns of the matrix A.
N must be at least zero.
Unchanged on exit.
ALPHA - COMPLEX .
On entry, ALPHA specifies the scalar alpha.
Unchanged on exit.
A - COMPLEX array of DIMENSION ( LDA, n ).
Before entry, the leading m by n part of the array A must
contain the matrix of coefficients.
Unchanged on exit.
LDA - INTEGER.
On entry, LDA specifies the first dimension of A as declared
in the calling (sub) program. LDA must be at least
max( 1, m ).
Unchanged on exit.
X - COMPLEX array of DIMENSION at least
( 1 + ( n - 1 )*abs( INCX ) ) when TRANS = 'N' or 'n'
and at least
( 1 + ( m - 1 )*abs( INCX ) ) otherwise.
Before entry, the incremented array X must contain the
vector x.
Unchanged on exit.
INCX - INTEGER.
On entry, INCX specifies the increment for the elements of
X. INCX must not be zero.
Unchanged on exit.
BETA - COMPLEX .
On entry, BETA specifies the scalar beta. When BETA is
supplied as zero then Y need not be set on input.
Unchanged on exit.
Y - COMPLEX array of DIMENSION at least
( 1 + ( m - 1 )*abs( INCY ) ) when TRANS = 'N' or 'n'
and at least
( 1 + ( n - 1 )*abs( INCY ) ) otherwise.
Before entry with BETA non-zero, the incremented array Y
must contain the vector y. On exit, Y is overwritten by the
updated vector y.
INCY - INTEGER.
On entry, INCY specifies the increment for the elements of
Y. INCY must not be zero.
Unchanged on exit.
Level 2 Blas routine.
-- Written on 22-October-1986.
Jack Dongarra, Argonne National Lab.
Jeremy Du Croz, Nag Central Office.
Sven Hammarling, Nag Central Office.
Richard Hanson, Sandia National Labs.
Test the input parameters.
Parameter adjustments
Function Body */
#define X(I) x[(I)-1]
#define Y(I) y[(I)-1]
#define A(I,J) a[(I)-1 + ((J)-1)* ( *lda)]
info = 0;
if (! lsame_(trans, "N") && ! lsame_(trans, "T") && !
lsame_(trans, "C")) {
info = 1;
} else if (*m < 0) {
info = 2;
} else if (*n < 0) {
info = 3;
} else if (*lda < max(1,*m)) {
info = 6;
} else if (*incx == 0) {
info = 8;
} else if (*incy == 0) {
info = 11;
}
if (info != 0) {
xerbla_("CGEMV ", &info);
return 0;
}
/* Quick return if possible. */
if (*m == 0 || *n == 0 || alpha->r == 0.f && alpha->i == 0.f && (beta->r
== 1.f && beta->i == 0.f)) {
return 0;
}
noconj = lsame_(trans, "T");
/* Set LENX and LENY, the lengths of the vectors x and y, and set
up the start points in X and Y. */
if (lsame_(trans, "N")) {
lenx = *n;
leny = *m;
} else {
lenx = *m;
leny = *n;
}
if (*incx > 0) {
kx = 1;
} else {
kx = 1 - (lenx - 1) * *incx;
}
if (*incy > 0) {
ky = 1;
} else {
ky = 1 - (leny - 1) * *incy;
}
/* Start the operations. In this version the elements of A are
accessed sequentially with one pass through A.
First form y := beta*y. */
if (beta->r != 1.f || beta->i != 0.f) {
if (*incy == 1) {
if (beta->r == 0.f && beta->i == 0.f) {
i__1 = leny;
for (i = 1; i <= leny; ++i) {
i__2 = i;
Y(i).r = 0.f, Y(i).i = 0.f;
/* L10: */
}
} else {
i__1 = leny;
for (i = 1; i <= leny; ++i) {
i__2 = i;
i__3 = i;
q__1.r = beta->r * Y(i).r - beta->i * Y(i).i,
q__1.i = beta->r * Y(i).i + beta->i * Y(i)
.r;
Y(i).r = q__1.r, Y(i).i = q__1.i;
/* L20: */
}
}
} else {
iy = ky;
if (beta->r == 0.f && beta->i == 0.f) {
i__1 = leny;
for (i = 1; i <= leny; ++i) {
i__2 = iy;
Y(iy).r = 0.f, Y(iy).i = 0.f;
iy += *incy;
/* L30: */
}
} else {
i__1 = leny;
for (i = 1; i <= leny; ++i) {
i__2 = iy;
i__3 = iy;
q__1.r = beta->r * Y(iy).r - beta->i * Y(iy).i,
q__1.i = beta->r * Y(iy).i + beta->i * Y(iy)
.r;
Y(iy).r = q__1.r, Y(iy).i = q__1.i;
iy += *incy;
/* L40: */
}
}
}
}
if (alpha->r == 0.f && alpha->i == 0.f) {
return 0;
}
if (lsame_(trans, "N")) {
/* Form y := alpha*A*x + y. */