Skip to content
GitLab
Projects
Groups
Snippets
Help
Loading...
Help
What's new
7
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Sign in
Toggle navigation
Open sidebar
finesse
finesse
Commits
09697f6f
Commit
09697f6f
authored
Dec 03, 2019
by
Daniel Brown
Browse files
Options
Browse Files
Download
Plain Diff
Merge branch 'develop' of git.ligo.org:finesse/finesse into develop
parents
86c0c3c4
4c9bce39
Changes
13
Expand all
Hide whitespace changes
Inline
Side-by-side
Showing
13 changed files
with
495 additions
and
1433 deletions
+495
-1433
finesse.sh
finesse.sh
+424
-477
lib/SuperLU_MT_2.1/EXAMPLE/pdlinsolx.c.bak
lib/SuperLU_MT_2.1/EXAMPLE/pdlinsolx.c.bak
+0
-248
lib/SuperLU_MT_2.1/EXAMPLE/pdspmd.c.bak
lib/SuperLU_MT_2.1/EXAMPLE/pdspmd.c.bak
+0
-417
lib/SuperLU_MT_2.1/SRC/sp_coletree.c.orig
lib/SuperLU_MT_2.1/SRC/sp_coletree.c.orig
+0
-266
src/Makefile
src/Makefile
+23
-6
src/fmemopen.c
src/fmemopen.c
+1
-0
src/kat.h
src/kat.h
+4
-1
src/kat_aa.c
src/kat_aa.c
+3
-3
src/kat_aux.c
src/kat_aux.c
+17
-7
src/kat_aux.h
src/kat_aux.h
+5
-2
src/kat_calc.c
src/kat_calc.c
+15
-4
src/kat_io.c
src/kat_io.c
+2
-2
src/kat_server.c
src/kat_server.c
+1
-0
No files found.
finesse.sh
View file @
09697f6f
This diff is collapsed.
Click to expand it.
lib/SuperLU_MT_2.1/EXAMPLE/pdlinsolx.c.bak
deleted
100755 → 0
View file @
86c0c3c4
/*
* -- SuperLU MT routine (version 2.0) --
* Lawrence Berkeley National Lab, Univ. of California Berkeley,
* and Xerox Palo Alto Research Center.
* September 10, 2007
*
*/
#include "pdsp_defs.h"
main(int argc, char *argv[])
{
SuperMatrix A, L, U;
SuperMatrix B, X;
NCformat *Astore;
SCPformat *Lstore;
NCPformat *Ustore;
int nprocs;
fact_t fact;
trans_t trans;
yes_no_t refact, usepr;
equed_t equed;
double *a;
int *asub, *xa;
int *perm_c; /* column permutation vector */
int *perm_r; /* row permutations from partial pivoting */
void *work;
superlumt_options_t superlumt_options;
int info, lwork, nrhs, ldx, panel_size, relax;
int m, n, nnz, permc_spec;
int i, firstfact;
double *rhsb, *rhsx, *xact;
double *R, *C;
double *ferr, *berr;
double u, drop_tol, rpg, rcond;
superlu_memusage_t superlu_memusage;
void parse_command_line();
/* Default parameters to control factorization. */
nprocs = 1;
fact = EQUILIBRATE;
trans = NOTRANS;
equed = NOEQUIL;
refact= NO;
panel_size = sp_ienv(1);
relax = sp_ienv(2);
u = 1.0;
usepr = NO;
drop_tol = 0.0;
lwork = 0;
nrhs = 1;
/* Command line options to modify default behavior. */
parse_command_line(argc, argv, &nprocs, &lwork, &panel_size, &relax,
&u, &fact, &trans, &refact, &equed);
if ( lwork > 0 ) {
work = SUPERLU_MALLOC(lwork);
printf("Use work space of size LWORK = %d bytes\n", lwork);
if ( !work ) {
SUPERLU_ABORT("DLINSOLX: cannot allocate work[]");
}
}
#if ( PRNTlevel==1 )
cpp_defs();
#endif
#define HB
#if defined( DEN )
m = n;
nnz = n * n;
dband(n, n, nnz, &a, &asub, &xa);
#elif defined( BAND )
m = n;
nnz = (2*b+1) * n;
dband(n, b, nnz, &a, &asub, &xa);
#elif defined( BD )
nb = 5;
bs = 200;
m = n = bs * nb;
nnz = bs * bs * nb;
dblockdiag(nb, bs, nnz, &a, &asub, &xa);
#elif defined( HB )
dreadhb(&m, &n, &nnz, &a, &asub, &xa);
#else
dreadmt(&m, &n, &nnz, &a, &asub, &xa);
#endif
firstfact = (fact == FACTORED || refact == YES);
dCreate_CompCol_Matrix(&A, m, n, nnz, a, asub, xa, SLU_NC, SLU_D, SLU_GE);
Astore = A.Store;
printf("Dimension %dx%d; # nonzeros %d\n", A.nrow, A.ncol, Astore->nnz);
if (!(rhsb = doubleMalloc(m * nrhs))) SUPERLU_ABORT("Malloc fails for rhsb[].");
if (!(rhsx = doubleMalloc(m * nrhs))) SUPERLU_ABORT("Malloc fails for rhsx[].");
dCreate_Dense_Matrix(&B, m, nrhs, rhsb, m, SLU_DN, SLU_D, SLU_GE);
dCreate_Dense_Matrix(&X, m, nrhs, rhsx, m, SLU_DN, SLU_D, SLU_GE);
xact = doubleMalloc(n * nrhs);
ldx = n;
dGenXtrue(n, nrhs, xact, ldx);
dFillRHS(trans, nrhs, xact, ldx, &A, &B);
if (!(perm_r = intMalloc(m))) SUPERLU_ABORT("Malloc fails for perm_r[].");
if (!(perm_c = intMalloc(n))) SUPERLU_ABORT("Malloc fails for perm_c[].");
if (!(R = (double *) SUPERLU_MALLOC(A.nrow * sizeof(double))))
SUPERLU_ABORT("SUPERLU_MALLOC fails for R[].");
if ( !(C = (double *) SUPERLU_MALLOC(A.ncol * sizeof(double))) )
SUPERLU_ABORT("SUPERLU_MALLOC fails for C[].");
if ( !(ferr = (double *) SUPERLU_MALLOC(nrhs * sizeof(double))) )
SUPERLU_ABORT("SUPERLU_MALLOC fails for ferr[].");
if ( !(berr = (double *) SUPERLU_MALLOC(nrhs * sizeof(double))) )
SUPERLU_ABORT("SUPERLU_MALLOC fails for berr[].");
/*
* Get column permutation vector perm_c[], according to permc_spec:
* permc_spec = 0: natural ordering
* permc_spec = 1: minimum degree ordering on structure of A'*A
* permc_spec = 2: minimum degree ordering on structure of A'+A
* permc_spec = 3: approximate minimum degree for unsymmetric matrices
*/
permc_spec = 1;
get_perm_c(permc_spec, &A, perm_c);
superlumt_options.nprocs = nprocs;
superlumt_options.fact = fact;
superlumt_options.trans = trans;
superlumt_options.refact = refact;
superlumt_options.panel_size = panel_size;
superlumt_options.relax = relax;
superlumt_options.diag_pivot_thresh = u;
superlumt_options.usepr = usepr;
superlumt_options.drop_tol = drop_tol;
superlumt_options.SymmetricMode = NO;
superlumt_options.PrintStat = NO;
superlumt_options.perm_c = perm_c;
superlumt_options.perm_r = perm_r;
superlumt_options.work = work;
superlumt_options.lwork = lwork;
/*
* Solve the system and compute the condition number
* and error bounds using pdgssvx.
*/
pdgssvx(nprocs, &superlumt_options, &A, perm_c, perm_r,
&equed, R, C, &L, &U, &B, &X, &rpg, &rcond,
ferr, berr, &superlu_memusage, &info);
printf("pdgssvx(): info %d\n", info);
if ( info == 0 || info == n+1 ) {
printf("Recip. pivot growth = %e\n", rpg);
printf("Recip. condition number = %e\n", rcond);
printf("%8s%16s%16s\n", "rhs", "FERR", "BERR");
for (i = 0; i < nrhs; ++i) {
printf("%8d%16e%16e\n", i+1, ferr[i], berr[i]);
}
Lstore = (SCPformat *) L.Store;
Ustore = (NCPformat *) U.Store;
printf("No of nonzeros in factor L = %d\n", Lstore->nnz);
printf("No of nonzeros in factor U = %d\n", Ustore->nnz);
printf("No of nonzeros in L+U = %d\n", Lstore->nnz + Ustore->nnz - n);
printf("L\\U MB %.3f\ttotal MB needed %.3f\texpansions %d\n",
superlu_memusage.for_lu/1e6, superlu_memusage.total_needed/1e6,
superlu_memusage.expansions);
fflush(stdout);
} else if ( info > 0 && lwork == -1 ) {
printf("** Estimated memory: %d bytes\n", info - n);
}
SUPERLU_FREE (rhsb);
SUPERLU_FREE (rhsx);
SUPERLU_FREE (xact);
SUPERLU_FREE (perm_r);
SUPERLU_FREE (perm_c);
SUPERLU_FREE (R);
SUPERLU_FREE (C);
SUPERLU_FREE (ferr);
SUPERLU_FREE (berr);
Destroy_CompCol_Matrix(&A);
Destroy_SuperMatrix_Store(&B);
Destroy_SuperMatrix_Store(&X);
if ( lwork >= 0 ) {
Destroy_SuperNode_SCP(&L);
Destroy_CompCol_NCP(&U);
}
}
/*
* Parse command line options.
*/
void
parse_command_line(int argc, char *argv[], int *nprocs, int *lwork,
int *w, int *relax, double *u, fact_t *fact,
trans_t *trans, yes_no_t *refact, equed_t *equed)
{
int c;
extern char *optarg;
while ( (c = getopt(argc, argv, "hp:l:w:x:u:f:t:r:e:")) != EOF ) {
switch (c) {
case 'h':
printf("Options:\n");
printf("\t-p <int> - number of processes\n");
printf("\t-l <int> - length of work[*] array\n");
printf("\t-w <int> - panel size\n");
printf("\t-x <int> - maximum size of relaxed supernodes\n");
printf("\t-u <int> - pivoting threshold\n");
printf("\t-f <FACTORED/DOFACT/EQUILIBRATE> - factor control\n");
printf("\t-t <NOTRANS/TRANS/CONJ> - transpose or not\n");
printf("\t-r <NO/YES> - refactor or not\n");
printf("\t-e <NOEQUIL/ROW/COL/BOTH> - equilibrate or not\n");
exit(1);
break;
case 'p': *nprocs = atoi(optarg);
break;
case 'l': *lwork = atoi(optarg);
break;
case 'w': *w = atoi(optarg);
break;
case 'x': *relax = atoi(optarg);
break;
case 'u': *u = atof(optarg);
break;
case 'f': *fact = (fact_t) atoi(optarg);
break;
case 't': *trans = (trans_t) atoi(optarg);
break;
case 'r': *refact = (yes_no_t) atoi(optarg);
break;
case 'e': *equed = (equed_t) atoi(optarg);
break;
default: fprintf(stderr, "Invalid command line option.\n");
break;
}
}
}
lib/SuperLU_MT_2.1/EXAMPLE/pdspmd.c.bak
deleted
100755 → 0
View file @
86c0c3c4
/*
* -- SuperLU MT routine (version 2.0) --
* Lawrence Berkeley National Lab, Univ. of California Berkeley,
* and Xerox Palo Alto Research Center.
* September 10, 2007
*
* Purpose
* =======
*
* This program illustrates how to integrate the SPMD mode
* of the factor routine pdgstrf_thread() into a larger SPMD
* application code, and manage the threads yourself.
* In this example, the threads creation happens only once.
*
*/
#include <stdlib.h> /* for getenv and atoi */
#include "pdsp_defs.h"
/* Arguments passed to each dot product thread. */
typedef struct {
int i;
int len;
int nprocs;
double *global_dot;
double *x;
} pddot_threadarg_t;
#if ( MACH==SUN )
mutex_t pddot_lock;
#elif ( MACH==DEC || MACH==PTHREAD )
pthread_mutex_t pddot_lock;
#elif ( MACH==SGI || MACH==ORIGIN || MACH==CRAY_PVP )
int pddot_lock = 1;
#endif
/* Arguments passed to each SPMD program. */
typedef struct {
pdgstrf_threadarg_t *pdgstrf_threadarg; /* for sparse LU. */
pddot_threadarg_t *pddot_threadarg; /* for dot product. */
} dspmd_arg_t;
void *dspmd(void *);
void *pddot_thread(void *);
main(int argc, char *argv[])
{
SuperMatrix A, AC, L, U, B;
NCformat *Astore;
SCPformat *Lstore;
NCPformat *Ustore;
superlumt_options_t superlumt_options;
pxgstrf_shared_t pxgstrf_shared;
pdgstrf_threadarg_t *pdgstrf_threadarg;
pddot_threadarg_t *pddot_threadarg;
dspmd_arg_t *dspmd_arg;
int nprocs;
fact_t fact;
trans_t trans;
yes_no_t refact, usepr;
double u, drop_tol;
double *a;
int *asub, *xa;
int *perm_c; /* column permutation vector */
int *perm_r; /* row permutations from partial pivoting */
void *work;
int info, lwork, nrhs, ldx;
int m, n, nnz, permc_spec, panel_size, relax;
int i, firstfact;
double *rhsb, *xact;
Gstat_t Gstat;
int vlength;
double *xvector, xdot, temp;
double zero = 0.0;
#if ( MACH==SUN )
thread_t *thread_id;
#elif ( MACH==DEC || MACH==PTHREAD )
pthread_t *thread_id;
void *status;
#endif
void parse_command_line();
/* Default parameters to control factorization. */
nprocs = 1;
fact = EQUILIBRATE;
trans = NOTRANS;
refact= NO;
panel_size = sp_ienv(1);
relax = sp_ienv(2);
u = 1.0;
usepr = NO;
drop_tol = 0.0;
work = NULL;
lwork = 0;
nrhs = 1;
/* Get the number of processes from command line. */
parse_command_line(argc, argv, &nprocs);
/* Read the input matrix stored in Harwell-Boeing format. */
dreadhb(&m, &n, &nnz, &a, &asub, &xa);
/* Set up the sparse matrix data structure for A. */
dCreate_CompCol_Matrix(&A, m, n, nnz, a, asub, xa, SLU_NC, SLU_D, SLU_GE);
if (!(rhsb = doubleMalloc(m * nrhs))) SUPERLU_ABORT("Malloc fails for rhsb[].");
dCreate_Dense_Matrix(&B, m, nrhs, rhsb, m, SLU_DN, SLU_D, SLU_GE);
xact = doubleMalloc(n * nrhs);
ldx = n;
dGenXtrue(n, nrhs, xact, ldx);
dFillRHS(trans, nrhs, xact, ldx, &A, &B);
if (!(perm_r = intMalloc(m))) SUPERLU_ABORT("Malloc fails for perm_r[].");
if (!(perm_c = intMalloc(n))) SUPERLU_ABORT("Malloc fails for perm_c[].");
/* ------------------------------------------------------------
Get column permutation vector perm_c[], according to permc_spec:
permc_spec = 0: natural ordering
permc_spec = 1: minimum degree ordering on structure of A'*A
permc_spec = 2: minimum degree ordering on structure of A'+A
permc_spec = 3: approximate minimum degree for unsymmetric matrices
------------------------------------------------------------*/
permc_spec = 1;
get_perm_c(permc_spec, &A, perm_c);
/* ------------------------------------------------------------
Allocate storage and initialize statistics variables.
------------------------------------------------------------*/
StatAlloc(n, nprocs, panel_size, relax, &Gstat);
StatInit(n, nprocs, &Gstat);
/* ------------------------------------------------------------
Initialize the option structure superlumt_options using the
user-input parameters;
Apply perm_c to the columns of original A to form AC.
------------------------------------------------------------*/
pdgstrf_init(nprocs, fact, trans, refact, panel_size, relax,
u, usepr, drop_tol, perm_c, perm_r,
work, lwork, &A, &AC, &superlumt_options, &Gstat);
/* --------------------------------------------------------------
Initializes the parallel data structures for pdgstrf_thread().
--------------------------------------------------------------*/
pdgstrf_threadarg = pdgstrf_thread_init(&AC, &L, &U, &superlumt_options,
&pxgstrf_shared, &Gstat, &info);
/* ------------------------------------------------------------
Initialization for inner product routine.
------------------------------------------------------------*/
vlength = 1000;
xdot = zero;
xvector = doubleMalloc(vlength);
for (i = 0; i < vlength; ++i){
xvector[i] = i+1;
}
pddot_threadarg = (pddot_threadarg_t *)
SUPERLU_MALLOC(nprocs * sizeof(pddot_threadarg_t));
#if ( MACH==SUN )
mutex_init(&pddot_lock, USYNC_THREAD, 0);
#elif ( MACH==DEC || MACH==PTHREAD )
pthread_mutex_init(&pddot_lock, NULL);
#endif
for (i = 0; i < nprocs; ++i) {
pddot_threadarg[i].i = i;
pddot_threadarg[i].len = vlength;
pddot_threadarg[i].nprocs = nprocs;
pddot_threadarg[i].global_dot = &xdot;
pddot_threadarg[i].x = xvector;
}
/* ------------------------------------------------------------
Create multiple threads
------------------------------------------------------------*/
#if ( MACH==SUN ) /* Use Solaris thread ... */
/* Prepare arguments to each SPMD program. */
dspmd_arg = (dspmd_arg_t *)
SUPERLU_MALLOC(nprocs * sizeof(dspmd_arg_t));
for (i = 0; i < nprocs; ++i) {
dspmd_arg[i].pdgstrf_threadarg = &pdgstrf_threadarg[i];
dspmd_arg[i].pddot_threadarg = &pddot_threadarg[i];
}
thread_id = (thread_t *) SUPERLU_MALLOC(nprocs * sizeof(thread_t));
/* Run the SPMD programs in parallel. */
for (i = 0; i < nprocs; ++i)
thr_create(NULL, 0, dspmd, &dspmd_arg[i], 0, &thread_id[i]);
/* Wait for all threads to terminate. */
for (i = 0; i < nprocs; i++) pthread_join(thread_id[i], 0, 0);
SUPERLU_FREE(dspmd_arg);
SUPERLU_FREE (thread_id);
#elif ( MACH==DEC || MACH==PTHREAD ) /* Use pthread ... */
/* Prepare arguments to each SPMD program. */
dspmd_arg = (dspmd_arg_t *)
SUPERLU_MALLOC(nprocs * sizeof(dspmd_arg_t));
for (i = 0; i < nprocs; ++i) {
dspmd_arg[i].pdgstrf_threadarg = &pdgstrf_threadarg[i];
dspmd_arg[i].pddot_threadarg = &pddot_threadarg[i];
}
thread_id = (pthread_t *) SUPERLU_MALLOC(nprocs * sizeof(pthread_t));
/* Run the SPMD programs in parallel. */
for (i = 0; i < nprocs; ++i)
pthread_create(&thread_id[i], NULL,
dspmd, &dspmd_arg[i]);
/* Wait for all threads to terminate. */
for (i = 0; i < nprocs; i++) pthread_join(thread_id[i], &status);
SUPERLU_FREE(dspmd_arg);
SUPERLU_FREE (thread_id);
/* _PTHREAD */
#elif ( MACH==SGI || MACH==ORIGIN ) /* Use parallel C ... */
if ( getenv("MP_SET_NUMTHREADS") ) {
i = atoi(getenv("MP_SET_NUMTHREADS"));
if ( nprocs > i ) {
printf("nprocs=%d > environment allowed: MP_SET_NUMTHREADS=%d\n",
nprocs, i);
exit(-1);
}
}
#pragma parallel
#pragma shared (pdgstrf_threadarg, pddot_threadarg)
#pragma numthreads (nprocs)
{
pdgstrf_thread( pdgstrf_threadarg );
pddot_thread( pddot_threadarg );
}
/* _SGI or _ORIGIN */
#elif ( MACH==CRAY_PVP ) /* Use C microtasking ... */
if ( getenv("NCPUS") ) {
i = atoi(getenv("NCPUS"));
if ( nprocs > i ) {
printf("nprocs=%d > environment allowed: NCPUS=%d\n",
nprocs, i);
exit(-1);
}
}
#pragma _CRI taskloop private(i,nprocs) \
shared(pdgstrf_threadarg, pddot_threadarg)
/* stand-alone task loop */
for (i = 0; i < nprocs; ++i) {
pdgstrf_thread( &pdgstrf_threadarg[i] );
pddot_thread( &pddot_threadarg[i] );
}
/* _CRAY_PVP */
#endif /* _PTHREAD */
/* ------------------------------------------------------------
Clean up and free storage after multithreaded factorization.
------------------------------------------------------------*/
pdgstrf_thread_finalize(pdgstrf_threadarg, &pxgstrf_shared,
&A, perm_r, &L, &U);
/* ------------------------------------------------------------
Deallocate storage after factorization.
------------------------------------------------------------*/
pxgstrf_finalize(&superlumt_options, &AC);
/* ------------------------------------------------------------
Solve the system A*X=B, overwriting B with X.
------------------------------------------------------------*/
dgstrs(trans, &L, &U, perm_r, perm_c, &B, &Gstat, &info);
printf("\n** Result of sparse LU **\n");
dinf_norm_error(nrhs, &B, xact); /* Check inf. norm of the error */
Lstore = (SCPformat *) L.Store;
Ustore = (NCPformat *) U.Store;
printf("No of nonzeros in factor L = %d\n", Lstore->nnz);
printf("No of nonzeros in factor U = %d\n", Ustore->nnz);
printf("No of nonzeros in L+U = %d\n", Lstore->nnz + Ustore->nnz - n);
fflush(stdout);
/* DAN FIX THIS */
temp = vlength*(vlength+1)*(2*vlength+1)/6;
printf("\n** Result of dot product **\n");
printf("computed %e, correct %e\n", xdot, temp);
SUPERLU_FREE (rhsb);
SUPERLU_FREE (xact);
SUPERLU_FREE (perm_r);
SUPERLU_FREE (perm_c);
Destroy_CompCol_Matrix(&A);
Destroy_SuperMatrix_Store(&B);
if ( lwork >= 0 ) {
Destroy_SuperNode_SCP(&L);
Destroy_CompCol_NCP(&U);
}
StatFree(&Gstat);
SUPERLU_FREE(pddot_threadarg);
SUPERLU_FREE(xvector);
#if ( MACH==SUN )
mutex_destroy( &pddot_lock );
#elif ( MACH==DEC || MACH==PTHREAD )
pthread_mutex_destroy( &pddot_lock );
#endif
}
void
*dspmd(void *arg)
{
pdgstrf_threadarg_t *pdgstrf_threadarg;
pddot_threadarg_t *pddot_threadarg;
pdgstrf_threadarg = ((dspmd_arg_t *)arg)->pdgstrf_threadarg;
pddot_threadarg = ((dspmd_arg_t *)arg)->pddot_threadarg;
/* Perform LU factorization. */
pdgstrf_thread(pdgstrf_threadarg);
/* Perform local inner product, and add the sum into the global one. */
pddot_thread(pddot_threadarg);
return 0;
}
void
*pddot_thread(void *arg)
{
#if ( MACH==SGI || MACH==ORIGIN )
#if ( MACH==SGI )
int i = mpc_my_threadnum();
#else
int i = mp_my_threadnum();
#endif
pddot_threadarg_t *pddot_arg = &((pddot_threadarg_t *)arg)[i];
#else
pddot_threadarg_t *pddot_arg = arg;
int i = pddot_arg->i;
#endif
int len = pddot_arg->len;
int nprocs = pddot_arg->nprocs;
double *global_dot = pddot_arg->global_dot;
double *x = pddot_arg->x;
int chunk, start, end;
double temp;
double zero = 0.0;
chunk = len / nprocs;
start = i * chunk;
end = start + chunk;
if ( i == nprocs-1 ) end = len;
printf("(%d) nprocs %d, chunk %d, start %d, end %d\n",
i, nprocs, chunk, start, end);
temp = zero;
for (i = start; i < end; ++i){
temp += x[i]*x[i];
}
#if ( MACH==SUN )
mutex_lock( &pddot_lock );
#elif ( MACH==DEC || MACH==PTHREAD )
pthread_mutex_lock( &pddot_lock );
#elif ( MACH==SGI || MACH==ORIGIN )
#pragma critical lock( pddot_lock );
#elif ( MACH==CRAY_PVP )
#pragma _CRI guard pddot_lock
#endif
{
*global_dot += temp;
}
#if ( MACH==SUN )
mutex_unlock( &pddot_lock );
#elif ( MACH==DEC || MACH==PTHREAD )
pthread_mutex_unlock( &pddot_lock );
#elif ( MACH==CRAY_PVP )