Commit 84c5b777 authored by Adam Mercer's avatar Adam Mercer Committed by Adam Mercer
Browse files

inital cuda fft support

Original: 8ffd56c35216978a74b82f4c1e05c329e87abaa0
parent 2d6f8187
......@@ -112,6 +112,8 @@ packages/fft/test/AvgSpecTest
packages/fft/test/ComplexFFTTest
packages/fft/test/RealFFTTest
packages/fft/test/TimeFreqFFTTest
packages/fft/src/CudaFFT.c
packages/fft/src/CudaFFT.linkinfo
packages/findchirp/test/FindChirpBCVSpinTest
packages/findchirp/test/FindChirpTDTest
packages/framedata/src/FrameSeries.c
......
......@@ -162,9 +162,10 @@ LAL_WITH_EXTRA_CFLAGS
LAL_WITH_EXTRA_CPPFLAGS
LAL_WITH_EXTRA_LDFLAGS
LAL_WITH_EXTRA_LIBS
LAL_ENABLE_GCC_FLAGS
LAL_ENABLE_INTELFFT
LAL_WITH_CUDA
LAL_ENABLE_GCC_FLAGS
LAL_ENABLE_FRAME
LAL_ENABLE_METAIO
LAL_ENABLE_XML
......@@ -397,6 +398,9 @@ fi
dnl Define variables describing what was enabled.
if test "${cuda}" = "true"; then
AC_DEFINE(LAL_CUDA_ENABLED, 1, [Define if using cuda library])
fi
if test "${fftw3}" = "true"; then
AC_DEFINE(LAL_FFTW3_ENABLED, 1, [Define if using fftw3 library])
fi
......@@ -445,6 +449,7 @@ AM_CONDITIONAL(FRAME, test x$frame = xtrue)
AM_CONDITIONAL(METAIO, test x$metaio = xtrue)
AM_CONDITIONAL(XML, test x$xml = xtrue)
AM_CONDITIONAL(INTELFFT, test x$intelfft = xtrue)
AM_CONDITIONAL(CUDA, test x$cuda = xtrue)
AM_CONDITIONAL(QTHREAD, test x$qthread = xtrue)
......@@ -504,6 +509,7 @@ AC_OUTPUT
frameenabled="`test x${frame} = xtrue && echo "ENABLED" || echo "DISABLED"`"
metaioenabled="`test x${metaio} = xtrue && echo "ENABLED" || echo "DISABLED"`"
xmlenabled="`test x${xml} = xtrue && echo "ENABLED" || echo "DISABLED"`"
cudaenabled="`test x${cuda} = xtrue && echo "ENABLED" || echo "DISABLED"`"
echo "
================================================================
......@@ -512,6 +518,7 @@ echo "
LAL Frame library support is $frameenabled
LAL MetaIo library support is $metaioenabled
LAL XML library support is $xmlenabled
CUDA support is $cudaenabled
Now run 'make' to build LAL
and run 'make install' to install LAL
......
......@@ -7,5 +7,5 @@ Name: LAL
Description: LSC Algorithm Library
Version: @VERSION@
Requires: fftw3 fftw3f gsl
Libs: -L${libdir} -llal
Cflags: -I${includedir}
Libs: -L${libdir} -llal @CUDA_LIBS@
Cflags: -I${includedir} @CUDA_CFLAGS@
......@@ -14,10 +14,12 @@ AC_DEFUN([LAL_ENABLE_GCC_FLAGS],
AC_DEFUN([DO_ENABLE_LAL_GCC_FLAGS],
[
lal_gcc_flags="-g3 -O4 -Wall -W -Wmissing-prototypes -Wstrict-prototypes -Wshadow -Wpointer-arith -Wcast-qual -Wcast-align -Wwrite-strings -fno-common -Wnested-externs -Wno-format-zero-length"
case $host_cpu-$host_os in
*i386-darwin*) lal_gcc_flags="${lal_gcc_flags} -pedantic" ;;
*) lal_gcc_flags="${lal_gcc_flags} -pedantic-errors" ;;
esac
if test "${cuda}" != "true"; then
case $host_cpu-$host_os in
*i386-darwin*) lal_gcc_flags="${lal_gcc_flags} -pedantic" ;;
*) lal_gcc_flags="${lal_gcc_flags} -pedantic-errors" ;;
esac
fi
])
AC_DEFUN([LAL_WITH_EXTRA_CPPFLAGS],
......@@ -135,6 +137,37 @@ AC_DEFUN([LAL_ENABLE_INTELFFT],
], [ intelfft=false ] )
])
AC_DEFUN([LAL_WITH_CUDA],
[AC_ARG_WITH(
[cuda],
AC_HELP_STRING([--with-cuda=PATH],[specify location of CUDA [/opt/cuda]]),
[ case "$with_cuda" in
no)
cuda=false
;;
yes)
AC_MSG_WARN([No path for CUDA specifed, using /opt/cuda])
cuda=true
CUDA_LIBS="-L/opt/cuda/lib -lcufft -lcudart"
CUDA_CFLAGS="-I/opt/cuda/include"
LIBS="$LIBS $CUDA_LIBS"
CFLAGS="$CFLAGS $CUDA_CFLAGS"
AC_SUBST(CUDA_LIBS)
AC_SUBST(CUDA_CFLAGS)
;;
*)
AC_MSG_NOTICE([Using ${with_cuda} as CUDA path])
cuda=true
CUDA_LIBS="-L${with_cuda}/lib -lcufft -lcudart"
CUDA_CFLAGS="-I${with_cuda}/include"
LIBS="$LIBS $CUDA_LIBS"
CFLAGS="$CFLAGS $CUDA_CFLAGS"
AC_SUBST(CUDA_LIBS)
AC_SUBST(CUDA_CFLAGS)
esac
], [ cuda=false ])
])
AC_DEFUN([LAL_ENABLE_DEBUG],
[AC_ARG_ENABLE(
[debug],
......
#ifndef _CUDAPLAN_H
#define _CUDAPLAN_H
/* suppress warnings from cuda headers */
#pragma GCC system_header
#include <lal/LALDatatypes.h>
#include <fftw3.h>
#include <cufft.h>
......
## Process this file with automake to produce Makefile.in
pkginclude_HEADERS = ComplexFFT.h RealFFT.h FFTWMutex.h TimeFreqFFT.h
if CUDA
pkginclude_HEADERS += CudaPlan.h
endif
MOSTLYCLEANFILES = .dvi-dep
DISTCLEANFILES = *.tex
all-local: include-link
......
......@@ -129,12 +129,11 @@
#include <lal/ComplexFFT.h>
#include <lal/CudaPlan.h>
#include <lal/FFTWMutex.h>
#include <CudaFunctions.h>
#include <cuda.h>
#include <cuda_runtime.h>
#include <cufft.h>
#include <fftw3.h>
#include "CudaFunctions.h"
#include "CudaFFT.h"
NRCSID( COMPLEXFFTC, "$Id$" );
/*
......@@ -153,6 +152,9 @@ COMPLEX8FFTPlan * XLALCreateCOMPLEX8FFTPlan( UINT4 size, int fwdflg, int measure
if ( ! size )
XLAL_ERROR_NULL( func, XLAL_EBADLEN );
/* "use" measurelvl */
measurelvl = 0;
/* allocate memory for the plan and the temporary arrays */
plan = XLALMalloc( sizeof( *plan ) );
if ( ! plan )
......
#include <lal/LALDatatypes.h>
#include <CudaFunctions.h>
#include "CudaFunctions.h"
int cudafft_execute_r2c(cufftHandle plan,
cufftComplex *output, const cufftReal *input,
......
/*
* Copyright (C) 2009 Adam Mercer
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or (at
* your option) any later version.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with with program; see the file COPYING. If not, write to the
* Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
* 02111-1307 USA
*/
#ifndef _CUDAFFT_H
#define _CUDAFFT_H
/* supress warnings form cuda headers */
#pragma GCC system_header
#include <cuda.h>
#include <cuda_runtime.h>
#include <cufft.h>
#endif /* _CUDAFFT_H */
#include <lal/LALDatatypes.h>
#include <lal/XLALError.h>
#include <CudaFunctions.h>
#include <cuda_runtime.h>
#include "CudaFunctions.h"
REAL4 *XLALCudaMallocReal(UINT4 size)
{
......@@ -10,7 +11,7 @@ REAL4 *XLALCudaMallocReal(UINT4 size)
cudaMalloc( (void **)&d_data, sizeof(REAL4) * size );
if( !d_data )
XLAL_ERROR_NULL( func, XLAL_ENOMEM );
XLAL_ERROR_NULL( func, XLAL_ENOMEM );
return d_data;
}
......@@ -21,7 +22,7 @@ COMPLEX8 *XLALCudaMallocComplex(UINT4 size)
cudaMalloc( (void **)&d_data, sizeof(COMPLEX8) * size );
if( !d_data )
XLAL_ERROR_NULL( func, XLAL_ENOMEM );
XLAL_ERROR_NULL( func, XLAL_ENOMEM );
return d_data;
}
......@@ -29,6 +30,6 @@ void XLALCudaFree(void *d_data)
{
static const char *func = "XLALCudaFree";
if( !d_data )
XLAL_ERROR_VOID( func, XLAL_EFAULT );
XLAL_ERROR_VOID( func, XLAL_EFAULT );
cudaFree(d_data);
}
/* supress warnings from cuda headers */
#pragma GCC system_header
#include <lal/LALDatatypes.h>
#include <cufft.h>
......
......@@ -160,12 +160,11 @@
#include <lal/RealFFT.h>
#include <lal/XLALError.h>
#include <lal/FFTWMutex.h>
#include <CudaFunctions.h>
#include <cuda.h>
#include <cuda_runtime.h>
#include <cufft.h>
#include <fftw3.h>
#include "CudaFunctions.h"
#include "CudaFFT.h"
NRCSID( REALFFTC, "$Id$" );
......@@ -204,6 +203,9 @@ REAL4FFTPlan * XLALCreateREAL4FFTPlan( UINT4 size, int fwdflg, int measurelvl )
if ( ! size )
XLAL_ERROR_NULL( func, XLAL_EBADLEN );
/* "use" measurelvl */
measurelvl = 0;
/* allocate memory for the plan and the temporary arrays */
plan = XLALMalloc( sizeof( *plan ) );
if ( ! plan )
......
......@@ -10,17 +10,32 @@ if INTELFFT
FFTSRC = IntelComplexFFT.c IntelRealFFT.c $(QTHREADSRC)
EXTRA_TEXSOURCES = ComplexFFT.c RealFFT.c FFTWMutex.c
else
if CUDA
FFTSRC = CudaComplexFFT.c CudaRealFFT.c FFTWMutex.c CudaFunctions.c CudaFFT.cu
else
FFTSRC = ComplexFFT.c RealFFT.c FFTWMutex.c
EXTRA_TEXSOURCES =
endif
endif
noinst_LTLIBRARIES = libfft.la
libfft_la_SOURCES = $(FFTSRC) TimeFreqFFT.c AverageSpectrum.c Convolution.c
INCLUDES = -I$(top_builddir)/include -I$(srcdir)/../include
EXTRA_DIST = qthread.c IntelComplexFFT.c IntelRealFFT.c \
ComplexFFT.c RealFFT.c FFTWMutex.c TimeFreqFFT.c
ComplexFFT.c RealFFT.c FFTWMutex.c TimeFreqFFT.c \
CudaComplexFFT.c CudaRealFFT.c CudaFFT.cu CudaFunctions.h \
CudaFFT.h
if CUDA
MAINTAINERCLEANFILES = CudaFFT.cu.c CudaFFT.cu.loT
endif
MOSTLYCLEANFILES = .dvi-dep
DISTCLEANFILES = *.tex
dvi-local: .dvi-dep
@DVIMKFILE@
if CUDA
SUFFIXES = .cu .c
.cu.c:
@nvcc -cuda --host-compilation=c $(INCLUDES) $(CPPFLAGS) --output-file $@ $<
endif
......@@ -73,6 +73,7 @@
#include <lal/LALStdlib.h>
#include <lal/AVFactories.h>
#include <lal/ComplexFFT.h>
#include <config.h>
#define CODES_(x) #x
#define CODES(x) CODES_(x)
......@@ -104,7 +105,11 @@ int
main( int argc, char *argv[] )
{
const UINT4 n = 17;
#if LAL_CUDA_ENABLED
const REAL4 eps = 1e-4;
#else
const REAL4 eps = 1e-6;
#endif
static LALStatus status;
ComplexFFTPlan *pfwd = NULL;
......
......@@ -81,6 +81,7 @@
#include <lal/SeqFactories.h>
#include <lal/RealFFT.h>
#include <lal/VectorOps.h>
#include <config.h>
#define CODES_(x) #x
#define CODES(x) CODES_(x)
......@@ -124,7 +125,14 @@ int main( int argc, char *argv[] )
REAL4Vector *ans = NULL;
COMPLEX8Vector *dft = NULL;
COMPLEX8Vector *fft = NULL;
REAL8 eps = 1e-6; /* very conservative floating point precision */
#if LAL_CUDA_ENABLED
/* The test itself should pass at 1e-4, but it might fail at
* some rare cases where accuracy is bad for some numbers. */
REAL8 eps = 3e-4;
#else
/* very conservative floating point precision */
REAL8 eps = 1e-6;
#endif
REAL8 lbn;
REAL8 ssq;
REAL8 var;
......
......@@ -161,7 +161,11 @@ NRCSID(CZEROPADANDFFTTESTC, "$Id$");
#define CZEROPADANDFFTTESTC_DELTAF 1.0/(CZEROPADANDFFTTESTC_FULLLENGTH * CZEROPADANDFFTTESTC_DELTAT)
#define CZEROPADANDFFTTESTC_FBASE 10
#define CZEROPADANDFFTTESTC_FMIN (CZEROPADANDFFTTESTC_FBASE - (CZEROPADANDFFTTESTC_LENGTH - 1) * CZEROPADANDFFTTESTC_DELTAF)
#if LAL_CUDA_ENABLED
#define CZEROPADANDFFTTESTC_TOL 1e-5
#else
#define CZEROPADANDFFTTESTC_TOL 1e-6
#endif
#define CZEROPADANDFFTTESTC_TRUE 1
#define CZEROPADANDFFTTESTC_FALSE 0
......
......@@ -162,7 +162,11 @@ NRCSID(SZEROPADANDFFTTESTC, "$Id$");
#define SZEROPADANDFFTTESTC_EPOCHNS 56789
#define SZEROPADANDFFTTESTC_DELTAT 0.5
#define SZEROPADANDFFTTESTC_DELTAF 1.0/(SZEROPADANDFFTTESTC_FULLLENGTH * SZEROPADANDFFTTESTC_DELTAT)
#if LAL_CUDA_ENABLED
#define SZEROPADANDFFTTESTC_TOL 1e-5
#else
#define SZEROPADANDFFTTESTC_TOL 1e-6
#endif
#define SZEROPADANDFFTTESTC_TRUE 1
#define SZEROPADANDFFTTESTC_FALSE 0
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment