diff options
author | spiros <andyspiros@gmail.com> | 2011-07-22 23:53:02 +0200 |
---|---|---|
committer | spiros <andyspiros@gmail.com> | 2011-07-22 23:53:02 +0200 |
commit | 47af1c14eaed0149ab6a3eb2b040d767a9fca82e (patch) | |
tree | 2e3552d44d74cda2408cc562eb2497711484c813 /btl | |
parent | Added 2D FFTW tests. Much work around FFTW. (diff) | |
download | auto-numerical-bench-47af1c14eaed0149ab6a3eb2b040d767a9fca82e.tar.gz auto-numerical-bench-47af1c14eaed0149ab6a3eb2b040d767a9fca82e.tar.bz2 auto-numerical-bench-47af1c14eaed0149ab6a3eb2b040d767a9fca82e.zip |
Added support for parallel LU decomposition in the PBLAS module. Much
work on the distributed memory framework.
Diffstat (limited to 'btl')
-rw-r--r-- | btl/actions/action_parallel_lu_decomp.hh | 116 | ||||
-rw-r--r-- | btl/actions/action_parallel_matrix_vector_product.hh | 19 | ||||
-rw-r--r-- | btl/generic_bench/init/init_function.hh | 2 | ||||
-rw-r--r-- | btl/libs/BLACS/blacs_interface.hh | 37 | ||||
-rw-r--r-- | btl/libs/LAPACK/lapack_interface.hh | 11 | ||||
-rw-r--r-- | btl/libs/PBLAS/main.cpp | 10 | ||||
-rw-r--r-- | btl/libs/PBLAS/pblas.h | 10 | ||||
-rw-r--r-- | btl/libs/PBLAS/pblas_interface.hh | 24 | ||||
-rw-r--r-- | btl/libs/PBLAS/pblas_interface_impl.hh | 12 |
9 files changed, 196 insertions, 45 deletions
diff --git a/btl/actions/action_parallel_lu_decomp.hh b/btl/actions/action_parallel_lu_decomp.hh new file mode 100644 index 0000000..4882a21 --- /dev/null +++ b/btl/actions/action_parallel_lu_decomp.hh @@ -0,0 +1,116 @@ +#ifndef ACTION_PARALLEL_LU_DECOMP_HH_ +#define ACTION_PARALLEL_LU_DECOMP_HH_ + +#include "utilities.h" +#include "init/init_function.hh" +#include "init/init_vector.hh" + +#include "lapack_interface.hh" +#include "STL_interface.hh" + +#include <string> + +template<class Interface> +class Action_parallel_lu_decomp { + +public : + + // Constructor + BTL_DONT_INLINE Action_parallel_lu_decomp( int size ) : _size(size) + { + MESSAGE("Action_parallel_lu_decomp Ctor"); + + int myid, procnum; + blacs_pinfo_(&myid, &procnum); + iamroot = (myid == 0); + + // STL matrix and vector initialization + if (iamroot) { + init_vector<pseudo_random>(Global_A_stl, size*size); + } + + Interface::scatter_matrix(Global_A_stl, Local_A_stl, desc, size, size, 64, 64); + LocalRows = desc[8]; + LocalCols = Local_A_stl.size()/desc[8]; + + // Generic local matrix and vectors initialization + Interface::matrix_from_stl(Local_A_ref, Local_A_stl); + Interface::matrix_from_stl(Local_A , Local_A_stl); + + _cost = 2.0*size*size*size/3.0 + static_cast<double>(size*size); + } + + + // Invalidate copy constructor + Action_parallel_lu_decomp(const Action_parallel_lu_decomp&) + { + INFOS("illegal call to Action_parallel_lu_decomp copy constructor"); + exit(1); + } + + // Destructor + ~Action_parallel_lu_decomp() + { + MESSAGE("Action_parallel_lu_decomp destructor"); + + // Deallocation + Interface::free_matrix(Local_A_ref, Local_A_stl.size()); + Interface::free_matrix(Local_A , Local_A_stl.size()); + } + + // Action name + static inline std::string name() + { + return "lu_decomp_" + Interface::name(); + } + + double nb_op_base() + { + return _cost; + } + + BTL_DONT_INLINE void initialize() + { + Interface::copy_matrix(Local_A_ref, Local_A, Local_A_stl.size()); + } + + BTL_DONT_INLINE void calculate() + { + Interface::parallel_lu_decomp(Local_A, desc); + } + + BTL_DONT_INLINE void check_result() + { + /* Gather */ + typename Interface::stl_matrix Global_Y; + typename Interface::stl_matrix Local_Y(Local_A_stl.size()); + Interface::matrix_to_stl(Local_A, Local_Y); + Interface::gather_matrix(Global_Y, Local_Y, desc); + + if (iamroot) { + + typename Interface::gene_matrix A; + Interface::matrix_from_stl(A, Global_A_stl); + lapack_interface<typename Interface::real_type>::lu_decomp(&Global_A_stl[0], A, _size); + typename Interface::stl_vector correct(A, A+_size*_size); + typename Interface::real_type error = STL_interface<typename Interface::real_type>::norm_diff(Global_Y, correct); + if (error > 10e-5) + cerr << " { !! error : " << error << " !! } "; + + Interface::free_matrix(A, _size*_size); + } + } + +private: + int _size, desc[9], LocalRows, LocalCols; + double _cost; + bool iamroot; + + typename Interface::stl_matrix Global_A_stl; + typename Interface::stl_matrix Local_A_stl; + typename Interface::gene_matrix Local_A_ref; + typename Interface::gene_matrix Local_A; +}; + + +#endif /* ACTION_PARALLEL_LU_DECOMP_HH_ */ diff --git a/btl/actions/action_parallel_matrix_vector_product.hh b/btl/actions/action_parallel_matrix_vector_product.hh index 5c97b1d..67e64bf 100644 --- a/btl/actions/action_parallel_matrix_vector_product.hh +++ b/btl/actions/action_parallel_matrix_vector_product.hh @@ -10,8 +10,6 @@ #include "blas.h" -using namespace std; - template<class Interface> class Action_parallel_matrix_vector_product { @@ -42,9 +40,9 @@ public : init_vector<null_function>(Global_y_stl, GlobalRows); } - Interface::scatter_matrix(Global_A_stl, Local_A_stl, GlobalRows, GlobalCols, BlockRows, BlockCols, LocalRows, LocalCols); - Interface::scatter_matrix(Global_x_stl, Local_x_stl, GlobalCols, LocalXCols, BlockRows, BlockCols, LocalXRows, LocalXCols); - Interface::scatter_matrix(Global_y_stl, Local_y_stl, GlobalRows, LocalYCols, BlockRows, BlockCols, LocalYRows, LocalYCols); + Interface::scatter_matrix(Global_A_stl, Local_A_stl, descA, GlobalRows, GlobalCols, BlockRows, BlockCols); + Interface::scatter_matrix(Global_x_stl, Local_x_stl, descX, GlobalCols, 1, BlockRows, BlockCols); + Interface::scatter_matrix(Global_y_stl, Local_y_stl, descY, GlobalRows, 1, BlockRows, BlockCols); // generic local matrix and vectors initialization @@ -54,17 +52,6 @@ public : Interface::vector_from_stl(Local_x , Local_x_stl); Interface::vector_from_stl(Local_y_ref, Local_y_stl); Interface::vector_from_stl(Local_y , Local_y_stl); - - // Descinit - int context = Interface::context(); - int info; - int LDA, LDX, LDY; - LDA = std::max(1, LocalRows); - LDX = std::max(1, LocalXRows); - LDY = std::max(1, LocalYRows); - descinit_(descA, &GlobalRows, &GlobalCols, &BlockRows, &BlockCols, &iZERO, &iZERO, &context, &LDA, &info); - descinit_(descX, &GlobalCols, &iONE, &BlockRows, &BlockCols, &iZERO, &iZERO, &context, &LDX, &info); - descinit_(descY, &GlobalRows, &iONE, &BlockRows, &BlockCols, &iZERO, &iZERO, &context, &LDY, &info); } // invalidate copy ctor diff --git a/btl/generic_bench/init/init_function.hh b/btl/generic_bench/init/init_function.hh index 7b3bdba..5401673 100644 --- a/btl/generic_bench/init/init_function.hh +++ b/btl/generic_bench/init/init_function.hh @@ -32,7 +32,7 @@ double simple_function(int index_i, int index_j) double pseudo_random(int index) { - return std::rand()/double(RAND_MAX); + return static_cast<int>((std::rand()/double(RAND_MAX)-.5)*20); } double pseudo_random(int index_i, int index_j) diff --git a/btl/libs/BLACS/blacs_interface.hh b/btl/libs/BLACS/blacs_interface.hh index 00aafee..6932150 100644 --- a/btl/libs/BLACS/blacs_interface.hh +++ b/btl/libs/BLACS/blacs_interface.hh @@ -2,7 +2,13 @@ #define BTL_BLACS_INTERFACE_H #include <vector> +#include <algorithm> #include "blacs.h" +extern "C" { + void descinit_(int*, const int*, const int*, const int*, const int*, const int*, const int*, const int*, const int*, int*); + int numroc_(const int*, const int*, const int*, const int*, const int*); +} + #include "scatter.h" #include "gather.h" @@ -80,6 +86,27 @@ public: scatter(context(), GlobalMatrix, LocalMatrix, GlobalRows, GlobalCols, BlockRows, BlockCols, LocalRows, LocalCols); } + static void scatter_matrix(const stl_vector& GlobalMatrix, stl_vector& LocalMatrix, + int *desc, + const int& GlobalRows=0, const int& GlobalCols=0, + const int& BlockRows=0, const int& BlockCols=0 + ) { + int GlobalRows_ = GlobalRows, GlobalCols_ = GlobalCols, + BlockRows_ = BlockRows, BlockCols_ = BlockCols, + LocalRows_, LocalCols_; + const int ctxt = context(); + scatter(ctxt, GlobalMatrix, LocalMatrix, + GlobalRows_, GlobalCols_, BlockRows_, BlockCols_, LocalRows_, LocalCols_ + ); + + const int iZERO = 0; + int info; + const int LLD = std::max(1, LocalRows_); + descinit_(desc, &GlobalRows_, &GlobalCols_, &BlockRows_, &BlockCols_, + &iZERO, &iZERO, &ctxt, &LLD, &info + ); + } + static void gather_matrix(stl_vector& GlobalMatrix, const stl_vector& LocalMatrix, int& GlobalRows, int& GlobalCols, int& BlockRows, int& BlockCols, @@ -88,6 +115,16 @@ public: gather(context(), GlobalMatrix, LocalMatrix, GlobalRows, GlobalCols, BlockRows, BlockCols, LocalRows, LocalCols); } + static void gather_matrix(stl_vector& GlobalMatrix, const stl_vector& LocalMatrix, + int* desc + ) { + int GlobalRows = desc[2], GlobalCols = desc[3], + BlockRows = desc[4], BlockCols = desc[5], + LocalRows = desc[8], LocalCols = LocalMatrix.size()/desc[8]; + const int ctxt = context(); + gather(ctxt, GlobalMatrix, LocalMatrix, GlobalRows, GlobalCols, BlockRows, BlockCols, LocalRows, LocalCols); + } + }; diff --git a/btl/libs/LAPACK/lapack_interface.hh b/btl/libs/LAPACK/lapack_interface.hh index 33de2be..1840741 100644 --- a/btl/libs/LAPACK/lapack_interface.hh +++ b/btl/libs/LAPACK/lapack_interface.hh @@ -1,3 +1,6 @@ +#ifndef LAPACK_INTERFACE_HH +#define LAPACK_INTERFACE_HH + #include <../BLAS/c_interface_base.h> #include <complex> @@ -24,8 +27,10 @@ void dsyev_(char*, char*, int*, double*, int*, double*, double*, int*, int*); #define MAKE_STRING2(S) #S #define MAKE_STRING(S) MAKE_STRING2(S) -#define CAT2(A,B) A##B -#define CAT(A,B) CAT2(A,B) +#ifndef CAT +# define CAT2(A,B) A##B +# define CAT(A,B) CAT2(A,B) +#endif template <typename real> class lapack_interface; @@ -51,3 +56,5 @@ static int zeroint = 0; #include "lapack_interface_impl.hh" #undef SCALAR #undef SCALAR_PREFIX + +#endif /* LAPACK_INTERFACE_HH */ diff --git a/btl/libs/PBLAS/main.cpp b/btl/libs/PBLAS/main.cpp index 4b64f12..a2aae2a 100644 --- a/btl/libs/PBLAS/main.cpp +++ b/btl/libs/PBLAS/main.cpp @@ -1,3 +1,5 @@ +#define DISTRIBUTED + #include "mpi.h" #include "utilities.h" #include "bench.hh" @@ -5,9 +7,10 @@ #include <iostream> //using namespace std; -#include "blacsinit.hh" #include "pblas_interface.hh" +#include "blacsinit.hh" #include "action_parallel_matrix_vector_product.hh" +#include "action_parallel_lu_decomp.hh" #include "action_parallel_axpy.hh" #include <string> @@ -19,8 +22,9 @@ int main(int argc, char **argv) bool iamroot = blacsinit(&argc, &argv); // distr_bench<Action_parallel_matrix_vector_product<pblas_interface<double> > >(10,MAX_MV,NB_POINT,!iamroot); - distr_bench<Action_parallel_axpy<pblas_interface<REAL_TYPE> > >(10,MAX_MV,NB_POINT,!iamroot); -// Action_parallel_axpy<pblas_interface<double> > action(8); +// distr_bench<Action_parallel_axpy<pblas_interface<REAL_TYPE> > >(10,MAX_MV,NB_POINT,!iamroot); + distr_bench<Action_parallel_lu_decomp<pblas_interface<REAL_TYPE> > >(10,MAX_MV,NB_POINT,!iamroot); +// Action_parallel_lu_decomp<pblas_interface<REAL_TYPE> > action(8); // action.initialize(); // action.calculate(); // action.check_result(); diff --git a/btl/libs/PBLAS/pblas.h b/btl/libs/PBLAS/pblas.h index adc6c91..2b5860e 100644 --- a/btl/libs/PBLAS/pblas.h +++ b/btl/libs/PBLAS/pblas.h @@ -6,7 +6,7 @@ extern "C" { #endif int numroc_(const int*, const int*, const int*, const int*, const int*); - int descinit_(const int*, const int*, const int*, const int*, const int*, const int*, const int*, const int*, const int*, int*); + void descinit_(int*, const int*, const int*, const int*, const int*, const int*, const int*, const int*, const int*, int*); /* Level 1 */ @@ -42,6 +42,14 @@ extern "C" { ); + + /************* + * Scalapack * + *************/ + void psgetrf_(const int*, const int*, float*, const int*, const int*, const int*, int*, int*); + void pdgetrf_(const int*, const int*, double*, const int*, const int*, const int*, int*, int*); + + #ifdef __cplusplus } #endif diff --git a/btl/libs/PBLAS/pblas_interface.hh b/btl/libs/PBLAS/pblas_interface.hh index b969e4e..cdfb70a 100644 --- a/btl/libs/PBLAS/pblas_interface.hh +++ b/btl/libs/PBLAS/pblas_interface.hh @@ -1,23 +1,3 @@ -//===================================================== -// File : blas_interface.hh -// Author : L. Plagne <laurent.plagne@edf.fr)> -// Copyright (C) EDF R&D, lun sep 30 14:23:28 CEST 2002 -//===================================================== -// -// This program is free software; you can redistribute it and/or -// modify it under the terms of the GNU General Public License -// as published by the Free Software Foundation; either version 2 -// of the License, or (at your option) any later version. -// -// This program is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -// GNU General Public License for more details. -// You should have received a copy of the GNU General Public License -// along with this program; if not, write to the Free Software -// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. -// - #include "pblas.h" #include "blacs_interface.hh" @@ -32,7 +12,7 @@ template<class real> class pblas_interface; - +/* static char notrans = 'N'; static char trans = 'T'; static char nonunit = 'N'; @@ -40,7 +20,7 @@ static char lower = 'L'; static char right = 'R'; static char left = 'L'; static int intone = 1; - +*/ #define SCALAR float #define SCALAR_PREFIX s diff --git a/btl/libs/PBLAS/pblas_interface_impl.hh b/btl/libs/PBLAS/pblas_interface_impl.hh index b534a4e..c36c249 100644 --- a/btl/libs/PBLAS/pblas_interface_impl.hh +++ b/btl/libs/PBLAS/pblas_interface_impl.hh @@ -33,6 +33,7 @@ public: real_type alpha = 1., beta = 0.; int iONE = 1; int myid, procnum; + const char notrans = 'N'; blacs_pinfo_(&myid, &procnum); PBLAS_FUNC(gemv)(¬rans, &GlobalRows, &GlobalCols, @@ -42,5 +43,16 @@ public: ); } + + + static inline void parallel_lu_decomp(gene_matrix& X, const int* desc) + { + const int GlobalRows = desc[2], GlobalCols = desc[3]; + const int iONE = 1; + int info; + std::vector<int> ipiv(desc[8] + desc[4]); + PBLAS_FUNC(getrf)(&GlobalRows, &GlobalCols, X, &iONE, &iONE, desc, + &ipiv[0], &info); + } }; |