diff options
author | spiros <andyspiros@gmail.com> | 2011-08-02 19:53:02 +0200 |
---|---|---|
committer | spiros <andyspiros@gmail.com> | 2011-08-02 19:53:02 +0200 |
commit | dd747ab644ad64f205cf1a2c88f47f66beb36277 (patch) | |
tree | 561fefcde6bbb6673feaa8ab99e712c98983c8ab /btl | |
parent | Added SVD decomposition. Work on eigenvalues action and mat-vec (diff) | |
download | auto-numerical-bench-dd747ab644ad64f205cf1a2c88f47f66beb36277.tar.gz auto-numerical-bench-dd747ab644ad64f205cf1a2c88f47f66beb36277.tar.bz2 auto-numerical-bench-dd747ab644ad64f205cf1a2c88f47f66beb36277.zip |
Merge last week's changes:
* PBLAS/ScaLAPACK tests
* BTL changes
* Dependency resolution
Diffstat (limited to 'btl')
-rw-r--r-- | btl/actions/action_parallel_cholesky.hh | 30 | ||||
-rw-r--r-- | btl/actions/action_parallel_lu_decomp.hh | 22 | ||||
-rw-r--r-- | btl/actions/action_parallel_qr_decomp.hh | 11 | ||||
-rw-r--r-- | btl/generic_bench/init/init_matrix.hh | 28 | ||||
-rw-r--r-- | btl/libs/BLACS/blacs_interface.hh | 5 | ||||
-rw-r--r-- | btl/libs/BLACS/gather.h | 15 | ||||
-rw-r--r-- | btl/libs/BLACS/scatter.h | 27 | ||||
-rw-r--r-- | btl/libs/PBLAS/main.cpp | 2 | ||||
-rw-r--r-- | btl/libs/PBLAS/pblas.h | 13 | ||||
-rw-r--r-- | btl/libs/PBLAS/pblas_interface.hh | 1 | ||||
-rw-r--r-- | btl/libs/PBLAS/pblas_interface_impl.hh | 127 |
11 files changed, 232 insertions, 49 deletions
diff --git a/btl/actions/action_parallel_cholesky.hh b/btl/actions/action_parallel_cholesky.hh index 05ef3ef..6c3a6da 100644 --- a/btl/actions/action_parallel_cholesky.hh +++ b/btl/actions/action_parallel_cholesky.hh @@ -9,6 +9,8 @@ #include "STL_interface.hh" #include <string> +#include <sstream> +#include <fstream> template<class Interface> class Action_parallel_cholesky { @@ -27,25 +29,17 @@ public : // STL matrix and vector initialization if (iamroot) { - typename LapackInterface::stl_matrix temp_stl; - init_matrix_symm<pseudo_random>(temp_stl, size); - Global_A_stl.reserve(size*size); - const double add = 5000./size; - for (int r = 0; r < size; ++r) - for (int c = 0; c < size; ++c) - if (r==c) - Global_A_stl.push_back((std::abs(temp_stl[r][c])+add)*size); - else - Global_A_stl.push_back(temp_stl[r][c]); + /* Using a constant seed */ + const unsigned seed = 3; + init_SPD_matrix(Global_A_stl, size, seed); } const int blocksize = std::max(std::min(size/4, 64), 2); - Interface::scatter_matrix(Global_A_stl, Local_A_stl, desc, size, size, blocksize, blocksize); + scatter_matrix(Global_A_stl, Local_A_stl, desc, size, size, blocksize, blocksize); LocalRows = desc[8]; LocalCols = Local_A_stl.size()/desc[8]; // Generic local matrix and vectors initialization - Interface::matrix_from_stl(Local_A_ref, Local_A_stl); Interface::matrix_from_stl(Local_A , Local_A_stl); _cost = 0; @@ -69,7 +63,6 @@ public : MESSAGE("Action_parallel_cholesky destructor"); // Deallocation - Interface::free_matrix(Local_A_ref, Local_A_stl.size()); Interface::free_matrix(Local_A , Local_A_stl.size()); } @@ -86,16 +79,21 @@ public : BTL_DONT_INLINE void initialize() { - Interface::copy_matrix(Local_A_ref, Local_A, Local_A_stl.size()); } BTL_DONT_INLINE void calculate() { + Interface::copy_matrix(&Local_A_stl[0], Local_A, Local_A_stl.size()); Interface::parallel_cholesky(Local_A, desc); } BTL_DONT_INLINE void check_result() { + if (_size > 2) { + double error = Interface::test_cholesky(Global_A_stl, Local_A, desc); + if (iamroot) + cout << " {error: " << error << "} "; + } } @@ -106,8 +104,10 @@ private: typename Interface::stl_matrix Global_A_stl; typename Interface::stl_matrix Local_A_stl; - typename Interface::gene_matrix Local_A_ref; typename Interface::gene_matrix Local_A; + + typename Interface::stl_matrix Glotal_Test_stl; + typename Interface::stl_matrix Local_Test_stl; }; #endif /* ACTION_PARALLEL_CHOLESKY_HH_ */ diff --git a/btl/actions/action_parallel_lu_decomp.hh b/btl/actions/action_parallel_lu_decomp.hh index d3dc620..06602fd 100644 --- a/btl/actions/action_parallel_lu_decomp.hh +++ b/btl/actions/action_parallel_lu_decomp.hh @@ -26,16 +26,17 @@ public : // STL matrix and vector initialization if (iamroot) { - init_vector<pseudo_random>(Global_A_stl, size*size); + /* Using a constant seed */ + const unsigned seed = 3; + init_matrix(Global_A_stl, size, seed); } const int blocksize = std::max(std::min(size/4, 64), 2); - Interface::scatter_matrix(Global_A_stl, Local_A_stl, desc, size, size, blocksize, blocksize); + scatter_matrix(Global_A_stl, Local_A_stl, desc, size, size, blocksize, blocksize); LocalRows = desc[8]; LocalCols = Local_A_stl.size()/desc[8]; // Generic local matrix and vectors initialization - Interface::matrix_from_stl(Local_A_ref, Local_A_stl); Interface::matrix_from_stl(Local_A , Local_A_stl); _cost = 2.0*size*size*size/3.0 + static_cast<double>(size*size); @@ -55,7 +56,6 @@ public : MESSAGE("Action_parallel_lu_decomp destructor"); // Deallocation - Interface::free_matrix(Local_A_ref, Local_A_stl.size()); Interface::free_matrix(Local_A , Local_A_stl.size()); } @@ -72,12 +72,12 @@ public : BTL_DONT_INLINE void initialize() { - Interface::copy_matrix(Local_A_ref, Local_A, Local_A_stl.size()); } BTL_DONT_INLINE void calculate() { - Interface::parallel_lu_decomp(Local_A, desc); + Interface::copy_matrix(&Local_A_stl[0], Local_A, Local_A_stl.size()); + Interface::parallel_lu_decomp(Local_A, ipiv_stl, desc); } BTL_DONT_INLINE void check_result() @@ -100,6 +100,13 @@ public : // // Interface::free_matrix(A, _size*_size); // } + + +// if (_size > 2) { +// double error = Interface::test_LU(Global_A_stl, Local_A, desc); +// if (iamroot) +// cout << " {error: " << error << "} "; +// } } private: @@ -109,8 +116,9 @@ private: typename Interface::stl_matrix Global_A_stl; typename Interface::stl_matrix Local_A_stl; - typename Interface::gene_matrix Local_A_ref; typename Interface::gene_matrix Local_A; + + std::vector<int> ipiv_stl; }; diff --git a/btl/actions/action_parallel_qr_decomp.hh b/btl/actions/action_parallel_qr_decomp.hh index a41414c..cda1ba5 100644 --- a/btl/actions/action_parallel_qr_decomp.hh +++ b/btl/actions/action_parallel_qr_decomp.hh @@ -27,16 +27,17 @@ public : // STL matrix and vector initialization if (iamroot) { - init_vector<pseudo_random>(Global_A_stl, size*size); + /* Using a constant seed */ + const unsigned seed = 3; + init_matrix(Global_A_stl, size, seed); } const int blocksize = std::max(std::min(size/4, 64), 2); - Interface::scatter_matrix(Global_A_stl, Local_A_stl, desc, size, size, blocksize, blocksize); + scatter_matrix(Global_A_stl, Local_A_stl, desc, size, size, blocksize, blocksize); LocalRows = desc[8]; LocalCols = Local_A_stl.size()/desc[8]; // Generic local matrix and vectors initialization - Interface::matrix_from_stl(Local_A_ref, Local_A_stl); Interface::matrix_from_stl(Local_A , Local_A_stl); _cost = 2.0*size*size*size; @@ -56,7 +57,6 @@ public : MESSAGE("Action_parallel_qr_decomp destructor"); // Deallocation - Interface::free_matrix(Local_A_ref, Local_A_stl.size()); Interface::free_matrix(Local_A , Local_A_stl.size()); } @@ -73,11 +73,11 @@ public : BTL_DONT_INLINE void initialize() { - Interface::copy_matrix(Local_A_ref, Local_A, Local_A_stl.size()); } BTL_DONT_INLINE void calculate() { + Interface::copy_matrix(&Local_A_stl[0], Local_A, Local_A_stl.size()); Interface::parallel_qr_decomp(Local_A, desc); } @@ -92,7 +92,6 @@ private: typename Interface::stl_matrix Global_A_stl; typename Interface::stl_matrix Local_A_stl; - typename Interface::gene_matrix Local_A_ref; typename Interface::gene_matrix Local_A; }; diff --git a/btl/generic_bench/init/init_matrix.hh b/btl/generic_bench/init/init_matrix.hh index 67cbd20..0420c7a 100644 --- a/btl/generic_bench/init/init_matrix.hh +++ b/btl/generic_bench/init/init_matrix.hh @@ -20,6 +20,8 @@ #ifndef INIT_MATRIX_HH #define INIT_MATRIX_HH +#include "LinearCongruential.hh" + // The Vector class must satisfy the following part of STL vector concept : // resize() method // [] operator for setting element @@ -61,4 +63,30 @@ BTL_DONT_INLINE void init_matrix_symm(Matrix& A, int size){ } } +template<class Matrix> BTL_DONT_INLINE +void init_matrix(Matrix& A, const int& size, const unsigned& seed) +{ + typedef typename Matrix::value_type value_t; + A.resize(size*size); + LinearCongruential rng(seed); + for (typename Matrix::iterator i = A.begin(), end = A.end(); i != end; ++i) + *i = rng.get_01(); +} + +template<class Matrix> BTL_DONT_INLINE +void init_SPD_matrix(Matrix& A, const int& size, const unsigned& seed) +{ + typedef typename Matrix::value_type value_t; + A.resize(size*size); + LinearCongruential rng(seed); + for (int r = 0; r < size; ++r) { + A[r+size*r] = rng.get_01() + size; + for (int c = r+1; c < size; ++c) { + const value_t v = rng.get_01(); + A[r+size*c] = v; + A[c+size*r] = v; + } + } +} + #endif diff --git a/btl/libs/BLACS/blacs_interface.hh b/btl/libs/BLACS/blacs_interface.hh index 6932150..eaef8a5 100644 --- a/btl/libs/BLACS/blacs_interface.hh +++ b/btl/libs/BLACS/blacs_interface.hh @@ -76,6 +76,11 @@ public: blacs_get_(&ignored, &what, &ctxt); return ctxt; } + static int myid() { + int procnum, myid; + blacs_pinfo_(&myid, &procnum); + return myid; + } static void scatter_matrix(const stl_vector& GlobalMatrix, stl_vector& LocalMatrix, diff --git a/btl/libs/BLACS/gather.h b/btl/libs/BLACS/gather.h index 3505233..1ee8149 100644 --- a/btl/libs/BLACS/gather.h +++ b/btl/libs/BLACS/gather.h @@ -13,5 +13,20 @@ #undef TYPENAME #undef TYPEPREFIX +template<typename T> +static void gather_matrix(std::vector<T>& GlobalMatrix, const std::vector<T>& LocalMatrix, + const int* desc +) { + int GlobalRows = desc[2], GlobalCols = desc[3], + BlockRows = desc[4], BlockCols = desc[5], + LocalRows = desc[8], LocalCols = LocalMatrix.size()/desc[8]; + int ctxt; + { + int ignored, what = 0; + blacs_get_(&ignored, &what, &ctxt); + } + gather(ctxt, GlobalMatrix, LocalMatrix, GlobalRows, GlobalCols, BlockRows, BlockCols, LocalRows, LocalCols); +} + #endif /* GATHER_H_ */ diff --git a/btl/libs/BLACS/scatter.h b/btl/libs/BLACS/scatter.h index 5e6a76c..310da87 100644 --- a/btl/libs/BLACS/scatter.h +++ b/btl/libs/BLACS/scatter.h @@ -13,5 +13,32 @@ #undef TYPENAME #undef TYPEPREFIX +template<typename T> +void scatter_matrix(const std::vector<T>& GlobalMatrix, std::vector<T>& LocalMatrix, + int *desc, + const int& GlobalRows=0, const int& GlobalCols=0, + const int& BlockRows=0, const int& BlockCols=0 + ) +{ + int GlobalRows_ = GlobalRows, GlobalCols_ = GlobalCols, + BlockRows_ = BlockRows, BlockCols_ = BlockCols, + LocalRows_, LocalCols_; + int ctxt; + { + int ignored, what = 0; + blacs_get_(&ignored, &what, &ctxt); + } + scatter(ctxt, GlobalMatrix, LocalMatrix, + GlobalRows_, GlobalCols_, BlockRows_, BlockCols_, LocalRows_, LocalCols_ + ); + + const int iZERO = 0; + int info; + const int LLD = std::max(1, LocalRows_); + descinit_(desc, &GlobalRows_, &GlobalCols_, &BlockRows_, &BlockCols_, + &iZERO, &iZERO, &ctxt, &LLD, &info + ); +} + #endif /* SCATTER_H_ */ diff --git a/btl/libs/PBLAS/main.cpp b/btl/libs/PBLAS/main.cpp index f1f7d69..96046e1 100644 --- a/btl/libs/PBLAS/main.cpp +++ b/btl/libs/PBLAS/main.cpp @@ -60,7 +60,7 @@ int main(int argc, char **argv) distr_bench<Action_parallel_lu_decomp<pblas_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT, !iamroot); if (cholesky) - distr_bench<Action_parallel_cholesky<pblas_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT, !iamroot); + distr_bench<Action_parallel_cholesky<pblas_interface<REAL_TYPE> > >(MIN_MM, MAX_MM, NB_POINT, !iamroot); if (qr_decomp) distr_bench<Action_parallel_qr_decomp<pblas_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT, !iamroot); diff --git a/btl/libs/PBLAS/pblas.h b/btl/libs/PBLAS/pblas.h index e801b8b..48dd7cc 100644 --- a/btl/libs/PBLAS/pblas.h +++ b/btl/libs/PBLAS/pblas.h @@ -42,6 +42,19 @@ extern "C" { ); + /* Level 3 */ + + // Single + void pstrmm_(const char*, const char*, const char*, const char*, const int*, const int*, const float*, + const float*, const int*, const int*, const int*, + float*, const int*, const int*, const int*); + + // Double + void pdtrmm_(const char*, const char*, const char*, const char*, const int*, const int*, const double*, + const double*, const int*, const int*, const int*, + double*, const int*, const int*, const int*); + + /************* * Scalapack * diff --git a/btl/libs/PBLAS/pblas_interface.hh b/btl/libs/PBLAS/pblas_interface.hh index cdfb70a..01459f5 100644 --- a/btl/libs/PBLAS/pblas_interface.hh +++ b/btl/libs/PBLAS/pblas_interface.hh @@ -1,3 +1,4 @@ +#include "blas.h" #include "pblas.h" #include "blacs_interface.hh" diff --git a/btl/libs/PBLAS/pblas_interface_impl.hh b/btl/libs/PBLAS/pblas_interface_impl.hh index d71d61e..d1d1c9f 100644 --- a/btl/libs/PBLAS/pblas_interface_impl.hh +++ b/btl/libs/PBLAS/pblas_interface_impl.hh @@ -1,6 +1,7 @@ #define PBLAS_PREFIX p #define PBLAS_FUNC(NAME) CAT(CAT(CAT(PBLAS_PREFIX, SCALAR_PREFIX),NAME),_) +#define BLAS_FUNC(NAME) CAT(CAT(SCALAR_PREFIX,NAME),_) template<> class pblas_interface<SCALAR> : public blacs_interface<SCALAR> { @@ -45,26 +46,26 @@ public: } - static inline void parallel_lu_decomp(gene_matrix& X, const int* desc) + static inline void parallel_lu_decomp(gene_matrix& X, std::vector<int>& ipiv, const int* desc) { const int GlobalRows = desc[2], GlobalCols = desc[3]; const int iONE = 1; int info; - std::vector<int> ipiv(desc[8] + desc[4]); + ipiv.resize(desc[8] + desc[4]); PBLAS_FUNC(getrf)(&GlobalRows, &GlobalCols, X, &iONE, &iONE, desc, &ipiv[0], &info); -// if (info != 0) -// cerr << " { LU error : " << info << " } "; + if (info != 0 && myid() == 0) + cout << " { LU error : " << info << " } "; } static inline void parallel_cholesky(gene_matrix& X, const int* desc) { const int N = desc[2], iONE = 1; - const char UPLO = 'U'; + const char UP = 'U'; int info; - PBLAS_FUNC(potrf)(&UPLO, &N, X, &iONE, &iONE, desc, &info); -// if (info != 0) -// cerr << " { cholesky error : " << info << " } "; + PBLAS_FUNC(potrf)(&UP, &N, X, &iONE, &iONE, desc, &info); + if (info != 0 && myid() == 0) + cout << " { cholesky error : " << info << " } "; } static inline void parallel_qr_decomp(gene_matrix& X, const int* desc) @@ -86,13 +87,13 @@ public: // Retrieve LWORK PBLAS_FUNC(geqpf)(&GlobalRows, &GlobalCols, X, &iONE, &iONE, desc, &ipiv[0], &tau[0], &lworkd, &imONE, &info); lwork = static_cast<int>(lworkd); -// if (info != 0) -// cerr << " { qr_decomp lwork error } "; + if (info != 0 && myid() == 0) + cout << " { qr_decomp lwork error } "; std::vector<SCALAR> work(lwork); PBLAS_FUNC(geqpf)(&GlobalRows, &GlobalCols, X, &iONE, &iONE, desc, &ipiv[0], &tau[0], &work[0], &lwork, &info); -// if (info != 0) -// cerr << " { qr_decomp computation error } "; + if (info != 0 && myid() == 0) + cerr << " { qr_decomp computation error } "; } static inline void parallel_symm_ev(gene_matrix& A, const int* descA, gene_vector& w, gene_matrix& Z, const int* descZ) @@ -109,13 +110,13 @@ public: Z, &iONE, &iONE, descZ, &lworkd, &imONE, &liwork, &imONE, &info); lwork = static_cast<int>(lworkd); work.resize(lwork); iwork.resize(liwork); -// if (info != 0) -// cerr << " { symm_ev l(i)work error } "; + if (info != 0 && myid() == 0) + cout << " { symm_ev l(i)work error } "; PBLAS_FUNC(syevd)(&jobz, &uplo, &N, A, &iONE, &iONE, descA, w, Z, &iONE, &iONE, descZ, &work[0], &lwork, &iwork[0], &liwork, &info); -// if (info != 0) -// cerr << " { symm_ev computation error } "; + if (info != 0 && myid() == 0) + cout << " { symm_ev computation error } "; } static inline void parallel_svd_decomp(gene_matrix& A, int* descA, gene_matrix& U, int *descU, gene_matrix& V, int *descV, gene_vector& s) @@ -129,14 +130,100 @@ public: // Retrieve lwork PBLAS_FUNC(gesvd)(&job, &job, &size, &size, A, &iONE, &iONE, descA, s, U, &iONE, &iONE, descU, V, &iONE, &iONE, descV, &lworkd, &imONE, &info); -// if (info != 0) -// cerr << " { svd_decomp lwork error } "; + if (info != 0 && myid() == 0) + cout << " { svd_decomp lwork error } "; lwork = static_cast<int>(lworkd); work.resize(lwork); PBLAS_FUNC(gesvd)(&job, &job, &size, &size, A, &iONE, &iONE, descA, s, U, &iONE, &iONE, descU, V, &iONE, &iONE, descV, &work[0], &lwork, &info); -// if (info != 0) -// cerr << " { svd_decomp computation error } "; + if (info != 0 && myid() == 0) + cout << " { svd_decomp computation error } "; + } + + static inline real_type + test_LU(stl_matrix& Global_A, gene_matrix& Local_LU, int *desc) + { + bool iamroot = myid() == 0; + int _size = desc[2]; + + // Create and scatter Identity + int Testdesc[9]; + stl_matrix Global_Test_stl, Local_Test_stl; + if (iamroot) + { + stl_matrix Identity(_size * _size); + for (int r = 0; r < _size; ++r) + Identity[r + _size * r] = 1; + scatter_matrix(Identity, Local_Test_stl, Testdesc, _size, _size, + desc[4], desc[5]); + } + else + scatter_matrix(stl_matrix(), Local_Test_stl, Testdesc); + + // Compute L * U + real_type alpha = 1., malpha = -1; + int iONE = 1; + PBLAS_FUNC(trmm)("L", "L", "N", "N", desc + 2, desc + 2, &alpha, Local_LU, + &iONE, &iONE, desc, &Local_Test_stl[0], &iONE, &iONE, Testdesc); + PBLAS_FUNC(trmm)("R", "U", "N", "N", desc + 2, desc + 2, &alpha, Local_LU, + &iONE, &iONE, desc, &Local_Test_stl[0], &iONE, &iONE, Testdesc); + + // Gather result + gather_matrix(Global_Test_stl, Local_Test_stl, desc); + if (iamroot) + { + int size2 = _size*_size; + BLAS_FUNC(axpy)(&size2, &malpha, &Global_A[0], &iONE, + &Global_Test_stl[0], &iONE); + double error = BLAS_FUNC(nrm2)(&size2, &Global_Test_stl[0], &iONE); + error /= BLAS_FUNC(nrm2)(&size2, &Global_A[0], &iONE); + return error; + } + else + return 0.; + } + + static inline real_type + test_cholesky(stl_matrix& Global_A, gene_matrix& Local_U, int *desc) + { + bool iamroot = myid() == 0; + int _size = desc[2]; + + // Create and scatter Identity + int Testdesc[9]; + stl_matrix Global_Test_stl, Local_Test_stl; + if (iamroot) + { + stl_matrix Identity(_size * _size); + for (int r = 0; r < _size; ++r) + Identity[r + _size * r] = 1; + scatter_matrix(Identity, Local_Test_stl, Testdesc, _size, _size, + desc[4], desc[5]); + } + else + scatter_matrix(stl_matrix(), Local_Test_stl, Testdesc); + + // Compute U' * U + real_type alpha = 1., malpha = -1; + int iONE = 1; + PBLAS_FUNC(trmm)("L", "U", "T", "N", desc + 2, desc + 2, &alpha, Local_U, + &iONE, &iONE, desc, &Local_Test_stl[0], &iONE, &iONE, Testdesc); + PBLAS_FUNC(trmm)("R", "U", "N", "N", desc + 2, desc + 2, &alpha, Local_U, + &iONE, &iONE, desc, &Local_Test_stl[0], &iONE, &iONE, Testdesc); + + // Gather result + gather_matrix(Global_Test_stl, Local_Test_stl, desc); + if (iamroot) + { + int size2 = _size*_size; + BLAS_FUNC(axpy)(&size2, &malpha, &Global_A[0], &iONE, + &Global_Test_stl[0], &iONE); + double error = BLAS_FUNC(nrm2)(&size2, &Global_Test_stl[0], &iONE); + error /= BLAS_FUNC(nrm2)(&size2, &Global_A[0], &iONE); + return error; + } + else + return 0.; } }; |