diff options
author | spiros <andyspiros@gmail.com> | 2011-07-19 04:12:01 +0200 |
---|---|---|
committer | spiros <andyspiros@gmail.com> | 2011-07-19 04:12:01 +0200 |
commit | 75c943fd4c2d2ce6454732cc90df910f3e73b182 (patch) | |
tree | 1c9908e305a75cc763648813e524ee71bb407efe | |
parent | Support for result checking in parallel matrix-vector. It works! (diff) | |
download | auto-numerical-bench-75c943fd4c2d2ce6454732cc90df910f3e73b182.tar.gz auto-numerical-bench-75c943fd4c2d2ce6454732cc90df910f3e73b182.tar.bz2 auto-numerical-bench-75c943fd4c2d2ce6454732cc90df910f3e73b182.zip |
Updated much of the parallel BTL. Added first version of (almost
working) pblas/scalapack python module.
-rw-r--r-- | btl/actions/action_parallel_axpy.hh | 141 | ||||
-rw-r--r-- | btl/actions/action_parallel_matrix_vector_product.hh | 62 | ||||
-rw-r--r-- | btl/generic_bench/timers/distributed_perf_analyzer_root.hh | 2 | ||||
-rw-r--r-- | btl/libs/BLACS/gather.h | 10 | ||||
-rw-r--r-- | btl/libs/BLACS/gather_impl.h | 9 | ||||
-rw-r--r-- | btl/libs/BLACS/scatter_impl.h | 5 | ||||
-rw-r--r-- | btl/libs/BLAS/blas.h | 6 | ||||
-rw-r--r-- | btl/libs/PBLAS/main.cpp | 6 | ||||
-rw-r--r-- | btl/libs/PBLAS/pblas.h | 49 | ||||
-rw-r--r-- | btl/libs/PBLAS/pblas_interface_impl.hh | 13 | ||||
-rw-r--r-- | btl/libs/STL/STL_interface.hh | 11 | ||||
-rw-r--r-- | btlbase.py | 8 | ||||
-rw-r--r-- | pblas.py | 67 |
13 files changed, 307 insertions, 82 deletions
diff --git a/btl/actions/action_parallel_axpy.hh b/btl/actions/action_parallel_axpy.hh new file mode 100644 index 0000000..905909d --- /dev/null +++ b/btl/actions/action_parallel_axpy.hh @@ -0,0 +1,141 @@ +#ifndef ACTION_PARALLEL_AXPY +#define ACTION_PARALLEL_AXPY +#include "utilities.h" +#include "STL_interface.hh" +#include <string> +#include <algorithm> +#include "init/init_function.hh" +#include "init/init_vector.hh" +#include "init/init_matrix.hh" + +#include "blas.h" + +using namespace std; + +template<class Interface> +class Action_parallel_axpy { +public: + BTL_DONT_INLINE Action_parallel_axpy( int size ):_size(size), _coef(1.) + { + MESSAGE("Action_parallel_axpy Ctor"); + int iZERO = 0, iONE = 1; + + GlobalRows = _size; + GlobalCols = 1; + BlockRows = 2; + BlockCols= 1; + + int myid, procnum; + blacs_pinfo_(&myid, &procnum); + iamroot = (myid == 0); + + // STL matrix and vector initialization + if (iamroot) { + init_vector<pseudo_random>(Global_x_stl, _size); + init_vector<pseudo_random>(Global_y_stl, _size); + } + + Interface::scatter_matrix(Global_x_stl, Local_x_stl, GlobalRows, GlobalCols, BlockRows, BlockCols, LocalRows, LocalCols); + Interface::scatter_matrix(Global_y_stl, Local_y_stl, GlobalRows, GlobalCols, BlockRows, BlockCols, LocalRows, LocalCols); + + Interface::vector_from_stl(Local_x_ref, Local_x_stl); + Interface::vector_from_stl(Local_x , Local_x_stl); + Interface::vector_from_stl(Local_y_ref, Local_y_stl); + Interface::vector_from_stl(Local_y , Local_y_stl); + + // Descinit + int context = Interface::context(); + int info; + int LD = std::max(1, LocalRows); + descinit_(descX, &_size, &iONE, &BlockRows, &BlockCols, &iZERO, &iZERO, &context, &LD, &info); + descinit_(descY, &_size, &iONE, &BlockRows, &BlockCols, &iZERO, &iZERO, &context, &LD, &info); + + // Copy Y to Test_y + Test_y_stl = Global_y_stl; + } + + // Invalidate copy constructor + Action_parallel_axpy (const Action_parallel_axpy&) + { + INFOS("illegal call to Action_parallel_axpy 
Copy Ctor"); + exit(1); + } + + // Destructor + BTL_DONT_INLINE ~Action_parallel_axpy(){ + + MESSAGE("Action_parallel_axpy Dtor"); + + // deallocation + + Interface::free_vector(Local_x_ref); + Interface::free_vector(Local_y_ref); + + Interface::free_vector(Local_x); + Interface::free_vector(Local_y); + } + + // action name + static inline std::string name() + { + return "axpy_" + Interface::name(); + } + + double nb_op_base( void ){ + return 2.0*_size; + } + + BTL_DONT_INLINE void initialize(){ + Interface::copy_vector(Local_x_ref, Local_x, LocalRows*LocalCols); + Interface::copy_vector(Local_y_ref, Local_y, LocalRows*LocalCols); + } + + BTL_DONT_INLINE void calculate( void ) { + BTL_ASM_COMMENT("#begin axpy"); + Interface::parallel_axpy(_coef, Local_x, descX, Local_y, descY, _size); + BTL_ASM_COMMENT("end axpy"); + } + + void check_result( void ){ + int iONE = 1; + Interface::vector_to_stl(Local_y, Local_y_stl); + Interface::gather_matrix(Global_y_stl, Local_y_stl, GlobalRows, GlobalCols, BlockRows, BlockCols, LocalRows, LocalCols); + + // calculation check + if (iamroot) { + + // Compute YTest + STL_interface<typename Interface::real_type>::axpy(_coef, Global_x_stl, Test_y_stl, _size); + + typename Interface::real_type error = + STL_interface<typename Interface::real_type>::norm_diff(Global_y_stl, Test_y_stl); + + if (error > 1e-5) + std::cerr << "Error: " << error << std::endl; + } + + } + +private: + int _size; + int GlobalRows, GlobalCols, BlockRows, BlockCols, LocalRows, LocalCols; + typename Interface::real_type _coef; + bool iamroot; + + typename Interface::stl_vector Global_x_stl; + typename Interface::stl_vector Global_y_stl; + typename Interface::stl_vector Test_y_stl; + + typename Interface::stl_vector Local_x_stl; + typename Interface::stl_vector Local_y_stl; + + typename Interface::gene_vector Local_x_ref; + typename Interface::gene_vector Local_y_ref; + + typename Interface::gene_vector Local_x; + typename Interface::gene_vector Local_y; + + int 
descX[9], descY[9]; +}; + +#endif diff --git a/btl/actions/action_parallel_matrix_vector_product.hh b/btl/actions/action_parallel_matrix_vector_product.hh index bac8bc3..5c97b1d 100644 --- a/btl/actions/action_parallel_matrix_vector_product.hh +++ b/btl/actions/action_parallel_matrix_vector_product.hh @@ -1,24 +1,5 @@ -//===================================================== -// File : action_matrix_vector_product.hh -// Author : L. Plagne <laurent.plagne@edf.fr)> -// Copyright (C) EDF R&D, lun sep 30 14:23:19 CEST 2002 -//===================================================== -// -// This program is free software; you can redistribute it and/or -// modify it under the terms of the GNU General Public License -// as published by the Free Software Foundation; either version 2 -// of the License, or (at your option) any later version. -// -// This program is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -// GNU General Public License for more details. -// You should have received a copy of the GNU General Public License -// along with this program; if not, write to the Free Software -// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. -// -#ifndef ACTION_MATRIX_VECTOR_PRODUCT -#define ACTION_MATRIX_VECTOR_PRODUCT +#ifndef ACTION_PARALLEL_MATRIX_VECTOR_PRODUCT +#define ACTION_PARALLEL_MATRIX_VECTOR_PRODUCT #include "utilities.h" #include "STL_interface.hh" #include <string> @@ -27,10 +8,7 @@ #include "init/init_vector.hh" #include "init/init_matrix.hh" -#include <complex> -extern "C" { #include "blas.h" -} using namespace std; @@ -62,16 +40,6 @@ public : init_vector<pseudo_random>(Global_A_stl, GlobalRows*GlobalCols); init_vector<pseudo_random>(Global_x_stl, GlobalCols); init_vector<null_function>(Global_y_stl, GlobalRows); - - // Compute YTest (?) 
- Test_y_stl.resize(GlobalRows); - double alpha = 1., beta = 0.; - char notrans = 'N'; - dgemv_(&notrans, &GlobalRows, &GlobalCols, - &alpha, &Global_A_stl[0], &GlobalRows, - &Global_x_stl[0], &iONE, - &beta, &Test_y_stl[0], &iONE - ); } Interface::scatter_matrix(Global_A_stl, Local_A_stl, GlobalRows, GlobalCols, BlockRows, BlockCols, LocalRows, LocalCols); @@ -113,11 +81,11 @@ public : // deallocation - Interface::free_matrix(Local_A_ref, GlobalRows*GlobalCols);; + Interface::free_matrix(Local_A_ref, GlobalRows*GlobalCols); Interface::free_vector(Local_x_ref); Interface::free_vector(Local_y_ref); - Interface::free_matrix(Local_A, GlobalRows*GlobalCols);; + Interface::free_matrix(Local_A, GlobalRows*GlobalCols); Interface::free_vector(Local_x); Interface::free_vector(Local_y); @@ -126,7 +94,7 @@ public : // action name static inline std::string name( void ) { - return "parallel_matrix_vector_" + Interface::name(); + return "matrix_vector_" + Interface::name(); } double nb_op_base( void ){ @@ -134,11 +102,9 @@ public : } BTL_DONT_INLINE void initialize( void ){ - Interface::copy_matrix(Local_A_ref,Local_A,LocalRows*LocalCols); Interface::copy_vector(Local_x_ref,Local_x,LocalXRows*LocalXCols); Interface::copy_vector(Local_y_ref,Local_y,LocalYRows*LocalYCols); - } BTL_DONT_INLINE void calculate( void ) { @@ -148,8 +114,6 @@ public : } BTL_DONT_INLINE void check_result( void ){ - int iONE = 1; - double dmONE = -1.; int GlobalYCols; Interface::vector_to_stl(Local_y, Local_y_stl); @@ -157,11 +121,16 @@ public : // calculation check if (iamroot) { - daxpy_(&GlobalRows, &dmONE, &Global_y_stl[0], &iONE, &Test_y_stl[0], &iONE); - double nrm = dnrm2_(&GlobalRows, &Test_y_stl[0], &iONE); - if (nrm > 1e-5) - std::cerr << "Error: " << nrm << std::endl; + // Compute YTest + Test_y_stl.resize(GlobalRows); + STL_interface<typename Interface::real_type>::matrix_vector_product(Global_A_stl, Global_x_stl, Test_y_stl, _size); + + typename Interface::real_type error = + 
STL_interface<typename Interface::real_type>::norm_diff(Global_y_stl, Test_y_stl); + + if (error > 1e-5) + std::cerr << "Error: " << error << std::endl; } } @@ -194,6 +163,3 @@ private : #endif - - - diff --git a/btl/generic_bench/timers/distributed_perf_analyzer_root.hh b/btl/generic_bench/timers/distributed_perf_analyzer_root.hh index ca59738..98a08ef 100644 --- a/btl/generic_bench/timers/distributed_perf_analyzer_root.hh +++ b/btl/generic_bench/timers/distributed_perf_analyzer_root.hh @@ -52,7 +52,7 @@ public: double time_action = m_time_action / (double(_nb_calc)); /* Check */ - int do_check = (BtlConfig::Instance.checkResults && size<128) ? 1 : 0; + int do_check = (BtlConfig::Instance.checkResults && size<128) ? 1 : 1; igebs2d_(&context, "A", " ", &iONE, &iONE, &do_check, &iONE); if (do_check > 0) { action.initialize(); diff --git a/btl/libs/BLACS/gather.h b/btl/libs/BLACS/gather.h index 101d975..3505233 100644 --- a/btl/libs/BLACS/gather.h +++ b/btl/libs/BLACS/gather.h @@ -1,13 +1,17 @@ #ifndef GATHER_H_ #define GATHER_H_ -//#define TYPENAME float -//#define TYPEPREFIX s -//#include "gather_impl.h" +#define TYPENAME float +#define TYPEPREFIX s +#include "gather_impl.h" +#undef TYPENAME +#undef TYPEPREFIX #define TYPENAME double #define TYPEPREFIX d #include "gather_impl.h" +#undef TYPENAME +#undef TYPEPREFIX #endif /* GATHER_H_ */ diff --git a/btl/libs/BLACS/gather_impl.h b/btl/libs/BLACS/gather_impl.h index 92a52f3..e9b5c6c 100644 --- a/btl/libs/BLACS/gather_impl.h +++ b/btl/libs/BLACS/gather_impl.h @@ -2,11 +2,6 @@ #define CAT_(x,y) x##y #define CAT(x,y) CAT_(x,y) -#ifndef TYPENAME -# define TYPENAME double -# define TYPEPREFIX d -#endif - #define FUNCNAME(name) CAT(CAT(TYPEPREFIX, name),_) #define vector_t std::vector<TYPENAME> @@ -33,8 +28,8 @@ inline void gather( blacs_pinfo_(&myid, &procnum); blacs_gridinfo_(&context, &procrows, &proccols, &myrow, &mycol); bool iamroot = (myrow == rootrow && mycol == rootcol); - double *GlobalMatrix; - const double 
*LocalMatrix = &LocalMatrixVector[0]; + TYPENAME *GlobalMatrix; + const TYPENAME *LocalMatrix = &LocalMatrixVector[0]; /* Broadcast matrix info */ int binfo[2]; diff --git a/btl/libs/BLACS/scatter_impl.h b/btl/libs/BLACS/scatter_impl.h index 5c82ea5..7ff633f 100644 --- a/btl/libs/BLACS/scatter_impl.h +++ b/btl/libs/BLACS/scatter_impl.h @@ -1,11 +1,6 @@ #define CAT_(x,y) x##y #define CAT(x,y) CAT_(x,y) -//#ifndef TYPENAME -//# define TYPENAME double -//# define TYPEPREFIX d -//#endif - #define FUNCNAME(name) CAT(CAT(TYPEPREFIX, name),_) #define vector_t std::vector<TYPENAME> diff --git a/btl/libs/BLAS/blas.h b/btl/libs/BLAS/blas.h index 28f3a4e..67e02e4 100644 --- a/btl/libs/BLAS/blas.h +++ b/btl/libs/BLAS/blas.h @@ -11,6 +11,10 @@ typedef long BLASLONG; typedef unsigned long BLASULONG; #endif +#include <complex> + +extern "C" { + int BLASFUNC(xerbla)(const char *, int *info, int); float BLASFUNC(sdot) (int *, float *, int *, float *, int *); @@ -672,4 +676,6 @@ int BLASFUNC(cpotri)(char *, int *, float *, int *, int *); int BLASFUNC(zpotri)(char *, int *, double *, int *, int *); int BLASFUNC(xpotri)(char *, int *, double *, int *, int *); +} + #endif diff --git a/btl/libs/PBLAS/main.cpp b/btl/libs/PBLAS/main.cpp index 33a4f96..4b64f12 100644 --- a/btl/libs/PBLAS/main.cpp +++ b/btl/libs/PBLAS/main.cpp @@ -8,6 +8,7 @@ #include "blacsinit.hh" #include "pblas_interface.hh" #include "action_parallel_matrix_vector_product.hh" +#include "action_parallel_axpy.hh" #include <string> @@ -17,8 +18,9 @@ int main(int argc, char **argv) { bool iamroot = blacsinit(&argc, &argv); - distr_bench<Action_parallel_matrix_vector_product<pblas_interface<double> > >(10,MAX_MV,NB_POINT,!iamroot); -// Action_parallel_matrix_vector_product<pblas_interface<double> > action(3000); +// distr_bench<Action_parallel_matrix_vector_product<pblas_interface<double> > >(10,MAX_MV,NB_POINT,!iamroot); + distr_bench<Action_parallel_axpy<pblas_interface<REAL_TYPE> > >(10,MAX_MV,NB_POINT,!iamroot); +// 
Action_parallel_axpy<pblas_interface<double> > action(8); // action.initialize(); // action.calculate(); // action.check_result(); diff --git a/btl/libs/PBLAS/pblas.h b/btl/libs/PBLAS/pblas.h index 4144292..adc6c91 100644 --- a/btl/libs/PBLAS/pblas.h +++ b/btl/libs/PBLAS/pblas.h @@ -5,19 +5,42 @@ extern "C" { #endif - /* PBLAS declarations */ - void pdgemv_(const char*, const int*, const int*, - const double*, const double*, const int*, const int*, const int*, - const double*, const int*, const int*, const int*, const int*, - const double*, double*, const int*, const int*, const int*, const int*); - void psgemv_(const char*, const int*, const int*, - const float*, const float*, const int*, const int*, const int*, - const float*, const int*, const int*, const int*, const int*, - const float*, float*, const int*, const int*, const int*, const int*); - - - int numroc_(const int*, const int*, const int*, const int*, const int*); - int descinit_(const int*, const int*, const int*, const int*, const int*, const int*, const int*, const int*, const int*, int*); + int numroc_(const int*, const int*, const int*, const int*, const int*); + int descinit_(const int*, const int*, const int*, const int*, const int*, const int*, const int*, const int*, const int*, int*); + + + /* Level 1 */ + + // Single + void psaxpy_(const int*, const float*, + const float*, const int*, const int*, const int*, const int*, + const float*, const int*, const int*, const int*, const int* + ); + + // Double + void pdaxpy_(const int*, const double*, + const double*, const int*, const int*, const int*, const int*, + const double*, const int*, const int*, const int*, const int* + ); + + + + /* Level 2 */ + + // Single + void psgemv_(const char*, const int*, const int*, + const float*, const float*, const int*, const int*, const int*, + const float*, const int*, const int*, const int*, const int*, + const float*, float*, const int*, const int*, const int*, const int* + ); + + // Double + void 
pdgemv_(const char*, const int*, const int*, + const double*, const double*, const int*, const int*, const int*, + const double*, const int*, const int*, const int*, const int*, + const double*, double*, const int*, const int*, const int*, const int* + ); + #ifdef __cplusplus } diff --git a/btl/libs/PBLAS/pblas_interface_impl.hh b/btl/libs/PBLAS/pblas_interface_impl.hh index 14b27df..b534a4e 100644 --- a/btl/libs/PBLAS/pblas_interface_impl.hh +++ b/btl/libs/PBLAS/pblas_interface_impl.hh @@ -10,6 +10,19 @@ public: return MAKE_STRING(PBLASNAME); } + static inline void parallel_axpy(const SCALAR& coef, + gene_vector& x, int *descX, + gene_vector& y, int *descY, + const int& size + ) + { + int iZERO = 0, iONE = 1; + PBLAS_FUNC(axpy)(&size, &coef, + x, &iONE, &iONE, descX, &iONE, + y, &iONE, &iONE, descY, &iONE + ); + } + static inline void parallel_matrix_vector_product( int GlobalRows, int GlobalCols, gene_matrix& A, int *descA, diff --git a/btl/libs/STL/STL_interface.hh b/btl/libs/STL/STL_interface.hh index 93e76bd..060cb69 100644 --- a/btl/libs/STL/STL_interface.hh +++ b/btl/libs/STL/STL_interface.hh @@ -133,6 +133,17 @@ public : } } + static inline void matrix_vector_product(gene_vector& A, gene_vector & B, gene_vector & X, int N) + { + real somme; + for (int i=0;i<N;i++){ + somme=0.0; + for (int j=0;j<N;j++) + somme+=A[j*N+i]*B[j]; + X[i]=somme; + } + } + static inline void symv(gene_matrix & A, gene_vector & B, gene_vector & X, int N) { for (int j=0; j<N; ++j) @@ -97,19 +97,21 @@ class BTLTest(basemodule.BaseTest): logfile.close() return proc.returncode, exe, logfile.name - def _executeTest(self, exe): + def _executeTest(self, exe, preargs=[]): # Log dynamic link lddlogfile = file(pjoin(self.logdir, 'ldd.log'), 'w') sp.Popen(['ldd', '-v', exe], stdout=lddlogfile, env=self.runenv).wait() # Open pipe logfile = file(pjoin(self.logdir, 'btlrun.log'), 'w') - args = [exe] + list(self.tests) + args = preargs + [exe] + list(self.tests) logfile.write(' '.join([n+'='+v for 
n,v in self.runenv.items()]) + ' ') logfile.write(' '.join(args) + '\n') logfile.write(80*'-' + '\n') proc = sp.Popen(args, bufsize=1, stdout=sp.PIPE, stderr=sp.PIPE, - env=self.runenv, cwd=self.testdir) + #env=self.runenv, + env={'LD_LIBRARY_PATH' : self.runenv['LD_LIBRARY_PATH']}, + cwd=self.testdir) # Interpret output while True: diff --git a/pblas.py b/pblas.py new file mode 100644 index 0000000..e1ec738 --- /dev/null +++ b/pblas.py @@ -0,0 +1,67 @@ +import os, btlbase, shlex, subprocess as sp + +numproc = 4 + +class Module(btlbase.BTLBase): + def _initialize(self): + self.libname = "scalapack" + self.avail = ['axpy', 'matrix_vector'] + + def _parse_args(self, args): + # Parse arguments + tests = [] + skip = 0 + + for i, a in enumerate(args): + if skip != 0: + skip -= 1 + continue + if a == '-n': + skip += 1 + numproc = args[i+1] + continue + if a in self.avail: + tests.append(a) + continue + raise Exception("Argument not recognized: " + a) + + # Sort tests + self.tests = [i for i in self.avail if i in tests] + + # If no test is specified, then choose four standard tests + if len(self.tests) == 0: + self.tests = self.avail + + btlbase.BTLBase._parse_args(self, args) + + @staticmethod + def _testClass(): + return PBLASTest + + +class PBLASTest(btlbase.BTLTest): +# def __init__(self, *args, **kwargs): +# os.environ['CXX'] = 'mpic++' +# btlbase.BTLTest.__init__(self, *args, **kwargs) + + def _get_flags(self): + proc = sp.Popen(['mpic++', '-showme'], stdout=sp.PIPE) + out = proc.communicate()[0] + if proc.returncode != 0: + raise Exception("An MPI implementation is needed in order to" \ + + " run the scalapack tests.") + return shlex.split(out)[1:] + btlbase.BTLTest._get_flags(self) + + def _executeTest(self, exe): + btlbase.BTLTest._executeTest(self, exe, ['mpirun', '-n', str(numproc)]) + + @staticmethod + def _btl_source(): + return "libs/PBLAS/main.cpp" + + @staticmethod + def _btl_includes(): + return ["libs/BLAS", "libs/BLACS", "libs/PBLAS"] + + def 
_btl_defines(self): + return ["PBLASNAME="+self.libname]
\ No newline at end of file |