src/cudaSolvers/cudaCG/cgAinv.cu

   1 /**********************************************************************\
   2   ______  __    __   _______  _______  __       __   __   __   __  ___
   3  /      ||  |  |  | |   ____||   ____||  |     |  | |  \ |  | |  |/  /
   4 |  ,----'|  |  |  | |  |__   |  |__   |  |     |  | |   \|  | |  '  /
   5 |  |     |  |  |  | |   __|  |   __|  |  |     |  | |  . `  | |    <
   6 |  `----.|  `--'  | |  |     |  |     |  `----.|  | |  |\   | |  .  \
   7  \______| \______/  |__|     |__|     |_______||__| |__| \__| |__|\__\
   8
   9 Cuda For FOAM Link
  10
  11 cufflink is a library for linking numerical methods based on Nvidia's
  12 Compute Unified Device Architecture (CUDA™) C/C++ programming language
  13 and OpenFOAM®.
  14
  15 Please note that cufflink is not approved or endorsed by ESI-OpenCFD®
  16 Limited, the owner of the OpenFOAM® and OpenCFD® trademarks and
  17 producer of OpenFOAM® software.
  18
  19 The official web-site of OpenCFD® Limited is www.openfoam.com .
  20
  21 ------------------------------------------------------------------------
  22 This file is part of cufflink.
  23
  24     cufflink is free software: you can redistribute it and/or modify
  25     it under the terms of the GNU General Public License as published by
  26     the Free Software Foundation, either version 3 of the License, or
  27     (at your option) any later version.
  28
  29     cufflink is distributed in the hope that it will be useful,
  30     but WITHOUT ANY WARRANTY; without even the implied warranty of
  31     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  32     GNU General Public License for more details.
  33
  34     You should have received a copy of the GNU General Public License
  35     along with cufflink.  If not, see <http://www.gnu.org/licenses/>.
  36
  37     Author
  38     Daniel P. Combest.  All rights reserved.
  39     Modifications by Dominik Christ, Wikki Ltd.
  40
  41     Description
  42     diagonal preconditioned conjugate gradient
  43     solver for symmetric Matrices using a CUSP CUDA™ based solver.
  44
  45 \**********************************************************************/
  46
  47 #include "cudaTypes.H"
  48
  49 // CUSP Includes
  50 #include <cusp/detail/config.h>
  51 #include <cusp/verify.h>
  52 #include <cusp/precond/ainv.h>
  53 #include <cusp/coo_matrix.h>
  54 #include <cusp/blas.h>
  55 #include <cusp/multiply.h>
  56
  57 extern "C" void cgAinv
  58 (
  59     cuspEquationSystem* ces,
  60     cudaSolverPerformance* solverPerf,
  61     ValueType drop_tolerance,
  62     int nonzero_per_row,
  63     bool lin_dropping,
  64     int lin_param
  65 )
  66 {
  67     // Populate A
  68     #include "fillCOOMatrix.H"
  69
  70     // Set storage
  71     // #include "cufflink/setGPUStorage.H"
  72
  73     if (solverPerf->debugCusp)
  74     {
  75         std::cout<<"   Using Ainv preconditioner\n";
  76     }
  77
  78     cusp::precond::bridson_ainv<ValueType, MemorySpace> M
  79     (
  80         A,
  81         drop_tolerance,
  82         nonzero_per_row,
  83         lin_dropping,
  84         lin_param
  85     );
  86
  87      // Start the krylov solver
  88     assert(A.num_rows == A.num_cols); // sanity check
  89
  90     const size_t N = A.num_rows;
  91
  92     // allocate workspace
  93     cusp::array1d<ValueType,MemorySpace> y(N);
  94     cusp::array1d<ValueType,MemorySpace> z(N);
  95     cusp::array1d<ValueType,MemorySpace> r(N);
  96     cusp::array1d<ValueType,MemorySpace> p(N);
  97
  98     // y <- Ax
  99     cusp::multiply(A, X, y);
 100
 101     // Define the normalization factor
 102     ValueType normFactor = 1.0;
 103
 104 #   include "buildNormFactor.H"
 105
 106     // r <- b - A*x
 107     cusp::blas::axpby(B, y, r, ValueType(1), ValueType(-1));
 108
 109     // z <- M*r
 110     cusp::multiply(M, r, z);
 111
 112     // p <- z
 113     cusp::blas::copy(z, p);
 114
 115     // rz = <r^H, z>
 116     ValueType rz = cusp::blas::dotc(r, z);
 117
 118     ValueType normR = cusp::blas::nrm2(r)/normFactor;
 119     ValueType normR0 = normR;   // Initial residual
 120     solverPerf->iRes = normR0;
 121     int count = 0;
 122
 123     while
 124     (
 125         normR > (solverPerf->tol)
 126      && count < (solverPerf->maxIter)
 127      && normR/normR0 >= (solverPerf->relTol)
 128      || count < solverPerf->minIter
 129     )
 130     {
 131         // y <- Ap
 132         cusp::multiply(A, p, y);
 133
 134         // alpha <- <r,z>/<y,p>
 135         ValueType alpha =  rz / cusp::blas::dotc(y, p);
 136
 137         // x <- x + alpha * p
 138         cusp::blas::axpy(p, X, alpha);
 139
 140         // r <- r - alpha * y
 141         cusp::blas::axpy(y, r, -alpha);
 142
 143         // z <- M*r
 144         cusp::multiply(M, r, z);
 145
 146         ValueType rz_old = rz;
 147
 148         // rz = <r^H, z>
 149         rz = cusp::blas::dotc(r, z);
 150
 151         // beta <- <r_{i+1},r_{i+1}>/<r,r>
 152         ValueType beta = rz / rz_old;
 153
 154         // p <- r + beta*p should be p <- z + beta*p
 155         cusp::blas::axpby(z, p, p, ValueType(1), beta);
 156
 157         normR = cusp::blas::nrm2(r)/normFactor;
 158
 159         count++;
 160     }
 161     // End the krylov solver
 162
 163     // Final residual
 164     solverPerf->fRes = normR;
 165     solverPerf->nIterations = count;
 166
 167     // Converged?
 168     if
 169     (
 170         solverPerf->fRes<=solverPerf->tol
 171      || solverPerf->fRes/solverPerf->iRes<=solverPerf->relTol
 172     )
 173     {
 174         solverPerf->converged = true;
 175     }
 176     else
 177     {
 178         solverPerf->converged = false;
 179     }
 180
 181     // Pass the solution vector back
 182     ces->X = X;
 183 }