src/cudaSolvers/cudaBiCGStab/bicgAinv.cu

   1 /**********************************************************************\
   2   ______  __    __   _______  _______  __       __   __   __   __  ___
   3  /      ||  |  |  | |   ____||   ____||  |     |  | |  \ |  | |  |/  /
   4 |  ,----'|  |  |  | |  |__   |  |__   |  |     |  | |   \|  | |  '  /
   5 |  |     |  |  |  | |   __|  |   __|  |  |     |  | |  . `  | |    <
   6 |  `----.|  `--'  | |  |     |  |     |  `----.|  | |  |\   | |  .  \
   7  \______| \______/  |__|     |__|     |_______||__| |__| \__| |__|\__\
   8
   9 Cuda For FOAM Link
  10
  11 cufflink is a library for linking numerical methods based on Nvidia's
  12 Compute Unified Device Architecture (CUDA™) C/C++ programming language
  13 and OpenFOAM®.
  14
  15 Please note that cufflink is not approved or endorsed by OpenCFD®
  16 Limited, the owner of the OpenFOAM® and OpenCFD® trademarks and
  17 producer of OpenFOAM® software.
  18
  19 The official web-site of OpenCFD® Limited is www.openfoam.com .
  20
  21 ------------------------------------------------------------------------
  22 This file is part of cufflink.
  23
  24     cufflink is free software: you can redistribute it and/or modify
  25     it under the terms of the GNU General Public License as published by
  26     the Free Software Foundation, either version 3 of the License, or
  27     (at your option) any later version.
  28
  29     cufflink is distributed in the hope that it will be useful,
  30     but WITHOUT ANY WARRANTY; without even the implied warranty of
  31     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  32     GNU General Public License for more details.
  33
  34     You should have received a copy of the GNU General Public License
  35     along with cufflink.  If not, see <http://www.gnu.org/licenses/>.
  36
  37     Author
  38     Daniel P. Combest.  All rights reserved.
  39     Modifications by Dominik Christ, Wikki Ltd.
  40
  41     Description
  42     diagonal preconditioned conjugate gradient
  43     solver for symmetric Matrices using a CUSP CUDA™ based solver.
  44
  45 \**********************************************************************/
  46
  47 #include "cudaTypes.H"
  48
  49 // CUSP Includes
  50 #include <cusp/detail/config.h>
  51 #include <cusp/verify.h>
  52 #include <cusp/precond/ainv.h>
  53 #include <cusp/coo_matrix.h>
  54 #include <cusp/blas.h>
  55 #include <cusp/multiply.h>
  56
  57 extern "C" void bicgAinv
  58 (
  59     cuspEquationSystem* ces,
  60     cudaSolverPerformance* solverPerf,
  61     ValueType drop_tolerance,
  62     int nonzero_per_row,
  63     bool lin_dropping,
  64     int lin_param
  65 )
  66 {
  67     cusp::coo_matrix<IndexType, ValueType, MemorySpace> A(ces->A);
  68     cusp::array1d<ValueType, MemorySpace> X(ces->X);
  69     cusp::array1d<ValueType, MemorySpace> B(ces->B);
  70
  71     // Fill in the rest of the diag (rows and col)
  72     // Determine row indices of diagonal values and fill A COO matrix
  73     thrust::sequence
  74     (
  75         A.row_indices.begin(),
  76         A.row_indices.begin() + ces->nCells
  77     );
  78
  79     // Determine column indices of diagonal values and fill A COO matrix
  80     thrust::sequence
  81     (
  82         A.column_indices.begin(),
  83         A.column_indices.begin() + ces->nCells
  84     );
  85
  86     // Sorted coo by row and column. speeds code up a little bit more
  87     A.sort_by_row_and_column();
  88
  89     // Set storage
  90     // #include "cufflink/setGPUStorage.H"
  91
  92     if (solverPerf->debugCusp)
  93     {
  94         std::cout << "   Using Ainv preconditioner\n";
  95     }
  96
  97     cusp::precond::bridson_ainv<ValueType, MemorySpace> M
  98     (
  99          A,
 100          drop_tolerance,
 101          nonzero_per_row,
 102          lin_dropping,
 103          lin_param
 104     );
 105
 106     // Start Krylov solver
 107     assert(A.num_rows == A.num_cols);   // Sanity check
 108
 109     const size_t N = A.num_rows;
 110
 111     // Allocate workspace
 112     cusp::array1d<ValueType,MemorySpace> y(N);
 113
 114     cusp::array1d<ValueType,MemorySpace> p(N);
 115     cusp::array1d<ValueType,MemorySpace> r(N);
 116     cusp::array1d<ValueType,MemorySpace> r_star(N);
 117     cusp::array1d<ValueType,MemorySpace> s(N);
 118     cusp::array1d<ValueType,MemorySpace> Mp(N);
 119     cusp::array1d<ValueType,MemorySpace> AMp(N);
 120     cusp::array1d<ValueType,MemorySpace> Ms(N);
 121     cusp::array1d<ValueType,MemorySpace> AMs(N);
 122
 123     // y <- Ax
 124     cusp::multiply(A, X, y);
 125
 126     // Define the normalization factor
 127     ValueType normFactor = 1.0;
 128
 129 #   include "buildNormFactor.H"
 130
 131     // r <- b - A*x
 132     cusp::blas::axpby(B, y, r, ValueType(1), ValueType(-1));
 133
 134     // p <- r
 135     cusp::blas::copy(r, p);
 136
 137     // r_star <- r
 138     cusp::blas::copy(r, r_star);
 139
 140     ValueType r_r_star_old = cusp::blas::dotc(r_star, r);
 141
 142
 143     ValueType normR = cusp::blas::nrm2(r)/normFactor;
 144     ValueType normR0 = normR;   // Initial residual
 145     solverPerf->iRes    = normR0;
 146     int count = 0;
 147
 148     while
 149     (
 150         normR > (solverPerf->tol)
 151      && count < (solverPerf->maxIter)
 152      && normR/normR0 >= (solverPerf->relTol)
 153      || count < solverPerf->minIter
 154     )
 155     {
 156         // Mp = M*p
 157         cusp::multiply(M, p, Mp);
 158
 159         // AMp = A*Mp
 160         cusp::multiply(A, Mp, AMp);
 161
 162         // alpha = (r_j, r_star) / (A*M*p, r_star)
 163         ValueType alpha = r_r_star_old/cusp::blas::dotc(r_star, AMp);
 164
 165         // s_j = r_j - alpha * AMp
 166         cusp::blas::axpby(r, AMp, s, ValueType(1), ValueType(-alpha));
 167
 168         ValueType normS = cusp::blas::nrm2(s)/normFactor;
 169
 170         if
 171         (
 172            !(
 173                 normS > (solverPerf->tol)
 174              && normS/normR0 >= (solverPerf->relTol)
 175             )
 176         )
 177         {
 178             // is this right?
 179             // x += alpha*M*p_j
 180             cusp::blas::axpby(X, Mp, X, ValueType(1), ValueType(alpha));
 181
 182             // y <- Ax
 183             cusp::multiply(A, X, y);
 184
 185             // r <- b - A*x
 186             cusp::blas::axpby(B, y, r, ValueType(1), ValueType(-1));
 187             // not sure we should be measuring r here...need to look again.
 188             normR = cusp::blas::nrm2(r)/normFactor;
 189
 190             count++;
 191
 192             break;
 193         }
 194
 195         // Ms = M*s_j
 196         cusp::multiply(M, s, Ms);
 197
 198         // AMs = A*Ms
 199         cusp::multiply(A, Ms, AMs);
 200
 201         // omega = (AMs, s) / (AMs, AMs)
 202         ValueType omega = cusp::blas::dotc(AMs, s)/cusp::blas::dotc(AMs, AMs);
 203
 204         // x_{j+1} = x_j + alpha*M*p_j + omega*M*s_j
 205         cusp::blas::axpbypcz(X, Mp, Ms, X, ValueType(1), alpha, omega);
 206
 207         // r_{j+1} = s_j - omega*A*M*s
 208         cusp::blas::axpby(s, AMs, r, ValueType(1), -omega);
 209
 210         // beta_j = (r_{j+1}, r_star) / (r_j, r_star) * (alpha/omega)
 211         ValueType r_r_star_new = cusp::blas::dotc(r_star, r);
 212         ValueType beta = (r_r_star_new/r_r_star_old)*(alpha/omega);
 213         r_r_star_old = r_r_star_new;
 214
 215         // p_{j+1} = r_{j+1} + beta*(p_j - omega*A*M*p)
 216         cusp::blas::axpbypcz(r, p, AMp, p, ValueType(1), beta, -beta*omega);
 217
 218         // not dure we should be measuring r here...need to look again.
 219         normR = cusp::blas::nrm2(r)/normFactor;
 220
 221         count++;
 222     }
 223     // End Krylov Solver
 224
 225     // Final residual
 226     solverPerf->fRes = cusp::blas::nrm2(r)/normFactor;
 227     solverPerf->nIterations = count;
 228
 229     // converged?
 230     if
 231     (
 232         solverPerf->fRes<=solverPerf->tol
 233      || solverPerf->fRes/solverPerf->iRes <= solverPerf->relTol
 234     )
 235     {
 236         solverPerf->converged = true;
 237     }
 238     else
 239     {
 240         solverPerf->converged = false;
 241     }
 242
 243     // Pass the solution vector back
 244     ces->X = X;
 245 }