src/expokit.f

   1 ! from expokit, free for non-commercial use
   2
   3 *----------------------------------------------------------------------|
   4       subroutine DMEXPV( n, m, t, v, w, tol, anorm,
   5      .                   wsp,lwsp, iwsp,liwsp, matvec, itrace,iflag )
   6
   7       implicit none
   8       integer n, m, lwsp, liwsp, itrace, iflag, iwsp(liwsp)
   9       double precision t, tol, anorm, v(n), w(n), wsp(lwsp)
  10       external matvec
  11
  12 *-----Purpose----------------------------------------------------------|
  13 *
  14 *---  DMEXPV computes w = exp(t*A)*v - Customised for MARKOV CHAINS.
  15 *
  16 *     It does not compute the matrix exponential in isolation but
  17 *     instead, it computes directly the action of the exponential
  18 *     operator on the operand vector. This way of doing so allows
  19 *     for addressing large sparse problems.
  20 *
  21 *     The method used is based on Krylov subspace projection
  22 *     techniques and the matrix under consideration interacts only
  23 *     via the external routine `matvec' performing the matrix-vector
  24 *     product (matrix-free method).
  25 *
  26 *     This is a customised version for Markov Chains. This means that a
  27 *     check is done within this code to ensure that the resulting vector
  28 *     w is a probability vector, i.e., w must have all its components
  29 *     in [0,1], with sum equal to 1. This check is done at some expense
  30 *     and the user may try DGEXPV which is cheaper since it ignores
  31 *     probability constraints.
  32 *
  33 *     IMPORTANT: The check assumes that the transition rate matrix Q
  34 *                satisfies Qe = 0, where e=(1,...,1)'. Don't use DMEXPV
  35 *                if this condition does not hold. Use DGEXPV instead.
  36 *                DMEXPV/DGEXPV require the matrix-vector product
  37 *                y = A*x = Q'*x, i.e, the TRANSPOSE of Q times a vector.
  38 *                Failure to remember this leads to wrong results.
  39 *
  40 *-----Arguments--------------------------------------------------------|
  41 *
  42 *     n      : (input) order of the principal matrix A.
  43 *
  44 *     m      : (input) maximum size for the Krylov basis.
  45 *
  46 *     t      : (input) time at wich the solution is needed (can be < 0).
  47 *
  48 *     v(n)   : (input) given operand vector.
  49 *
  50 *     w(n)   : (output) computed approximation of exp(t*A)*v.
  51 *
  52 *     tol    : (input/output) the requested acurracy tolerance on w.
  53 *              If on input tol=0.0d0 or tol is too small (tol.le.eps)
  54 *              the internal value sqrt(eps) is used, and tol is set to
  55 *              sqrt(eps) on output (`eps' denotes the machine epsilon).
  56 *              (`Happy breakdown' is assumed if h(j+1,j) .le. anorm*tol)
  57 *
  58 *     anorm  : (input) an approximation of some norm of A.
  59 *
  60 *   wsp(lwsp): (workspace) lwsp .ge. n*(m+1)+n+(m+2)^2+4*(m+2)^2+ideg+1
  61 *                                   +---------+-------+---------------+
  62 *              (actually, ideg=6)        V        H     wsp for PADE
  63 *
  64 * iwsp(liwsp): (workspace) liwsp .ge. m+2
  65 *
  66 *     matvec : external subroutine for matrix-vector multiplication.
  67 *              synopsis: matvec( x, y )
  68 *                        double precision x(*), y(*)
  69 *              computes: y(1:n) <- A*x(1:n)
  70 *                        where A is the principal matrix.
  71 *
  72 *              IMPORTANT: DMEXPV requires the product y = Ax = Q'x, i.e.
  73 *              the TRANSPOSE of the transition rate matrix.
  74 *
  75 *     itrace : (input) running mode. 0=silent, 1=print step-by-step info
  76 *
  77 *     iflag  : (output) exit flag.
  78 *              <0 - bad input arguments
  79 *               0 - no problem
  80 *               1 - maximum number of steps reached without convergence
  81 *               2 - requested tolerance was too high
  82 *
  83 *-----Accounts on the computation--------------------------------------|
  84 *     Upon exit, an interested user may retrieve accounts on the
  85 *     computations. They are located in the workspace arrays wsp and
  86 *     iwsp as indicated below:
  87 *
  88 *     location  mnemonic                 description
  89 *     -----------------------------------------------------------------|
  90 *     iwsp(1) = nmult, number of matrix-vector multiplications used
  91 *     iwsp(2) = nexph, number of Hessenberg matrix exponential evaluated
  92 *     iwsp(3) = nscale, number of repeated squaring involved in Pade
  93 *     iwsp(4) = nstep, number of integration steps used up to completion
  94 *     iwsp(5) = nreject, number of rejected step-sizes
  95 *     iwsp(6) = ibrkflag, set to 1 if `happy breakdown' and 0 otherwise
  96 *     iwsp(7) = mbrkdwn, if `happy brkdown', basis-size when it occured
  97 *     -----------------------------------------------------------------|
  98 *     wsp(1)  = step_min, minimum step-size used during integration
  99 *     wsp(2)  = step_max, maximum step-size used during integration
 100 *     wsp(3)  = x_round, maximum among all roundoff errors (lower bound)
 101 *     wsp(4)  = s_round, sum of roundoff errors (lower bound)
 102 *     wsp(5)  = x_error, maximum among all local truncation errors
 103 *     wsp(6)  = s_error, global sum of local truncation errors
 104 *     wsp(7)  = tbrkdwn, if `happy breakdown', time when it occured
 105 *     wsp(8)  = t_now, integration domain successfully covered
 106 *     wsp(9)  = hump, i.e., max||exp(sA)||, s in [0,t] (or [t,0] if t<0)
 107 *     wsp(10) = ||w||/||v||, scaled norm of the solution w.
 108 *     -----------------------------------------------------------------|
 109 *     The `hump' is a measure of the conditioning of the problem. The
 110 *     matrix exponential is well-conditioned if hump = 1, whereas it is
 111 *     poorly-conditioned if hump >> 1. However the solution can still be
 112 *     relatively fairly accurate even when the hump is large (the hump
 113 *     is an upper bound), especially when the hump and the scaled norm
 114 *     of w [this is also computed and returned in wsp(10)] are of the
 115 *     same order of magnitude (further details in reference below).
 116 *     Markov chains are usually well-conditioned problems.
 117 *
 118 *----------------------------------------------------------------------|
 119 *-----The following parameters may also be adjusted herein-------------|
 120 *
 121       integer mxstep, mxreject, ideg
 122       double precision delta, gamma
 123       parameter( mxstep   = 500,
 124      .           mxreject = 0,
 125      .           ideg     = 6,
 126      .           delta    = 1.2d0,
 127      .           gamma    = 0.9d0 )
 128
 129 *     mxstep  : maximum allowable number of integration steps.
 130 *               The value 0 means an infinite number of steps.
 131 *
 132 *     mxreject: maximum allowable number of rejections at each step.
 133 *               The value 0 means an infinite number of rejections.
 134 *
 135 *     ideg    : the Pade approximation of type (ideg,ideg) is used as
 136 *               an approximation to exp(H). The value 0 switches to the
 137 *               uniform rational Chebyshev approximation of type (14,14)
 138 *
 139 *     delta   : local truncation error `safety factor'
 140 *
 141 *     gamma   : stepsize `shrinking factor'
 142 *
 143 *----------------------------------------------------------------------|
 144 *     Roger B. Sidje (rbs@maths.uq.edu.au)
 145 *     EXPOKIT: Software Package for Computing Matrix Exponentials.
 146 *     ACM - Transactions On Mathematical Software, 24(1):130-156, 1998
 147 *----------------------------------------------------------------------|
 148 *
 149       integer i, j, k1, mh, mx, iv, ih, j1v, ns, ifree, lfree, iexph,
 150      .        ireject,ibrkflag,mbrkdwn, nmult, nreject, nexph, nscale,
 151      .        nstep
 152       double precision sgn, t_out, tbrkdwn, step_min,step_max, err_loc,
 153      .                 s_error, x_error, t_now, t_new, t_step, t_old,
 154      .                 xm, beta, break_tol, p1, p2, p3, eps, rndoff,
 155      .                 vnorm, avnorm, hj1j, hij, hump, SQR1,
 156      .                 roundoff, s_round, x_round
 157
 158       intrinsic AINT,ABS,DBLE,LOG10,MAX,MIN,NINT,SIGN,SQRT
 159       double precision DDOT, DNRM2, DASUM
 160
 161 *---  check restrictions on input parameters ...
 162       iflag = 0
 163       if ( lwsp.lt.n*(m+2)+5*(m+2)**2+ideg+1 ) iflag = -1
 164       if ( liwsp.lt.m+2 ) iflag = -2
 165       if ( m.ge.n .or. m.le.0 ) iflag = -3
 166       if ( iflag.ne.0 ) stop 'bad sizes (in input of DMEXPV)'
 167 *
 168 *---  initialisations ...
 169 *
 170       k1 = 2
 171       mh = m + 2
 172       iv = 1
 173       ih = iv + n*(m+1) + n
 174       ifree = ih + mh*mh
 175       lfree = lwsp - ifree + 1
 176
 177       ibrkflag = 0
 178       mbrkdwn  = m
 179       nmult    = 0
 180       nreject  = 0
 181       nexph    = 0
 182       nscale   = 0
 183
 184       sgn      = SIGN( 1.0d0,t )
 185       t_out    = ABS( t )
 186       tbrkdwn  = 0.0d0
 187       step_min = t_out
 188       step_max = 0.0d0
 189       nstep    = 0
 190       s_error  = 0.0d0
 191       s_round  = 0.0d0
 192       x_error  = 0.0d0
 193       x_round  = 0.0d0
 194       t_now    = 0.0d0
 195       t_new    = 0.0d0
 196
 197       p1 = 4.0d0/3.0d0
 198  1    p2 = p1 - 1.0d0
 199       p3 = p2 + p2 + p2
 200       eps = ABS( p3-1.0d0 )
 201       if ( eps.eq.0.0d0 ) go to 1
 202       if ( tol.le.eps ) tol = SQRT( eps )
 203       rndoff = eps*anorm
 204
 205       break_tol = 1.0d-7
 206 *>>>  break_tol = tol
 207 *>>>  break_tol = anorm*tol
 208
 209       call DCOPY( n, v,1, w,1 )
 210       beta = DNRM2( n, w,1 )
 211       vnorm = beta
 212       hump = beta
 213 *
 214 *---  obtain the very first stepsize ...
 215 *
 216       SQR1 = SQRT( 0.1d0 )
 217       xm = 1.0d0/DBLE( m )
 218       p1 = tol*(((m+1)/2.72D0)**(m+1))*SQRT(2.0D0*3.14D0*(m+1))
 219       t_new = (1.0d0/anorm)*(p1/(4.0d0*beta*anorm))**xm
 220       p1 = 10.0d0**(NINT( LOG10( t_new )-SQR1 )-1)
 221       t_new = AINT( t_new/p1 + 0.55d0 ) * p1
 222 *
 223 *---  step-by-step integration ...
 224 *
 225  100  if ( t_now.ge.t_out ) goto 500
 226
 227       nstep = nstep + 1
 228       t_step = MIN( t_out-t_now, t_new )
 229
 230       p1 = 1.0d0/beta
 231       do i = 1,n
 232          wsp(iv + i-1) = p1*w(i)
 233       enddo
 234       do i = 1,mh*mh
 235          wsp(ih+i-1) = 0.0d0
 236       enddo
 237 *
 238 *---  Arnoldi loop ...
 239 *
 240       j1v = iv + n
 241       do 200 j = 1,m
 242          nmult = nmult + 1
 243          call matvec( wsp(j1v-n), wsp(j1v) )
 244          do i = 1,j
 245             hij = DDOT( n, wsp(iv+(i-1)*n),1, wsp(j1v),1 )
 246             call DAXPY( n, -hij, wsp(iv+(i-1)*n),1, wsp(j1v),1 )
 247             wsp(ih+(j-1)*mh+i-1) = hij
 248          enddo
 249          hj1j = DNRM2( n, wsp(j1v),1 )
 250 *---     if `happy breakdown' go straightforward at the end ...
 251          if ( hj1j.le.break_tol ) then
 252             print*,'happy breakdown: mbrkdwn =',j,' h =',hj1j
 253             k1 = 0
 254             ibrkflag = 1
 255             mbrkdwn = j
 256             tbrkdwn = t_now
 257             t_step = t_out-t_now
 258             goto 300
 259          endif
 260          wsp(ih+(j-1)*mh+j) = hj1j
 261          call DSCAL( n, 1.0d0/hj1j, wsp(j1v),1 )
 262          j1v = j1v + n
 263  200  continue
 264       nmult = nmult + 1
 265       call matvec( wsp(j1v-n), wsp(j1v) )
 266       avnorm = DNRM2( n, wsp(j1v),1 )
 267 *
 268 *---  set 1 for the 2-corrected scheme ...
 269 *
 270  300  continue
 271       wsp(ih+m*mh+m+1) = 1.0d0
 272 *
 273 *---  loop while ireject<mxreject until the tolerance is reached ...
 274 *
 275       ireject = 0
 276  401  continue
 277
 278 *
 279 *---  compute w = beta*V*exp(t_step*H)*e1 ..
 280 *
 281       nexph = nexph + 1
 282       mx = mbrkdwn + k1
 283       if ( ideg.ne.0 ) then
 284 *---     irreducible rational Pade approximation ...
 285          call DGPADM( ideg, mx, sgn*t_step, wsp(ih),mh,
 286      .                wsp(ifree),lfree, iwsp, iexph, ns, iflag )
 287          iexph = ifree + iexph - 1
 288          nscale = nscale + ns
 289       else
 290 *---     uniform rational Chebyshev approximation ...
 291          iexph = ifree
 292          do i = 1,mx
 293             wsp(iexph+i-1) = 0.0d0
 294          enddo
 295          wsp(iexph) = 1.0d0
 296          call DNCHBV(mx,sgn*t_step,wsp(ih),mh,wsp(iexph),wsp(ifree+mx))
 297       endif
 298
 299  402  continue
 300 *
 301 *---  error estimate ...
 302 *
 303       if ( k1.eq.0 ) then
 304          err_loc = tol
 305       else
 306          p1 = ABS( wsp(iexph+m) )   * beta
 307          p2 = ABS( wsp(iexph+m+1) ) * beta * avnorm
 308          if ( p1.gt.10.0d0*p2 ) then
 309             err_loc = p2
 310             xm = 1.0d0/DBLE( m )
 311          elseif ( p1.gt.p2 ) then
 312             err_loc = (p1*p2)/(p1-p2)
 313             xm = 1.0d0/DBLE( m )
 314          else
 315             err_loc = p1
 316             xm = 1.0d0/DBLE( m-1 )
 317          endif
 318       endif
 319 *
 320 *---  reject the step-size if the error is not acceptable ...
 321 *
 322       if ( (k1.ne.0) .and. (err_loc.gt.delta*t_step*tol) .and.
 323      .     (mxreject.eq.0 .or. ireject.lt.mxreject) ) then
 324          t_old = t_step
 325          t_step = gamma * t_step * (t_step*tol/err_loc)**xm
 326          p1 = 10.0d0**(NINT( LOG10( t_step )-SQR1 )-1)
 327          t_step = AINT( t_step/p1 + 0.55d0 ) * p1
 328          if ( itrace.ne.0 ) then
 329             print*,'t_step =',t_old
 330             print*,'err_loc =',err_loc
 331             print*,'err_required =',delta*t_old*tol
 332             print*,'stepsize rejected, stepping down to:',t_step
 333          endif
 334          ireject = ireject + 1
 335          nreject = nreject + 1
 336          if ( mxreject.ne.0 .and. ireject.gt.mxreject ) then
 337             print*,"Failure in DMEXPV: ---"
 338             print*,"The requested tolerance is too high."
 339             Print*,"Rerun with a smaller value."
 340             iflag = 2
 341             return
 342          endif
 343          goto 401
 344       endif
 345 *
 346 *---  now update w = beta*V*exp(t_step*H)*e1 and the hump ...
 347 *
 348       mx = mbrkdwn + MAX( 0,k1-1 )
 349       call DGEMV( 'n', n,mx,beta,wsp(iv),n,wsp(iexph),1,0.0d0,w,1 )
 350       beta = DNRM2( n, w,1 )
 351       hump = MAX( hump, beta )
 352 *
 353 *---  Markov model constraints ...
 354 *
 355       j = 0
 356       do i = 1,n
 357          if ( w(i).lt.0.0d0 ) then
 358             w(i) = 0.0d0
 359             j = j + 1
 360          endif
 361       enddo
 362       p1 = DASUM( n, w,1 )
 363       if ( j.gt.0 ) call DSCAL( n, 1.0d0/p1, w,1 )
 364       roundoff = DABS( 1.0d0-p1 ) / DBLE( n )
 365 *
 366 *---  suggested value for the next stepsize ...
 367 *
 368       t_new = gamma * t_step * (t_step*tol/err_loc)**xm
 369       p1 = 10.0d0**(NINT( LOG10( t_new )-SQR1 )-1)
 370       t_new = AINT( t_new/p1 + 0.55d0 ) * p1
 371
 372       err_loc = MAX( err_loc, roundoff )
 373       err_loc = MAX( err_loc, rndoff )
 374 *
 375 *---  update the time covered ...
 376 *
 377       t_now = t_now + t_step
 378 *
 379 *---  display and keep some information ...
 380 *
 381       if ( itrace.ne.0 ) then
 382          print*,'integration',nstep,'---------------------------------'
 383          print*,'scale-square =',ns
 384          print*,'wnorm     =',beta
 385          print*,'step_size =',t_step
 386          print*,'err_loc   =',err_loc
 387          print*,'roundoff  =',roundoff
 388          print*,'next_step =',t_new
 389       endif
 390
 391       step_min = MIN( step_min, t_step )
 392       step_max = MAX( step_max, t_step )
 393       s_error = s_error + err_loc
 394       s_round = s_round + roundoff
 395       x_error = MAX( x_error, err_loc )
 396       x_round = MAX( x_round, roundoff )
 397
 398       if ( mxstep.eq.0 .or. nstep.lt.mxstep ) goto 100
 399       iflag = 1
 400
 401  500  continue
 402
 403       iwsp(1) = nmult
 404       iwsp(2) = nexph
 405       iwsp(3) = nscale
 406       iwsp(4) = nstep
 407       iwsp(5) = nreject
 408       iwsp(6) = ibrkflag
 409       iwsp(7) = mbrkdwn
 410
 411       wsp(1)  = step_min
 412       wsp(2)  = step_max
 413       wsp(3)  = x_round
 414       wsp(4)  = s_round
 415       wsp(5)  = x_error
 416       wsp(6)  = s_error
 417       wsp(7)  = tbrkdwn
 418       wsp(8)  = sgn*t_now
 419       wsp(9)  = hump/vnorm
 420       wsp(10) = beta/vnorm
 421       END
 422 *----------------------------------------------------------------------|
 423 *----------------------------------------------------------------------|
 424       subroutine DGPADM( ideg,m,t,H,ldh,wsp,lwsp,ipiv,iexph,ns,iflag )
 425
 426       implicit none
 427       integer ideg, m, ldh, lwsp, iexph, ns, iflag, ipiv(m)
 428       double precision t, H(ldh,m), wsp(lwsp)
 429
 430 *-----Purpose----------------------------------------------------------|
 431 *
 432 *     Computes exp(t*H), the matrix exponential of a general matrix in
 433 *     full, using the irreducible rational Pade approximation to the
 434 *     exponential function exp(x) = r(x) = (+/-)( I + 2*(q(x)/p(x)) ),
 435 *     combined with scaling-and-squaring.
 436 *
 437 *-----Arguments--------------------------------------------------------|
 438 *
 439 *     ideg      : (input) the degre of the diagonal Pade to be used.
 440 *                 a value of 6 is generally satisfactory.
 441 *
 442 *     m         : (input) order of H.
 443 *
 444 *     H(ldh,m)  : (input) argument matrix.
 445 *
 446 *     t         : (input) time-scale (can be < 0).
 447 *
 448 *     wsp(lwsp) : (workspace/output) lwsp .ge. 4*m*m+ideg+1.
 449 *
 450 *     ipiv(m)   : (workspace)
 451 *
 452 *>>>> iexph     : (output) number such that wsp(iexph) points to exp(tH)
 453 *                 i.e., exp(tH) is located at wsp(iexph ... iexph+m*m-1)
 454 *                       ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 455 *                 NOTE: if the routine was called with wsp(iptr),
 456 *                       then exp(tH) will start at wsp(iptr+iexph-1).
 457 *
 458 *     ns        : (output) number of scaling-squaring used.
 459 *
 460 *     iflag     : (output) exit flag.
 461 *                      0 - no problem
 462 *                     <0 - problem
 463 *
 464 *----------------------------------------------------------------------|
 465 *     Roger B. Sidje (rbs@maths.uq.edu.au)
 466 *     EXPOKIT: Software Package for Computing Matrix Exponentials.
 467 *     ACM - Transactions On Mathematical Software, 24(1):130-156, 1998
 468 *----------------------------------------------------------------------|
 469 *
 470       integer mm,i,j,k,ih2,ip,iq,iused,ifree,iodd,icoef,iput,iget
 471       double precision hnorm,scale,scale2,cp,cq
 472
 473       intrinsic INT,ABS,DBLE,LOG,MAX
 474
 475 *---  check restrictions on input parameters ...
 476       mm = m*m
 477       iflag = 0
 478       if ( ldh.lt.m ) iflag = -1
 479       if ( lwsp.lt.4*mm+ideg+1 ) iflag = -2
 480       if ( iflag.ne.0 ) stop 'bad sizes (in input of DGPADM)'
 481 *
 482 *---  initialise pointers ...
 483 *
 484       icoef = 1
 485       ih2 = icoef + (ideg+1)
 486       ip  = ih2 + mm
 487       iq  = ip + mm
 488       ifree = iq + mm
 489 *
 490 *---  scaling: seek ns such that ||t*H/2^ns|| < 1/2;
 491 *     and set scale = t/2^ns ...
 492 *
 493       do i = 1,m
 494          wsp(i) = 0.0d0
 495       enddo
 496       do j = 1,m
 497          do i = 1,m
 498             wsp(i) = wsp(i) + ABS( H(i,j) )
 499          enddo
 500       enddo
 501       hnorm = 0.0d0
 502       do i = 1,m
 503          hnorm = MAX( hnorm,wsp(i) )
 504       enddo
 505       hnorm = ABS( t*hnorm )
 506       if ( hnorm.eq.0.0d0 ) stop 'Error - null H in input of DGPADM.'
 507       ns = MAX( 0,INT(LOG(hnorm)/LOG(2.0d0))+2 )
 508       scale = t / DBLE(2**ns)
 509       scale2 = scale*scale
 510 *
 511 *---  compute Pade coefficients ...
 512 *
 513       i = ideg+1
 514       j = 2*ideg+1
 515       wsp(icoef) = 1.0d0
 516       do k = 1,ideg
 517          wsp(icoef+k) = (wsp(icoef+k-1)*DBLE( i-k ))/DBLE( k*(j-k) )
 518       enddo
 519 *
 520 *---  H2 = scale2*H*H ...
 521 *
 522       call DGEMM( 'n','n',m,m,m,scale2,H,ldh,H,ldh,0.0d0,wsp(ih2),m )
 523 *
 524 *---  initialize p (numerator) and q (denominator) ...
 525 *
 526       cp = wsp(icoef+ideg-1)
 527       cq = wsp(icoef+ideg)
 528       do j = 1,m
 529          do i = 1,m
 530             wsp(ip + (j-1)*m + i-1) = 0.0d0
 531             wsp(iq + (j-1)*m + i-1) = 0.0d0
 532          enddo
 533          wsp(ip + (j-1)*(m+1)) = cp
 534          wsp(iq + (j-1)*(m+1)) = cq
 535       enddo
 536 *
 537 *---  Apply Horner rule ...
 538 *
 539       iodd = 1
 540       k = ideg - 1
 541  100  continue
 542       iused = iodd*iq + (1-iodd)*ip
 543       call DGEMM( 'n','n',m,m,m, 1.0d0,wsp(iused),m,
 544      .             wsp(ih2),m, 0.0d0,wsp(ifree),m )
 545       do j = 1,m
 546          wsp(ifree+(j-1)*(m+1)) = wsp(ifree+(j-1)*(m+1))+wsp(icoef+k-1)
 547       enddo
 548       ip = (1-iodd)*ifree + iodd*ip
 549       iq = iodd*ifree + (1-iodd)*iq
 550       ifree = iused
 551       iodd = 1-iodd
 552       k = k-1
 553       if ( k.gt.0 )  goto 100
 554 *
 555 *---  Obtain (+/-)(I + 2*(p\q)) ...
 556 *
 557       if ( iodd .eq. 1 ) then
 558          call DGEMM( 'n','n',m,m,m, scale,wsp(iq),m,
 559      .                H,ldh, 0.0d0,wsp(ifree),m )
 560          iq = ifree
 561       else
 562          call DGEMM( 'n','n',m,m,m, scale,wsp(ip),m,
 563      .                H,ldh, 0.0d0,wsp(ifree),m )
 564          ip = ifree
 565       endif
 566       call DAXPY( mm, -1.0d0,wsp(ip),1, wsp(iq),1 )
 567       call DGESV( m,m, wsp(iq),m, ipiv, wsp(ip),m, iflag )
 568       if ( iflag.ne.0 ) stop 'Problem in DGESV (within DGPADM)'
 569       call DSCAL( mm, 2.0d0, wsp(ip), 1 )
 570       do j = 1,m
 571          wsp(ip+(j-1)*(m+1)) = wsp(ip+(j-1)*(m+1)) + 1.0d0
 572       enddo
 573       iput = ip
 574       if ( ns.eq.0 .and. iodd.eq.1 ) then
 575          call DSCAL( mm, -1.0d0, wsp(ip), 1 )
 576          goto 200
 577       endif
 578 *
 579 *--   squaring : exp(t*H) = (exp(t*H))^(2^ns) ...
 580 *
 581       iodd = 1
 582       do k = 1,ns
 583          iget = iodd*ip + (1-iodd)*iq
 584          iput = (1-iodd)*ip + iodd*iq
 585          call DGEMM( 'n','n',m,m,m, 1.0d0,wsp(iget),m, wsp(iget),m,
 586      .                0.0d0,wsp(iput),m )
 587          iodd = 1-iodd
 588       enddo
 589  200  continue
 590       iexph = iput
 591       END
 592 *----------------------------------------------------------------------|
 593 *----------------------------------------------------------------------|
 594       subroutine DSPADM( ideg,m,t,H,ldh,wsp,lwsp,ipiv,iexph,ns,iflag )
 595
 596       implicit none
 597       integer ideg, m, ldh, lwsp, iexph, ns, iflag, ipiv(m)
 598       double precision t, H(ldh,m), wsp(lwsp)
 599
 600 *-----Purpose----------------------------------------------------------|
 601 *
 602 *     Computes exp(t*H), the matrix exponential of a symmetric matrix
 603 *     in full, using the irreducible rational Pade approximation to the
 604 *     exponential function exp(x) = r(x) = (+/-)( I + 2*(q(x)/p(x)) ),
 605 *     combined with scaling-and-squaring.
 606 *
 607 *-----Arguments--------------------------------------------------------|
 608 *
 609 *     ideg      : (input) the degre of the diagonal Pade to be used.
 610 *                 a value of 6 is generally satisfactory.
 611 *
 612 *     m         : (input) order of H.
 613 *
 614 *     H(ldh,m)  : (input) argument matrix (both lower and upper parts).
 615 *
 616 *     t         : (input) time-scale (can be < 0).
 617 *
 618 *     wsp(lwsp) : (workspace/output) lwsp .ge. 4*m*m+ideg+1.
 619 *
 620 *     ipiv(m)   : (workspace)
 621 *
 622 *>>>> iexph     : (output) number such that wsp(iexph) points to exp(tH)
 623 *                 i.e., exp(tH) is located at wsp(iexph ... iexph+m*m-1)
 624 *                       ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 625 *                 NOTE: if the routine was called with wsp(iptr),
 626 *                       then exp(tH) will start at wsp(iptr+iexph-1).
 627 *
 628 *     ns        : (output) number of scaling-squaring used.
 629 *
 630 *     iflag     : (output) exit flag.
 631 *                      0 - no problem
 632 *                     <0 - problem
 633 *
 634 *----------------------------------------------------------------------|
 635 *     Roger B. Sidje (rbs@maths.uq.edu.au)
 636 *     EXPOKIT: Software Package for Computing Matrix Exponentials.
 637 *     ACM - Transactions On Mathematical Software, 24(1):130-156, 1998
 638 *----------------------------------------------------------------------|
 639 *
 640       integer mm,i,j,k,ih2,ip,iq,iused,ifree,iodd,icoef,iput,iget
 641       double precision hnorm,scale,scale2,cp,cq
 642
 643       intrinsic INT,ABS,DBLE,LOG,MAX
 644
 645 *---  check restrictions on input parameters ...
 646       mm = m*m
 647       iflag = 0
 648       if ( ldh.lt.m ) iflag = -1
 649       if ( lwsp.lt.4*mm+ideg+1 ) iflag = -2
 650       if ( iflag.ne.0 ) stop 'bad sizes (in input of DSPADM)'
 651 *
 652 *---  initialise pointers ...
 653 *
 654       icoef = 1
 655       ih2 = icoef + (ideg+1)
 656       ip  = ih2 + mm
 657       iq  = ip + mm
 658       ifree = iq + mm
 659 *
 660 *---  scaling: seek ns such that ||t*H/2^ns|| < 1/2;
 661 *     and set scale = t/2^ns ...
 662 *
 663       do i = 1,m
 664          wsp(i) = 0.0d0
 665       enddo
 666       do j = 1,m
 667          do i = 1,m
 668             wsp(i) = wsp(i) + ABS( H(i,j) )
 669          enddo
 670       enddo
 671       hnorm = 0.0d0
 672       do i = 1,m
 673          hnorm = MAX( hnorm,wsp(i) )
 674       enddo
 675       hnorm = ABS( t*hnorm )
 676       if ( hnorm.eq.0.0d0 ) stop 'Error - null H in input of DSPADM.'
 677       ns = MAX( 0,INT(LOG(hnorm)/LOG(2.0d0))+2 )
 678       scale = t / DBLE(2**ns)
 679       scale2 = scale*scale
 680 *
 681 *---  compute Pade coefficients ...
 682 *
 683       i = ideg+1
 684       j = 2*ideg+1
 685       wsp(icoef) = 1.0d0
 686       do k = 1,ideg
 687          wsp(icoef+k) = (wsp(icoef+k-1)*DBLE( i-k ))/DBLE( k*(j-k) )
 688       enddo
 689 *
 690 *---  H2 = scale2*H*H ...
 691 *
 692       call DGEMM( 'n','n',m,m,m,scale2,H,ldh,H,ldh,0.0d0,wsp(ih2),m )
 693 *
 694 *---  initialize p (numerator) and q (denominator) ...
 695 *
 696       cp = wsp(icoef+ideg-1)
 697       cq = wsp(icoef+ideg)
 698       do j = 1,m
 699          do i = 1,m
 700             wsp(ip + (j-1)*m + i-1) = 0.0d0
 701             wsp(iq + (j-1)*m + i-1) = 0.0d0
 702          enddo
 703          wsp(ip + (j-1)*(m+1)) = cp
 704          wsp(iq + (j-1)*(m+1)) = cq
 705       enddo
 706 *
 707 *---  Apply Horner rule ...
 708 *
 709       iodd = 1
 710       k = ideg - 1
 711  100  continue
 712       iused = iodd*iq + (1-iodd)*ip
 713       call DGEMM( 'n','n',m,m,m, 1.0d0,wsp(iused),m,
 714      .             wsp(ih2),m, 0.0d0,wsp(ifree),m )
 715       do j = 1,m
 716          wsp(ifree+(j-1)*(m+1)) = wsp(ifree+(j-1)*(m+1))+wsp(icoef+k-1)
 717       enddo
 718       ip = (1-iodd)*ifree + iodd*ip
 719       iq = iodd*ifree + (1-iodd)*iq
 720       ifree = iused
 721       iodd = 1-iodd
 722       k = k-1
 723       if ( k.gt.0 )  goto 100
 724 *
 725 *---  Obtain (+/-)(I + 2*(p\q)) ...
 726 *
 727       if ( iodd .eq. 1 ) then
 728          call DGEMM( 'n','n',m,m,m, scale,wsp(iq),m,
 729      .                H,ldh, 0.0d0,wsp(ifree),m )
 730          iq = ifree
 731       else
 732          call DGEMM( 'n','n',m,m,m, scale,wsp(ip),m,
 733      .                H,ldh, 0.0d0,wsp(ifree),m )
 734          ip = ifree
 735       endif
 736       call DAXPY( mm, -1.0d0,wsp(ip),1, wsp(iq),1 )
 737       call DSYSV( 'U',m,m,wsp(iq),m,ipiv,wsp(ip),m,wsp(ih2),mm,iflag )
 738       if ( iflag.ne.0 ) stop 'Problem in DSYSV (within DSPADM)'
 739       call DSCAL( mm, 2.0d0, wsp(ip), 1 )
 740       do j = 1,m
 741          wsp(ip+(j-1)*(m+1)) = wsp(ip+(j-1)*(m+1)) + 1.0d0
 742       enddo
 743       iput = ip
 744       if ( ns.eq.0 .and. iodd.eq.1 ) then
 745          call DSCAL( mm, -1.0d0, wsp(ip), 1 )
 746          goto 200
 747       endif
 748 *
 749 *--   squaring : exp(t*H) = (exp(t*H))^(2^ns) ...
 750 *
 751       iodd = 1
 752       do k = 1,ns
 753          iget = iodd*ip + (1-iodd)*iq
 754          iput = (1-iodd)*ip + iodd*iq
 755          call DGEMM( 'n','n',m,m,m, 1.0d0,wsp(iget),m, wsp(iget),m,
 756      .                0.0d0,wsp(iput),m )
 757          iodd = 1-iodd
 758       enddo
 759  200  continue
 760       iexph = iput
 761       END
 762 *----------------------------------------------------------------------|
 763 *----------------------------------------------------------------------|
 764       subroutine ZGPADM(ideg,m,t,H,ldh,wsp,lwsp,ipiv,iexph,ns,iflag)
 765
 766       implicit none
 767       double precision t
 768       integer          ideg, m, ldh, lwsp, iexph, ns, iflag, ipiv(m)
 769       complex*16       H(ldh,m), wsp(lwsp)
 770
 771 *-----Purpose----------------------------------------------------------|
 772 *
 773 *     Computes exp(t*H), the matrix exponential of a general complex
 774 *     matrix in full, using the irreducible rational Pade approximation
 775 *     to the exponential exp(z) = r(z) = (+/-)( I + 2*(q(z)/p(z)) ),
 776 *     combined with scaling-and-squaring.
 777 *
 778 *-----Arguments--------------------------------------------------------|
 779 *
 780 *     ideg      : (input) the degre of the diagonal Pade to be used.
 781 *                 a value of 6 is generally satisfactory.
 782 *
 783 *     m         : (input) order of H.
 784 *
 785 *     H(ldh,m)  : (input) argument matrix.
 786 *
 787 *     t         : (input) time-scale (can be < 0).
 788 *
 789 *     wsp(lwsp) : (workspace/output) lwsp .ge. 4*m*m+ideg+1.
 790 *
 791 *     ipiv(m)   : (workspace)
 792 *
 793 *>>>> iexph     : (output) number such that wsp(iexph) points to exp(tH)
 794 *                 i.e., exp(tH) is located at wsp(iexph ... iexph+m*m-1)
 795 *                       ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 796 *                 NOTE: if the routine was called with wsp(iptr),
 797 *                       then exp(tH) will start at wsp(iptr+iexph-1).
 798 *
 799 *     ns        : (output) number of scaling-squaring used.
 800 *
 801 *     iflag     : (output) exit flag.
 802 *                       0 - no problem
 803 *                      <0 - problem
 804 *
 805 *----------------------------------------------------------------------|
 806 *     Roger B. Sidje (rbs@maths.uq.edu.au)
 807 *     EXPOKIT: Software Package for Computing Matrix Exponentials.
 808 *     ACM - Transactions On Mathematical Software, 24(1):130-156, 1998
 809 *----------------------------------------------------------------------|
 810 *
 811       integer i,j,k,icoef,mm,ih2,iodd,iused,ifree,iq,ip,iput,iget
 812       double precision hnorm
 813       complex*16 cp, cq, scale, scale2, ZERO, ONE
 814
 815       parameter( ZERO=(0.0d0,0.0d0), ONE=(1.0d0,0.0d0) )
 816       intrinsic ABS, CMPLX, DBLE, INT, LOG, MAX
 817
 818 *---  check restrictions on input parameters ...
 819       mm = m*m
 820       iflag = 0
 821       if ( ldh.lt.m ) iflag = -1
 822       if ( lwsp.lt.4*mm+ideg+1 ) iflag = -2
 823       if ( iflag.ne.0 ) stop 'bad sizes (in input of ZGPADM)'
 824 *
 825 *---  initialise pointers ...
 826 *
 827       icoef = 1
 828       ih2 = icoef + (ideg+1)
 829       ip  = ih2 + mm
 830       iq  = ip + mm
 831       ifree = iq + mm
 832 *
 833 *---  scaling: seek ns such that ||t*H/2^ns|| < 1/2;
 834 *     and set scale = t/2^ns ...
 835 *
 836       do i = 1,m
 837          wsp(i) = ZERO
 838       enddo
 839       do j = 1,m
 840          do i = 1,m
 841             wsp(i) = wsp(i) + ABS( H(i,j) )
 842          enddo
 843       enddo
 844       hnorm = 0.0d0
 845       do i = 1,m
 846          hnorm = MAX( hnorm,DBLE(wsp(i)) )
 847       enddo
 848       hnorm = ABS( t*hnorm )
 849       if ( hnorm.eq.0.0d0 ) stop 'Error - null H in input of ZGPADM.'
 850       ns = MAX( 0,INT(LOG(hnorm)/LOG(2.0d0))+2 )
 851       scale =  CMPLX( t/DBLE(2**ns),0.0d0 )
 852       scale2 = scale*scale
 853 *
 854 *---  compute Pade coefficients ...
 855 *
 856       i = ideg+1
 857       j = 2*ideg+1
 858       wsp(icoef) = ONE
 859       do k = 1,ideg
 860          wsp(icoef+k) = (wsp(icoef+k-1)*DBLE( i-k ))/DBLE( k*(j-k) )
 861       enddo
 862 *
 863 *---  H2 = scale2*H*H ...
 864 *
 865       call ZGEMM( 'n','n',m,m,m,scale2,H,ldh,H,ldh,ZERO,wsp(ih2),m )
 866 *
 867 *---  initialise p (numerator) and q (denominator) ...
 868 *
 869       cp = wsp(icoef+ideg-1)
 870       cq = wsp(icoef+ideg)
 871       do j = 1,m
 872          do i = 1,m
 873             wsp(ip + (j-1)*m + i-1) = ZERO
 874             wsp(iq + (j-1)*m + i-1) = ZERO
 875          enddo
 876          wsp(ip + (j-1)*(m+1)) = cp
 877          wsp(iq + (j-1)*(m+1)) = cq
 878       enddo
 879 *
 880 *---  Apply Horner rule ...
 881 *
 882       iodd = 1
 883       k = ideg - 1
 884  100  continue
 885       iused = iodd*iq + (1-iodd)*ip
 886       call ZGEMM( 'n','n',m,m,m, ONE,wsp(iused),m,
 887      .             wsp(ih2),m, ZERO,wsp(ifree),m )
 888       do j = 1,m
 889          wsp(ifree+(j-1)*(m+1)) = wsp(ifree+(j-1)*(m+1))+wsp(icoef+k-1)
 890       enddo
 891       ip = (1-iodd)*ifree + iodd*ip
 892       iq = iodd*ifree + (1-iodd)*iq
 893       ifree = iused
 894       iodd = 1-iodd
 895       k = k-1
 896       if ( k.gt.0 )  goto 100
 897 *
 898 *---  Obtain (+/-)(I + 2*(p\q)) ...
 899 *
 900       if ( iodd.ne.0 ) then
 901          call ZGEMM( 'n','n',m,m,m, scale,wsp(iq),m,
 902      .                H,ldh, ZERO,wsp(ifree),m )
 903          iq = ifree
 904       else
 905          call ZGEMM( 'n','n',m,m,m, scale,wsp(ip),m,
 906      .                H,ldh, ZERO,wsp(ifree),m )
 907          ip = ifree
 908       endif
 909       call ZAXPY( mm, -ONE,wsp(ip),1, wsp(iq),1 )
 910       call ZGESV( m,m, wsp(iq),m, ipiv, wsp(ip),m, iflag )
 911       if ( iflag.ne.0 ) stop 'Problem in ZGESV (within ZGPADM)'
 912       call ZDSCAL( mm, 2.0d0, wsp(ip), 1 )
 913       do j = 1,m
 914          wsp(ip+(j-1)*(m+1)) = wsp(ip+(j-1)*(m+1)) + ONE
 915       enddo
 916       iput = ip
 917       if ( ns.eq.0 .and. iodd.ne.0 ) then
 918          call ZDSCAL( mm, -1.0d0, wsp(ip), 1 )
 919          goto 200
 920       endif
 921 *
 922 *--   squaring : exp(t*H) = (exp(t*H))^(2^ns) ...
 923 *
 924       iodd = 1
 925       do k = 1,ns
 926          iget = iodd*ip + (1-iodd)*iq
 927          iput = (1-iodd)*ip + iodd*iq
 928          call ZGEMM( 'n','n',m,m,m, ONE,wsp(iget),m, wsp(iget),m,
 929      .                ZERO,wsp(iput),m )
 930          iodd = 1-iodd
 931       enddo
 932  200  continue
 933       iexph = iput
 934       END
 935 *----------------------------------------------------------------------|
 936       subroutine ZHPADM(ideg,m,t,H,ldh,wsp,lwsp,ipiv,iexph,ns,iflag)
 937
 938       implicit none
 939       double precision t
 940       integer          ideg, m, ldh, lwsp, iexph, ns, iflag, ipiv(m)
 941       complex*16       H(ldh,m), wsp(lwsp)
 942
 943 *-----Purpose----------------------------------------------------------|
 944 *
 945 *     Computes exp(t*H), the matrix exponential of an Hermitian matrix
 946 *     in full, using the irreducible rational Pade approximation to the
 947 *     exponential function exp(z) = r(z) = (+/-)( I + 2*(q(z)/p(z)) ),
 948 *     combined with scaling-and-squaring.
 949 *
 950 *-----Arguments--------------------------------------------------------|
 951 *
 952 *     ideg      : (input) the degre of the diagonal Pade to be used.
 953 *                 a value of 6 is generally satisfactory.
 954 *
 955 *     m         : (input) order of H.
 956 *
 957 *     H(ldh,m)  : (input) argument matrix (both lower and upper parts).
 958 *
 959 *     t         : (input) time-scale (can be < 0).
 960 *
 961 *     wsp(lwsp) : (workspace/output) lwsp .ge. 4*m*m+ideg+1.
 962 *
 963 *     ipiv(m)   : (workspace)
 964 *
 965 *>>>> iexph     : (output) number such that wsp(iexph) points to exp(tH)
 966 *                 i.e., exp(tH) is located at wsp(iexph ... iexph+m*m-1)
 967 *                       ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 968 *                 NOTE: if the routine was called with wsp(iptr),
 969 *                       then exp(tH) will start at wsp(iptr+iexph-1).
 970 *
 971 *     ns        : (output) number of scaling-squaring used.
 972 *
 973 *     iflag     : (output) exit flag.
 974 *                       0 - no problem
 975 *                      <0 - problem
 976 *
 977 *----------------------------------------------------------------------|
 978 *     Roger B. Sidje (rbs@maths.uq.edu.au)
 979 *     EXPOKIT: Software Package for Computing Matrix Exponentials.
 980 *     ACM - Transactions On Mathematical Software, 24(1):130-156, 1998
 981 *----------------------------------------------------------------------|
 982 *
 983       integer i,j,k,icoef,mm,ih2,iodd,iused,ifree,iq,ip,iput,iget
 984       double precision hnorm
 985       complex*16 cp, cq, scale, scale2, ZERO, ONE
 986
 987       parameter( ZERO=(0.0d0,0.0d0), ONE=(1.0d0,0.0d0) )
 988       intrinsic ABS, CMPLX, DBLE, INT, LOG, MAX
 989
 990 *---  check restrictions on input parameters ...
 991       mm = m*m
 992       iflag = 0
 993       if ( ldh.lt.m ) iflag = -1
 994       if ( lwsp.lt.4*mm+ideg+1 ) iflag = -2
 995       if ( iflag.ne.0 ) stop 'bad sizes (in input of ZHPADM)'
 996 *
 997 *---  initialise pointers ...
 998 *
 999       icoef = 1
1000       ih2 = icoef + (ideg+1)
1001       ip  = ih2 + mm
1002       iq  = ip + mm
1003       ifree = iq + mm
1004 *
1005 *---  scaling: seek ns such that ||t*H/2^ns|| < 1/2;
1006 *     and set scale = t/2^ns ...
1007 *
1008       do i = 1,m
1009          wsp(i) = ZERO
1010       enddo
1011       do j = 1,m
1012          do i = 1,m
1013             wsp(i) = wsp(i) + ABS( H(i,j) )
1014          enddo
1015       enddo
1016       hnorm = 0.0d0
1017       do i = 1,m
1018          hnorm = MAX( hnorm,DBLE(wsp(i)) )
1019       enddo
1020       hnorm = ABS( t*hnorm )
1021       if ( hnorm.eq.0.0d0 ) stop 'Error - null H in input of ZHPADM.'
1022       ns = MAX( 0,INT(LOG(hnorm)/LOG(2.0d0))+2 )
1023       scale =  CMPLX( t/DBLE(2**ns),0.0d0 )
1024       scale2 = scale*scale
1025 *
1026 *---  compute Pade coefficients ...
1027 *
1028       i = ideg+1
1029       j = 2*ideg+1
1030       wsp(icoef) = ONE
1031       do k = 1,ideg
1032          wsp(icoef+k) = (wsp(icoef+k-1)*DBLE( i-k ))/DBLE( k*(j-k) )
1033       enddo
1034 *
1035 *---  H2 = scale2*H*H ...
1036 *
1037       call ZGEMM( 'n','n',m,m,m,scale2,H,ldh,H,ldh,ZERO,wsp(ih2),m )
1038 *
1039 *---  initialise p (numerator) and q (denominator) ...
1040 *
1041       cp = wsp(icoef+ideg-1)
1042       cq = wsp(icoef+ideg)
1043       do j = 1,m
1044          do i = 1,m
1045             wsp(ip + (j-1)*m + i-1) = ZERO
1046             wsp(iq + (j-1)*m + i-1) = ZERO
1047          enddo
1048          wsp(ip + (j-1)*(m+1)) = cp
1049          wsp(iq + (j-1)*(m+1)) = cq
1050       enddo
1051 *
1052 *---  Apply Horner rule ...
1053 *
1054       iodd = 1
1055       k = ideg - 1
1056  100  continue
1057       iused = iodd*iq + (1-iodd)*ip
1058       call ZGEMM( 'n','n',m,m,m, ONE,wsp(iused),m,
1059      .             wsp(ih2),m, ZERO,wsp(ifree),m )
1060       do j = 1,m
1061          wsp(ifree+(j-1)*(m+1)) = wsp(ifree+(j-1)*(m+1))+wsp(icoef+k-1)
1062       enddo
1063       ip = (1-iodd)*ifree + iodd*ip
1064       iq = iodd*ifree + (1-iodd)*iq
1065       ifree = iused
1066       iodd = 1-iodd
1067       k = k-1
1068       if ( k.gt.0 )  goto 100
1069 *
1070 *---  Obtain (+/-)(I + 2*(p\q)) ...
1071 *
1072       if ( iodd.ne.0 ) then
1073          call ZGEMM( 'n','n',m,m,m, scale,wsp(iq),m,
1074      .                H,ldh, ZERO,wsp(ifree),m )
1075          iq = ifree
1076       else
1077          call ZGEMM( 'n','n',m,m,m, scale,wsp(ip),m,
1078      .                H,ldh, ZERO,wsp(ifree),m )
1079          ip = ifree
1080       endif
1081       call ZAXPY( mm, -ONE,wsp(ip),1, wsp(iq),1 )
1082       call ZHESV( 'U',m,m,wsp(iq),m,ipiv,wsp(ip),m,wsp(ih2),mm,iflag )
1083       if ( iflag.ne.0 ) stop 'Problem in ZHESV (within ZHPADM)'
1084       call ZDSCAL( mm, 2.0d0, wsp(ip), 1 )
1085       do j = 1,m
1086          wsp(ip+(j-1)*(m+1)) = wsp(ip+(j-1)*(m+1)) + ONE
1087       enddo
1088       iput = ip
1089       if ( ns.eq.0 .and. iodd.ne.0 ) then
1090          call ZDSCAL( mm, -1.0d0, wsp(ip), 1 )
1091          goto 200
1092       endif
1093 *
1094 *--   squaring : exp(t*H) = (exp(t*H))^(2^ns) ...
1095 *
1096       iodd = 1
1097       do k = 1,ns
1098          iget = iodd*ip + (1-iodd)*iq
1099          iput = (1-iodd)*ip + iodd*iq
1100          call ZGEMM( 'n','n',m,m,m, ONE,wsp(iget),m, wsp(iget),m,
1101      .                ZERO,wsp(iput),m )
1102          iodd = 1-iodd
1103       enddo
1104  200  continue
1105       iexph = iput
1106       END
1107 *----------------------------------------------------------------------|
1108       subroutine DGCHBV( m, t, H,ldh, y, wsp, iwsp, iflag )
1109
1110       implicit none
1111       integer          m, ldh, iflag, iwsp(m)
1112       double precision t, H(ldh,m), y(m)
1113       complex*16       wsp(m*(m+2))
1114
1115 *-----Purpose----------------------------------------------------------|
1116 *
1117 *---  DGCHBV computes y = exp(t*H)*y using the partial fraction
1118 *     expansion of the uniform rational Chebyshev approximation
1119 *     to exp(-x) of type (14,14). H is a General matrix.
1120 *     About 14-digit accuracy is expected if the matrix H is negative
1121 *     definite. The algorithm may behave poorly otherwise.
1122 *
1123 *-----Arguments--------------------------------------------------------|
1124 *
1125 *     m       : (input) order of the matrix H
1126 *
1127 *     t       : (input) time-scaling factor (can be < 0).
1128 *
1129 *     H(ldh,m): (input) argument matrix.
1130 *
1131 *     y(m)    : (input/output) on input the operand vector,
1132 *               on output the resulting vector exp(t*H)*y.
1133 *
1134 *     iwsp(m) : (workspace)
1135 *
1136 *     wsp     : (workspace). Observe that a double precision vector of
1137 *               length 2*m*(m+2) can be used as well when calling this
1138 *               routine (thus avoiding an idle complex array elsewhere)
1139 *
1140 *----------------------------------------------------------------------|
1141 *     Roger B. Sidje (rbs@maths.uq.edu.au)
1142 *     EXPOKIT: Software Package for Computing Matrix Exponentials.
1143 *     ACM - Transactions On Mathematical Software, 24(1):130-156, 1998
1144 *----------------------------------------------------------------------|
1145 *
1146       integer ndeg, i, j, ip, ih, iy, iz
1147       parameter ( ndeg=7 )
1148       double precision alpha0
1149       complex*16 alpha(ndeg), theta(ndeg)
1150
1151       intrinsic DBLE
1152
1153 *---  Pointers ...
1154
1155       ih = 1
1156       iy = ih + m*m
1157       iz = iy + m
1158
1159 *---  Coefficients and poles of the partial fraction expansion ...
1160
1161       alpha0  =  0.183216998528140087D-11
1162       alpha(1)=( 0.557503973136501826D+02,-0.204295038779771857D+03)
1163       alpha(2)=(-0.938666838877006739D+02, 0.912874896775456363D+02)
1164       alpha(3)=( 0.469965415550370835D+02,-0.116167609985818103D+02)
1165       alpha(4)=(-0.961424200626061065D+01,-0.264195613880262669D+01)
1166       alpha(5)=( 0.752722063978321642D+00, 0.670367365566377770D+00)
1167       alpha(6)=(-0.188781253158648576D-01,-0.343696176445802414D-01)
1168       alpha(7)=( 0.143086431411801849D-03, 0.287221133228814096D-03)
1169
1170       theta(1)=(-0.562314417475317895D+01, 0.119406921611247440D+01)
1171       theta(2)=(-0.508934679728216110D+01, 0.358882439228376881D+01)
1172       theta(3)=(-0.399337136365302569D+01, 0.600483209099604664D+01)
1173       theta(4)=(-0.226978543095856366D+01, 0.846173881758693369D+01)
1174       theta(5)=( 0.208756929753827868D+00, 0.109912615662209418D+02)
1175       theta(6)=( 0.370327340957595652D+01, 0.136563731924991884D+02)
1176       theta(7)=( 0.889777151877331107D+01, 0.166309842834712071D+02)
1177 *
1178 *---  Accumulation of the contribution of each pole ...
1179 *
1180       do j = 1,m
1181          wsp(iz+j-1) = y(j)
1182          y(j) = y(j)*alpha0
1183       enddo
1184       do ip = 1,ndeg
1185 *---     Solve each fraction using Gaussian elimination with pivoting...
1186          do j = 1,m
1187             do i = 1,m
1188                wsp(ih+(j-1)*m+i-1) = -t*H(i,j)
1189             enddo
1190             wsp(ih+(j-1)*m+j-1) = wsp(ih+(j-1)*m+j-1)-theta(ip)
1191             wsp(iy+j-1) = wsp(iz+j-1)
1192          enddo
1193          call ZGESV( M, 1, WSP(iH),M, IWSP, WSP(iY),M, IFLAG )
1194          if ( IFLAG.ne.0 ) stop 'Error in DGCHBV'
1195 *---     Accumulate the partial result in y ...
1196          do j = 1,m
1197             y(j) = y(j) + DBLE( alpha(ip)*wsp(iy+j-1) )
1198          enddo
1199       enddo
1200       END
1201 *----------------------------------------------------------------------|
1202 *----------------------------------------------------------------------|
1203       subroutine DSCHBV( m, t, H,ldh, y, wsp, iwsp, iflag )
1204
1205       implicit none
1206       integer          m, ldh, iflag, iwsp(m)
1207       double precision t, H(ldh,m), y(m)
1208       complex*16       wsp(m*(m+2))
1209
1210 *-----Purpose----------------------------------------------------------|
1211 *
1212 *---  DSCHBV computes y = exp(t*H)*y using the partial fraction
1213 *     expansion of the uniform rational Chebyshev approximation
1214 *     to exp(-x) of type (14,14). H is assumed to be symmetric.
1215 *     About 14-digit accuracy is expected if the matrix H is negative
1216 *     definite. The algorithm may behave poorly otherwise.
1217 *
1218 *-----Arguments--------------------------------------------------------|
1219 *
1220 *     m       : (input) order of matrix H
1221 *
1222 *     t       : (input) time-scaling factor (can be < 0).
1223 *
1224 *     H(ldh,m): (input) symmetric matrix.
1225 *
1226 *     y(m)    : (input/output) on input the operand vector,
1227 *               on output the resulting vector exp(t*H)*y.
1228 *
1229 *     iwsp(m) : (workspace)
1230 *
1231 *     wsp     : (workspace). Observe that a double precision vector of
1232 *               length 2*m*(m+2) can be used as well when calling this
1233 *               routine (thus avoiding an idle complex array elsewhere)
1234 *
1235 *----------------------------------------------------------------------|
1236 *     Roger B. Sidje (rbs@maths.uq.edu.au)
1237 *     EXPOKIT: Software Package for Computing Matrix Exponentials.
1238 *     ACM - Transactions On Mathematical Software, 24(1):130-156, 1998
1239 *----------------------------------------------------------------------|
1240 *
1241       integer ndeg, i, j, ip, ih, iy, iz
1242       parameter ( ndeg=7 )
1243       double precision alpha0
1244       complex*16 alpha(ndeg), theta(ndeg), w
1245
1246       intrinsic ABS,CMPLX,DBLE,MIN
1247
1248 *---  Pointers ...
1249
1250       ih = 1
1251       iy = ih + m*m
1252       iz = iy + m
1253
1254 *---  Coefficients and poles of the partial fraction expansion ...
1255
1256       alpha0  =  0.183216998528140087D-11
1257       alpha(1)=( 0.557503973136501826D+02,-0.204295038779771857D+03)
1258       alpha(2)=(-0.938666838877006739D+02, 0.912874896775456363D+02)
1259       alpha(3)=( 0.469965415550370835D+02,-0.116167609985818103D+02)
1260       alpha(4)=(-0.961424200626061065D+01,-0.264195613880262669D+01)
1261       alpha(5)=( 0.752722063978321642D+00, 0.670367365566377770D+00)
1262       alpha(6)=(-0.188781253158648576D-01,-0.343696176445802414D-01)
1263       alpha(7)=( 0.143086431411801849D-03, 0.287221133228814096D-03)
1264
1265       theta(1)=(-0.562314417475317895D+01, 0.119406921611247440D+01)
1266       theta(2)=(-0.508934679728216110D+01, 0.358882439228376881D+01)
1267       theta(3)=(-0.399337136365302569D+01, 0.600483209099604664D+01)
1268       theta(4)=(-0.226978543095856366D+01, 0.846173881758693369D+01)
1269       theta(5)=( 0.208756929753827868D+00, 0.109912615662209418D+02)
1270       theta(6)=( 0.370327340957595652D+01, 0.136563731924991884D+02)
1271       theta(7)=( 0.889777151877331107D+01, 0.166309842834712071D+02)
1272 *
1273 *---  Accumulation of the contribution of each pole ...
1274 *
1275       do j = 1,m
1276          wsp(iz+j-1) = y(j)
1277          y(j) = y(j)*alpha0
1278       enddo
1279       do ip = 1,ndeg
1280 *---     Solve each fraction using Gaussian elimination with pivoting...
1281          do j = 1,m
1282             do i = 1,m
1283                wsp(ih+(j-1)*m+i-1) = -t*H(i,j)
1284             enddo
1285             wsp(ih+(j-1)*m+j-1) = wsp(ih+(j-1)*m+j-1)-theta(ip)
1286             wsp(iy+j-1) = wsp(iz+j-1)
1287          enddo
1288          !call ZSYSV('U', M, 1, WSP(iH),M, IWSP, WSP(iY),M, W,1, IFLAG )
1289          if ( IFLAG.ne.0 ) stop 'Error in DSCHBV'
1290 *---     Accumulate the partial result in y ...
1291          do i = 1,m
1292             y(i) = y(i) + DBLE( alpha(ip)*wsp(iy+i-1) )
1293          enddo
1294       enddo
1295       END
1296 *----------------------------------------------------------------------|
1297 *----------------------------------------------------------------------|
1298       subroutine ZGCHBV( m, t, H,ldh, y, wsp, iwsp, iflag )
1299
1300       implicit none
1301       integer          m, ldh, iflag, iwsp(m)
1302       double precision t
1303       complex*16       H(ldh,m), y(m), wsp(m*(m+2))
1304
1305 *-----Purpose----------------------------------------------------------|
1306 *
1307 *---  ZGCHBV computes y = exp(t*H)*y using the partial fraction
1308 *     expansion of the uniform rational Chebyshev approximation
1309 *     to exp(-x) of type (14,14). H is a General matrix.
1310 *     About 14-digit accuracy is expected if the matrix H is negative
1311 *     definite. The algorithm may behave poorly otherwise.
1312 *
1313 *-----Arguments--------------------------------------------------------|
1314 *
1315 *     m       : (input) order of the matrix H.
1316 *
1317 *     t       : (input) time-scaling factor (can be < 0).
1318 *
1319 *     H(ldh,m): (input) argument matrix.
1320 *
1321 *     y(m)    : (input/output) on input the operand vector,
1322 *               on output the resulting vector exp(t*H)*y.
1323 *
1324 *     iwsp(m) : (workspace)
1325 *
1326 *     wsp     : (workspace). Observe that a double precision vector of
1327 *               length 2*m*(m+2) can be used as well when calling this
1328 *               routine.
1329 *
1330 *----------------------------------------------------------------------|
1331 *     Roger B. Sidje (rbs@maths.uq.edu.au)
1332 *     EXPOKIT: Software Package for Computing Matrix Exponentials.
1333 *     ACM - Transactions On Mathematical Software, 24(1):130-156, 1998
1334 *----------------------------------------------------------------------|
1335 *
1336       integer     ndeg, i, j, ip, ih, iy, iz
1337       parameter ( ndeg=7 )
1338       double      precision alpha0
1339       complex*16  alpha(2*ndeg), theta(2*ndeg)
1340
1341 *---  Pointers ...
1342
1343       ih = 1
1344       iy = ih + m*m
1345       iz = iy + m
1346
1347 *---  Coefficients and poles of the partial fraction expansion ...
1348
1349       alpha0  =  0.183216998528140087D-11
1350       alpha(1)=( 0.557503973136501826D+02,-0.204295038779771857D+03)
1351       alpha(2)=(-0.938666838877006739D+02, 0.912874896775456363D+02)
1352       alpha(3)=( 0.469965415550370835D+02,-0.116167609985818103D+02)
1353       alpha(4)=(-0.961424200626061065D+01,-0.264195613880262669D+01)
1354       alpha(5)=( 0.752722063978321642D+00, 0.670367365566377770D+00)
1355       alpha(6)=(-0.188781253158648576D-01,-0.343696176445802414D-01)
1356       alpha(7)=( 0.143086431411801849D-03, 0.287221133228814096D-03)
1357
1358       theta(1)=(-0.562314417475317895D+01, 0.119406921611247440D+01)
1359       theta(2)=(-0.508934679728216110D+01, 0.358882439228376881D+01)
1360       theta(3)=(-0.399337136365302569D+01, 0.600483209099604664D+01)
1361       theta(4)=(-0.226978543095856366D+01, 0.846173881758693369D+01)
1362       theta(5)=( 0.208756929753827868D+00, 0.109912615662209418D+02)
1363       theta(6)=( 0.370327340957595652D+01, 0.136563731924991884D+02)
1364       theta(7)=( 0.889777151877331107D+01, 0.166309842834712071D+02)
1365 *
1366       do ip = 1,ndeg
1367          theta(ndeg+ip) = CONJG( theta(ip) )
1368          alpha(ndeg+ip) = CONJG( alpha(ip) )
1369       enddo
1370 *
1371 *---  Accumulation of the contribution of each pole ...
1372 *
1373       do j = 1,m
1374          wsp(iz+j-1) = y(j)
1375          y(j) = y(j)*alpha0
1376       enddo
1377       do ip = 1,2*ndeg
1378          alpha(ip) = 0.5d0*alpha(ip)
1379 *---     Solve each fraction using Gaussian elimination with pivoting...
1380          do j = 1,m
1381             do i = 1,m
1382                wsp(ih+(j-1)*m+i-1) = -t*H(i,j)
1383             enddo
1384             wsp(ih+(j-1)*m+j-1) = wsp(ih+(j-1)*m+j-1)-theta(ip)
1385             wsp(iy+j-1) = wsp(iz+j-1)
1386          enddo
1387          call ZGESV( M, 1, WSP(iH),M, IWSP, WSP(iY),M, IFLAG )
1388          if ( IFLAG.ne.0 ) stop 'Error in ZGCHBV'
1389 *---     Accumulate the partial result in y ...
1390          do i = 1,m
1391             y(i) = y(i) + alpha(ip)*wsp(iy+i-1)
1392          enddo
1393       enddo
1394       END
1395 *----------------------------------------------------------------------|
1396 *----------------------------------------------------------------------|
1397       subroutine DNCHBV( m, t, H,ldh, y, wsp )
1398
1399       implicit none
1400       integer          m, ldh
1401       double precision t, H(ldh,m), y(m)
1402       complex*16       wsp(m*(m+2))
1403
1404 *-----Purpose----------------------------------------------------------|
1405 *
1406 *---  DNCHBV computes y = exp(t*H)*y using the partial fraction
1407 *     expansion of the uniform rational Chebyshev approximation
1408 *     to exp(-x) of type (14,14). H is assumed to be upper-Hessenberg.
1409 *     About 14-digit accuracy is expected if the matrix H is negative
1410 *     definite. The algorithm may behave poorly otherwise.
1411 *
1412 *-----Arguments--------------------------------------------------------|
1413 *
1414 *     m       : (input) order of the Hessenberg matrix H
1415 *
1416 *     t       : (input) time-scaling factor (can be < 0).
1417 *
1418 *     H(ldh,m): (input) upper Hessenberg matrix.
1419 *
1420 *     y(m)    : (input/output) on input the operand vector,
1421 *               on output the resulting vector exp(t*H)*y.
1422 *
1423 *     wsp     : (workspace). Observe that a double precision vector of
1424 *               length 2*m*(m+2) can be used as well when calling this
1425 *               routine (thus avoiding an idle complex array elsewhere)
1426 *
1427 *----------------------------------------------------------------------|
1428 *     Roger B. Sidje (rbs@maths.uq.edu.au)
1429 *     EXPOKIT: Software Package for Computing Matrix Exponentials.
1430 *     ACM - Transactions On Mathematical Software, 24(1):130-156, 1998
1431 *----------------------------------------------------------------------|
1432 *
1433       complex*16 ZERO
1434       integer ndeg, i, j, k, ip, ih, iy, iz
1435       parameter ( ndeg=7, ZERO=(0.0d0,0.0d0) )
1436       double precision alpha0
1437       complex*16 alpha(ndeg), theta(ndeg), tmpc
1438
1439       intrinsic ABS,DBLE,MIN
1440
1441 *---  Pointers ...
1442
1443       ih = 1
1444       iy = ih + m*m
1445       iz = iy + m
1446
1447 *---  Coefficients and poles of the partial fraction expansion...
1448
1449       alpha0  =  0.183216998528140087D-11
1450       alpha(1)=( 0.557503973136501826D+02,-0.204295038779771857D+03)
1451       alpha(2)=(-0.938666838877006739D+02, 0.912874896775456363D+02)
1452       alpha(3)=( 0.469965415550370835D+02,-0.116167609985818103D+02)
1453       alpha(4)=(-0.961424200626061065D+01,-0.264195613880262669D+01)
1454       alpha(5)=( 0.752722063978321642D+00, 0.670367365566377770D+00)
1455       alpha(6)=(-0.188781253158648576D-01,-0.343696176445802414D-01)
1456       alpha(7)=( 0.143086431411801849D-03, 0.287221133228814096D-03)
1457
1458       theta(1)=(-0.562314417475317895D+01, 0.119406921611247440D+01)
1459       theta(2)=(-0.508934679728216110D+01, 0.358882439228376881D+01)
1460       theta(3)=(-0.399337136365302569D+01, 0.600483209099604664D+01)
1461       theta(4)=(-0.226978543095856366D+01, 0.846173881758693369D+01)
1462       theta(5)=( 0.208756929753827868D+00, 0.109912615662209418D+02)
1463       theta(6)=( 0.370327340957595652D+01, 0.136563731924991884D+02)
1464       theta(7)=( 0.889777151877331107D+01, 0.166309842834712071D+02)
1465 *
1466 *---  Accumulation of the contribution of each pole ...
1467 *
1468       do j = 1,m
1469          wsp(iz+j-1) = y(j)
1470          y(j) = y(j)*alpha0
1471       enddo
1472       do ip = 1,ndeg
1473 *---     Solve each fraction using Gaussian elimination with pivoting...
1474          do j = 1,m
1475             wsp(iy+j-1) = wsp(iz+j-1)
1476             do i = 1,MIN( j+1,m )
1477                wsp(ih+(j-1)*m+i-1) = -t*H(i,j)
1478             enddo
1479             wsp(ih+(j-1)*m+j-1) = wsp(ih+(j-1)*m+j-1)-theta(ip)
1480             do k = i,m
1481                wsp(ih+(j-1)*m+k-1) = ZERO
1482             enddo
1483          enddo
1484          do i = 1,m-1
1485 *---        Get pivot and exchange rows ...
1486             if (ABS(wsp(ih+(i-1)*m+i-1)).lt.ABS(wsp(ih+(i-1)*m+i))) then
1487                call ZSWAP( m-i+1, wsp(ih+(i-1)*m+i-1),m,
1488      .                     wsp(ih+(i-1)*m+i),m )
1489                call ZSWAP( 1, wsp(iy+i-1),1, wsp(iy+i),1 )
1490             endif
1491 *---        Forward eliminiation ...
1492             tmpc = wsp(ih+(i-1)*m+i) / wsp(ih+(i-1)*m+i-1)
1493             call ZAXPY( m-i, -tmpc, wsp(ih+i*m+i-1),m, wsp(ih+i*m+i),m )
1494             wsp(iy+i) = wsp(iy+i) - tmpc*wsp(iy+i-1)
1495          enddo
1496 *---     Backward substitution ...
1497          do i = m,1,-1
1498             tmpc = wsp(iy+i-1)
1499             do j = i+1,m
1500                tmpc = tmpc - wsp(ih+(j-1)*m+i-1)*wsp(iy+j-1)
1501             enddo
1502             wsp(iy+i-1) = tmpc / wsp(ih+(i-1)*m+i-1)
1503          enddo
1504 *---     Accumulate the partial result in y ...
1505          do j = 1,m
1506             y(j) = y(j) + DBLE( alpha(ip)*wsp(iy+j-1) )
1507          enddo
1508       enddo
1509       END
1510 *----------------------------------------------------------------------|
1511 *----------------------------------------------------------------------|
1512       subroutine ZNCHBV( m, t, H,ldh, y, wsp )
1513
1514       implicit none
1515       integer          m, ldh
1516       double precision t
1517       complex*16       H(ldh,m), y(m), wsp(m*(m+2))
1518
1519 *-----Purpose----------------------------------------------------------|
1520 *
1521 *---  ZNCHBV computes y = exp(t*H)*y using the partial fraction
1522 *     expansion of the uniform rational Chebyshev approximation
1523 *     to exp(-x) of type (14,14). H is assumed to be upper-Hessenberg.
1524 *     About 14-digit accuracy is expected if the matrix H is negative
1525 *     definite. The algorithm may behave poorly otherwise.
1526 *
1527 *-----Arguments--------------------------------------------------------|
1528 *
1529 *     m       : (input) order of the Hessenberg matrix H
1530 *
1531 *     t       : (input) time-scaling factor (can be < 0).
1532 *
1533 *     H(ldh,m): (input) upper Hessenberg matrix.
1534 *
1535 *     y(m)    : (input/output) on input the operand vector,
1536 *               on output the resulting vector exp(t*H)*y.
1537 *
1538 *     wsp     : (workspace). Observe that a double precision vector of
1539 *               length 2*m*(m+2) can be used as well when calling this
1540 *               routine.
1541 *
1542 *----------------------------------------------------------------------|
1543 *     Roger B. Sidje (rbs@maths.uq.edu.au)
1544 *     EXPOKIT: Software Package for Computing Matrix Exponentials.
1545 *     ACM - Transactions On Mathematical Software, 24(1):130-156, 1998
1546 *----------------------------------------------------------------------|
1547 *
1548       complex*16 ZERO
1549       integer ndeg, i, j, k, ip, ih, iy, iz
1550       parameter ( ndeg=7, ZERO=(0.0d0,0.0d0) )
1551       double precision alpha0
1552       complex*16 alpha(ndeg), theta(ndeg), tmpc
1553
1554       intrinsic ABS,DBLE,CONJG,MIN
1555
1556 *---  Pointers ...
1557
1558       ih = 1
1559       iy = ih + m*m
1560       iz = iy + m
1561
1562 *---  Coefficients and poles of the partial fraction expansion...
1563
1564       alpha0  =  0.183216998528140087D-11
1565       alpha(1)=( 0.557503973136501826D+02,-0.204295038779771857D+03)
1566       alpha(2)=(-0.938666838877006739D+02, 0.912874896775456363D+02)
1567       alpha(3)=( 0.469965415550370835D+02,-0.116167609985818103D+02)
1568       alpha(4)=(-0.961424200626061065D+01,-0.264195613880262669D+01)
1569       alpha(5)=( 0.752722063978321642D+00, 0.670367365566377770D+00)
1570       alpha(6)=(-0.188781253158648576D-01,-0.343696176445802414D-01)
1571       alpha(7)=( 0.143086431411801849D-03, 0.287221133228814096D-03)
1572
1573       theta(1)=(-0.562314417475317895D+01, 0.119406921611247440D+01)
1574       theta(2)=(-0.508934679728216110D+01, 0.358882439228376881D+01)
1575       theta(3)=(-0.399337136365302569D+01, 0.600483209099604664D+01)
1576       theta(4)=(-0.226978543095856366D+01, 0.846173881758693369D+01)
1577       theta(5)=( 0.208756929753827868D+00, 0.109912615662209418D+02)
1578       theta(6)=( 0.370327340957595652D+01, 0.136563731924991884D+02)
1579       theta(7)=( 0.889777151877331107D+01, 0.166309842834712071D+02)
1580 *
1581       do ip = 1,ndeg
1582          theta(ndeg+ip) = CONJG( theta(ip) )
1583          alpha(ndeg+ip) = CONJG( alpha(ip) )
1584       enddo
1585 *
1586 *---  Accumulation of the contribution of each pole ...
1587 *
1588       do j = 1,m
1589          wsp(iz+j-1) = y(j)
1590          y(j) = y(j)*alpha0
1591       enddo
1592       do ip = 1,2*ndeg
1593          alpha(ip) = 0.5d0*alpha(ip)
1594 *---     Solve each fraction using Gaussian elimination with pivoting...
1595          do j = 1,m
1596             wsp(iy+j-1) = wsp(iz+j-1)
1597             do i = 1,MIN( j+1,m )
1598                wsp(ih+(j-1)*m+i-1) = -t*H(i,j)
1599             enddo
1600             wsp(ih+(j-1)*m+j-1) = wsp(ih+(j-1)*m+j-1)-theta(ip)
1601             do k = i,m
1602                wsp(ih+(j-1)*m+k-1) = ZERO
1603             enddo
1604          enddo
1605          do i = 1,m-1
1606 *---        Get pivot and exchange rows ...
1607             if (ABS(wsp(ih+(i-1)*m+i-1)).lt.ABS(wsp(ih+(i-1)*m+i))) then
1608                call ZSWAP( m-i+1, wsp(ih+(i-1)*m+i-1),m,
1609      .                     wsp(ih+(i-1)*m+i),m )
1610                call ZSWAP( 1, wsp(iy+i-1),1, wsp(iy+i),1 )
1611             endif
1612 *---        Forward eliminiation ...
1613             tmpc = wsp(ih+(i-1)*m+i) / wsp(ih+(i-1)*m+i-1)
1614             call ZAXPY( m-i, -tmpc, wsp(ih+i*m+i-1),m, wsp(ih+i*m+i),m )
1615             wsp(iy+i) = wsp(iy+i) - tmpc*wsp(iy+i-1)
1616          enddo
1617 *---     Backward substitution ...
1618          do i = m,1,-1
1619             tmpc = wsp(iy+i-1)
1620             do j = i+1,m
1621                tmpc = tmpc - wsp(ih+(j-1)*m+i-1)*wsp(iy+j-1)
1622             enddo
1623             wsp(iy+i-1) = tmpc / wsp(ih+(i-1)*m+i-1)
1624          enddo
1625 *---     Accumulate the partial result in y ...
1626          do j = 1,m
1627             y(j) = y(j) + alpha(ip)*wsp(iy+j-1)
1628          enddo
1629       enddo
1630       END
1631 *----------------------------------------------------------------------|
1632 *----------------------------------------------------------------------|
1633       subroutine DGEXPV( n, m, t, v, w, tol, anorm,
1634      .                   wsp,lwsp, iwsp,liwsp, matvec, itrace,iflag )
1635
1636       implicit none
1637       integer n, m, lwsp, liwsp, itrace, iflag, iwsp(liwsp)
1638       double precision t, tol, anorm, v(n), w(n), wsp(lwsp)
1639       external matvec
1640
1641 *-----Purpose----------------------------------------------------------|
1642 *
1643 *---  DGEXPV computes w = exp(t*A)*v - for a General matrix A.
1644 *
1645 *     It does not compute the matrix exponential in isolation but
1646 *     instead, it computes directly the action of the exponential
1647 *     operator on the operand vector. This way of doing so allows
1648 *     for addressing large sparse problems.
1649 *
1650 *     The method used is based on Krylov subspace projection
1651 *     techniques and the matrix under consideration interacts only
1652 *     via the external routine `matvec' performing the matrix-vector
1653 *     product (matrix-free method).
1654 *
1655 *-----Arguments--------------------------------------------------------|
1656 *
1657 *     n      : (input) order of the principal matrix A.
1658 *
1659 *     m      : (input) maximum size for the Krylov basis.
1660 *
1661 *     t      : (input) time at wich the solution is needed (can be < 0).
1662 *
1663 *     v(n)   : (input) given operand vector.
1664 *
1665 *     w(n)   : (output) computed approximation of exp(t*A)*v.
1666 *
1667 *     tol    : (input/output) the requested accuracy tolerance on w.
1668 *              If on input tol=0.0d0 or tol is too small (tol.le.eps)
1669 *              the internal value sqrt(eps) is used, and tol is set to
1670 *              sqrt(eps) on output (`eps' denotes the machine epsilon).
1671 *              (`Happy breakdown' is assumed if h(j+1,j) .le. anorm*tol)
1672 *
1673 *     anorm  : (input) an approximation of some norm of A.
1674 *
1675 *   wsp(lwsp): (workspace) lwsp .ge. n*(m+1)+n+(m+2)^2+4*(m+2)^2+ideg+1
1676 *                                   +---------+-------+---------------+
1677 *              (actually, ideg=6)        V        H      wsp for PADE
1678 *
1679 * iwsp(liwsp): (workspace) liwsp .ge. m+2
1680 *
1681 *     matvec : external subroutine for matrix-vector multiplication.
1682 *              synopsis: matvec( x, y )
1683 *                        double precision x(*), y(*)
1684 *              computes: y(1:n) <- A*x(1:n)
1685 *                        where A is the principal matrix.
1686 *
1687 *     itrace : (input) running mode. 0=silent, 1=print step-by-step info
1688 *
1689 *     iflag  : (output) exit flag.
1690 *              <0 - bad input arguments
1691 *               0 - no problem
1692 *               1 - maximum number of steps reached without convergence
1693 *               2 - requested tolerance was too high
1694 *
1695 *-----Accounts on the computation--------------------------------------|
1696 *     Upon exit, an interested user may retrieve accounts on the
1697 *     computations. They are located in wsp and iwsp as indicated below:
1698 *
1699 *     location  mnemonic                 description
1700 *     -----------------------------------------------------------------|
1701 *     iwsp(1) = nmult, number of matrix-vector multiplications used
1702 *     iwsp(2) = nexph, number of Hessenberg matrix exponential evaluated
1703 *     iwsp(3) = nscale, number of repeated squaring involved in Pade
1704 *     iwsp(4) = nstep, number of integration steps used up to completion
1705 *     iwsp(5) = nreject, number of rejected step-sizes
1706 *     iwsp(6) = ibrkflag, set to 1 if `happy breakdown' and 0 otherwise
1707 *     iwsp(7) = mbrkdwn, if `happy brkdown', basis-size when it occured
1708 *     -----------------------------------------------------------------|
1709 *     wsp(1)  = step_min, minimum step-size used during integration
1710 *     wsp(2)  = step_max, maximum step-size used during integration
1711 *     wsp(3)  = dummy
1712 *     wsp(4)  = dummy
1713 *     wsp(5)  = x_error, maximum among all local truncation errors
1714 *     wsp(6)  = s_error, global sum of local truncation errors
1715 *     wsp(7)  = tbrkdwn, if `happy breakdown', time when it occured
1716 *     wsp(8)  = t_now, integration domain successfully covered
1717 *     wsp(9)  = hump, i.e., max||exp(sA)||, s in [0,t] (or [t,0] if t<0)
1718 *     wsp(10) = ||w||/||v||, scaled norm of the solution w.
1719 *     -----------------------------------------------------------------|
1720 *     The `hump' is a measure of the conditioning of the problem. The
1721 *     matrix exponential is well-conditioned if hump = 1, whereas it is
1722 *     poorly-conditioned if hump >> 1. However the solution can still be
1723 *     relatively fairly accurate even when the hump is large (the hump
1724 *     is an upper bound), especially when the hump and the scaled norm
1725 *     of w [this is also computed and returned in wsp(10)] are of the
1726 *     same order of magnitude (further details in reference below).
1727 *
1728 *----------------------------------------------------------------------|
1729 *-----The following parameters may also be adjusted herein-------------|
1730 *
1731       integer mxstep, mxreject, ideg
1732       double precision delta, gamma
1733       parameter( mxstep   = 1000,
1734      .           mxreject = 0,
1735      .           ideg     = 6,
1736      .           delta    = 1.2d0,
1737      .           gamma    = 0.9d0 )
1738
1739 *     mxstep  : maximum allowable number of integration steps.
1740 *               The value 0 means an infinite number of steps.
1741 *
1742 *     mxreject: maximum allowable number of rejections at each step.
1743 *               The value 0 means an infinite number of rejections.
1744 *
1745 *     ideg    : the Pade approximation of type (ideg,ideg) is used as
1746 *               an approximation to exp(H). The value 0 switches to the
1747 *               uniform rational Chebyshev approximation of type (14,14)
1748 *
1749 *     delta   : local truncation error `safety factor'
1750 *
1751 *     gamma   : stepsize `shrinking factor'
1752 *
1753 *----------------------------------------------------------------------|
1754 *     Roger B. Sidje (rbs@maths.uq.edu.au)
1755 *     EXPOKIT: Software Package for Computing Matrix Exponentials.
1756 *     ACM - Transactions On Mathematical Software, 24(1):130-156, 1998
1757 *----------------------------------------------------------------------|
1758 *
1759       integer i, j, k1, mh, mx, iv, ih, j1v, ns, ifree, lfree, iexph,
1760      .        ireject,ibrkflag,mbrkdwn, nmult, nreject, nexph, nscale,
1761      .        nstep
1762       double precision sgn, t_out, tbrkdwn, step_min,step_max, err_loc,
1763      .                 s_error, x_error, t_now, t_new, t_step, t_old,
1764      .                 xm, beta, break_tol, p1, p2, p3, eps, rndoff,
1765      .                 vnorm, avnorm, hj1j, hij, hump, SQR1
1766
1767       intrinsic AINT,ABS,DBLE,LOG10,MAX,MIN,NINT,SIGN,SQRT
1768       double precision DDOT, DNRM2
1769
1770 *---  check restrictions on input parameters ...
1771       iflag = 0
1772       if ( lwsp.lt.n*(m+2)+5*(m+2)**2+ideg+1 ) iflag = -1
1773       if ( liwsp.lt.m+2 ) iflag = -2
1774       if ( m.ge.n .or. m.le.0 ) iflag = -3
1775       if ( iflag.ne.0 ) stop 'bad sizes (in input of DGEXPV)'
1776 *
1777 *---  initialisations ...
1778 *
1779       k1 = 2
1780       mh = m + 2
1781       iv = 1
1782       ih = iv + n*(m+1) + n
1783       ifree = ih + mh*mh
1784       lfree = lwsp - ifree + 1
1785
1786       ibrkflag = 0
1787       mbrkdwn  = m
1788       nmult    = 0
1789       nreject  = 0
1790       nexph    = 0
1791       nscale   = 0
1792
1793       t_out    = ABS( t )
1794       tbrkdwn  = 0.0d0
1795       step_min = t_out
1796       step_max = 0.0d0
1797       nstep    = 0
1798       s_error  = 0.0d0
1799       x_error  = 0.0d0
1800       t_now    = 0.0d0
1801       t_new    = 0.0d0
1802
1803       p1 = 4.0d0/3.0d0
1804  1    p2 = p1 - 1.0d0
1805       p3 = p2 + p2 + p2
1806       eps = ABS( p3-1.0d0 )
1807       if ( eps.eq.0.0d0 ) go to 1
1808       if ( tol.le.eps ) tol = SQRT( eps )
1809       rndoff = eps*anorm
1810
1811       break_tol = 1.0d-7
1812 *>>>  break_tol = tol
1813 *>>>  break_tol = anorm*tol
1814
1815       sgn = SIGN( 1.0d0,t )
1816       call DCOPY( n, v,1, w,1 )
1817       beta = DNRM2( n, w,1 )
1818       vnorm = beta
1819       hump = beta
1820 *
1821 *---  obtain the very first stepsize ...
1822 *
1823       SQR1 = SQRT( 0.1d0 )
1824       xm = 1.0d0/DBLE( m )
1825       p1 = tol*(((m+1)/2.72D0)**(m+1))*SQRT(2.0D0*3.14D0*(m+1))
1826       t_new = (1.0d0/anorm)*(p1/(4.0d0*beta*anorm))**xm
1827       p1 = 10.0d0**(NINT( LOG10( t_new )-SQR1 )-1)
1828       t_new = AINT( t_new/p1 + 0.55d0 ) * p1
1829 *
1830 *---  step-by-step integration ...
1831 *
1832  100  if ( t_now.ge.t_out ) goto 500
1833
1834       nstep = nstep + 1
1835       t_step = MIN( t_out-t_now, t_new )
1836
1837       p1 = 1.0d0/beta
1838       do i = 1,n
1839          wsp(iv + i-1) = p1*w(i)
1840       enddo
1841       do i = 1,mh*mh
1842          wsp(ih+i-1) = 0.0d0
1843       enddo
1844 *
1845 *---  Arnoldi loop ...
1846 *
1847       j1v = iv + n
1848       do 200 j = 1,m
1849          nmult = nmult + 1
1850          call matvec( wsp(j1v-n), wsp(j1v) )
1851          do i = 1,j
1852             hij = DDOT( n, wsp(iv+(i-1)*n),1, wsp(j1v),1 )
1853             call DAXPY( n, -hij, wsp(iv+(i-1)*n),1, wsp(j1v),1 )
1854             wsp(ih+(j-1)*mh+i-1) = hij
1855          enddo
1856          hj1j = DNRM2( n, wsp(j1v),1 )
1857 *---     if `happy breakdown' go straightforward at the end ...
1858          if ( hj1j.le.break_tol ) then
1859             print*,'happy breakdown: mbrkdwn =',j,' h =',hj1j
1860             k1 = 0
1861             ibrkflag = 1
1862             mbrkdwn = j
1863             tbrkdwn = t_now
1864             t_step = t_out-t_now
1865             goto 300
1866          endif
1867          wsp(ih+(j-1)*mh+j) = hj1j
1868          call DSCAL( n, 1.0d0/hj1j, wsp(j1v),1 )
1869          j1v = j1v + n
1870  200  continue
1871       nmult = nmult + 1
1872       call matvec( wsp(j1v-n), wsp(j1v) )
1873       avnorm = DNRM2( n, wsp(j1v),1 )
1874 *
1875 *---  set 1 for the 2-corrected scheme ...
1876 *
1877  300  continue
1878       wsp(ih+m*mh+m+1) = 1.0d0
1879 *
1880 *---  loop while ireject<mxreject until the tolerance is reached ...
1881 *
1882       ireject = 0
1883  401  continue
1884
1885 *
1886 *---  compute w = beta*V*exp(t_step*H)*e1 ...
1887 *
1888       nexph = nexph + 1
1889       mx = mbrkdwn + k1
1890       if ( ideg.ne.0 ) then
1891 *---     irreducible rational Pade approximation ...
1892          call DGPADM( ideg, mx, sgn*t_step, wsp(ih),mh,
1893      .                wsp(ifree),lfree, iwsp, iexph, ns, iflag )
1894          iexph = ifree + iexph - 1
1895          nscale = nscale + ns
1896       else
1897 *---     uniform rational Chebyshev approximation ...
1898          iexph = ifree
1899          do i = 1,mx
1900             wsp(iexph+i-1) = 0.0d0
1901          enddo
1902          wsp(iexph) = 1.0d0
1903          call DNCHBV(mx,sgn*t_step,wsp(ih),mh,wsp(iexph),wsp(ifree+mx))
1904       endif
1905
1906  402  continue
1907 *
1908 *---  error estimate ...
1909 *
1910       if ( k1.eq.0 ) then
1911          err_loc = tol
1912       else
1913          p1 = ABS( wsp(iexph+m) )   * beta
1914          p2 = ABS( wsp(iexph+m+1) ) * beta * avnorm
1915          if ( p1.gt.10.0d0*p2 ) then
1916             err_loc = p2
1917             xm = 1.0d0/DBLE( m )
1918          elseif ( p1.gt.p2 ) then
1919             err_loc = (p1*p2)/(p1-p2)
1920             xm = 1.0d0/DBLE( m )
1921          else
1922             err_loc = p1
1923             xm = 1.0d0/DBLE( m-1 )
1924          endif
1925       endif
1926 *
1927 *---  reject the step-size if the error is not acceptable ...
1928 *
1929       if ( (k1.ne.0) .and. (err_loc.gt.delta*t_step*tol) .and.
1930      .     (mxreject.eq.0 .or. ireject.lt.mxreject) ) then
1931          t_old = t_step
1932          t_step = gamma * t_step * (t_step*tol/err_loc)**xm
1933          p1 = 10.0d0**(NINT( LOG10( t_step )-SQR1 )-1)
1934          t_step = AINT( t_step/p1 + 0.55d0 ) * p1
1935          if ( itrace.ne.0 ) then
1936             print*,'t_step =',t_old
1937             print*,'err_loc =',err_loc
1938             print*,'err_required =',delta*t_old*tol
1939             print*,'stepsize rejected, stepping down to:',t_step
1940          endif
1941          ireject = ireject + 1
1942          nreject = nreject + 1
1943          if ( mxreject.ne.0 .and. ireject.gt.mxreject ) then
1944             print*,"Failure in DGEXPV: ---"
1945             print*,"The requested tolerance is too high."
1946             Print*,"Rerun with a smaller value."
1947             iflag = 2
1948             return
1949          endif
1950          goto 401
1951       endif
1952 *
1953 *---  now update w = beta*V*exp(t_step*H)*e1 and the hump ...
1954 *
1955       mx = mbrkdwn + MAX( 0,k1-1 )
1956       call DGEMV( 'n', n,mx,beta,wsp(iv),n,wsp(iexph),1,0.0d0,w,1 )
1957       beta = DNRM2( n, w,1 )
1958       hump = MAX( hump, beta )
1959 *
1960 *---  suggested value for the next stepsize ...
1961 *
1962       t_new = gamma * t_step * (t_step*tol/err_loc)**xm
1963       p1 = 10.0d0**(NINT( LOG10( t_new )-SQR1 )-1)
1964       t_new = AINT( t_new/p1 + 0.55d0 ) * p1
1965
1966       err_loc = MAX( err_loc,rndoff )
1967 *
1968 *---  update the time covered ...
1969 *
1970       t_now = t_now + t_step
1971 *
1972 *---  display and keep some information ...
1973 *
1974       if ( itrace.ne.0 ) then
1975          print*,'integration',nstep,'---------------------------------'
1976          print*,'scale-square =',ns
1977          print*,'step_size =',t_step
1978          print*,'err_loc   =',err_loc
1979          print*,'next_step =',t_new
1980       endif
1981
1982       step_min = MIN( step_min, t_step )
1983       step_max = MAX( step_max, t_step )
1984       s_error = s_error + err_loc
1985       x_error = MAX( x_error, err_loc )
1986
1987       if ( mxstep.eq.0 .or. nstep.lt.mxstep ) goto 100
1988       iflag = 1
1989
1990  500  continue
1991
1992       iwsp(1) = nmult
1993       iwsp(2) = nexph
1994       iwsp(3) = nscale
1995       iwsp(4) = nstep
1996       iwsp(5) = nreject
1997       iwsp(6) = ibrkflag
1998       iwsp(7) = mbrkdwn
1999
2000       wsp(1)  = step_min
2001       wsp(2)  = step_max
2002       wsp(3)  = 0.0d0
2003       wsp(4)  = 0.0d0
2004       wsp(5)  = x_error
2005       wsp(6)  = s_error
2006       wsp(7)  = tbrkdwn
2007       wsp(8)  = sgn*t_now
2008       wsp(9)  = hump/vnorm
2009       wsp(10) = beta/vnorm
2010       END
2011 *----------------------------------------------------------------------|
2012 *----------------------------------------------------------------------|
2013       subroutine DSEXPV( n, m, t, v, w, tol, anorm,
2014      .                   wsp,lwsp, iwsp,liwsp, matvec, itrace,iflag )
2015
2016       implicit none
2017       integer n, m, lwsp, liwsp, itrace, iflag, iwsp(liwsp)
2018       double precision t, tol, anorm, v(n), w(n), wsp(lwsp)
2019       external matvec
2020
2021 *-----Purpose----------------------------------------------------------|
2022 *
2023 *---  DSEXPV computes w = exp(t*A)*v - for a Symmetric matrix A.
2024 *
2025 *     It does not compute the matrix exponential in isolation but
2026 *     instead, it computes directly the action of the exponential
2027 *     operator on the operand vector. This way of doing so allows
2028 *     for addressing large sparse problems.
2029 *
2030 *     The method used is based on Krylov subspace projection
2031 *     techniques and the matrix under consideration interacts only
2032 *     via the external routine `matvec' performing the matrix-vector
2033 *     product (matrix-free method).
2034 *
2035 *-----Arguments--------------------------------------------------------|
2036 *
2037 *     n      : (input) order of the principal matrix A.
2038 *
2039 *     m      : (input) maximum size for the Krylov basis.
2040 *
2041 *     t      : (input) time at wich the solution is needed (can be < 0).
2042 *
2043 *     v(n)   : (input) given operand vector.
2044 *
2045 *     w(n)   : (output) computed approximation of exp(t*A)*v.
2046 *
2047 *     tol    : (input/output) the requested accuracy tolerance on w.
2048 *              If on input tol=0.0d0 or tol is too small (tol.le.eps)
2049 *              the internal value sqrt(eps) is used, and tol is set to
2050 *              sqrt(eps) on output (`eps' denotes the machine epsilon).
2051 *              (`Happy breakdown' is assumed if h(j+1,j) .le. anorm*tol)
2052 *
2053 *     anorm  : (input) an approximation of some norm of A.
2054 *
2055 *   wsp(lwsp): (workspace) lwsp .ge. n*(m+1)+n+(m+2)^2+4*(m+2)^2+ideg+1
2056 *                                   +---------+-------+---------------+
2057 *              (actually, ideg=6)        V        H      wsp for PADE
2058 *
2059 * iwsp(liwsp): (workspace) liwsp .ge. m+2
2060 *
2061 *     matvec : external subroutine for matrix-vector multiplication.
2062 *              synopsis: matvec( x, y )
2063 *                        double precision x(*), y(*)
2064 *              computes: y(1:n) <- A*x(1:n)
2065 *                        where A is the principal matrix.
2066 *
2067 *     itrace : (input) running mode. 0=silent, 1=print step-by-step info
2068 *
2069 *     iflag  : (output) exit flag.
2070 *              <0 - bad input arguments
2071 *               0 - no problem
2072 *               1 - maximum number of steps reached without convergence
2073 *               2 - requested tolerance was too high
2074 *
2075 *-----Accounts on the computation--------------------------------------|
2076 *     Upon exit, an interested user may retrieve accounts on the
2077 *     computations. They are located in the workspace arrays wsp and
2078 *     iwsp as indicated below:
2079 *
2080 *     location  mnemonic                 description
2081 *     -----------------------------------------------------------------|
2082 *     iwsp(1) = nmult, number of matrix-vector multiplications used
2083 *     iwsp(2) = nexph, nbr of Tridiagonal matrix exponential evaluated
2084 *     iwsp(3) = nscale, number of repeated squaring involved in Pade
2085 *     iwsp(4) = nstep, nbr of integration steps used up to completion
2086 *     iwsp(5) = nreject, number of rejected step-sizes
2087 *     iwsp(6) = ibrkflag, set to 1 if `happy breakdown' and 0 otherwise
2088 *     iwsp(7) = mbrkdwn, if `happy brkdown', basis-size when it occured
2089 *     -----------------------------------------------------------------|
2090 *     wsp(1)  = step_min, minimum step-size used during integration
2091 *     wsp(2)  = step_max, maximum step-size used during integration
2092 *     wsp(3)  = dummy
2093 *     wsp(4)  = dummy
2094 *     wsp(5)  = x_error, maximum among all local truncation errors
2095 *     wsp(6)  = s_error, global sum of local truncation errors
2096 *     wsp(7)  = tbrkdwn, if `happy breakdown', time when it occured
2097 *     wsp(8)  = t_now, integration domain successfully covered
2098 *     wsp(9)  = hump, i.e., max||exp(sA)||, s in [0,t] (or [t,0] if t<0)
2099 *     wsp(10) = ||w||/||v||, scaled norm of the solution w.
2100 *     -----------------------------------------------------------------|
2101 *     The `hump' is a measure of the conditioning of the problem. The
2102 *     matrix exponential is well-conditioned if hump = 1, whereas it is
2103 *     poorly-conditioned if hump >> 1. However the solution can still be
2104 *     relatively fairly accurate even when the hump is large (the hump
2105 *     is an upper bound), especially when the hump and the scaled norm
2106 *     of w [this is also computed and returned in wsp(10)] are of the
2107 *     same order of magnitude (further details in reference below).
2108 *
2109 *----------------------------------------------------------------------|
2110 *-----The following parameters may also be adjusted herein-------------|
2111 *
2112       integer mxstep, mxreject, ideg
2113       double precision delta, gamma
2114       parameter( mxstep   = 500,
2115      .           mxreject = 0,
2116      .           ideg     = 6,
2117      .           delta    = 1.2d0,
2118      .           gamma    = 0.9d0 )
2119
2120 *     mxstep  : maximum allowable number of integration steps.
2121 *               The value 0 means an infinite number of steps.
2122 *
2123 *     mxreject: maximum allowable number of rejections at each step.
2124 *               The value 0 means an infinite number of rejections.
2125 *
2126 *     ideg    : the Pade approximation of type (ideg,ideg) is used as
2127 *               an approximation to exp(H). The value 0 switches to the
2128 *               uniform rational Chebyshev approximation of type (14,14)
2129 *
2130 *     delta   : local truncation error `safety factor'
2131 *
2132 *     gamma   : stepsize `shrinking factor'
2133 *
2134 *----------------------------------------------------------------------|
2135 *     Roger B. Sidje (rbs@maths.uq.edu.au)
2136 *     EXPOKIT: Software Package for Computing Matrix Exponentials.
2137 *     ACM - Transactions On Mathematical Software, 24(1):130-156, 1998
2138 *----------------------------------------------------------------------|
2139 *
2140       integer i, j, k1, mh, mx, iv, ih, j1v, ns, ifree, lfree, iexph,
2141      .        ireject,ibrkflag,mbrkdwn, nmult, nreject, nexph, nscale,
2142      .        nstep
2143       double precision sgn, t_out, tbrkdwn, step_min,step_max, err_loc,
2144      .                 s_error, x_error, t_now, t_new, t_step, t_old,
2145      .                 xm, beta, break_tol, p1, p2, p3, eps, rndoff,
2146      .                 vnorm, avnorm, hj1j, hjj, hump, SQR1
2147
2148       intrinsic AINT,ABS,DBLE,LOG10,MAX,MIN,NINT,SIGN,SQRT
2149       double precision DDOT, DNRM2
2150
2151 *---  check restrictions on input parameters ...
2152       iflag = 0
2153       if ( lwsp.lt.n*(m+2)+5*(m+2)**2+ideg+1 ) iflag = -1
2154       if ( liwsp.lt.m+2 ) iflag = -2
2155       if ( m.ge.n .or. m.le.0 ) iflag = -3
2156       if ( iflag.ne.0 ) stop 'bad sizes (in input of DSEXPV)'
2157 *
2158 *---  initialisations ...
2159 *
2160       k1 = 2
2161       mh = m + 2
2162       iv = 1
2163       ih = iv + n*(m+1) + n
2164       ifree = ih + mh*mh
2165       lfree = lwsp - ifree + 1
2166
2167       ibrkflag = 0
2168       mbrkdwn  = m
2169       nmult    = 0
2170       nreject  = 0
2171       nexph    = 0
2172       nscale   = 0
2173
2174       t_out    = ABS( t )
2175       tbrkdwn  = 0.0d0
2176       step_min = t_out
2177       step_max = 0.0d0
2178       nstep    = 0
2179       s_error  = 0.0d0
2180       x_error  = 0.0d0
2181       t_now    = 0.0d0
2182       t_new    = 0.0d0
2183
2184       p1 = 4.0d0/3.0d0
2185  1    p2 = p1 - 1.0d0
2186       p3 = p2 + p2 + p2
2187       eps = ABS( p3-1.0d0 )
2188       if ( eps.eq.0.0d0 ) go to 1
2189       if ( tol.le.eps ) tol = SQRT( eps )
2190       rndoff = eps*anorm
2191
2192       break_tol = 1.0d-7
2193 *>>>  break_tol = tol
2194 *>>>  break_tol = anorm*tol
2195
2196       sgn = SIGN( 1.0d0,t )
2197       call DCOPY( n, v,1, w,1 )
2198       beta = DNRM2( n, w,1 )
2199       vnorm = beta
2200       hump = beta
2201 *
2202 *---  obtain the very first stepsize ...
2203 *
2204       SQR1 = SQRT( 0.1d0 )
2205       xm = 1.0d0/DBLE( m )
2206       p1 = tol*(((m+1)/2.72D0)**(m+1))*SQRT(2.0D0*3.14D0*(m+1))
2207       t_new = (1.0d0/anorm)*(p1/(4.0d0*beta*anorm))**xm
2208       p1 = 10.0d0**(NINT( LOG10( t_new )-SQR1 )-1)
2209       t_new = AINT( t_new/p1 + 0.55d0 ) * p1
2210 *
2211 *---  step-by-step integration ...
2212 *
2213  100  if ( t_now.ge.t_out ) goto 500
2214
2215       nstep = nstep + 1
2216       t_step = MIN( t_out-t_now, t_new )
2217
2218       p1 = 1.0d0/beta
2219       do i = 1,n
2220          wsp(iv + i-1) = p1*w(i)
2221       enddo
2222       do i = 1,mh*mh
2223          wsp(ih+i-1) = 0.0d0
2224       enddo
2225 *
2226 *---  Lanczos loop ...
2227 *
2228       j1v = iv + n
2229       do 200 j = 1,m
2230          nmult = nmult + 1
2231          call matvec( wsp(j1v-n), wsp(j1v) )
2232          if ( j.gt.1 )
2233      .     call DAXPY(n,-wsp(ih+(j-1)*mh+j-2),wsp(j1v-2*n),1,wsp(j1v),1)
2234          hjj = DDOT( n, wsp(j1v-n),1, wsp(j1v),1 )
2235          call DAXPY( n, -hjj, wsp(j1v-n),1, wsp(j1v),1 )
2236          hj1j = DNRM2( n, wsp(j1v),1 )
2237          wsp(ih+(j-1)*(mh+1)) = hjj
2238 *---     if `happy breakdown' go straightforward at the end ...
2239          if ( hj1j.le.break_tol ) then
2240             print*,'happy breakdown: mbrkdwn =',j,' h =',hj1j
2241             k1 = 0
2242             ibrkflag = 1
2243             mbrkdwn = j
2244             tbrkdwn = t_now
2245             t_step = t_out-t_now
2246             goto 300
2247          endif
2248          wsp(ih+(j-1)*mh+j) = hj1j
2249          wsp(ih+j*mh+j-1) = hj1j
2250          call DSCAL( n, 1.0d0/hj1j, wsp(j1v),1 )
2251          j1v = j1v + n
2252  200  continue
2253       nmult = nmult + 1
2254       call matvec( wsp(j1v-n), wsp(j1v) )
2255       avnorm = DNRM2( n, wsp(j1v),1 )
2256 *
2257 *---  set 1 for the 2-corrected scheme ...
2258 *
2259  300  continue
2260       wsp(ih+m*mh+m-1) = 0.0d0
2261       wsp(ih+m*mh+m+1) = 1.0d0
2262 *
2263 *---  loop while ireject<mxreject until the tolerance is reached ...
2264 *
2265       ireject = 0
2266  401  continue
2267 *
2268 *---  compute w = beta*V*exp(t_step*H)*e1 ...
2269 *
2270       nexph = nexph + 1
2271       mx = mbrkdwn + k1
2272       if ( ideg.ne.0 ) then
2273 *---     irreducible rational Pade approximation ...
2274          call DGPADM( ideg, mx, sgn*t_step, wsp(ih),mh,
2275      .                wsp(ifree),lfree, iwsp, iexph, ns, iflag )
2276          iexph = ifree + iexph - 1
2277          nscale = nscale + ns
2278       else
2279 *---     uniform rational Chebyshev approximation ...
2280          iexph = ifree
2281          do i = 1,mx
2282             wsp(iexph+i-1) = 0.0d0
2283          enddo
2284          wsp(iexph) = 1.0d0
2285          call DNCHBV(mx,sgn*t_step,wsp(ih),mh,wsp(iexph),wsp(ifree+mx))
2286       endif
2287  402  continue
2288 *
2289 *---  error estimate ...
2290 *
2291       if ( k1.eq.0 ) then
2292          err_loc = tol
2293       else
2294          p1 = ABS( wsp(iexph+m) )   * beta
2295          p2 = ABS( wsp(iexph+m+1) ) * beta * avnorm
2296          if ( p1.gt.10.0d0*p2 ) then
2297             err_loc = p2
2298             xm = 1.0d0/DBLE( m )
2299          elseif ( p1.gt.p2 ) then
2300             err_loc = (p1*p2)/(p1-p2)
2301             xm = 1.0d0/DBLE( m )
2302          else
2303             err_loc = p1
2304             xm = 1.0d0/DBLE( m-1 )
2305          endif
2306       endif
2307 *
2308 *---  reject the step-size if the error is not acceptable ...
2309 *
2310       if ( (k1.ne.0) .and. (err_loc.gt.delta*t_step*tol) .and.
2311      .     (mxreject.eq.0 .or. ireject.lt.mxreject) ) then
2312          t_old = t_step
2313          t_step = gamma * t_step * (t_step*tol/err_loc)**xm
2314          p1 = 10.0d0**(NINT( LOG10( t_step )-SQR1 )-1)
2315          t_step = AINT( t_step/p1 + 0.55d0 ) * p1
2316          if ( itrace.ne.0 ) then
2317             print*,'t_step =',t_old
2318             print*,'err_loc =',err_loc
2319             print*,'err_required =',delta*t_old*tol
2320             print*,'stepsize rejected, stepping down to:',t_step
2321          endif
2322          ireject = ireject + 1
2323          nreject = nreject + 1
2324          if ( mxreject.ne.0 .and. ireject.gt.mxreject ) then
2325             print*,"Failure in DSEXPV: ---"
2326             print*,"The requested tolerance is too high."
2327             Print*,"Rerun with a smaller value."
2328             iflag = 2
2329             return
2330          endif
2331          goto 401
2332       endif
2333 *
2334 *---  now update w = beta*V*exp(t_step*H)*e1 and the hump ...
2335 *
2336       mx = mbrkdwn + MAX( 0,k1-1 )
2337       call DGEMV( 'n', n,mx,beta,wsp(iv),n,wsp(iexph),1,0.0d0,w,1 )
2338       beta = DNRM2( n, w,1 )
2339       hump = MAX( hump, beta )
2340 *
2341 *---  suggested value for the next stepsize ...
2342 *
2343       t_new = gamma * t_step * (t_step*tol/err_loc)**xm
2344       p1 = 10.0d0**(NINT( LOG10( t_new )-SQR1 )-1)
2345       t_new = AINT( t_new/p1 + 0.55d0 ) * p1
2346
2347       err_loc = MAX( err_loc,rndoff )
2348 *
2349 *---  update the time covered ...
2350 *
2351       t_now = t_now + t_step
2352 *
2353 *---  display and keep some information ...
2354 *
2355       if ( itrace.ne.0 ) then
2356          print*,'integration',nstep,'---------------------------------'
2357          print*,'scale-square =',ns
2358          print*,'step_size =',t_step
2359          print*,'err_loc   =',err_loc
2360          print*,'next_step =',t_new
2361       endif
2362
2363       step_min = MIN( step_min, t_step )
2364       step_max = MAX( step_max, t_step )
2365       s_error = s_error + err_loc
2366       x_error = MAX( x_error, err_loc )
2367
2368       if ( mxstep.eq.0 .or. nstep.lt.mxstep ) goto 100
2369       iflag = 1
2370
2371  500  continue
2372
2373       iwsp(1) = nmult
2374       iwsp(2) = nexph
2375       iwsp(3) = nscale
2376       iwsp(4) = nstep
2377       iwsp(5) = nreject
2378       iwsp(6) = ibrkflag
2379       iwsp(7) = mbrkdwn
2380
2381       wsp(1)  = step_min
2382       wsp(2)  = step_max
2383       wsp(3)  = 0.0d0
2384       wsp(4)  = 0.0d0
2385       wsp(5)  = x_error
2386       wsp(6)  = s_error
2387       wsp(7)  = tbrkdwn
2388       wsp(8)  = sgn*t_now
2389       wsp(9)  = hump/vnorm
2390       wsp(10) = beta/vnorm
2391       END
2392 *----------------------------------------------------------------------|
2393
2394 *----------------------------------------------------------------------|
2395       subroutine ZGEXPV( n, m, t, v, w, tol, anorm,
2396      .                   wsp,lwsp, iwsp,liwsp, matvec, itrace,iflag )
2397
2398       implicit none
2399       integer          n, m, lwsp, liwsp, itrace, iflag, iwsp(liwsp)
2400       double precision t, tol, anorm
2401       complex*16       v(n), w(n), wsp(lwsp)
2402       external         matvec
2403
2404 *-----Purpose----------------------------------------------------------|
2405 *
2406 *---  ZGEXPV computes w = exp(t*A)*v
2407 *     for a Zomplex (i.e., complex double precision) matrix A
2408 *
2409 *     It does not compute the matrix exponential in isolation but
2410 *     instead, it computes directly the action of the exponential
2411 *     operator on the operand vector. This way of doing so allows
2412 *     for addressing large sparse problems.
2413 *
2414 *     The method used is based on Krylov subspace projection
2415 *     techniques and the matrix under consideration interacts only
2416 *     via the external routine `matvec' performing the matrix-vector
2417 *     product (matrix-free method).
2418 *
2419 *-----Arguments--------------------------------------------------------|
2420 *
2421 *     n      : (input) order of the principal matrix A.
2422 *
2423 *     m      : (input) maximum size for the Krylov basis.
2424 *
2425 *     t      : (input) time at wich the solution is needed (can be < 0).
2426 *
2427 *     v(n)   : (input) given operand vector.
2428 *
2429 *     w(n)   : (output) computed approximation of exp(t*A)*v.
2430 *
2431 *     tol    : (input/output) the requested accuracy tolerance on w.
2432 *              If on input tol=0.0d0 or tol is too small (tol.le.eps)
2433 *              the internal value sqrt(eps) is used, and tol is set to
2434 *              sqrt(eps) on output (`eps' denotes the machine epsilon).
2435 *              (`Happy breakdown' is assumed if h(j+1,j) .le. anorm*tol)
2436 *
2437 *     anorm  : (input) an approximation of some norm of A.
2438 *
2439 *   wsp(lwsp): (workspace) lwsp .ge. n*(m+1)+n+(m+2)^2+4*(m+2)^2+ideg+1
2440 *                                   +---------+-------+---------------+
2441 *              (actually, ideg=6)        V        H      wsp for PADE
2442 *
2443 * iwsp(liwsp): (workspace) liwsp .ge. m+2
2444 *
2445 *     matvec : external subroutine for matrix-vector multiplication.
2446 *              synopsis: matvec( x, y )
2447 *                        complex*16 x(*), y(*)
2448 *              computes: y(1:n) <- A*x(1:n)
2449 *                        where A is the principal matrix.
2450 *
2451 *     itrace : (input) running mode. 0=silent, 1=print step-by-step info
2452 *
2453 *     iflag  : (output) exit flag.
2454 *              <0 - bad input arguments
2455 *               0 - no problem
2456 *               1 - maximum number of steps reached without convergence
2457 *               2 - requested tolerance was too high
2458 *
2459 *-----Accounts on the computation--------------------------------------|
2460 *     Upon exit, an interested user may retrieve accounts on the
2461 *     computations. They are located in the workspace arrays wsp and
2462 *     iwsp as indicated below:
2463 *
2464 *     location  mnemonic                 description
2465 *     -----------------------------------------------------------------|
2466 *     iwsp(1) = nmult, number of matrix-vector multiplications used
2467 *     iwsp(2) = nexph, number of Hessenberg matrix exponential evaluated
2468 *     iwsp(3) = nscale, number of repeated squaring involved in Pade
2469 *     iwsp(4) = nstep, number of integration steps used up to completion
2470 *     iwsp(5) = nreject, number of rejected step-sizes
2471 *     iwsp(6) = ibrkflag, set to 1 if `happy breakdown' and 0 otherwise
2472 *     iwsp(7) = mbrkdwn, if `happy brkdown', basis-size when it occured
2473 *     -----------------------------------------------------------------|
2474 *     wsp(1)  = step_min, minimum step-size used during integration
2475 *     wsp(2)  = step_max, maximum step-size used during integration
2476 *     wsp(3)  = x_round, maximum among all roundoff errors (lower bound)
2477 *     wsp(4)  = s_round, sum of roundoff errors (lower bound)
2478 *     wsp(5)  = x_error, maximum among all local truncation errors
2479 *     wsp(6)  = s_error, global sum of local truncation errors
2480 *     wsp(7)  = tbrkdwn, if `happy breakdown', time when it occured
2481 *     wsp(8)  = t_now, integration domain successfully covered
2482 *     wsp(9)  = hump, i.e., max||exp(sA)||, s in [0,t] (or [t,0] if t<0)
2483 *     wsp(10) = ||w||/||v||, scaled norm of the solution w.
2484 *     -----------------------------------------------------------------|
2485 *     The `hump' is a measure of the conditioning of the problem. The
2486 *     matrix exponential is well-conditioned if hump = 1, whereas it is
2487 *     poorly-conditioned if hump >> 1. However the solution can still be
2488 *     relatively fairly accurate even when the hump is large (the hump
2489 *     is an upper bound), especially when the hump and the scaled norm
2490 *     of w [this is also computed and returned in wsp(10)] are of the
2491 *     same order of magnitude (further details in reference below).
2492 *
2493 *----------------------------------------------------------------------|
2494 *-----The following parameters may also be adjusted herein-------------|
2495 *
2496       integer mxstep, mxreject, ideg
2497       double precision delta, gamma
2498       parameter( mxstep   = 500,
2499      .           mxreject = 0,
2500      .           ideg     = 6,
2501      .           delta    = 1.2d0,
2502      .           gamma    = 0.9d0 )
2503
2504 *     mxstep  : maximum allowable number of integration steps.
2505 *               The value 0 means an infinite number of steps.
2506 *
2507 *     mxreject: maximum allowable number of rejections at each step.
2508 *               The value 0 means an infinite number of rejections.
2509 *
2510 *     ideg    : the Pade approximation of type (ideg,ideg) is used as
2511 *               an approximation to exp(H). The value 0 switches to the
2512 *               uniform rational Chebyshev approximation of type (14,14)
2513 *
2514 *     delta   : local truncation error `safety factor'
2515 *
2516 *     gamma   : stepsize `shrinking factor'
2517 *
2518 *----------------------------------------------------------------------|
2519 *     Roger B. Sidje (rbs@maths.uq.edu.au)
2520 *     EXPOKIT: Software Package for Computing Matrix Exponentials.
2521 *     ACM - Transactions On Mathematical Software, 24(1):130-156, 1998
2522 *----------------------------------------------------------------------|
2523
2524       complex*16 ZERO, ONE
2525       parameter( ZERO=(0.0d0,0.0d0), ONE=(1.0d0,0.0d0) )
2526       integer i, j, k1, mh, mx, iv, ih, j1v, ns, ifree, lfree, iexph,
2527      .        ireject,ibrkflag,mbrkdwn, nmult, nreject, nexph, nscale,
2528      .        nstep
2529       double precision sgn, t_out, tbrkdwn, step_min,step_max, err_loc,
2530      .                 s_error, x_error, t_now, t_new, t_step, t_old,
2531      .                 xm, beta, break_tol, p1, p2, p3, eps, rndoff,
2532      .                 vnorm, avnorm, hj1j, hump, SQR1
2533       complex*16 hij
2534
2535       intrinsic AINT,ABS,CMPLX,DBLE,INT,LOG10,MAX,MIN,NINT,SIGN,SQRT
2536       complex*16 ZDOTC
2537       double precision DZNRM2
2538 *
2539 *---  check restrictions on input parameters ...
2540 *
2541       iflag = 0
2542       if ( lwsp.lt.n*(m+2)+5*(m+2)**2+ideg+1 ) iflag = -1
2543       if ( liwsp.lt.m+2 ) iflag = -2
2544       if ( m.ge.n .or. m.le.0 ) iflag = -3
2545       if ( iflag.ne.0 ) stop 'bad sizes (in input of ZGEXPV)'
2546 *
2547 *---  initialisations ...
2548 *
2549       k1 = 2
2550       mh = m + 2
2551       iv = 1
2552       ih = iv + n*(m+1) + n
2553       ifree = ih + mh*mh
2554       lfree = lwsp - ifree + 1
2555
2556       ibrkflag = 0
2557       mbrkdwn  = m
2558       nmult    = 0
2559       nreject  = 0
2560       nexph    = 0
2561       nscale   = 0
2562
2563       t_out    = ABS( t )
2564       tbrkdwn  = 0.0d0
2565       step_min = t_out
2566       step_max = 0.0d0
2567       nstep    = 0
2568       s_error  = 0.0d0
2569       x_error  = 0.0d0
2570       t_now    = 0.0d0
2571       t_new    = 0.0d0
2572
2573       p1 = 4.0d0/3.0d0
2574  1    p2 = p1 - 1.0d0
2575       p3 = p2 + p2 + p2
2576       eps = ABS( p3-1.0d0 )
2577       if ( eps.eq.0.0d0 ) go to 1
2578       if ( tol.le.eps ) tol = SQRT( eps )
2579       rndoff = eps*anorm
2580
2581       break_tol = 1.0d-7
2582 *>>>  break_tol = tol
2583 *>>>  break_tol = anorm*tol
2584
2585       sgn = SIGN( 1.0d0,t )
2586       call ZCOPY( n, v,1, w,1 )
2587       beta = DZNRM2( n, w,1 )
2588       vnorm = beta
2589       hump = beta
2590 *
2591 *---  obtain the very first stepsize ...
2592 *
2593       SQR1 = SQRT( 0.1d0 )
2594       xm = 1.0d0/DBLE( m )
2595       p2 = tol*(((m+1)/2.72D0)**(m+1))*SQRT(2.0D0*3.14D0*(m+1))
2596       t_new = (1.0d0/anorm)*(p2/(4.0d0*beta*anorm))**xm
2597       p1 = 10.0d0**(NINT( LOG10( t_new )-SQR1 )-1)
2598       t_new = AINT( t_new/p1 + 0.55d0 ) * p1
2599 *
2600 *---  step-by-step integration ...
2601 *
2602  100  if ( t_now.ge.t_out ) goto 500
2603
2604       nstep = nstep + 1
2605       t_step = MIN( t_out-t_now, t_new )
2606       p1 = 1.0d0/beta
2607       do i = 1,n
2608          wsp(iv + i-1) = p1*w(i)
2609       enddo
2610       do i = 1,mh*mh
2611          wsp(ih+i-1) = ZERO
2612       enddo
2613 *
2614 *---  Arnoldi loop ...
2615 *
2616       j1v = iv + n
2617       do 200 j = 1,m
2618          nmult = nmult + 1
2619          call matvec( wsp(j1v-n), wsp(j1v) )
2620          do i = 1,j
2621             hij = ZDOTC( n, wsp(iv+(i-1)*n),1, wsp(j1v),1 )
2622             call ZAXPY( n, -hij, wsp(iv+(i-1)*n),1, wsp(j1v),1 )
2623             wsp(ih+(j-1)*mh+i-1) = hij
2624          enddo
2625          hj1j = DZNRM2( n, wsp(j1v),1 )
2626 *---     if `happy breakdown' go straightforward at the end ...
2627          if ( hj1j.le.break_tol ) then
2628             print*,'happy breakdown: mbrkdwn =',j,' h =',hj1j
2629             k1 = 0
2630             ibrkflag = 1
2631             mbrkdwn = j
2632             tbrkdwn = t_now
2633             t_step = t_out-t_now
2634             goto 300
2635          endif
2636          wsp(ih+(j-1)*mh+j) = CMPLX( hj1j )
2637          call ZDSCAL( n, 1.0d0/hj1j, wsp(j1v),1 )
2638          j1v = j1v + n
2639  200  continue
2640       nmult = nmult + 1
2641       call matvec( wsp(j1v-n), wsp(j1v) )
2642       avnorm = DZNRM2( n, wsp(j1v),1 )
2643 *
2644 *---  set 1 for the 2-corrected scheme ...
2645 *
2646  300  continue
2647       wsp(ih+m*mh+m+1) = ONE
2648 *
2649 *---  loop while ireject<mxreject until the tolerance is reached ...
2650 *
2651       ireject = 0
2652  401  continue
2653 *
2654 *---  compute w = beta*V*exp(t_step*H)*e1 ...
2655 *
2656       nexph = nexph + 1
2657       mx = mbrkdwn + k1
2658       if ( ideg.ne.0 ) then
2659 *---     irreducible rational Pade approximation ...
2660          call ZGPADM( ideg, mx, sgn*t_step, wsp(ih),mh,
2661      .                wsp(ifree),lfree, iwsp, iexph, ns, iflag )
2662          iexph = ifree + iexph - 1
2663          nscale = nscale + ns
2664       else
2665 *---     uniform rational Chebyshev approximation ...
2666          iexph = ifree
2667          do i = 1,mx
2668             wsp(iexph+i-1) = ZERO
2669          enddo
2670          wsp(iexph) = ONE
2671          call ZNCHBV(mx,sgn*t_step,wsp(ih),mh,wsp(iexph),wsp(ifree+mx))
2672       endif
2673  402  continue
2674 *
2675 *---  error estimate ...
2676 *
2677       if ( k1.eq.0 ) then
2678          err_loc = tol
2679       else
2680          p1 = ABS( wsp(iexph+m) )   * beta
2681          p2 = ABS( wsp(iexph+m+1) ) * beta * avnorm
2682          if ( p1.gt.10.0d0*p2 ) then
2683             err_loc = p2
2684             xm = 1.0d0/DBLE( m )
2685          elseif ( p1.gt.p2 ) then
2686             err_loc = (p1*p2)/(p1-p2)
2687             xm = 1.0d0/DBLE( m )
2688          else
2689             err_loc = p1
2690             xm = 1.0d0/DBLE( m-1 )
2691          endif
2692       endif
2693 *
2694 *---  reject the step-size if the error is not acceptable ...
2695 *
2696       if ( (k1.ne.0) .and. (err_loc.gt.delta*t_step*tol) .and.
2697      .     (mxreject.eq.0 .or. ireject.lt.mxreject) ) then
2698          t_old = t_step
2699          t_step = gamma * t_step * (t_step*tol/err_loc)**xm
2700          p1 = 10.0d0**(NINT( LOG10( t_step )-SQR1 )-1)
2701          t_step = AINT( t_step/p1 + 0.55d0 ) * p1
2702          if ( itrace.ne.0 ) then
2703             print*,'t_step =',t_old
2704             print*,'err_loc =',err_loc
2705             print*,'err_required =',delta*t_old*tol
2706             print*,'stepsize rejected, stepping down to:',t_step
2707          endif
2708          ireject = ireject + 1
2709          nreject = nreject + 1
2710          if ( mxreject.ne.0 .and. ireject.gt.mxreject ) then
2711             print*,"Failure in ZGEXPV: ---"
2712             print*,"The requested tolerance is too high."
2713             Print*,"Rerun with a smaller value."
2714             iflag = 2
2715             return
2716          endif
2717          goto 401
2718       endif
2719 *
2720 *---  now update w = beta*V*exp(t_step*H)*e1 and the hump ...
2721 *
2722       mx = mbrkdwn + MAX( 0,k1-1 )
2723       hij = CMPLX( beta )
2724       call ZGEMV( 'n', n,mx,hij,wsp(iv),n,wsp(iexph),1,ZERO,w,1 )
2725       beta = DZNRM2( n, w,1 )
2726       hump = MAX( hump, beta )
2727 *
2728 *---  suggested value for the next stepsize ...
2729 *
2730       t_new = gamma * t_step * (t_step*tol/err_loc)**xm
2731       p1 = 10.0d0**(NINT( LOG10( t_new )-SQR1 )-1)
2732       t_new = AINT( t_new/p1 + 0.55d0 ) * p1
2733
2734       err_loc = MAX( err_loc,rndoff )
2735 *
2736 *---  update the time covered ...
2737 *
2738       t_now = t_now + t_step
2739 *
2740 *---  display and keep some information ...
2741 *
2742       if ( itrace.ne.0 ) then
2743          print*,'integration',nstep,'---------------------------------'
2744          print*,'scale-square =',ns
2745          print*,'step_size =',t_step
2746          print*,'err_loc   =',err_loc
2747          print*,'next_step =',t_new
2748       endif
2749
2750       step_min = MIN( step_min, t_step )
2751       step_max = MAX( step_max, t_step )
2752       s_error = s_error + err_loc
2753       x_error = MAX( x_error, err_loc )
2754
2755       if ( mxstep.eq.0 .or. nstep.lt.mxstep ) goto 100
2756       iflag = 1
2757
2758  500  continue
2759
2760       iwsp(1) = nmult
2761       iwsp(2) = nexph
2762       iwsp(3) = nscale
2763       iwsp(4) = nstep
2764       iwsp(5) = nreject
2765       iwsp(6) = ibrkflag
2766       iwsp(7) = mbrkdwn
2767
2768       wsp(1)  = CMPLX( step_min )
2769       wsp(2)  = CMPLX( step_max )
2770       wsp(3)  = CMPLX( 0.0d0 )
2771       wsp(4)  = CMPLX( 0.0d0 )
2772       wsp(5)  = CMPLX( x_error )
2773       wsp(6)  = CMPLX( s_error )
2774       wsp(7)  = CMPLX( tbrkdwn )
2775       wsp(8)  = CMPLX( sgn*t_now )
2776       wsp(9)  = CMPLX( hump/vnorm )
2777       wsp(10) = CMPLX( beta/vnorm )
2778       END
2779 *----------------------------------------------------------------------|
2780 *----------------------------------------------------------------------|
2781       subroutine ZHEXPV( n, m, t, v, w, tol, anorm,
2782      .                   wsp,lwsp, iwsp,liwsp, matvec, itrace,iflag )
2783
2784       implicit none
2785       integer          n, m, lwsp, liwsp, itrace, iflag, iwsp(liwsp)
2786       double precision t, tol, anorm
2787       complex*16       v(n), w(n), wsp(lwsp)
2788       external         matvec
2789
2790 *-----Purpose----------------------------------------------------------|
2791 *
2792 *---  ZHEXPV computes w = exp(t*A)*v for a Hermitian matrix A.
2793 *
2794 *     It does not compute the matrix exponential in isolation but
2795 *     instead, it computes directly the action of the exponential
2796 *     operator on the operand vector. This way of doing so allows
2797 *     for addressing large sparse problems.
2798 *
2799 *     The method used is based on Krylov subspace projection
2800 *     techniques and the matrix under consideration interacts only
2801 *     via the external routine `matvec' performing the matrix-vector
2802 *     product (matrix-free method).
2803 *
2804 *-----Arguments--------------------------------------------------------|
2805 *
2806 *     n      : (input) order of the principal matrix A.
2807 *
2808 *     m      : (input) maximum size for the Krylov basis.
2809 *
2810 *     t      : (input) time at wich the solution is needed (can be < 0).
2811 *
2812 *     v(n)   : (input) given operand vector.
2813 *
2814 *     w(n)   : (output) computed approximation of exp(t*A)*v.
2815 *
2816 *     tol    : (input/output) the requested accuracy tolerance on w.
2817 *              If on input tol=0.0d0 or tol is too small (tol.le.eps)
2818 *              the internal value sqrt(eps) is used, and tol is set to
2819 *              sqrt(eps) on output (`eps' denotes the machine epsilon).
2820 *              (`Happy breakdown' is assumed if h(j+1,j) .le. anorm*tol)
2821 *
2822 *     anorm  : (input) an approximation of some norm of A.
2823 *
2824 *   wsp(lwsp): (workspace) lwsp .ge. n*(m+1)+n+(m+2)^2+4*(m+2)^2+ideg+1
2825 *                                   +---------+-------+---------------+
2826 *              (actually, ideg=6)        V        H      wsp for PADE
2827 *
2828 * iwsp(liwsp): (workspace) liwsp .ge. m+2
2829 *
2830 *     matvec : external subroutine for matrix-vector multiplication.
2831 *              synopsis: matvec( x, y )
2832 *                        complex*16 x(*), y(*)
2833 *              computes: y(1:n) <- A*x(1:n)
2834 *                        where A is the principal matrix.
2835 *
2836 *     itrace : (input) running mode. 0=silent, 1=print step-by-step info
2837 *
2838 *     iflag  : (output) exit flag.
2839 *              <0 - bad input arguments
2840 *               0 - no problem
2841 *               1 - maximum number of steps reached without convergence
2842 *               2 - requested tolerance was too high
2843 *
2844 *-----Accounts on the computation--------------------------------------|
2845 *     Upon exit, an interested user may retrieve accounts on the
2846 *     computations. They are located in the workspace arrays wsp and
2847 *     iwsp as indicated below:
2848 *
2849 *     location  mnemonic                 description
2850 *     -----------------------------------------------------------------|
2851 *     iwsp(1) = nmult, number of matrix-vector multiplications used
2852 *     iwsp(2) = nexph, number of Hessenberg matrix exponential evaluated
2853 *     iwsp(3) = nscale, number of repeated squaring involved in Pade
2854 *     iwsp(4) = nstep, number of integration steps used up to completion
2855 *     iwsp(5) = nreject, number of rejected step-sizes
2856 *     iwsp(6) = ibrkflag, set to 1 if `happy breakdown' and 0 otherwise
2857 *     iwsp(7) = mbrkdwn, if `happy brkdown', basis-size when it occured
2858 *     -----------------------------------------------------------------|
2859 *     wsp(1)  = step_min, minimum step-size used during integration
2860 *     wsp(2)  = step_max, maximum step-size used during integration
2861 *     wsp(3)  = dummy
2862 *     wsp(4)  = dummy
2863 *     wsp(5)  = x_error, maximum among all local truncation errors
2864 *     wsp(6)  = s_error, global sum of local truncation errors
2865 *     wsp(7)  = tbrkdwn, if `happy breakdown', time when it occured
2866 *     wsp(8)  = t_now, integration domain successfully covered
2867 *     wsp(9)  = hump, i.e., max||exp(sA)||, s in [0,t] (or [t,0] if t<0)
2868 *     wsp(10) = ||w||/||v||, scaled norm of the solution w.
2869 *     -----------------------------------------------------------------|
2870 *     The `hump' is a measure of the conditioning of the problem. The
2871 *     matrix exponential is well-conditioned if hump = 1, whereas it is
2872 *     poorly-conditioned if hump >> 1. However the solution can still be
2873 *     relatively fairly accurate even when the hump is large (the hump
2874 *     is an upper bound), especially when the hump and the scaled norm
2875 *     of w [this is also computed and returned in wsp(10)] are of the
2876 *     same order of magnitude (further details in reference below).
2877 *
2878 *----------------------------------------------------------------------|
2879 *-----The following parameters may also be adjusted herein-------------|
2880 *
2881       integer mxstep, mxreject, ideg
2882       double precision delta, gamma
2883       parameter( mxstep   = 500,
2884      .           mxreject = 0,
2885      .           ideg     = 6,
2886      .           delta    = 1.2d0,
2887      .           gamma    = 0.9d0 )
2888
2889 *     mxstep  : maximum allowable number of integration steps.
2890 *               The value 0 means an infinite number of steps.
2891 *
2892 *     mxreject: maximum allowable number of rejections at each step.
2893 *               The value 0 means an infinite number of rejections.
2894 *
2895 *     ideg    : the Pade approximation of type (ideg,ideg) is used as
2896 *               an approximation to exp(H). The value 0 switches to the
2897 *               uniform rational Chebyshev approximation of type (14,14)
2898 *
2899 *     delta   : local truncation error `safety factor'
2900 *
2901 *     gamma   : stepsize `shrinking factor'
2902 *
2903 *----------------------------------------------------------------------|
2904 *     Roger B. Sidje (rbs@maths.uq.edu.au)
2905 *     EXPOKIT: Software Package for Computing Matrix Exponentials.
2906 *     ACM - Transactions On Mathematical Software, 24(1):130-156, 1998
2907 *----------------------------------------------------------------------|
2908 *
2909       complex*16 ZERO, ONE
2910       parameter( ZERO=(0.0d0,0.0d0), ONE=(1.0d0,0.0d0) )
2911       integer i, j, k1, mh, mx, iv, ih, j1v, ns, ifree, lfree, iexph,
2912      .        ireject,ibrkflag,mbrkdwn, nmult, nreject, nexph, nscale,
2913      .        nstep
2914       double precision sgn, t_out, tbrkdwn, step_min,step_max, err_loc,
2915      .                 s_error, x_error, t_now, t_new, t_step, t_old,
2916      .                 xm, beta, break_tol, p1, p2, p3, eps, rndoff,
2917      .                 vnorm, avnorm, hj1j, hump, SQR1
2918       complex*16 hjj
2919
2920       intrinsic AINT,ABS,CMPLX,DBLE,INT,LOG10,MAX,MIN,NINT,SIGN,SQRT
2921       complex*16 ZDOTC
2922       double precision DZNRM2
2923 *
2924 *---  check restrictions on input parameters ...
2925 *
2926       iflag = 0
2927       if ( lwsp.lt.n*(m+2)+5*(m+2)**2+ideg+1 ) iflag = -1
2928       if ( liwsp.lt.m+2 ) iflag = -2
2929       if ( m.ge.n .or. m.le.0 ) iflag = -3
2930       if ( iflag.ne.0 ) stop 'bad sizes (in input of DHEXPV)'
2931 *
2932 *---  initialisations ...
2933 *
2934       k1 = 2
2935       mh = m + 2
2936       iv = 1
2937       ih = iv + n*(m+1) + n
2938       ifree = ih + mh*mh
2939       lfree = lwsp - ifree + 1
2940
2941       ibrkflag = 0
2942       mbrkdwn  = m
2943       nmult    = 0
2944       nreject  = 0
2945       nexph    = 0
2946       nscale   = 0
2947
2948       t_out    = ABS( t )
2949       tbrkdwn  = 0.0d0
2950       step_min = t_out
2951       step_max = 0.0d0
2952       nstep    = 0
2953       s_error  = 0.0d0
2954       x_error  = 0.0d0
2955       t_now    = 0.0d0
2956       t_new    = 0.0d0
2957
2958       p1 = 4.0d0/3.0d0
2959  1    p2 = p1 - 1.0d0
2960       p3 = p2 + p2 + p2
2961       eps = ABS( p3-1.0d0 )
2962       if ( eps.eq.0.0d0 ) go to 1
2963       if ( tol.le.eps ) tol = SQRT( eps )
2964       rndoff = eps*anorm
2965
2966       break_tol = 1.0d-7
2967 *>>>  break_tol = tol
2968 *>>>  break_tol = anorm*tol
2969
2970       sgn = SIGN( 1.0d0,t )
2971       call ZCOPY( n, v,1, w,1 )
2972       beta = DZNRM2( n, w,1 )
2973       vnorm = beta
2974       hump = beta
2975 *
2976 *---  obtain the very first stepsize ...
2977 *
2978       SQR1 = SQRT( 0.1d0 )
2979       xm = 1.0d0/DBLE( m )
2980       p2 = tol*(((m+1)/2.72D0)**(m+1))*SQRT(2.0D0*3.14D0*(m+1))
2981       t_new = (1.0d0/anorm)*(p2/(4.0d0*beta*anorm))**xm
2982       p1 = 10.0d0**(NINT( LOG10( t_new )-SQR1 )-1)
2983       t_new = AINT( t_new/p1 + 0.55d0 ) * p1
2984 *
2985 *---  step-by-step integration ...
2986 *
2987  100  if ( t_now.ge.t_out ) goto 500
2988
2989       nstep = nstep + 1
2990       t_step = MIN( t_out-t_now, t_new )
2991       beta = DZNRM2( n, w,1 )
2992       p1 = 1.0d0/beta
2993       do i = 1,n
2994          wsp(iv + i-1) = p1*w(i)
2995       enddo
2996       do i = 1,mh*mh
2997          wsp(ih+i-1) = ZERO
2998       enddo
2999 *
3000 *---  Lanczos loop ...
3001 *
3002       j1v = iv + n
3003       do 200 j = 1,m
3004          nmult = nmult + 1
3005          call matvec( wsp(j1v-n), wsp(j1v) )
3006          if ( j.gt.1 )
3007      .     call ZAXPY(n,-wsp(ih+(j-1)*mh+j-2),wsp(j1v-2*n),1,wsp(j1v),1)
3008          hjj = ZDOTC( n, wsp(j1v-n),1, wsp(j1v),1 )
3009          call ZAXPY( n, -hjj, wsp(j1v-n),1, wsp(j1v),1 )
3010          hj1j = DZNRM2( n, wsp(j1v),1 )
3011          wsp(ih+(j-1)*(mh+1)) = hjj
3012 *---     if `happy breakdown' go straightforward at the end ...
3013          if ( hj1j.le.break_tol ) then
3014             print*,'happy breakdown: mbrkdwn =',j,' h =',hj1j
3015             k1 = 0
3016             ibrkflag = 1
3017             mbrkdwn = j
3018             tbrkdwn = t_now
3019             t_step = t_out-t_now
3020             goto 300
3021          endif
3022          wsp(ih+(j-1)*mh+j) = CMPLX( hj1j )
3023          wsp(ih+j*mh+j-1) = CMPLX( hj1j )
3024          call ZDSCAL( n, 1.0d0/hj1j, wsp(j1v),1 )
3025          j1v = j1v + n
3026  200  continue
3027       nmult = nmult + 1
3028       call matvec( wsp(j1v-n), wsp(j1v) )
3029       avnorm = DZNRM2( n, wsp(j1v),1 )
3030 *
3031 *---  set 1 for the 2-corrected scheme ...
3032 *
3033  300  continue
3034       wsp(ih+m*mh+m-1) = ZERO
3035       wsp(ih+m*mh+m+1) = ONE
3036 *
3037 *---  loop while ireject<mxreject until the tolerance is reached ...
3038 *
3039       ireject = 0
3040  401  continue
3041 *
3042 *---  compute w = beta*V*exp(t_step*H)*e1 ...
3043 *
3044       nexph = nexph + 1
3045       mx = mbrkdwn + k1
3046       if ( ideg.ne.0 ) then
3047 *---     irreducible rational Pade approximation ...
3048          call ZGPADM( ideg, mx, sgn*t_step, wsp(ih),mh,
3049      .                wsp(ifree),lfree, iwsp, iexph, ns, iflag )
3050          iexph = ifree + iexph - 1
3051          nscale = nscale + ns
3052       else
3053 *---     uniform rational Chebyshev approximation ...
3054          iexph = ifree
3055          do i = 1,mx
3056             wsp(iexph+i-1) = ZERO
3057          enddo
3058          wsp(iexph) = ONE
3059          call ZNCHBV(mx,sgn*t_step,wsp(ih),mh,wsp(iexph),wsp(ifree+mx))
3060       endif
3061
3062  402  continue
3063 *
3064 *---  error estimate ...
3065 *
3066       if ( k1.eq.0 ) then
3067          err_loc = tol
3068       else
3069          p1 = ABS( wsp(iexph+m) )   * beta
3070          p2 = ABS( wsp(iexph+m+1) ) * beta * avnorm
3071          if ( p1.gt.10.0d0*p2 ) then
3072             err_loc = p2
3073             xm = 1.0d0/DBLE( m )
3074          elseif ( p1.gt.p2 ) then
3075             err_loc = (p1*p2)/(p1-p2)
3076             xm = 1.0d0/DBLE( m )
3077          else
3078             err_loc = p1
3079             xm = 1.0d0/DBLE( m-1 )
3080          endif
3081       endif
3082 *
3083 *---  reject the step-size if the error is not acceptable ...
3084 *
3085       if ( (k1.ne.0) .and. (err_loc.gt.delta*t_step*tol) .and.
3086      .     (mxreject.eq.0 .or. ireject.lt.mxreject) ) then
3087          t_old = t_step
3088          t_step = gamma * t_step * (t_step*tol/err_loc)**xm
3089          p1 = 10.0d0**(NINT( LOG10( t_step )-SQR1 )-1)
3090          t_step = AINT( t_step/p1 + 0.55d0 ) * p1
3091          if ( itrace.ne.0 ) then
3092             print*,'t_step =',t_old
3093             print*,'err_loc =',err_loc
3094             print*,'err_required =',delta*t_old*tol
3095             print*,'stepsize rejected, stepping down to:',t_step
3096          endif
3097          ireject = ireject + 1
3098          nreject = nreject + 1
3099          if ( mxreject.ne.0 .and. ireject.gt.mxreject ) then
3100             print*,"Failure in ZHEXPV: ---"
3101             print*,"The requested tolerance is too high."
3102             Print*,"Rerun with a smaller value."
3103             iflag = 2
3104             return
3105          endif
3106          goto 401
3107       endif
3108 *
3109 *---  now update w = beta*V*exp(t_step*H)*e1 and the hump ...
3110 *
3111       mx = mbrkdwn + MAX( 0,k1-1 )
3112       hjj = CMPLX( beta )
3113       call ZGEMV( 'n', n,mx,hjj,wsp(iv),n,wsp(iexph),1,ZERO,w,1 )
3114       beta = DZNRM2( n, w,1 )
3115       hump = MAX( hump, beta )
3116 *
3117 *---  suggested value for the next stepsize ...
3118 *
3119       t_new = gamma * t_step * (t_step*tol/err_loc)**xm
3120       p1 = 10.0d0**(NINT( LOG10( t_new )-SQR1 )-1)
3121       t_new = AINT( t_new/p1 + 0.55d0 ) * p1
3122
3123       err_loc = MAX( err_loc,rndoff )
3124 *
3125 *---  update the time covered ...
3126 *
3127       t_now = t_now + t_step
3128 *
3129 *---  display and keep some information ...
3130 *
3131       if ( itrace.ne.0 ) then
3132          print*,'integration',nstep,'---------------------------------'
3133          print*,'scale-square =',ns
3134          print*,'step_size =',t_step
3135          print*,'err_loc   =',err_loc
3136          print*,'next_step =',t_new
3137       endif
3138
3139       step_min = MIN( step_min, t_step )
3140       step_max = MAX( step_max, t_step )
3141       s_error = s_error + err_loc
3142       x_error = MAX( x_error, err_loc )
3143
3144       if ( mxstep.eq.0 .or. nstep.lt.mxstep ) goto 100
3145       iflag = 1
3146
3147  500  continue
3148
3149       iwsp(1) = nmult
3150       iwsp(2) = nexph
3151       iwsp(3) = nscale
3152       iwsp(4) = nstep
3153       iwsp(5) = nreject
3154       iwsp(6) = ibrkflag
3155       iwsp(7) = mbrkdwn
3156
3157       wsp(1)  = CMPLX( step_min )
3158       wsp(2)  = CMPLX( step_max )
3159       wsp(3)  = CMPLX( 0.0d0 )
3160       wsp(4)  = CMPLX( 0.0d0 )
3161       wsp(5)  = CMPLX( x_error )
3162       wsp(6)  = CMPLX( s_error )
3163       wsp(7)  = CMPLX( tbrkdwn )
3164       wsp(8)  = CMPLX( sgn*t_now )
3165       wsp(9)  = CMPLX( hump/vnorm )
3166       wsp(10) = CMPLX( beta/vnorm )
3167       END
3168 *----------------------------------------------------------------------|
3169 *----------------------------------------------------------------------|
3170       subroutine DGPHIV( n, m, t, u, v, w, tol, anorm,
3171      .                   wsp,lwsp, iwsp,liwsp, matvec, itrace,iflag )
3172
3173       implicit none
3174       integer n, m, lwsp, liwsp, itrace, iflag, iwsp(liwsp)
3175       double precision t, tol, anorm, u(n), v(n), w(n), wsp(lwsp)
3176       external matvec
3177
3178 *-----Purpose----------------------------------------------------------|
3179 *
3180 *---  DGPHIV computes w = exp(t*A)v + t*phi(tA)u which is the solution
3181 *     of the nonhomogeneous linear ODE problem w' = Aw + u, w(0) = v.
3182 *     phi(z) = (exp(z)-1)/z and A is a General matrix.
3183 *
3184 *     The method used is based on Krylov subspace projection
3185 *     techniques and the matrix under consideration interacts only
3186 *     via the external routine `matvec' performing the matrix-vector
3187 *     product (matrix-free method).
3188 *
3189 *-----Arguments--------------------------------------------------------|
3190 *
3191 *     n      : (input) order of the principal matrix A.
3192 *
3193 *     m      : (input) maximum size for the Krylov basis.
3194 *
3195 *     t      : (input) time at wich the solution is needed (can be < 0).
3196 *
3197 *     u(n)   : (input) operand vector with respect to the phi function
3198 *              (forcing term of the ODE problem).
3199 *
3200 *     v(n)   : (input) operand vector with respect to the exp function
3201 *              (initial condition of the ODE problem).
3202 *
3203 *     w(n)   : (output) computed approximation of exp(t*A)v + t*phi(tA)u
3204 *
3205 *     tol    : (input/output) the requested accuracy tolerance on w.
3206 *              If on input tol=0.0d0 or tol is too small (tol.le.eps)
3207 *              the internal value sqrt(eps) is used, and tol is set to
3208 *              sqrt(eps) on output (`eps' denotes the machine epsilon).
3209 *              (`Happy breakdown' is assumed if h(j+1,j) .le. anorm*tol)
3210 *
3211 *     anorm  : (input) an approximation of some norm of A.
3212 *
3213 *   wsp(lwsp): (workspace) lwsp .ge. n*(m+1)+n+(m+3)^2+4*(m+3)^2+ideg+1
3214 *                                   +---------+-------+---------------+
3215 *              (actually, ideg=6)        V        H      wsp for PADE
3216 *
3217 * iwsp(liwsp): (workspace) liwsp .ge. m+3
3218 *
3219 *     matvec : external subroutine for matrix-vector multiplication.
3220 *              synopsis: matvec( x, y )
3221 *                        double precision x(*), y(*)
3222 *              computes: y(1:n) <- A*x(1:n)
3223 *                        where A is the principal matrix.
3224 *
3225 *     itrace : (input) running mode. 0=silent, 1=print step-by-step info
3226 *
3227 *     iflag  : (output) exit flag.
3228 *              <0 - bad input arguments
3229 *               0 - no problem
3230 *               1 - maximum number of steps reached without convergence
3231 *               2 - requested tolerance was too high
3232 *
3233 *-----Accounts on the computation--------------------------------------|
3234 *     Upon exit, an interested user may retrieve accounts on the
3235 *     computations. They are located in the workspace arrays wsp and
3236 *     iwsp as indicated below:
3237 *
3238 *     location  mnemonic                 description
3239 *     -----------------------------------------------------------------|
3240 *     iwsp(1) = nmult, number of matrix-vector multiplications used
3241 *     iwsp(2) = nexph, number of Hessenberg matrix exponential evaluated
3242 *     iwsp(3) = nscale, number of repeated squaring involved in Pade
3243 *     iwsp(4) = nstep, number of integration steps used up to completion
3244 *     iwsp(5) = nreject, number of rejected step-sizes
3245 *     iwsp(6) = ibrkflag, set to 1 if `happy breakdown' and 0 otherwise
3246 *     iwsp(7) = mbrkdwn, if `happy brkdown', basis-size when it occured
3247 *     -----------------------------------------------------------------|
3248 *     wsp(1)  = step_min, minimum step-size used during integration
3249 *     wsp(2)  = step_max, maximum step-size used during integration
3250 *     wsp(3)  = dummy
3251 *     wsp(4)  = dummy
3252 *     wsp(5)  = x_error, maximum among all local truncation errors
3253 *     wsp(6)  = s_error, global sum of local truncation errors
3254 *     wsp(7)  = tbrkdwn, if `happy breakdown', time when it occured
3255 *     wsp(8)  = t_now, integration domain successfully covered
3256 *
3257 *----------------------------------------------------------------------|
3258 *-----The following parameters may also be adjusted herein-------------|
3259 *
3260       integer mxstep, mxreject, ideg
3261       double precision delta, gamma
3262       parameter( mxstep   = 1000,
3263      .           mxreject = 0,
3264      .           ideg     = 6,
3265      .           delta    = 1.2d0,
3266      .           gamma    = 0.9d0 )
3267
3268 *     mxstep  : maximum allowable number of integration steps.
3269 *               The value 0 means an infinite number of steps.
3270 *
3271 *     mxreject: maximum allowable number of rejections at each step.
3272 *               The value 0 means an infinite number of rejections.
3273 *
3274 *     ideg    : the Pade approximation of type (ideg,ideg) is used as
3275 *               an approximation to exp(H).
3276 *
3277 *     delta   : local truncation error `safety factor'
3278 *
3279 *     gamma   : stepsize `shrinking factor'
3280 *
3281 *----------------------------------------------------------------------|
3282 *     Roger B. Sidje (rbs@maths.uq.edu.au)
3283 *     EXPOKIT: Software Package for Computing Matrix Exponentials.
3284 *     ACM - Transactions On Mathematical Software, 24(1):130-156, 1998
3285 *----------------------------------------------------------------------|
3286 *
3287       integer i, j, k1, mh, mx, iv, ih, j1v, ns, ifree, lfree, iexph,
3288      .        ireject,ibrkflag,mbrkdwn, nmult, nreject, nexph, nscale,
3289      .        nstep, iphih
3290       double precision sgn, t_out, tbrkdwn, step_min,step_max, err_loc,
3291      .                 s_error, x_error, t_now, t_new, t_step, t_old,
3292      .                 xm, beta, break_tol, p1, p2, p3, eps, rndoff,
3293      .                 avnorm, hj1j, hij, SQR1
3294
3295       intrinsic AINT,ABS,DBLE,LOG10,MAX,MIN,NINT,SIGN,SQRT
3296       double precision DDOT, DNRM2
3297
3298 *---  check restrictions on input parameters ...
3299       iflag = 0
3300       if ( lwsp.lt.n*(m+3)+5*(m+3)**2+ideg+1 ) iflag = -1
3301       if ( liwsp.lt.m+3 ) iflag = -2
3302       if ( m.ge.n .or. m.le.0 ) iflag = -3
3303       if ( iflag.ne.0 ) stop 'bad sizes (in input of DGPHIV)'
3304 *
3305 *---  initialisations ...
3306 *
3307       k1 = 3
3308       mh = m + 3
3309       iv = 1
3310       ih = iv + n*(m+1) + n
3311       ifree = ih + mh*mh
3312       lfree = lwsp - ifree + 1
3313
3314       ibrkflag = 0
3315       mbrkdwn  = m
3316       nmult    = 0
3317       nreject  = 0
3318       nexph    = 0
3319       nscale   = 0
3320
3321       t_out    = ABS( t )
3322       tbrkdwn  = 0.0d0
3323       step_min = t_out
3324       step_max = 0.0d0
3325       nstep    = 0
3326       s_error  = 0.0d0
3327       x_error  = 0.0d0
3328       t_now    = 0.0d0
3329       t_new    = 0.0d0
3330
3331       p1 = 4.0d0/3.0d0
3332  1    p2 = p1 - 1.0d0
3333       p3 = p2 + p2 + p2
3334       eps = ABS( p3-1.0d0 )
3335       if ( eps.eq.0.0d0 ) go to 1
3336       if ( tol.le.eps ) tol = SQRT( eps )
3337       rndoff = eps*anorm
3338
3339       break_tol = 1.0d-7
3340 *>>>  break_tol = tol
3341 *>>>  break_tol = anorm*tol
3342
3343 *
3344 *---  step-by-step integration ...
3345 *
3346       sgn = SIGN( 1.0d0,t )
3347       SQR1 = SQRT( 0.1d0 )
3348       call DCOPY( n, v,1, w,1 )
3349
3350  100  if ( t_now.ge.t_out ) goto 500
3351
3352       nmult =  nmult + 1
3353       call matvec( w, wsp(iv) )
3354       call DAXPY( n, 1.0d0, u,1, wsp(iv),1 )
3355       beta = DNRM2( n, wsp(iv),1 )
3356       if ( beta.eq.0.0d0 ) goto 500
3357       call DSCAL( n, 1.0d0/beta, wsp(iv),1 )
3358       do i = 1,mh*mh
3359          wsp(ih+i-1) = 0.0d0
3360       enddo
3361
3362       if ( nstep.eq.0 ) then
3363 *---     obtain the very first stepsize ...
3364          xm = 1.0d0/DBLE( m )
3365          p1 = tol*(((m+1)/2.72D0)**(m+1))*SQRT(2.0D0*3.14D0*(m+1))
3366          t_new = (1.0d0/anorm)*(p1/(4.0d0*beta*anorm))**xm
3367          p1 = 10.0d0**(NINT( LOG10( t_new )-SQR1 )-1)
3368          t_new = AINT( t_new/p1 + 0.55d0 ) * p1
3369       endif
3370       nstep = nstep + 1
3371       t_step = MIN( t_out-t_now, t_new )
3372 *
3373 *---  Arnoldi loop ...
3374 *
3375       j1v = iv + n
3376       do 200 j = 1,m
3377          nmult = nmult + 1
3378          call matvec( wsp(j1v-n), wsp(j1v) )
3379          do i = 1,j
3380             hij = DDOT( n, wsp(iv+(i-1)*n),1, wsp(j1v),1 )
3381             call DAXPY( n, -hij, wsp(iv+(i-1)*n),1, wsp(j1v),1 )
3382             wsp(ih+(j-1)*mh+i-1) = hij
3383          enddo
3384          hj1j = DNRM2( n, wsp(j1v),1 )
3385 *---     if `happy breakdown' go straightforward at the end ...
3386          if ( hj1j.le.break_tol ) then
3387             print*,'happy breakdown: mbrkdwn =',j,' h =',hj1j
3388             k1 = 0
3389             ibrkflag = 1
3390             mbrkdwn = j
3391             tbrkdwn = t_now
3392             t_step = t_out-t_now
3393             goto 300
3394          endif
3395          wsp(ih+(j-1)*mh+j) = hj1j
3396          call DSCAL( n, 1.0d0/hj1j, wsp(j1v),1 )
3397          j1v = j1v + n
3398  200  continue
3399       nmult = nmult + 1
3400       call matvec( wsp(j1v-n), wsp(j1v) )
3401       avnorm = DNRM2( n, wsp(j1v),1 )
3402 *
3403 *---  set 1's for the 3-extended scheme ...
3404 *
3405  300  continue
3406       wsp(ih+mh*mbrkdwn) = 1.0d0
3407       wsp(ih+(m-1)*mh+m) = 0.0d0
3408       do i = 1,k1-1
3409          wsp(ih+(m+i)*mh+m+i-1) = 1.0d0
3410       enddo
3411 *
3412 *---  loop while ireject<mxreject until the tolerance is reached ...
3413 *
3414       ireject = 0
3415  401  continue
3416 *
3417 *---  compute w = beta*t_step*V*phi(t_step*H)*e1 + w
3418 *
3419       nexph = nexph + 1
3420 *---  irreducible rational Pade approximation ...
3421       mx = mbrkdwn + MAX( 1,k1 )
3422       call DGPADM( ideg, mx, sgn*t_step, wsp(ih),mh,
3423      .             wsp(ifree),lfree, iwsp, iexph, ns, iflag )
3424       iexph = ifree + iexph - 1
3425       iphih = iexph + mbrkdwn*mx
3426       nscale = nscale + ns
3427       wsp(iphih+mbrkdwn)   = hj1j*wsp(iphih+mx+mbrkdwn-1)
3428       wsp(iphih+mbrkdwn+1) = hj1j*wsp(iphih+2*mx+mbrkdwn-1)
3429
3430  402  continue
3431 *---  error estimate ...
3432       if ( k1.eq.0 ) then
3433          err_loc = tol
3434       else
3435          p1 = ABS( wsp(iphih+m) )   * beta
3436          p2 = ABS( wsp(iphih+m+1) ) * beta * avnorm
3437          if ( p1.gt.10.0d0*p2 ) then
3438             err_loc = p2
3439             xm = 1.0d0/DBLE( m+1 )
3440          elseif ( p1.gt.p2 ) then
3441             err_loc = (p1*p2)/(p1-p2)
3442             xm = 1.0d0/DBLE( m+1 )
3443          else
3444             err_loc = p1
3445             xm = 1.0d0/DBLE( m )
3446          endif
3447       endif
3448
3449 *---  reject the step-size if the error is not acceptable ...
3450       if ( (k1.ne.0) .and. (err_loc.gt.delta*t_step*tol) .and.
3451      .     (mxreject.eq.0 .or. ireject.lt.mxreject) ) then
3452          t_old = t_step
3453          t_step = gamma * t_step * (t_step*tol/err_loc)**xm
3454          p1 = 10.0d0**(NINT( LOG10( t_step )-SQR1 )-1)
3455          t_step = AINT( t_step/p1 + 0.55d0 ) * p1
3456          if ( itrace.ne.0 ) then
3457             print*,'t_step =',t_old
3458             print*,'err_loc =',err_loc
3459             print*,'err_required =',delta*t_old*tol
3460             print*,'stepsize rejected, stepping down to:',t_step
3461          endif
3462          ireject = ireject + 1
3463          nreject = nreject + 1
3464          if ( mxreject.ne.0 .and. ireject.gt.mxreject ) then
3465             print*,"Failure in DGPHIV: ---"
3466             print*,"The requested tolerance is too high."
3467             Print*,"Rerun with a smaller value."
3468             iflag = 2
3469             return
3470          endif
3471          goto 401
3472       endif
3473 *
3474       mx = mbrkdwn + MAX( 0,k1-2 )
3475       call DGEMV( 'n', n,mx,beta,wsp(iv),n,wsp(iphih),1,1.0d0,w,1 )
3476 *
3477 *---  suggested value for the next stepsize ...
3478 *
3479       t_new = gamma * t_step * (t_step*tol/err_loc)**xm
3480       p1 = 10.0d0**(NINT( LOG10( t_new )-SQR1 )-1)
3481       t_new = AINT( t_new/p1 + 0.55d0 ) * p1
3482
3483       err_loc = MAX( err_loc,rndoff )
3484 *
3485 *---  update the time covered ...
3486 *
3487       t_now = t_now + t_step
3488 *
3489 *---  display and keep some information ...
3490 *
3491       if ( itrace.ne.0 ) then
3492          print*,'integration',nstep,'---------------------------------'
3493          print*,'scale-square =',ns
3494          print*,'step_size =',t_step
3495          print*,'err_loc   =',err_loc
3496          print*,'next_step =',t_new
3497       endif
3498
3499       step_min = MIN( step_min, t_step )
3500       step_max = MAX( step_max, t_step )
3501       s_error = s_error + err_loc
3502       x_error = MAX( x_error, err_loc )
3503
3504       if ( mxstep.eq.0 .or. nstep.lt.mxstep ) goto 100
3505       iflag = 1
3506
3507  500  continue
3508
3509       iwsp(1) = nmult
3510       iwsp(2) = nexph
3511       iwsp(3) = nscale
3512       iwsp(4) = nstep
3513       iwsp(5) = nreject
3514       iwsp(6) = ibrkflag
3515       iwsp(7) = mbrkdwn
3516
3517       wsp(1)  = step_min
3518       wsp(2)  = step_max
3519       wsp(3)  = 0.0d0
3520       wsp(4)  = 0.0d0
3521       wsp(5)  = x_error
3522       wsp(6)  = s_error
3523       wsp(7)  = tbrkdwn
3524       wsp(8)  = sgn*t_now
3525       END
3526 *----------------------------------------------------------------------|
3527 *----------------------------------------------------------------------|
3528       subroutine DSPHIV( n, m, t, u, v, w, tol, anorm,
3529      .                   wsp,lwsp, iwsp,liwsp, matvec, itrace,iflag )
3530
3531       implicit none
3532       integer n, m, lwsp, liwsp, itrace, iflag, iwsp(liwsp)
3533       double precision t, tol, anorm, u(n), v(n), w(n), wsp(lwsp)
3534       external matvec
3535
3536 *-----Purpose----------------------------------------------------------|
3537 *
3538 *---  DSPHIV computes w = exp(t*A)v + t*phi(tA)u which is the solution
3539 *     of the nonhomogeneous linear ODE problem w' = Aw + u, w(0) = v.
3540 *     phi(z) = (exp(z)-1)/z and A is a Symmetric matrix.
3541 *
3542 *     The method used is based on Krylov subspace projection
3543 *     techniques and the matrix under consideration interacts only
3544 *     via the external routine `matvec' performing the matrix-vector
3545 *     product (matrix-free method).
3546 *
3547 *-----Arguments--------------------------------------------------------|
3548 *
3549 *     n      : (input) order of the principal matrix A.
3550 *
3551 *     m      : (input) maximum size for the Krylov basis.
3552 *
3553 *     t      : (input) time at wich the solution is needed (can be < 0).
3554 *
3555 *     u(n)   : (input) operand vector with respect to the phi function
3556 *              (forcing term of the ODE problem).
3557 *
3558 *     v(n)   : (input) operand vector with respect to the exp function
3559 *              (initial condition of the ODE problem).
3560 *
3561 *     w(n)   : (output) computed approximation of exp(t*A)v + t*phi(tA)u
3562 *
3563 *     tol    : (input/output) the requested accuracy tolerance on w.
3564 *              If on input tol=0.0d0 or tol is too small (tol.le.eps)
3565 *              the internal value sqrt(eps) is used, and tol is set to
3566 *              sqrt(eps) on output (`eps' denotes the machine epsilon).
3567 *              (`Happy breakdown' is assumed if h(j+1,j) .le. anorm*tol)
3568 *
3569 *     anorm  : (input) an approximation of some norm of A.
3570 *
3571 *   wsp(lwsp): (workspace) lwsp .ge. n*(m+1)+n+(m+3)^2+4*(m+3)^2+ideg+1
3572 *                                   +---------+-------+---------------+
3573 *              (actually, ideg=6)        V        H      wsp for PADE
3574 *
3575 * iwsp(liwsp): (workspace) liwsp .ge. m+3
3576 *
3577 *     matvec : external subroutine for matrix-vector multiplication.
3578 *              synopsis: matvec( x, y )
3579 *                        double precision x(*), y(*)
3580 *              computes: y(1:n) <- A*x(1:n)
3581 *                        where A is the principal matrix.
3582 *
3583 *     itrace : (input) running mode. 0=silent, 1=print step-by-step info
3584 *
3585 *     iflag  : (output) exit flag.
3586 *              <0 - bad input arguments
3587 *               0 - no problem
3588 *               1 - maximum number of steps reached without convergence
3589 *               2 - requested tolerance was too high
3590 *
3591 *-----Accounts on the computation--------------------------------------|
3592 *     Upon exit, an interested user may retrieve accounts on the
3593 *     computations. They are located in the workspace arrays wsp and
3594 *     iwsp as indicated below:
3595 *
3596 *     location  mnemonic                 description
3597 *     -----------------------------------------------------------------|
3598 *     iwsp(1) = nmult, number of matrix-vector multiplications used
3599 *     iwsp(2) = nexph, number of Hessenberg matrix exponential evaluated
3600 *     iwsp(3) = nscale, number of repeated squaring involved in Pade
3601 *     iwsp(4) = nstep, number of integration steps used up to completion
3602 *     iwsp(5) = nreject, number of rejected step-sizes
3603 *     iwsp(6) = ibrkflag, set to 1 if `happy breakdown' and 0 otherwise
3604 *     iwsp(7) = mbrkdwn, if `happy brkdown', basis-size when it occured
3605 *     -----------------------------------------------------------------|
3606 *     wsp(1)  = step_min, minimum step-size used during integration
3607 *     wsp(2)  = step_max, maximum step-size used during integration
3608 *     wsp(3)  = dummy
3609 *     wsp(4)  = dummy
3610 *     wsp(5)  = x_error, maximum among all local truncation errors
3611 *     wsp(6)  = s_error, global sum of local truncation errors
3612 *     wsp(7)  = tbrkdwn, if `happy breakdown', time when it occured
3613 *     wsp(8)  = t_now, integration domain successfully covered
3614 *
3615 *----------------------------------------------------------------------|
3616 *-----The following parameters may also be adjusted herein-------------|
3617 *
3618       integer mxstep, mxreject, ideg
3619       double precision delta, gamma
3620       parameter( mxstep   = 500,
3621      .           mxreject = 0,
3622      .           ideg     = 6,
3623      .           delta    = 1.2d0,
3624      .           gamma    = 0.9d0 )
3625
3626 *     mxstep  : maximum allowable number of integration steps.
3627 *               The value 0 means an infinite number of steps.
3628 *
3629 *     mxreject: maximum allowable number of rejections at each step.
3630 *               The value 0 means an infinite number of rejections.
3631 *
3632 *     ideg    : the Pade approximation of type (ideg,ideg) is used as
3633 *               an approximation to exp(H).
3634 *
3635 *     delta   : local truncation error `safety factor'
3636 *
3637 *     gamma   : stepsize `shrinking factor'
3638 *
3639 *----------------------------------------------------------------------|
3640 *     Roger B. Sidje (rbs@maths.uq.edu.au)
3641 *     EXPOKIT: Software Package for Computing Matrix Exponentials.
3642 *     ACM - Transactions On Mathematical Software, 24(1):130-156, 1998
3643 *----------------------------------------------------------------------|
3644 *
3645       integer i, j, k1, mh, mx, iv, ih, j1v, ns, ifree, lfree, iexph,
3646      .        ireject,ibrkflag,mbrkdwn, nmult, nreject, nexph, nscale,
3647      .        nstep, iphih
3648       double precision sgn, t_out, tbrkdwn, step_min,step_max, err_loc,
3649      .                 s_error, x_error, t_now, t_new, t_step, t_old,
3650      .                 xm, beta, break_tol, p1, p2, p3, eps, rndoff,
3651      .                 avnorm, hj1j, hjj, SQR1
3652
3653       intrinsic AINT,ABS,DBLE,LOG10,MAX,MIN,NINT,SIGN,SQRT
3654       double precision DDOT, DNRM2
3655
3656 *---  check restrictions on input parameters ...
3657       iflag = 0
3658       if ( lwsp.lt.n*(m+3)+5*(m+3)**2+ideg+1 ) iflag = -1
3659       if ( liwsp.lt.m+3 ) iflag = -2
3660       if ( m.ge.n .or. m.le.0 ) iflag = -3
3661       if ( iflag.ne.0 ) stop 'bad sizes (in input of DSPHIV)'
3662 *
3663 *---  initialisations ...
3664 *
3665       k1 = 3
3666       mh = m + 3
3667       iv = 1
3668       ih = iv + n*(m+1) + n
3669       ifree = ih + mh*mh
3670       lfree = lwsp - ifree + 1
3671
3672       ibrkflag = 0
3673       mbrkdwn  = m
3674       nmult    = 0
3675       nreject  = 0
3676       nexph    = 0
3677       nscale   = 0
3678
3679       t_out    = ABS( t )
3680       tbrkdwn  = 0.0d0
3681       step_min = t_out
3682       step_max = 0.0d0
3683       nstep    = 0
3684       s_error  = 0.0d0
3685       x_error  = 0.0d0
3686       t_now    = 0.0d0
3687       t_new    = 0.0d0
3688
3689       p1 = 4.0d0/3.0d0
3690  1    p2 = p1 - 1.0d0
3691       p3 = p2 + p2 + p2
3692       eps = ABS( p3-1.0d0 )
3693       if ( eps.eq.0.0d0 ) go to 1
3694       if ( tol.le.eps ) tol = SQRT( eps )
3695       rndoff = eps*anorm
3696
3697       break_tol = 1.0d-7
3698 *>>>  break_tol = tol
3699 *>>>  break_tol = anorm*tol
3700
3701 *
3702 *---  step-by-step integration ...
3703 *
3704       sgn = SIGN( 1.0d0,t )
3705       SQR1 = SQRT( 0.1d0 )
3706       call DCOPY( n, v,1, w,1 )
3707
3708  100  if ( t_now.ge.t_out ) goto 500
3709
3710       nmult =  nmult + 1
3711       call matvec( w, wsp(iv) )
3712       call DAXPY( n, 1.0d0, u,1, wsp(iv),1 )
3713       beta = DNRM2( n, wsp(iv),1 )
3714       if ( beta.eq.0.0d0 ) goto 500
3715       call DSCAL( n, 1.0d0/beta, wsp(iv),1 )
3716       do i = 1,mh*mh
3717          wsp(ih+i-1) = 0.0d0
3718       enddo
3719
3720       if ( nstep.eq.0 ) then
3721 *---     obtain the very first stepsize ...
3722          xm = 1.0d0/DBLE( m )
3723          p1 = tol*(((m+1)/2.72D0)**(m+1))*SQRT(2.0D0*3.14D0*(m+1))
3724          t_new = (1.0d0/anorm)*(p1/(4.0d0*beta*anorm))**xm
3725          p1 = 10.0d0**(NINT( LOG10( t_new )-SQR1 )-1)
3726          t_new = AINT( t_new/p1 + 0.55d0 ) * p1
3727       endif
3728       nstep = nstep + 1
3729       t_step = MIN( t_out-t_now, t_new )
3730 *
3731 *---  Lanczos loop ...
3732 *
3733       j1v = iv + n
3734       do 200 j = 1,m
3735          nmult = nmult + 1
3736          call matvec( wsp(j1v-n), wsp(j1v) )
3737          if ( j.gt.1 )
3738      .     call DAXPY(n,-wsp(ih+(j-1)*mh+j-2),wsp(j1v-2*n),1,wsp(j1v),1)
3739          hjj = DDOT( n, wsp(j1v-n),1, wsp(j1v),1 )
3740          call DAXPY( n, -hjj, wsp(j1v-n),1, wsp(j1v),1 )
3741          hj1j = DNRM2( n, wsp(j1v),1 )
3742          wsp(ih+(j-1)*(mh+1)) = hjj
3743 *---     if `happy breakdown' go straightforward at the end ...
3744          if ( hj1j.le.break_tol ) then
3745             print*,'happy breakdown: mbrkdwn =',j,' h =',hj1j
3746             k1 = 0
3747             ibrkflag = 1
3748             mbrkdwn = j
3749             tbrkdwn = t_now
3750             t_step = t_out-t_now
3751             goto 300
3752          endif
3753          wsp(ih+(j-1)*mh+j) = hj1j
3754          wsp(ih+j*mh+j-1) = hj1j
3755          call DSCAL( n, 1.0d0/hj1j, wsp(j1v),1 )
3756          j1v = j1v + n
3757  200  continue
3758       nmult = nmult + 1
3759       call matvec( wsp(j1v-n), wsp(j1v) )
3760       avnorm = DNRM2( n, wsp(j1v),1 )
3761 *
3762 *---  set 1's for the 3-extended scheme ...
3763 *
3764  300  continue
3765       wsp(ih+mh*mbrkdwn) = 1.0d0
3766       wsp(ih+m*mh+m-1)   = 0.0d0
3767       wsp(ih+(m-1)*mh+m) = 0.0d0
3768       do i = 1,k1-1
3769          wsp(ih+(m+i)*mh+m+i-1) = 1.0d0
3770       enddo
3771 *
3772 *---  loop while ireject<mxreject until the tolerance is reached ...
3773 *
3774       ireject = 0
3775  401  continue
3776 *
3777 *---  compute w = beta*t_step*V*phi(t_step*H)*e1 + w
3778 *
3779       nexph = nexph + 1
3780       mx = mbrkdwn + MAX( 1,k1 )
3781
3782 *---  irreducible rational Pade approximation ...
3783       call DGPADM( ideg, mx, sgn*t_step, wsp(ih),mh,
3784      .              wsp(ifree),lfree, iwsp, iexph, ns, iflag )
3785       iexph = ifree + iexph - 1
3786       iphih = iexph + mbrkdwn*mx
3787       nscale = nscale + ns
3788       wsp(iphih+mbrkdwn)   = hj1j*wsp(iphih+mx+mbrkdwn-1)
3789       wsp(iphih+mbrkdwn+1) = hj1j*wsp(iphih+2*mx+mbrkdwn-1)
3790
3791  402  continue
3792 *
3793 *---  error estimate ...
3794 *
3795       if ( k1.eq.0 ) then
3796          err_loc = tol
3797       else
3798          p1 = ABS( wsp(iphih+m) )   * beta
3799          p2 = ABS( wsp(iphih+m+1) ) * beta * avnorm
3800          if ( p1.gt.10.0d0*p2 ) then
3801             err_loc = p2
3802             xm = 1.0d0/DBLE( m+1 )
3803          elseif ( p1.gt.p2 ) then
3804             err_loc = (p1*p2)/(p1-p2)
3805             xm = 1.0d0/DBLE( m+1 )
3806          else
3807             err_loc = p1
3808             xm = 1.0d0/DBLE( m )
3809          endif
3810       endif
3811 *
3812 *---  reject the step-size if the error is not acceptable ...
3813 *
3814       if ( (k1.ne.0) .and. (err_loc.gt.delta*t_step*tol) .and.
3815      .     (mxreject.eq.0 .or. ireject.lt.mxreject) ) then
3816          t_old = t_step
3817          t_step = gamma * t_step * (t_step*tol/err_loc)**xm
3818          p1 = 10.0d0**(NINT( LOG10( t_step )-SQR1 )-1)
3819          t_step = AINT( t_step/p1 + 0.55d0 ) * p1
3820          if ( itrace.ne.0 ) then
3821             print*,'t_step =',t_old
3822             print*,'err_loc =',err_loc
3823             print*,'err_required =',delta*t_old*tol
3824             print*,'stepsize rejected, stepping down to:',t_step
3825          endif
3826          ireject = ireject + 1
3827          nreject = nreject + 1
3828          if ( mxreject.ne.0 .and. ireject.gt.mxreject ) then
3829             print*,"Failure in DSPHIV: ---"
3830             print*,"The requested tolerance is too high."
3831             Print*,"Rerun with a smaller value."
3832             iflag = 2
3833             return
3834          endif
3835          goto 401
3836       endif
3837 *
3838       mx = mbrkdwn + MAX( 0,k1-2 )
3839       call DGEMV( 'n', n,mx,beta,wsp(iv),n,wsp(iphih),1,1.0d0,w,1 )
3840 *
3841 *---  suggested value for the next stepsize ...
3842 *
3843       t_new = gamma * t_step * (t_step*tol/err_loc)**xm
3844       p1 = 10.0d0**(NINT( LOG10( t_new )-SQR1 )-1)
3845       t_new = AINT( t_new/p1 + 0.55d0 ) * p1
3846
3847       err_loc = MAX( err_loc,rndoff )
3848 *
3849 *---  update the time covered ...
3850 *
3851       t_now = t_now + t_step
3852 *
3853 *---  display and keep some information ...
3854 *
3855       if ( itrace.ne.0 ) then
3856          print*,'integration',nstep,'---------------------------------'
3857          print*,'scale-square =',ns
3858          print*,'step_size =',t_step
3859          print*,'err_loc   =',err_loc
3860          print*,'next_step =',t_new
3861       endif
3862
3863       step_min = MIN( step_min, t_step )
3864       step_max = MAX( step_max, t_step )
3865       s_error = s_error + err_loc
3866       x_error = MAX( x_error, err_loc )
3867
3868       if ( mxstep.eq.0 .or. nstep.lt.mxstep ) goto 100
3869       iflag = 1
3870
3871  500  continue
3872
3873       iwsp(1) = nmult
3874       iwsp(2) = nexph
3875       iwsp(3) = nscale
3876       iwsp(4) = nstep
3877       iwsp(5) = nreject
3878       iwsp(6) = ibrkflag
3879       iwsp(7) = mbrkdwn
3880
3881       wsp(1)  = step_min
3882       wsp(2)  = step_max
3883       wsp(3)  = 0.0d0
3884       wsp(4)  = 0.0d0
3885       wsp(5)  = x_error
3886       wsp(6)  = s_error
3887       wsp(7)  = tbrkdwn
3888       wsp(8)  = sgn*t_now
3889       END
3890 *----------------------------------------------------------------------|
3891 *----------------------------------------------------------------------|
3892       subroutine ZGPHIV( n, m, t, u, v, w, tol, anorm,
3893      .                   wsp,lwsp, iwsp,liwsp, matvec, itrace,iflag )
3894
3895       implicit none
3896       integer          n, m, lwsp, liwsp, itrace, iflag, iwsp(liwsp)
3897       double precision t, tol, anorm
3898       complex*16       u(n), v(n), w(n), wsp(lwsp)
3899       external         matvec
3900
3901 *-----Purpose----------------------------------------------------------|
3902 *
3903 *---  ZGPHIV computes w = exp(t*A)v + t*phi(tA)u which is the solution
3904 *     of the nonhomogeneous linear ODE problem w' = Aw + u, w(0) = v.
3905 *     phi(z) = (exp(z)-1)/z and A is a Zomplex (i.e., complex double
3906 *     precision matrix).
3907 *
3908 *     The method used is based on Krylov subspace projection
3909 *     techniques and the matrix under consideration interacts only
3910 *     via the external routine `matvec' performing the matrix-vector
3911 *     product (matrix-free method).
3912 *
3913 *-----Arguments--------------------------------------------------------|
3914 *
3915 *     n      : (input) order of the principal matrix A.
3916 *
3917 *     m      : (input) maximum size for the Krylov basis.
3918 *
3919 *     t      : (input) time at wich the solution is needed (can be < 0).
3920 *
3921 *     u(n)   : (input) operand vector with respect to the phi function
3922 *              (forcing term of the ODE problem).
3923 *
3924 *     v(n)   : (input) operand vector with respect to the exp function
3925 *              (initial condition of the ODE problem).
3926 *
3927 *     w(n)   : (output) computed approximation of exp(t*A)v + t*phi(tA)u
3928 *
3929 *     tol    : (input/output) the requested accuracy tolerance on w.
3930 *              If on input tol=0.0d0 or tol is too small (tol.le.eps)
3931 *              the internal value sqrt(eps) is used, and tol is set to
3932 *              sqrt(eps) on output (`eps' denotes the machine epsilon).
3933 *              (`Happy breakdown' is assumed if h(j+1,j) .le. anorm*tol)
3934 *
3935 *     anorm  : (input) an approximation of some norm of A.
3936 *
3937 *   wsp(lwsp): (workspace) lwsp .ge. n*(m+1)+n+(m+3)^2+4*(m+3)^2+ideg+1
3938 *                                   +---------+-------+---------------+
3939 *              (actually, ideg=6)        V        H      wsp for PADE
3940 *
3941 * iwsp(liwsp): (workspace) liwsp .ge. m+3
3942 *
3943 *     matvec : external subroutine for matrix-vector multiplication.
3944 *              synopsis: matvec( x, y )
3945 *                        double precision x(*), y(*)
3946 *              computes: y(1:n) <- A*x(1:n)
3947 *                        where A is the principal matrix.
3948 *
3949 *     itrace : (input) running mode. 0=silent, 1=print step-by-step info
3950 *
3951 *     iflag  : (output) exit flag.
3952 *              <0 - bad input arguments
3953 *               0 - no problem
3954 *               1 - maximum number of steps reached without convergence
3955 *               2 - requested tolerance was too high
3956 *
3957 *-----Accounts on the computation--------------------------------------|
3958 *     Upon exit, an interested user may retrieve accounts on the
3959 *     computations. They are located in the workspace arrays wsp and
3960 *     iwsp as indicated below:
3961 *
3962 *     location  mnemonic                 description
3963 *     -----------------------------------------------------------------|
3964 *     iwsp(1) = nmult, number of matrix-vector multiplications used
3965 *     iwsp(2) = nexph, number of Hessenberg matrix exponential evaluated
3966 *     iwsp(3) = nscale, number of repeated squaring involved in Pade
3967 *     iwsp(4) = nstep, number of integration steps used up to completion
3968 *     iwsp(5) = nreject, number of rejected step-sizes
3969 *     iwsp(6) = ibrkflag, set to 1 if `happy breakdown' and 0 otherwise
3970 *     iwsp(7) = mbrkdwn, if `happy brkdown', basis-size when it occured
3971 *     -----------------------------------------------------------------|
3972 *     wsp(1)  = step_min, minimum step-size used during integration
3973 *     wsp(2)  = step_max, maximum step-size used during integration
3974 *     wsp(3)  = dummy
3975 *     wsp(4)  = dummy
3976 *     wsp(5)  = x_error, maximum among all local truncation errors
3977 *     wsp(6)  = s_error, global sum of local truncation errors
3978 *     wsp(7)  = tbrkdwn, if `happy breakdown', time when it occured
3979 *     wsp(8)  = t_now, integration domain successfully covered
3980 *
3981 *----------------------------------------------------------------------|
3982 *-----The following parameters may also be adjusted herein-------------|
3983 *
3984       integer mxstep, mxreject, ideg
3985       double precision delta, gamma
3986       parameter( mxstep   = 500,
3987      .           mxreject = 0,
3988      .           ideg     = 6,
3989      .           delta    = 1.2d0,
3990      .           gamma    = 0.9d0 )
3991
3992 *     mxstep  : maximum allowable number of integration steps.
3993 *               The value 0 means an infinite number of steps.
3994 *
3995 *     mxreject: maximum allowable number of rejections at each step.
3996 *               The value 0 means an infinite number of rejections.
3997 *
3998 *     ideg    : the Pade approximation of type (ideg,ideg) is used as
3999 *               an approximation to exp(H).
4000 *
4001 *     delta   : local truncation error `safety factor'
4002 *
4003 *     gamma   : stepsize `shrinking factor'
4004 *
4005 *----------------------------------------------------------------------|
4006 *     Roger B. Sidje (rbs@maths.uq.edu.au)
4007 *     EXPOKIT: Software Package for Computing Matrix Exponentials.
4008 *     ACM - Transactions On Mathematical Software, 24(1):130-156, 1998
4009 *----------------------------------------------------------------------|
4010 *
4011       complex*16 ZERO, ONE
4012       parameter( ZERO=(0.0d0,0.0d0), ONE=(1.0d0,0.0d0) )
4013       integer i, j, k1, mh, mx, iv, ih, j1v, ns, ifree, lfree, iexph,
4014      .        ireject,ibrkflag,mbrkdwn, nmult, nreject, nexph, nscale,
4015      .        nstep, iphih
4016       double precision sgn, t_out, tbrkdwn, step_min,step_max, err_loc,
4017      .                 s_error, x_error, t_now, t_new, t_step, t_old,
4018      .                 xm, beta, break_tol, p1, p2, p3, eps, rndoff,
4019      .                 avnorm, hj1j, SQR1
4020       complex*16 hij
4021
4022       intrinsic AINT,ABS,CMPLX,DBLE,INT,LOG10,MAX,MIN,NINT,SIGN,SQRT
4023       complex*16 ZDOTC
4024       double precision DZNRM2
4025 *
4026 *---  check restrictions on input parameters ...
4027 *
4028       iflag = 0
4029       if ( lwsp.lt.n*(m+3)+5*(m+3)**2+ideg+1 ) iflag = -1
4030       if ( liwsp.lt.m+3 ) iflag = -2
4031       if ( m.ge.n .or. m.le.0 ) iflag = -3
4032       if ( iflag.ne.0 ) stop 'bad sizes (in input of ZGPHIV)'
4033 *
4034 *---  initialisations ...
4035 *
4036       k1 = 3
4037       mh = m + 3
4038       iv = 1
4039       ih = iv + n*(m+1) + n
4040       ifree = ih + mh*mh
4041       lfree = lwsp - ifree + 1
4042
4043       ibrkflag = 0
4044       mbrkdwn  = m
4045       nmult    = 0
4046       nreject  = 0
4047       nexph    = 0
4048       nscale   = 0
4049
4050       t_out    = ABS( t )
4051       tbrkdwn  = 0.0d0
4052       step_min = t_out
4053       step_max = 0.0d0
4054       nstep    = 0
4055       s_error  = 0.0d0
4056       x_error  = 0.0d0
4057       t_now    = 0.0d0
4058       t_new    = 0.0d0
4059
4060       p1 = 4.0d0/3.0d0
4061  1    p2 = p1 - 1.0d0
4062       p3 = p2 + p2 + p2
4063       eps = ABS( p3-1.0d0 )
4064       if ( eps.eq.0.0d0 ) go to 1
4065       if ( tol.le.eps ) tol = SQRT( eps )
4066       rndoff = eps*anorm
4067
4068       break_tol = 1.0d-7
4069 *>>>  break_tol = tol
4070 *>>>  break_tol = anorm*tol
4071
4072       sgn = SIGN( 1.0d0,t )
4073       SQR1 = SQRT( 0.1d0 )
4074       call ZCOPY( n, v,1, w,1 )
4075 *
4076 *---  step-by-step integration ...
4077 *
4078  100  if ( t_now.ge.t_out ) goto 500
4079
4080       nmult =  nmult + 1
4081       call matvec( w, wsp(iv) )
4082       call ZAXPY( n, ONE, u,1, wsp(iv),1 )
4083       beta = DZNRM2( n, wsp(iv),1 )
4084       if ( beta.eq.0.0d0 ) goto 500
4085       call ZDSCAL( n, 1.0d0/beta, wsp(iv),1 )
4086       do i = 1,mh*mh
4087          wsp(ih+i-1) = ZERO
4088       enddo
4089
4090       if ( nstep.eq.0 ) then
4091 *---     obtain the very first stepsize ...
4092          xm = 1.0d0/DBLE( m )
4093          p1 = tol*(((m+1)/2.72D0)**(m+1))*SQRT(2.0D0*3.14D0*(m+1))
4094          t_new = (1.0d0/anorm)*(p1/(4.0d0*beta*anorm))**xm
4095          p1 = 10.0d0**(NINT( LOG10( t_new )-SQR1 )-1)
4096          t_new = AINT( t_new/p1 + 0.55d0 ) * p1
4097       endif
4098       nstep = nstep + 1
4099       t_step = MIN( t_out-t_now, t_new )
4100 *
4101 *---  Arnoldi loop ...
4102 *
4103       j1v = iv + n
4104       do 200 j = 1,m
4105          nmult = nmult + 1
4106          call matvec( wsp(j1v-n), wsp(j1v) )
4107          do i = 1,j
4108             hij = ZDOTC( n, wsp(iv+(i-1)*n),1, wsp(j1v),1 )
4109             call ZAXPY( n, -hij, wsp(iv+(i-1)*n),1, wsp(j1v),1 )
4110             wsp(ih+(j-1)*mh+i-1) = hij
4111          enddo
4112          hj1j = DZNRM2( n, wsp(j1v),1 )
4113 *---     if `happy breakdown' go straightforward at the end ...
4114          if ( hj1j.le.break_tol ) then
4115             print*,'happy breakdown: mbrkdwn =',j,' h =',hj1j
4116             k1 = 0
4117             ibrkflag = 1
4118             mbrkdwn = j
4119             tbrkdwn = t_now
4120             t_step = t_out-t_now
4121             goto 300
4122          endif
4123          wsp(ih+(j-1)*mh+j) = CMPLX( hj1j )
4124          call ZDSCAL( n, 1.0d0/hj1j, wsp(j1v),1 )
4125          j1v = j1v + n
4126  200  continue
4127       nmult = nmult + 1
4128       call matvec( wsp(j1v-n), wsp(j1v) )
4129       avnorm = DZNRM2( n, wsp(j1v),1 )
4130 *
4131 *---  set 1's for the 3-extended scheme ...
4132 *
4133  300  continue
4134       wsp(ih+mh*mbrkdwn) = ONE
4135       wsp(ih+(m-1)*mh+m) = ZERO
4136       do i = 1,k1-1
4137          wsp(ih+(m+i)*mh+m+i-1) = ONE
4138       enddo
4139 *
4140 *---  loop while ireject<mxreject until the tolerance is reached ...
4141 *
4142       ireject = 0
4143  401  continue
4144 *
4145 *---  compute w = beta*t_step*V*phi(t_step*H)*e1 + w
4146 *
4147       nexph = nexph + 1
4148       mx = mbrkdwn + MAX( 1,k1 )
4149
4150 *---  irreducible rational Pade approximation ...
4151       call ZGPADM( ideg, mx, sgn*t_step, wsp(ih),mh,
4152      .             wsp(ifree),lfree, iwsp, iexph, ns, iflag )
4153       iexph = ifree + iexph - 1
4154       iphih = iexph + mbrkdwn*mx
4155       nscale = nscale + ns
4156       wsp(iphih+mbrkdwn)   = hj1j*wsp(iphih+mx+mbrkdwn-1)
4157       wsp(iphih+mbrkdwn+1) = hj1j*wsp(iphih+2*mx+mbrkdwn-1)
4158
4159  402  continue
4160 *
4161 *---  error estimate ...
4162 *
4163       if ( k1.eq.0 ) then
4164          err_loc = tol
4165       else
4166          p1 = ABS( wsp(iphih+m) )   * beta
4167          p2 = ABS( wsp(iphih+m+1) ) * beta * avnorm
4168          if ( p1.gt.10.0d0*p2 ) then
4169             err_loc = p2
4170             xm = 1.0d0/DBLE( m+1 )
4171          elseif ( p1.gt.p2 ) then
4172             err_loc = (p1*p2)/(p1-p2)
4173             xm = 1.0d0/DBLE( m+1 )
4174          else
4175             err_loc = p1
4176             xm = 1.0d0/DBLE( m )
4177          endif
4178       endif
4179 *
4180 *---  reject the step-size if the error is not acceptable ...
4181 *
4182       if ( (k1.ne.0) .and. (err_loc.gt.delta*t_step*tol) .and.
4183      .     (mxreject.eq.0 .or. ireject.lt.mxreject) ) then
4184          t_old = t_step
4185          t_step = gamma * t_step * (t_step*tol/err_loc)**xm
4186          p1 = 10.0d0**(NINT( LOG10( t_step )-SQR1 )-1)
4187          t_step = AINT( t_step/p1 + 0.55d0 ) * p1
4188          if ( itrace.ne.0 ) then
4189             print*,'t_step =',t_old
4190             print*,'err_loc =',err_loc
4191             print*,'err_required =',delta*t_old*tol
4192             print*,'stepsize rejected, stepping down to:',t_step
4193          endif
4194          ireject = ireject + 1
4195          nreject = nreject + 1
4196          if ( mxreject.ne.0 .and. ireject.gt.mxreject ) then
4197             print*,"Failure in ZGPHIV: ---"
4198             print*,"The requested tolerance is too high."
4199             Print*,"Rerun with a smaller value."
4200             iflag = 2
4201             return
4202          endif
4203          goto 401
4204       endif
4205 *
4206       mx = mbrkdwn + MAX( 0,k1-2 )
4207       hij = CMPLX( beta )
4208       call ZGEMV( 'n', n,mx,hij,wsp(iv),n,wsp(iphih),1,ONE,w,1 )
4209 *
4210 *---  suggested value for the next stepsize ...
4211 *
4212       t_new = gamma * t_step * (t_step*tol/err_loc)**xm
4213       p1 = 10.0d0**(NINT( LOG10( t_new )-SQR1 )-1)
4214       t_new = AINT( t_new/p1 + 0.55d0 ) * p1
4215
4216       err_loc = MAX( err_loc,rndoff )
4217 *
4218 *---  update the time covered ...
4219 *
4220       t_now = t_now + t_step
4221 *
4222 *---  display and keep some information ...
4223 *
4224       if ( itrace.ne.0 ) then
4225          print*,'integration',nstep,'---------------------------------'
4226          print*,'scale-square =',ns
4227          print*,'step_size =',t_step
4228          print*,'err_loc   =',err_loc
4229          print*,'next_step =',t_new
4230       endif
4231
4232       step_min = MIN( step_min, t_step )
4233       step_max = MAX( step_max, t_step )
4234       s_error = s_error + err_loc
4235       x_error = MAX( x_error, err_loc )
4236
4237       if ( mxstep.eq.0 .or. nstep.lt.mxstep ) goto 100
4238       iflag = 1
4239
4240  500  continue
4241
4242       iwsp(1) = nmult
4243       iwsp(2) = nexph
4244       iwsp(3) = nscale
4245       iwsp(4) = nstep
4246       iwsp(5) = nreject
4247       iwsp(6) = ibrkflag
4248       iwsp(7) = mbrkdwn
4249
4250       wsp(1)  = CMPLX( step_min )
4251       wsp(2)  = CMPLX( step_max )
4252       wsp(3)  = CMPLX( 0.0d0 )
4253       wsp(4)  = CMPLX( 0.0d0 )
4254       wsp(5)  = CMPLX( x_error )
4255       wsp(6)  = CMPLX( s_error )
4256       wsp(7)  = CMPLX( tbrkdwn )
4257       wsp(8)  = CMPLX( sgn*t_now )
4258       END
4259 *----------------------------------------------------------------------|
4260 *----------------------------------------------------------------------|
4261       subroutine ZHPHIV( n, m, t, u, v, w, tol, anorm,
4262      .                   wsp,lwsp, iwsp,liwsp, matvec, itrace,iflag )
4263
4264       implicit none
4265       integer          n, m, lwsp, liwsp, itrace, iflag, iwsp(liwsp)
4266       double precision t, tol, anorm
4267       complex*16       u(n), v(n), w(n), wsp(lwsp)
4268       external         matvec
4269
4270 *-----Purpose----------------------------------------------------------|
4271 *
4272 *---  ZHPHIV computes w = exp(t*A)v + t*phi(tA)u which is the solution
4273 *     of the nonhomogeneous linear ODE problem w' = Aw + u, w(0) = v.
4274 *     phi(z) = (exp(z)-1)/z and A is an Hermitian matrix.
4275 *
4276 *     The method used is based on Krylov subspace projection
4277 *     techniques and the matrix under consideration interacts only
4278 *     via the external routine `matvec' performing the matrix-vector
4279 *     product (matrix-free method).
4280 *
4281 *-----Arguments--------------------------------------------------------|
4282 *
4283 *     n      : (input) order of the principal matrix A.
4284 *
4285 *     m      : (input) maximum size for the Krylov basis.
4286 *
4287 *     t      : (input) time at wich the solution is needed (can be < 0).
4288 *
4289 *     u(n)   : (input) operand vector with respect to the phi function
4290 *              (forcing term of the ODE problem).
4291 *
4292 *     v(n)   : (input) operand vector with respect to the exp function
4293 *              (initial condition of the ODE problem).
4294 *
4295 *     w(n)   : (output) computed approximation of exp(t*A)v + t*phi(tA)u
4296 *
4297 *     tol    : (input/output) the requested accuracy tolerance on w.
4298 *              If on input tol=0.0d0 or tol is too small (tol.le.eps)
4299 *              the internal value sqrt(eps) is used, and tol is set to
4300 *              sqrt(eps) on output (`eps' denotes the machine epsilon).
4301 *              (`Happy breakdown' is assumed if h(j+1,j) .le. anorm*tol)
4302 *
4303 *     anorm  : (input) an approximation of some norm of A.
4304 *
4305 *   wsp(lwsp): (workspace) lwsp .ge. n*(m+1)+n+(m+3)^2+4*(m+3)^2+ideg+1
4306 *                                   +---------+-------+---------------+
4307 *              (actually, ideg=6)        V        H      wsp for PADE
4308 *
4309 * iwsp(liwsp): (workspace) liwsp .ge. m+3
4310 *
4311 *     matvec : external subroutine for matrix-vector multiplication.
4312 *              synopsis: matvec( x, y )
4313 *                        double precision x(*), y(*)
4314 *              computes: y(1:n) <- A*x(1:n)
4315 *                        where A is the principal matrix.
4316 *
4317 *     itrace : (input) running mode. 0=silent, 1=print step-by-step info
4318 *
4319 *     iflag  : (output) exit flag.
4320 *              <0 - bad input arguments
4321 *               0 - no problem
4322 *               1 - maximum number of steps reached without convergence
4323 *               2 - requested tolerance was too high
4324 *
4325 *-----Accounts on the computation--------------------------------------|
4326 *     Upon exit, an interested user may retrieve accounts on the
4327 *     computations. They are located in the workspace arrays wsp and
4328 *     iwsp as indicated below:
4329 *
4330 *     location  mnemonic                 description
4331 *     -----------------------------------------------------------------|
4332 *     iwsp(1) = nmult, number of matrix-vector multiplications used
4333 *     iwsp(2) = nexph, number of Hessenberg matrix exponential evaluated
4334 *     iwsp(3) = nscale, number of repeated squaring involved in Pade
4335 *     iwsp(4) = nstep, number of integration steps used up to completion
4336 *     iwsp(5) = nreject, number of rejected step-sizes
4337 *     iwsp(6) = ibrkflag, set to 1 if `happy breakdown' and 0 otherwise
4338 *     iwsp(7) = mbrkdwn, if `happy brkdown', basis-size when it occured
4339 *     -----------------------------------------------------------------|
4340 *     wsp(1)  = step_min, minimum step-size used during integration
4341 *     wsp(2)  = step_max, maximum step-size used during integration
4342 *     wsp(3)  = dummy
4343 *     wsp(4)  = dummy
4344 *     wsp(5)  = x_error, maximum among all local truncation errors
4345 *     wsp(6)  = s_error, global sum of local truncation errors
4346 *     wsp(7)  = tbrkdwn, if `happy breakdown', time when it occured
4347 *     wsp(8)  = t_now, integration domain successfully covered
4348 *
4349 *----------------------------------------------------------------------|
4350 *-----The following parameters may also be adjusted herein-------------|
4351 *
4352       integer mxstep, mxreject, ideg
4353       double precision delta, gamma
4354       parameter( mxstep   = 500,
4355      .           mxreject = 0,
4356      .           ideg     = 6,
4357      .           delta    = 1.2d0,
4358      .           gamma    = 0.9d0 )
4359
4360 *     mxstep  : maximum allowable number of integration steps.
4361 *               The value 0 means an infinite number of steps.
4362 *
4363 *     mxreject: maximum allowable number of rejections at each step.
4364 *               The value 0 means an infinite number of rejections.
4365 *
4366 *     ideg    : the Pade approximation of type (ideg,ideg) is used as
4367 *               an approximation to exp(H).
4368 *
4369 *     delta   : local truncation error `safety factor'
4370 *
4371 *     gamma   : stepsize `shrinking factor'
4372 *
4373 *----------------------------------------------------------------------|
4374 *     Roger B. Sidje (rbs@maths.uq.edu.au)
4375 *     EXPOKIT: Software Package for Computing Matrix Exponentials.
4376 *     ACM - Transactions On Mathematical Software, 24(1):130-156, 1998
4377 *----------------------------------------------------------------------|
4378 *
4379       complex*16 ZERO, ONE
4380       parameter( ZERO=(0.0d0,0.0d0), ONE=(1.0d0,0.0d0) )
4381       integer i, j, k1, mh, mx, iv, ih, j1v, ns, ifree, lfree, iexph,
4382      .        ireject,ibrkflag,mbrkdwn, nmult, nreject, nexph, nscale,
4383      .        nstep, iphih
4384       double precision sgn, t_out, tbrkdwn, step_min,step_max, err_loc,
4385      .                 s_error, x_error, t_now, t_new, t_step, t_old,
4386      .                 xm, beta, break_tol, p1, p2, p3, eps, rndoff,
4387      .                 avnorm, hj1j, SQR1
4388       complex*16 hjj
4389
4390       intrinsic AINT,ABS,CMPLX,DBLE,INT,LOG10,MAX,MIN,NINT,SIGN,SQRT
4391       complex*16 ZDOTC
4392       double precision DZNRM2
4393 *
4394 *---  check restrictions on input parameters ...
4395 *
4396       iflag = 0
4397       if ( lwsp.lt.n*(m+3)+5*(m+3)**2+ideg+1 ) iflag = -1
4398       if ( liwsp.lt.m+3 ) iflag = -2
4399       if ( m.ge.n .or. m.le.0 ) iflag = -3
4400       if ( iflag.ne.0 ) stop 'bad sizes (in input of ZHPHIV)'
4401 *
4402 *---  initialisations ...
4403 *
4404       k1 = 3
4405       mh = m + 3
4406       iv = 1
4407       ih = iv + n*(m+1) + n
4408       ifree = ih + mh*mh
4409       lfree = lwsp - ifree + 1
4410
4411       ibrkflag = 0
4412       mbrkdwn  = m
4413       nmult    = 0
4414       nreject  = 0
4415       nexph    = 0
4416       nscale   = 0
4417
4418       t_out    = ABS( t )
4419       tbrkdwn  = 0.0d0
4420       step_min = t_out
4421       step_max = 0.0d0
4422       nstep    = 0
4423       s_error  = 0.0d0
4424       x_error  = 0.0d0
4425       t_now    = 0.0d0
4426       t_new    = 0.0d0
4427
4428       p1 = 4.0d0/3.0d0
4429  1    p2 = p1 - 1.0d0
4430       p3 = p2 + p2 + p2
4431       eps = ABS( p3-1.0d0 )
4432       if ( eps.eq.0.0d0 ) go to 1
4433       if ( tol.le.eps ) tol = SQRT( eps )
4434       rndoff = eps*anorm
4435
4436       break_tol = 1.0d-7
4437 *>>>  break_tol = tol
4438 *>>>  break_tol = anorm*tol
4439
4440       sgn = SIGN( 1.0d0,t )
4441       SQR1 = SQRT( 0.1d0 )
4442       call ZCOPY( n, v,1, w,1 )
4443 *
4444 *---  step-by-step integration ...
4445 *
4446  100  if ( t_now.ge.t_out ) goto 500
4447
4448       nmult =  nmult + 1
4449       call matvec( w, wsp(iv) )
4450       call ZAXPY( n, ONE, u,1, wsp(iv),1 )
4451       beta = DZNRM2( n, wsp(iv),1 )
4452       if ( beta.eq.0.0d0 ) goto 500
4453       call ZDSCAL( n, 1.0d0/beta, wsp(iv),1 )
4454       do i = 1,mh*mh
4455          wsp(ih+i-1) = ZERO
4456       enddo
4457
4458       if ( nstep.eq.0 ) then
4459 *---     obtain the very first stepsize ...
4460          xm = 1.0d0/DBLE( m )
4461          p1 = tol*(((m+1)/2.72D0)**(m+1))*SQRT(2.0D0*3.14D0*(m+1))
4462          t_new = (1.0d0/anorm)*(p1/(4.0d0*beta*anorm))**xm
4463          p1 = 10.0d0**(NINT( LOG10( t_new )-SQR1 )-1)
4464          t_new = AINT( t_new/p1 + 0.55d0 ) * p1
4465       endif
4466       nstep = nstep + 1
4467       t_step = MIN( t_out-t_now, t_new )
4468 *
4469 *---  Lanczos loop ...
4470 *
4471       j1v = iv + n
4472       do 200 j = 1,m
4473          nmult = nmult + 1
4474          call matvec( wsp(j1v-n), wsp(j1v) )
4475          if ( j.gt.1 )
4476      .     call ZAXPY(n,-wsp(ih+(j-1)*mh+j-2),wsp(j1v-2*n),1,wsp(j1v),1)
4477          hjj = ZDOTC( n, wsp(j1v-n),1, wsp(j1v),1 )
4478          call ZAXPY( n, -hjj, wsp(j1v-n),1, wsp(j1v),1 )
4479          hj1j = DZNRM2( n, wsp(j1v),1 )
4480          wsp(ih+(j-1)*(mh+1)) = hjj
4481          if ( hj1j.le.break_tol ) then
4482             print*,'happy breakdown: mbrkdwn =',j,' h =',hj1j
4483             k1 = 0
4484             ibrkflag = 1
4485             mbrkdwn = j
4486             tbrkdwn = t_now
4487             t_step = t_out-t_now
4488             goto 300
4489          endif
4490          wsp(ih+(j-1)*mh+j) = CMPLX( hj1j )
4491          wsp(ih+j*mh+j-1) = CMPLX( hj1j )
4492          call ZDSCAL( n, 1.0d0/hj1j, wsp(j1v),1 )
4493          j1v = j1v + n
4494  200  continue
4495       nmult = nmult + 1
4496       call matvec( wsp(j1v-n), wsp(j1v) )
4497       avnorm = DZNRM2( n, wsp(j1v),1 )
4498 *
4499 *---  set 1's for the 3-extended scheme ...
4500 *
4501  300  continue
4502       wsp(ih+mh*mbrkdwn) = ONE
4503       wsp(ih+m*mh+m-1)   = ZERO
4504       wsp(ih+(m-1)*mh+m) = ZERO
4505       do i = 1,k1-1
4506          wsp(ih+(m+i)*mh+m+i-1) = ONE
4507       enddo
4508 *
4509 *---  loop while ireject<mxreject until the tolerance is reached ...
4510 *
4511       ireject = 0
4512  401  continue
4513 *
4514 *---  compute w = beta*t_step*V*phi(t_step*H)*e1 + w
4515 *
4516       nexph = nexph + 1
4517       mx = mbrkdwn + MAX( 1,k1 )
4518
4519 *---  irreducible rational Pade approximation ...
4520       call ZGPADM( ideg, mx, sgn*t_step, wsp(ih),mh,
4521      .              wsp(ifree),lfree, iwsp, iexph, ns, iflag )
4522       iexph = ifree + iexph - 1
4523       iphih = iexph + mbrkdwn*mx
4524       nscale = nscale + ns
4525       wsp(iphih+mbrkdwn)   = hj1j*wsp(iphih+mx+mbrkdwn-1)
4526       wsp(iphih+mbrkdwn+1) = hj1j*wsp(iphih+2*mx+mbrkdwn-1)
4527
4528  402  continue
4529 *
4530 *---  error estimate ...
4531 *
4532       if ( k1.eq.0 ) then
4533          err_loc = tol
4534       else
4535          p1 = ABS( wsp(iphih+m) )   * beta
4536          p2 = ABS( wsp(iphih+m+1) ) * beta * avnorm
4537          if ( p1.gt.10.0d0*p2 ) then
4538             err_loc = p2
4539             xm = 1.0d0/DBLE( m+1 )
4540          elseif ( p1.gt.p2 ) then
4541             err_loc = (p1*p2)/(p1-p2)
4542             xm = 1.0d0/DBLE( m+1 )
4543          else
4544             err_loc = p1
4545             xm = 1.0d0/DBLE( m )
4546          endif
4547       endif
4548 *
4549 *---  reject the step-size if the error is not acceptable ...
4550 *
4551       if ( (k1.ne.0) .and. (err_loc.gt.delta*t_step*tol) .and.
4552      .     (mxreject.eq.0 .or. ireject.lt.mxreject) ) then
4553          t_old = t_step
4554          t_step = gamma * t_step * (t_step*tol/err_loc)**xm
4555          p1 = 10.0d0**(NINT( LOG10( t_step )-SQR1 )-1)
4556          t_step = AINT( t_step/p1 + 0.55d0 ) * p1
4557          if ( itrace.ne.0 ) then
4558             print*,'t_step =',t_old
4559             print*,'err_loc =',err_loc
4560             print*,'err_required =',delta*t_old*tol
4561             print*,'stepsize rejected, stepping down to:',t_step
4562          endif
4563          ireject = ireject + 1
4564          nreject = nreject + 1
4565          if ( mxreject.ne.0 .and. ireject.gt.mxreject ) then
4566             print*,"Failure in ZHPHIV: ---"
4567             print*,"The requested tolerance is too high."
4568             Print*,"Rerun with a smaller value."
4569             iflag = 2
4570             return
4571          endif
4572          goto 401
4573       endif
4574 *
4575       mx = mbrkdwn + MAX( 0,k1-2 )
4576       hjj = CMPLX( beta )
4577       call ZGEMV( 'n', n,mx,hjj,wsp(iv),n,wsp(iphih),1,ONE,w,1 )
4578 *
4579 *---  suggested value for the next stepsize ...
4580 *
4581       t_new = gamma * t_step * (t_step*tol/err_loc)**xm
4582       p1 = 10.0d0**(NINT( LOG10( t_new )-SQR1 )-1)
4583       t_new = AINT( t_new/p1 + 0.55d0 ) * p1
4584
4585       err_loc = MAX( err_loc,rndoff )
4586 *
4587 *---  update the time covered ...
4588 *
4589       t_now = t_now + t_step
4590 *
4591 *---  display and keep some information ...
4592 *
4593       if ( itrace.ne.0 ) then
4594          print*,'integration',nstep,'---------------------------------'
4595          print*,'scale-square =',ns
4596          print*,'step_size =',t_step
4597          print*,'err_loc   =',err_loc
4598          print*,'next_step =',t_new
4599       endif
4600
4601       step_min = MIN( step_min, t_step )
4602       step_max = MAX( step_max, t_step )
4603       s_error = s_error + err_loc
4604       x_error = MAX( x_error, err_loc )
4605
4606       if ( mxstep.eq.0 .or. nstep.lt.mxstep ) goto 100
4607       iflag = 1
4608
4609  500  continue
4610
4611       iwsp(1) = nmult
4612       iwsp(2) = nexph
4613       iwsp(3) = nscale
4614       iwsp(4) = nstep
4615       iwsp(5) = nreject
4616       iwsp(6) = ibrkflag
4617       iwsp(7) = mbrkdwn
4618
4619       wsp(1)  = CMPLX( step_min )
4620       wsp(2)  = CMPLX( step_max )
4621       wsp(3)  = CMPLX( 0.0d0 )
4622       wsp(4)  = CMPLX( 0.0d0 )
4623       wsp(5)  = CMPLX( x_error )
4624       wsp(6)  = CMPLX( s_error )
4625       wsp(7)  = CMPLX( tbrkdwn )
4626       wsp(8)  = CMPLX( sgn*t_now )
4627       END
4628 *----------------------------------------------------------------------|