src/ include/ test/ fig/ objs/
[sddekit.git] / src / expokit.f
blob863dbd06e6a81916cd21de7c75d889f54aae874f
1 ! from expokit, free for non-commercial use
3 *----------------------------------------------------------------------|
4 subroutine DMEXPV( n, m, t, v, w, tol, anorm,
5 . wsp,lwsp, iwsp,liwsp, matvec, itrace,iflag )
7 implicit none
8 integer n, m, lwsp, liwsp, itrace, iflag, iwsp(liwsp)
9 double precision t, tol, anorm, v(n), w(n), wsp(lwsp)
10 external matvec
12 *-----Purpose----------------------------------------------------------|
14 *--- DMEXPV computes w = exp(t*A)*v - Customised for MARKOV CHAINS.
16 * It does not compute the matrix exponential in isolation but
17 * instead, it computes directly the action of the exponential
18 * operator on the operand vector. This way of doing so allows
19 * for addressing large sparse problems.
21 * The method used is based on Krylov subspace projection
22 * techniques and the matrix under consideration interacts only
23 * via the external routine `matvec' performing the matrix-vector
24 * product (matrix-free method).
26 * This is a customised version for Markov Chains. This means that a
27 * check is done within this code to ensure that the resulting vector
28 * w is a probability vector, i.e., w must have all its components
29 * in [0,1], with sum equal to 1. This check is done at some expense
30 * and the user may try DGEXPV which is cheaper since it ignores
31 * probability constraints.
33 * IMPORTANT: The check assumes that the transition rate matrix Q
34 * satisfies Qe = 0, where e=(1,...,1)'. Don't use DMEXPV
35 * if this condition does not hold. Use DGEXPV instead.
36 * DMEXPV/DGEXPV require the matrix-vector product
37 * y = A*x = Q'*x, i.e, the TRANSPOSE of Q times a vector.
38 * Failure to remember this leads to wrong results.
40 *-----Arguments--------------------------------------------------------|
42 * n : (input) order of the principal matrix A.
44 * m : (input) maximum size for the Krylov basis.
46 * t : (input) time at wich the solution is needed (can be < 0).
48 * v(n) : (input) given operand vector.
50 * w(n) : (output) computed approximation of exp(t*A)*v.
52 * tol : (input/output) the requested acurracy tolerance on w.
53 * If on input tol=0.0d0 or tol is too small (tol.le.eps)
54 * the internal value sqrt(eps) is used, and tol is set to
55 * sqrt(eps) on output (`eps' denotes the machine epsilon).
56 * (`Happy breakdown' is assumed if h(j+1,j) .le. anorm*tol)
58 * anorm : (input) an approximation of some norm of A.
60 * wsp(lwsp): (workspace) lwsp .ge. n*(m+1)+n+(m+2)^2+4*(m+2)^2+ideg+1
61 * +---------+-------+---------------+
62 * (actually, ideg=6) V H wsp for PADE
64 * iwsp(liwsp): (workspace) liwsp .ge. m+2
66 * matvec : external subroutine for matrix-vector multiplication.
67 * synopsis: matvec( x, y )
68 * double precision x(*), y(*)
69 * computes: y(1:n) <- A*x(1:n)
70 * where A is the principal matrix.
72 * IMPORTANT: DMEXPV requires the product y = Ax = Q'x, i.e.
73 * the TRANSPOSE of the transition rate matrix.
75 * itrace : (input) running mode. 0=silent, 1=print step-by-step info
77 * iflag : (output) exit flag.
78 * <0 - bad input arguments
79 * 0 - no problem
80 * 1 - maximum number of steps reached without convergence
81 * 2 - requested tolerance was too high
83 *-----Accounts on the computation--------------------------------------|
84 * Upon exit, an interested user may retrieve accounts on the
85 * computations. They are located in the workspace arrays wsp and
86 * iwsp as indicated below:
88 * location mnemonic description
89 * -----------------------------------------------------------------|
90 * iwsp(1) = nmult, number of matrix-vector multiplications used
91 * iwsp(2) = nexph, number of Hessenberg matrix exponential evaluated
92 * iwsp(3) = nscale, number of repeated squaring involved in Pade
93 * iwsp(4) = nstep, number of integration steps used up to completion
94 * iwsp(5) = nreject, number of rejected step-sizes
95 * iwsp(6) = ibrkflag, set to 1 if `happy breakdown' and 0 otherwise
96 * iwsp(7) = mbrkdwn, if `happy brkdown', basis-size when it occured
97 * -----------------------------------------------------------------|
98 * wsp(1) = step_min, minimum step-size used during integration
99 * wsp(2) = step_max, maximum step-size used during integration
100 * wsp(3) = x_round, maximum among all roundoff errors (lower bound)
101 * wsp(4) = s_round, sum of roundoff errors (lower bound)
102 * wsp(5) = x_error, maximum among all local truncation errors
103 * wsp(6) = s_error, global sum of local truncation errors
104 * wsp(7) = tbrkdwn, if `happy breakdown', time when it occured
105 * wsp(8) = t_now, integration domain successfully covered
106 * wsp(9) = hump, i.e., max||exp(sA)||, s in [0,t] (or [t,0] if t<0)
107 * wsp(10) = ||w||/||v||, scaled norm of the solution w.
108 * -----------------------------------------------------------------|
109 * The `hump' is a measure of the conditioning of the problem. The
110 * matrix exponential is well-conditioned if hump = 1, whereas it is
111 * poorly-conditioned if hump >> 1. However the solution can still be
112 * relatively fairly accurate even when the hump is large (the hump
113 * is an upper bound), especially when the hump and the scaled norm
114 * of w [this is also computed and returned in wsp(10)] are of the
115 * same order of magnitude (further details in reference below).
116 * Markov chains are usually well-conditioned problems.
118 *----------------------------------------------------------------------|
119 *-----The following parameters may also be adjusted herein-------------|
121 integer mxstep, mxreject, ideg
122 double precision delta, gamma
123 parameter( mxstep = 500,
124 . mxreject = 0,
125 . ideg = 6,
126 . delta = 1.2d0,
127 . gamma = 0.9d0 )
129 * mxstep : maximum allowable number of integration steps.
130 * The value 0 means an infinite number of steps.
132 * mxreject: maximum allowable number of rejections at each step.
133 * The value 0 means an infinite number of rejections.
135 * ideg : the Pade approximation of type (ideg,ideg) is used as
136 * an approximation to exp(H). The value 0 switches to the
137 * uniform rational Chebyshev approximation of type (14,14)
139 * delta : local truncation error `safety factor'
141 * gamma : stepsize `shrinking factor'
143 *----------------------------------------------------------------------|
144 * Roger B. Sidje (rbs@maths.uq.edu.au)
145 * EXPOKIT: Software Package for Computing Matrix Exponentials.
146 * ACM - Transactions On Mathematical Software, 24(1):130-156, 1998
147 *----------------------------------------------------------------------|
149 integer i, j, k1, mh, mx, iv, ih, j1v, ns, ifree, lfree, iexph,
150 . ireject,ibrkflag,mbrkdwn, nmult, nreject, nexph, nscale,
151 . nstep
152 double precision sgn, t_out, tbrkdwn, step_min,step_max, err_loc,
153 . s_error, x_error, t_now, t_new, t_step, t_old,
154 . xm, beta, break_tol, p1, p2, p3, eps, rndoff,
155 . vnorm, avnorm, hj1j, hij, hump, SQR1,
156 . roundoff, s_round, x_round
158 intrinsic AINT,ABS,DBLE,LOG10,MAX,MIN,NINT,SIGN,SQRT
159 double precision DDOT, DNRM2, DASUM
161 *--- check restrictions on input parameters ...
162 iflag = 0
163 if ( lwsp.lt.n*(m+2)+5*(m+2)**2+ideg+1 ) iflag = -1
164 if ( liwsp.lt.m+2 ) iflag = -2
165 if ( m.ge.n .or. m.le.0 ) iflag = -3
166 if ( iflag.ne.0 ) stop 'bad sizes (in input of DMEXPV)'
168 *--- initialisations ...
170 k1 = 2
171 mh = m + 2
172 iv = 1
173 ih = iv + n*(m+1) + n
174 ifree = ih + mh*mh
175 lfree = lwsp - ifree + 1
177 ibrkflag = 0
178 mbrkdwn = m
179 nmult = 0
180 nreject = 0
181 nexph = 0
182 nscale = 0
184 sgn = SIGN( 1.0d0,t )
185 t_out = ABS( t )
186 tbrkdwn = 0.0d0
187 step_min = t_out
188 step_max = 0.0d0
189 nstep = 0
190 s_error = 0.0d0
191 s_round = 0.0d0
192 x_error = 0.0d0
193 x_round = 0.0d0
194 t_now = 0.0d0
195 t_new = 0.0d0
197 p1 = 4.0d0/3.0d0
198 1 p2 = p1 - 1.0d0
199 p3 = p2 + p2 + p2
200 eps = ABS( p3-1.0d0 )
201 if ( eps.eq.0.0d0 ) go to 1
202 if ( tol.le.eps ) tol = SQRT( eps )
203 rndoff = eps*anorm
205 break_tol = 1.0d-7
206 *>>> break_tol = tol
207 *>>> break_tol = anorm*tol
209 call DCOPY( n, v,1, w,1 )
210 beta = DNRM2( n, w,1 )
211 vnorm = beta
212 hump = beta
214 *--- obtain the very first stepsize ...
216 SQR1 = SQRT( 0.1d0 )
217 xm = 1.0d0/DBLE( m )
218 p1 = tol*(((m+1)/2.72D0)**(m+1))*SQRT(2.0D0*3.14D0*(m+1))
219 t_new = (1.0d0/anorm)*(p1/(4.0d0*beta*anorm))**xm
220 p1 = 10.0d0**(NINT( LOG10( t_new )-SQR1 )-1)
221 t_new = AINT( t_new/p1 + 0.55d0 ) * p1
223 *--- step-by-step integration ...
225 100 if ( t_now.ge.t_out ) goto 500
227 nstep = nstep + 1
228 t_step = MIN( t_out-t_now, t_new )
230 p1 = 1.0d0/beta
231 do i = 1,n
232 wsp(iv + i-1) = p1*w(i)
233 enddo
234 do i = 1,mh*mh
235 wsp(ih+i-1) = 0.0d0
236 enddo
238 *--- Arnoldi loop ...
240 j1v = iv + n
241 do 200 j = 1,m
242 nmult = nmult + 1
243 call matvec( wsp(j1v-n), wsp(j1v) )
244 do i = 1,j
245 hij = DDOT( n, wsp(iv+(i-1)*n),1, wsp(j1v),1 )
246 call DAXPY( n, -hij, wsp(iv+(i-1)*n),1, wsp(j1v),1 )
247 wsp(ih+(j-1)*mh+i-1) = hij
248 enddo
249 hj1j = DNRM2( n, wsp(j1v),1 )
250 *--- if `happy breakdown' go straightforward at the end ...
251 if ( hj1j.le.break_tol ) then
252 print*,'happy breakdown: mbrkdwn =',j,' h =',hj1j
253 k1 = 0
254 ibrkflag = 1
255 mbrkdwn = j
256 tbrkdwn = t_now
257 t_step = t_out-t_now
258 goto 300
259 endif
260 wsp(ih+(j-1)*mh+j) = hj1j
261 call DSCAL( n, 1.0d0/hj1j, wsp(j1v),1 )
262 j1v = j1v + n
263 200 continue
264 nmult = nmult + 1
265 call matvec( wsp(j1v-n), wsp(j1v) )
266 avnorm = DNRM2( n, wsp(j1v),1 )
268 *--- set 1 for the 2-corrected scheme ...
270 300 continue
271 wsp(ih+m*mh+m+1) = 1.0d0
273 *--- loop while ireject<mxreject until the tolerance is reached ...
275 ireject = 0
276 401 continue
279 *--- compute w = beta*V*exp(t_step*H)*e1 ..
281 nexph = nexph + 1
282 mx = mbrkdwn + k1
283 if ( ideg.ne.0 ) then
284 *--- irreducible rational Pade approximation ...
285 call DGPADM( ideg, mx, sgn*t_step, wsp(ih),mh,
286 . wsp(ifree),lfree, iwsp, iexph, ns, iflag )
287 iexph = ifree + iexph - 1
288 nscale = nscale + ns
289 else
290 *--- uniform rational Chebyshev approximation ...
291 iexph = ifree
292 do i = 1,mx
293 wsp(iexph+i-1) = 0.0d0
294 enddo
295 wsp(iexph) = 1.0d0
296 call DNCHBV(mx,sgn*t_step,wsp(ih),mh,wsp(iexph),wsp(ifree+mx))
297 endif
299 402 continue
301 *--- error estimate ...
303 if ( k1.eq.0 ) then
304 err_loc = tol
305 else
306 p1 = ABS( wsp(iexph+m) ) * beta
307 p2 = ABS( wsp(iexph+m+1) ) * beta * avnorm
308 if ( p1.gt.10.0d0*p2 ) then
309 err_loc = p2
310 xm = 1.0d0/DBLE( m )
311 elseif ( p1.gt.p2 ) then
312 err_loc = (p1*p2)/(p1-p2)
313 xm = 1.0d0/DBLE( m )
314 else
315 err_loc = p1
316 xm = 1.0d0/DBLE( m-1 )
317 endif
318 endif
320 *--- reject the step-size if the error is not acceptable ...
322 if ( (k1.ne.0) .and. (err_loc.gt.delta*t_step*tol) .and.
323 . (mxreject.eq.0 .or. ireject.lt.mxreject) ) then
324 t_old = t_step
325 t_step = gamma * t_step * (t_step*tol/err_loc)**xm
326 p1 = 10.0d0**(NINT( LOG10( t_step )-SQR1 )-1)
327 t_step = AINT( t_step/p1 + 0.55d0 ) * p1
328 if ( itrace.ne.0 ) then
329 print*,'t_step =',t_old
330 print*,'err_loc =',err_loc
331 print*,'err_required =',delta*t_old*tol
332 print*,'stepsize rejected, stepping down to:',t_step
333 endif
334 ireject = ireject + 1
335 nreject = nreject + 1
336 if ( mxreject.ne.0 .and. ireject.gt.mxreject ) then
337 print*,"Failure in DMEXPV: ---"
338 print*,"The requested tolerance is too high."
339 Print*,"Rerun with a smaller value."
340 iflag = 2
341 return
342 endif
343 goto 401
344 endif
346 *--- now update w = beta*V*exp(t_step*H)*e1 and the hump ...
348 mx = mbrkdwn + MAX( 0,k1-1 )
349 call DGEMV( 'n', n,mx,beta,wsp(iv),n,wsp(iexph),1,0.0d0,w,1 )
350 beta = DNRM2( n, w,1 )
351 hump = MAX( hump, beta )
353 *--- Markov model constraints ...
355 j = 0
356 do i = 1,n
357 if ( w(i).lt.0.0d0 ) then
358 w(i) = 0.0d0
359 j = j + 1
360 endif
361 enddo
362 p1 = DASUM( n, w,1 )
363 if ( j.gt.0 ) call DSCAL( n, 1.0d0/p1, w,1 )
364 roundoff = DABS( 1.0d0-p1 ) / DBLE( n )
366 *--- suggested value for the next stepsize ...
368 t_new = gamma * t_step * (t_step*tol/err_loc)**xm
369 p1 = 10.0d0**(NINT( LOG10( t_new )-SQR1 )-1)
370 t_new = AINT( t_new/p1 + 0.55d0 ) * p1
372 err_loc = MAX( err_loc, roundoff )
373 err_loc = MAX( err_loc, rndoff )
375 *--- update the time covered ...
377 t_now = t_now + t_step
379 *--- display and keep some information ...
381 if ( itrace.ne.0 ) then
382 print*,'integration',nstep,'---------------------------------'
383 print*,'scale-square =',ns
384 print*,'wnorm =',beta
385 print*,'step_size =',t_step
386 print*,'err_loc =',err_loc
387 print*,'roundoff =',roundoff
388 print*,'next_step =',t_new
389 endif
391 step_min = MIN( step_min, t_step )
392 step_max = MAX( step_max, t_step )
393 s_error = s_error + err_loc
394 s_round = s_round + roundoff
395 x_error = MAX( x_error, err_loc )
396 x_round = MAX( x_round, roundoff )
398 if ( mxstep.eq.0 .or. nstep.lt.mxstep ) goto 100
399 iflag = 1
401 500 continue
403 iwsp(1) = nmult
404 iwsp(2) = nexph
405 iwsp(3) = nscale
406 iwsp(4) = nstep
407 iwsp(5) = nreject
408 iwsp(6) = ibrkflag
409 iwsp(7) = mbrkdwn
411 wsp(1) = step_min
412 wsp(2) = step_max
413 wsp(3) = x_round
414 wsp(4) = s_round
415 wsp(5) = x_error
416 wsp(6) = s_error
417 wsp(7) = tbrkdwn
418 wsp(8) = sgn*t_now
419 wsp(9) = hump/vnorm
420 wsp(10) = beta/vnorm
422 *----------------------------------------------------------------------|
423 *----------------------------------------------------------------------|
424 subroutine DGPADM( ideg,m,t,H,ldh,wsp,lwsp,ipiv,iexph,ns,iflag )
426 implicit none
427 integer ideg, m, ldh, lwsp, iexph, ns, iflag, ipiv(m)
428 double precision t, H(ldh,m), wsp(lwsp)
430 *-----Purpose----------------------------------------------------------|
432 * Computes exp(t*H), the matrix exponential of a general matrix in
433 * full, using the irreducible rational Pade approximation to the
434 * exponential function exp(x) = r(x) = (+/-)( I + 2*(q(x)/p(x)) ),
435 * combined with scaling-and-squaring.
437 *-----Arguments--------------------------------------------------------|
439 * ideg : (input) the degre of the diagonal Pade to be used.
440 * a value of 6 is generally satisfactory.
442 * m : (input) order of H.
444 * H(ldh,m) : (input) argument matrix.
446 * t : (input) time-scale (can be < 0).
448 * wsp(lwsp) : (workspace/output) lwsp .ge. 4*m*m+ideg+1.
450 * ipiv(m) : (workspace)
452 *>>>> iexph : (output) number such that wsp(iexph) points to exp(tH)
453 * i.e., exp(tH) is located at wsp(iexph ... iexph+m*m-1)
454 * ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
455 * NOTE: if the routine was called with wsp(iptr),
456 * then exp(tH) will start at wsp(iptr+iexph-1).
458 * ns : (output) number of scaling-squaring used.
460 * iflag : (output) exit flag.
461 * 0 - no problem
462 * <0 - problem
464 *----------------------------------------------------------------------|
465 * Roger B. Sidje (rbs@maths.uq.edu.au)
466 * EXPOKIT: Software Package for Computing Matrix Exponentials.
467 * ACM - Transactions On Mathematical Software, 24(1):130-156, 1998
468 *----------------------------------------------------------------------|
470 integer mm,i,j,k,ih2,ip,iq,iused,ifree,iodd,icoef,iput,iget
471 double precision hnorm,scale,scale2,cp,cq
473 intrinsic INT,ABS,DBLE,LOG,MAX
475 *--- check restrictions on input parameters ...
476 mm = m*m
477 iflag = 0
478 if ( ldh.lt.m ) iflag = -1
479 if ( lwsp.lt.4*mm+ideg+1 ) iflag = -2
480 if ( iflag.ne.0 ) stop 'bad sizes (in input of DGPADM)'
482 *--- initialise pointers ...
484 icoef = 1
485 ih2 = icoef + (ideg+1)
486 ip = ih2 + mm
487 iq = ip + mm
488 ifree = iq + mm
490 *--- scaling: seek ns such that ||t*H/2^ns|| < 1/2;
491 * and set scale = t/2^ns ...
493 do i = 1,m
494 wsp(i) = 0.0d0
495 enddo
496 do j = 1,m
497 do i = 1,m
498 wsp(i) = wsp(i) + ABS( H(i,j) )
499 enddo
500 enddo
501 hnorm = 0.0d0
502 do i = 1,m
503 hnorm = MAX( hnorm,wsp(i) )
504 enddo
505 hnorm = ABS( t*hnorm )
506 if ( hnorm.eq.0.0d0 ) stop 'Error - null H in input of DGPADM.'
507 ns = MAX( 0,INT(LOG(hnorm)/LOG(2.0d0))+2 )
508 scale = t / DBLE(2**ns)
509 scale2 = scale*scale
511 *--- compute Pade coefficients ...
513 i = ideg+1
514 j = 2*ideg+1
515 wsp(icoef) = 1.0d0
516 do k = 1,ideg
517 wsp(icoef+k) = (wsp(icoef+k-1)*DBLE( i-k ))/DBLE( k*(j-k) )
518 enddo
520 *--- H2 = scale2*H*H ...
522 call DGEMM( 'n','n',m,m,m,scale2,H,ldh,H,ldh,0.0d0,wsp(ih2),m )
524 *--- initialize p (numerator) and q (denominator) ...
526 cp = wsp(icoef+ideg-1)
527 cq = wsp(icoef+ideg)
528 do j = 1,m
529 do i = 1,m
530 wsp(ip + (j-1)*m + i-1) = 0.0d0
531 wsp(iq + (j-1)*m + i-1) = 0.0d0
532 enddo
533 wsp(ip + (j-1)*(m+1)) = cp
534 wsp(iq + (j-1)*(m+1)) = cq
535 enddo
537 *--- Apply Horner rule ...
539 iodd = 1
540 k = ideg - 1
541 100 continue
542 iused = iodd*iq + (1-iodd)*ip
543 call DGEMM( 'n','n',m,m,m, 1.0d0,wsp(iused),m,
544 . wsp(ih2),m, 0.0d0,wsp(ifree),m )
545 do j = 1,m
546 wsp(ifree+(j-1)*(m+1)) = wsp(ifree+(j-1)*(m+1))+wsp(icoef+k-1)
547 enddo
548 ip = (1-iodd)*ifree + iodd*ip
549 iq = iodd*ifree + (1-iodd)*iq
550 ifree = iused
551 iodd = 1-iodd
552 k = k-1
553 if ( k.gt.0 ) goto 100
555 *--- Obtain (+/-)(I + 2*(p\q)) ...
557 if ( iodd .eq. 1 ) then
558 call DGEMM( 'n','n',m,m,m, scale,wsp(iq),m,
559 . H,ldh, 0.0d0,wsp(ifree),m )
560 iq = ifree
561 else
562 call DGEMM( 'n','n',m,m,m, scale,wsp(ip),m,
563 . H,ldh, 0.0d0,wsp(ifree),m )
564 ip = ifree
565 endif
566 call DAXPY( mm, -1.0d0,wsp(ip),1, wsp(iq),1 )
567 call DGESV( m,m, wsp(iq),m, ipiv, wsp(ip),m, iflag )
568 if ( iflag.ne.0 ) stop 'Problem in DGESV (within DGPADM)'
569 call DSCAL( mm, 2.0d0, wsp(ip), 1 )
570 do j = 1,m
571 wsp(ip+(j-1)*(m+1)) = wsp(ip+(j-1)*(m+1)) + 1.0d0
572 enddo
573 iput = ip
574 if ( ns.eq.0 .and. iodd.eq.1 ) then
575 call DSCAL( mm, -1.0d0, wsp(ip), 1 )
576 goto 200
577 endif
579 *-- squaring : exp(t*H) = (exp(t*H))^(2^ns) ...
581 iodd = 1
582 do k = 1,ns
583 iget = iodd*ip + (1-iodd)*iq
584 iput = (1-iodd)*ip + iodd*iq
585 call DGEMM( 'n','n',m,m,m, 1.0d0,wsp(iget),m, wsp(iget),m,
586 . 0.0d0,wsp(iput),m )
587 iodd = 1-iodd
588 enddo
589 200 continue
590 iexph = iput
592 *----------------------------------------------------------------------|
593 *----------------------------------------------------------------------|
594 subroutine DSPADM( ideg,m,t,H,ldh,wsp,lwsp,ipiv,iexph,ns,iflag )
596 implicit none
597 integer ideg, m, ldh, lwsp, iexph, ns, iflag, ipiv(m)
598 double precision t, H(ldh,m), wsp(lwsp)
600 *-----Purpose----------------------------------------------------------|
602 * Computes exp(t*H), the matrix exponential of a symmetric matrix
603 * in full, using the irreducible rational Pade approximation to the
604 * exponential function exp(x) = r(x) = (+/-)( I + 2*(q(x)/p(x)) ),
605 * combined with scaling-and-squaring.
607 *-----Arguments--------------------------------------------------------|
609 * ideg : (input) the degre of the diagonal Pade to be used.
610 * a value of 6 is generally satisfactory.
612 * m : (input) order of H.
614 * H(ldh,m) : (input) argument matrix (both lower and upper parts).
616 * t : (input) time-scale (can be < 0).
618 * wsp(lwsp) : (workspace/output) lwsp .ge. 4*m*m+ideg+1.
620 * ipiv(m) : (workspace)
622 *>>>> iexph : (output) number such that wsp(iexph) points to exp(tH)
623 * i.e., exp(tH) is located at wsp(iexph ... iexph+m*m-1)
624 * ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
625 * NOTE: if the routine was called with wsp(iptr),
626 * then exp(tH) will start at wsp(iptr+iexph-1).
628 * ns : (output) number of scaling-squaring used.
630 * iflag : (output) exit flag.
631 * 0 - no problem
632 * <0 - problem
634 *----------------------------------------------------------------------|
635 * Roger B. Sidje (rbs@maths.uq.edu.au)
636 * EXPOKIT: Software Package for Computing Matrix Exponentials.
637 * ACM - Transactions On Mathematical Software, 24(1):130-156, 1998
638 *----------------------------------------------------------------------|
640 integer mm,i,j,k,ih2,ip,iq,iused,ifree,iodd,icoef,iput,iget
641 double precision hnorm,scale,scale2,cp,cq
643 intrinsic INT,ABS,DBLE,LOG,MAX
645 *--- check restrictions on input parameters ...
646 mm = m*m
647 iflag = 0
648 if ( ldh.lt.m ) iflag = -1
649 if ( lwsp.lt.4*mm+ideg+1 ) iflag = -2
650 if ( iflag.ne.0 ) stop 'bad sizes (in input of DSPADM)'
652 *--- initialise pointers ...
654 icoef = 1
655 ih2 = icoef + (ideg+1)
656 ip = ih2 + mm
657 iq = ip + mm
658 ifree = iq + mm
660 *--- scaling: seek ns such that ||t*H/2^ns|| < 1/2;
661 * and set scale = t/2^ns ...
663 do i = 1,m
664 wsp(i) = 0.0d0
665 enddo
666 do j = 1,m
667 do i = 1,m
668 wsp(i) = wsp(i) + ABS( H(i,j) )
669 enddo
670 enddo
671 hnorm = 0.0d0
672 do i = 1,m
673 hnorm = MAX( hnorm,wsp(i) )
674 enddo
675 hnorm = ABS( t*hnorm )
676 if ( hnorm.eq.0.0d0 ) stop 'Error - null H in input of DSPADM.'
677 ns = MAX( 0,INT(LOG(hnorm)/LOG(2.0d0))+2 )
678 scale = t / DBLE(2**ns)
679 scale2 = scale*scale
681 *--- compute Pade coefficients ...
683 i = ideg+1
684 j = 2*ideg+1
685 wsp(icoef) = 1.0d0
686 do k = 1,ideg
687 wsp(icoef+k) = (wsp(icoef+k-1)*DBLE( i-k ))/DBLE( k*(j-k) )
688 enddo
690 *--- H2 = scale2*H*H ...
692 call DGEMM( 'n','n',m,m,m,scale2,H,ldh,H,ldh,0.0d0,wsp(ih2),m )
694 *--- initialize p (numerator) and q (denominator) ...
696 cp = wsp(icoef+ideg-1)
697 cq = wsp(icoef+ideg)
698 do j = 1,m
699 do i = 1,m
700 wsp(ip + (j-1)*m + i-1) = 0.0d0
701 wsp(iq + (j-1)*m + i-1) = 0.0d0
702 enddo
703 wsp(ip + (j-1)*(m+1)) = cp
704 wsp(iq + (j-1)*(m+1)) = cq
705 enddo
707 *--- Apply Horner rule ...
709 iodd = 1
710 k = ideg - 1
711 100 continue
712 iused = iodd*iq + (1-iodd)*ip
713 call DGEMM( 'n','n',m,m,m, 1.0d0,wsp(iused),m,
714 . wsp(ih2),m, 0.0d0,wsp(ifree),m )
715 do j = 1,m
716 wsp(ifree+(j-1)*(m+1)) = wsp(ifree+(j-1)*(m+1))+wsp(icoef+k-1)
717 enddo
718 ip = (1-iodd)*ifree + iodd*ip
719 iq = iodd*ifree + (1-iodd)*iq
720 ifree = iused
721 iodd = 1-iodd
722 k = k-1
723 if ( k.gt.0 ) goto 100
725 *--- Obtain (+/-)(I + 2*(p\q)) ...
727 if ( iodd .eq. 1 ) then
728 call DGEMM( 'n','n',m,m,m, scale,wsp(iq),m,
729 . H,ldh, 0.0d0,wsp(ifree),m )
730 iq = ifree
731 else
732 call DGEMM( 'n','n',m,m,m, scale,wsp(ip),m,
733 . H,ldh, 0.0d0,wsp(ifree),m )
734 ip = ifree
735 endif
736 call DAXPY( mm, -1.0d0,wsp(ip),1, wsp(iq),1 )
737 call DSYSV( 'U',m,m,wsp(iq),m,ipiv,wsp(ip),m,wsp(ih2),mm,iflag )
738 if ( iflag.ne.0 ) stop 'Problem in DSYSV (within DSPADM)'
739 call DSCAL( mm, 2.0d0, wsp(ip), 1 )
740 do j = 1,m
741 wsp(ip+(j-1)*(m+1)) = wsp(ip+(j-1)*(m+1)) + 1.0d0
742 enddo
743 iput = ip
744 if ( ns.eq.0 .and. iodd.eq.1 ) then
745 call DSCAL( mm, -1.0d0, wsp(ip), 1 )
746 goto 200
747 endif
749 *-- squaring : exp(t*H) = (exp(t*H))^(2^ns) ...
751 iodd = 1
752 do k = 1,ns
753 iget = iodd*ip + (1-iodd)*iq
754 iput = (1-iodd)*ip + iodd*iq
755 call DGEMM( 'n','n',m,m,m, 1.0d0,wsp(iget),m, wsp(iget),m,
756 . 0.0d0,wsp(iput),m )
757 iodd = 1-iodd
758 enddo
759 200 continue
760 iexph = iput
762 *----------------------------------------------------------------------|
763 *----------------------------------------------------------------------|
764 subroutine ZGPADM(ideg,m,t,H,ldh,wsp,lwsp,ipiv,iexph,ns,iflag)
766 implicit none
767 double precision t
768 integer ideg, m, ldh, lwsp, iexph, ns, iflag, ipiv(m)
769 complex*16 H(ldh,m), wsp(lwsp)
771 *-----Purpose----------------------------------------------------------|
773 * Computes exp(t*H), the matrix exponential of a general complex
774 * matrix in full, using the irreducible rational Pade approximation
775 * to the exponential exp(z) = r(z) = (+/-)( I + 2*(q(z)/p(z)) ),
776 * combined with scaling-and-squaring.
778 *-----Arguments--------------------------------------------------------|
780 * ideg : (input) the degre of the diagonal Pade to be used.
781 * a value of 6 is generally satisfactory.
783 * m : (input) order of H.
785 * H(ldh,m) : (input) argument matrix.
787 * t : (input) time-scale (can be < 0).
789 * wsp(lwsp) : (workspace/output) lwsp .ge. 4*m*m+ideg+1.
791 * ipiv(m) : (workspace)
793 *>>>> iexph : (output) number such that wsp(iexph) points to exp(tH)
794 * i.e., exp(tH) is located at wsp(iexph ... iexph+m*m-1)
795 * ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
796 * NOTE: if the routine was called with wsp(iptr),
797 * then exp(tH) will start at wsp(iptr+iexph-1).
799 * ns : (output) number of scaling-squaring used.
801 * iflag : (output) exit flag.
802 * 0 - no problem
803 * <0 - problem
805 *----------------------------------------------------------------------|
806 * Roger B. Sidje (rbs@maths.uq.edu.au)
807 * EXPOKIT: Software Package for Computing Matrix Exponentials.
808 * ACM - Transactions On Mathematical Software, 24(1):130-156, 1998
809 *----------------------------------------------------------------------|
811 integer i,j,k,icoef,mm,ih2,iodd,iused,ifree,iq,ip,iput,iget
812 double precision hnorm
813 complex*16 cp, cq, scale, scale2, ZERO, ONE
815 parameter( ZERO=(0.0d0,0.0d0), ONE=(1.0d0,0.0d0) )
816 intrinsic ABS, CMPLX, DBLE, INT, LOG, MAX
818 *--- check restrictions on input parameters ...
819 mm = m*m
820 iflag = 0
821 if ( ldh.lt.m ) iflag = -1
822 if ( lwsp.lt.4*mm+ideg+1 ) iflag = -2
823 if ( iflag.ne.0 ) stop 'bad sizes (in input of ZGPADM)'
825 *--- initialise pointers ...
827 icoef = 1
828 ih2 = icoef + (ideg+1)
829 ip = ih2 + mm
830 iq = ip + mm
831 ifree = iq + mm
833 *--- scaling: seek ns such that ||t*H/2^ns|| < 1/2;
834 * and set scale = t/2^ns ...
836 do i = 1,m
837 wsp(i) = ZERO
838 enddo
839 do j = 1,m
840 do i = 1,m
841 wsp(i) = wsp(i) + ABS( H(i,j) )
842 enddo
843 enddo
844 hnorm = 0.0d0
845 do i = 1,m
846 hnorm = MAX( hnorm,DBLE(wsp(i)) )
847 enddo
848 hnorm = ABS( t*hnorm )
849 if ( hnorm.eq.0.0d0 ) stop 'Error - null H in input of ZGPADM.'
850 ns = MAX( 0,INT(LOG(hnorm)/LOG(2.0d0))+2 )
851 scale = CMPLX( t/DBLE(2**ns),0.0d0 )
852 scale2 = scale*scale
854 *--- compute Pade coefficients ...
856 i = ideg+1
857 j = 2*ideg+1
858 wsp(icoef) = ONE
859 do k = 1,ideg
860 wsp(icoef+k) = (wsp(icoef+k-1)*DBLE( i-k ))/DBLE( k*(j-k) )
861 enddo
863 *--- H2 = scale2*H*H ...
865 call ZGEMM( 'n','n',m,m,m,scale2,H,ldh,H,ldh,ZERO,wsp(ih2),m )
867 *--- initialise p (numerator) and q (denominator) ...
869 cp = wsp(icoef+ideg-1)
870 cq = wsp(icoef+ideg)
871 do j = 1,m
872 do i = 1,m
873 wsp(ip + (j-1)*m + i-1) = ZERO
874 wsp(iq + (j-1)*m + i-1) = ZERO
875 enddo
876 wsp(ip + (j-1)*(m+1)) = cp
877 wsp(iq + (j-1)*(m+1)) = cq
878 enddo
880 *--- Apply Horner rule ...
882 iodd = 1
883 k = ideg - 1
884 100 continue
885 iused = iodd*iq + (1-iodd)*ip
886 call ZGEMM( 'n','n',m,m,m, ONE,wsp(iused),m,
887 . wsp(ih2),m, ZERO,wsp(ifree),m )
888 do j = 1,m
889 wsp(ifree+(j-1)*(m+1)) = wsp(ifree+(j-1)*(m+1))+wsp(icoef+k-1)
890 enddo
891 ip = (1-iodd)*ifree + iodd*ip
892 iq = iodd*ifree + (1-iodd)*iq
893 ifree = iused
894 iodd = 1-iodd
895 k = k-1
896 if ( k.gt.0 ) goto 100
898 *--- Obtain (+/-)(I + 2*(p\q)) ...
900 if ( iodd.ne.0 ) then
901 call ZGEMM( 'n','n',m,m,m, scale,wsp(iq),m,
902 . H,ldh, ZERO,wsp(ifree),m )
903 iq = ifree
904 else
905 call ZGEMM( 'n','n',m,m,m, scale,wsp(ip),m,
906 . H,ldh, ZERO,wsp(ifree),m )
907 ip = ifree
908 endif
909 call ZAXPY( mm, -ONE,wsp(ip),1, wsp(iq),1 )
910 call ZGESV( m,m, wsp(iq),m, ipiv, wsp(ip),m, iflag )
911 if ( iflag.ne.0 ) stop 'Problem in ZGESV (within ZGPADM)'
912 call ZDSCAL( mm, 2.0d0, wsp(ip), 1 )
913 do j = 1,m
914 wsp(ip+(j-1)*(m+1)) = wsp(ip+(j-1)*(m+1)) + ONE
915 enddo
916 iput = ip
917 if ( ns.eq.0 .and. iodd.ne.0 ) then
918 call ZDSCAL( mm, -1.0d0, wsp(ip), 1 )
919 goto 200
920 endif
922 *-- squaring : exp(t*H) = (exp(t*H))^(2^ns) ...
924 iodd = 1
925 do k = 1,ns
926 iget = iodd*ip + (1-iodd)*iq
927 iput = (1-iodd)*ip + iodd*iq
928 call ZGEMM( 'n','n',m,m,m, ONE,wsp(iget),m, wsp(iget),m,
929 . ZERO,wsp(iput),m )
930 iodd = 1-iodd
931 enddo
932 200 continue
933 iexph = iput
935 *----------------------------------------------------------------------|
936 subroutine ZHPADM(ideg,m,t,H,ldh,wsp,lwsp,ipiv,iexph,ns,iflag)
938 implicit none
939 double precision t
940 integer ideg, m, ldh, lwsp, iexph, ns, iflag, ipiv(m)
941 complex*16 H(ldh,m), wsp(lwsp)
943 *-----Purpose----------------------------------------------------------|
945 * Computes exp(t*H), the matrix exponential of an Hermitian matrix
946 * in full, using the irreducible rational Pade approximation to the
947 * exponential function exp(z) = r(z) = (+/-)( I + 2*(q(z)/p(z)) ),
948 * combined with scaling-and-squaring.
950 *-----Arguments--------------------------------------------------------|
952 * ideg : (input) the degre of the diagonal Pade to be used.
953 * a value of 6 is generally satisfactory.
955 * m : (input) order of H.
957 * H(ldh,m) : (input) argument matrix (both lower and upper parts).
959 * t : (input) time-scale (can be < 0).
961 * wsp(lwsp) : (workspace/output) lwsp .ge. 4*m*m+ideg+1.
963 * ipiv(m) : (workspace)
965 *>>>> iexph : (output) number such that wsp(iexph) points to exp(tH)
966 * i.e., exp(tH) is located at wsp(iexph ... iexph+m*m-1)
967 * ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
968 * NOTE: if the routine was called with wsp(iptr),
969 * then exp(tH) will start at wsp(iptr+iexph-1).
971 * ns : (output) number of scaling-squaring used.
973 * iflag : (output) exit flag.
974 * 0 - no problem
975 * <0 - problem
977 *----------------------------------------------------------------------|
978 * Roger B. Sidje (rbs@maths.uq.edu.au)
979 * EXPOKIT: Software Package for Computing Matrix Exponentials.
980 * ACM - Transactions On Mathematical Software, 24(1):130-156, 1998
981 *----------------------------------------------------------------------|
983 integer i,j,k,icoef,mm,ih2,iodd,iused,ifree,iq,ip,iput,iget
984 double precision hnorm
985 complex*16 cp, cq, scale, scale2, ZERO, ONE
987 parameter( ZERO=(0.0d0,0.0d0), ONE=(1.0d0,0.0d0) )
988 intrinsic ABS, CMPLX, DBLE, INT, LOG, MAX
990 *--- check restrictions on input parameters ...
991 mm = m*m
992 iflag = 0
993 if ( ldh.lt.m ) iflag = -1
994 if ( lwsp.lt.4*mm+ideg+1 ) iflag = -2
995 if ( iflag.ne.0 ) stop 'bad sizes (in input of ZHPADM)'
997 *--- initialise pointers ...
999 icoef = 1
1000 ih2 = icoef + (ideg+1)
1001 ip = ih2 + mm
1002 iq = ip + mm
1003 ifree = iq + mm
1005 *--- scaling: seek ns such that ||t*H/2^ns|| < 1/2;
1006 * and set scale = t/2^ns ...
1008 do i = 1,m
1009 wsp(i) = ZERO
1010 enddo
1011 do j = 1,m
1012 do i = 1,m
1013 wsp(i) = wsp(i) + ABS( H(i,j) )
1014 enddo
1015 enddo
1016 hnorm = 0.0d0
1017 do i = 1,m
1018 hnorm = MAX( hnorm,DBLE(wsp(i)) )
1019 enddo
1020 hnorm = ABS( t*hnorm )
1021 if ( hnorm.eq.0.0d0 ) stop 'Error - null H in input of ZHPADM.'
1022 ns = MAX( 0,INT(LOG(hnorm)/LOG(2.0d0))+2 )
1023 scale = CMPLX( t/DBLE(2**ns),0.0d0 )
1024 scale2 = scale*scale
1026 *--- compute Pade coefficients ...
1028 i = ideg+1
1029 j = 2*ideg+1
1030 wsp(icoef) = ONE
1031 do k = 1,ideg
1032 wsp(icoef+k) = (wsp(icoef+k-1)*DBLE( i-k ))/DBLE( k*(j-k) )
1033 enddo
1035 *--- H2 = scale2*H*H ...
1037 call ZGEMM( 'n','n',m,m,m,scale2,H,ldh,H,ldh,ZERO,wsp(ih2),m )
1039 *--- initialise p (numerator) and q (denominator) ...
1041 cp = wsp(icoef+ideg-1)
1042 cq = wsp(icoef+ideg)
1043 do j = 1,m
1044 do i = 1,m
1045 wsp(ip + (j-1)*m + i-1) = ZERO
1046 wsp(iq + (j-1)*m + i-1) = ZERO
1047 enddo
1048 wsp(ip + (j-1)*(m+1)) = cp
1049 wsp(iq + (j-1)*(m+1)) = cq
1050 enddo
1052 *--- Apply Horner rule ...
1054 iodd = 1
1055 k = ideg - 1
1056 100 continue
1057 iused = iodd*iq + (1-iodd)*ip
1058 call ZGEMM( 'n','n',m,m,m, ONE,wsp(iused),m,
1059 . wsp(ih2),m, ZERO,wsp(ifree),m )
1060 do j = 1,m
1061 wsp(ifree+(j-1)*(m+1)) = wsp(ifree+(j-1)*(m+1))+wsp(icoef+k-1)
1062 enddo
1063 ip = (1-iodd)*ifree + iodd*ip
1064 iq = iodd*ifree + (1-iodd)*iq
1065 ifree = iused
1066 iodd = 1-iodd
1067 k = k-1
1068 if ( k.gt.0 ) goto 100
1070 *--- Obtain (+/-)(I + 2*(p\q)) ...
1072 if ( iodd.ne.0 ) then
1073 call ZGEMM( 'n','n',m,m,m, scale,wsp(iq),m,
1074 . H,ldh, ZERO,wsp(ifree),m )
1075 iq = ifree
1076 else
1077 call ZGEMM( 'n','n',m,m,m, scale,wsp(ip),m,
1078 . H,ldh, ZERO,wsp(ifree),m )
1079 ip = ifree
1080 endif
1081 call ZAXPY( mm, -ONE,wsp(ip),1, wsp(iq),1 )
1082 call ZHESV( 'U',m,m,wsp(iq),m,ipiv,wsp(ip),m,wsp(ih2),mm,iflag )
1083 if ( iflag.ne.0 ) stop 'Problem in ZHESV (within ZHPADM)'
1084 call ZDSCAL( mm, 2.0d0, wsp(ip), 1 )
1085 do j = 1,m
1086 wsp(ip+(j-1)*(m+1)) = wsp(ip+(j-1)*(m+1)) + ONE
1087 enddo
1088 iput = ip
1089 if ( ns.eq.0 .and. iodd.ne.0 ) then
1090 call ZDSCAL( mm, -1.0d0, wsp(ip), 1 )
1091 goto 200
1092 endif
1094 *-- squaring : exp(t*H) = (exp(t*H))^(2^ns) ...
1096 iodd = 1
1097 do k = 1,ns
1098 iget = iodd*ip + (1-iodd)*iq
1099 iput = (1-iodd)*ip + iodd*iq
1100 call ZGEMM( 'n','n',m,m,m, ONE,wsp(iget),m, wsp(iget),m,
1101 . ZERO,wsp(iput),m )
1102 iodd = 1-iodd
1103 enddo
1104 200 continue
1105 iexph = iput
1107 *----------------------------------------------------------------------|
1108 subroutine DGCHBV( m, t, H,ldh, y, wsp, iwsp, iflag )
1110 implicit none
1111 integer m, ldh, iflag, iwsp(m)
1112 double precision t, H(ldh,m), y(m)
1113 complex*16 wsp(m*(m+2))
1115 *-----Purpose----------------------------------------------------------|
1117 *--- DGCHBV computes y = exp(t*H)*y using the partial fraction
1118 * expansion of the uniform rational Chebyshev approximation
1119 * to exp(-x) of type (14,14). H is a General matrix.
1120 * About 14-digit accuracy is expected if the matrix H is negative
1121 * definite. The algorithm may behave poorly otherwise.
1123 *-----Arguments--------------------------------------------------------|
1125 * m : (input) order of the matrix H
1127 * t : (input) time-scaling factor (can be < 0).
1129 * H(ldh,m): (input) argument matrix.
1131 * y(m) : (input/output) on input the operand vector,
1132 * on output the resulting vector exp(t*H)*y.
1134 * iwsp(m) : (workspace)
1136 * wsp : (workspace). Observe that a double precision vector of
1137 * length 2*m*(m+2) can be used as well when calling this
1138 * routine (thus avoiding an idle complex array elsewhere)
1140 *----------------------------------------------------------------------|
1141 * Roger B. Sidje (rbs@maths.uq.edu.au)
1142 * EXPOKIT: Software Package for Computing Matrix Exponentials.
1143 * ACM - Transactions On Mathematical Software, 24(1):130-156, 1998
1144 *----------------------------------------------------------------------|
1146 integer ndeg, i, j, ip, ih, iy, iz
1147 parameter ( ndeg=7 )
1148 double precision alpha0
1149 complex*16 alpha(ndeg), theta(ndeg)
1151 intrinsic DBLE
1153 *--- Pointers ...
1155 ih = 1
1156 iy = ih + m*m
1157 iz = iy + m
1159 *--- Coefficients and poles of the partial fraction expansion ...
1161 alpha0 = 0.183216998528140087D-11
1162 alpha(1)=( 0.557503973136501826D+02,-0.204295038779771857D+03)
1163 alpha(2)=(-0.938666838877006739D+02, 0.912874896775456363D+02)
1164 alpha(3)=( 0.469965415550370835D+02,-0.116167609985818103D+02)
1165 alpha(4)=(-0.961424200626061065D+01,-0.264195613880262669D+01)
1166 alpha(5)=( 0.752722063978321642D+00, 0.670367365566377770D+00)
1167 alpha(6)=(-0.188781253158648576D-01,-0.343696176445802414D-01)
1168 alpha(7)=( 0.143086431411801849D-03, 0.287221133228814096D-03)
1170 theta(1)=(-0.562314417475317895D+01, 0.119406921611247440D+01)
1171 theta(2)=(-0.508934679728216110D+01, 0.358882439228376881D+01)
1172 theta(3)=(-0.399337136365302569D+01, 0.600483209099604664D+01)
1173 theta(4)=(-0.226978543095856366D+01, 0.846173881758693369D+01)
1174 theta(5)=( 0.208756929753827868D+00, 0.109912615662209418D+02)
1175 theta(6)=( 0.370327340957595652D+01, 0.136563731924991884D+02)
1176 theta(7)=( 0.889777151877331107D+01, 0.166309842834712071D+02)
1178 *--- Accumulation of the contribution of each pole ...
1180 do j = 1,m
1181 wsp(iz+j-1) = y(j)
1182 y(j) = y(j)*alpha0
1183 enddo
1184 do ip = 1,ndeg
1185 *--- Solve each fraction using Gaussian elimination with pivoting...
1186 do j = 1,m
1187 do i = 1,m
1188 wsp(ih+(j-1)*m+i-1) = -t*H(i,j)
1189 enddo
1190 wsp(ih+(j-1)*m+j-1) = wsp(ih+(j-1)*m+j-1)-theta(ip)
1191 wsp(iy+j-1) = wsp(iz+j-1)
1192 enddo
1193 call ZGESV( M, 1, WSP(iH),M, IWSP, WSP(iY),M, IFLAG )
1194 if ( IFLAG.ne.0 ) stop 'Error in DGCHBV'
1195 *--- Accumulate the partial result in y ...
1196 do j = 1,m
1197 y(j) = y(j) + DBLE( alpha(ip)*wsp(iy+j-1) )
1198 enddo
1199 enddo
1201 *----------------------------------------------------------------------|
1202 *----------------------------------------------------------------------|
1203 subroutine DSCHBV( m, t, H,ldh, y, wsp, iwsp, iflag )
1205 implicit none
1206 integer m, ldh, iflag, iwsp(m)
1207 double precision t, H(ldh,m), y(m)
1208 complex*16 wsp(m*(m+2))
1210 *-----Purpose----------------------------------------------------------|
1212 *--- DSCHBV computes y = exp(t*H)*y using the partial fraction
1213 * expansion of the uniform rational Chebyshev approximation
1214 * to exp(-x) of type (14,14). H is assumed to be symmetric.
1215 * About 14-digit accuracy is expected if the matrix H is negative
1216 * definite. The algorithm may behave poorly otherwise.
1218 *-----Arguments--------------------------------------------------------|
1220 * m : (input) order of matrix H
1222 * t : (input) time-scaling factor (can be < 0).
1224 * H(ldh,m): (input) symmetric matrix.
1226 * y(m) : (input/output) on input the operand vector,
1227 * on output the resulting vector exp(t*H)*y.
1229 * iwsp(m) : (workspace)
1231 * wsp : (workspace). Observe that a double precision vector of
1232 * length 2*m*(m+2) can be used as well when calling this
1233 * routine (thus avoiding an idle complex array elsewhere)
1235 *----------------------------------------------------------------------|
1236 * Roger B. Sidje (rbs@maths.uq.edu.au)
1237 * EXPOKIT: Software Package for Computing Matrix Exponentials.
1238 * ACM - Transactions On Mathematical Software, 24(1):130-156, 1998
1239 *----------------------------------------------------------------------|
1241 integer ndeg, i, j, ip, ih, iy, iz
1242 parameter ( ndeg=7 )
1243 double precision alpha0
1244 complex*16 alpha(ndeg), theta(ndeg), w
1246 intrinsic ABS,CMPLX,DBLE,MIN
1248 *--- Pointers ...
1250 ih = 1
1251 iy = ih + m*m
1252 iz = iy + m
1254 *--- Coefficients and poles of the partial fraction expansion ...
1256 alpha0 = 0.183216998528140087D-11
1257 alpha(1)=( 0.557503973136501826D+02,-0.204295038779771857D+03)
1258 alpha(2)=(-0.938666838877006739D+02, 0.912874896775456363D+02)
1259 alpha(3)=( 0.469965415550370835D+02,-0.116167609985818103D+02)
1260 alpha(4)=(-0.961424200626061065D+01,-0.264195613880262669D+01)
1261 alpha(5)=( 0.752722063978321642D+00, 0.670367365566377770D+00)
1262 alpha(6)=(-0.188781253158648576D-01,-0.343696176445802414D-01)
1263 alpha(7)=( 0.143086431411801849D-03, 0.287221133228814096D-03)
1265 theta(1)=(-0.562314417475317895D+01, 0.119406921611247440D+01)
1266 theta(2)=(-0.508934679728216110D+01, 0.358882439228376881D+01)
1267 theta(3)=(-0.399337136365302569D+01, 0.600483209099604664D+01)
1268 theta(4)=(-0.226978543095856366D+01, 0.846173881758693369D+01)
1269 theta(5)=( 0.208756929753827868D+00, 0.109912615662209418D+02)
1270 theta(6)=( 0.370327340957595652D+01, 0.136563731924991884D+02)
1271 theta(7)=( 0.889777151877331107D+01, 0.166309842834712071D+02)
1273 *--- Accumulation of the contribution of each pole ...
1275 do j = 1,m
1276 wsp(iz+j-1) = y(j)
1277 y(j) = y(j)*alpha0
1278 enddo
1279 do ip = 1,ndeg
1280 *--- Solve each fraction using Gaussian elimination with pivoting...
1281 do j = 1,m
1282 do i = 1,m
1283 wsp(ih+(j-1)*m+i-1) = -t*H(i,j)
1284 enddo
1285 wsp(ih+(j-1)*m+j-1) = wsp(ih+(j-1)*m+j-1)-theta(ip)
1286 wsp(iy+j-1) = wsp(iz+j-1)
1287 enddo
1288 !call ZSYSV('U', M, 1, WSP(iH),M, IWSP, WSP(iY),M, W,1, IFLAG )
1289 if ( IFLAG.ne.0 ) stop 'Error in DSCHBV'
1290 *--- Accumulate the partial result in y ...
1291 do i = 1,m
1292 y(i) = y(i) + DBLE( alpha(ip)*wsp(iy+i-1) )
1293 enddo
1294 enddo
1296 *----------------------------------------------------------------------|
1297 *----------------------------------------------------------------------|
1298 subroutine ZGCHBV( m, t, H,ldh, y, wsp, iwsp, iflag )
1300 implicit none
1301 integer m, ldh, iflag, iwsp(m)
1302 double precision t
1303 complex*16 H(ldh,m), y(m), wsp(m*(m+2))
1305 *-----Purpose----------------------------------------------------------|
1307 *--- ZGCHBV computes y = exp(t*H)*y using the partial fraction
1308 * expansion of the uniform rational Chebyshev approximation
1309 * to exp(-x) of type (14,14). H is a General matrix.
1310 * About 14-digit accuracy is expected if the matrix H is negative
1311 * definite. The algorithm may behave poorly otherwise.
1313 *-----Arguments--------------------------------------------------------|
1315 * m : (input) order of the matrix H.
1317 * t : (input) time-scaling factor (can be < 0).
1319 * H(ldh,m): (input) argument matrix.
1321 * y(m) : (input/output) on input the operand vector,
1322 * on output the resulting vector exp(t*H)*y.
1324 * iwsp(m) : (workspace)
1326 * wsp : (workspace). Observe that a double precision vector of
1327 * length 2*m*(m+2) can be used as well when calling this
1328 * routine.
1330 *----------------------------------------------------------------------|
1331 * Roger B. Sidje (rbs@maths.uq.edu.au)
1332 * EXPOKIT: Software Package for Computing Matrix Exponentials.
1333 * ACM - Transactions On Mathematical Software, 24(1):130-156, 1998
1334 *----------------------------------------------------------------------|
1336 integer ndeg, i, j, ip, ih, iy, iz
1337 parameter ( ndeg=7 )
1338 double precision alpha0
1339 complex*16 alpha(2*ndeg), theta(2*ndeg)
1341 *--- Pointers ...
1343 ih = 1
1344 iy = ih + m*m
1345 iz = iy + m
1347 *--- Coefficients and poles of the partial fraction expansion ...
1349 alpha0 = 0.183216998528140087D-11
1350 alpha(1)=( 0.557503973136501826D+02,-0.204295038779771857D+03)
1351 alpha(2)=(-0.938666838877006739D+02, 0.912874896775456363D+02)
1352 alpha(3)=( 0.469965415550370835D+02,-0.116167609985818103D+02)
1353 alpha(4)=(-0.961424200626061065D+01,-0.264195613880262669D+01)
1354 alpha(5)=( 0.752722063978321642D+00, 0.670367365566377770D+00)
1355 alpha(6)=(-0.188781253158648576D-01,-0.343696176445802414D-01)
1356 alpha(7)=( 0.143086431411801849D-03, 0.287221133228814096D-03)
1358 theta(1)=(-0.562314417475317895D+01, 0.119406921611247440D+01)
1359 theta(2)=(-0.508934679728216110D+01, 0.358882439228376881D+01)
1360 theta(3)=(-0.399337136365302569D+01, 0.600483209099604664D+01)
1361 theta(4)=(-0.226978543095856366D+01, 0.846173881758693369D+01)
1362 theta(5)=( 0.208756929753827868D+00, 0.109912615662209418D+02)
1363 theta(6)=( 0.370327340957595652D+01, 0.136563731924991884D+02)
1364 theta(7)=( 0.889777151877331107D+01, 0.166309842834712071D+02)
1366 do ip = 1,ndeg
1367 theta(ndeg+ip) = CONJG( theta(ip) )
1368 alpha(ndeg+ip) = CONJG( alpha(ip) )
1369 enddo
1371 *--- Accumulation of the contribution of each pole ...
1373 do j = 1,m
1374 wsp(iz+j-1) = y(j)
1375 y(j) = y(j)*alpha0
1376 enddo
1377 do ip = 1,2*ndeg
1378 alpha(ip) = 0.5d0*alpha(ip)
1379 *--- Solve each fraction using Gaussian elimination with pivoting...
1380 do j = 1,m
1381 do i = 1,m
1382 wsp(ih+(j-1)*m+i-1) = -t*H(i,j)
1383 enddo
1384 wsp(ih+(j-1)*m+j-1) = wsp(ih+(j-1)*m+j-1)-theta(ip)
1385 wsp(iy+j-1) = wsp(iz+j-1)
1386 enddo
1387 call ZGESV( M, 1, WSP(iH),M, IWSP, WSP(iY),M, IFLAG )
1388 if ( IFLAG.ne.0 ) stop 'Error in ZGCHBV'
1389 *--- Accumulate the partial result in y ...
1390 do i = 1,m
1391 y(i) = y(i) + alpha(ip)*wsp(iy+i-1)
1392 enddo
1393 enddo
1395 *----------------------------------------------------------------------|
1396 *----------------------------------------------------------------------|
1397 subroutine DNCHBV( m, t, H,ldh, y, wsp )
1399 implicit none
1400 integer m, ldh
1401 double precision t, H(ldh,m), y(m)
1402 complex*16 wsp(m*(m+2))
1404 *-----Purpose----------------------------------------------------------|
1406 *--- DNCHBV computes y = exp(t*H)*y using the partial fraction
1407 * expansion of the uniform rational Chebyshev approximation
1408 * to exp(-x) of type (14,14). H is assumed to be upper-Hessenberg.
1409 * About 14-digit accuracy is expected if the matrix H is negative
1410 * definite. The algorithm may behave poorly otherwise.
1412 *-----Arguments--------------------------------------------------------|
1414 * m : (input) order of the Hessenberg matrix H
1416 * t : (input) time-scaling factor (can be < 0).
1418 * H(ldh,m): (input) upper Hessenberg matrix.
1420 * y(m) : (input/output) on input the operand vector,
1421 * on output the resulting vector exp(t*H)*y.
1423 * wsp : (workspace). Observe that a double precision vector of
1424 * length 2*m*(m+2) can be used as well when calling this
1425 * routine (thus avoiding an idle complex array elsewhere)
1427 *----------------------------------------------------------------------|
1428 * Roger B. Sidje (rbs@maths.uq.edu.au)
1429 * EXPOKIT: Software Package for Computing Matrix Exponentials.
1430 * ACM - Transactions On Mathematical Software, 24(1):130-156, 1998
1431 *----------------------------------------------------------------------|
1433 complex*16 ZERO
1434 integer ndeg, i, j, k, ip, ih, iy, iz
1435 parameter ( ndeg=7, ZERO=(0.0d0,0.0d0) )
1436 double precision alpha0
1437 complex*16 alpha(ndeg), theta(ndeg), tmpc
1439 intrinsic ABS,DBLE,MIN
1441 *--- Pointers ...
1443 ih = 1
1444 iy = ih + m*m
1445 iz = iy + m
1447 *--- Coefficients and poles of the partial fraction expansion...
1449 alpha0 = 0.183216998528140087D-11
1450 alpha(1)=( 0.557503973136501826D+02,-0.204295038779771857D+03)
1451 alpha(2)=(-0.938666838877006739D+02, 0.912874896775456363D+02)
1452 alpha(3)=( 0.469965415550370835D+02,-0.116167609985818103D+02)
1453 alpha(4)=(-0.961424200626061065D+01,-0.264195613880262669D+01)
1454 alpha(5)=( 0.752722063978321642D+00, 0.670367365566377770D+00)
1455 alpha(6)=(-0.188781253158648576D-01,-0.343696176445802414D-01)
1456 alpha(7)=( 0.143086431411801849D-03, 0.287221133228814096D-03)
1458 theta(1)=(-0.562314417475317895D+01, 0.119406921611247440D+01)
1459 theta(2)=(-0.508934679728216110D+01, 0.358882439228376881D+01)
1460 theta(3)=(-0.399337136365302569D+01, 0.600483209099604664D+01)
1461 theta(4)=(-0.226978543095856366D+01, 0.846173881758693369D+01)
1462 theta(5)=( 0.208756929753827868D+00, 0.109912615662209418D+02)
1463 theta(6)=( 0.370327340957595652D+01, 0.136563731924991884D+02)
1464 theta(7)=( 0.889777151877331107D+01, 0.166309842834712071D+02)
1466 *--- Accumulation of the contribution of each pole ...
1468 do j = 1,m
1469 wsp(iz+j-1) = y(j)
1470 y(j) = y(j)*alpha0
1471 enddo
1472 do ip = 1,ndeg
1473 *--- Solve each fraction using Gaussian elimination with pivoting...
1474 do j = 1,m
1475 wsp(iy+j-1) = wsp(iz+j-1)
1476 do i = 1,MIN( j+1,m )
1477 wsp(ih+(j-1)*m+i-1) = -t*H(i,j)
1478 enddo
1479 wsp(ih+(j-1)*m+j-1) = wsp(ih+(j-1)*m+j-1)-theta(ip)
1480 do k = i,m
1481 wsp(ih+(j-1)*m+k-1) = ZERO
1482 enddo
1483 enddo
1484 do i = 1,m-1
1485 *--- Get pivot and exchange rows ...
1486 if (ABS(wsp(ih+(i-1)*m+i-1)).lt.ABS(wsp(ih+(i-1)*m+i))) then
1487 call ZSWAP( m-i+1, wsp(ih+(i-1)*m+i-1),m,
1488 . wsp(ih+(i-1)*m+i),m )
1489 call ZSWAP( 1, wsp(iy+i-1),1, wsp(iy+i),1 )
1490 endif
1491 *--- Forward eliminiation ...
1492 tmpc = wsp(ih+(i-1)*m+i) / wsp(ih+(i-1)*m+i-1)
1493 call ZAXPY( m-i, -tmpc, wsp(ih+i*m+i-1),m, wsp(ih+i*m+i),m )
1494 wsp(iy+i) = wsp(iy+i) - tmpc*wsp(iy+i-1)
1495 enddo
1496 *--- Backward substitution ...
1497 do i = m,1,-1
1498 tmpc = wsp(iy+i-1)
1499 do j = i+1,m
1500 tmpc = tmpc - wsp(ih+(j-1)*m+i-1)*wsp(iy+j-1)
1501 enddo
1502 wsp(iy+i-1) = tmpc / wsp(ih+(i-1)*m+i-1)
1503 enddo
1504 *--- Accumulate the partial result in y ...
1505 do j = 1,m
1506 y(j) = y(j) + DBLE( alpha(ip)*wsp(iy+j-1) )
1507 enddo
1508 enddo
1510 *----------------------------------------------------------------------|
1511 *----------------------------------------------------------------------|
1512 subroutine ZNCHBV( m, t, H,ldh, y, wsp )
1514 implicit none
1515 integer m, ldh
1516 double precision t
1517 complex*16 H(ldh,m), y(m), wsp(m*(m+2))
1519 *-----Purpose----------------------------------------------------------|
1521 *--- ZNCHBV computes y = exp(t*H)*y using the partial fraction
1522 * expansion of the uniform rational Chebyshev approximation
1523 * to exp(-x) of type (14,14). H is assumed to be upper-Hessenberg.
1524 * About 14-digit accuracy is expected if the matrix H is negative
1525 * definite. The algorithm may behave poorly otherwise.
1527 *-----Arguments--------------------------------------------------------|
1529 * m : (input) order of the Hessenberg matrix H
1531 * t : (input) time-scaling factor (can be < 0).
1533 * H(ldh,m): (input) upper Hessenberg matrix.
1535 * y(m) : (input/output) on input the operand vector,
1536 * on output the resulting vector exp(t*H)*y.
1538 * wsp : (workspace). Observe that a double precision vector of
1539 * length 2*m*(m+2) can be used as well when calling this
1540 * routine.
1542 *----------------------------------------------------------------------|
1543 * Roger B. Sidje (rbs@maths.uq.edu.au)
1544 * EXPOKIT: Software Package for Computing Matrix Exponentials.
1545 * ACM - Transactions On Mathematical Software, 24(1):130-156, 1998
1546 *----------------------------------------------------------------------|
1548 complex*16 ZERO
1549 integer ndeg, i, j, k, ip, ih, iy, iz
1550 parameter ( ndeg=7, ZERO=(0.0d0,0.0d0) )
1551 double precision alpha0
1552 complex*16 alpha(ndeg), theta(ndeg), tmpc
1554 intrinsic ABS,DBLE,CONJG,MIN
1556 *--- Pointers ...
1558 ih = 1
1559 iy = ih + m*m
1560 iz = iy + m
1562 *--- Coefficients and poles of the partial fraction expansion...
1564 alpha0 = 0.183216998528140087D-11
1565 alpha(1)=( 0.557503973136501826D+02,-0.204295038779771857D+03)
1566 alpha(2)=(-0.938666838877006739D+02, 0.912874896775456363D+02)
1567 alpha(3)=( 0.469965415550370835D+02,-0.116167609985818103D+02)
1568 alpha(4)=(-0.961424200626061065D+01,-0.264195613880262669D+01)
1569 alpha(5)=( 0.752722063978321642D+00, 0.670367365566377770D+00)
1570 alpha(6)=(-0.188781253158648576D-01,-0.343696176445802414D-01)
1571 alpha(7)=( 0.143086431411801849D-03, 0.287221133228814096D-03)
1573 theta(1)=(-0.562314417475317895D+01, 0.119406921611247440D+01)
1574 theta(2)=(-0.508934679728216110D+01, 0.358882439228376881D+01)
1575 theta(3)=(-0.399337136365302569D+01, 0.600483209099604664D+01)
1576 theta(4)=(-0.226978543095856366D+01, 0.846173881758693369D+01)
1577 theta(5)=( 0.208756929753827868D+00, 0.109912615662209418D+02)
1578 theta(6)=( 0.370327340957595652D+01, 0.136563731924991884D+02)
1579 theta(7)=( 0.889777151877331107D+01, 0.166309842834712071D+02)
1581 do ip = 1,ndeg
1582 theta(ndeg+ip) = CONJG( theta(ip) )
1583 alpha(ndeg+ip) = CONJG( alpha(ip) )
1584 enddo
1586 *--- Accumulation of the contribution of each pole ...
1588 do j = 1,m
1589 wsp(iz+j-1) = y(j)
1590 y(j) = y(j)*alpha0
1591 enddo
1592 do ip = 1,2*ndeg
1593 alpha(ip) = 0.5d0*alpha(ip)
1594 *--- Solve each fraction using Gaussian elimination with pivoting...
1595 do j = 1,m
1596 wsp(iy+j-1) = wsp(iz+j-1)
1597 do i = 1,MIN( j+1,m )
1598 wsp(ih+(j-1)*m+i-1) = -t*H(i,j)
1599 enddo
1600 wsp(ih+(j-1)*m+j-1) = wsp(ih+(j-1)*m+j-1)-theta(ip)
1601 do k = i,m
1602 wsp(ih+(j-1)*m+k-1) = ZERO
1603 enddo
1604 enddo
1605 do i = 1,m-1
1606 *--- Get pivot and exchange rows ...
1607 if (ABS(wsp(ih+(i-1)*m+i-1)).lt.ABS(wsp(ih+(i-1)*m+i))) then
1608 call ZSWAP( m-i+1, wsp(ih+(i-1)*m+i-1),m,
1609 . wsp(ih+(i-1)*m+i),m )
1610 call ZSWAP( 1, wsp(iy+i-1),1, wsp(iy+i),1 )
1611 endif
1612 *--- Forward eliminiation ...
1613 tmpc = wsp(ih+(i-1)*m+i) / wsp(ih+(i-1)*m+i-1)
1614 call ZAXPY( m-i, -tmpc, wsp(ih+i*m+i-1),m, wsp(ih+i*m+i),m )
1615 wsp(iy+i) = wsp(iy+i) - tmpc*wsp(iy+i-1)
1616 enddo
1617 *--- Backward substitution ...
1618 do i = m,1,-1
1619 tmpc = wsp(iy+i-1)
1620 do j = i+1,m
1621 tmpc = tmpc - wsp(ih+(j-1)*m+i-1)*wsp(iy+j-1)
1622 enddo
1623 wsp(iy+i-1) = tmpc / wsp(ih+(i-1)*m+i-1)
1624 enddo
1625 *--- Accumulate the partial result in y ...
1626 do j = 1,m
1627 y(j) = y(j) + alpha(ip)*wsp(iy+j-1)
1628 enddo
1629 enddo
1631 *----------------------------------------------------------------------|
1632 *----------------------------------------------------------------------|
1633 subroutine DGEXPV( n, m, t, v, w, tol, anorm,
1634 . wsp,lwsp, iwsp,liwsp, matvec, itrace,iflag )
1636 implicit none
1637 integer n, m, lwsp, liwsp, itrace, iflag, iwsp(liwsp)
1638 double precision t, tol, anorm, v(n), w(n), wsp(lwsp)
1639 external matvec
1641 *-----Purpose----------------------------------------------------------|
1643 *--- DGEXPV computes w = exp(t*A)*v - for a General matrix A.
1645 * It does not compute the matrix exponential in isolation but
1646 * instead, it computes directly the action of the exponential
1647 * operator on the operand vector. This way of doing so allows
1648 * for addressing large sparse problems.
1650 * The method used is based on Krylov subspace projection
1651 * techniques and the matrix under consideration interacts only
1652 * via the external routine `matvec' performing the matrix-vector
1653 * product (matrix-free method).
1655 *-----Arguments--------------------------------------------------------|
1657 * n : (input) order of the principal matrix A.
1659 * m : (input) maximum size for the Krylov basis.
1661 * t : (input) time at wich the solution is needed (can be < 0).
1663 * v(n) : (input) given operand vector.
1665 * w(n) : (output) computed approximation of exp(t*A)*v.
1667 * tol : (input/output) the requested accuracy tolerance on w.
1668 * If on input tol=0.0d0 or tol is too small (tol.le.eps)
1669 * the internal value sqrt(eps) is used, and tol is set to
1670 * sqrt(eps) on output (`eps' denotes the machine epsilon).
1671 * (`Happy breakdown' is assumed if h(j+1,j) .le. anorm*tol)
1673 * anorm : (input) an approximation of some norm of A.
1675 * wsp(lwsp): (workspace) lwsp .ge. n*(m+1)+n+(m+2)^2+4*(m+2)^2+ideg+1
1676 * +---------+-------+---------------+
1677 * (actually, ideg=6) V H wsp for PADE
1679 * iwsp(liwsp): (workspace) liwsp .ge. m+2
1681 * matvec : external subroutine for matrix-vector multiplication.
1682 * synopsis: matvec( x, y )
1683 * double precision x(*), y(*)
1684 * computes: y(1:n) <- A*x(1:n)
1685 * where A is the principal matrix.
1687 * itrace : (input) running mode. 0=silent, 1=print step-by-step info
1689 * iflag : (output) exit flag.
1690 * <0 - bad input arguments
1691 * 0 - no problem
1692 * 1 - maximum number of steps reached without convergence
1693 * 2 - requested tolerance was too high
1695 *-----Accounts on the computation--------------------------------------|
1696 * Upon exit, an interested user may retrieve accounts on the
1697 * computations. They are located in wsp and iwsp as indicated below:
1699 * location mnemonic description
1700 * -----------------------------------------------------------------|
1701 * iwsp(1) = nmult, number of matrix-vector multiplications used
1702 * iwsp(2) = nexph, number of Hessenberg matrix exponential evaluated
1703 * iwsp(3) = nscale, number of repeated squaring involved in Pade
1704 * iwsp(4) = nstep, number of integration steps used up to completion
1705 * iwsp(5) = nreject, number of rejected step-sizes
1706 * iwsp(6) = ibrkflag, set to 1 if `happy breakdown' and 0 otherwise
1707 * iwsp(7) = mbrkdwn, if `happy brkdown', basis-size when it occured
1708 * -----------------------------------------------------------------|
1709 * wsp(1) = step_min, minimum step-size used during integration
1710 * wsp(2) = step_max, maximum step-size used during integration
1711 * wsp(3) = dummy
1712 * wsp(4) = dummy
1713 * wsp(5) = x_error, maximum among all local truncation errors
1714 * wsp(6) = s_error, global sum of local truncation errors
1715 * wsp(7) = tbrkdwn, if `happy breakdown', time when it occured
1716 * wsp(8) = t_now, integration domain successfully covered
1717 * wsp(9) = hump, i.e., max||exp(sA)||, s in [0,t] (or [t,0] if t<0)
1718 * wsp(10) = ||w||/||v||, scaled norm of the solution w.
1719 * -----------------------------------------------------------------|
1720 * The `hump' is a measure of the conditioning of the problem. The
1721 * matrix exponential is well-conditioned if hump = 1, whereas it is
1722 * poorly-conditioned if hump >> 1. However the solution can still be
1723 * relatively fairly accurate even when the hump is large (the hump
1724 * is an upper bound), especially when the hump and the scaled norm
1725 * of w [this is also computed and returned in wsp(10)] are of the
1726 * same order of magnitude (further details in reference below).
1728 *----------------------------------------------------------------------|
1729 *-----The following parameters may also be adjusted herein-------------|
1731 integer mxstep, mxreject, ideg
1732 double precision delta, gamma
1733 parameter( mxstep = 1000,
1734 . mxreject = 0,
1735 . ideg = 6,
1736 . delta = 1.2d0,
1737 . gamma = 0.9d0 )
1739 * mxstep : maximum allowable number of integration steps.
1740 * The value 0 means an infinite number of steps.
1742 * mxreject: maximum allowable number of rejections at each step.
1743 * The value 0 means an infinite number of rejections.
1745 * ideg : the Pade approximation of type (ideg,ideg) is used as
1746 * an approximation to exp(H). The value 0 switches to the
1747 * uniform rational Chebyshev approximation of type (14,14)
1749 * delta : local truncation error `safety factor'
1751 * gamma : stepsize `shrinking factor'
1753 *----------------------------------------------------------------------|
1754 * Roger B. Sidje (rbs@maths.uq.edu.au)
1755 * EXPOKIT: Software Package for Computing Matrix Exponentials.
1756 * ACM - Transactions On Mathematical Software, 24(1):130-156, 1998
1757 *----------------------------------------------------------------------|
1759 integer i, j, k1, mh, mx, iv, ih, j1v, ns, ifree, lfree, iexph,
1760 . ireject,ibrkflag,mbrkdwn, nmult, nreject, nexph, nscale,
1761 . nstep
1762 double precision sgn, t_out, tbrkdwn, step_min,step_max, err_loc,
1763 . s_error, x_error, t_now, t_new, t_step, t_old,
1764 . xm, beta, break_tol, p1, p2, p3, eps, rndoff,
1765 . vnorm, avnorm, hj1j, hij, hump, SQR1
1767 intrinsic AINT,ABS,DBLE,LOG10,MAX,MIN,NINT,SIGN,SQRT
1768 double precision DDOT, DNRM2
1770 *--- check restrictions on input parameters ...
1771 iflag = 0
1772 if ( lwsp.lt.n*(m+2)+5*(m+2)**2+ideg+1 ) iflag = -1
1773 if ( liwsp.lt.m+2 ) iflag = -2
1774 if ( m.ge.n .or. m.le.0 ) iflag = -3
1775 if ( iflag.ne.0 ) stop 'bad sizes (in input of DGEXPV)'
1777 *--- initialisations ...
1779 k1 = 2
1780 mh = m + 2
1781 iv = 1
1782 ih = iv + n*(m+1) + n
1783 ifree = ih + mh*mh
1784 lfree = lwsp - ifree + 1
1786 ibrkflag = 0
1787 mbrkdwn = m
1788 nmult = 0
1789 nreject = 0
1790 nexph = 0
1791 nscale = 0
1793 t_out = ABS( t )
1794 tbrkdwn = 0.0d0
1795 step_min = t_out
1796 step_max = 0.0d0
1797 nstep = 0
1798 s_error = 0.0d0
1799 x_error = 0.0d0
1800 t_now = 0.0d0
1801 t_new = 0.0d0
1803 p1 = 4.0d0/3.0d0
1804 1 p2 = p1 - 1.0d0
1805 p3 = p2 + p2 + p2
1806 eps = ABS( p3-1.0d0 )
1807 if ( eps.eq.0.0d0 ) go to 1
1808 if ( tol.le.eps ) tol = SQRT( eps )
1809 rndoff = eps*anorm
1811 break_tol = 1.0d-7
1812 *>>> break_tol = tol
1813 *>>> break_tol = anorm*tol
1815 sgn = SIGN( 1.0d0,t )
1816 call DCOPY( n, v,1, w,1 )
1817 beta = DNRM2( n, w,1 )
1818 vnorm = beta
1819 hump = beta
1821 *--- obtain the very first stepsize ...
1823 SQR1 = SQRT( 0.1d0 )
1824 xm = 1.0d0/DBLE( m )
1825 p1 = tol*(((m+1)/2.72D0)**(m+1))*SQRT(2.0D0*3.14D0*(m+1))
1826 t_new = (1.0d0/anorm)*(p1/(4.0d0*beta*anorm))**xm
1827 p1 = 10.0d0**(NINT( LOG10( t_new )-SQR1 )-1)
1828 t_new = AINT( t_new/p1 + 0.55d0 ) * p1
1830 *--- step-by-step integration ...
1832 100 if ( t_now.ge.t_out ) goto 500
1834 nstep = nstep + 1
1835 t_step = MIN( t_out-t_now, t_new )
1837 p1 = 1.0d0/beta
1838 do i = 1,n
1839 wsp(iv + i-1) = p1*w(i)
1840 enddo
1841 do i = 1,mh*mh
1842 wsp(ih+i-1) = 0.0d0
1843 enddo
1845 *--- Arnoldi loop ...
1847 j1v = iv + n
1848 do 200 j = 1,m
1849 nmult = nmult + 1
1850 call matvec( wsp(j1v-n), wsp(j1v) )
1851 do i = 1,j
1852 hij = DDOT( n, wsp(iv+(i-1)*n),1, wsp(j1v),1 )
1853 call DAXPY( n, -hij, wsp(iv+(i-1)*n),1, wsp(j1v),1 )
1854 wsp(ih+(j-1)*mh+i-1) = hij
1855 enddo
1856 hj1j = DNRM2( n, wsp(j1v),1 )
1857 *--- if `happy breakdown' go straightforward at the end ...
1858 if ( hj1j.le.break_tol ) then
1859 print*,'happy breakdown: mbrkdwn =',j,' h =',hj1j
1860 k1 = 0
1861 ibrkflag = 1
1862 mbrkdwn = j
1863 tbrkdwn = t_now
1864 t_step = t_out-t_now
1865 goto 300
1866 endif
1867 wsp(ih+(j-1)*mh+j) = hj1j
1868 call DSCAL( n, 1.0d0/hj1j, wsp(j1v),1 )
1869 j1v = j1v + n
1870 200 continue
1871 nmult = nmult + 1
1872 call matvec( wsp(j1v-n), wsp(j1v) )
1873 avnorm = DNRM2( n, wsp(j1v),1 )
1875 *--- set 1 for the 2-corrected scheme ...
1877 300 continue
1878 wsp(ih+m*mh+m+1) = 1.0d0
1880 *--- loop while ireject<mxreject until the tolerance is reached ...
1882 ireject = 0
1883 401 continue
1886 *--- compute w = beta*V*exp(t_step*H)*e1 ...
1888 nexph = nexph + 1
1889 mx = mbrkdwn + k1
1890 if ( ideg.ne.0 ) then
1891 *--- irreducible rational Pade approximation ...
1892 call DGPADM( ideg, mx, sgn*t_step, wsp(ih),mh,
1893 . wsp(ifree),lfree, iwsp, iexph, ns, iflag )
1894 iexph = ifree + iexph - 1
1895 nscale = nscale + ns
1896 else
1897 *--- uniform rational Chebyshev approximation ...
1898 iexph = ifree
1899 do i = 1,mx
1900 wsp(iexph+i-1) = 0.0d0
1901 enddo
1902 wsp(iexph) = 1.0d0
1903 call DNCHBV(mx,sgn*t_step,wsp(ih),mh,wsp(iexph),wsp(ifree+mx))
1904 endif
1906 402 continue
1908 *--- error estimate ...
1910 if ( k1.eq.0 ) then
1911 err_loc = tol
1912 else
1913 p1 = ABS( wsp(iexph+m) ) * beta
1914 p2 = ABS( wsp(iexph+m+1) ) * beta * avnorm
1915 if ( p1.gt.10.0d0*p2 ) then
1916 err_loc = p2
1917 xm = 1.0d0/DBLE( m )
1918 elseif ( p1.gt.p2 ) then
1919 err_loc = (p1*p2)/(p1-p2)
1920 xm = 1.0d0/DBLE( m )
1921 else
1922 err_loc = p1
1923 xm = 1.0d0/DBLE( m-1 )
1924 endif
1925 endif
1927 *--- reject the step-size if the error is not acceptable ...
1929 if ( (k1.ne.0) .and. (err_loc.gt.delta*t_step*tol) .and.
1930 . (mxreject.eq.0 .or. ireject.lt.mxreject) ) then
1931 t_old = t_step
1932 t_step = gamma * t_step * (t_step*tol/err_loc)**xm
1933 p1 = 10.0d0**(NINT( LOG10( t_step )-SQR1 )-1)
1934 t_step = AINT( t_step/p1 + 0.55d0 ) * p1
1935 if ( itrace.ne.0 ) then
1936 print*,'t_step =',t_old
1937 print*,'err_loc =',err_loc
1938 print*,'err_required =',delta*t_old*tol
1939 print*,'stepsize rejected, stepping down to:',t_step
1940 endif
1941 ireject = ireject + 1
1942 nreject = nreject + 1
1943 if ( mxreject.ne.0 .and. ireject.gt.mxreject ) then
1944 print*,"Failure in DGEXPV: ---"
1945 print*,"The requested tolerance is too high."
1946 Print*,"Rerun with a smaller value."
1947 iflag = 2
1948 return
1949 endif
1950 goto 401
1951 endif
1953 *--- now update w = beta*V*exp(t_step*H)*e1 and the hump ...
1955 mx = mbrkdwn + MAX( 0,k1-1 )
1956 call DGEMV( 'n', n,mx,beta,wsp(iv),n,wsp(iexph),1,0.0d0,w,1 )
1957 beta = DNRM2( n, w,1 )
1958 hump = MAX( hump, beta )
1960 *--- suggested value for the next stepsize ...
1962 t_new = gamma * t_step * (t_step*tol/err_loc)**xm
1963 p1 = 10.0d0**(NINT( LOG10( t_new )-SQR1 )-1)
1964 t_new = AINT( t_new/p1 + 0.55d0 ) * p1
1966 err_loc = MAX( err_loc,rndoff )
1968 *--- update the time covered ...
1970 t_now = t_now + t_step
1972 *--- display and keep some information ...
1974 if ( itrace.ne.0 ) then
1975 print*,'integration',nstep,'---------------------------------'
1976 print*,'scale-square =',ns
1977 print*,'step_size =',t_step
1978 print*,'err_loc =',err_loc
1979 print*,'next_step =',t_new
1980 endif
1982 step_min = MIN( step_min, t_step )
1983 step_max = MAX( step_max, t_step )
1984 s_error = s_error + err_loc
1985 x_error = MAX( x_error, err_loc )
1987 if ( mxstep.eq.0 .or. nstep.lt.mxstep ) goto 100
1988 iflag = 1
1990 500 continue
1992 iwsp(1) = nmult
1993 iwsp(2) = nexph
1994 iwsp(3) = nscale
1995 iwsp(4) = nstep
1996 iwsp(5) = nreject
1997 iwsp(6) = ibrkflag
1998 iwsp(7) = mbrkdwn
2000 wsp(1) = step_min
2001 wsp(2) = step_max
2002 wsp(3) = 0.0d0
2003 wsp(4) = 0.0d0
2004 wsp(5) = x_error
2005 wsp(6) = s_error
2006 wsp(7) = tbrkdwn
2007 wsp(8) = sgn*t_now
2008 wsp(9) = hump/vnorm
2009 wsp(10) = beta/vnorm
2011 *----------------------------------------------------------------------|
2012 *----------------------------------------------------------------------|
2013 subroutine DSEXPV( n, m, t, v, w, tol, anorm,
2014 . wsp,lwsp, iwsp,liwsp, matvec, itrace,iflag )
2016 implicit none
2017 integer n, m, lwsp, liwsp, itrace, iflag, iwsp(liwsp)
2018 double precision t, tol, anorm, v(n), w(n), wsp(lwsp)
2019 external matvec
2021 *-----Purpose----------------------------------------------------------|
2023 *--- DSEXPV computes w = exp(t*A)*v - for a Symmetric matrix A.
2025 * It does not compute the matrix exponential in isolation but
2026 * instead, it computes directly the action of the exponential
2027 * operator on the operand vector. This way of doing so allows
2028 * for addressing large sparse problems.
2030 * The method used is based on Krylov subspace projection
2031 * techniques and the matrix under consideration interacts only
2032 * via the external routine `matvec' performing the matrix-vector
2033 * product (matrix-free method).
2035 *-----Arguments--------------------------------------------------------|
2037 * n : (input) order of the principal matrix A.
2039 * m : (input) maximum size for the Krylov basis.
2041 * t : (input) time at wich the solution is needed (can be < 0).
2043 * v(n) : (input) given operand vector.
2045 * w(n) : (output) computed approximation of exp(t*A)*v.
2047 * tol : (input/output) the requested accuracy tolerance on w.
2048 * If on input tol=0.0d0 or tol is too small (tol.le.eps)
2049 * the internal value sqrt(eps) is used, and tol is set to
2050 * sqrt(eps) on output (`eps' denotes the machine epsilon).
2051 * (`Happy breakdown' is assumed if h(j+1,j) .le. anorm*tol)
2053 * anorm : (input) an approximation of some norm of A.
2055 * wsp(lwsp): (workspace) lwsp .ge. n*(m+1)+n+(m+2)^2+4*(m+2)^2+ideg+1
2056 * +---------+-------+---------------+
2057 * (actually, ideg=6) V H wsp for PADE
2059 * iwsp(liwsp): (workspace) liwsp .ge. m+2
2061 * matvec : external subroutine for matrix-vector multiplication.
2062 * synopsis: matvec( x, y )
2063 * double precision x(*), y(*)
2064 * computes: y(1:n) <- A*x(1:n)
2065 * where A is the principal matrix.
2067 * itrace : (input) running mode. 0=silent, 1=print step-by-step info
2069 * iflag : (output) exit flag.
2070 * <0 - bad input arguments
2071 * 0 - no problem
2072 * 1 - maximum number of steps reached without convergence
2073 * 2 - requested tolerance was too high
2075 *-----Accounts on the computation--------------------------------------|
2076 * Upon exit, an interested user may retrieve accounts on the
2077 * computations. They are located in the workspace arrays wsp and
2078 * iwsp as indicated below:
2080 * location mnemonic description
2081 * -----------------------------------------------------------------|
2082 * iwsp(1) = nmult, number of matrix-vector multiplications used
2083 * iwsp(2) = nexph, nbr of Tridiagonal matrix exponential evaluated
2084 * iwsp(3) = nscale, number of repeated squaring involved in Pade
2085 * iwsp(4) = nstep, nbr of integration steps used up to completion
2086 * iwsp(5) = nreject, number of rejected step-sizes
2087 * iwsp(6) = ibrkflag, set to 1 if `happy breakdown' and 0 otherwise
2088 * iwsp(7) = mbrkdwn, if `happy brkdown', basis-size when it occured
2089 * -----------------------------------------------------------------|
2090 * wsp(1) = step_min, minimum step-size used during integration
2091 * wsp(2) = step_max, maximum step-size used during integration
2092 * wsp(3) = dummy
2093 * wsp(4) = dummy
2094 * wsp(5) = x_error, maximum among all local truncation errors
2095 * wsp(6) = s_error, global sum of local truncation errors
2096 * wsp(7) = tbrkdwn, if `happy breakdown', time when it occured
2097 * wsp(8) = t_now, integration domain successfully covered
2098 * wsp(9) = hump, i.e., max||exp(sA)||, s in [0,t] (or [t,0] if t<0)
2099 * wsp(10) = ||w||/||v||, scaled norm of the solution w.
2100 * -----------------------------------------------------------------|
2101 * The `hump' is a measure of the conditioning of the problem. The
2102 * matrix exponential is well-conditioned if hump = 1, whereas it is
2103 * poorly-conditioned if hump >> 1. However the solution can still be
2104 * relatively fairly accurate even when the hump is large (the hump
2105 * is an upper bound), especially when the hump and the scaled norm
2106 * of w [this is also computed and returned in wsp(10)] are of the
2107 * same order of magnitude (further details in reference below).
2109 *----------------------------------------------------------------------|
2110 *-----The following parameters may also be adjusted herein-------------|
2112 integer mxstep, mxreject, ideg
2113 double precision delta, gamma
2114 parameter( mxstep = 500,
2115 . mxreject = 0,
2116 . ideg = 6,
2117 . delta = 1.2d0,
2118 . gamma = 0.9d0 )
2120 * mxstep : maximum allowable number of integration steps.
2121 * The value 0 means an infinite number of steps.
2123 * mxreject: maximum allowable number of rejections at each step.
2124 * The value 0 means an infinite number of rejections.
2126 * ideg : the Pade approximation of type (ideg,ideg) is used as
2127 * an approximation to exp(H). The value 0 switches to the
2128 * uniform rational Chebyshev approximation of type (14,14)
2130 * delta : local truncation error `safety factor'
2132 * gamma : stepsize `shrinking factor'
2134 *----------------------------------------------------------------------|
2135 * Roger B. Sidje (rbs@maths.uq.edu.au)
2136 * EXPOKIT: Software Package for Computing Matrix Exponentials.
2137 * ACM - Transactions On Mathematical Software, 24(1):130-156, 1998
2138 *----------------------------------------------------------------------|
2140 integer i, j, k1, mh, mx, iv, ih, j1v, ns, ifree, lfree, iexph,
2141 . ireject,ibrkflag,mbrkdwn, nmult, nreject, nexph, nscale,
2142 . nstep
2143 double precision sgn, t_out, tbrkdwn, step_min,step_max, err_loc,
2144 . s_error, x_error, t_now, t_new, t_step, t_old,
2145 . xm, beta, break_tol, p1, p2, p3, eps, rndoff,
2146 . vnorm, avnorm, hj1j, hjj, hump, SQR1
2148 intrinsic AINT,ABS,DBLE,LOG10,MAX,MIN,NINT,SIGN,SQRT
2149 double precision DDOT, DNRM2
2151 *--- check restrictions on input parameters ...
2152 iflag = 0
2153 if ( lwsp.lt.n*(m+2)+5*(m+2)**2+ideg+1 ) iflag = -1
2154 if ( liwsp.lt.m+2 ) iflag = -2
2155 if ( m.ge.n .or. m.le.0 ) iflag = -3
2156 if ( iflag.ne.0 ) stop 'bad sizes (in input of DSEXPV)'
2158 *--- initialisations ...
2160 k1 = 2
2161 mh = m + 2
2162 iv = 1
2163 ih = iv + n*(m+1) + n
2164 ifree = ih + mh*mh
2165 lfree = lwsp - ifree + 1
2167 ibrkflag = 0
2168 mbrkdwn = m
2169 nmult = 0
2170 nreject = 0
2171 nexph = 0
2172 nscale = 0
2174 t_out = ABS( t )
2175 tbrkdwn = 0.0d0
2176 step_min = t_out
2177 step_max = 0.0d0
2178 nstep = 0
2179 s_error = 0.0d0
2180 x_error = 0.0d0
2181 t_now = 0.0d0
2182 t_new = 0.0d0
2184 p1 = 4.0d0/3.0d0
2185 1 p2 = p1 - 1.0d0
2186 p3 = p2 + p2 + p2
2187 eps = ABS( p3-1.0d0 )
2188 if ( eps.eq.0.0d0 ) go to 1
2189 if ( tol.le.eps ) tol = SQRT( eps )
2190 rndoff = eps*anorm
2192 break_tol = 1.0d-7
2193 *>>> break_tol = tol
2194 *>>> break_tol = anorm*tol
2196 sgn = SIGN( 1.0d0,t )
2197 call DCOPY( n, v,1, w,1 )
2198 beta = DNRM2( n, w,1 )
2199 vnorm = beta
2200 hump = beta
2202 *--- obtain the very first stepsize ...
2204 SQR1 = SQRT( 0.1d0 )
2205 xm = 1.0d0/DBLE( m )
2206 p1 = tol*(((m+1)/2.72D0)**(m+1))*SQRT(2.0D0*3.14D0*(m+1))
2207 t_new = (1.0d0/anorm)*(p1/(4.0d0*beta*anorm))**xm
2208 p1 = 10.0d0**(NINT( LOG10( t_new )-SQR1 )-1)
2209 t_new = AINT( t_new/p1 + 0.55d0 ) * p1
2211 *--- step-by-step integration ...
2213 100 if ( t_now.ge.t_out ) goto 500
2215 nstep = nstep + 1
2216 t_step = MIN( t_out-t_now, t_new )
2218 p1 = 1.0d0/beta
2219 do i = 1,n
2220 wsp(iv + i-1) = p1*w(i)
2221 enddo
2222 do i = 1,mh*mh
2223 wsp(ih+i-1) = 0.0d0
2224 enddo
2226 *--- Lanczos loop ...
2228 j1v = iv + n
2229 do 200 j = 1,m
2230 nmult = nmult + 1
2231 call matvec( wsp(j1v-n), wsp(j1v) )
2232 if ( j.gt.1 )
2233 . call DAXPY(n,-wsp(ih+(j-1)*mh+j-2),wsp(j1v-2*n),1,wsp(j1v),1)
2234 hjj = DDOT( n, wsp(j1v-n),1, wsp(j1v),1 )
2235 call DAXPY( n, -hjj, wsp(j1v-n),1, wsp(j1v),1 )
2236 hj1j = DNRM2( n, wsp(j1v),1 )
2237 wsp(ih+(j-1)*(mh+1)) = hjj
2238 *--- if `happy breakdown' go straightforward at the end ...
2239 if ( hj1j.le.break_tol ) then
2240 print*,'happy breakdown: mbrkdwn =',j,' h =',hj1j
2241 k1 = 0
2242 ibrkflag = 1
2243 mbrkdwn = j
2244 tbrkdwn = t_now
2245 t_step = t_out-t_now
2246 goto 300
2247 endif
2248 wsp(ih+(j-1)*mh+j) = hj1j
2249 wsp(ih+j*mh+j-1) = hj1j
2250 call DSCAL( n, 1.0d0/hj1j, wsp(j1v),1 )
2251 j1v = j1v + n
2252 200 continue
2253 nmult = nmult + 1
2254 call matvec( wsp(j1v-n), wsp(j1v) )
2255 avnorm = DNRM2( n, wsp(j1v),1 )
2257 *--- set 1 for the 2-corrected scheme ...
2259 300 continue
2260 wsp(ih+m*mh+m-1) = 0.0d0
2261 wsp(ih+m*mh+m+1) = 1.0d0
2263 *--- loop while ireject<mxreject until the tolerance is reached ...
2265 ireject = 0
2266 401 continue
2268 *--- compute w = beta*V*exp(t_step*H)*e1 ...
2270 nexph = nexph + 1
2271 mx = mbrkdwn + k1
2272 if ( ideg.ne.0 ) then
2273 *--- irreducible rational Pade approximation ...
2274 call DGPADM( ideg, mx, sgn*t_step, wsp(ih),mh,
2275 . wsp(ifree),lfree, iwsp, iexph, ns, iflag )
2276 iexph = ifree + iexph - 1
2277 nscale = nscale + ns
2278 else
2279 *--- uniform rational Chebyshev approximation ...
2280 iexph = ifree
2281 do i = 1,mx
2282 wsp(iexph+i-1) = 0.0d0
2283 enddo
2284 wsp(iexph) = 1.0d0
2285 call DNCHBV(mx,sgn*t_step,wsp(ih),mh,wsp(iexph),wsp(ifree+mx))
2286 endif
2287 402 continue
2289 *--- error estimate ...
2291 if ( k1.eq.0 ) then
2292 err_loc = tol
2293 else
2294 p1 = ABS( wsp(iexph+m) ) * beta
2295 p2 = ABS( wsp(iexph+m+1) ) * beta * avnorm
2296 if ( p1.gt.10.0d0*p2 ) then
2297 err_loc = p2
2298 xm = 1.0d0/DBLE( m )
2299 elseif ( p1.gt.p2 ) then
2300 err_loc = (p1*p2)/(p1-p2)
2301 xm = 1.0d0/DBLE( m )
2302 else
2303 err_loc = p1
2304 xm = 1.0d0/DBLE( m-1 )
2305 endif
2306 endif
2308 *--- reject the step-size if the error is not acceptable ...
2310 if ( (k1.ne.0) .and. (err_loc.gt.delta*t_step*tol) .and.
2311 . (mxreject.eq.0 .or. ireject.lt.mxreject) ) then
2312 t_old = t_step
2313 t_step = gamma * t_step * (t_step*tol/err_loc)**xm
2314 p1 = 10.0d0**(NINT( LOG10( t_step )-SQR1 )-1)
2315 t_step = AINT( t_step/p1 + 0.55d0 ) * p1
2316 if ( itrace.ne.0 ) then
2317 print*,'t_step =',t_old
2318 print*,'err_loc =',err_loc
2319 print*,'err_required =',delta*t_old*tol
2320 print*,'stepsize rejected, stepping down to:',t_step
2321 endif
2322 ireject = ireject + 1
2323 nreject = nreject + 1
2324 if ( mxreject.ne.0 .and. ireject.gt.mxreject ) then
2325 print*,"Failure in DSEXPV: ---"
2326 print*,"The requested tolerance is too high."
2327 Print*,"Rerun with a smaller value."
2328 iflag = 2
2329 return
2330 endif
2331 goto 401
2332 endif
2334 *--- now update w = beta*V*exp(t_step*H)*e1 and the hump ...
2336 mx = mbrkdwn + MAX( 0,k1-1 )
2337 call DGEMV( 'n', n,mx,beta,wsp(iv),n,wsp(iexph),1,0.0d0,w,1 )
2338 beta = DNRM2( n, w,1 )
2339 hump = MAX( hump, beta )
2341 *--- suggested value for the next stepsize ...
2343 t_new = gamma * t_step * (t_step*tol/err_loc)**xm
2344 p1 = 10.0d0**(NINT( LOG10( t_new )-SQR1 )-1)
2345 t_new = AINT( t_new/p1 + 0.55d0 ) * p1
2347 err_loc = MAX( err_loc,rndoff )
2349 *--- update the time covered ...
2351 t_now = t_now + t_step
2353 *--- display and keep some information ...
2355 if ( itrace.ne.0 ) then
2356 print*,'integration',nstep,'---------------------------------'
2357 print*,'scale-square =',ns
2358 print*,'step_size =',t_step
2359 print*,'err_loc =',err_loc
2360 print*,'next_step =',t_new
2361 endif
2363 step_min = MIN( step_min, t_step )
2364 step_max = MAX( step_max, t_step )
2365 s_error = s_error + err_loc
2366 x_error = MAX( x_error, err_loc )
2368 if ( mxstep.eq.0 .or. nstep.lt.mxstep ) goto 100
2369 iflag = 1
2371 500 continue
2373 iwsp(1) = nmult
2374 iwsp(2) = nexph
2375 iwsp(3) = nscale
2376 iwsp(4) = nstep
2377 iwsp(5) = nreject
2378 iwsp(6) = ibrkflag
2379 iwsp(7) = mbrkdwn
2381 wsp(1) = step_min
2382 wsp(2) = step_max
2383 wsp(3) = 0.0d0
2384 wsp(4) = 0.0d0
2385 wsp(5) = x_error
2386 wsp(6) = s_error
2387 wsp(7) = tbrkdwn
2388 wsp(8) = sgn*t_now
2389 wsp(9) = hump/vnorm
2390 wsp(10) = beta/vnorm
2392 *----------------------------------------------------------------------|
2394 *----------------------------------------------------------------------|
2395 subroutine ZGEXPV( n, m, t, v, w, tol, anorm,
2396 . wsp,lwsp, iwsp,liwsp, matvec, itrace,iflag )
2398 implicit none
2399 integer n, m, lwsp, liwsp, itrace, iflag, iwsp(liwsp)
2400 double precision t, tol, anorm
2401 complex*16 v(n), w(n), wsp(lwsp)
2402 external matvec
2404 *-----Purpose----------------------------------------------------------|
2406 *--- ZGEXPV computes w = exp(t*A)*v
2407 * for a Zomplex (i.e., complex double precision) matrix A
2409 * It does not compute the matrix exponential in isolation but
2410 * instead, it computes directly the action of the exponential
2411 * operator on the operand vector. This way of doing so allows
2412 * for addressing large sparse problems.
2414 * The method used is based on Krylov subspace projection
2415 * techniques and the matrix under consideration interacts only
2416 * via the external routine `matvec' performing the matrix-vector
2417 * product (matrix-free method).
2419 *-----Arguments--------------------------------------------------------|
2421 * n : (input) order of the principal matrix A.
2423 * m : (input) maximum size for the Krylov basis.
2425 * t : (input) time at wich the solution is needed (can be < 0).
2427 * v(n) : (input) given operand vector.
2429 * w(n) : (output) computed approximation of exp(t*A)*v.
2431 * tol : (input/output) the requested accuracy tolerance on w.
2432 * If on input tol=0.0d0 or tol is too small (tol.le.eps)
2433 * the internal value sqrt(eps) is used, and tol is set to
2434 * sqrt(eps) on output (`eps' denotes the machine epsilon).
2435 * (`Happy breakdown' is assumed if h(j+1,j) .le. anorm*tol)
2437 * anorm : (input) an approximation of some norm of A.
2439 * wsp(lwsp): (workspace) lwsp .ge. n*(m+1)+n+(m+2)^2+4*(m+2)^2+ideg+1
2440 * +---------+-------+---------------+
2441 * (actually, ideg=6) V H wsp for PADE
2443 * iwsp(liwsp): (workspace) liwsp .ge. m+2
2445 * matvec : external subroutine for matrix-vector multiplication.
2446 * synopsis: matvec( x, y )
2447 * complex*16 x(*), y(*)
2448 * computes: y(1:n) <- A*x(1:n)
2449 * where A is the principal matrix.
2451 * itrace : (input) running mode. 0=silent, 1=print step-by-step info
2453 * iflag : (output) exit flag.
2454 * <0 - bad input arguments
2455 * 0 - no problem
2456 * 1 - maximum number of steps reached without convergence
2457 * 2 - requested tolerance was too high
2459 *-----Accounts on the computation--------------------------------------|
2460 * Upon exit, an interested user may retrieve accounts on the
2461 * computations. They are located in the workspace arrays wsp and
2462 * iwsp as indicated below:
2464 * location mnemonic description
2465 * -----------------------------------------------------------------|
2466 * iwsp(1) = nmult, number of matrix-vector multiplications used
2467 * iwsp(2) = nexph, number of Hessenberg matrix exponential evaluated
2468 * iwsp(3) = nscale, number of repeated squaring involved in Pade
2469 * iwsp(4) = nstep, number of integration steps used up to completion
2470 * iwsp(5) = nreject, number of rejected step-sizes
2471 * iwsp(6) = ibrkflag, set to 1 if `happy breakdown' and 0 otherwise
2472 * iwsp(7) = mbrkdwn, if `happy brkdown', basis-size when it occured
2473 * -----------------------------------------------------------------|
2474 * wsp(1) = step_min, minimum step-size used during integration
2475 * wsp(2) = step_max, maximum step-size used during integration
2476 * wsp(3) = x_round, maximum among all roundoff errors (lower bound)
2477 * wsp(4) = s_round, sum of roundoff errors (lower bound)
2478 * wsp(5) = x_error, maximum among all local truncation errors
2479 * wsp(6) = s_error, global sum of local truncation errors
2480 * wsp(7) = tbrkdwn, if `happy breakdown', time when it occured
2481 * wsp(8) = t_now, integration domain successfully covered
2482 * wsp(9) = hump, i.e., max||exp(sA)||, s in [0,t] (or [t,0] if t<0)
2483 * wsp(10) = ||w||/||v||, scaled norm of the solution w.
2484 * -----------------------------------------------------------------|
2485 * The `hump' is a measure of the conditioning of the problem. The
2486 * matrix exponential is well-conditioned if hump = 1, whereas it is
2487 * poorly-conditioned if hump >> 1. However the solution can still be
2488 * relatively fairly accurate even when the hump is large (the hump
2489 * is an upper bound), especially when the hump and the scaled norm
2490 * of w [this is also computed and returned in wsp(10)] are of the
2491 * same order of magnitude (further details in reference below).
2493 *----------------------------------------------------------------------|
2494 *-----The following parameters may also be adjusted herein-------------|
2496 integer mxstep, mxreject, ideg
2497 double precision delta, gamma
2498 parameter( mxstep = 500,
2499 . mxreject = 0,
2500 . ideg = 6,
2501 . delta = 1.2d0,
2502 . gamma = 0.9d0 )
2504 * mxstep : maximum allowable number of integration steps.
2505 * The value 0 means an infinite number of steps.
2507 * mxreject: maximum allowable number of rejections at each step.
2508 * The value 0 means an infinite number of rejections.
2510 * ideg : the Pade approximation of type (ideg,ideg) is used as
2511 * an approximation to exp(H). The value 0 switches to the
2512 * uniform rational Chebyshev approximation of type (14,14)
2514 * delta : local truncation error `safety factor'
2516 * gamma : stepsize `shrinking factor'
2518 *----------------------------------------------------------------------|
2519 * Roger B. Sidje (rbs@maths.uq.edu.au)
2520 * EXPOKIT: Software Package for Computing Matrix Exponentials.
2521 * ACM - Transactions On Mathematical Software, 24(1):130-156, 1998
2522 *----------------------------------------------------------------------|
2524 complex*16 ZERO, ONE
2525 parameter( ZERO=(0.0d0,0.0d0), ONE=(1.0d0,0.0d0) )
2526 integer i, j, k1, mh, mx, iv, ih, j1v, ns, ifree, lfree, iexph,
2527 . ireject,ibrkflag,mbrkdwn, nmult, nreject, nexph, nscale,
2528 . nstep
2529 double precision sgn, t_out, tbrkdwn, step_min,step_max, err_loc,
2530 . s_error, x_error, t_now, t_new, t_step, t_old,
2531 . xm, beta, break_tol, p1, p2, p3, eps, rndoff,
2532 . vnorm, avnorm, hj1j, hump, SQR1
2533 complex*16 hij
2535 intrinsic AINT,ABS,CMPLX,DBLE,INT,LOG10,MAX,MIN,NINT,SIGN,SQRT
2536 complex*16 ZDOTC
2537 double precision DZNRM2
2539 *--- check restrictions on input parameters ...
2541 iflag = 0
2542 if ( lwsp.lt.n*(m+2)+5*(m+2)**2+ideg+1 ) iflag = -1
2543 if ( liwsp.lt.m+2 ) iflag = -2
2544 if ( m.ge.n .or. m.le.0 ) iflag = -3
2545 if ( iflag.ne.0 ) stop 'bad sizes (in input of ZGEXPV)'
2547 *--- initialisations ...
2549 k1 = 2
2550 mh = m + 2
2551 iv = 1
2552 ih = iv + n*(m+1) + n
2553 ifree = ih + mh*mh
2554 lfree = lwsp - ifree + 1
2556 ibrkflag = 0
2557 mbrkdwn = m
2558 nmult = 0
2559 nreject = 0
2560 nexph = 0
2561 nscale = 0
2563 t_out = ABS( t )
2564 tbrkdwn = 0.0d0
2565 step_min = t_out
2566 step_max = 0.0d0
2567 nstep = 0
2568 s_error = 0.0d0
2569 x_error = 0.0d0
2570 t_now = 0.0d0
2571 t_new = 0.0d0
2573 p1 = 4.0d0/3.0d0
2574 1 p2 = p1 - 1.0d0
2575 p3 = p2 + p2 + p2
2576 eps = ABS( p3-1.0d0 )
2577 if ( eps.eq.0.0d0 ) go to 1
2578 if ( tol.le.eps ) tol = SQRT( eps )
2579 rndoff = eps*anorm
2581 break_tol = 1.0d-7
2582 *>>> break_tol = tol
2583 *>>> break_tol = anorm*tol
2585 sgn = SIGN( 1.0d0,t )
2586 call ZCOPY( n, v,1, w,1 )
2587 beta = DZNRM2( n, w,1 )
2588 vnorm = beta
2589 hump = beta
2591 *--- obtain the very first stepsize ...
2593 SQR1 = SQRT( 0.1d0 )
2594 xm = 1.0d0/DBLE( m )
2595 p2 = tol*(((m+1)/2.72D0)**(m+1))*SQRT(2.0D0*3.14D0*(m+1))
2596 t_new = (1.0d0/anorm)*(p2/(4.0d0*beta*anorm))**xm
2597 p1 = 10.0d0**(NINT( LOG10( t_new )-SQR1 )-1)
2598 t_new = AINT( t_new/p1 + 0.55d0 ) * p1
2600 *--- step-by-step integration ...
2602 100 if ( t_now.ge.t_out ) goto 500
2604 nstep = nstep + 1
2605 t_step = MIN( t_out-t_now, t_new )
2606 p1 = 1.0d0/beta
2607 do i = 1,n
2608 wsp(iv + i-1) = p1*w(i)
2609 enddo
2610 do i = 1,mh*mh
2611 wsp(ih+i-1) = ZERO
2612 enddo
2614 *--- Arnoldi loop ...
2616 j1v = iv + n
2617 do 200 j = 1,m
2618 nmult = nmult + 1
2619 call matvec( wsp(j1v-n), wsp(j1v) )
2620 do i = 1,j
2621 hij = ZDOTC( n, wsp(iv+(i-1)*n),1, wsp(j1v),1 )
2622 call ZAXPY( n, -hij, wsp(iv+(i-1)*n),1, wsp(j1v),1 )
2623 wsp(ih+(j-1)*mh+i-1) = hij
2624 enddo
2625 hj1j = DZNRM2( n, wsp(j1v),1 )
2626 *--- if `happy breakdown' go straightforward at the end ...
2627 if ( hj1j.le.break_tol ) then
2628 print*,'happy breakdown: mbrkdwn =',j,' h =',hj1j
2629 k1 = 0
2630 ibrkflag = 1
2631 mbrkdwn = j
2632 tbrkdwn = t_now
2633 t_step = t_out-t_now
2634 goto 300
2635 endif
2636 wsp(ih+(j-1)*mh+j) = CMPLX( hj1j )
2637 call ZDSCAL( n, 1.0d0/hj1j, wsp(j1v),1 )
2638 j1v = j1v + n
2639 200 continue
2640 nmult = nmult + 1
2641 call matvec( wsp(j1v-n), wsp(j1v) )
2642 avnorm = DZNRM2( n, wsp(j1v),1 )
2644 *--- set 1 for the 2-corrected scheme ...
2646 300 continue
2647 wsp(ih+m*mh+m+1) = ONE
2649 *--- loop while ireject<mxreject until the tolerance is reached ...
2651 ireject = 0
2652 401 continue
2654 *--- compute w = beta*V*exp(t_step*H)*e1 ...
2656 nexph = nexph + 1
2657 mx = mbrkdwn + k1
2658 if ( ideg.ne.0 ) then
2659 *--- irreducible rational Pade approximation ...
2660 call ZGPADM( ideg, mx, sgn*t_step, wsp(ih),mh,
2661 . wsp(ifree),lfree, iwsp, iexph, ns, iflag )
2662 iexph = ifree + iexph - 1
2663 nscale = nscale + ns
2664 else
2665 *--- uniform rational Chebyshev approximation ...
2666 iexph = ifree
2667 do i = 1,mx
2668 wsp(iexph+i-1) = ZERO
2669 enddo
2670 wsp(iexph) = ONE
2671 call ZNCHBV(mx,sgn*t_step,wsp(ih),mh,wsp(iexph),wsp(ifree+mx))
2672 endif
2673 402 continue
2675 *--- error estimate ...
2677 if ( k1.eq.0 ) then
2678 err_loc = tol
2679 else
2680 p1 = ABS( wsp(iexph+m) ) * beta
2681 p2 = ABS( wsp(iexph+m+1) ) * beta * avnorm
2682 if ( p1.gt.10.0d0*p2 ) then
2683 err_loc = p2
2684 xm = 1.0d0/DBLE( m )
2685 elseif ( p1.gt.p2 ) then
2686 err_loc = (p1*p2)/(p1-p2)
2687 xm = 1.0d0/DBLE( m )
2688 else
2689 err_loc = p1
2690 xm = 1.0d0/DBLE( m-1 )
2691 endif
2692 endif
2694 *--- reject the step-size if the error is not acceptable ...
2696 if ( (k1.ne.0) .and. (err_loc.gt.delta*t_step*tol) .and.
2697 . (mxreject.eq.0 .or. ireject.lt.mxreject) ) then
2698 t_old = t_step
2699 t_step = gamma * t_step * (t_step*tol/err_loc)**xm
2700 p1 = 10.0d0**(NINT( LOG10( t_step )-SQR1 )-1)
2701 t_step = AINT( t_step/p1 + 0.55d0 ) * p1
2702 if ( itrace.ne.0 ) then
2703 print*,'t_step =',t_old
2704 print*,'err_loc =',err_loc
2705 print*,'err_required =',delta*t_old*tol
2706 print*,'stepsize rejected, stepping down to:',t_step
2707 endif
2708 ireject = ireject + 1
2709 nreject = nreject + 1
2710 if ( mxreject.ne.0 .and. ireject.gt.mxreject ) then
2711 print*,"Failure in ZGEXPV: ---"
2712 print*,"The requested tolerance is too high."
2713 Print*,"Rerun with a smaller value."
2714 iflag = 2
2715 return
2716 endif
2717 goto 401
2718 endif
2720 *--- now update w = beta*V*exp(t_step*H)*e1 and the hump ...
2722 mx = mbrkdwn + MAX( 0,k1-1 )
2723 hij = CMPLX( beta )
2724 call ZGEMV( 'n', n,mx,hij,wsp(iv),n,wsp(iexph),1,ZERO,w,1 )
2725 beta = DZNRM2( n, w,1 )
2726 hump = MAX( hump, beta )
2728 *--- suggested value for the next stepsize ...
2730 t_new = gamma * t_step * (t_step*tol/err_loc)**xm
2731 p1 = 10.0d0**(NINT( LOG10( t_new )-SQR1 )-1)
2732 t_new = AINT( t_new/p1 + 0.55d0 ) * p1
2734 err_loc = MAX( err_loc,rndoff )
2736 *--- update the time covered ...
2738 t_now = t_now + t_step
2740 *--- display and keep some information ...
2742 if ( itrace.ne.0 ) then
2743 print*,'integration',nstep,'---------------------------------'
2744 print*,'scale-square =',ns
2745 print*,'step_size =',t_step
2746 print*,'err_loc =',err_loc
2747 print*,'next_step =',t_new
2748 endif
2750 step_min = MIN( step_min, t_step )
2751 step_max = MAX( step_max, t_step )
2752 s_error = s_error + err_loc
2753 x_error = MAX( x_error, err_loc )
2755 if ( mxstep.eq.0 .or. nstep.lt.mxstep ) goto 100
2756 iflag = 1
2758 500 continue
2760 iwsp(1) = nmult
2761 iwsp(2) = nexph
2762 iwsp(3) = nscale
2763 iwsp(4) = nstep
2764 iwsp(5) = nreject
2765 iwsp(6) = ibrkflag
2766 iwsp(7) = mbrkdwn
2768 wsp(1) = CMPLX( step_min )
2769 wsp(2) = CMPLX( step_max )
2770 wsp(3) = CMPLX( 0.0d0 )
2771 wsp(4) = CMPLX( 0.0d0 )
2772 wsp(5) = CMPLX( x_error )
2773 wsp(6) = CMPLX( s_error )
2774 wsp(7) = CMPLX( tbrkdwn )
2775 wsp(8) = CMPLX( sgn*t_now )
2776 wsp(9) = CMPLX( hump/vnorm )
2777 wsp(10) = CMPLX( beta/vnorm )
2779 *----------------------------------------------------------------------|
2780 *----------------------------------------------------------------------|
2781 subroutine ZHEXPV( n, m, t, v, w, tol, anorm,
2782 . wsp,lwsp, iwsp,liwsp, matvec, itrace,iflag )
2784 implicit none
2785 integer n, m, lwsp, liwsp, itrace, iflag, iwsp(liwsp)
2786 double precision t, tol, anorm
2787 complex*16 v(n), w(n), wsp(lwsp)
2788 external matvec
2790 *-----Purpose----------------------------------------------------------|
2792 *--- ZHEXPV computes w = exp(t*A)*v for a Hermitian matrix A.
2794 * It does not compute the matrix exponential in isolation but
2795 * instead, it computes directly the action of the exponential
2796 * operator on the operand vector. This way of doing so allows
2797 * for addressing large sparse problems.
2799 * The method used is based on Krylov subspace projection
2800 * techniques and the matrix under consideration interacts only
2801 * via the external routine `matvec' performing the matrix-vector
2802 * product (matrix-free method).
2804 *-----Arguments--------------------------------------------------------|
2806 * n : (input) order of the principal matrix A.
2808 * m : (input) maximum size for the Krylov basis.
2810 * t : (input) time at wich the solution is needed (can be < 0).
2812 * v(n) : (input) given operand vector.
2814 * w(n) : (output) computed approximation of exp(t*A)*v.
2816 * tol : (input/output) the requested accuracy tolerance on w.
2817 * If on input tol=0.0d0 or tol is too small (tol.le.eps)
2818 * the internal value sqrt(eps) is used, and tol is set to
2819 * sqrt(eps) on output (`eps' denotes the machine epsilon).
2820 * (`Happy breakdown' is assumed if h(j+1,j) .le. anorm*tol)
2822 * anorm : (input) an approximation of some norm of A.
2824 * wsp(lwsp): (workspace) lwsp .ge. n*(m+1)+n+(m+2)^2+4*(m+2)^2+ideg+1
2825 * +---------+-------+---------------+
2826 * (actually, ideg=6) V H wsp for PADE
2828 * iwsp(liwsp): (workspace) liwsp .ge. m+2
2830 * matvec : external subroutine for matrix-vector multiplication.
2831 * synopsis: matvec( x, y )
2832 * complex*16 x(*), y(*)
2833 * computes: y(1:n) <- A*x(1:n)
2834 * where A is the principal matrix.
2836 * itrace : (input) running mode. 0=silent, 1=print step-by-step info
2838 * iflag : (output) exit flag.
2839 * <0 - bad input arguments
2840 * 0 - no problem
2841 * 1 - maximum number of steps reached without convergence
2842 * 2 - requested tolerance was too high
2844 *-----Accounts on the computation--------------------------------------|
2845 * Upon exit, an interested user may retrieve accounts on the
2846 * computations. They are located in the workspace arrays wsp and
2847 * iwsp as indicated below:
2849 * location mnemonic description
2850 * -----------------------------------------------------------------|
2851 * iwsp(1) = nmult, number of matrix-vector multiplications used
2852 * iwsp(2) = nexph, number of Hessenberg matrix exponential evaluated
2853 * iwsp(3) = nscale, number of repeated squaring involved in Pade
2854 * iwsp(4) = nstep, number of integration steps used up to completion
2855 * iwsp(5) = nreject, number of rejected step-sizes
2856 * iwsp(6) = ibrkflag, set to 1 if `happy breakdown' and 0 otherwise
2857 * iwsp(7) = mbrkdwn, if `happy brkdown', basis-size when it occured
2858 * -----------------------------------------------------------------|
2859 * wsp(1) = step_min, minimum step-size used during integration
2860 * wsp(2) = step_max, maximum step-size used during integration
2861 * wsp(3) = dummy
2862 * wsp(4) = dummy
2863 * wsp(5) = x_error, maximum among all local truncation errors
2864 * wsp(6) = s_error, global sum of local truncation errors
2865 * wsp(7) = tbrkdwn, if `happy breakdown', time when it occured
2866 * wsp(8) = t_now, integration domain successfully covered
2867 * wsp(9) = hump, i.e., max||exp(sA)||, s in [0,t] (or [t,0] if t<0)
2868 * wsp(10) = ||w||/||v||, scaled norm of the solution w.
2869 * -----------------------------------------------------------------|
2870 * The `hump' is a measure of the conditioning of the problem. The
2871 * matrix exponential is well-conditioned if hump = 1, whereas it is
2872 * poorly-conditioned if hump >> 1. However the solution can still be
2873 * relatively fairly accurate even when the hump is large (the hump
2874 * is an upper bound), especially when the hump and the scaled norm
2875 * of w [this is also computed and returned in wsp(10)] are of the
2876 * same order of magnitude (further details in reference below).
2878 *----------------------------------------------------------------------|
2879 *-----The following parameters may also be adjusted herein-------------|
2881 integer mxstep, mxreject, ideg
2882 double precision delta, gamma
2883 parameter( mxstep = 500,
2884 . mxreject = 0,
2885 . ideg = 6,
2886 . delta = 1.2d0,
2887 . gamma = 0.9d0 )
2889 * mxstep : maximum allowable number of integration steps.
2890 * The value 0 means an infinite number of steps.
2892 * mxreject: maximum allowable number of rejections at each step.
2893 * The value 0 means an infinite number of rejections.
2895 * ideg : the Pade approximation of type (ideg,ideg) is used as
2896 * an approximation to exp(H). The value 0 switches to the
2897 * uniform rational Chebyshev approximation of type (14,14)
2899 * delta : local truncation error `safety factor'
2901 * gamma : stepsize `shrinking factor'
2903 *----------------------------------------------------------------------|
2904 * Roger B. Sidje (rbs@maths.uq.edu.au)
2905 * EXPOKIT: Software Package for Computing Matrix Exponentials.
2906 * ACM - Transactions On Mathematical Software, 24(1):130-156, 1998
2907 *----------------------------------------------------------------------|
2909 complex*16 ZERO, ONE
2910 parameter( ZERO=(0.0d0,0.0d0), ONE=(1.0d0,0.0d0) )
2911 integer i, j, k1, mh, mx, iv, ih, j1v, ns, ifree, lfree, iexph,
2912 . ireject,ibrkflag,mbrkdwn, nmult, nreject, nexph, nscale,
2913 . nstep
2914 double precision sgn, t_out, tbrkdwn, step_min,step_max, err_loc,
2915 . s_error, x_error, t_now, t_new, t_step, t_old,
2916 . xm, beta, break_tol, p1, p2, p3, eps, rndoff,
2917 . vnorm, avnorm, hj1j, hump, SQR1
2918 complex*16 hjj
2920 intrinsic AINT,ABS,CMPLX,DBLE,INT,LOG10,MAX,MIN,NINT,SIGN,SQRT
2921 complex*16 ZDOTC
2922 double precision DZNRM2
2924 *--- check restrictions on input parameters ...
2926 iflag = 0
2927 if ( lwsp.lt.n*(m+2)+5*(m+2)**2+ideg+1 ) iflag = -1
2928 if ( liwsp.lt.m+2 ) iflag = -2
2929 if ( m.ge.n .or. m.le.0 ) iflag = -3
2930 if ( iflag.ne.0 ) stop 'bad sizes (in input of DHEXPV)'
2932 *--- initialisations ...
2934 k1 = 2
2935 mh = m + 2
2936 iv = 1
2937 ih = iv + n*(m+1) + n
2938 ifree = ih + mh*mh
2939 lfree = lwsp - ifree + 1
2941 ibrkflag = 0
2942 mbrkdwn = m
2943 nmult = 0
2944 nreject = 0
2945 nexph = 0
2946 nscale = 0
2948 t_out = ABS( t )
2949 tbrkdwn = 0.0d0
2950 step_min = t_out
2951 step_max = 0.0d0
2952 nstep = 0
2953 s_error = 0.0d0
2954 x_error = 0.0d0
2955 t_now = 0.0d0
2956 t_new = 0.0d0
2958 p1 = 4.0d0/3.0d0
2959 1 p2 = p1 - 1.0d0
2960 p3 = p2 + p2 + p2
2961 eps = ABS( p3-1.0d0 )
2962 if ( eps.eq.0.0d0 ) go to 1
2963 if ( tol.le.eps ) tol = SQRT( eps )
2964 rndoff = eps*anorm
2966 break_tol = 1.0d-7
2967 *>>> break_tol = tol
2968 *>>> break_tol = anorm*tol
2970 sgn = SIGN( 1.0d0,t )
2971 call ZCOPY( n, v,1, w,1 )
2972 beta = DZNRM2( n, w,1 )
2973 vnorm = beta
2974 hump = beta
2976 *--- obtain the very first stepsize ...
2978 SQR1 = SQRT( 0.1d0 )
2979 xm = 1.0d0/DBLE( m )
2980 p2 = tol*(((m+1)/2.72D0)**(m+1))*SQRT(2.0D0*3.14D0*(m+1))
2981 t_new = (1.0d0/anorm)*(p2/(4.0d0*beta*anorm))**xm
2982 p1 = 10.0d0**(NINT( LOG10( t_new )-SQR1 )-1)
2983 t_new = AINT( t_new/p1 + 0.55d0 ) * p1
2985 *--- step-by-step integration ...
2987 100 if ( t_now.ge.t_out ) goto 500
2989 nstep = nstep + 1
2990 t_step = MIN( t_out-t_now, t_new )
2991 beta = DZNRM2( n, w,1 )
2992 p1 = 1.0d0/beta
2993 do i = 1,n
2994 wsp(iv + i-1) = p1*w(i)
2995 enddo
2996 do i = 1,mh*mh
2997 wsp(ih+i-1) = ZERO
2998 enddo
3000 *--- Lanczos loop ...
3002 j1v = iv + n
3003 do 200 j = 1,m
3004 nmult = nmult + 1
3005 call matvec( wsp(j1v-n), wsp(j1v) )
3006 if ( j.gt.1 )
3007 . call ZAXPY(n,-wsp(ih+(j-1)*mh+j-2),wsp(j1v-2*n),1,wsp(j1v),1)
3008 hjj = ZDOTC( n, wsp(j1v-n),1, wsp(j1v),1 )
3009 call ZAXPY( n, -hjj, wsp(j1v-n),1, wsp(j1v),1 )
3010 hj1j = DZNRM2( n, wsp(j1v),1 )
3011 wsp(ih+(j-1)*(mh+1)) = hjj
3012 *--- if `happy breakdown' go straightforward at the end ...
3013 if ( hj1j.le.break_tol ) then
3014 print*,'happy breakdown: mbrkdwn =',j,' h =',hj1j
3015 k1 = 0
3016 ibrkflag = 1
3017 mbrkdwn = j
3018 tbrkdwn = t_now
3019 t_step = t_out-t_now
3020 goto 300
3021 endif
3022 wsp(ih+(j-1)*mh+j) = CMPLX( hj1j )
3023 wsp(ih+j*mh+j-1) = CMPLX( hj1j )
3024 call ZDSCAL( n, 1.0d0/hj1j, wsp(j1v),1 )
3025 j1v = j1v + n
3026 200 continue
3027 nmult = nmult + 1
3028 call matvec( wsp(j1v-n), wsp(j1v) )
3029 avnorm = DZNRM2( n, wsp(j1v),1 )
3031 *--- set 1 for the 2-corrected scheme ...
3033 300 continue
3034 wsp(ih+m*mh+m-1) = ZERO
3035 wsp(ih+m*mh+m+1) = ONE
3037 *--- loop while ireject<mxreject until the tolerance is reached ...
3039 ireject = 0
3040 401 continue
3042 *--- compute w = beta*V*exp(t_step*H)*e1 ...
3044 nexph = nexph + 1
3045 mx = mbrkdwn + k1
3046 if ( ideg.ne.0 ) then
3047 *--- irreducible rational Pade approximation ...
3048 call ZGPADM( ideg, mx, sgn*t_step, wsp(ih),mh,
3049 . wsp(ifree),lfree, iwsp, iexph, ns, iflag )
3050 iexph = ifree + iexph - 1
3051 nscale = nscale + ns
3052 else
3053 *--- uniform rational Chebyshev approximation ...
3054 iexph = ifree
3055 do i = 1,mx
3056 wsp(iexph+i-1) = ZERO
3057 enddo
3058 wsp(iexph) = ONE
3059 call ZNCHBV(mx,sgn*t_step,wsp(ih),mh,wsp(iexph),wsp(ifree+mx))
3060 endif
3062 402 continue
3064 *--- error estimate ...
3066 if ( k1.eq.0 ) then
3067 err_loc = tol
3068 else
3069 p1 = ABS( wsp(iexph+m) ) * beta
3070 p2 = ABS( wsp(iexph+m+1) ) * beta * avnorm
3071 if ( p1.gt.10.0d0*p2 ) then
3072 err_loc = p2
3073 xm = 1.0d0/DBLE( m )
3074 elseif ( p1.gt.p2 ) then
3075 err_loc = (p1*p2)/(p1-p2)
3076 xm = 1.0d0/DBLE( m )
3077 else
3078 err_loc = p1
3079 xm = 1.0d0/DBLE( m-1 )
3080 endif
3081 endif
3083 *--- reject the step-size if the error is not acceptable ...
3085 if ( (k1.ne.0) .and. (err_loc.gt.delta*t_step*tol) .and.
3086 . (mxreject.eq.0 .or. ireject.lt.mxreject) ) then
3087 t_old = t_step
3088 t_step = gamma * t_step * (t_step*tol/err_loc)**xm
3089 p1 = 10.0d0**(NINT( LOG10( t_step )-SQR1 )-1)
3090 t_step = AINT( t_step/p1 + 0.55d0 ) * p1
3091 if ( itrace.ne.0 ) then
3092 print*,'t_step =',t_old
3093 print*,'err_loc =',err_loc
3094 print*,'err_required =',delta*t_old*tol
3095 print*,'stepsize rejected, stepping down to:',t_step
3096 endif
3097 ireject = ireject + 1
3098 nreject = nreject + 1
3099 if ( mxreject.ne.0 .and. ireject.gt.mxreject ) then
3100 print*,"Failure in ZHEXPV: ---"
3101 print*,"The requested tolerance is too high."
3102 Print*,"Rerun with a smaller value."
3103 iflag = 2
3104 return
3105 endif
3106 goto 401
3107 endif
3109 *--- now update w = beta*V*exp(t_step*H)*e1 and the hump ...
3111 mx = mbrkdwn + MAX( 0,k1-1 )
3112 hjj = CMPLX( beta )
3113 call ZGEMV( 'n', n,mx,hjj,wsp(iv),n,wsp(iexph),1,ZERO,w,1 )
3114 beta = DZNRM2( n, w,1 )
3115 hump = MAX( hump, beta )
3117 *--- suggested value for the next stepsize ...
3119 t_new = gamma * t_step * (t_step*tol/err_loc)**xm
3120 p1 = 10.0d0**(NINT( LOG10( t_new )-SQR1 )-1)
3121 t_new = AINT( t_new/p1 + 0.55d0 ) * p1
3123 err_loc = MAX( err_loc,rndoff )
3125 *--- update the time covered ...
3127 t_now = t_now + t_step
3129 *--- display and keep some information ...
3131 if ( itrace.ne.0 ) then
3132 print*,'integration',nstep,'---------------------------------'
3133 print*,'scale-square =',ns
3134 print*,'step_size =',t_step
3135 print*,'err_loc =',err_loc
3136 print*,'next_step =',t_new
3137 endif
3139 step_min = MIN( step_min, t_step )
3140 step_max = MAX( step_max, t_step )
3141 s_error = s_error + err_loc
3142 x_error = MAX( x_error, err_loc )
3144 if ( mxstep.eq.0 .or. nstep.lt.mxstep ) goto 100
3145 iflag = 1
3147 500 continue
3149 iwsp(1) = nmult
3150 iwsp(2) = nexph
3151 iwsp(3) = nscale
3152 iwsp(4) = nstep
3153 iwsp(5) = nreject
3154 iwsp(6) = ibrkflag
3155 iwsp(7) = mbrkdwn
3157 wsp(1) = CMPLX( step_min )
3158 wsp(2) = CMPLX( step_max )
3159 wsp(3) = CMPLX( 0.0d0 )
3160 wsp(4) = CMPLX( 0.0d0 )
3161 wsp(5) = CMPLX( x_error )
3162 wsp(6) = CMPLX( s_error )
3163 wsp(7) = CMPLX( tbrkdwn )
3164 wsp(8) = CMPLX( sgn*t_now )
3165 wsp(9) = CMPLX( hump/vnorm )
3166 wsp(10) = CMPLX( beta/vnorm )
3168 *----------------------------------------------------------------------|
3169 *----------------------------------------------------------------------|
3170 subroutine DGPHIV( n, m, t, u, v, w, tol, anorm,
3171 . wsp,lwsp, iwsp,liwsp, matvec, itrace,iflag )
3173 implicit none
3174 integer n, m, lwsp, liwsp, itrace, iflag, iwsp(liwsp)
3175 double precision t, tol, anorm, u(n), v(n), w(n), wsp(lwsp)
3176 external matvec
3178 *-----Purpose----------------------------------------------------------|
3180 *--- DGPHIV computes w = exp(t*A)v + t*phi(tA)u which is the solution
3181 * of the nonhomogeneous linear ODE problem w' = Aw + u, w(0) = v.
3182 * phi(z) = (exp(z)-1)/z and A is a General matrix.
3184 * The method used is based on Krylov subspace projection
3185 * techniques and the matrix under consideration interacts only
3186 * via the external routine `matvec' performing the matrix-vector
3187 * product (matrix-free method).
3189 *-----Arguments--------------------------------------------------------|
3191 * n : (input) order of the principal matrix A.
3193 * m : (input) maximum size for the Krylov basis.
3195 * t : (input) time at wich the solution is needed (can be < 0).
3197 * u(n) : (input) operand vector with respect to the phi function
3198 * (forcing term of the ODE problem).
3200 * v(n) : (input) operand vector with respect to the exp function
3201 * (initial condition of the ODE problem).
3203 * w(n) : (output) computed approximation of exp(t*A)v + t*phi(tA)u
3205 * tol : (input/output) the requested accuracy tolerance on w.
3206 * If on input tol=0.0d0 or tol is too small (tol.le.eps)
3207 * the internal value sqrt(eps) is used, and tol is set to
3208 * sqrt(eps) on output (`eps' denotes the machine epsilon).
3209 * (`Happy breakdown' is assumed if h(j+1,j) .le. anorm*tol)
3211 * anorm : (input) an approximation of some norm of A.
3213 * wsp(lwsp): (workspace) lwsp .ge. n*(m+1)+n+(m+3)^2+4*(m+3)^2+ideg+1
3214 * +---------+-------+---------------+
3215 * (actually, ideg=6) V H wsp for PADE
3217 * iwsp(liwsp): (workspace) liwsp .ge. m+3
3219 * matvec : external subroutine for matrix-vector multiplication.
3220 * synopsis: matvec( x, y )
3221 * double precision x(*), y(*)
3222 * computes: y(1:n) <- A*x(1:n)
3223 * where A is the principal matrix.
3225 * itrace : (input) running mode. 0=silent, 1=print step-by-step info
3227 * iflag : (output) exit flag.
3228 * <0 - bad input arguments
3229 * 0 - no problem
3230 * 1 - maximum number of steps reached without convergence
3231 * 2 - requested tolerance was too high
3233 *-----Accounts on the computation--------------------------------------|
3234 * Upon exit, an interested user may retrieve accounts on the
3235 * computations. They are located in the workspace arrays wsp and
3236 * iwsp as indicated below:
3238 * location mnemonic description
3239 * -----------------------------------------------------------------|
3240 * iwsp(1) = nmult, number of matrix-vector multiplications used
3241 * iwsp(2) = nexph, number of Hessenberg matrix exponential evaluated
3242 * iwsp(3) = nscale, number of repeated squaring involved in Pade
3243 * iwsp(4) = nstep, number of integration steps used up to completion
3244 * iwsp(5) = nreject, number of rejected step-sizes
3245 * iwsp(6) = ibrkflag, set to 1 if `happy breakdown' and 0 otherwise
3246 * iwsp(7) = mbrkdwn, if `happy brkdown', basis-size when it occured
3247 * -----------------------------------------------------------------|
3248 * wsp(1) = step_min, minimum step-size used during integration
3249 * wsp(2) = step_max, maximum step-size used during integration
3250 * wsp(3) = dummy
3251 * wsp(4) = dummy
3252 * wsp(5) = x_error, maximum among all local truncation errors
3253 * wsp(6) = s_error, global sum of local truncation errors
3254 * wsp(7) = tbrkdwn, if `happy breakdown', time when it occured
3255 * wsp(8) = t_now, integration domain successfully covered
3257 *----------------------------------------------------------------------|
3258 *-----The following parameters may also be adjusted herein-------------|
3260 integer mxstep, mxreject, ideg
3261 double precision delta, gamma
3262 parameter( mxstep = 1000,
3263 . mxreject = 0,
3264 . ideg = 6,
3265 . delta = 1.2d0,
3266 . gamma = 0.9d0 )
3268 * mxstep : maximum allowable number of integration steps.
3269 * The value 0 means an infinite number of steps.
3271 * mxreject: maximum allowable number of rejections at each step.
3272 * The value 0 means an infinite number of rejections.
3274 * ideg : the Pade approximation of type (ideg,ideg) is used as
3275 * an approximation to exp(H).
3277 * delta : local truncation error `safety factor'
3279 * gamma : stepsize `shrinking factor'
3281 *----------------------------------------------------------------------|
3282 * Roger B. Sidje (rbs@maths.uq.edu.au)
3283 * EXPOKIT: Software Package for Computing Matrix Exponentials.
3284 * ACM - Transactions On Mathematical Software, 24(1):130-156, 1998
3285 *----------------------------------------------------------------------|
3287 integer i, j, k1, mh, mx, iv, ih, j1v, ns, ifree, lfree, iexph,
3288 . ireject,ibrkflag,mbrkdwn, nmult, nreject, nexph, nscale,
3289 . nstep, iphih
3290 double precision sgn, t_out, tbrkdwn, step_min,step_max, err_loc,
3291 . s_error, x_error, t_now, t_new, t_step, t_old,
3292 . xm, beta, break_tol, p1, p2, p3, eps, rndoff,
3293 . avnorm, hj1j, hij, SQR1
3295 intrinsic AINT,ABS,DBLE,LOG10,MAX,MIN,NINT,SIGN,SQRT
3296 double precision DDOT, DNRM2
3298 *--- check restrictions on input parameters ...
3299 iflag = 0
3300 if ( lwsp.lt.n*(m+3)+5*(m+3)**2+ideg+1 ) iflag = -1
3301 if ( liwsp.lt.m+3 ) iflag = -2
3302 if ( m.ge.n .or. m.le.0 ) iflag = -3
3303 if ( iflag.ne.0 ) stop 'bad sizes (in input of DGPHIV)'
3305 *--- initialisations ...
3307 k1 = 3
3308 mh = m + 3
3309 iv = 1
3310 ih = iv + n*(m+1) + n
3311 ifree = ih + mh*mh
3312 lfree = lwsp - ifree + 1
3314 ibrkflag = 0
3315 mbrkdwn = m
3316 nmult = 0
3317 nreject = 0
3318 nexph = 0
3319 nscale = 0
3321 t_out = ABS( t )
3322 tbrkdwn = 0.0d0
3323 step_min = t_out
3324 step_max = 0.0d0
3325 nstep = 0
3326 s_error = 0.0d0
3327 x_error = 0.0d0
3328 t_now = 0.0d0
3329 t_new = 0.0d0
3331 p1 = 4.0d0/3.0d0
3332 1 p2 = p1 - 1.0d0
3333 p3 = p2 + p2 + p2
3334 eps = ABS( p3-1.0d0 )
3335 if ( eps.eq.0.0d0 ) go to 1
3336 if ( tol.le.eps ) tol = SQRT( eps )
3337 rndoff = eps*anorm
3339 break_tol = 1.0d-7
3340 *>>> break_tol = tol
3341 *>>> break_tol = anorm*tol
3344 *--- step-by-step integration ...
3346 sgn = SIGN( 1.0d0,t )
3347 SQR1 = SQRT( 0.1d0 )
3348 call DCOPY( n, v,1, w,1 )
3350 100 if ( t_now.ge.t_out ) goto 500
3352 nmult = nmult + 1
3353 call matvec( w, wsp(iv) )
3354 call DAXPY( n, 1.0d0, u,1, wsp(iv),1 )
3355 beta = DNRM2( n, wsp(iv),1 )
3356 if ( beta.eq.0.0d0 ) goto 500
3357 call DSCAL( n, 1.0d0/beta, wsp(iv),1 )
3358 do i = 1,mh*mh
3359 wsp(ih+i-1) = 0.0d0
3360 enddo
3362 if ( nstep.eq.0 ) then
3363 *--- obtain the very first stepsize ...
3364 xm = 1.0d0/DBLE( m )
3365 p1 = tol*(((m+1)/2.72D0)**(m+1))*SQRT(2.0D0*3.14D0*(m+1))
3366 t_new = (1.0d0/anorm)*(p1/(4.0d0*beta*anorm))**xm
3367 p1 = 10.0d0**(NINT( LOG10( t_new )-SQR1 )-1)
3368 t_new = AINT( t_new/p1 + 0.55d0 ) * p1
3369 endif
3370 nstep = nstep + 1
3371 t_step = MIN( t_out-t_now, t_new )
3373 *--- Arnoldi loop ...
3375 j1v = iv + n
3376 do 200 j = 1,m
3377 nmult = nmult + 1
3378 call matvec( wsp(j1v-n), wsp(j1v) )
3379 do i = 1,j
3380 hij = DDOT( n, wsp(iv+(i-1)*n),1, wsp(j1v),1 )
3381 call DAXPY( n, -hij, wsp(iv+(i-1)*n),1, wsp(j1v),1 )
3382 wsp(ih+(j-1)*mh+i-1) = hij
3383 enddo
3384 hj1j = DNRM2( n, wsp(j1v),1 )
3385 *--- if `happy breakdown' go straightforward at the end ...
3386 if ( hj1j.le.break_tol ) then
3387 print*,'happy breakdown: mbrkdwn =',j,' h =',hj1j
3388 k1 = 0
3389 ibrkflag = 1
3390 mbrkdwn = j
3391 tbrkdwn = t_now
3392 t_step = t_out-t_now
3393 goto 300
3394 endif
3395 wsp(ih+(j-1)*mh+j) = hj1j
3396 call DSCAL( n, 1.0d0/hj1j, wsp(j1v),1 )
3397 j1v = j1v + n
3398 200 continue
3399 nmult = nmult + 1
3400 call matvec( wsp(j1v-n), wsp(j1v) )
3401 avnorm = DNRM2( n, wsp(j1v),1 )
3403 *--- set 1's for the 3-extended scheme ...
3405 300 continue
3406 wsp(ih+mh*mbrkdwn) = 1.0d0
3407 wsp(ih+(m-1)*mh+m) = 0.0d0
3408 do i = 1,k1-1
3409 wsp(ih+(m+i)*mh+m+i-1) = 1.0d0
3410 enddo
3412 *--- loop while ireject<mxreject until the tolerance is reached ...
3414 ireject = 0
3415 401 continue
3417 *--- compute w = beta*t_step*V*phi(t_step*H)*e1 + w
3419 nexph = nexph + 1
3420 *--- irreducible rational Pade approximation ...
3421 mx = mbrkdwn + MAX( 1,k1 )
3422 call DGPADM( ideg, mx, sgn*t_step, wsp(ih),mh,
3423 . wsp(ifree),lfree, iwsp, iexph, ns, iflag )
3424 iexph = ifree + iexph - 1
3425 iphih = iexph + mbrkdwn*mx
3426 nscale = nscale + ns
3427 wsp(iphih+mbrkdwn) = hj1j*wsp(iphih+mx+mbrkdwn-1)
3428 wsp(iphih+mbrkdwn+1) = hj1j*wsp(iphih+2*mx+mbrkdwn-1)
3430 402 continue
3431 *--- error estimate ...
3432 if ( k1.eq.0 ) then
3433 err_loc = tol
3434 else
3435 p1 = ABS( wsp(iphih+m) ) * beta
3436 p2 = ABS( wsp(iphih+m+1) ) * beta * avnorm
3437 if ( p1.gt.10.0d0*p2 ) then
3438 err_loc = p2
3439 xm = 1.0d0/DBLE( m+1 )
3440 elseif ( p1.gt.p2 ) then
3441 err_loc = (p1*p2)/(p1-p2)
3442 xm = 1.0d0/DBLE( m+1 )
3443 else
3444 err_loc = p1
3445 xm = 1.0d0/DBLE( m )
3446 endif
3447 endif
3449 *--- reject the step-size if the error is not acceptable ...
3450 if ( (k1.ne.0) .and. (err_loc.gt.delta*t_step*tol) .and.
3451 . (mxreject.eq.0 .or. ireject.lt.mxreject) ) then
3452 t_old = t_step
3453 t_step = gamma * t_step * (t_step*tol/err_loc)**xm
3454 p1 = 10.0d0**(NINT( LOG10( t_step )-SQR1 )-1)
3455 t_step = AINT( t_step/p1 + 0.55d0 ) * p1
3456 if ( itrace.ne.0 ) then
3457 print*,'t_step =',t_old
3458 print*,'err_loc =',err_loc
3459 print*,'err_required =',delta*t_old*tol
3460 print*,'stepsize rejected, stepping down to:',t_step
3461 endif
3462 ireject = ireject + 1
3463 nreject = nreject + 1
3464 if ( mxreject.ne.0 .and. ireject.gt.mxreject ) then
3465 print*,"Failure in DGPHIV: ---"
3466 print*,"The requested tolerance is too high."
3467 Print*,"Rerun with a smaller value."
3468 iflag = 2
3469 return
3470 endif
3471 goto 401
3472 endif
3474 mx = mbrkdwn + MAX( 0,k1-2 )
3475 call DGEMV( 'n', n,mx,beta,wsp(iv),n,wsp(iphih),1,1.0d0,w,1 )
3477 *--- suggested value for the next stepsize ...
3479 t_new = gamma * t_step * (t_step*tol/err_loc)**xm
3480 p1 = 10.0d0**(NINT( LOG10( t_new )-SQR1 )-1)
3481 t_new = AINT( t_new/p1 + 0.55d0 ) * p1
3483 err_loc = MAX( err_loc,rndoff )
3485 *--- update the time covered ...
3487 t_now = t_now + t_step
3489 *--- display and keep some information ...
3491 if ( itrace.ne.0 ) then
3492 print*,'integration',nstep,'---------------------------------'
3493 print*,'scale-square =',ns
3494 print*,'step_size =',t_step
3495 print*,'err_loc =',err_loc
3496 print*,'next_step =',t_new
3497 endif
3499 step_min = MIN( step_min, t_step )
3500 step_max = MAX( step_max, t_step )
3501 s_error = s_error + err_loc
3502 x_error = MAX( x_error, err_loc )
3504 if ( mxstep.eq.0 .or. nstep.lt.mxstep ) goto 100
3505 iflag = 1
3507 500 continue
3509 iwsp(1) = nmult
3510 iwsp(2) = nexph
3511 iwsp(3) = nscale
3512 iwsp(4) = nstep
3513 iwsp(5) = nreject
3514 iwsp(6) = ibrkflag
3515 iwsp(7) = mbrkdwn
3517 wsp(1) = step_min
3518 wsp(2) = step_max
3519 wsp(3) = 0.0d0
3520 wsp(4) = 0.0d0
3521 wsp(5) = x_error
3522 wsp(6) = s_error
3523 wsp(7) = tbrkdwn
3524 wsp(8) = sgn*t_now
3526 *----------------------------------------------------------------------|
3527 *----------------------------------------------------------------------|
3528 subroutine DSPHIV( n, m, t, u, v, w, tol, anorm,
3529 . wsp,lwsp, iwsp,liwsp, matvec, itrace,iflag )
3531 implicit none
3532 integer n, m, lwsp, liwsp, itrace, iflag, iwsp(liwsp)
3533 double precision t, tol, anorm, u(n), v(n), w(n), wsp(lwsp)
3534 external matvec
3536 *-----Purpose----------------------------------------------------------|
3538 *--- DSPHIV computes w = exp(t*A)v + t*phi(tA)u which is the solution
3539 * of the nonhomogeneous linear ODE problem w' = Aw + u, w(0) = v.
3540 * phi(z) = (exp(z)-1)/z and A is a Symmetric matrix.
3542 * The method used is based on Krylov subspace projection
3543 * techniques and the matrix under consideration interacts only
3544 * via the external routine `matvec' performing the matrix-vector
3545 * product (matrix-free method).
3547 *-----Arguments--------------------------------------------------------|
3549 * n : (input) order of the principal matrix A.
3551 * m : (input) maximum size for the Krylov basis.
3553 * t : (input) time at wich the solution is needed (can be < 0).
3555 * u(n) : (input) operand vector with respect to the phi function
3556 * (forcing term of the ODE problem).
3558 * v(n) : (input) operand vector with respect to the exp function
3559 * (initial condition of the ODE problem).
3561 * w(n) : (output) computed approximation of exp(t*A)v + t*phi(tA)u
3563 * tol : (input/output) the requested accuracy tolerance on w.
3564 * If on input tol=0.0d0 or tol is too small (tol.le.eps)
3565 * the internal value sqrt(eps) is used, and tol is set to
3566 * sqrt(eps) on output (`eps' denotes the machine epsilon).
3567 * (`Happy breakdown' is assumed if h(j+1,j) .le. anorm*tol)
3569 * anorm : (input) an approximation of some norm of A.
3571 * wsp(lwsp): (workspace) lwsp .ge. n*(m+1)+n+(m+3)^2+4*(m+3)^2+ideg+1
3572 * +---------+-------+---------------+
3573 * (actually, ideg=6) V H wsp for PADE
3575 * iwsp(liwsp): (workspace) liwsp .ge. m+3
3577 * matvec : external subroutine for matrix-vector multiplication.
3578 * synopsis: matvec( x, y )
3579 * double precision x(*), y(*)
3580 * computes: y(1:n) <- A*x(1:n)
3581 * where A is the principal matrix.
3583 * itrace : (input) running mode. 0=silent, 1=print step-by-step info
3585 * iflag : (output) exit flag.
3586 * <0 - bad input arguments
3587 * 0 - no problem
3588 * 1 - maximum number of steps reached without convergence
3589 * 2 - requested tolerance was too high
3591 *-----Accounts on the computation--------------------------------------|
3592 * Upon exit, an interested user may retrieve accounts on the
3593 * computations. They are located in the workspace arrays wsp and
3594 * iwsp as indicated below:
3596 * location mnemonic description
3597 * -----------------------------------------------------------------|
3598 * iwsp(1) = nmult, number of matrix-vector multiplications used
3599 * iwsp(2) = nexph, number of Hessenberg matrix exponential evaluated
3600 * iwsp(3) = nscale, number of repeated squaring involved in Pade
3601 * iwsp(4) = nstep, number of integration steps used up to completion
3602 * iwsp(5) = nreject, number of rejected step-sizes
3603 * iwsp(6) = ibrkflag, set to 1 if `happy breakdown' and 0 otherwise
3604 * iwsp(7) = mbrkdwn, if `happy brkdown', basis-size when it occured
3605 * -----------------------------------------------------------------|
3606 * wsp(1) = step_min, minimum step-size used during integration
3607 * wsp(2) = step_max, maximum step-size used during integration
3608 * wsp(3) = dummy
3609 * wsp(4) = dummy
3610 * wsp(5) = x_error, maximum among all local truncation errors
3611 * wsp(6) = s_error, global sum of local truncation errors
3612 * wsp(7) = tbrkdwn, if `happy breakdown', time when it occured
3613 * wsp(8) = t_now, integration domain successfully covered
3615 *----------------------------------------------------------------------|
3616 *-----The following parameters may also be adjusted herein-------------|
3618 integer mxstep, mxreject, ideg
3619 double precision delta, gamma
3620 parameter( mxstep = 500,
3621 . mxreject = 0,
3622 . ideg = 6,
3623 . delta = 1.2d0,
3624 . gamma = 0.9d0 )
3626 * mxstep : maximum allowable number of integration steps.
3627 * The value 0 means an infinite number of steps.
3629 * mxreject: maximum allowable number of rejections at each step.
3630 * The value 0 means an infinite number of rejections.
3632 * ideg : the Pade approximation of type (ideg,ideg) is used as
3633 * an approximation to exp(H).
3635 * delta : local truncation error `safety factor'
3637 * gamma : stepsize `shrinking factor'
3639 *----------------------------------------------------------------------|
3640 * Roger B. Sidje (rbs@maths.uq.edu.au)
3641 * EXPOKIT: Software Package for Computing Matrix Exponentials.
3642 * ACM - Transactions On Mathematical Software, 24(1):130-156, 1998
3643 *----------------------------------------------------------------------|
3645 integer i, j, k1, mh, mx, iv, ih, j1v, ns, ifree, lfree, iexph,
3646 . ireject,ibrkflag,mbrkdwn, nmult, nreject, nexph, nscale,
3647 . nstep, iphih
3648 double precision sgn, t_out, tbrkdwn, step_min,step_max, err_loc,
3649 . s_error, x_error, t_now, t_new, t_step, t_old,
3650 . xm, beta, break_tol, p1, p2, p3, eps, rndoff,
3651 . avnorm, hj1j, hjj, SQR1
3653 intrinsic AINT,ABS,DBLE,LOG10,MAX,MIN,NINT,SIGN,SQRT
3654 double precision DDOT, DNRM2
3656 *--- check restrictions on input parameters ...
3657 iflag = 0
3658 if ( lwsp.lt.n*(m+3)+5*(m+3)**2+ideg+1 ) iflag = -1
3659 if ( liwsp.lt.m+3 ) iflag = -2
3660 if ( m.ge.n .or. m.le.0 ) iflag = -3
3661 if ( iflag.ne.0 ) stop 'bad sizes (in input of DSPHIV)'
3663 *--- initialisations ...
3665 k1 = 3
3666 mh = m + 3
3667 iv = 1
3668 ih = iv + n*(m+1) + n
3669 ifree = ih + mh*mh
3670 lfree = lwsp - ifree + 1
3672 ibrkflag = 0
3673 mbrkdwn = m
3674 nmult = 0
3675 nreject = 0
3676 nexph = 0
3677 nscale = 0
3679 t_out = ABS( t )
3680 tbrkdwn = 0.0d0
3681 step_min = t_out
3682 step_max = 0.0d0
3683 nstep = 0
3684 s_error = 0.0d0
3685 x_error = 0.0d0
3686 t_now = 0.0d0
3687 t_new = 0.0d0
3689 p1 = 4.0d0/3.0d0
3690 1 p2 = p1 - 1.0d0
3691 p3 = p2 + p2 + p2
3692 eps = ABS( p3-1.0d0 )
3693 if ( eps.eq.0.0d0 ) go to 1
3694 if ( tol.le.eps ) tol = SQRT( eps )
3695 rndoff = eps*anorm
3697 break_tol = 1.0d-7
3698 *>>> break_tol = tol
3699 *>>> break_tol = anorm*tol
3702 *--- step-by-step integration ...
3704 sgn = SIGN( 1.0d0,t )
3705 SQR1 = SQRT( 0.1d0 )
3706 call DCOPY( n, v,1, w,1 )
3708 100 if ( t_now.ge.t_out ) goto 500
3710 nmult = nmult + 1
3711 call matvec( w, wsp(iv) )
3712 call DAXPY( n, 1.0d0, u,1, wsp(iv),1 )
3713 beta = DNRM2( n, wsp(iv),1 )
3714 if ( beta.eq.0.0d0 ) goto 500
3715 call DSCAL( n, 1.0d0/beta, wsp(iv),1 )
3716 do i = 1,mh*mh
3717 wsp(ih+i-1) = 0.0d0
3718 enddo
3720 if ( nstep.eq.0 ) then
3721 *--- obtain the very first stepsize ...
3722 xm = 1.0d0/DBLE( m )
3723 p1 = tol*(((m+1)/2.72D0)**(m+1))*SQRT(2.0D0*3.14D0*(m+1))
3724 t_new = (1.0d0/anorm)*(p1/(4.0d0*beta*anorm))**xm
3725 p1 = 10.0d0**(NINT( LOG10( t_new )-SQR1 )-1)
3726 t_new = AINT( t_new/p1 + 0.55d0 ) * p1
3727 endif
3728 nstep = nstep + 1
3729 t_step = MIN( t_out-t_now, t_new )
3731 *--- Lanczos loop ...
3733 j1v = iv + n
3734 do 200 j = 1,m
3735 nmult = nmult + 1
3736 call matvec( wsp(j1v-n), wsp(j1v) )
3737 if ( j.gt.1 )
3738 . call DAXPY(n,-wsp(ih+(j-1)*mh+j-2),wsp(j1v-2*n),1,wsp(j1v),1)
3739 hjj = DDOT( n, wsp(j1v-n),1, wsp(j1v),1 )
3740 call DAXPY( n, -hjj, wsp(j1v-n),1, wsp(j1v),1 )
3741 hj1j = DNRM2( n, wsp(j1v),1 )
3742 wsp(ih+(j-1)*(mh+1)) = hjj
3743 *--- if `happy breakdown' go straightforward at the end ...
3744 if ( hj1j.le.break_tol ) then
3745 print*,'happy breakdown: mbrkdwn =',j,' h =',hj1j
3746 k1 = 0
3747 ibrkflag = 1
3748 mbrkdwn = j
3749 tbrkdwn = t_now
3750 t_step = t_out-t_now
3751 goto 300
3752 endif
3753 wsp(ih+(j-1)*mh+j) = hj1j
3754 wsp(ih+j*mh+j-1) = hj1j
3755 call DSCAL( n, 1.0d0/hj1j, wsp(j1v),1 )
3756 j1v = j1v + n
3757 200 continue
3758 nmult = nmult + 1
3759 call matvec( wsp(j1v-n), wsp(j1v) )
3760 avnorm = DNRM2( n, wsp(j1v),1 )
3762 *--- set 1's for the 3-extended scheme ...
3764 300 continue
3765 wsp(ih+mh*mbrkdwn) = 1.0d0
3766 wsp(ih+m*mh+m-1) = 0.0d0
3767 wsp(ih+(m-1)*mh+m) = 0.0d0
3768 do i = 1,k1-1
3769 wsp(ih+(m+i)*mh+m+i-1) = 1.0d0
3770 enddo
3772 *--- loop while ireject<mxreject until the tolerance is reached ...
3774 ireject = 0
3775 401 continue
3777 *--- compute w = beta*t_step*V*phi(t_step*H)*e1 + w
3779 nexph = nexph + 1
3780 mx = mbrkdwn + MAX( 1,k1 )
3782 *--- irreducible rational Pade approximation ...
3783 call DGPADM( ideg, mx, sgn*t_step, wsp(ih),mh,
3784 . wsp(ifree),lfree, iwsp, iexph, ns, iflag )
3785 iexph = ifree + iexph - 1
3786 iphih = iexph + mbrkdwn*mx
3787 nscale = nscale + ns
3788 wsp(iphih+mbrkdwn) = hj1j*wsp(iphih+mx+mbrkdwn-1)
3789 wsp(iphih+mbrkdwn+1) = hj1j*wsp(iphih+2*mx+mbrkdwn-1)
3791 402 continue
3793 *--- error estimate ...
3795 if ( k1.eq.0 ) then
3796 err_loc = tol
3797 else
3798 p1 = ABS( wsp(iphih+m) ) * beta
3799 p2 = ABS( wsp(iphih+m+1) ) * beta * avnorm
3800 if ( p1.gt.10.0d0*p2 ) then
3801 err_loc = p2
3802 xm = 1.0d0/DBLE( m+1 )
3803 elseif ( p1.gt.p2 ) then
3804 err_loc = (p1*p2)/(p1-p2)
3805 xm = 1.0d0/DBLE( m+1 )
3806 else
3807 err_loc = p1
3808 xm = 1.0d0/DBLE( m )
3809 endif
3810 endif
3812 *--- reject the step-size if the error is not acceptable ...
3814 if ( (k1.ne.0) .and. (err_loc.gt.delta*t_step*tol) .and.
3815 . (mxreject.eq.0 .or. ireject.lt.mxreject) ) then
3816 t_old = t_step
3817 t_step = gamma * t_step * (t_step*tol/err_loc)**xm
3818 p1 = 10.0d0**(NINT( LOG10( t_step )-SQR1 )-1)
3819 t_step = AINT( t_step/p1 + 0.55d0 ) * p1
3820 if ( itrace.ne.0 ) then
3821 print*,'t_step =',t_old
3822 print*,'err_loc =',err_loc
3823 print*,'err_required =',delta*t_old*tol
3824 print*,'stepsize rejected, stepping down to:',t_step
3825 endif
3826 ireject = ireject + 1
3827 nreject = nreject + 1
3828 if ( mxreject.ne.0 .and. ireject.gt.mxreject ) then
3829 print*,"Failure in DSPHIV: ---"
3830 print*,"The requested tolerance is too high."
3831 Print*,"Rerun with a smaller value."
3832 iflag = 2
3833 return
3834 endif
3835 goto 401
3836 endif
3838 mx = mbrkdwn + MAX( 0,k1-2 )
3839 call DGEMV( 'n', n,mx,beta,wsp(iv),n,wsp(iphih),1,1.0d0,w,1 )
3841 *--- suggested value for the next stepsize ...
3843 t_new = gamma * t_step * (t_step*tol/err_loc)**xm
3844 p1 = 10.0d0**(NINT( LOG10( t_new )-SQR1 )-1)
3845 t_new = AINT( t_new/p1 + 0.55d0 ) * p1
3847 err_loc = MAX( err_loc,rndoff )
3849 *--- update the time covered ...
3851 t_now = t_now + t_step
3853 *--- display and keep some information ...
3855 if ( itrace.ne.0 ) then
3856 print*,'integration',nstep,'---------------------------------'
3857 print*,'scale-square =',ns
3858 print*,'step_size =',t_step
3859 print*,'err_loc =',err_loc
3860 print*,'next_step =',t_new
3861 endif
3863 step_min = MIN( step_min, t_step )
3864 step_max = MAX( step_max, t_step )
3865 s_error = s_error + err_loc
3866 x_error = MAX( x_error, err_loc )
3868 if ( mxstep.eq.0 .or. nstep.lt.mxstep ) goto 100
3869 iflag = 1
3871 500 continue
3873 iwsp(1) = nmult
3874 iwsp(2) = nexph
3875 iwsp(3) = nscale
3876 iwsp(4) = nstep
3877 iwsp(5) = nreject
3878 iwsp(6) = ibrkflag
3879 iwsp(7) = mbrkdwn
3881 wsp(1) = step_min
3882 wsp(2) = step_max
3883 wsp(3) = 0.0d0
3884 wsp(4) = 0.0d0
3885 wsp(5) = x_error
3886 wsp(6) = s_error
3887 wsp(7) = tbrkdwn
3888 wsp(8) = sgn*t_now
3890 *----------------------------------------------------------------------|
3891 *----------------------------------------------------------------------|
3892 subroutine ZGPHIV( n, m, t, u, v, w, tol, anorm,
3893 . wsp,lwsp, iwsp,liwsp, matvec, itrace,iflag )
3895 implicit none
3896 integer n, m, lwsp, liwsp, itrace, iflag, iwsp(liwsp)
3897 double precision t, tol, anorm
3898 complex*16 u(n), v(n), w(n), wsp(lwsp)
3899 external matvec
3901 *-----Purpose----------------------------------------------------------|
3903 *--- ZGPHIV computes w = exp(t*A)v + t*phi(tA)u which is the solution
3904 * of the nonhomogeneous linear ODE problem w' = Aw + u, w(0) = v.
3905 * phi(z) = (exp(z)-1)/z and A is a Zomplex (i.e., complex double
3906 * precision matrix).
3908 * The method used is based on Krylov subspace projection
3909 * techniques and the matrix under consideration interacts only
3910 * via the external routine `matvec' performing the matrix-vector
3911 * product (matrix-free method).
3913 *-----Arguments--------------------------------------------------------|
3915 * n : (input) order of the principal matrix A.
3917 * m : (input) maximum size for the Krylov basis.
3919 * t : (input) time at wich the solution is needed (can be < 0).
3921 * u(n) : (input) operand vector with respect to the phi function
3922 * (forcing term of the ODE problem).
3924 * v(n) : (input) operand vector with respect to the exp function
3925 * (initial condition of the ODE problem).
3927 * w(n) : (output) computed approximation of exp(t*A)v + t*phi(tA)u
3929 * tol : (input/output) the requested accuracy tolerance on w.
3930 * If on input tol=0.0d0 or tol is too small (tol.le.eps)
3931 * the internal value sqrt(eps) is used, and tol is set to
3932 * sqrt(eps) on output (`eps' denotes the machine epsilon).
3933 * (`Happy breakdown' is assumed if h(j+1,j) .le. anorm*tol)
3935 * anorm : (input) an approximation of some norm of A.
3937 * wsp(lwsp): (workspace) lwsp .ge. n*(m+1)+n+(m+3)^2+4*(m+3)^2+ideg+1
3938 * +---------+-------+---------------+
3939 * (actually, ideg=6) V H wsp for PADE
3941 * iwsp(liwsp): (workspace) liwsp .ge. m+3
3943 * matvec : external subroutine for matrix-vector multiplication.
3944 * synopsis: matvec( x, y )
3945 * double precision x(*), y(*)
3946 * computes: y(1:n) <- A*x(1:n)
3947 * where A is the principal matrix.
3949 * itrace : (input) running mode. 0=silent, 1=print step-by-step info
3951 * iflag : (output) exit flag.
3952 * <0 - bad input arguments
3953 * 0 - no problem
3954 * 1 - maximum number of steps reached without convergence
3955 * 2 - requested tolerance was too high
3957 *-----Accounts on the computation--------------------------------------|
3958 * Upon exit, an interested user may retrieve accounts on the
3959 * computations. They are located in the workspace arrays wsp and
3960 * iwsp as indicated below:
3962 * location mnemonic description
3963 * -----------------------------------------------------------------|
3964 * iwsp(1) = nmult, number of matrix-vector multiplications used
3965 * iwsp(2) = nexph, number of Hessenberg matrix exponential evaluated
3966 * iwsp(3) = nscale, number of repeated squaring involved in Pade
3967 * iwsp(4) = nstep, number of integration steps used up to completion
3968 * iwsp(5) = nreject, number of rejected step-sizes
3969 * iwsp(6) = ibrkflag, set to 1 if `happy breakdown' and 0 otherwise
3970 * iwsp(7) = mbrkdwn, if `happy brkdown', basis-size when it occured
3971 * -----------------------------------------------------------------|
3972 * wsp(1) = step_min, minimum step-size used during integration
3973 * wsp(2) = step_max, maximum step-size used during integration
3974 * wsp(3) = dummy
3975 * wsp(4) = dummy
3976 * wsp(5) = x_error, maximum among all local truncation errors
3977 * wsp(6) = s_error, global sum of local truncation errors
3978 * wsp(7) = tbrkdwn, if `happy breakdown', time when it occured
3979 * wsp(8) = t_now, integration domain successfully covered
3981 *----------------------------------------------------------------------|
3982 *-----The following parameters may also be adjusted herein-------------|
3984 integer mxstep, mxreject, ideg
3985 double precision delta, gamma
3986 parameter( mxstep = 500,
3987 . mxreject = 0,
3988 . ideg = 6,
3989 . delta = 1.2d0,
3990 . gamma = 0.9d0 )
3992 * mxstep : maximum allowable number of integration steps.
3993 * The value 0 means an infinite number of steps.
3995 * mxreject: maximum allowable number of rejections at each step.
3996 * The value 0 means an infinite number of rejections.
3998 * ideg : the Pade approximation of type (ideg,ideg) is used as
3999 * an approximation to exp(H).
4001 * delta : local truncation error `safety factor'
4003 * gamma : stepsize `shrinking factor'
4005 *----------------------------------------------------------------------|
4006 * Roger B. Sidje (rbs@maths.uq.edu.au)
4007 * EXPOKIT: Software Package for Computing Matrix Exponentials.
4008 * ACM - Transactions On Mathematical Software, 24(1):130-156, 1998
4009 *----------------------------------------------------------------------|
4011 complex*16 ZERO, ONE
4012 parameter( ZERO=(0.0d0,0.0d0), ONE=(1.0d0,0.0d0) )
4013 integer i, j, k1, mh, mx, iv, ih, j1v, ns, ifree, lfree, iexph,
4014 . ireject,ibrkflag,mbrkdwn, nmult, nreject, nexph, nscale,
4015 . nstep, iphih
4016 double precision sgn, t_out, tbrkdwn, step_min,step_max, err_loc,
4017 . s_error, x_error, t_now, t_new, t_step, t_old,
4018 . xm, beta, break_tol, p1, p2, p3, eps, rndoff,
4019 . avnorm, hj1j, SQR1
4020 complex*16 hij
4022 intrinsic AINT,ABS,CMPLX,DBLE,INT,LOG10,MAX,MIN,NINT,SIGN,SQRT
4023 complex*16 ZDOTC
4024 double precision DZNRM2
4026 *--- check restrictions on input parameters ...
4028 iflag = 0
4029 if ( lwsp.lt.n*(m+3)+5*(m+3)**2+ideg+1 ) iflag = -1
4030 if ( liwsp.lt.m+3 ) iflag = -2
4031 if ( m.ge.n .or. m.le.0 ) iflag = -3
4032 if ( iflag.ne.0 ) stop 'bad sizes (in input of ZGPHIV)'
4034 *--- initialisations ...
4036 k1 = 3
4037 mh = m + 3
4038 iv = 1
4039 ih = iv + n*(m+1) + n
4040 ifree = ih + mh*mh
4041 lfree = lwsp - ifree + 1
4043 ibrkflag = 0
4044 mbrkdwn = m
4045 nmult = 0
4046 nreject = 0
4047 nexph = 0
4048 nscale = 0
4050 t_out = ABS( t )
4051 tbrkdwn = 0.0d0
4052 step_min = t_out
4053 step_max = 0.0d0
4054 nstep = 0
4055 s_error = 0.0d0
4056 x_error = 0.0d0
4057 t_now = 0.0d0
4058 t_new = 0.0d0
4060 p1 = 4.0d0/3.0d0
4061 1 p2 = p1 - 1.0d0
4062 p3 = p2 + p2 + p2
4063 eps = ABS( p3-1.0d0 )
4064 if ( eps.eq.0.0d0 ) go to 1
4065 if ( tol.le.eps ) tol = SQRT( eps )
4066 rndoff = eps*anorm
4068 break_tol = 1.0d-7
4069 *>>> break_tol = tol
4070 *>>> break_tol = anorm*tol
4072 sgn = SIGN( 1.0d0,t )
4073 SQR1 = SQRT( 0.1d0 )
4074 call ZCOPY( n, v,1, w,1 )
4076 *--- step-by-step integration ...
4078 100 if ( t_now.ge.t_out ) goto 500
4080 nmult = nmult + 1
4081 call matvec( w, wsp(iv) )
4082 call ZAXPY( n, ONE, u,1, wsp(iv),1 )
4083 beta = DZNRM2( n, wsp(iv),1 )
4084 if ( beta.eq.0.0d0 ) goto 500
4085 call ZDSCAL( n, 1.0d0/beta, wsp(iv),1 )
4086 do i = 1,mh*mh
4087 wsp(ih+i-1) = ZERO
4088 enddo
4090 if ( nstep.eq.0 ) then
4091 *--- obtain the very first stepsize ...
4092 xm = 1.0d0/DBLE( m )
4093 p1 = tol*(((m+1)/2.72D0)**(m+1))*SQRT(2.0D0*3.14D0*(m+1))
4094 t_new = (1.0d0/anorm)*(p1/(4.0d0*beta*anorm))**xm
4095 p1 = 10.0d0**(NINT( LOG10( t_new )-SQR1 )-1)
4096 t_new = AINT( t_new/p1 + 0.55d0 ) * p1
4097 endif
4098 nstep = nstep + 1
4099 t_step = MIN( t_out-t_now, t_new )
4101 *--- Arnoldi loop ...
4103 j1v = iv + n
4104 do 200 j = 1,m
4105 nmult = nmult + 1
4106 call matvec( wsp(j1v-n), wsp(j1v) )
4107 do i = 1,j
4108 hij = ZDOTC( n, wsp(iv+(i-1)*n),1, wsp(j1v),1 )
4109 call ZAXPY( n, -hij, wsp(iv+(i-1)*n),1, wsp(j1v),1 )
4110 wsp(ih+(j-1)*mh+i-1) = hij
4111 enddo
4112 hj1j = DZNRM2( n, wsp(j1v),1 )
4113 *--- if `happy breakdown' go straightforward at the end ...
4114 if ( hj1j.le.break_tol ) then
4115 print*,'happy breakdown: mbrkdwn =',j,' h =',hj1j
4116 k1 = 0
4117 ibrkflag = 1
4118 mbrkdwn = j
4119 tbrkdwn = t_now
4120 t_step = t_out-t_now
4121 goto 300
4122 endif
4123 wsp(ih+(j-1)*mh+j) = CMPLX( hj1j )
4124 call ZDSCAL( n, 1.0d0/hj1j, wsp(j1v),1 )
4125 j1v = j1v + n
4126 200 continue
4127 nmult = nmult + 1
4128 call matvec( wsp(j1v-n), wsp(j1v) )
4129 avnorm = DZNRM2( n, wsp(j1v),1 )
4131 *--- set 1's for the 3-extended scheme ...
4133 300 continue
4134 wsp(ih+mh*mbrkdwn) = ONE
4135 wsp(ih+(m-1)*mh+m) = ZERO
4136 do i = 1,k1-1
4137 wsp(ih+(m+i)*mh+m+i-1) = ONE
4138 enddo
4140 *--- loop while ireject<mxreject until the tolerance is reached ...
4142 ireject = 0
4143 401 continue
4145 *--- compute w = beta*t_step*V*phi(t_step*H)*e1 + w
4147 nexph = nexph + 1
4148 mx = mbrkdwn + MAX( 1,k1 )
4150 *--- irreducible rational Pade approximation ...
4151 call ZGPADM( ideg, mx, sgn*t_step, wsp(ih),mh,
4152 . wsp(ifree),lfree, iwsp, iexph, ns, iflag )
4153 iexph = ifree + iexph - 1
4154 iphih = iexph + mbrkdwn*mx
4155 nscale = nscale + ns
4156 wsp(iphih+mbrkdwn) = hj1j*wsp(iphih+mx+mbrkdwn-1)
4157 wsp(iphih+mbrkdwn+1) = hj1j*wsp(iphih+2*mx+mbrkdwn-1)
4159 402 continue
4161 *--- error estimate ...
4163 if ( k1.eq.0 ) then
4164 err_loc = tol
4165 else
4166 p1 = ABS( wsp(iphih+m) ) * beta
4167 p2 = ABS( wsp(iphih+m+1) ) * beta * avnorm
4168 if ( p1.gt.10.0d0*p2 ) then
4169 err_loc = p2
4170 xm = 1.0d0/DBLE( m+1 )
4171 elseif ( p1.gt.p2 ) then
4172 err_loc = (p1*p2)/(p1-p2)
4173 xm = 1.0d0/DBLE( m+1 )
4174 else
4175 err_loc = p1
4176 xm = 1.0d0/DBLE( m )
4177 endif
4178 endif
4180 *--- reject the step-size if the error is not acceptable ...
4182 if ( (k1.ne.0) .and. (err_loc.gt.delta*t_step*tol) .and.
4183 . (mxreject.eq.0 .or. ireject.lt.mxreject) ) then
4184 t_old = t_step
4185 t_step = gamma * t_step * (t_step*tol/err_loc)**xm
4186 p1 = 10.0d0**(NINT( LOG10( t_step )-SQR1 )-1)
4187 t_step = AINT( t_step/p1 + 0.55d0 ) * p1
4188 if ( itrace.ne.0 ) then
4189 print*,'t_step =',t_old
4190 print*,'err_loc =',err_loc
4191 print*,'err_required =',delta*t_old*tol
4192 print*,'stepsize rejected, stepping down to:',t_step
4193 endif
4194 ireject = ireject + 1
4195 nreject = nreject + 1
4196 if ( mxreject.ne.0 .and. ireject.gt.mxreject ) then
4197 print*,"Failure in ZGPHIV: ---"
4198 print*,"The requested tolerance is too high."
4199 Print*,"Rerun with a smaller value."
4200 iflag = 2
4201 return
4202 endif
4203 goto 401
4204 endif
4206 mx = mbrkdwn + MAX( 0,k1-2 )
4207 hij = CMPLX( beta )
4208 call ZGEMV( 'n', n,mx,hij,wsp(iv),n,wsp(iphih),1,ONE,w,1 )
4210 *--- suggested value for the next stepsize ...
4212 t_new = gamma * t_step * (t_step*tol/err_loc)**xm
4213 p1 = 10.0d0**(NINT( LOG10( t_new )-SQR1 )-1)
4214 t_new = AINT( t_new/p1 + 0.55d0 ) * p1
4216 err_loc = MAX( err_loc,rndoff )
4218 *--- update the time covered ...
4220 t_now = t_now + t_step
4222 *--- display and keep some information ...
4224 if ( itrace.ne.0 ) then
4225 print*,'integration',nstep,'---------------------------------'
4226 print*,'scale-square =',ns
4227 print*,'step_size =',t_step
4228 print*,'err_loc =',err_loc
4229 print*,'next_step =',t_new
4230 endif
4232 step_min = MIN( step_min, t_step )
4233 step_max = MAX( step_max, t_step )
4234 s_error = s_error + err_loc
4235 x_error = MAX( x_error, err_loc )
4237 if ( mxstep.eq.0 .or. nstep.lt.mxstep ) goto 100
4238 iflag = 1
4240 500 continue
4242 iwsp(1) = nmult
4243 iwsp(2) = nexph
4244 iwsp(3) = nscale
4245 iwsp(4) = nstep
4246 iwsp(5) = nreject
4247 iwsp(6) = ibrkflag
4248 iwsp(7) = mbrkdwn
4250 wsp(1) = CMPLX( step_min )
4251 wsp(2) = CMPLX( step_max )
4252 wsp(3) = CMPLX( 0.0d0 )
4253 wsp(4) = CMPLX( 0.0d0 )
4254 wsp(5) = CMPLX( x_error )
4255 wsp(6) = CMPLX( s_error )
4256 wsp(7) = CMPLX( tbrkdwn )
4257 wsp(8) = CMPLX( sgn*t_now )
4259 *----------------------------------------------------------------------|
4260 *----------------------------------------------------------------------|
4261 subroutine ZHPHIV( n, m, t, u, v, w, tol, anorm,
4262 . wsp,lwsp, iwsp,liwsp, matvec, itrace,iflag )
4264 implicit none
4265 integer n, m, lwsp, liwsp, itrace, iflag, iwsp(liwsp)
4266 double precision t, tol, anorm
4267 complex*16 u(n), v(n), w(n), wsp(lwsp)
4268 external matvec
4270 *-----Purpose----------------------------------------------------------|
4272 *--- ZHPHIV computes w = exp(t*A)v + t*phi(tA)u which is the solution
4273 * of the nonhomogeneous linear ODE problem w' = Aw + u, w(0) = v.
4274 * phi(z) = (exp(z)-1)/z and A is an Hermitian matrix.
4276 * The method used is based on Krylov subspace projection
4277 * techniques and the matrix under consideration interacts only
4278 * via the external routine `matvec' performing the matrix-vector
4279 * product (matrix-free method).
4281 *-----Arguments--------------------------------------------------------|
4283 * n : (input) order of the principal matrix A.
4285 * m : (input) maximum size for the Krylov basis.
4287 * t : (input) time at wich the solution is needed (can be < 0).
4289 * u(n) : (input) operand vector with respect to the phi function
4290 * (forcing term of the ODE problem).
4292 * v(n) : (input) operand vector with respect to the exp function
4293 * (initial condition of the ODE problem).
4295 * w(n) : (output) computed approximation of exp(t*A)v + t*phi(tA)u
4297 * tol : (input/output) the requested accuracy tolerance on w.
4298 * If on input tol=0.0d0 or tol is too small (tol.le.eps)
4299 * the internal value sqrt(eps) is used, and tol is set to
4300 * sqrt(eps) on output (`eps' denotes the machine epsilon).
4301 * (`Happy breakdown' is assumed if h(j+1,j) .le. anorm*tol)
4303 * anorm : (input) an approximation of some norm of A.
4305 * wsp(lwsp): (workspace) lwsp .ge. n*(m+1)+n+(m+3)^2+4*(m+3)^2+ideg+1
4306 * +---------+-------+---------------+
4307 * (actually, ideg=6) V H wsp for PADE
4309 * iwsp(liwsp): (workspace) liwsp .ge. m+3
4311 * matvec : external subroutine for matrix-vector multiplication.
4312 * synopsis: matvec( x, y )
4313 * double precision x(*), y(*)
4314 * computes: y(1:n) <- A*x(1:n)
4315 * where A is the principal matrix.
4317 * itrace : (input) running mode. 0=silent, 1=print step-by-step info
4319 * iflag : (output) exit flag.
4320 * <0 - bad input arguments
4321 * 0 - no problem
4322 * 1 - maximum number of steps reached without convergence
4323 * 2 - requested tolerance was too high
4325 *-----Accounts on the computation--------------------------------------|
4326 * Upon exit, an interested user may retrieve accounts on the
4327 * computations. They are located in the workspace arrays wsp and
4328 * iwsp as indicated below:
4330 * location mnemonic description
4331 * -----------------------------------------------------------------|
4332 * iwsp(1) = nmult, number of matrix-vector multiplications used
4333 * iwsp(2) = nexph, number of Hessenberg matrix exponential evaluated
4334 * iwsp(3) = nscale, number of repeated squaring involved in Pade
4335 * iwsp(4) = nstep, number of integration steps used up to completion
4336 * iwsp(5) = nreject, number of rejected step-sizes
4337 * iwsp(6) = ibrkflag, set to 1 if `happy breakdown' and 0 otherwise
4338 * iwsp(7) = mbrkdwn, if `happy brkdown', basis-size when it occured
4339 * -----------------------------------------------------------------|
4340 * wsp(1) = step_min, minimum step-size used during integration
4341 * wsp(2) = step_max, maximum step-size used during integration
4342 * wsp(3) = dummy
4343 * wsp(4) = dummy
4344 * wsp(5) = x_error, maximum among all local truncation errors
4345 * wsp(6) = s_error, global sum of local truncation errors
4346 * wsp(7) = tbrkdwn, if `happy breakdown', time when it occured
4347 * wsp(8) = t_now, integration domain successfully covered
4349 *----------------------------------------------------------------------|
4350 *-----The following parameters may also be adjusted herein-------------|
4352 integer mxstep, mxreject, ideg
4353 double precision delta, gamma
4354 parameter( mxstep = 500,
4355 . mxreject = 0,
4356 . ideg = 6,
4357 . delta = 1.2d0,
4358 . gamma = 0.9d0 )
4360 * mxstep : maximum allowable number of integration steps.
4361 * The value 0 means an infinite number of steps.
4363 * mxreject: maximum allowable number of rejections at each step.
4364 * The value 0 means an infinite number of rejections.
4366 * ideg : the Pade approximation of type (ideg,ideg) is used as
4367 * an approximation to exp(H).
4369 * delta : local truncation error `safety factor'
4371 * gamma : stepsize `shrinking factor'
4373 *----------------------------------------------------------------------|
4374 * Roger B. Sidje (rbs@maths.uq.edu.au)
4375 * EXPOKIT: Software Package for Computing Matrix Exponentials.
4376 * ACM - Transactions On Mathematical Software, 24(1):130-156, 1998
4377 *----------------------------------------------------------------------|
4379 complex*16 ZERO, ONE
4380 parameter( ZERO=(0.0d0,0.0d0), ONE=(1.0d0,0.0d0) )
4381 integer i, j, k1, mh, mx, iv, ih, j1v, ns, ifree, lfree, iexph,
4382 . ireject,ibrkflag,mbrkdwn, nmult, nreject, nexph, nscale,
4383 . nstep, iphih
4384 double precision sgn, t_out, tbrkdwn, step_min,step_max, err_loc,
4385 . s_error, x_error, t_now, t_new, t_step, t_old,
4386 . xm, beta, break_tol, p1, p2, p3, eps, rndoff,
4387 . avnorm, hj1j, SQR1
4388 complex*16 hjj
4390 intrinsic AINT,ABS,CMPLX,DBLE,INT,LOG10,MAX,MIN,NINT,SIGN,SQRT
4391 complex*16 ZDOTC
4392 double precision DZNRM2
4394 *--- check restrictions on input parameters ...
4396 iflag = 0
4397 if ( lwsp.lt.n*(m+3)+5*(m+3)**2+ideg+1 ) iflag = -1
4398 if ( liwsp.lt.m+3 ) iflag = -2
4399 if ( m.ge.n .or. m.le.0 ) iflag = -3
4400 if ( iflag.ne.0 ) stop 'bad sizes (in input of ZHPHIV)'
4402 *--- initialisations ...
4404 k1 = 3
4405 mh = m + 3
4406 iv = 1
4407 ih = iv + n*(m+1) + n
4408 ifree = ih + mh*mh
4409 lfree = lwsp - ifree + 1
4411 ibrkflag = 0
4412 mbrkdwn = m
4413 nmult = 0
4414 nreject = 0
4415 nexph = 0
4416 nscale = 0
4418 t_out = ABS( t )
4419 tbrkdwn = 0.0d0
4420 step_min = t_out
4421 step_max = 0.0d0
4422 nstep = 0
4423 s_error = 0.0d0
4424 x_error = 0.0d0
4425 t_now = 0.0d0
4426 t_new = 0.0d0
4428 p1 = 4.0d0/3.0d0
4429 1 p2 = p1 - 1.0d0
4430 p3 = p2 + p2 + p2
4431 eps = ABS( p3-1.0d0 )
4432 if ( eps.eq.0.0d0 ) go to 1
4433 if ( tol.le.eps ) tol = SQRT( eps )
4434 rndoff = eps*anorm
4436 break_tol = 1.0d-7
4437 *>>> break_tol = tol
4438 *>>> break_tol = anorm*tol
4440 sgn = SIGN( 1.0d0,t )
4441 SQR1 = SQRT( 0.1d0 )
4442 call ZCOPY( n, v,1, w,1 )
4444 *--- step-by-step integration ...
4446 100 if ( t_now.ge.t_out ) goto 500
4448 nmult = nmult + 1
4449 call matvec( w, wsp(iv) )
4450 call ZAXPY( n, ONE, u,1, wsp(iv),1 )
4451 beta = DZNRM2( n, wsp(iv),1 )
4452 if ( beta.eq.0.0d0 ) goto 500
4453 call ZDSCAL( n, 1.0d0/beta, wsp(iv),1 )
4454 do i = 1,mh*mh
4455 wsp(ih+i-1) = ZERO
4456 enddo
4458 if ( nstep.eq.0 ) then
4459 *--- obtain the very first stepsize ...
4460 xm = 1.0d0/DBLE( m )
4461 p1 = tol*(((m+1)/2.72D0)**(m+1))*SQRT(2.0D0*3.14D0*(m+1))
4462 t_new = (1.0d0/anorm)*(p1/(4.0d0*beta*anorm))**xm
4463 p1 = 10.0d0**(NINT( LOG10( t_new )-SQR1 )-1)
4464 t_new = AINT( t_new/p1 + 0.55d0 ) * p1
4465 endif
4466 nstep = nstep + 1
4467 t_step = MIN( t_out-t_now, t_new )
4469 *--- Lanczos loop ...
4471 j1v = iv + n
4472 do 200 j = 1,m
4473 nmult = nmult + 1
4474 call matvec( wsp(j1v-n), wsp(j1v) )
4475 if ( j.gt.1 )
4476 . call ZAXPY(n,-wsp(ih+(j-1)*mh+j-2),wsp(j1v-2*n),1,wsp(j1v),1)
4477 hjj = ZDOTC( n, wsp(j1v-n),1, wsp(j1v),1 )
4478 call ZAXPY( n, -hjj, wsp(j1v-n),1, wsp(j1v),1 )
4479 hj1j = DZNRM2( n, wsp(j1v),1 )
4480 wsp(ih+(j-1)*(mh+1)) = hjj
4481 if ( hj1j.le.break_tol ) then
4482 print*,'happy breakdown: mbrkdwn =',j,' h =',hj1j
4483 k1 = 0
4484 ibrkflag = 1
4485 mbrkdwn = j
4486 tbrkdwn = t_now
4487 t_step = t_out-t_now
4488 goto 300
4489 endif
4490 wsp(ih+(j-1)*mh+j) = CMPLX( hj1j )
4491 wsp(ih+j*mh+j-1) = CMPLX( hj1j )
4492 call ZDSCAL( n, 1.0d0/hj1j, wsp(j1v),1 )
4493 j1v = j1v + n
4494 200 continue
4495 nmult = nmult + 1
4496 call matvec( wsp(j1v-n), wsp(j1v) )
4497 avnorm = DZNRM2( n, wsp(j1v),1 )
4499 *--- set 1's for the 3-extended scheme ...
4501 300 continue
4502 wsp(ih+mh*mbrkdwn) = ONE
4503 wsp(ih+m*mh+m-1) = ZERO
4504 wsp(ih+(m-1)*mh+m) = ZERO
4505 do i = 1,k1-1
4506 wsp(ih+(m+i)*mh+m+i-1) = ONE
4507 enddo
4509 *--- loop while ireject<mxreject until the tolerance is reached ...
4511 ireject = 0
4512 401 continue
4514 *--- compute w = beta*t_step*V*phi(t_step*H)*e1 + w
4516 nexph = nexph + 1
4517 mx = mbrkdwn + MAX( 1,k1 )
4519 *--- irreducible rational Pade approximation ...
4520 call ZGPADM( ideg, mx, sgn*t_step, wsp(ih),mh,
4521 . wsp(ifree),lfree, iwsp, iexph, ns, iflag )
4522 iexph = ifree + iexph - 1
4523 iphih = iexph + mbrkdwn*mx
4524 nscale = nscale + ns
4525 wsp(iphih+mbrkdwn) = hj1j*wsp(iphih+mx+mbrkdwn-1)
4526 wsp(iphih+mbrkdwn+1) = hj1j*wsp(iphih+2*mx+mbrkdwn-1)
4528 402 continue
4530 *--- error estimate ...
4532 if ( k1.eq.0 ) then
4533 err_loc = tol
4534 else
4535 p1 = ABS( wsp(iphih+m) ) * beta
4536 p2 = ABS( wsp(iphih+m+1) ) * beta * avnorm
4537 if ( p1.gt.10.0d0*p2 ) then
4538 err_loc = p2
4539 xm = 1.0d0/DBLE( m+1 )
4540 elseif ( p1.gt.p2 ) then
4541 err_loc = (p1*p2)/(p1-p2)
4542 xm = 1.0d0/DBLE( m+1 )
4543 else
4544 err_loc = p1
4545 xm = 1.0d0/DBLE( m )
4546 endif
4547 endif
4549 *--- reject the step-size if the error is not acceptable ...
4551 if ( (k1.ne.0) .and. (err_loc.gt.delta*t_step*tol) .and.
4552 . (mxreject.eq.0 .or. ireject.lt.mxreject) ) then
4553 t_old = t_step
4554 t_step = gamma * t_step * (t_step*tol/err_loc)**xm
4555 p1 = 10.0d0**(NINT( LOG10( t_step )-SQR1 )-1)
4556 t_step = AINT( t_step/p1 + 0.55d0 ) * p1
4557 if ( itrace.ne.0 ) then
4558 print*,'t_step =',t_old
4559 print*,'err_loc =',err_loc
4560 print*,'err_required =',delta*t_old*tol
4561 print*,'stepsize rejected, stepping down to:',t_step
4562 endif
4563 ireject = ireject + 1
4564 nreject = nreject + 1
4565 if ( mxreject.ne.0 .and. ireject.gt.mxreject ) then
4566 print*,"Failure in ZHPHIV: ---"
4567 print*,"The requested tolerance is too high."
4568 Print*,"Rerun with a smaller value."
4569 iflag = 2
4570 return
4571 endif
4572 goto 401
4573 endif
4575 mx = mbrkdwn + MAX( 0,k1-2 )
4576 hjj = CMPLX( beta )
4577 call ZGEMV( 'n', n,mx,hjj,wsp(iv),n,wsp(iphih),1,ONE,w,1 )
4579 *--- suggested value for the next stepsize ...
4581 t_new = gamma * t_step * (t_step*tol/err_loc)**xm
4582 p1 = 10.0d0**(NINT( LOG10( t_new )-SQR1 )-1)
4583 t_new = AINT( t_new/p1 + 0.55d0 ) * p1
4585 err_loc = MAX( err_loc,rndoff )
4587 *--- update the time covered ...
4589 t_now = t_now + t_step
4591 *--- display and keep some information ...
4593 if ( itrace.ne.0 ) then
4594 print*,'integration',nstep,'---------------------------------'
4595 print*,'scale-square =',ns
4596 print*,'step_size =',t_step
4597 print*,'err_loc =',err_loc
4598 print*,'next_step =',t_new
4599 endif
4601 step_min = MIN( step_min, t_step )
4602 step_max = MAX( step_max, t_step )
4603 s_error = s_error + err_loc
4604 x_error = MAX( x_error, err_loc )
4606 if ( mxstep.eq.0 .or. nstep.lt.mxstep ) goto 100
4607 iflag = 1
4609 500 continue
4611 iwsp(1) = nmult
4612 iwsp(2) = nexph
4613 iwsp(3) = nscale
4614 iwsp(4) = nstep
4615 iwsp(5) = nreject
4616 iwsp(6) = ibrkflag
4617 iwsp(7) = mbrkdwn
4619 wsp(1) = CMPLX( step_min )
4620 wsp(2) = CMPLX( step_max )
4621 wsp(3) = CMPLX( 0.0d0 )
4622 wsp(4) = CMPLX( 0.0d0 )
4623 wsp(5) = CMPLX( x_error )
4624 wsp(6) = CMPLX( s_error )
4625 wsp(7) = CMPLX( tbrkdwn )
4626 wsp(8) = CMPLX( sgn*t_now )
4628 *----------------------------------------------------------------------|