1 ! from expokit
, free
for non
-commercial use
3 *----------------------------------------------------------------------|
4 subroutine DMEXPV
( n
, m
, t
, v
, w
, tol
, anorm
,
5 . wsp
,lwsp
, iwsp
,liwsp
, matvec
, itrace
,iflag
)
8 integer n
, m
, lwsp
, liwsp
, itrace
, iflag
, iwsp
(liwsp
)
9 double precision t
, tol
, anorm
, v
(n
), w
(n
), wsp
(lwsp
)
12 *-----Purpose
----------------------------------------------------------|
14 *--- DMEXPV computes w
= exp
(t*A
)*v
- Customised
for MARKOV CHAINS
.
16 * It does not compute the matrix exponential in isolation but
17 * instead
, it computes directly the action of the exponential
18 * operator on the operand vector
. This way of doing so allows
19 * for addressing large sparse problems
.
21 * The method used is based on Krylov subspace projection
22 * techniques and the matrix under consideration interacts only
23 * via the
external routine `matvec
' performing the matrix-vector
24 * product (matrix-free method).
26 * This is a customised version for Markov Chains. This means that a
27 * check is done within this code to ensure that the resulting vector
28 * w is a probability vector, i.e., w must have all its components
29 * in [0,1], with sum equal to 1. This check is done at some expense
30 * and the user may try DGEXPV which is cheaper since it ignores
31 * probability constraints.
33 * IMPORTANT: The check assumes that the transition rate matrix Q
34 * satisfies Qe = 0, where e=(1,...,1)'. Don
't use DMEXPV
35 * if this condition does not hold. Use DGEXPV instead.
36 * DMEXPV/DGEXPV require the matrix-vector product
37 * y = A*x = Q'*x
, i
.e
, the TRANSPOSE of Q times a vector
.
38 * Failure
to remember this leads
to wrong results
.
40 *-----Arguments
--------------------------------------------------------|
42 * n
: (input
) order of the principal matrix A
.
44 * m
: (input
) maximum size
for the Krylov basis
.
46 * t
: (input
) time at wich the solution is needed
(can be
< 0).
48 * v
(n
) : (input
) given operand vector
.
50 * w
(n
) : (output
) computed approximation of exp
(t*A
)*v
.
52 * tol
: (input
/output
) the requested acurracy tolerance on w
.
53 * If on input tol
=0.0d0 or tol is too small
(tol
.le
.eps
)
54 * the internal value sqrt
(eps
) is used
, and tol is set
to
55 * sqrt
(eps
) on output
(`eps
' denotes the machine epsilon).
56 * (`Happy breakdown' is assumed
if h
(j
+1,j
) .le
. anorm*tol
)
58 * anorm
: (input
) an approximation of some norm of A
.
60 * wsp
(lwsp
): (workspace
) lwsp
.ge
. n*
(m
+1)+n
+(m
+2)^
2+4*(m
+2)^
2+ideg
+1
61 * +---------+-------+---------------+
62 * (actually
, ideg
=6) V H wsp
for PADE
64 * iwsp
(liwsp
): (workspace
) liwsp
.ge
. m
+2
66 * matvec
: external subroutine for matrix
-vector multiplication
.
67 * synopsis
: matvec
( x
, y
)
68 * double precision x
(*), y
(*)
69 * computes
: y
(1:n
) <- A*x
(1:n
)
70 * where A is the principal matrix
.
72 * IMPORTANT
: DMEXPV requires the product y
= Ax
= Q
'x, i.e.
73 * the TRANSPOSE of the transition rate matrix.
75 * itrace : (input) running mode. 0=silent, 1=print step-by-step info
77 * iflag : (output) exit flag.
78 * <0 - bad input arguments
80 * 1 - maximum number of steps reached without convergence
81 * 2 - requested tolerance was too high
83 *-----Accounts on the computation--------------------------------------|
84 * Upon exit, an interested user may retrieve accounts on the
85 * computations. They are located in the workspace arrays wsp and
86 * iwsp as indicated below:
88 * location mnemonic description
89 * -----------------------------------------------------------------|
90 * iwsp(1) = nmult, number of matrix-vector multiplications used
91 * iwsp(2) = nexph, number of Hessenberg matrix exponential evaluated
92 * iwsp(3) = nscale, number of repeated squaring involved in Pade
93 * iwsp(4) = nstep, number of integration steps used up to completion
94 * iwsp(5) = nreject, number of rejected step-sizes
95 * iwsp(6) = ibrkflag, set to 1 if `happy breakdown' and
0 otherwise
96 * iwsp
(7) = mbrkdwn
, if `happy brkdown
', basis-size when it occured
97 * -----------------------------------------------------------------|
98 * wsp(1) = step_min, minimum step-size used during integration
99 * wsp(2) = step_max, maximum step-size used during integration
100 * wsp(3) = x_round, maximum among all roundoff errors (lower bound)
101 * wsp(4) = s_round, sum of roundoff errors (lower bound)
102 * wsp(5) = x_error, maximum among all local truncation errors
103 * wsp(6) = s_error, global sum of local truncation errors
104 * wsp(7) = tbrkdwn, if `happy breakdown', time when it occured
105 * wsp
(8) = t_now
, integration domain successfully covered
106 * wsp
(9) = hump
, i
.e
., max||exp
(sA
)||
, s in
[0,t
] (or
[t
,0] if t
<0)
107 * wsp
(10) = ||w||
/||v||
, scaled norm of the solution w
.
108 * -----------------------------------------------------------------|
109 * The `hump
' is a measure of the conditioning of the problem. The
110 * matrix exponential is well-conditioned if hump = 1, whereas it is
111 * poorly-conditioned if hump >> 1. However the solution can still be
112 * relatively fairly accurate even when the hump is large (the hump
113 * is an upper bound), especially when the hump and the scaled norm
114 * of w [this is also computed and returned in wsp(10)] are of the
115 * same order of magnitude (further details in reference below).
116 * Markov chains are usually well-conditioned problems.
118 *----------------------------------------------------------------------|
119 *-----The following parameters may also be adjusted herein-------------|
121 integer mxstep, mxreject, ideg
122 double precision delta, gamma
123 parameter( mxstep = 500,
129 * mxstep : maximum allowable number of integration steps.
130 * The value 0 means an infinite number of steps.
132 * mxreject: maximum allowable number of rejections at each step.
133 * The value 0 means an infinite number of rejections.
135 * ideg : the Pade approximation of type (ideg,ideg) is used as
136 * an approximation to exp(H). The value 0 switches to the
137 * uniform rational Chebyshev approximation of type (14,14)
139 * delta : local truncation error `safety factor'
141 * gamma
: stepsize `shrinking factor
'
143 *----------------------------------------------------------------------|
144 * Roger B. Sidje (rbs@maths.uq.edu.au)
145 * EXPOKIT: Software Package for Computing Matrix Exponentials.
146 * ACM - Transactions On Mathematical Software, 24(1):130-156, 1998
147 *----------------------------------------------------------------------|
149 integer i, j, k1, mh, mx, iv, ih, j1v, ns, ifree, lfree, iexph,
150 . ireject,ibrkflag,mbrkdwn, nmult, nreject, nexph, nscale,
152 double precision sgn, t_out, tbrkdwn, step_min,step_max, err_loc,
153 . s_error, x_error, t_now, t_new, t_step, t_old,
154 . xm, beta, break_tol, p1, p2, p3, eps, rndoff,
155 . vnorm, avnorm, hj1j, hij, hump, SQR1,
156 . roundoff, s_round, x_round
158 intrinsic AINT,ABS,DBLE,LOG10,MAX,MIN,NINT,SIGN,SQRT
159 double precision DDOT, DNRM2, DASUM
161 *--- check restrictions on input parameters ...
163 if ( lwsp.lt.n*(m+2)+5*(m+2)**2+ideg+1 ) iflag = -1
164 if ( liwsp.lt.m+2 ) iflag = -2
165 if ( m.ge.n .or. m.le.0 ) iflag = -3
166 if ( iflag.ne.0 ) stop 'bad sizes
(in input of DMEXPV
)'
168 *--- initialisations ...
173 ih = iv + n*(m+1) + n
175 lfree = lwsp - ifree + 1
184 sgn = SIGN( 1.0d0,t )
200 eps = ABS( p3-1.0d0 )
201 if ( eps.eq.0.0d0 ) go to 1
202 if ( tol.le.eps ) tol = SQRT( eps )
207 *>>> break_tol = anorm*tol
209 call DCOPY( n, v,1, w,1 )
210 beta = DNRM2( n, w,1 )
214 *--- obtain the very first stepsize ...
218 p1 = tol*(((m+1)/2.72D0)**(m+1))*SQRT(2.0D0*3.14D0*(m+1))
219 t_new = (1.0d0/anorm)*(p1/(4.0d0*beta*anorm))**xm
220 p1 = 10.0d0**(NINT( LOG10( t_new )-SQR1 )-1)
221 t_new = AINT( t_new/p1 + 0.55d0 ) * p1
223 *--- step-by-step integration ...
225 100 if ( t_now.ge.t_out ) goto 500
228 t_step = MIN( t_out-t_now, t_new )
232 wsp(iv + i-1) = p1*w(i)
238 *--- Arnoldi loop ...
243 call matvec( wsp(j1v-n), wsp(j1v) )
245 hij = DDOT( n, wsp(iv+(i-1)*n),1, wsp(j1v),1 )
246 call DAXPY( n, -hij, wsp(iv+(i-1)*n),1, wsp(j1v),1 )
247 wsp(ih+(j-1)*mh+i-1) = hij
249 hj1j = DNRM2( n, wsp(j1v),1 )
250 *--- if `happy breakdown' go straightforward at the
end ...
251 if ( hj1j
.le
.break_tol
) then
252 print*
,'happy breakdown: mbrkdwn =',j
,' h =',hj1j
260 wsp
(ih
+(j
-1)*mh
+j
) = hj1j
261 call DSCAL
( n
, 1.0d0
/hj1j
, wsp
(j1v
),1 )
265 call matvec
( wsp
(j1v
-n
), wsp
(j1v
) )
266 avnorm
= DNRM2
( n
, wsp
(j1v
),1 )
268 *--- set
1 for the
2-corrected scheme
...
271 wsp
(ih
+m*mh
+m
+1) = 1.0d0
273 *--- loop
while ireject
<mxreject until the tolerance is reached
...
279 *--- compute w
= beta*V*exp
(t_step*H
)*e1
..
283 if ( ideg
.ne
.0 ) then
284 *--- irreducible rational Pade approximation
...
285 call DGPADM
( ideg
, mx
, sgn*t_step
, wsp
(ih
),mh
,
286 . wsp
(ifree
),lfree
, iwsp
, iexph
, ns
, iflag
)
287 iexph
= ifree
+ iexph
- 1
290 *--- uniform rational Chebyshev approximation
...
293 wsp
(iexph
+i
-1) = 0.0d0
296 call DNCHBV
(mx
,sgn*t_step
,wsp
(ih
),mh
,wsp
(iexph
),wsp
(ifree
+mx
))
301 *--- error estimate
...
306 p1
= ABS
( wsp
(iexph
+m
) ) * beta
307 p2
= ABS
( wsp
(iexph
+m
+1) ) * beta
* avnorm
308 if ( p1
.gt
.10.0d0*p2
) then
311 elseif
( p1
.gt
.p2
) then
312 err_loc
= (p1*p2
)/(p1
-p2
)
316 xm
= 1.0d0
/DBLE
( m
-1 )
320 *--- reject the step
-size
if the error is not acceptable
...
322 if ( (k1
.ne
.0) .and
. (err_loc
.gt
.delta*t_step*tol
) .and
.
323 . (mxreject
.eq
.0 .or
. ireject
.lt
.mxreject
) ) then
325 t_step
= gamma
* t_step
* (t_step*tol
/err_loc
)**xm
326 p1
= 10.0d0**
(NINT
( LOG10
( t_step
)-SQR1
)-1)
327 t_step
= AINT
( t_step
/p1
+ 0.55d0
) * p1
328 if ( itrace
.ne
.0 ) then
329 print*
,'t_step =',t_old
330 print*
,'err_loc =',err_loc
331 print*
,'err_required =',delta*t_old*tol
332 print*
,'stepsize rejected, stepping down to:',t_step
334 ireject
= ireject
+ 1
335 nreject
= nreject
+ 1
336 if ( mxreject
.ne
.0 .and
. ireject
.gt
.mxreject
) then
337 print*
,"Failure in DMEXPV: ---"
338 print*
,"The requested tolerance is too high."
339 Print*
,"Rerun with a smaller value."
346 *--- now update w
= beta*V*exp
(t_step*H
)*e1 and the hump
...
348 mx
= mbrkdwn
+ MAX
( 0,k1
-1 )
349 call DGEMV
( 'n', n
,mx
,beta
,wsp
(iv
),n
,wsp
(iexph
),1,0.0d0
,w
,1 )
350 beta
= DNRM2
( n
, w
,1 )
351 hump
= MAX
( hump
, beta
)
353 *--- Markov model constraints
...
357 if ( w
(i
).lt
.0.0d0
) then
363 if ( j
.gt
.0 ) call DSCAL
( n
, 1.0d0
/p1
, w
,1 )
364 roundoff
= DABS
( 1.0d0
-p1
) / DBLE
( n
)
366 *--- suggested value
for the next stepsize
...
368 t_new
= gamma
* t_step
* (t_step*tol
/err_loc
)**xm
369 p1
= 10.0d0**
(NINT
( LOG10
( t_new
)-SQR1
)-1)
370 t_new
= AINT
( t_new
/p1
+ 0.55d0
) * p1
372 err_loc
= MAX
( err_loc
, roundoff
)
373 err_loc
= MAX
( err_loc
, rndoff
)
375 *--- update the time covered
...
377 t_now
= t_now
+ t_step
379 *--- display and keep some information
...
381 if ( itrace
.ne
.0 ) then
382 print*
,'integration',nstep
,'---------------------------------'
383 print*
,'scale-square =',ns
384 print*
,'wnorm =',beta
385 print*
,'step_size =',t_step
386 print*
,'err_loc =',err_loc
387 print*
,'roundoff =',roundoff
388 print*
,'next_step =',t_new
391 step_min
= MIN
( step_min
, t_step
)
392 step_max
= MAX
( step_max
, t_step
)
393 s_error
= s_error
+ err_loc
394 s_round
= s_round
+ roundoff
395 x_error
= MAX
( x_error
, err_loc
)
396 x_round
= MAX
( x_round
, roundoff
)
398 if ( mxstep
.eq
.0 .or
. nstep
.lt
.mxstep
) goto 100
422 *----------------------------------------------------------------------|
423 *----------------------------------------------------------------------|
424 subroutine DGPADM
( ideg
,m
,t
,H
,ldh
,wsp
,lwsp
,ipiv
,iexph
,ns
,iflag
)
427 integer ideg
, m
, ldh
, lwsp
, iexph
, ns
, iflag
, ipiv
(m
)
428 double precision t
, H
(ldh
,m
), wsp
(lwsp
)
430 *-----Purpose
----------------------------------------------------------|
432 * Computes exp
(t*H
), the matrix exponential of a general matrix in
433 * full
, using the irreducible rational Pade approximation
to the
434 * exponential
function exp
(x
) = r
(x
) = (+/-)( I
+ 2*(q
(x
)/p
(x
)) ),
435 * combined with scaling
-and
-squaring
.
437 *-----Arguments
--------------------------------------------------------|
439 * ideg
: (input
) the degre of the diagonal Pade
to be used
.
440 * a value of
6 is generally satisfactory
.
442 * m
: (input
) order of H
.
444 * H
(ldh
,m
) : (input
) argument matrix
.
446 * t
: (input
) time
-scale
(can be
< 0).
448 * wsp
(lwsp
) : (workspace
/output
) lwsp
.ge
. 4*m*m
+ideg
+1.
450 * ipiv
(m
) : (workspace
)
452 *>>>> iexph
: (output
) number such that wsp
(iexph
) points
to exp
(tH
)
453 * i
.e
., exp
(tH
) is located at wsp
(iexph
... iexph
+m*m
-1)
454 * ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
455 * NOTE
: if the routine was called with wsp
(iptr
),
456 * then exp
(tH
) will start at wsp
(iptr
+iexph
-1).
458 * ns
: (output
) number of scaling
-squaring used
.
460 * iflag
: (output
) exit flag
.
464 *----------------------------------------------------------------------|
465 * Roger B
. Sidje
(rbs@maths
.uq
.edu
.au
)
466 * EXPOKIT
: Software Package
for Computing Matrix Exponentials
.
467 * ACM
- Transactions On Mathematical Software
, 24(1):130-156, 1998
468 *----------------------------------------------------------------------|
470 integer mm
,i
,j
,k
,ih2
,ip
,iq
,iused
,ifree
,iodd
,icoef
,iput
,iget
471 double precision hnorm
,scale
,scale2
,cp
,cq
473 intrinsic INT
,ABS
,DBLE
,LOG
,MAX
475 *--- check restrictions on input parameters
...
478 if ( ldh
.lt
.m
) iflag
= -1
479 if ( lwsp
.lt
.4*mm
+ideg
+1 ) iflag
= -2
480 if ( iflag
.ne
.0 ) stop 'bad sizes (in input of DGPADM)'
482 *--- initialise pointers
...
485 ih2
= icoef
+ (ideg
+1)
490 *--- scaling
: seek ns such that ||t*H
/2^ns||
< 1/2;
491 * and set scale
= t
/2^ns
...
498 wsp
(i
) = wsp
(i
) + ABS
( H
(i
,j
) )
503 hnorm
= MAX
( hnorm
,wsp
(i
) )
505 hnorm
= ABS
( t*hnorm
)
506 if ( hnorm
.eq
.0.0d0
) stop 'Error - null H in input of DGPADM.'
507 ns
= MAX
( 0,INT
(LOG
(hnorm
)/LOG
(2.0d0
))+2 )
508 scale
= t
/ DBLE
(2**ns
)
511 *--- compute Pade coefficients
...
517 wsp
(icoef
+k
) = (wsp
(icoef
+k
-1)*DBLE
( i
-k
))/DBLE
( k*
(j
-k
) )
520 *--- H2
= scale2*H*H
...
522 call DGEMM
( 'n','n',m
,m
,m
,scale2
,H
,ldh
,H
,ldh
,0.0d0
,wsp
(ih2
),m
)
524 *--- initialize p
(numerator
) and q
(denominator
) ...
526 cp
= wsp
(icoef
+ideg
-1)
530 wsp
(ip
+ (j
-1)*m
+ i
-1) = 0.0d0
531 wsp
(iq
+ (j
-1)*m
+ i
-1) = 0.0d0
533 wsp
(ip
+ (j
-1)*(m
+1)) = cp
534 wsp
(iq
+ (j
-1)*(m
+1)) = cq
537 *--- Apply Horner rule
...
542 iused
= iodd*iq
+ (1-iodd
)*ip
543 call DGEMM
( 'n','n',m
,m
,m
, 1.0d0
,wsp
(iused
),m
,
544 . wsp
(ih2
),m
, 0.0d0
,wsp
(ifree
),m
)
546 wsp
(ifree
+(j
-1)*(m
+1)) = wsp
(ifree
+(j
-1)*(m
+1))+wsp
(icoef
+k
-1)
548 ip
= (1-iodd
)*ifree
+ iodd*ip
549 iq
= iodd*ifree
+ (1-iodd
)*iq
553 if ( k
.gt
.0 ) goto 100
555 *--- Obtain
(+/-)(I
+ 2*(p\q
)) ...
557 if ( iodd
.eq
. 1 ) then
558 call DGEMM
( 'n','n',m
,m
,m
, scale
,wsp
(iq
),m
,
559 . H
,ldh
, 0.0d0
,wsp
(ifree
),m
)
562 call DGEMM
( 'n','n',m
,m
,m
, scale
,wsp
(ip
),m
,
563 . H
,ldh
, 0.0d0
,wsp
(ifree
),m
)
566 call DAXPY
( mm
, -1.0d0
,wsp
(ip
),1, wsp
(iq
),1 )
567 call DGESV
( m
,m
, wsp
(iq
),m
, ipiv
, wsp
(ip
),m
, iflag
)
568 if ( iflag
.ne
.0 ) stop 'Problem in DGESV (within DGPADM)'
569 call DSCAL
( mm
, 2.0d0
, wsp
(ip
), 1 )
571 wsp
(ip
+(j
-1)*(m
+1)) = wsp
(ip
+(j
-1)*(m
+1)) + 1.0d0
574 if ( ns
.eq
.0 .and
. iodd
.eq
.1 ) then
575 call DSCAL
( mm
, -1.0d0
, wsp
(ip
), 1 )
579 *-- squaring
: exp
(t*H
) = (exp
(t*H
))^
(2^ns
) ...
583 iget
= iodd*ip
+ (1-iodd
)*iq
584 iput
= (1-iodd
)*ip
+ iodd*iq
585 call DGEMM
( 'n','n',m
,m
,m
, 1.0d0
,wsp
(iget
),m
, wsp
(iget
),m
,
586 . 0.0d0
,wsp
(iput
),m
)
592 *----------------------------------------------------------------------|
593 *----------------------------------------------------------------------|
594 subroutine DSPADM
( ideg
,m
,t
,H
,ldh
,wsp
,lwsp
,ipiv
,iexph
,ns
,iflag
)
597 integer ideg
, m
, ldh
, lwsp
, iexph
, ns
, iflag
, ipiv
(m
)
598 double precision t
, H
(ldh
,m
), wsp
(lwsp
)
600 *-----Purpose
----------------------------------------------------------|
602 * Computes exp
(t*H
), the matrix exponential of a symmetric matrix
603 * in full
, using the irreducible rational Pade approximation
to the
604 * exponential
function exp
(x
) = r
(x
) = (+/-)( I
+ 2*(q
(x
)/p
(x
)) ),
605 * combined with scaling
-and
-squaring
.
607 *-----Arguments
--------------------------------------------------------|
609 * ideg
: (input
) the degre of the diagonal Pade
to be used
.
610 * a value of
6 is generally satisfactory
.
612 * m
: (input
) order of H
.
614 * H
(ldh
,m
) : (input
) argument matrix
(both lower and upper parts
).
616 * t
: (input
) time
-scale
(can be
< 0).
618 * wsp
(lwsp
) : (workspace
/output
) lwsp
.ge
. 4*m*m
+ideg
+1.
620 * ipiv
(m
) : (workspace
)
622 *>>>> iexph
: (output
) number such that wsp
(iexph
) points
to exp
(tH
)
623 * i
.e
., exp
(tH
) is located at wsp
(iexph
... iexph
+m*m
-1)
624 * ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
625 * NOTE
: if the routine was called with wsp
(iptr
),
626 * then exp
(tH
) will start at wsp
(iptr
+iexph
-1).
628 * ns
: (output
) number of scaling
-squaring used
.
630 * iflag
: (output
) exit flag
.
634 *----------------------------------------------------------------------|
635 * Roger B
. Sidje
(rbs@maths
.uq
.edu
.au
)
636 * EXPOKIT
: Software Package
for Computing Matrix Exponentials
.
637 * ACM
- Transactions On Mathematical Software
, 24(1):130-156, 1998
638 *----------------------------------------------------------------------|
640 integer mm
,i
,j
,k
,ih2
,ip
,iq
,iused
,ifree
,iodd
,icoef
,iput
,iget
641 double precision hnorm
,scale
,scale2
,cp
,cq
643 intrinsic INT
,ABS
,DBLE
,LOG
,MAX
645 *--- check restrictions on input parameters
...
648 if ( ldh
.lt
.m
) iflag
= -1
649 if ( lwsp
.lt
.4*mm
+ideg
+1 ) iflag
= -2
650 if ( iflag
.ne
.0 ) stop 'bad sizes (in input of DSPADM)'
652 *--- initialise pointers
...
655 ih2
= icoef
+ (ideg
+1)
660 *--- scaling
: seek ns such that ||t*H
/2^ns||
< 1/2;
661 * and set scale
= t
/2^ns
...
668 wsp
(i
) = wsp
(i
) + ABS
( H
(i
,j
) )
673 hnorm
= MAX
( hnorm
,wsp
(i
) )
675 hnorm
= ABS
( t*hnorm
)
676 if ( hnorm
.eq
.0.0d0
) stop 'Error - null H in input of DSPADM.'
677 ns
= MAX
( 0,INT
(LOG
(hnorm
)/LOG
(2.0d0
))+2 )
678 scale
= t
/ DBLE
(2**ns
)
681 *--- compute Pade coefficients
...
687 wsp
(icoef
+k
) = (wsp
(icoef
+k
-1)*DBLE
( i
-k
))/DBLE
( k*
(j
-k
) )
690 *--- H2
= scale2*H*H
...
692 call DGEMM
( 'n','n',m
,m
,m
,scale2
,H
,ldh
,H
,ldh
,0.0d0
,wsp
(ih2
),m
)
694 *--- initialize p
(numerator
) and q
(denominator
) ...
696 cp
= wsp
(icoef
+ideg
-1)
700 wsp
(ip
+ (j
-1)*m
+ i
-1) = 0.0d0
701 wsp
(iq
+ (j
-1)*m
+ i
-1) = 0.0d0
703 wsp
(ip
+ (j
-1)*(m
+1)) = cp
704 wsp
(iq
+ (j
-1)*(m
+1)) = cq
707 *--- Apply Horner rule
...
712 iused
= iodd*iq
+ (1-iodd
)*ip
713 call DGEMM
( 'n','n',m
,m
,m
, 1.0d0
,wsp
(iused
),m
,
714 . wsp
(ih2
),m
, 0.0d0
,wsp
(ifree
),m
)
716 wsp
(ifree
+(j
-1)*(m
+1)) = wsp
(ifree
+(j
-1)*(m
+1))+wsp
(icoef
+k
-1)
718 ip
= (1-iodd
)*ifree
+ iodd*ip
719 iq
= iodd*ifree
+ (1-iodd
)*iq
723 if ( k
.gt
.0 ) goto 100
725 *--- Obtain
(+/-)(I
+ 2*(p\q
)) ...
727 if ( iodd
.eq
. 1 ) then
728 call DGEMM
( 'n','n',m
,m
,m
, scale
,wsp
(iq
),m
,
729 . H
,ldh
, 0.0d0
,wsp
(ifree
),m
)
732 call DGEMM
( 'n','n',m
,m
,m
, scale
,wsp
(ip
),m
,
733 . H
,ldh
, 0.0d0
,wsp
(ifree
),m
)
736 call DAXPY
( mm
, -1.0d0
,wsp
(ip
),1, wsp
(iq
),1 )
737 call DSYSV
( 'U',m
,m
,wsp
(iq
),m
,ipiv
,wsp
(ip
),m
,wsp
(ih2
),mm
,iflag
)
738 if ( iflag
.ne
.0 ) stop 'Problem in DSYSV (within DSPADM)'
739 call DSCAL
( mm
, 2.0d0
, wsp
(ip
), 1 )
741 wsp
(ip
+(j
-1)*(m
+1)) = wsp
(ip
+(j
-1)*(m
+1)) + 1.0d0
744 if ( ns
.eq
.0 .and
. iodd
.eq
.1 ) then
745 call DSCAL
( mm
, -1.0d0
, wsp
(ip
), 1 )
749 *-- squaring
: exp
(t*H
) = (exp
(t*H
))^
(2^ns
) ...
753 iget
= iodd*ip
+ (1-iodd
)*iq
754 iput
= (1-iodd
)*ip
+ iodd*iq
755 call DGEMM
( 'n','n',m
,m
,m
, 1.0d0
,wsp
(iget
),m
, wsp
(iget
),m
,
756 . 0.0d0
,wsp
(iput
),m
)
762 *----------------------------------------------------------------------|
763 *----------------------------------------------------------------------|
764 subroutine ZGPADM
(ideg
,m
,t
,H
,ldh
,wsp
,lwsp
,ipiv
,iexph
,ns
,iflag
)
768 integer ideg
, m
, ldh
, lwsp
, iexph
, ns
, iflag
, ipiv
(m
)
769 complex*16 H
(ldh
,m
), wsp
(lwsp
)
771 *-----Purpose
----------------------------------------------------------|
773 * Computes exp
(t*H
), the matrix exponential of a general
complex
774 * matrix in full
, using the irreducible rational Pade approximation
775 * to the exponential exp
(z
) = r
(z
) = (+/-)( I
+ 2*(q
(z
)/p
(z
)) ),
776 * combined with scaling
-and
-squaring
.
778 *-----Arguments
--------------------------------------------------------|
780 * ideg
: (input
) the degre of the diagonal Pade
to be used
.
781 * a value of
6 is generally satisfactory
.
783 * m
: (input
) order of H
.
785 * H
(ldh
,m
) : (input
) argument matrix
.
787 * t
: (input
) time
-scale
(can be
< 0).
789 * wsp
(lwsp
) : (workspace
/output
) lwsp
.ge
. 4*m*m
+ideg
+1.
791 * ipiv
(m
) : (workspace
)
793 *>>>> iexph
: (output
) number such that wsp
(iexph
) points
to exp
(tH
)
794 * i
.e
., exp
(tH
) is located at wsp
(iexph
... iexph
+m*m
-1)
795 * ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
796 * NOTE
: if the routine was called with wsp
(iptr
),
797 * then exp
(tH
) will start at wsp
(iptr
+iexph
-1).
799 * ns
: (output
) number of scaling
-squaring used
.
801 * iflag
: (output
) exit flag
.
805 *----------------------------------------------------------------------|
806 * Roger B
. Sidje
(rbs@maths
.uq
.edu
.au
)
807 * EXPOKIT
: Software Package
for Computing Matrix Exponentials
.
808 * ACM
- Transactions On Mathematical Software
, 24(1):130-156, 1998
809 *----------------------------------------------------------------------|
811 integer i
,j
,k
,icoef
,mm
,ih2
,iodd
,iused
,ifree
,iq
,ip
,iput
,iget
812 double precision hnorm
813 complex*16 cp
, cq
, scale
, scale2
, ZERO
, ONE
815 parameter( ZERO
=(0.0d0
,0.0d0
), ONE
=(1.0d0
,0.0d0
) )
816 intrinsic ABS
, CMPLX
, DBLE
, INT
, LOG
, MAX
818 *--- check restrictions on input parameters
...
821 if ( ldh
.lt
.m
) iflag
= -1
822 if ( lwsp
.lt
.4*mm
+ideg
+1 ) iflag
= -2
823 if ( iflag
.ne
.0 ) stop 'bad sizes (in input of ZGPADM)'
825 *--- initialise pointers
...
828 ih2
= icoef
+ (ideg
+1)
833 *--- scaling
: seek ns such that ||t*H
/2^ns||
< 1/2;
834 * and set scale
= t
/2^ns
...
841 wsp
(i
) = wsp
(i
) + ABS
( H
(i
,j
) )
846 hnorm
= MAX
( hnorm
,DBLE
(wsp
(i
)) )
848 hnorm
= ABS
( t*hnorm
)
849 if ( hnorm
.eq
.0.0d0
) stop 'Error - null H in input of ZGPADM.'
850 ns
= MAX
( 0,INT
(LOG
(hnorm
)/LOG
(2.0d0
))+2 )
851 scale
= CMPLX
( t
/DBLE
(2**ns
),0.0d0
)
854 *--- compute Pade coefficients
...
860 wsp
(icoef
+k
) = (wsp
(icoef
+k
-1)*DBLE
( i
-k
))/DBLE
( k*
(j
-k
) )
863 *--- H2
= scale2*H*H
...
865 call ZGEMM
( 'n','n',m
,m
,m
,scale2
,H
,ldh
,H
,ldh
,ZERO
,wsp
(ih2
),m
)
867 *--- initialise p
(numerator
) and q
(denominator
) ...
869 cp
= wsp
(icoef
+ideg
-1)
873 wsp
(ip
+ (j
-1)*m
+ i
-1) = ZERO
874 wsp
(iq
+ (j
-1)*m
+ i
-1) = ZERO
876 wsp
(ip
+ (j
-1)*(m
+1)) = cp
877 wsp
(iq
+ (j
-1)*(m
+1)) = cq
880 *--- Apply Horner rule
...
885 iused
= iodd*iq
+ (1-iodd
)*ip
886 call ZGEMM
( 'n','n',m
,m
,m
, ONE
,wsp
(iused
),m
,
887 . wsp
(ih2
),m
, ZERO
,wsp
(ifree
),m
)
889 wsp
(ifree
+(j
-1)*(m
+1)) = wsp
(ifree
+(j
-1)*(m
+1))+wsp
(icoef
+k
-1)
891 ip
= (1-iodd
)*ifree
+ iodd*ip
892 iq
= iodd*ifree
+ (1-iodd
)*iq
896 if ( k
.gt
.0 ) goto 100
898 *--- Obtain
(+/-)(I
+ 2*(p\q
)) ...
900 if ( iodd
.ne
.0 ) then
901 call ZGEMM
( 'n','n',m
,m
,m
, scale
,wsp
(iq
),m
,
902 . H
,ldh
, ZERO
,wsp
(ifree
),m
)
905 call ZGEMM
( 'n','n',m
,m
,m
, scale
,wsp
(ip
),m
,
906 . H
,ldh
, ZERO
,wsp
(ifree
),m
)
909 call ZAXPY
( mm
, -ONE
,wsp
(ip
),1, wsp
(iq
),1 )
910 call ZGESV
( m
,m
, wsp
(iq
),m
, ipiv
, wsp
(ip
),m
, iflag
)
911 if ( iflag
.ne
.0 ) stop 'Problem in ZGESV (within ZGPADM)'
912 call ZDSCAL
( mm
, 2.0d0
, wsp
(ip
), 1 )
914 wsp
(ip
+(j
-1)*(m
+1)) = wsp
(ip
+(j
-1)*(m
+1)) + ONE
917 if ( ns
.eq
.0 .and
. iodd
.ne
.0 ) then
918 call ZDSCAL
( mm
, -1.0d0
, wsp
(ip
), 1 )
922 *-- squaring
: exp
(t*H
) = (exp
(t*H
))^
(2^ns
) ...
926 iget
= iodd*ip
+ (1-iodd
)*iq
927 iput
= (1-iodd
)*ip
+ iodd*iq
928 call ZGEMM
( 'n','n',m
,m
,m
, ONE
,wsp
(iget
),m
, wsp
(iget
),m
,
935 *----------------------------------------------------------------------|
936 subroutine ZHPADM
(ideg
,m
,t
,H
,ldh
,wsp
,lwsp
,ipiv
,iexph
,ns
,iflag
)
940 integer ideg
, m
, ldh
, lwsp
, iexph
, ns
, iflag
, ipiv
(m
)
941 complex*16 H
(ldh
,m
), wsp
(lwsp
)
943 *-----Purpose
----------------------------------------------------------|
945 * Computes exp
(t*H
), the matrix exponential of an Hermitian matrix
946 * in full
, using the irreducible rational Pade approximation
to the
947 * exponential
function exp
(z
) = r
(z
) = (+/-)( I
+ 2*(q
(z
)/p
(z
)) ),
948 * combined with scaling
-and
-squaring
.
950 *-----Arguments
--------------------------------------------------------|
952 * ideg
: (input
) the degre of the diagonal Pade
to be used
.
953 * a value of
6 is generally satisfactory
.
955 * m
: (input
) order of H
.
957 * H
(ldh
,m
) : (input
) argument matrix
(both lower and upper parts
).
959 * t
: (input
) time
-scale
(can be
< 0).
961 * wsp
(lwsp
) : (workspace
/output
) lwsp
.ge
. 4*m*m
+ideg
+1.
963 * ipiv
(m
) : (workspace
)
965 *>>>> iexph
: (output
) number such that wsp
(iexph
) points
to exp
(tH
)
966 * i
.e
., exp
(tH
) is located at wsp
(iexph
... iexph
+m*m
-1)
967 * ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
968 * NOTE
: if the routine was called with wsp
(iptr
),
969 * then exp
(tH
) will start at wsp
(iptr
+iexph
-1).
971 * ns
: (output
) number of scaling
-squaring used
.
973 * iflag
: (output
) exit flag
.
977 *----------------------------------------------------------------------|
978 * Roger B
. Sidje
(rbs@maths
.uq
.edu
.au
)
979 * EXPOKIT
: Software Package
for Computing Matrix Exponentials
.
980 * ACM
- Transactions On Mathematical Software
, 24(1):130-156, 1998
981 *----------------------------------------------------------------------|
983 integer i
,j
,k
,icoef
,mm
,ih2
,iodd
,iused
,ifree
,iq
,ip
,iput
,iget
984 double precision hnorm
985 complex*16 cp
, cq
, scale
, scale2
, ZERO
, ONE
987 parameter( ZERO
=(0.0d0
,0.0d0
), ONE
=(1.0d0
,0.0d0
) )
988 intrinsic ABS
, CMPLX
, DBLE
, INT
, LOG
, MAX
990 *--- check restrictions on input parameters
...
993 if ( ldh
.lt
.m
) iflag
= -1
994 if ( lwsp
.lt
.4*mm
+ideg
+1 ) iflag
= -2
995 if ( iflag
.ne
.0 ) stop 'bad sizes (in input of ZHPADM)'
997 *--- initialise pointers
...
1000 ih2
= icoef
+ (ideg
+1)
1005 *--- scaling
: seek ns such that ||t*H
/2^ns||
< 1/2;
1006 * and set scale
= t
/2^ns
...
1013 wsp
(i
) = wsp
(i
) + ABS
( H
(i
,j
) )
1018 hnorm
= MAX
( hnorm
,DBLE
(wsp
(i
)) )
1020 hnorm
= ABS
( t*hnorm
)
1021 if ( hnorm
.eq
.0.0d0
) stop 'Error - null H in input of ZHPADM.'
1022 ns
= MAX
( 0,INT
(LOG
(hnorm
)/LOG
(2.0d0
))+2 )
1023 scale
= CMPLX
( t
/DBLE
(2**ns
),0.0d0
)
1024 scale2
= scale*scale
1026 *--- compute Pade coefficients
...
1032 wsp
(icoef
+k
) = (wsp
(icoef
+k
-1)*DBLE
( i
-k
))/DBLE
( k*
(j
-k
) )
1035 *--- H2
= scale2*H*H
...
1037 call ZGEMM
( 'n','n',m
,m
,m
,scale2
,H
,ldh
,H
,ldh
,ZERO
,wsp
(ih2
),m
)
1039 *--- initialise p
(numerator
) and q
(denominator
) ...
1041 cp
= wsp
(icoef
+ideg
-1)
1042 cq
= wsp
(icoef
+ideg
)
1045 wsp
(ip
+ (j
-1)*m
+ i
-1) = ZERO
1046 wsp
(iq
+ (j
-1)*m
+ i
-1) = ZERO
1048 wsp
(ip
+ (j
-1)*(m
+1)) = cp
1049 wsp
(iq
+ (j
-1)*(m
+1)) = cq
1052 *--- Apply Horner rule
...
1057 iused
= iodd*iq
+ (1-iodd
)*ip
1058 call ZGEMM
( 'n','n',m
,m
,m
, ONE
,wsp
(iused
),m
,
1059 . wsp
(ih2
),m
, ZERO
,wsp
(ifree
),m
)
1061 wsp
(ifree
+(j
-1)*(m
+1)) = wsp
(ifree
+(j
-1)*(m
+1))+wsp
(icoef
+k
-1)
1063 ip
= (1-iodd
)*ifree
+ iodd*ip
1064 iq
= iodd*ifree
+ (1-iodd
)*iq
1068 if ( k
.gt
.0 ) goto 100
1070 *--- Obtain
(+/-)(I
+ 2*(p\q
)) ...
1072 if ( iodd
.ne
.0 ) then
1073 call ZGEMM
( 'n','n',m
,m
,m
, scale
,wsp
(iq
),m
,
1074 . H
,ldh
, ZERO
,wsp
(ifree
),m
)
1077 call ZGEMM
( 'n','n',m
,m
,m
, scale
,wsp
(ip
),m
,
1078 . H
,ldh
, ZERO
,wsp
(ifree
),m
)
1081 call ZAXPY
( mm
, -ONE
,wsp
(ip
),1, wsp
(iq
),1 )
1082 call ZHESV
( 'U',m
,m
,wsp
(iq
),m
,ipiv
,wsp
(ip
),m
,wsp
(ih2
),mm
,iflag
)
1083 if ( iflag
.ne
.0 ) stop 'Problem in ZHESV (within ZHPADM)'
1084 call ZDSCAL
( mm
, 2.0d0
, wsp
(ip
), 1 )
1086 wsp
(ip
+(j
-1)*(m
+1)) = wsp
(ip
+(j
-1)*(m
+1)) + ONE
1089 if ( ns
.eq
.0 .and
. iodd
.ne
.0 ) then
1090 call ZDSCAL
( mm
, -1.0d0
, wsp
(ip
), 1 )
1094 *-- squaring
: exp
(t*H
) = (exp
(t*H
))^
(2^ns
) ...
1098 iget
= iodd*ip
+ (1-iodd
)*iq
1099 iput
= (1-iodd
)*ip
+ iodd*iq
1100 call ZGEMM
( 'n','n',m
,m
,m
, ONE
,wsp
(iget
),m
, wsp
(iget
),m
,
1101 . ZERO
,wsp
(iput
),m
)
1107 *----------------------------------------------------------------------|
1108 subroutine DGCHBV
( m
, t
, H
,ldh
, y
, wsp
, iwsp
, iflag
)
1111 integer m
, ldh
, iflag
, iwsp
(m
)
1112 double precision t
, H
(ldh
,m
), y
(m
)
1113 complex*16 wsp
(m*
(m
+2))
1115 *-----Purpose
----------------------------------------------------------|
1117 *--- DGCHBV computes y
= exp
(t*H
)*y using the partial fraction
1118 * expansion of the uniform rational Chebyshev approximation
1119 * to exp
(-x
) of type
(14,14). H is a General matrix
.
1120 * About
14-digit accuracy is expected
if the matrix H is negative
1121 * definite
. The algorithm may behave poorly otherwise
.
1123 *-----Arguments
--------------------------------------------------------|
1125 * m
: (input
) order of the matrix H
1127 * t
: (input
) time
-scaling factor
(can be
< 0).
1129 * H
(ldh
,m
): (input
) argument matrix
.
1131 * y
(m
) : (input
/output
) on input the operand vector
,
1132 * on output the resulting vector exp
(t*H
)*y
.
1134 * iwsp
(m
) : (workspace
)
1136 * wsp
: (workspace
). Observe that a
double precision vector of
1137 * length
2*m*
(m
+2) can be used as well when calling this
1138 * routine
(thus avoiding an idle
complex array elsewhere
)
1140 *----------------------------------------------------------------------|
1141 * Roger B
. Sidje
(rbs@maths
.uq
.edu
.au
)
1142 * EXPOKIT
: Software Package
for Computing Matrix Exponentials
.
1143 * ACM
- Transactions On Mathematical Software
, 24(1):130-156, 1998
1144 *----------------------------------------------------------------------|
1146 integer ndeg
, i
, j
, ip
, ih
, iy
, iz
1147 parameter ( ndeg
=7 )
1148 double precision alpha0
1149 complex*16 alpha
(ndeg
), theta
(ndeg
)
1159 *--- Coefficients and poles of the partial fraction expansion
...
1161 alpha0
= 0.183216998528140087D
-11
1162 alpha
(1)=( 0.557503973136501826D
+02,-0.204295038779771857D
+03)
1163 alpha
(2)=(-0.938666838877006739D
+02, 0.912874896775456363D
+02)
1164 alpha
(3)=( 0.469965415550370835D
+02,-0.116167609985818103D
+02)
1165 alpha
(4)=(-0.961424200626061065D
+01,-0.264195613880262669D
+01)
1166 alpha
(5)=( 0.752722063978321642D
+00, 0.670367365566377770D
+00)
1167 alpha
(6)=(-0.188781253158648576D
-01,-0.343696176445802414D
-01)
1168 alpha
(7)=( 0.143086431411801849D
-03, 0.287221133228814096D
-03)
1170 theta
(1)=(-0.562314417475317895D
+01, 0.119406921611247440D
+01)
1171 theta
(2)=(-0.508934679728216110D
+01, 0.358882439228376881D
+01)
1172 theta
(3)=(-0.399337136365302569D
+01, 0.600483209099604664D
+01)
1173 theta
(4)=(-0.226978543095856366D
+01, 0.846173881758693369D
+01)
1174 theta
(5)=( 0.208756929753827868D
+00, 0.109912615662209418D
+02)
1175 theta
(6)=( 0.370327340957595652D
+01, 0.136563731924991884D
+02)
1176 theta
(7)=( 0.889777151877331107D
+01, 0.166309842834712071D
+02)
1178 *--- Accumulation of the contribution of each pole
...
1185 *--- Solve each fraction using Gaussian elimination with pivoting
...
1188 wsp
(ih
+(j
-1)*m
+i
-1) = -t*H
(i
,j
)
1190 wsp
(ih
+(j
-1)*m
+j
-1) = wsp
(ih
+(j
-1)*m
+j
-1)-theta
(ip
)
1191 wsp
(iy
+j
-1) = wsp
(iz
+j
-1)
1193 call ZGESV
( M
, 1, WSP
(iH
),M
, IWSP
, WSP
(iY
),M
, IFLAG
)
1194 if ( IFLAG
.ne
.0 ) stop 'Error in DGCHBV'
1195 *--- Accumulate the partial result in y
...
1197 y
(j
) = y
(j
) + DBLE
( alpha
(ip
)*wsp
(iy
+j
-1) )
1201 *----------------------------------------------------------------------|
1202 *----------------------------------------------------------------------|
1203 subroutine DSCHBV
( m
, t
, H
,ldh
, y
, wsp
, iwsp
, iflag
)
1206 integer m
, ldh
, iflag
, iwsp
(m
)
1207 double precision t
, H
(ldh
,m
), y
(m
)
1208 complex*16 wsp
(m*
(m
+2))
1210 *-----Purpose
----------------------------------------------------------|
1212 *--- DSCHBV computes y
= exp
(t*H
)*y using the partial fraction
1213 * expansion of the uniform rational Chebyshev approximation
1214 * to exp
(-x
) of type
(14,14). H is assumed
to be symmetric
.
1215 * About
14-digit accuracy is expected
if the matrix H is negative
1216 * definite
. The algorithm may behave poorly otherwise
.
1218 *-----Arguments
--------------------------------------------------------|
1220 * m
: (input
) order of matrix H
1222 * t
: (input
) time
-scaling factor
(can be
< 0).
1224 * H
(ldh
,m
): (input
) symmetric matrix
.
1226 * y
(m
) : (input
/output
) on input the operand vector
,
1227 * on output the resulting vector exp
(t*H
)*y
.
1229 * iwsp
(m
) : (workspace
)
1231 * wsp
: (workspace
). Observe that a
double precision vector of
1232 * length
2*m*
(m
+2) can be used as well when calling this
1233 * routine
(thus avoiding an idle
complex array elsewhere
)
1235 *----------------------------------------------------------------------|
1236 * Roger B
. Sidje
(rbs@maths
.uq
.edu
.au
)
1237 * EXPOKIT
: Software Package
for Computing Matrix Exponentials
.
1238 * ACM
- Transactions On Mathematical Software
, 24(1):130-156, 1998
1239 *----------------------------------------------------------------------|
1241 integer ndeg
, i
, j
, ip
, ih
, iy
, iz
1242 parameter ( ndeg
=7 )
1243 double precision alpha0
1244 complex*16 alpha
(ndeg
), theta
(ndeg
), w
1246 intrinsic ABS
,CMPLX
,DBLE
,MIN
1254 *--- Coefficients and poles of the partial fraction expansion
...
1256 alpha0
= 0.183216998528140087D
-11
1257 alpha
(1)=( 0.557503973136501826D
+02,-0.204295038779771857D
+03)
1258 alpha
(2)=(-0.938666838877006739D
+02, 0.912874896775456363D
+02)
1259 alpha
(3)=( 0.469965415550370835D
+02,-0.116167609985818103D
+02)
1260 alpha
(4)=(-0.961424200626061065D
+01,-0.264195613880262669D
+01)
1261 alpha
(5)=( 0.752722063978321642D
+00, 0.670367365566377770D
+00)
1262 alpha
(6)=(-0.188781253158648576D
-01,-0.343696176445802414D
-01)
1263 alpha
(7)=( 0.143086431411801849D
-03, 0.287221133228814096D
-03)
1265 theta
(1)=(-0.562314417475317895D
+01, 0.119406921611247440D
+01)
1266 theta
(2)=(-0.508934679728216110D
+01, 0.358882439228376881D
+01)
1267 theta
(3)=(-0.399337136365302569D
+01, 0.600483209099604664D
+01)
1268 theta
(4)=(-0.226978543095856366D
+01, 0.846173881758693369D
+01)
1269 theta
(5)=( 0.208756929753827868D
+00, 0.109912615662209418D
+02)
1270 theta
(6)=( 0.370327340957595652D
+01, 0.136563731924991884D
+02)
1271 theta
(7)=( 0.889777151877331107D
+01, 0.166309842834712071D
+02)
1273 *--- Accumulation of the contribution of each pole
...
1280 *--- Solve each fraction using Gaussian elimination with pivoting
...
1283 wsp
(ih
+(j
-1)*m
+i
-1) = -t*H
(i
,j
)
1285 wsp
(ih
+(j
-1)*m
+j
-1) = wsp
(ih
+(j
-1)*m
+j
-1)-theta
(ip
)
1286 wsp
(iy
+j
-1) = wsp
(iz
+j
-1)
1288 !call ZSYSV
('U', M
, 1, WSP
(iH
),M
, IWSP
, WSP
(iY
),M
, W
,1, IFLAG
)
1289 if ( IFLAG
.ne
.0 ) stop 'Error in DSCHBV'
1290 *--- Accumulate the partial result in y
...
1292 y
(i
) = y
(i
) + DBLE
( alpha
(ip
)*wsp
(iy
+i
-1) )
1296 *----------------------------------------------------------------------|
1297 *----------------------------------------------------------------------|
1298 subroutine ZGCHBV
( m
, t
, H
,ldh
, y
, wsp
, iwsp
, iflag
)
1301 integer m
, ldh
, iflag
, iwsp
(m
)
1303 complex*16 H
(ldh
,m
), y
(m
), wsp
(m*
(m
+2))
1305 *-----Purpose
----------------------------------------------------------|
1307 *--- ZGCHBV computes y
= exp
(t*H
)*y using the partial fraction
1308 * expansion of the uniform rational Chebyshev approximation
1309 * to exp
(-x
) of type
(14,14). H is a General matrix
.
1310 * About
14-digit accuracy is expected
if the matrix H is negative
1311 * definite
. The algorithm may behave poorly otherwise
.
1313 *-----Arguments
--------------------------------------------------------|
1315 * m
: (input
) order of the matrix H
.
1317 * t
: (input
) time
-scaling factor
(can be
< 0).
1319 * H
(ldh
,m
): (input
) argument matrix
.
1321 * y
(m
) : (input
/output
) on input the operand vector
,
1322 * on output the resulting vector exp
(t*H
)*y
.
1324 * iwsp
(m
) : (workspace
)
1326 * wsp
: (workspace
). Observe that a
double precision vector of
1327 * length
2*m*
(m
+2) can be used as well when calling this
1330 *----------------------------------------------------------------------|
1331 * Roger B
. Sidje
(rbs@maths
.uq
.edu
.au
)
1332 * EXPOKIT
: Software Package
for Computing Matrix Exponentials
.
1333 * ACM
- Transactions On Mathematical Software
, 24(1):130-156, 1998
1334 *----------------------------------------------------------------------|
1336 integer ndeg
, i
, j
, ip
, ih
, iy
, iz
1337 parameter ( ndeg
=7 )
1338 double precision alpha0
1339 complex*16 alpha
(2*ndeg
), theta
(2*ndeg
)
1347 *--- Coefficients and poles of the partial fraction expansion
...
1349 alpha0
= 0.183216998528140087D
-11
1350 alpha
(1)=( 0.557503973136501826D
+02,-0.204295038779771857D
+03)
1351 alpha
(2)=(-0.938666838877006739D
+02, 0.912874896775456363D
+02)
1352 alpha
(3)=( 0.469965415550370835D
+02,-0.116167609985818103D
+02)
1353 alpha
(4)=(-0.961424200626061065D
+01,-0.264195613880262669D
+01)
1354 alpha
(5)=( 0.752722063978321642D
+00, 0.670367365566377770D
+00)
1355 alpha
(6)=(-0.188781253158648576D
-01,-0.343696176445802414D
-01)
1356 alpha
(7)=( 0.143086431411801849D
-03, 0.287221133228814096D
-03)
1358 theta
(1)=(-0.562314417475317895D
+01, 0.119406921611247440D
+01)
1359 theta
(2)=(-0.508934679728216110D
+01, 0.358882439228376881D
+01)
1360 theta
(3)=(-0.399337136365302569D
+01, 0.600483209099604664D
+01)
1361 theta
(4)=(-0.226978543095856366D
+01, 0.846173881758693369D
+01)
1362 theta
(5)=( 0.208756929753827868D
+00, 0.109912615662209418D
+02)
1363 theta
(6)=( 0.370327340957595652D
+01, 0.136563731924991884D
+02)
1364 theta
(7)=( 0.889777151877331107D
+01, 0.166309842834712071D
+02)
1367 theta
(ndeg
+ip
) = CONJG
( theta
(ip
) )
1368 alpha
(ndeg
+ip
) = CONJG
( alpha
(ip
) )
1371 *--- Accumulation of the contribution of each pole
...
1378 alpha
(ip
) = 0.5d0*alpha
(ip
)
1379 *--- Solve each fraction using Gaussian elimination with pivoting
...
1382 wsp
(ih
+(j
-1)*m
+i
-1) = -t*H
(i
,j
)
1384 wsp
(ih
+(j
-1)*m
+j
-1) = wsp
(ih
+(j
-1)*m
+j
-1)-theta
(ip
)
1385 wsp
(iy
+j
-1) = wsp
(iz
+j
-1)
1387 call ZGESV
( M
, 1, WSP
(iH
),M
, IWSP
, WSP
(iY
),M
, IFLAG
)
1388 if ( IFLAG
.ne
.0 ) stop 'Error in ZGCHBV'
1389 *--- Accumulate the partial result in y
...
1391 y
(i
) = y
(i
) + alpha
(ip
)*wsp
(iy
+i
-1)
1395 *----------------------------------------------------------------------|
1396 *----------------------------------------------------------------------|
1397 subroutine DNCHBV
( m
, t
, H
,ldh
, y
, wsp
)
1401 double precision t
, H
(ldh
,m
), y
(m
)
1402 complex*16 wsp
(m*
(m
+2))
1404 *-----Purpose
----------------------------------------------------------|
1406 *--- DNCHBV computes y
= exp
(t*H
)*y using the partial fraction
1407 * expansion of the uniform rational Chebyshev approximation
1408 * to exp
(-x
) of type
(14,14). H is assumed
to be upper
-Hessenberg
.
1409 * About
14-digit accuracy is expected
if the matrix H is negative
1410 * definite
. The algorithm may behave poorly otherwise
.
1412 *-----Arguments
--------------------------------------------------------|
1414 * m
: (input
) order of the Hessenberg matrix H
1416 * t
: (input
) time
-scaling factor
(can be
< 0).
1418 * H
(ldh
,m
): (input
) upper Hessenberg matrix
.
1420 * y
(m
) : (input
/output
) on input the operand vector
,
1421 * on output the resulting vector exp
(t*H
)*y
.
1423 * wsp
: (workspace
). Observe that a
double precision vector of
1424 * length
2*m*
(m
+2) can be used as well when calling this
1425 * routine
(thus avoiding an idle
complex array elsewhere
)
1427 *----------------------------------------------------------------------|
1428 * Roger B
. Sidje
(rbs@maths
.uq
.edu
.au
)
1429 * EXPOKIT
: Software Package
for Computing Matrix Exponentials
.
1430 * ACM
- Transactions On Mathematical Software
, 24(1):130-156, 1998
1431 *----------------------------------------------------------------------|
1434 integer ndeg
, i
, j
, k
, ip
, ih
, iy
, iz
1435 parameter ( ndeg
=7, ZERO
=(0.0d0
,0.0d0
) )
1436 double precision alpha0
1437 complex*16 alpha
(ndeg
), theta
(ndeg
), tmpc
1439 intrinsic ABS
,DBLE
,MIN
1447 *--- Coefficients and poles of the partial fraction expansion
...
1449 alpha0
= 0.183216998528140087D
-11
1450 alpha
(1)=( 0.557503973136501826D
+02,-0.204295038779771857D
+03)
1451 alpha
(2)=(-0.938666838877006739D
+02, 0.912874896775456363D
+02)
1452 alpha
(3)=( 0.469965415550370835D
+02,-0.116167609985818103D
+02)
1453 alpha
(4)=(-0.961424200626061065D
+01,-0.264195613880262669D
+01)
1454 alpha
(5)=( 0.752722063978321642D
+00, 0.670367365566377770D
+00)
1455 alpha
(6)=(-0.188781253158648576D
-01,-0.343696176445802414D
-01)
1456 alpha
(7)=( 0.143086431411801849D
-03, 0.287221133228814096D
-03)
1458 theta
(1)=(-0.562314417475317895D
+01, 0.119406921611247440D
+01)
1459 theta
(2)=(-0.508934679728216110D
+01, 0.358882439228376881D
+01)
1460 theta
(3)=(-0.399337136365302569D
+01, 0.600483209099604664D
+01)
1461 theta
(4)=(-0.226978543095856366D
+01, 0.846173881758693369D
+01)
1462 theta
(5)=( 0.208756929753827868D
+00, 0.109912615662209418D
+02)
1463 theta
(6)=( 0.370327340957595652D
+01, 0.136563731924991884D
+02)
1464 theta
(7)=( 0.889777151877331107D
+01, 0.166309842834712071D
+02)
1466 *--- Accumulation of the contribution of each pole
...
1473 *--- Solve each fraction using Gaussian elimination with pivoting
...
1475 wsp
(iy
+j
-1) = wsp
(iz
+j
-1)
1476 do i
= 1,MIN
( j
+1,m
)
1477 wsp
(ih
+(j
-1)*m
+i
-1) = -t*H
(i
,j
)
1479 wsp
(ih
+(j
-1)*m
+j
-1) = wsp
(ih
+(j
-1)*m
+j
-1)-theta
(ip
)
1481 wsp
(ih
+(j
-1)*m
+k
-1) = ZERO
1485 *--- Get pivot and exchange rows
...
1486 if (ABS
(wsp
(ih
+(i
-1)*m
+i
-1)).lt
.ABS
(wsp
(ih
+(i
-1)*m
+i
))) then
1487 call ZSWAP
( m
-i
+1, wsp
(ih
+(i
-1)*m
+i
-1),m
,
1488 . wsp
(ih
+(i
-1)*m
+i
),m
)
1489 call ZSWAP
( 1, wsp
(iy
+i
-1),1, wsp
(iy
+i
),1 )
1491 *--- Forward eliminiation
...
1492 tmpc
= wsp
(ih
+(i
-1)*m
+i
) / wsp
(ih
+(i
-1)*m
+i
-1)
1493 call ZAXPY
( m
-i
, -tmpc
, wsp
(ih
+i*m
+i
-1),m
, wsp
(ih
+i*m
+i
),m
)
1494 wsp
(iy
+i
) = wsp
(iy
+i
) - tmpc*wsp
(iy
+i
-1)
1496 *--- Backward substitution
...
1500 tmpc
= tmpc
- wsp
(ih
+(j
-1)*m
+i
-1)*wsp
(iy
+j
-1)
1502 wsp
(iy
+i
-1) = tmpc
/ wsp
(ih
+(i
-1)*m
+i
-1)
1504 *--- Accumulate the partial result in y
...
1506 y
(j
) = y
(j
) + DBLE
( alpha
(ip
)*wsp
(iy
+j
-1) )
1510 *----------------------------------------------------------------------|
1511 *----------------------------------------------------------------------|
1512 subroutine ZNCHBV
( m
, t
, H
,ldh
, y
, wsp
)
1517 complex*16 H
(ldh
,m
), y
(m
), wsp
(m*
(m
+2))
1519 *-----Purpose
----------------------------------------------------------|
1521 *--- ZNCHBV computes y
= exp
(t*H
)*y using the partial fraction
1522 * expansion of the uniform rational Chebyshev approximation
1523 * to exp
(-x
) of type
(14,14). H is assumed
to be upper
-Hessenberg
.
1524 * About
14-digit accuracy is expected
if the matrix H is negative
1525 * definite
. The algorithm may behave poorly otherwise
.
1527 *-----Arguments
--------------------------------------------------------|
1529 * m
: (input
) order of the Hessenberg matrix H
1531 * t
: (input
) time
-scaling factor
(can be
< 0).
1533 * H
(ldh
,m
): (input
) upper Hessenberg matrix
.
1535 * y
(m
) : (input
/output
) on input the operand vector
,
1536 * on output the resulting vector exp
(t*H
)*y
.
1538 * wsp
: (workspace
). Observe that a
double precision vector of
1539 * length
2*m*
(m
+2) can be used as well when calling this
1542 *----------------------------------------------------------------------|
1543 * Roger B
. Sidje
(rbs@maths
.uq
.edu
.au
)
1544 * EXPOKIT
: Software Package
for Computing Matrix Exponentials
.
1545 * ACM
- Transactions On Mathematical Software
, 24(1):130-156, 1998
1546 *----------------------------------------------------------------------|
1549 integer ndeg
, i
, j
, k
, ip
, ih
, iy
, iz
1550 parameter ( ndeg
=7, ZERO
=(0.0d0
,0.0d0
) )
1551 double precision alpha0
1552 complex*16 alpha
(ndeg
), theta
(ndeg
), tmpc
1554 intrinsic ABS
,DBLE
,CONJG
,MIN
1562 *--- Coefficients and poles of the partial fraction expansion
...
1564 alpha0
= 0.183216998528140087D
-11
1565 alpha
(1)=( 0.557503973136501826D
+02,-0.204295038779771857D
+03)
1566 alpha
(2)=(-0.938666838877006739D
+02, 0.912874896775456363D
+02)
1567 alpha
(3)=( 0.469965415550370835D
+02,-0.116167609985818103D
+02)
1568 alpha
(4)=(-0.961424200626061065D
+01,-0.264195613880262669D
+01)
1569 alpha
(5)=( 0.752722063978321642D
+00, 0.670367365566377770D
+00)
1570 alpha
(6)=(-0.188781253158648576D
-01,-0.343696176445802414D
-01)
1571 alpha
(7)=( 0.143086431411801849D
-03, 0.287221133228814096D
-03)
1573 theta
(1)=(-0.562314417475317895D
+01, 0.119406921611247440D
+01)
1574 theta
(2)=(-0.508934679728216110D
+01, 0.358882439228376881D
+01)
1575 theta
(3)=(-0.399337136365302569D
+01, 0.600483209099604664D
+01)
1576 theta
(4)=(-0.226978543095856366D
+01, 0.846173881758693369D
+01)
1577 theta
(5)=( 0.208756929753827868D
+00, 0.109912615662209418D
+02)
1578 theta
(6)=( 0.370327340957595652D
+01, 0.136563731924991884D
+02)
1579 theta
(7)=( 0.889777151877331107D
+01, 0.166309842834712071D
+02)
1582 theta
(ndeg
+ip
) = CONJG
( theta
(ip
) )
1583 alpha
(ndeg
+ip
) = CONJG
( alpha
(ip
) )
1586 *--- Accumulation of the contribution of each pole
...
1593 alpha
(ip
) = 0.5d0*alpha
(ip
)
1594 *--- Solve each fraction using Gaussian elimination with pivoting
...
1596 wsp
(iy
+j
-1) = wsp
(iz
+j
-1)
1597 do i
= 1,MIN
( j
+1,m
)
1598 wsp
(ih
+(j
-1)*m
+i
-1) = -t*H
(i
,j
)
1600 wsp
(ih
+(j
-1)*m
+j
-1) = wsp
(ih
+(j
-1)*m
+j
-1)-theta
(ip
)
1602 wsp
(ih
+(j
-1)*m
+k
-1) = ZERO
1606 *--- Get pivot and exchange rows
...
1607 if (ABS
(wsp
(ih
+(i
-1)*m
+i
-1)).lt
.ABS
(wsp
(ih
+(i
-1)*m
+i
))) then
1608 call ZSWAP
( m
-i
+1, wsp
(ih
+(i
-1)*m
+i
-1),m
,
1609 . wsp
(ih
+(i
-1)*m
+i
),m
)
1610 call ZSWAP
( 1, wsp
(iy
+i
-1),1, wsp
(iy
+i
),1 )
1612 *--- Forward eliminiation
...
1613 tmpc
= wsp
(ih
+(i
-1)*m
+i
) / wsp
(ih
+(i
-1)*m
+i
-1)
1614 call ZAXPY
( m
-i
, -tmpc
, wsp
(ih
+i*m
+i
-1),m
, wsp
(ih
+i*m
+i
),m
)
1615 wsp
(iy
+i
) = wsp
(iy
+i
) - tmpc*wsp
(iy
+i
-1)
1617 *--- Backward substitution
...
1621 tmpc
= tmpc
- wsp
(ih
+(j
-1)*m
+i
-1)*wsp
(iy
+j
-1)
1623 wsp
(iy
+i
-1) = tmpc
/ wsp
(ih
+(i
-1)*m
+i
-1)
1625 *--- Accumulate the partial result in y
...
1627 y
(j
) = y
(j
) + alpha
(ip
)*wsp
(iy
+j
-1)
1631 *----------------------------------------------------------------------|
1632 *----------------------------------------------------------------------|
1633 subroutine DGEXPV
( n
, m
, t
, v
, w
, tol
, anorm
,
1634 . wsp
,lwsp
, iwsp
,liwsp
, matvec
, itrace
,iflag
)
1637 integer n
, m
, lwsp
, liwsp
, itrace
, iflag
, iwsp
(liwsp
)
1638 double precision t
, tol
, anorm
, v
(n
), w
(n
), wsp
(lwsp
)
1641 *-----Purpose
----------------------------------------------------------|
1643 *--- DGEXPV computes w
= exp
(t*A
)*v
- for a General matrix A
.
1645 * It does not compute the matrix exponential in isolation but
1646 * instead
, it computes directly the action of the exponential
1647 * operator on the operand vector
. This way of doing so allows
1648 * for addressing large sparse problems
.
1650 * The method used is based on Krylov subspace projection
1651 * techniques and the matrix under consideration interacts only
1652 * via the
external routine `matvec
' performing the matrix-vector
1653 * product (matrix-free method).
1655 *-----Arguments--------------------------------------------------------|
1657 * n : (input) order of the principal matrix A.
1659 * m : (input) maximum size for the Krylov basis.
1661 * t : (input) time at wich the solution is needed (can be < 0).
1663 * v(n) : (input) given operand vector.
1665 * w(n) : (output) computed approximation of exp(t*A)*v.
1667 * tol : (input/output) the requested accuracy tolerance on w.
1668 * If on input tol=0.0d0 or tol is too small (tol.le.eps)
1669 * the internal value sqrt(eps) is used, and tol is set to
1670 * sqrt(eps) on output (`eps' denotes the machine epsilon
).
1671 * (`Happy breakdown
' is assumed if h(j+1,j) .le. anorm*tol)
1673 * anorm : (input) an approximation of some norm of A.
1675 * wsp(lwsp): (workspace) lwsp .ge. n*(m+1)+n+(m+2)^2+4*(m+2)^2+ideg+1
1676 * +---------+-------+---------------+
1677 * (actually, ideg=6) V H wsp for PADE
1679 * iwsp(liwsp): (workspace) liwsp .ge. m+2
1681 * matvec : external subroutine for matrix-vector multiplication.
1682 * synopsis: matvec( x, y )
1683 * double precision x(*), y(*)
1684 * computes: y(1:n) <- A*x(1:n)
1685 * where A is the principal matrix.
1687 * itrace : (input) running mode. 0=silent, 1=print step-by-step info
1689 * iflag : (output) exit flag.
1690 * <0 - bad input arguments
1692 * 1 - maximum number of steps reached without convergence
1693 * 2 - requested tolerance was too high
1695 *-----Accounts on the computation--------------------------------------|
1696 * Upon exit, an interested user may retrieve accounts on the
1697 * computations. They are located in wsp and iwsp as indicated below:
1699 * location mnemonic description
1700 * -----------------------------------------------------------------|
1701 * iwsp(1) = nmult, number of matrix-vector multiplications used
1702 * iwsp(2) = nexph, number of Hessenberg matrix exponential evaluated
1703 * iwsp(3) = nscale, number of repeated squaring involved in Pade
1704 * iwsp(4) = nstep, number of integration steps used up to completion
1705 * iwsp(5) = nreject, number of rejected step-sizes
1706 * iwsp(6) = ibrkflag, set to 1 if `happy breakdown' and
0 otherwise
1707 * iwsp
(7) = mbrkdwn
, if `happy brkdown
', basis-size when it occured
1708 * -----------------------------------------------------------------|
1709 * wsp(1) = step_min, minimum step-size used during integration
1710 * wsp(2) = step_max, maximum step-size used during integration
1713 * wsp(5) = x_error, maximum among all local truncation errors
1714 * wsp(6) = s_error, global sum of local truncation errors
1715 * wsp(7) = tbrkdwn, if `happy breakdown', time when it occured
1716 * wsp
(8) = t_now
, integration domain successfully covered
1717 * wsp
(9) = hump
, i
.e
., max||exp
(sA
)||
, s in
[0,t
] (or
[t
,0] if t
<0)
1718 * wsp
(10) = ||w||
/||v||
, scaled norm of the solution w
.
1719 * -----------------------------------------------------------------|
1720 * The `hump
' is a measure of the conditioning of the problem. The
1721 * matrix exponential is well-conditioned if hump = 1, whereas it is
1722 * poorly-conditioned if hump >> 1. However the solution can still be
1723 * relatively fairly accurate even when the hump is large (the hump
1724 * is an upper bound), especially when the hump and the scaled norm
1725 * of w [this is also computed and returned in wsp(10)] are of the
1726 * same order of magnitude (further details in reference below).
1728 *----------------------------------------------------------------------|
1729 *-----The following parameters may also be adjusted herein-------------|
1731 integer mxstep, mxreject, ideg
1732 double precision delta, gamma
1733 parameter( mxstep = 1000,
1739 * mxstep : maximum allowable number of integration steps.
1740 * The value 0 means an infinite number of steps.
1742 * mxreject: maximum allowable number of rejections at each step.
1743 * The value 0 means an infinite number of rejections.
1745 * ideg : the Pade approximation of type (ideg,ideg) is used as
1746 * an approximation to exp(H). The value 0 switches to the
1747 * uniform rational Chebyshev approximation of type (14,14)
1749 * delta : local truncation error `safety factor'
1751 * gamma
: stepsize `shrinking factor
'
1753 *----------------------------------------------------------------------|
1754 * Roger B. Sidje (rbs@maths.uq.edu.au)
1755 * EXPOKIT: Software Package for Computing Matrix Exponentials.
1756 * ACM - Transactions On Mathematical Software, 24(1):130-156, 1998
1757 *----------------------------------------------------------------------|
1759 integer i, j, k1, mh, mx, iv, ih, j1v, ns, ifree, lfree, iexph,
1760 . ireject,ibrkflag,mbrkdwn, nmult, nreject, nexph, nscale,
1762 double precision sgn, t_out, tbrkdwn, step_min,step_max, err_loc,
1763 . s_error, x_error, t_now, t_new, t_step, t_old,
1764 . xm, beta, break_tol, p1, p2, p3, eps, rndoff,
1765 . vnorm, avnorm, hj1j, hij, hump, SQR1
1767 intrinsic AINT,ABS,DBLE,LOG10,MAX,MIN,NINT,SIGN,SQRT
1768 double precision DDOT, DNRM2
1770 *--- check restrictions on input parameters ...
1772 if ( lwsp.lt.n*(m+2)+5*(m+2)**2+ideg+1 ) iflag = -1
1773 if ( liwsp.lt.m+2 ) iflag = -2
1774 if ( m.ge.n .or. m.le.0 ) iflag = -3
1775 if ( iflag.ne.0 ) stop 'bad sizes
(in input of DGEXPV
)'
1777 *--- initialisations ...
1782 ih = iv + n*(m+1) + n
1784 lfree = lwsp - ifree + 1
1806 eps = ABS( p3-1.0d0 )
1807 if ( eps.eq.0.0d0 ) go to 1
1808 if ( tol.le.eps ) tol = SQRT( eps )
1812 *>>> break_tol = tol
1813 *>>> break_tol = anorm*tol
1815 sgn = SIGN( 1.0d0,t )
1816 call DCOPY( n, v,1, w,1 )
1817 beta = DNRM2( n, w,1 )
1821 *--- obtain the very first stepsize ...
1823 SQR1 = SQRT( 0.1d0 )
1824 xm = 1.0d0/DBLE( m )
1825 p1 = tol*(((m+1)/2.72D0)**(m+1))*SQRT(2.0D0*3.14D0*(m+1))
1826 t_new = (1.0d0/anorm)*(p1/(4.0d0*beta*anorm))**xm
1827 p1 = 10.0d0**(NINT( LOG10( t_new )-SQR1 )-1)
1828 t_new = AINT( t_new/p1 + 0.55d0 ) * p1
1830 *--- step-by-step integration ...
1832 100 if ( t_now.ge.t_out ) goto 500
1835 t_step = MIN( t_out-t_now, t_new )
1839 wsp(iv + i-1) = p1*w(i)
1845 *--- Arnoldi loop ...
1850 call matvec( wsp(j1v-n), wsp(j1v) )
1852 hij = DDOT( n, wsp(iv+(i-1)*n),1, wsp(j1v),1 )
1853 call DAXPY( n, -hij, wsp(iv+(i-1)*n),1, wsp(j1v),1 )
1854 wsp(ih+(j-1)*mh+i-1) = hij
1856 hj1j = DNRM2( n, wsp(j1v),1 )
1857 *--- if `happy breakdown' go straightforward at the
end ...
1858 if ( hj1j
.le
.break_tol
) then
1859 print*
,'happy breakdown: mbrkdwn =',j
,' h =',hj1j
1864 t_step
= t_out
-t_now
1867 wsp
(ih
+(j
-1)*mh
+j
) = hj1j
1868 call DSCAL
( n
, 1.0d0
/hj1j
, wsp
(j1v
),1 )
1872 call matvec
( wsp
(j1v
-n
), wsp
(j1v
) )
1873 avnorm
= DNRM2
( n
, wsp
(j1v
),1 )
1875 *--- set
1 for the
2-corrected scheme
...
1878 wsp
(ih
+m*mh
+m
+1) = 1.0d0
1880 *--- loop
while ireject
<mxreject until the tolerance is reached
...
1886 *--- compute w
= beta*V*exp
(t_step*H
)*e1
...
1890 if ( ideg
.ne
.0 ) then
1891 *--- irreducible rational Pade approximation
...
1892 call DGPADM
( ideg
, mx
, sgn*t_step
, wsp
(ih
),mh
,
1893 . wsp
(ifree
),lfree
, iwsp
, iexph
, ns
, iflag
)
1894 iexph
= ifree
+ iexph
- 1
1895 nscale
= nscale
+ ns
1897 *--- uniform rational Chebyshev approximation
...
1900 wsp
(iexph
+i
-1) = 0.0d0
1903 call DNCHBV
(mx
,sgn*t_step
,wsp
(ih
),mh
,wsp
(iexph
),wsp
(ifree
+mx
))
1908 *--- error estimate
...
1913 p1
= ABS
( wsp
(iexph
+m
) ) * beta
1914 p2
= ABS
( wsp
(iexph
+m
+1) ) * beta
* avnorm
1915 if ( p1
.gt
.10.0d0*p2
) then
1917 xm
= 1.0d0
/DBLE
( m
)
1918 elseif
( p1
.gt
.p2
) then
1919 err_loc
= (p1*p2
)/(p1
-p2
)
1920 xm
= 1.0d0
/DBLE
( m
)
1923 xm
= 1.0d0
/DBLE
( m
-1 )
1927 *--- reject the step
-size
if the error is not acceptable
...
1929 if ( (k1
.ne
.0) .and
. (err_loc
.gt
.delta*t_step*tol
) .and
.
1930 . (mxreject
.eq
.0 .or
. ireject
.lt
.mxreject
) ) then
1932 t_step
= gamma
* t_step
* (t_step*tol
/err_loc
)**xm
1933 p1
= 10.0d0**
(NINT
( LOG10
( t_step
)-SQR1
)-1)
1934 t_step
= AINT
( t_step
/p1
+ 0.55d0
) * p1
1935 if ( itrace
.ne
.0 ) then
1936 print*
,'t_step =',t_old
1937 print*
,'err_loc =',err_loc
1938 print*
,'err_required =',delta*t_old*tol
1939 print*
,'stepsize rejected, stepping down to:',t_step
1941 ireject
= ireject
+ 1
1942 nreject
= nreject
+ 1
1943 if ( mxreject
.ne
.0 .and
. ireject
.gt
.mxreject
) then
1944 print*
,"Failure in DGEXPV: ---"
1945 print*
,"The requested tolerance is too high."
1946 Print*
,"Rerun with a smaller value."
1953 *--- now update w
= beta*V*exp
(t_step*H
)*e1 and the hump
...
1955 mx
= mbrkdwn
+ MAX
( 0,k1
-1 )
1956 call DGEMV
( 'n', n
,mx
,beta
,wsp
(iv
),n
,wsp
(iexph
),1,0.0d0
,w
,1 )
1957 beta
= DNRM2
( n
, w
,1 )
1958 hump
= MAX
( hump
, beta
)
1960 *--- suggested value
for the next stepsize
...
1962 t_new
= gamma
* t_step
* (t_step*tol
/err_loc
)**xm
1963 p1
= 10.0d0**
(NINT
( LOG10
( t_new
)-SQR1
)-1)
1964 t_new
= AINT
( t_new
/p1
+ 0.55d0
) * p1
1966 err_loc
= MAX
( err_loc
,rndoff
)
1968 *--- update the time covered
...
1970 t_now
= t_now
+ t_step
1972 *--- display and keep some information
...
1974 if ( itrace
.ne
.0 ) then
1975 print*
,'integration',nstep
,'---------------------------------'
1976 print*
,'scale-square =',ns
1977 print*
,'step_size =',t_step
1978 print*
,'err_loc =',err_loc
1979 print*
,'next_step =',t_new
1982 step_min
= MIN
( step_min
, t_step
)
1983 step_max
= MAX
( step_max
, t_step
)
1984 s_error
= s_error
+ err_loc
1985 x_error
= MAX
( x_error
, err_loc
)
1987 if ( mxstep
.eq
.0 .or
. nstep
.lt
.mxstep
) goto 100
2009 wsp
(10) = beta
/vnorm
2011 *----------------------------------------------------------------------|
2012 *----------------------------------------------------------------------|
2013 subroutine DSEXPV
( n
, m
, t
, v
, w
, tol
, anorm
,
2014 . wsp
,lwsp
, iwsp
,liwsp
, matvec
, itrace
,iflag
)
2017 integer n
, m
, lwsp
, liwsp
, itrace
, iflag
, iwsp
(liwsp
)
2018 double precision t
, tol
, anorm
, v
(n
), w
(n
), wsp
(lwsp
)
2021 *-----Purpose
----------------------------------------------------------|
2023 *--- DSEXPV computes w
= exp
(t*A
)*v
- for a Symmetric matrix A
.
2025 * It does not compute the matrix exponential in isolation but
2026 * instead
, it computes directly the action of the exponential
2027 * operator on the operand vector
. This way of doing so allows
2028 * for addressing large sparse problems
.
2030 * The method used is based on Krylov subspace projection
2031 * techniques and the matrix under consideration interacts only
2032 * via the
external routine `matvec
' performing the matrix-vector
2033 * product (matrix-free method).
2035 *-----Arguments--------------------------------------------------------|
2037 * n : (input) order of the principal matrix A.
2039 * m : (input) maximum size for the Krylov basis.
2041 * t : (input) time at wich the solution is needed (can be < 0).
2043 * v(n) : (input) given operand vector.
2045 * w(n) : (output) computed approximation of exp(t*A)*v.
2047 * tol : (input/output) the requested accuracy tolerance on w.
2048 * If on input tol=0.0d0 or tol is too small (tol.le.eps)
2049 * the internal value sqrt(eps) is used, and tol is set to
2050 * sqrt(eps) on output (`eps' denotes the machine epsilon
).
2051 * (`Happy breakdown
' is assumed if h(j+1,j) .le. anorm*tol)
2053 * anorm : (input) an approximation of some norm of A.
2055 * wsp(lwsp): (workspace) lwsp .ge. n*(m+1)+n+(m+2)^2+4*(m+2)^2+ideg+1
2056 * +---------+-------+---------------+
2057 * (actually, ideg=6) V H wsp for PADE
2059 * iwsp(liwsp): (workspace) liwsp .ge. m+2
2061 * matvec : external subroutine for matrix-vector multiplication.
2062 * synopsis: matvec( x, y )
2063 * double precision x(*), y(*)
2064 * computes: y(1:n) <- A*x(1:n)
2065 * where A is the principal matrix.
2067 * itrace : (input) running mode. 0=silent, 1=print step-by-step info
2069 * iflag : (output) exit flag.
2070 * <0 - bad input arguments
2072 * 1 - maximum number of steps reached without convergence
2073 * 2 - requested tolerance was too high
2075 *-----Accounts on the computation--------------------------------------|
2076 * Upon exit, an interested user may retrieve accounts on the
2077 * computations. They are located in the workspace arrays wsp and
2078 * iwsp as indicated below:
2080 * location mnemonic description
2081 * -----------------------------------------------------------------|
2082 * iwsp(1) = nmult, number of matrix-vector multiplications used
2083 * iwsp(2) = nexph, nbr of Tridiagonal matrix exponential evaluated
2084 * iwsp(3) = nscale, number of repeated squaring involved in Pade
2085 * iwsp(4) = nstep, nbr of integration steps used up to completion
2086 * iwsp(5) = nreject, number of rejected step-sizes
2087 * iwsp(6) = ibrkflag, set to 1 if `happy breakdown' and
0 otherwise
2088 * iwsp
(7) = mbrkdwn
, if `happy brkdown
', basis-size when it occured
2089 * -----------------------------------------------------------------|
2090 * wsp(1) = step_min, minimum step-size used during integration
2091 * wsp(2) = step_max, maximum step-size used during integration
2094 * wsp(5) = x_error, maximum among all local truncation errors
2095 * wsp(6) = s_error, global sum of local truncation errors
2096 * wsp(7) = tbrkdwn, if `happy breakdown', time when it occured
2097 * wsp
(8) = t_now
, integration domain successfully covered
2098 * wsp
(9) = hump
, i
.e
., max||exp
(sA
)||
, s in
[0,t
] (or
[t
,0] if t
<0)
2099 * wsp
(10) = ||w||
/||v||
, scaled norm of the solution w
.
2100 * -----------------------------------------------------------------|
2101 * The `hump
' is a measure of the conditioning of the problem. The
2102 * matrix exponential is well-conditioned if hump = 1, whereas it is
2103 * poorly-conditioned if hump >> 1. However the solution can still be
2104 * relatively fairly accurate even when the hump is large (the hump
2105 * is an upper bound), especially when the hump and the scaled norm
2106 * of w [this is also computed and returned in wsp(10)] are of the
2107 * same order of magnitude (further details in reference below).
2109 *----------------------------------------------------------------------|
2110 *-----The following parameters may also be adjusted herein-------------|
2112 integer mxstep, mxreject, ideg
2113 double precision delta, gamma
2114 parameter( mxstep = 500,
2120 * mxstep : maximum allowable number of integration steps.
2121 * The value 0 means an infinite number of steps.
2123 * mxreject: maximum allowable number of rejections at each step.
2124 * The value 0 means an infinite number of rejections.
2126 * ideg : the Pade approximation of type (ideg,ideg) is used as
2127 * an approximation to exp(H). The value 0 switches to the
2128 * uniform rational Chebyshev approximation of type (14,14)
2130 * delta : local truncation error `safety factor'
2132 * gamma
: stepsize `shrinking factor
'
2134 *----------------------------------------------------------------------|
2135 * Roger B. Sidje (rbs@maths.uq.edu.au)
2136 * EXPOKIT: Software Package for Computing Matrix Exponentials.
2137 * ACM - Transactions On Mathematical Software, 24(1):130-156, 1998
2138 *----------------------------------------------------------------------|
2140 integer i, j, k1, mh, mx, iv, ih, j1v, ns, ifree, lfree, iexph,
2141 . ireject,ibrkflag,mbrkdwn, nmult, nreject, nexph, nscale,
2143 double precision sgn, t_out, tbrkdwn, step_min,step_max, err_loc,
2144 . s_error, x_error, t_now, t_new, t_step, t_old,
2145 . xm, beta, break_tol, p1, p2, p3, eps, rndoff,
2146 . vnorm, avnorm, hj1j, hjj, hump, SQR1
2148 intrinsic AINT,ABS,DBLE,LOG10,MAX,MIN,NINT,SIGN,SQRT
2149 double precision DDOT, DNRM2
2151 *--- check restrictions on input parameters ...
2153 if ( lwsp.lt.n*(m+2)+5*(m+2)**2+ideg+1 ) iflag = -1
2154 if ( liwsp.lt.m+2 ) iflag = -2
2155 if ( m.ge.n .or. m.le.0 ) iflag = -3
2156 if ( iflag.ne.0 ) stop 'bad sizes
(in input of DSEXPV
)'
2158 *--- initialisations ...
2163 ih = iv + n*(m+1) + n
2165 lfree = lwsp - ifree + 1
2187 eps = ABS( p3-1.0d0 )
2188 if ( eps.eq.0.0d0 ) go to 1
2189 if ( tol.le.eps ) tol = SQRT( eps )
2193 *>>> break_tol = tol
2194 *>>> break_tol = anorm*tol
2196 sgn = SIGN( 1.0d0,t )
2197 call DCOPY( n, v,1, w,1 )
2198 beta = DNRM2( n, w,1 )
2202 *--- obtain the very first stepsize ...
2204 SQR1 = SQRT( 0.1d0 )
2205 xm = 1.0d0/DBLE( m )
2206 p1 = tol*(((m+1)/2.72D0)**(m+1))*SQRT(2.0D0*3.14D0*(m+1))
2207 t_new = (1.0d0/anorm)*(p1/(4.0d0*beta*anorm))**xm
2208 p1 = 10.0d0**(NINT( LOG10( t_new )-SQR1 )-1)
2209 t_new = AINT( t_new/p1 + 0.55d0 ) * p1
2211 *--- step-by-step integration ...
2213 100 if ( t_now.ge.t_out ) goto 500
2216 t_step = MIN( t_out-t_now, t_new )
2220 wsp(iv + i-1) = p1*w(i)
2226 *--- Lanczos loop ...
2231 call matvec( wsp(j1v-n), wsp(j1v) )
2233 . call DAXPY(n,-wsp(ih+(j-1)*mh+j-2),wsp(j1v-2*n),1,wsp(j1v),1)
2234 hjj = DDOT( n, wsp(j1v-n),1, wsp(j1v),1 )
2235 call DAXPY( n, -hjj, wsp(j1v-n),1, wsp(j1v),1 )
2236 hj1j = DNRM2( n, wsp(j1v),1 )
2237 wsp(ih+(j-1)*(mh+1)) = hjj
2238 *--- if `happy breakdown' go straightforward at the
end ...
2239 if ( hj1j
.le
.break_tol
) then
2240 print*
,'happy breakdown: mbrkdwn =',j
,' h =',hj1j
2245 t_step
= t_out
-t_now
2248 wsp
(ih
+(j
-1)*mh
+j
) = hj1j
2249 wsp
(ih
+j*mh
+j
-1) = hj1j
2250 call DSCAL
( n
, 1.0d0
/hj1j
, wsp
(j1v
),1 )
2254 call matvec
( wsp
(j1v
-n
), wsp
(j1v
) )
2255 avnorm
= DNRM2
( n
, wsp
(j1v
),1 )
2257 *--- set
1 for the
2-corrected scheme
...
2260 wsp
(ih
+m*mh
+m
-1) = 0.0d0
2261 wsp
(ih
+m*mh
+m
+1) = 1.0d0
2263 *--- loop
while ireject
<mxreject until the tolerance is reached
...
2268 *--- compute w
= beta*V*exp
(t_step*H
)*e1
...
2272 if ( ideg
.ne
.0 ) then
2273 *--- irreducible rational Pade approximation
...
2274 call DGPADM
( ideg
, mx
, sgn*t_step
, wsp
(ih
),mh
,
2275 . wsp
(ifree
),lfree
, iwsp
, iexph
, ns
, iflag
)
2276 iexph
= ifree
+ iexph
- 1
2277 nscale
= nscale
+ ns
2279 *--- uniform rational Chebyshev approximation
...
2282 wsp
(iexph
+i
-1) = 0.0d0
2285 call DNCHBV
(mx
,sgn*t_step
,wsp
(ih
),mh
,wsp
(iexph
),wsp
(ifree
+mx
))
2289 *--- error estimate
...
2294 p1
= ABS
( wsp
(iexph
+m
) ) * beta
2295 p2
= ABS
( wsp
(iexph
+m
+1) ) * beta
* avnorm
2296 if ( p1
.gt
.10.0d0*p2
) then
2298 xm
= 1.0d0
/DBLE
( m
)
2299 elseif
( p1
.gt
.p2
) then
2300 err_loc
= (p1*p2
)/(p1
-p2
)
2301 xm
= 1.0d0
/DBLE
( m
)
2304 xm
= 1.0d0
/DBLE
( m
-1 )
2308 *--- reject the step
-size
if the error is not acceptable
...
2310 if ( (k1
.ne
.0) .and
. (err_loc
.gt
.delta*t_step*tol
) .and
.
2311 . (mxreject
.eq
.0 .or
. ireject
.lt
.mxreject
) ) then
2313 t_step
= gamma
* t_step
* (t_step*tol
/err_loc
)**xm
2314 p1
= 10.0d0**
(NINT
( LOG10
( t_step
)-SQR1
)-1)
2315 t_step
= AINT
( t_step
/p1
+ 0.55d0
) * p1
2316 if ( itrace
.ne
.0 ) then
2317 print*
,'t_step =',t_old
2318 print*
,'err_loc =',err_loc
2319 print*
,'err_required =',delta*t_old*tol
2320 print*
,'stepsize rejected, stepping down to:',t_step
2322 ireject
= ireject
+ 1
2323 nreject
= nreject
+ 1
2324 if ( mxreject
.ne
.0 .and
. ireject
.gt
.mxreject
) then
2325 print*
,"Failure in DSEXPV: ---"
2326 print*
,"The requested tolerance is too high."
2327 Print*
,"Rerun with a smaller value."
2334 *--- now update w
= beta*V*exp
(t_step*H
)*e1 and the hump
...
2336 mx
= mbrkdwn
+ MAX
( 0,k1
-1 )
2337 call DGEMV
( 'n', n
,mx
,beta
,wsp
(iv
),n
,wsp
(iexph
),1,0.0d0
,w
,1 )
2338 beta
= DNRM2
( n
, w
,1 )
2339 hump
= MAX
( hump
, beta
)
2341 *--- suggested value
for the next stepsize
...
2343 t_new
= gamma
* t_step
* (t_step*tol
/err_loc
)**xm
2344 p1
= 10.0d0**
(NINT
( LOG10
( t_new
)-SQR1
)-1)
2345 t_new
= AINT
( t_new
/p1
+ 0.55d0
) * p1
2347 err_loc
= MAX
( err_loc
,rndoff
)
2349 *--- update the time covered
...
2351 t_now
= t_now
+ t_step
2353 *--- display and keep some information
...
2355 if ( itrace
.ne
.0 ) then
2356 print*
,'integration',nstep
,'---------------------------------'
2357 print*
,'scale-square =',ns
2358 print*
,'step_size =',t_step
2359 print*
,'err_loc =',err_loc
2360 print*
,'next_step =',t_new
2363 step_min
= MIN
( step_min
, t_step
)
2364 step_max
= MAX
( step_max
, t_step
)
2365 s_error
= s_error
+ err_loc
2366 x_error
= MAX
( x_error
, err_loc
)
2368 if ( mxstep
.eq
.0 .or
. nstep
.lt
.mxstep
) goto 100
2390 wsp
(10) = beta
/vnorm
2392 *----------------------------------------------------------------------|
2394 *----------------------------------------------------------------------|
2395 subroutine ZGEXPV
( n
, m
, t
, v
, w
, tol
, anorm
,
2396 . wsp
,lwsp
, iwsp
,liwsp
, matvec
, itrace
,iflag
)
2399 integer n
, m
, lwsp
, liwsp
, itrace
, iflag
, iwsp
(liwsp
)
2400 double precision t
, tol
, anorm
2401 complex*16 v
(n
), w
(n
), wsp
(lwsp
)
2404 *-----Purpose
----------------------------------------------------------|
2406 *--- ZGEXPV computes w
= exp
(t*A
)*v
2407 * for a Zomplex
(i
.e
., complex double precision) matrix A
2409 * It does not compute the matrix exponential in isolation but
2410 * instead
, it computes directly the action of the exponential
2411 * operator on the operand vector
. This way of doing so allows
2412 * for addressing large sparse problems
.
2414 * The method used is based on Krylov subspace projection
2415 * techniques and the matrix under consideration interacts only
2416 * via the
external routine `matvec
' performing the matrix-vector
2417 * product (matrix-free method).
2419 *-----Arguments--------------------------------------------------------|
2421 * n : (input) order of the principal matrix A.
2423 * m : (input) maximum size for the Krylov basis.
2425 * t : (input) time at wich the solution is needed (can be < 0).
2427 * v(n) : (input) given operand vector.
2429 * w(n) : (output) computed approximation of exp(t*A)*v.
2431 * tol : (input/output) the requested accuracy tolerance on w.
2432 * If on input tol=0.0d0 or tol is too small (tol.le.eps)
2433 * the internal value sqrt(eps) is used, and tol is set to
2434 * sqrt(eps) on output (`eps' denotes the machine epsilon
).
2435 * (`Happy breakdown
' is assumed if h(j+1,j) .le. anorm*tol)
2437 * anorm : (input) an approximation of some norm of A.
2439 * wsp(lwsp): (workspace) lwsp .ge. n*(m+1)+n+(m+2)^2+4*(m+2)^2+ideg+1
2440 * +---------+-------+---------------+
2441 * (actually, ideg=6) V H wsp for PADE
2443 * iwsp(liwsp): (workspace) liwsp .ge. m+2
2445 * matvec : external subroutine for matrix-vector multiplication.
2446 * synopsis: matvec( x, y )
2447 * complex*16 x(*), y(*)
2448 * computes: y(1:n) <- A*x(1:n)
2449 * where A is the principal matrix.
2451 * itrace : (input) running mode. 0=silent, 1=print step-by-step info
2453 * iflag : (output) exit flag.
2454 * <0 - bad input arguments
2456 * 1 - maximum number of steps reached without convergence
2457 * 2 - requested tolerance was too high
2459 *-----Accounts on the computation--------------------------------------|
2460 * Upon exit, an interested user may retrieve accounts on the
2461 * computations. They are located in the workspace arrays wsp and
2462 * iwsp as indicated below:
2464 * location mnemonic description
2465 * -----------------------------------------------------------------|
2466 * iwsp(1) = nmult, number of matrix-vector multiplications used
2467 * iwsp(2) = nexph, number of Hessenberg matrix exponential evaluated
2468 * iwsp(3) = nscale, number of repeated squaring involved in Pade
2469 * iwsp(4) = nstep, number of integration steps used up to completion
2470 * iwsp(5) = nreject, number of rejected step-sizes
2471 * iwsp(6) = ibrkflag, set to 1 if `happy breakdown' and
0 otherwise
2472 * iwsp
(7) = mbrkdwn
, if `happy brkdown
', basis-size when it occured
2473 * -----------------------------------------------------------------|
2474 * wsp(1) = step_min, minimum step-size used during integration
2475 * wsp(2) = step_max, maximum step-size used during integration
2476 * wsp(3) = x_round, maximum among all roundoff errors (lower bound)
2477 * wsp(4) = s_round, sum of roundoff errors (lower bound)
2478 * wsp(5) = x_error, maximum among all local truncation errors
2479 * wsp(6) = s_error, global sum of local truncation errors
2480 * wsp(7) = tbrkdwn, if `happy breakdown', time when it occured
2481 * wsp
(8) = t_now
, integration domain successfully covered
2482 * wsp
(9) = hump
, i
.e
., max||exp
(sA
)||
, s in
[0,t
] (or
[t
,0] if t
<0)
2483 * wsp
(10) = ||w||
/||v||
, scaled norm of the solution w
.
2484 * -----------------------------------------------------------------|
2485 * The `hump
' is a measure of the conditioning of the problem. The
2486 * matrix exponential is well-conditioned if hump = 1, whereas it is
2487 * poorly-conditioned if hump >> 1. However the solution can still be
2488 * relatively fairly accurate even when the hump is large (the hump
2489 * is an upper bound), especially when the hump and the scaled norm
2490 * of w [this is also computed and returned in wsp(10)] are of the
2491 * same order of magnitude (further details in reference below).
2493 *----------------------------------------------------------------------|
2494 *-----The following parameters may also be adjusted herein-------------|
2496 integer mxstep, mxreject, ideg
2497 double precision delta, gamma
2498 parameter( mxstep = 500,
2504 * mxstep : maximum allowable number of integration steps.
2505 * The value 0 means an infinite number of steps.
2507 * mxreject: maximum allowable number of rejections at each step.
2508 * The value 0 means an infinite number of rejections.
2510 * ideg : the Pade approximation of type (ideg,ideg) is used as
2511 * an approximation to exp(H). The value 0 switches to the
2512 * uniform rational Chebyshev approximation of type (14,14)
2514 * delta : local truncation error `safety factor'
2516 * gamma
: stepsize `shrinking factor
'
2518 *----------------------------------------------------------------------|
2519 * Roger B. Sidje (rbs@maths.uq.edu.au)
2520 * EXPOKIT: Software Package for Computing Matrix Exponentials.
2521 * ACM - Transactions On Mathematical Software, 24(1):130-156, 1998
2522 *----------------------------------------------------------------------|
2524 complex*16 ZERO, ONE
2525 parameter( ZERO=(0.0d0,0.0d0), ONE=(1.0d0,0.0d0) )
2526 integer i, j, k1, mh, mx, iv, ih, j1v, ns, ifree, lfree, iexph,
2527 . ireject,ibrkflag,mbrkdwn, nmult, nreject, nexph, nscale,
2529 double precision sgn, t_out, tbrkdwn, step_min,step_max, err_loc,
2530 . s_error, x_error, t_now, t_new, t_step, t_old,
2531 . xm, beta, break_tol, p1, p2, p3, eps, rndoff,
2532 . vnorm, avnorm, hj1j, hump, SQR1
2535 intrinsic AINT,ABS,CMPLX,DBLE,INT,LOG10,MAX,MIN,NINT,SIGN,SQRT
2537 double precision DZNRM2
2539 *--- check restrictions on input parameters ...
2542 if ( lwsp.lt.n*(m+2)+5*(m+2)**2+ideg+1 ) iflag = -1
2543 if ( liwsp.lt.m+2 ) iflag = -2
2544 if ( m.ge.n .or. m.le.0 ) iflag = -3
2545 if ( iflag.ne.0 ) stop 'bad sizes
(in input of ZGEXPV
)'
2547 *--- initialisations ...
2552 ih = iv + n*(m+1) + n
2554 lfree = lwsp - ifree + 1
2576 eps = ABS( p3-1.0d0 )
2577 if ( eps.eq.0.0d0 ) go to 1
2578 if ( tol.le.eps ) tol = SQRT( eps )
2582 *>>> break_tol = tol
2583 *>>> break_tol = anorm*tol
2585 sgn = SIGN( 1.0d0,t )
2586 call ZCOPY( n, v,1, w,1 )
2587 beta = DZNRM2( n, w,1 )
2591 *--- obtain the very first stepsize ...
2593 SQR1 = SQRT( 0.1d0 )
2594 xm = 1.0d0/DBLE( m )
2595 p2 = tol*(((m+1)/2.72D0)**(m+1))*SQRT(2.0D0*3.14D0*(m+1))
2596 t_new = (1.0d0/anorm)*(p2/(4.0d0*beta*anorm))**xm
2597 p1 = 10.0d0**(NINT( LOG10( t_new )-SQR1 )-1)
2598 t_new = AINT( t_new/p1 + 0.55d0 ) * p1
2600 *--- step-by-step integration ...
2602 100 if ( t_now.ge.t_out ) goto 500
2605 t_step = MIN( t_out-t_now, t_new )
2608 wsp(iv + i-1) = p1*w(i)
2614 *--- Arnoldi loop ...
2619 call matvec( wsp(j1v-n), wsp(j1v) )
2621 hij = ZDOTC( n, wsp(iv+(i-1)*n),1, wsp(j1v),1 )
2622 call ZAXPY( n, -hij, wsp(iv+(i-1)*n),1, wsp(j1v),1 )
2623 wsp(ih+(j-1)*mh+i-1) = hij
2625 hj1j = DZNRM2( n, wsp(j1v),1 )
2626 *--- if `happy breakdown' go straightforward at the
end ...
2627 if ( hj1j
.le
.break_tol
) then
2628 print*
,'happy breakdown: mbrkdwn =',j
,' h =',hj1j
2633 t_step
= t_out
-t_now
2636 wsp
(ih
+(j
-1)*mh
+j
) = CMPLX
( hj1j
)
2637 call ZDSCAL
( n
, 1.0d0
/hj1j
, wsp
(j1v
),1 )
2641 call matvec
( wsp
(j1v
-n
), wsp
(j1v
) )
2642 avnorm
= DZNRM2
( n
, wsp
(j1v
),1 )
2644 *--- set
1 for the
2-corrected scheme
...
2647 wsp
(ih
+m*mh
+m
+1) = ONE
2649 *--- loop
while ireject
<mxreject until the tolerance is reached
...
2654 *--- compute w
= beta*V*exp
(t_step*H
)*e1
...
2658 if ( ideg
.ne
.0 ) then
2659 *--- irreducible rational Pade approximation
...
2660 call ZGPADM
( ideg
, mx
, sgn*t_step
, wsp
(ih
),mh
,
2661 . wsp
(ifree
),lfree
, iwsp
, iexph
, ns
, iflag
)
2662 iexph
= ifree
+ iexph
- 1
2663 nscale
= nscale
+ ns
2665 *--- uniform rational Chebyshev approximation
...
2668 wsp
(iexph
+i
-1) = ZERO
2671 call ZNCHBV
(mx
,sgn*t_step
,wsp
(ih
),mh
,wsp
(iexph
),wsp
(ifree
+mx
))
2675 *--- error estimate
...
2680 p1
= ABS
( wsp
(iexph
+m
) ) * beta
2681 p2
= ABS
( wsp
(iexph
+m
+1) ) * beta
* avnorm
2682 if ( p1
.gt
.10.0d0*p2
) then
2684 xm
= 1.0d0
/DBLE
( m
)
2685 elseif
( p1
.gt
.p2
) then
2686 err_loc
= (p1*p2
)/(p1
-p2
)
2687 xm
= 1.0d0
/DBLE
( m
)
2690 xm
= 1.0d0
/DBLE
( m
-1 )
2694 *--- reject the step
-size
if the error is not acceptable
...
2696 if ( (k1
.ne
.0) .and
. (err_loc
.gt
.delta*t_step*tol
) .and
.
2697 . (mxreject
.eq
.0 .or
. ireject
.lt
.mxreject
) ) then
2699 t_step
= gamma
* t_step
* (t_step*tol
/err_loc
)**xm
2700 p1
= 10.0d0**
(NINT
( LOG10
( t_step
)-SQR1
)-1)
2701 t_step
= AINT
( t_step
/p1
+ 0.55d0
) * p1
2702 if ( itrace
.ne
.0 ) then
2703 print*
,'t_step =',t_old
2704 print*
,'err_loc =',err_loc
2705 print*
,'err_required =',delta*t_old*tol
2706 print*
,'stepsize rejected, stepping down to:',t_step
2708 ireject
= ireject
+ 1
2709 nreject
= nreject
+ 1
2710 if ( mxreject
.ne
.0 .and
. ireject
.gt
.mxreject
) then
2711 print*
,"Failure in ZGEXPV: ---"
2712 print*
,"The requested tolerance is too high."
2713 Print*
,"Rerun with a smaller value."
2720 *--- now update w
= beta*V*exp
(t_step*H
)*e1 and the hump
...
2722 mx
= mbrkdwn
+ MAX
( 0,k1
-1 )
2724 call ZGEMV
( 'n', n
,mx
,hij
,wsp
(iv
),n
,wsp
(iexph
),1,ZERO
,w
,1 )
2725 beta
= DZNRM2
( n
, w
,1 )
2726 hump
= MAX
( hump
, beta
)
2728 *--- suggested value
for the next stepsize
...
2730 t_new
= gamma
* t_step
* (t_step*tol
/err_loc
)**xm
2731 p1
= 10.0d0**
(NINT
( LOG10
( t_new
)-SQR1
)-1)
2732 t_new
= AINT
( t_new
/p1
+ 0.55d0
) * p1
2734 err_loc
= MAX
( err_loc
,rndoff
)
2736 *--- update the time covered
...
2738 t_now
= t_now
+ t_step
2740 *--- display and keep some information
...
2742 if ( itrace
.ne
.0 ) then
2743 print*
,'integration',nstep
,'---------------------------------'
2744 print*
,'scale-square =',ns
2745 print*
,'step_size =',t_step
2746 print*
,'err_loc =',err_loc
2747 print*
,'next_step =',t_new
2750 step_min
= MIN
( step_min
, t_step
)
2751 step_max
= MAX
( step_max
, t_step
)
2752 s_error
= s_error
+ err_loc
2753 x_error
= MAX
( x_error
, err_loc
)
2755 if ( mxstep
.eq
.0 .or
. nstep
.lt
.mxstep
) goto 100
2768 wsp
(1) = CMPLX
( step_min
)
2769 wsp
(2) = CMPLX
( step_max
)
2770 wsp
(3) = CMPLX
( 0.0d0
)
2771 wsp
(4) = CMPLX
( 0.0d0
)
2772 wsp
(5) = CMPLX
( x_error
)
2773 wsp
(6) = CMPLX
( s_error
)
2774 wsp
(7) = CMPLX
( tbrkdwn
)
2775 wsp
(8) = CMPLX
( sgn*t_now
)
2776 wsp
(9) = CMPLX
( hump
/vnorm
)
2777 wsp
(10) = CMPLX
( beta
/vnorm
)
2779 *----------------------------------------------------------------------|
2780 *----------------------------------------------------------------------|
2781 subroutine ZHEXPV
( n
, m
, t
, v
, w
, tol
, anorm
,
2782 . wsp
,lwsp
, iwsp
,liwsp
, matvec
, itrace
,iflag
)
2785 integer n
, m
, lwsp
, liwsp
, itrace
, iflag
, iwsp
(liwsp
)
2786 double precision t
, tol
, anorm
2787 complex*16 v
(n
), w
(n
), wsp
(lwsp
)
2790 *-----Purpose
----------------------------------------------------------|
2792 *--- ZHEXPV computes w
= exp
(t*A
)*v
for a Hermitian matrix A
.
2794 * It does not compute the matrix exponential in isolation but
2795 * instead
, it computes directly the action of the exponential
2796 * operator on the operand vector
. This way of doing so allows
2797 * for addressing large sparse problems
.
2799 * The method used is based on Krylov subspace projection
2800 * techniques and the matrix under consideration interacts only
2801 * via the
external routine `matvec
' performing the matrix-vector
2802 * product (matrix-free method).
2804 *-----Arguments--------------------------------------------------------|
2806 * n : (input) order of the principal matrix A.
2808 * m : (input) maximum size for the Krylov basis.
2810 * t : (input) time at wich the solution is needed (can be < 0).
2812 * v(n) : (input) given operand vector.
2814 * w(n) : (output) computed approximation of exp(t*A)*v.
2816 * tol : (input/output) the requested accuracy tolerance on w.
2817 * If on input tol=0.0d0 or tol is too small (tol.le.eps)
2818 * the internal value sqrt(eps) is used, and tol is set to
2819 * sqrt(eps) on output (`eps' denotes the machine epsilon
).
2820 * (`Happy breakdown
' is assumed if h(j+1,j) .le. anorm*tol)
2822 * anorm : (input) an approximation of some norm of A.
2824 * wsp(lwsp): (workspace) lwsp .ge. n*(m+1)+n+(m+2)^2+4*(m+2)^2+ideg+1
2825 * +---------+-------+---------------+
2826 * (actually, ideg=6) V H wsp for PADE
2828 * iwsp(liwsp): (workspace) liwsp .ge. m+2
2830 * matvec : external subroutine for matrix-vector multiplication.
2831 * synopsis: matvec( x, y )
2832 * complex*16 x(*), y(*)
2833 * computes: y(1:n) <- A*x(1:n)
2834 * where A is the principal matrix.
2836 * itrace : (input) running mode. 0=silent, 1=print step-by-step info
2838 * iflag : (output) exit flag.
2839 * <0 - bad input arguments
2841 * 1 - maximum number of steps reached without convergence
2842 * 2 - requested tolerance was too high
2844 *-----Accounts on the computation--------------------------------------|
2845 * Upon exit, an interested user may retrieve accounts on the
2846 * computations. They are located in the workspace arrays wsp and
2847 * iwsp as indicated below:
2849 * location mnemonic description
2850 * -----------------------------------------------------------------|
2851 * iwsp(1) = nmult, number of matrix-vector multiplications used
2852 * iwsp(2) = nexph, number of Hessenberg matrix exponential evaluated
2853 * iwsp(3) = nscale, number of repeated squaring involved in Pade
2854 * iwsp(4) = nstep, number of integration steps used up to completion
2855 * iwsp(5) = nreject, number of rejected step-sizes
2856 * iwsp(6) = ibrkflag, set to 1 if `happy breakdown' and
0 otherwise
2857 * iwsp
(7) = mbrkdwn
, if `happy brkdown
', basis-size when it occured
2858 * -----------------------------------------------------------------|
2859 * wsp(1) = step_min, minimum step-size used during integration
2860 * wsp(2) = step_max, maximum step-size used during integration
2863 * wsp(5) = x_error, maximum among all local truncation errors
2864 * wsp(6) = s_error, global sum of local truncation errors
2865 * wsp(7) = tbrkdwn, if `happy breakdown', time when it occured
2866 * wsp
(8) = t_now
, integration domain successfully covered
2867 * wsp
(9) = hump
, i
.e
., max||exp
(sA
)||
, s in
[0,t
] (or
[t
,0] if t
<0)
2868 * wsp
(10) = ||w||
/||v||
, scaled norm of the solution w
.
2869 * -----------------------------------------------------------------|
2870 * The `hump
' is a measure of the conditioning of the problem. The
2871 * matrix exponential is well-conditioned if hump = 1, whereas it is
2872 * poorly-conditioned if hump >> 1. However the solution can still be
2873 * relatively fairly accurate even when the hump is large (the hump
2874 * is an upper bound), especially when the hump and the scaled norm
2875 * of w [this is also computed and returned in wsp(10)] are of the
2876 * same order of magnitude (further details in reference below).
2878 *----------------------------------------------------------------------|
2879 *-----The following parameters may also be adjusted herein-------------|
2881 integer mxstep, mxreject, ideg
2882 double precision delta, gamma
2883 parameter( mxstep = 500,
2889 * mxstep : maximum allowable number of integration steps.
2890 * The value 0 means an infinite number of steps.
2892 * mxreject: maximum allowable number of rejections at each step.
2893 * The value 0 means an infinite number of rejections.
2895 * ideg : the Pade approximation of type (ideg,ideg) is used as
2896 * an approximation to exp(H). The value 0 switches to the
2897 * uniform rational Chebyshev approximation of type (14,14)
2899 * delta : local truncation error `safety factor'
2901 * gamma
: stepsize `shrinking factor
'
2903 *----------------------------------------------------------------------|
2904 * Roger B. Sidje (rbs@maths.uq.edu.au)
2905 * EXPOKIT: Software Package for Computing Matrix Exponentials.
2906 * ACM - Transactions On Mathematical Software, 24(1):130-156, 1998
2907 *----------------------------------------------------------------------|
2909 complex*16 ZERO, ONE
2910 parameter( ZERO=(0.0d0,0.0d0), ONE=(1.0d0,0.0d0) )
2911 integer i, j, k1, mh, mx, iv, ih, j1v, ns, ifree, lfree, iexph,
2912 . ireject,ibrkflag,mbrkdwn, nmult, nreject, nexph, nscale,
2914 double precision sgn, t_out, tbrkdwn, step_min,step_max, err_loc,
2915 . s_error, x_error, t_now, t_new, t_step, t_old,
2916 . xm, beta, break_tol, p1, p2, p3, eps, rndoff,
2917 . vnorm, avnorm, hj1j, hump, SQR1
2920 intrinsic AINT,ABS,CMPLX,DBLE,INT,LOG10,MAX,MIN,NINT,SIGN,SQRT
2922 double precision DZNRM2
2924 *--- check restrictions on input parameters ...
2927 if ( lwsp.lt.n*(m+2)+5*(m+2)**2+ideg+1 ) iflag = -1
2928 if ( liwsp.lt.m+2 ) iflag = -2
2929 if ( m.ge.n .or. m.le.0 ) iflag = -3
2930 if ( iflag.ne.0 ) stop 'bad sizes
(in input of DHEXPV
)'
2932 *--- initialisations ...
2937 ih = iv + n*(m+1) + n
2939 lfree = lwsp - ifree + 1
2961 eps = ABS( p3-1.0d0 )
2962 if ( eps.eq.0.0d0 ) go to 1
2963 if ( tol.le.eps ) tol = SQRT( eps )
2967 *>>> break_tol = tol
2968 *>>> break_tol = anorm*tol
2970 sgn = SIGN( 1.0d0,t )
2971 call ZCOPY( n, v,1, w,1 )
2972 beta = DZNRM2( n, w,1 )
2976 *--- obtain the very first stepsize ...
2978 SQR1 = SQRT( 0.1d0 )
2979 xm = 1.0d0/DBLE( m )
2980 p2 = tol*(((m+1)/2.72D0)**(m+1))*SQRT(2.0D0*3.14D0*(m+1))
2981 t_new = (1.0d0/anorm)*(p2/(4.0d0*beta*anorm))**xm
2982 p1 = 10.0d0**(NINT( LOG10( t_new )-SQR1 )-1)
2983 t_new = AINT( t_new/p1 + 0.55d0 ) * p1
2985 *--- step-by-step integration ...
2987 100 if ( t_now.ge.t_out ) goto 500
2990 t_step = MIN( t_out-t_now, t_new )
2991 beta = DZNRM2( n, w,1 )
2994 wsp(iv + i-1) = p1*w(i)
3000 *--- Lanczos loop ...
3005 call matvec( wsp(j1v-n), wsp(j1v) )
3007 . call ZAXPY(n,-wsp(ih+(j-1)*mh+j-2),wsp(j1v-2*n),1,wsp(j1v),1)
3008 hjj = ZDOTC( n, wsp(j1v-n),1, wsp(j1v),1 )
3009 call ZAXPY( n, -hjj, wsp(j1v-n),1, wsp(j1v),1 )
3010 hj1j = DZNRM2( n, wsp(j1v),1 )
3011 wsp(ih+(j-1)*(mh+1)) = hjj
3012 *--- if `happy breakdown' go straightforward at the
end ...
3013 if ( hj1j
.le
.break_tol
) then
3014 print*
,'happy breakdown: mbrkdwn =',j
,' h =',hj1j
3019 t_step
= t_out
-t_now
3022 wsp
(ih
+(j
-1)*mh
+j
) = CMPLX
( hj1j
)
3023 wsp
(ih
+j*mh
+j
-1) = CMPLX
( hj1j
)
3024 call ZDSCAL
( n
, 1.0d0
/hj1j
, wsp
(j1v
),1 )
3028 call matvec
( wsp
(j1v
-n
), wsp
(j1v
) )
3029 avnorm
= DZNRM2
( n
, wsp
(j1v
),1 )
3031 *--- set
1 for the
2-corrected scheme
...
3034 wsp
(ih
+m*mh
+m
-1) = ZERO
3035 wsp
(ih
+m*mh
+m
+1) = ONE
3037 *--- loop
while ireject
<mxreject until the tolerance is reached
...
3042 *--- compute w
= beta*V*exp
(t_step*H
)*e1
...
3046 if ( ideg
.ne
.0 ) then
3047 *--- irreducible rational Pade approximation
...
3048 call ZGPADM
( ideg
, mx
, sgn*t_step
, wsp
(ih
),mh
,
3049 . wsp
(ifree
),lfree
, iwsp
, iexph
, ns
, iflag
)
3050 iexph
= ifree
+ iexph
- 1
3051 nscale
= nscale
+ ns
3053 *--- uniform rational Chebyshev approximation
...
3056 wsp
(iexph
+i
-1) = ZERO
3059 call ZNCHBV
(mx
,sgn*t_step
,wsp
(ih
),mh
,wsp
(iexph
),wsp
(ifree
+mx
))
3064 *--- error estimate
...
3069 p1
= ABS
( wsp
(iexph
+m
) ) * beta
3070 p2
= ABS
( wsp
(iexph
+m
+1) ) * beta
* avnorm
3071 if ( p1
.gt
.10.0d0*p2
) then
3073 xm
= 1.0d0
/DBLE
( m
)
3074 elseif
( p1
.gt
.p2
) then
3075 err_loc
= (p1*p2
)/(p1
-p2
)
3076 xm
= 1.0d0
/DBLE
( m
)
3079 xm
= 1.0d0
/DBLE
( m
-1 )
3083 *--- reject the step
-size
if the error is not acceptable
...
3085 if ( (k1
.ne
.0) .and
. (err_loc
.gt
.delta*t_step*tol
) .and
.
3086 . (mxreject
.eq
.0 .or
. ireject
.lt
.mxreject
) ) then
3088 t_step
= gamma
* t_step
* (t_step*tol
/err_loc
)**xm
3089 p1
= 10.0d0**
(NINT
( LOG10
( t_step
)-SQR1
)-1)
3090 t_step
= AINT
( t_step
/p1
+ 0.55d0
) * p1
3091 if ( itrace
.ne
.0 ) then
3092 print*
,'t_step =',t_old
3093 print*
,'err_loc =',err_loc
3094 print*
,'err_required =',delta*t_old*tol
3095 print*
,'stepsize rejected, stepping down to:',t_step
3097 ireject
= ireject
+ 1
3098 nreject
= nreject
+ 1
3099 if ( mxreject
.ne
.0 .and
. ireject
.gt
.mxreject
) then
3100 print*
,"Failure in ZHEXPV: ---"
3101 print*
,"The requested tolerance is too high."
3102 Print*
,"Rerun with a smaller value."
3109 *--- now update w
= beta*V*exp
(t_step*H
)*e1 and the hump
...
3111 mx
= mbrkdwn
+ MAX
( 0,k1
-1 )
3113 call ZGEMV
( 'n', n
,mx
,hjj
,wsp
(iv
),n
,wsp
(iexph
),1,ZERO
,w
,1 )
3114 beta
= DZNRM2
( n
, w
,1 )
3115 hump
= MAX
( hump
, beta
)
3117 *--- suggested value
for the next stepsize
...
3119 t_new
= gamma
* t_step
* (t_step*tol
/err_loc
)**xm
3120 p1
= 10.0d0**
(NINT
( LOG10
( t_new
)-SQR1
)-1)
3121 t_new
= AINT
( t_new
/p1
+ 0.55d0
) * p1
3123 err_loc
= MAX
( err_loc
,rndoff
)
3125 *--- update the time covered
...
3127 t_now
= t_now
+ t_step
3129 *--- display and keep some information
...
3131 if ( itrace
.ne
.0 ) then
3132 print*
,'integration',nstep
,'---------------------------------'
3133 print*
,'scale-square =',ns
3134 print*
,'step_size =',t_step
3135 print*
,'err_loc =',err_loc
3136 print*
,'next_step =',t_new
3139 step_min
= MIN
( step_min
, t_step
)
3140 step_max
= MAX
( step_max
, t_step
)
3141 s_error
= s_error
+ err_loc
3142 x_error
= MAX
( x_error
, err_loc
)
3144 if ( mxstep
.eq
.0 .or
. nstep
.lt
.mxstep
) goto 100
3157 wsp
(1) = CMPLX
( step_min
)
3158 wsp
(2) = CMPLX
( step_max
)
3159 wsp
(3) = CMPLX
( 0.0d0
)
3160 wsp
(4) = CMPLX
( 0.0d0
)
3161 wsp
(5) = CMPLX
( x_error
)
3162 wsp
(6) = CMPLX
( s_error
)
3163 wsp
(7) = CMPLX
( tbrkdwn
)
3164 wsp
(8) = CMPLX
( sgn*t_now
)
3165 wsp
(9) = CMPLX
( hump
/vnorm
)
3166 wsp
(10) = CMPLX
( beta
/vnorm
)
3168 *----------------------------------------------------------------------|
3169 *----------------------------------------------------------------------|
3170 subroutine DGPHIV
( n
, m
, t
, u
, v
, w
, tol
, anorm
,
3171 . wsp
,lwsp
, iwsp
,liwsp
, matvec
, itrace
,iflag
)
3174 integer n
, m
, lwsp
, liwsp
, itrace
, iflag
, iwsp
(liwsp
)
3175 double precision t
, tol
, anorm
, u
(n
), v
(n
), w
(n
), wsp
(lwsp
)
3178 *-----Purpose
----------------------------------------------------------|
3180 *--- DGPHIV computes w
= exp
(t*A
)v
+ t*phi
(tA
)u which is the solution
3181 * of the nonhomogeneous linear ODE problem w
' = Aw + u, w(0) = v.
3182 * phi(z) = (exp(z)-1)/z and A is a General matrix.
3184 * The method used is based on Krylov subspace projection
3185 * techniques and the matrix under consideration interacts only
3186 * via the external routine `matvec' performing the matrix
-vector
3187 * product
(matrix
-free method
).
3189 *-----Arguments
--------------------------------------------------------|
3191 * n
: (input
) order of the principal matrix A
.
3193 * m
: (input
) maximum size
for the Krylov basis
.
3195 * t
: (input
) time at wich the solution is needed
(can be
< 0).
3197 * u
(n
) : (input
) operand vector with respect
to the phi
function
3198 * (forcing term of the ODE problem
).
3200 * v
(n
) : (input
) operand vector with respect
to the exp
function
3201 * (initial condition of the ODE problem
).
3203 * w
(n
) : (output
) computed approximation of exp
(t*A
)v
+ t*phi
(tA
)u
3205 * tol
: (input
/output
) the requested accuracy tolerance on w
.
3206 * If on input tol
=0.0d0 or tol is too small
(tol
.le
.eps
)
3207 * the internal value sqrt
(eps
) is used
, and tol is set
to
3208 * sqrt
(eps
) on output
(`eps
' denotes the machine epsilon).
3209 * (`Happy breakdown' is assumed
if h
(j
+1,j
) .le
. anorm*tol
)
3211 * anorm
: (input
) an approximation of some norm of A
.
3213 * wsp
(lwsp
): (workspace
) lwsp
.ge
. n*
(m
+1)+n
+(m
+3)^
2+4*(m
+3)^
2+ideg
+1
3214 * +---------+-------+---------------+
3215 * (actually
, ideg
=6) V H wsp
for PADE
3217 * iwsp
(liwsp
): (workspace
) liwsp
.ge
. m
+3
3219 * matvec
: external subroutine for matrix
-vector multiplication
.
3220 * synopsis
: matvec
( x
, y
)
3221 * double precision x
(*), y
(*)
3222 * computes
: y
(1:n
) <- A*x
(1:n
)
3223 * where A is the principal matrix
.
3225 * itrace
: (input
) running mode
. 0=silent
, 1=print step
-by
-step info
3227 * iflag
: (output
) exit flag
.
3228 * <0 - bad input arguments
3230 * 1 - maximum number of steps reached without convergence
3231 * 2 - requested tolerance was too high
3233 *-----Accounts on the computation
--------------------------------------|
3234 * Upon exit
, an interested user may retrieve accounts on the
3235 * computations
. They are located in the workspace arrays wsp and
3236 * iwsp as indicated below
:
3238 * location mnemonic description
3239 * -----------------------------------------------------------------|
3240 * iwsp
(1) = nmult
, number of matrix
-vector multiplications used
3241 * iwsp
(2) = nexph
, number of Hessenberg matrix exponential evaluated
3242 * iwsp
(3) = nscale
, number of repeated squaring involved in Pade
3243 * iwsp
(4) = nstep
, number of integration steps used up
to completion
3244 * iwsp
(5) = nreject
, number of rejected step
-sizes
3245 * iwsp
(6) = ibrkflag
, set
to 1 if `happy breakdown
' and 0 otherwise
3246 * iwsp(7) = mbrkdwn, if `happy brkdown', basis
-size when it occured
3247 * -----------------------------------------------------------------|
3248 * wsp
(1) = step_min
, minimum step
-size used during integration
3249 * wsp
(2) = step_max
, maximum step
-size used during integration
3252 * wsp
(5) = x_error
, maximum among all local truncation errors
3253 * wsp
(6) = s_error
, global sum of local truncation errors
3254 * wsp
(7) = tbrkdwn
, if `happy breakdown
', time when it occured
3255 * wsp(8) = t_now, integration domain successfully covered
3257 *----------------------------------------------------------------------|
3258 *-----The following parameters may also be adjusted herein-------------|
3260 integer mxstep, mxreject, ideg
3261 double precision delta, gamma
3262 parameter( mxstep = 1000,
3268 * mxstep : maximum allowable number of integration steps.
3269 * The value 0 means an infinite number of steps.
3271 * mxreject: maximum allowable number of rejections at each step.
3272 * The value 0 means an infinite number of rejections.
3274 * ideg : the Pade approximation of type (ideg,ideg) is used as
3275 * an approximation to exp(H).
3277 * delta : local truncation error `safety factor'
3279 * gamma
: stepsize `shrinking factor
'
3281 *----------------------------------------------------------------------|
3282 * Roger B. Sidje (rbs@maths.uq.edu.au)
3283 * EXPOKIT: Software Package for Computing Matrix Exponentials.
3284 * ACM - Transactions On Mathematical Software, 24(1):130-156, 1998
3285 *----------------------------------------------------------------------|
3287 integer i, j, k1, mh, mx, iv, ih, j1v, ns, ifree, lfree, iexph,
3288 . ireject,ibrkflag,mbrkdwn, nmult, nreject, nexph, nscale,
3290 double precision sgn, t_out, tbrkdwn, step_min,step_max, err_loc,
3291 . s_error, x_error, t_now, t_new, t_step, t_old,
3292 . xm, beta, break_tol, p1, p2, p3, eps, rndoff,
3293 . avnorm, hj1j, hij, SQR1
3295 intrinsic AINT,ABS,DBLE,LOG10,MAX,MIN,NINT,SIGN,SQRT
3296 double precision DDOT, DNRM2
3298 *--- check restrictions on input parameters ...
3300 if ( lwsp.lt.n*(m+3)+5*(m+3)**2+ideg+1 ) iflag = -1
3301 if ( liwsp.lt.m+3 ) iflag = -2
3302 if ( m.ge.n .or. m.le.0 ) iflag = -3
3303 if ( iflag.ne.0 ) stop 'bad sizes
(in input of DGPHIV
)'
3305 *--- initialisations ...
3310 ih = iv + n*(m+1) + n
3312 lfree = lwsp - ifree + 1
3334 eps = ABS( p3-1.0d0 )
3335 if ( eps.eq.0.0d0 ) go to 1
3336 if ( tol.le.eps ) tol = SQRT( eps )
3340 *>>> break_tol = tol
3341 *>>> break_tol = anorm*tol
3344 *--- step-by-step integration ...
3346 sgn = SIGN( 1.0d0,t )
3347 SQR1 = SQRT( 0.1d0 )
3348 call DCOPY( n, v,1, w,1 )
3350 100 if ( t_now.ge.t_out ) goto 500
3353 call matvec( w, wsp(iv) )
3354 call DAXPY( n, 1.0d0, u,1, wsp(iv),1 )
3355 beta = DNRM2( n, wsp(iv),1 )
3356 if ( beta.eq.0.0d0 ) goto 500
3357 call DSCAL( n, 1.0d0/beta, wsp(iv),1 )
3362 if ( nstep.eq.0 ) then
3363 *--- obtain the very first stepsize ...
3364 xm = 1.0d0/DBLE( m )
3365 p1 = tol*(((m+1)/2.72D0)**(m+1))*SQRT(2.0D0*3.14D0*(m+1))
3366 t_new = (1.0d0/anorm)*(p1/(4.0d0*beta*anorm))**xm
3367 p1 = 10.0d0**(NINT( LOG10( t_new )-SQR1 )-1)
3368 t_new = AINT( t_new/p1 + 0.55d0 ) * p1
3371 t_step = MIN( t_out-t_now, t_new )
3373 *--- Arnoldi loop ...
3378 call matvec( wsp(j1v-n), wsp(j1v) )
3380 hij = DDOT( n, wsp(iv+(i-1)*n),1, wsp(j1v),1 )
3381 call DAXPY( n, -hij, wsp(iv+(i-1)*n),1, wsp(j1v),1 )
3382 wsp(ih+(j-1)*mh+i-1) = hij
3384 hj1j = DNRM2( n, wsp(j1v),1 )
3385 *--- if `happy breakdown' go straightforward at the
end ...
3386 if ( hj1j
.le
.break_tol
) then
3387 print*
,'happy breakdown: mbrkdwn =',j
,' h =',hj1j
3392 t_step
= t_out
-t_now
3395 wsp
(ih
+(j
-1)*mh
+j
) = hj1j
3396 call DSCAL
( n
, 1.0d0
/hj1j
, wsp
(j1v
),1 )
3400 call matvec
( wsp
(j1v
-n
), wsp
(j1v
) )
3401 avnorm
= DNRM2
( n
, wsp
(j1v
),1 )
3403 *--- set
1's for the 3-extended scheme ...
3406 wsp(ih+mh*mbrkdwn) = 1.0d0
3407 wsp(ih+(m-1)*mh+m) = 0.0d0
3409 wsp(ih+(m+i)*mh+m+i-1) = 1.0d0
3412 *--- loop while ireject<mxreject until the tolerance is reached ...
3417 *--- compute w = beta*t_step*V*phi(t_step*H)*e1 + w
3420 *--- irreducible rational Pade approximation ...
3421 mx = mbrkdwn + MAX( 1,k1 )
3422 call DGPADM( ideg, mx, sgn*t_step, wsp(ih),mh,
3423 . wsp(ifree),lfree, iwsp, iexph, ns, iflag )
3424 iexph = ifree + iexph - 1
3425 iphih = iexph + mbrkdwn*mx
3426 nscale = nscale + ns
3427 wsp(iphih+mbrkdwn) = hj1j*wsp(iphih+mx+mbrkdwn-1)
3428 wsp(iphih+mbrkdwn+1) = hj1j*wsp(iphih+2*mx+mbrkdwn-1)
3431 *--- error estimate ...
3435 p1 = ABS( wsp(iphih+m) ) * beta
3436 p2 = ABS( wsp(iphih+m+1) ) * beta * avnorm
3437 if ( p1.gt.10.0d0*p2 ) then
3439 xm = 1.0d0/DBLE( m+1 )
3440 elseif ( p1.gt.p2 ) then
3441 err_loc = (p1*p2)/(p1-p2)
3442 xm = 1.0d0/DBLE( m+1 )
3445 xm = 1.0d0/DBLE( m )
3449 *--- reject the step-size if the error is not acceptable ...
3450 if ( (k1.ne.0) .and. (err_loc.gt.delta*t_step*tol) .and.
3451 . (mxreject.eq.0 .or. ireject.lt.mxreject) ) then
3453 t_step = gamma * t_step * (t_step*tol/err_loc)**xm
3454 p1 = 10.0d0**(NINT( LOG10( t_step )-SQR1 )-1)
3455 t_step = AINT( t_step/p1 + 0.55d0 ) * p1
3456 if ( itrace.ne.0 ) then
3457 print*,'t_step
=',t_old
3458 print*,'err_loc
=',err_loc
3459 print*,'err_required
=',delta*t_old*tol
3460 print*,'stepsize rejected
, stepping down
to:',t_step
3462 ireject = ireject + 1
3463 nreject = nreject + 1
3464 if ( mxreject.ne.0 .and. ireject.gt.mxreject ) then
3465 print*,"Failure in DGPHIV: ---"
3466 print*,"The requested tolerance is too high."
3467 Print*,"Rerun with a smaller value."
3474 mx = mbrkdwn + MAX( 0,k1-2 )
3475 call DGEMV( 'n
', n,mx,beta,wsp(iv),n,wsp(iphih),1,1.0d0,w,1 )
3477 *--- suggested value for the next stepsize ...
3479 t_new = gamma * t_step * (t_step*tol/err_loc)**xm
3480 p1 = 10.0d0**(NINT( LOG10( t_new )-SQR1 )-1)
3481 t_new = AINT( t_new/p1 + 0.55d0 ) * p1
3483 err_loc = MAX( err_loc,rndoff )
3485 *--- update the time covered ...
3487 t_now = t_now + t_step
3489 *--- display and keep some information ...
3491 if ( itrace.ne.0 ) then
3492 print*,'integration
',nstep,'---------------------------------'
3493 print*,'scale
-square
=',ns
3494 print*,'step_size
=',t_step
3495 print*,'err_loc
=',err_loc
3496 print*,'next_step
=',t_new
3499 step_min = MIN( step_min, t_step )
3500 step_max = MAX( step_max, t_step )
3501 s_error = s_error + err_loc
3502 x_error = MAX( x_error, err_loc )
3504 if ( mxstep.eq.0 .or. nstep.lt.mxstep ) goto 100
3526 *----------------------------------------------------------------------|
3527 *----------------------------------------------------------------------|
3528 subroutine DSPHIV( n, m, t, u, v, w, tol, anorm,
3529 . wsp,lwsp, iwsp,liwsp, matvec, itrace,iflag )
3532 integer n, m, lwsp, liwsp, itrace, iflag, iwsp(liwsp)
3533 double precision t, tol, anorm, u(n), v(n), w(n), wsp(lwsp)
3536 *-----Purpose----------------------------------------------------------|
3538 *--- DSPHIV computes w = exp(t*A)v + t*phi(tA)u which is the solution
3539 * of the nonhomogeneous linear ODE problem w' = Aw
+ u
, w
(0) = v
.
3540 * phi
(z
) = (exp
(z
)-1)/z and A is a Symmetric matrix
.
3542 * The method used is based on Krylov subspace projection
3543 * techniques and the matrix under consideration interacts only
3544 * via the
external routine `matvec
' performing the matrix-vector
3545 * product (matrix-free method).
3547 *-----Arguments--------------------------------------------------------|
3549 * n : (input) order of the principal matrix A.
3551 * m : (input) maximum size for the Krylov basis.
3553 * t : (input) time at wich the solution is needed (can be < 0).
3555 * u(n) : (input) operand vector with respect to the phi function
3556 * (forcing term of the ODE problem).
3558 * v(n) : (input) operand vector with respect to the exp function
3559 * (initial condition of the ODE problem).
3561 * w(n) : (output) computed approximation of exp(t*A)v + t*phi(tA)u
3563 * tol : (input/output) the requested accuracy tolerance on w.
3564 * If on input tol=0.0d0 or tol is too small (tol.le.eps)
3565 * the internal value sqrt(eps) is used, and tol is set to
3566 * sqrt(eps) on output (`eps' denotes the machine epsilon
).
3567 * (`Happy breakdown
' is assumed if h(j+1,j) .le. anorm*tol)
3569 * anorm : (input) an approximation of some norm of A.
3571 * wsp(lwsp): (workspace) lwsp .ge. n*(m+1)+n+(m+3)^2+4*(m+3)^2+ideg+1
3572 * +---------+-------+---------------+
3573 * (actually, ideg=6) V H wsp for PADE
3575 * iwsp(liwsp): (workspace) liwsp .ge. m+3
3577 * matvec : external subroutine for matrix-vector multiplication.
3578 * synopsis: matvec( x, y )
3579 * double precision x(*), y(*)
3580 * computes: y(1:n) <- A*x(1:n)
3581 * where A is the principal matrix.
3583 * itrace : (input) running mode. 0=silent, 1=print step-by-step info
3585 * iflag : (output) exit flag.
3586 * <0 - bad input arguments
3588 * 1 - maximum number of steps reached without convergence
3589 * 2 - requested tolerance was too high
3591 *-----Accounts on the computation--------------------------------------|
3592 * Upon exit, an interested user may retrieve accounts on the
3593 * computations. They are located in the workspace arrays wsp and
3594 * iwsp as indicated below:
3596 * location mnemonic description
3597 * -----------------------------------------------------------------|
3598 * iwsp(1) = nmult, number of matrix-vector multiplications used
3599 * iwsp(2) = nexph, number of Hessenberg matrix exponential evaluated
3600 * iwsp(3) = nscale, number of repeated squaring involved in Pade
3601 * iwsp(4) = nstep, number of integration steps used up to completion
3602 * iwsp(5) = nreject, number of rejected step-sizes
3603 * iwsp(6) = ibrkflag, set to 1 if `happy breakdown' and
0 otherwise
3604 * iwsp
(7) = mbrkdwn
, if `happy brkdown
', basis-size when it occured
3605 * -----------------------------------------------------------------|
3606 * wsp(1) = step_min, minimum step-size used during integration
3607 * wsp(2) = step_max, maximum step-size used during integration
3610 * wsp(5) = x_error, maximum among all local truncation errors
3611 * wsp(6) = s_error, global sum of local truncation errors
3612 * wsp(7) = tbrkdwn, if `happy breakdown', time when it occured
3613 * wsp
(8) = t_now
, integration domain successfully covered
3615 *----------------------------------------------------------------------|
3616 *-----The following parameters may also be adjusted herein
-------------|
3618 integer mxstep
, mxreject
, ideg
3619 double precision delta
, gamma
3620 parameter( mxstep
= 500,
3626 * mxstep
: maximum allowable number of integration steps
.
3627 * The value
0 means an infinite number of steps
.
3629 * mxreject
: maximum allowable number of rejections at each step
.
3630 * The value
0 means an infinite number of rejections
.
3632 * ideg
: the Pade approximation of type
(ideg
,ideg
) is used as
3633 * an approximation
to exp
(H
).
3635 * delta
: local truncation error `safety factor
'
3637 * gamma : stepsize `shrinking factor'
3639 *----------------------------------------------------------------------|
3640 * Roger B
. Sidje
(rbs@maths
.uq
.edu
.au
)
3641 * EXPOKIT
: Software Package
for Computing Matrix Exponentials
.
3642 * ACM
- Transactions On Mathematical Software
, 24(1):130-156, 1998
3643 *----------------------------------------------------------------------|
3645 integer i
, j
, k1
, mh
, mx
, iv
, ih
, j1v
, ns
, ifree
, lfree
, iexph
,
3646 . ireject
,ibrkflag
,mbrkdwn
, nmult
, nreject
, nexph
, nscale
,
3648 double precision sgn
, t_out
, tbrkdwn
, step_min
,step_max
, err_loc
,
3649 . s_error
, x_error
, t_now
, t_new
, t_step
, t_old
,
3650 . xm
, beta
, break_tol
, p1
, p2
, p3
, eps
, rndoff
,
3651 . avnorm
, hj1j
, hjj
, SQR1
3653 intrinsic AINT
,ABS
,DBLE
,LOG10
,MAX
,MIN
,NINT
,SIGN
,SQRT
3654 double precision DDOT
, DNRM2
3656 *--- check restrictions on input parameters
...
3658 if ( lwsp
.lt
.n*
(m
+3)+5*(m
+3)**2+ideg
+1 ) iflag
= -1
3659 if ( liwsp
.lt
.m
+3 ) iflag
= -2
3660 if ( m
.ge
.n
.or
. m
.le
.0 ) iflag
= -3
3661 if ( iflag
.ne
.0 ) stop 'bad sizes (in input of DSPHIV)'
3663 *--- initialisations
...
3668 ih
= iv
+ n*
(m
+1) + n
3670 lfree
= lwsp
- ifree
+ 1
3692 eps
= ABS
( p3
-1.0d0
)
3693 if ( eps
.eq
.0.0d0
) go to 1
3694 if ( tol
.le
.eps
) tol
= SQRT
( eps
)
3698 *>>> break_tol
= tol
3699 *>>> break_tol
= anorm*tol
3702 *--- step
-by
-step integration
...
3704 sgn
= SIGN
( 1.0d0
,t
)
3705 SQR1
= SQRT
( 0.1d0
)
3706 call DCOPY
( n
, v
,1, w
,1 )
3708 100 if ( t_now
.ge
.t_out
) goto 500
3711 call matvec
( w
, wsp
(iv
) )
3712 call DAXPY
( n
, 1.0d0
, u
,1, wsp
(iv
),1 )
3713 beta
= DNRM2
( n
, wsp
(iv
),1 )
3714 if ( beta
.eq
.0.0d0
) goto 500
3715 call DSCAL
( n
, 1.0d0
/beta
, wsp
(iv
),1 )
3720 if ( nstep
.eq
.0 ) then
3721 *--- obtain the very first stepsize
...
3722 xm
= 1.0d0
/DBLE
( m
)
3723 p1
= tol*
(((m
+1)/2.72D0
)**(m
+1))*SQRT
(2.0D0*3
.14D0*
(m
+1))
3724 t_new
= (1.0d0
/anorm
)*(p1
/(4.0d0*beta*anorm
))**xm
3725 p1
= 10.0d0**
(NINT
( LOG10
( t_new
)-SQR1
)-1)
3726 t_new
= AINT
( t_new
/p1
+ 0.55d0
) * p1
3729 t_step
= MIN
( t_out
-t_now
, t_new
)
3731 *--- Lanczos loop
...
3736 call matvec
( wsp
(j1v
-n
), wsp
(j1v
) )
3738 . call DAXPY
(n
,-wsp
(ih
+(j
-1)*mh
+j
-2),wsp
(j1v
-2*n
),1,wsp
(j1v
),1)
3739 hjj
= DDOT
( n
, wsp
(j1v
-n
),1, wsp
(j1v
),1 )
3740 call DAXPY
( n
, -hjj
, wsp
(j1v
-n
),1, wsp
(j1v
),1 )
3741 hj1j
= DNRM2
( n
, wsp
(j1v
),1 )
3742 wsp
(ih
+(j
-1)*(mh
+1)) = hjj
3743 *--- if `happy breakdown
' go straightforward at the end ...
3744 if ( hj1j.le.break_tol ) then
3745 print*,'happy breakdown
: mbrkdwn
=',j,' h
=',hj1j
3750 t_step = t_out-t_now
3753 wsp(ih+(j-1)*mh+j) = hj1j
3754 wsp(ih+j*mh+j-1) = hj1j
3755 call DSCAL( n, 1.0d0/hj1j, wsp(j1v),1 )
3759 call matvec( wsp(j1v-n), wsp(j1v) )
3760 avnorm = DNRM2( n, wsp(j1v),1 )
3762 *--- set 1's
for the
3-extended scheme
...
3765 wsp
(ih
+mh*mbrkdwn
) = 1.0d0
3766 wsp
(ih
+m*mh
+m
-1) = 0.0d0
3767 wsp
(ih
+(m
-1)*mh
+m
) = 0.0d0
3769 wsp
(ih
+(m
+i
)*mh
+m
+i
-1) = 1.0d0
3772 *--- loop
while ireject
<mxreject until the tolerance is reached
...
3777 *--- compute w
= beta*t_step*V*phi
(t_step*H
)*e1
+ w
3780 mx
= mbrkdwn
+ MAX
( 1,k1
)
3782 *--- irreducible rational Pade approximation
...
3783 call DGPADM
( ideg
, mx
, sgn*t_step
, wsp
(ih
),mh
,
3784 . wsp
(ifree
),lfree
, iwsp
, iexph
, ns
, iflag
)
3785 iexph
= ifree
+ iexph
- 1
3786 iphih
= iexph
+ mbrkdwn*mx
3787 nscale
= nscale
+ ns
3788 wsp
(iphih
+mbrkdwn
) = hj1j*wsp
(iphih
+mx
+mbrkdwn
-1)
3789 wsp
(iphih
+mbrkdwn
+1) = hj1j*wsp
(iphih
+2*mx
+mbrkdwn
-1)
3793 *--- error estimate
...
3798 p1
= ABS
( wsp
(iphih
+m
) ) * beta
3799 p2
= ABS
( wsp
(iphih
+m
+1) ) * beta
* avnorm
3800 if ( p1
.gt
.10.0d0*p2
) then
3802 xm
= 1.0d0
/DBLE
( m
+1 )
3803 elseif
( p1
.gt
.p2
) then
3804 err_loc
= (p1*p2
)/(p1
-p2
)
3805 xm
= 1.0d0
/DBLE
( m
+1 )
3808 xm
= 1.0d0
/DBLE
( m
)
3812 *--- reject the step
-size
if the error is not acceptable
...
3814 if ( (k1
.ne
.0) .and
. (err_loc
.gt
.delta*t_step*tol
) .and
.
3815 . (mxreject
.eq
.0 .or
. ireject
.lt
.mxreject
) ) then
3817 t_step
= gamma
* t_step
* (t_step*tol
/err_loc
)**xm
3818 p1
= 10.0d0**
(NINT
( LOG10
( t_step
)-SQR1
)-1)
3819 t_step
= AINT
( t_step
/p1
+ 0.55d0
) * p1
3820 if ( itrace
.ne
.0 ) then
3821 print*
,'t_step =',t_old
3822 print*
,'err_loc =',err_loc
3823 print*
,'err_required =',delta*t_old*tol
3824 print*
,'stepsize rejected, stepping down to:',t_step
3826 ireject
= ireject
+ 1
3827 nreject
= nreject
+ 1
3828 if ( mxreject
.ne
.0 .and
. ireject
.gt
.mxreject
) then
3829 print*
,"Failure in DSPHIV: ---"
3830 print*
,"The requested tolerance is too high."
3831 Print*
,"Rerun with a smaller value."
3838 mx
= mbrkdwn
+ MAX
( 0,k1
-2 )
3839 call DGEMV
( 'n', n
,mx
,beta
,wsp
(iv
),n
,wsp
(iphih
),1,1.0d0
,w
,1 )
3841 *--- suggested value
for the next stepsize
...
3843 t_new
= gamma
* t_step
* (t_step*tol
/err_loc
)**xm
3844 p1
= 10.0d0**
(NINT
( LOG10
( t_new
)-SQR1
)-1)
3845 t_new
= AINT
( t_new
/p1
+ 0.55d0
) * p1
3847 err_loc
= MAX
( err_loc
,rndoff
)
3849 *--- update the time covered
...
3851 t_now
= t_now
+ t_step
3853 *--- display and keep some information
...
3855 if ( itrace
.ne
.0 ) then
3856 print*
,'integration',nstep
,'---------------------------------'
3857 print*
,'scale-square =',ns
3858 print*
,'step_size =',t_step
3859 print*
,'err_loc =',err_loc
3860 print*
,'next_step =',t_new
3863 step_min
= MIN
( step_min
, t_step
)
3864 step_max
= MAX
( step_max
, t_step
)
3865 s_error
= s_error
+ err_loc
3866 x_error
= MAX
( x_error
, err_loc
)
3868 if ( mxstep
.eq
.0 .or
. nstep
.lt
.mxstep
) goto 100
3890 *----------------------------------------------------------------------|
3891 *----------------------------------------------------------------------|
3892 subroutine ZGPHIV
( n
, m
, t
, u
, v
, w
, tol
, anorm
,
3893 . wsp
,lwsp
, iwsp
,liwsp
, matvec
, itrace
,iflag
)
3896 integer n
, m
, lwsp
, liwsp
, itrace
, iflag
, iwsp
(liwsp
)
3897 double precision t
, tol
, anorm
3898 complex*16 u
(n
), v
(n
), w
(n
), wsp
(lwsp
)
3901 *-----Purpose
----------------------------------------------------------|
3903 *--- ZGPHIV computes w
= exp
(t*A
)v
+ t*phi
(tA
)u which is the solution
3904 * of the nonhomogeneous linear ODE problem w
' = Aw + u, w(0) = v.
3905 * phi(z) = (exp(z)-1)/z and A is a Zomplex (i.e., complex double
3906 * precision matrix).
3908 * The method used is based on Krylov subspace projection
3909 * techniques and the matrix under consideration interacts only
3910 * via the external routine `matvec' performing the matrix
-vector
3911 * product
(matrix
-free method
).
3913 *-----Arguments
--------------------------------------------------------|
3915 * n
: (input
) order of the principal matrix A
.
3917 * m
: (input
) maximum size
for the Krylov basis
.
3919 * t
: (input
) time at wich the solution is needed
(can be
< 0).
3921 * u
(n
) : (input
) operand vector with respect
to the phi
function
3922 * (forcing term of the ODE problem
).
3924 * v
(n
) : (input
) operand vector with respect
to the exp
function
3925 * (initial condition of the ODE problem
).
3927 * w
(n
) : (output
) computed approximation of exp
(t*A
)v
+ t*phi
(tA
)u
3929 * tol
: (input
/output
) the requested accuracy tolerance on w
.
3930 * If on input tol
=0.0d0 or tol is too small
(tol
.le
.eps
)
3931 * the internal value sqrt
(eps
) is used
, and tol is set
to
3932 * sqrt
(eps
) on output
(`eps
' denotes the machine epsilon).
3933 * (`Happy breakdown' is assumed
if h
(j
+1,j
) .le
. anorm*tol
)
3935 * anorm
: (input
) an approximation of some norm of A
.
3937 * wsp
(lwsp
): (workspace
) lwsp
.ge
. n*
(m
+1)+n
+(m
+3)^
2+4*(m
+3)^
2+ideg
+1
3938 * +---------+-------+---------------+
3939 * (actually
, ideg
=6) V H wsp
for PADE
3941 * iwsp
(liwsp
): (workspace
) liwsp
.ge
. m
+3
3943 * matvec
: external subroutine for matrix
-vector multiplication
.
3944 * synopsis
: matvec
( x
, y
)
3945 * double precision x
(*), y
(*)
3946 * computes
: y
(1:n
) <- A*x
(1:n
)
3947 * where A is the principal matrix
.
3949 * itrace
: (input
) running mode
. 0=silent
, 1=print step
-by
-step info
3951 * iflag
: (output
) exit flag
.
3952 * <0 - bad input arguments
3954 * 1 - maximum number of steps reached without convergence
3955 * 2 - requested tolerance was too high
3957 *-----Accounts on the computation
--------------------------------------|
3958 * Upon exit
, an interested user may retrieve accounts on the
3959 * computations
. They are located in the workspace arrays wsp and
3960 * iwsp as indicated below
:
3962 * location mnemonic description
3963 * -----------------------------------------------------------------|
3964 * iwsp
(1) = nmult
, number of matrix
-vector multiplications used
3965 * iwsp
(2) = nexph
, number of Hessenberg matrix exponential evaluated
3966 * iwsp
(3) = nscale
, number of repeated squaring involved in Pade
3967 * iwsp
(4) = nstep
, number of integration steps used up
to completion
3968 * iwsp
(5) = nreject
, number of rejected step
-sizes
3969 * iwsp
(6) = ibrkflag
, set
to 1 if `happy breakdown
' and 0 otherwise
3970 * iwsp(7) = mbrkdwn, if `happy brkdown', basis
-size when it occured
3971 * -----------------------------------------------------------------|
3972 * wsp
(1) = step_min
, minimum step
-size used during integration
3973 * wsp
(2) = step_max
, maximum step
-size used during integration
3976 * wsp
(5) = x_error
, maximum among all local truncation errors
3977 * wsp
(6) = s_error
, global sum of local truncation errors
3978 * wsp
(7) = tbrkdwn
, if `happy breakdown
', time when it occured
3979 * wsp(8) = t_now, integration domain successfully covered
3981 *----------------------------------------------------------------------|
3982 *-----The following parameters may also be adjusted herein-------------|
3984 integer mxstep, mxreject, ideg
3985 double precision delta, gamma
3986 parameter( mxstep = 500,
3992 * mxstep : maximum allowable number of integration steps.
3993 * The value 0 means an infinite number of steps.
3995 * mxreject: maximum allowable number of rejections at each step.
3996 * The value 0 means an infinite number of rejections.
3998 * ideg : the Pade approximation of type (ideg,ideg) is used as
3999 * an approximation to exp(H).
4001 * delta : local truncation error `safety factor'
4003 * gamma
: stepsize `shrinking factor
'
4005 *----------------------------------------------------------------------|
4006 * Roger B. Sidje (rbs@maths.uq.edu.au)
4007 * EXPOKIT: Software Package for Computing Matrix Exponentials.
4008 * ACM - Transactions On Mathematical Software, 24(1):130-156, 1998
4009 *----------------------------------------------------------------------|
4011 complex*16 ZERO, ONE
4012 parameter( ZERO=(0.0d0,0.0d0), ONE=(1.0d0,0.0d0) )
4013 integer i, j, k1, mh, mx, iv, ih, j1v, ns, ifree, lfree, iexph,
4014 . ireject,ibrkflag,mbrkdwn, nmult, nreject, nexph, nscale,
4016 double precision sgn, t_out, tbrkdwn, step_min,step_max, err_loc,
4017 . s_error, x_error, t_now, t_new, t_step, t_old,
4018 . xm, beta, break_tol, p1, p2, p3, eps, rndoff,
4019 . avnorm, hj1j, SQR1
4022 intrinsic AINT,ABS,CMPLX,DBLE,INT,LOG10,MAX,MIN,NINT,SIGN,SQRT
4024 double precision DZNRM2
4026 *--- check restrictions on input parameters ...
4029 if ( lwsp.lt.n*(m+3)+5*(m+3)**2+ideg+1 ) iflag = -1
4030 if ( liwsp.lt.m+3 ) iflag = -2
4031 if ( m.ge.n .or. m.le.0 ) iflag = -3
4032 if ( iflag.ne.0 ) stop 'bad sizes
(in input of ZGPHIV
)'
4034 *--- initialisations ...
4039 ih = iv + n*(m+1) + n
4041 lfree = lwsp - ifree + 1
4063 eps = ABS( p3-1.0d0 )
4064 if ( eps.eq.0.0d0 ) go to 1
4065 if ( tol.le.eps ) tol = SQRT( eps )
4069 *>>> break_tol = tol
4070 *>>> break_tol = anorm*tol
4072 sgn = SIGN( 1.0d0,t )
4073 SQR1 = SQRT( 0.1d0 )
4074 call ZCOPY( n, v,1, w,1 )
4076 *--- step-by-step integration ...
4078 100 if ( t_now.ge.t_out ) goto 500
4081 call matvec( w, wsp(iv) )
4082 call ZAXPY( n, ONE, u,1, wsp(iv),1 )
4083 beta = DZNRM2( n, wsp(iv),1 )
4084 if ( beta.eq.0.0d0 ) goto 500
4085 call ZDSCAL( n, 1.0d0/beta, wsp(iv),1 )
4090 if ( nstep.eq.0 ) then
4091 *--- obtain the very first stepsize ...
4092 xm = 1.0d0/DBLE( m )
4093 p1 = tol*(((m+1)/2.72D0)**(m+1))*SQRT(2.0D0*3.14D0*(m+1))
4094 t_new = (1.0d0/anorm)*(p1/(4.0d0*beta*anorm))**xm
4095 p1 = 10.0d0**(NINT( LOG10( t_new )-SQR1 )-1)
4096 t_new = AINT( t_new/p1 + 0.55d0 ) * p1
4099 t_step = MIN( t_out-t_now, t_new )
4101 *--- Arnoldi loop ...
4106 call matvec( wsp(j1v-n), wsp(j1v) )
4108 hij = ZDOTC( n, wsp(iv+(i-1)*n),1, wsp(j1v),1 )
4109 call ZAXPY( n, -hij, wsp(iv+(i-1)*n),1, wsp(j1v),1 )
4110 wsp(ih+(j-1)*mh+i-1) = hij
4112 hj1j = DZNRM2( n, wsp(j1v),1 )
4113 *--- if `happy breakdown' go straightforward at the
end ...
4114 if ( hj1j
.le
.break_tol
) then
4115 print*
,'happy breakdown: mbrkdwn =',j
,' h =',hj1j
4120 t_step
= t_out
-t_now
4123 wsp
(ih
+(j
-1)*mh
+j
) = CMPLX
( hj1j
)
4124 call ZDSCAL
( n
, 1.0d0
/hj1j
, wsp
(j1v
),1 )
4128 call matvec
( wsp
(j1v
-n
), wsp
(j1v
) )
4129 avnorm
= DZNRM2
( n
, wsp
(j1v
),1 )
4131 *--- set
1's for the 3-extended scheme ...
4134 wsp(ih+mh*mbrkdwn) = ONE
4135 wsp(ih+(m-1)*mh+m) = ZERO
4137 wsp(ih+(m+i)*mh+m+i-1) = ONE
4140 *--- loop while ireject<mxreject until the tolerance is reached ...
4145 *--- compute w = beta*t_step*V*phi(t_step*H)*e1 + w
4148 mx = mbrkdwn + MAX( 1,k1 )
4150 *--- irreducible rational Pade approximation ...
4151 call ZGPADM( ideg, mx, sgn*t_step, wsp(ih),mh,
4152 . wsp(ifree),lfree, iwsp, iexph, ns, iflag )
4153 iexph = ifree + iexph - 1
4154 iphih = iexph + mbrkdwn*mx
4155 nscale = nscale + ns
4156 wsp(iphih+mbrkdwn) = hj1j*wsp(iphih+mx+mbrkdwn-1)
4157 wsp(iphih+mbrkdwn+1) = hj1j*wsp(iphih+2*mx+mbrkdwn-1)
4161 *--- error estimate ...
4166 p1 = ABS( wsp(iphih+m) ) * beta
4167 p2 = ABS( wsp(iphih+m+1) ) * beta * avnorm
4168 if ( p1.gt.10.0d0*p2 ) then
4170 xm = 1.0d0/DBLE( m+1 )
4171 elseif ( p1.gt.p2 ) then
4172 err_loc = (p1*p2)/(p1-p2)
4173 xm = 1.0d0/DBLE( m+1 )
4176 xm = 1.0d0/DBLE( m )
4180 *--- reject the step-size if the error is not acceptable ...
4182 if ( (k1.ne.0) .and. (err_loc.gt.delta*t_step*tol) .and.
4183 . (mxreject.eq.0 .or. ireject.lt.mxreject) ) then
4185 t_step = gamma * t_step * (t_step*tol/err_loc)**xm
4186 p1 = 10.0d0**(NINT( LOG10( t_step )-SQR1 )-1)
4187 t_step = AINT( t_step/p1 + 0.55d0 ) * p1
4188 if ( itrace.ne.0 ) then
4189 print*,'t_step
=',t_old
4190 print*,'err_loc
=',err_loc
4191 print*,'err_required
=',delta*t_old*tol
4192 print*,'stepsize rejected
, stepping down
to:',t_step
4194 ireject = ireject + 1
4195 nreject = nreject + 1
4196 if ( mxreject.ne.0 .and. ireject.gt.mxreject ) then
4197 print*,"Failure in ZGPHIV: ---"
4198 print*,"The requested tolerance is too high."
4199 Print*,"Rerun with a smaller value."
4206 mx = mbrkdwn + MAX( 0,k1-2 )
4208 call ZGEMV( 'n
', n,mx,hij,wsp(iv),n,wsp(iphih),1,ONE,w,1 )
4210 *--- suggested value for the next stepsize ...
4212 t_new = gamma * t_step * (t_step*tol/err_loc)**xm
4213 p1 = 10.0d0**(NINT( LOG10( t_new )-SQR1 )-1)
4214 t_new = AINT( t_new/p1 + 0.55d0 ) * p1
4216 err_loc = MAX( err_loc,rndoff )
4218 *--- update the time covered ...
4220 t_now = t_now + t_step
4222 *--- display and keep some information ...
4224 if ( itrace.ne.0 ) then
4225 print*,'integration
',nstep,'---------------------------------'
4226 print*,'scale
-square
=',ns
4227 print*,'step_size
=',t_step
4228 print*,'err_loc
=',err_loc
4229 print*,'next_step
=',t_new
4232 step_min = MIN( step_min, t_step )
4233 step_max = MAX( step_max, t_step )
4234 s_error = s_error + err_loc
4235 x_error = MAX( x_error, err_loc )
4237 if ( mxstep.eq.0 .or. nstep.lt.mxstep ) goto 100
4250 wsp(1) = CMPLX( step_min )
4251 wsp(2) = CMPLX( step_max )
4252 wsp(3) = CMPLX( 0.0d0 )
4253 wsp(4) = CMPLX( 0.0d0 )
4254 wsp(5) = CMPLX( x_error )
4255 wsp(6) = CMPLX( s_error )
4256 wsp(7) = CMPLX( tbrkdwn )
4257 wsp(8) = CMPLX( sgn*t_now )
4259 *----------------------------------------------------------------------|
4260 *----------------------------------------------------------------------|
4261 subroutine ZHPHIV( n, m, t, u, v, w, tol, anorm,
4262 . wsp,lwsp, iwsp,liwsp, matvec, itrace,iflag )
4265 integer n, m, lwsp, liwsp, itrace, iflag, iwsp(liwsp)
4266 double precision t, tol, anorm
4267 complex*16 u(n), v(n), w(n), wsp(lwsp)
4270 *-----Purpose----------------------------------------------------------|
4272 *--- ZHPHIV computes w = exp(t*A)v + t*phi(tA)u which is the solution
4273 * of the nonhomogeneous linear ODE problem w' = Aw
+ u
, w
(0) = v
.
4274 * phi
(z
) = (exp
(z
)-1)/z and A is an Hermitian matrix
.
4276 * The method used is based on Krylov subspace projection
4277 * techniques and the matrix under consideration interacts only
4278 * via the
external routine `matvec
' performing the matrix-vector
4279 * product (matrix-free method).
4281 *-----Arguments--------------------------------------------------------|
4283 * n : (input) order of the principal matrix A.
4285 * m : (input) maximum size for the Krylov basis.
4287 * t : (input) time at wich the solution is needed (can be < 0).
4289 * u(n) : (input) operand vector with respect to the phi function
4290 * (forcing term of the ODE problem).
4292 * v(n) : (input) operand vector with respect to the exp function
4293 * (initial condition of the ODE problem).
4295 * w(n) : (output) computed approximation of exp(t*A)v + t*phi(tA)u
4297 * tol : (input/output) the requested accuracy tolerance on w.
4298 * If on input tol=0.0d0 or tol is too small (tol.le.eps)
4299 * the internal value sqrt(eps) is used, and tol is set to
4300 * sqrt(eps) on output (`eps' denotes the machine epsilon
).
4301 * (`Happy breakdown
' is assumed if h(j+1,j) .le. anorm*tol)
4303 * anorm : (input) an approximation of some norm of A.
4305 * wsp(lwsp): (workspace) lwsp .ge. n*(m+1)+n+(m+3)^2+4*(m+3)^2+ideg+1
4306 * +---------+-------+---------------+
4307 * (actually, ideg=6) V H wsp for PADE
4309 * iwsp(liwsp): (workspace) liwsp .ge. m+3
4311 * matvec : external subroutine for matrix-vector multiplication.
4312 * synopsis: matvec( x, y )
4313 * double precision x(*), y(*)
4314 * computes: y(1:n) <- A*x(1:n)
4315 * where A is the principal matrix.
4317 * itrace : (input) running mode. 0=silent, 1=print step-by-step info
4319 * iflag : (output) exit flag.
4320 * <0 - bad input arguments
4322 * 1 - maximum number of steps reached without convergence
4323 * 2 - requested tolerance was too high
4325 *-----Accounts on the computation--------------------------------------|
4326 * Upon exit, an interested user may retrieve accounts on the
4327 * computations. They are located in the workspace arrays wsp and
4328 * iwsp as indicated below:
4330 * location mnemonic description
4331 * -----------------------------------------------------------------|
4332 * iwsp(1) = nmult, number of matrix-vector multiplications used
4333 * iwsp(2) = nexph, number of Hessenberg matrix exponential evaluated
4334 * iwsp(3) = nscale, number of repeated squaring involved in Pade
4335 * iwsp(4) = nstep, number of integration steps used up to completion
4336 * iwsp(5) = nreject, number of rejected step-sizes
4337 * iwsp(6) = ibrkflag, set to 1 if `happy breakdown' and
0 otherwise
4338 * iwsp
(7) = mbrkdwn
, if `happy brkdown
', basis-size when it occured
4339 * -----------------------------------------------------------------|
4340 * wsp(1) = step_min, minimum step-size used during integration
4341 * wsp(2) = step_max, maximum step-size used during integration
4344 * wsp(5) = x_error, maximum among all local truncation errors
4345 * wsp(6) = s_error, global sum of local truncation errors
4346 * wsp(7) = tbrkdwn, if `happy breakdown', time when it occured
4347 * wsp
(8) = t_now
, integration domain successfully covered
4349 *----------------------------------------------------------------------|
4350 *-----The following parameters may also be adjusted herein
-------------|
4352 integer mxstep
, mxreject
, ideg
4353 double precision delta
, gamma
4354 parameter( mxstep
= 500,
4360 * mxstep
: maximum allowable number of integration steps
.
4361 * The value
0 means an infinite number of steps
.
4363 * mxreject
: maximum allowable number of rejections at each step
.
4364 * The value
0 means an infinite number of rejections
.
4366 * ideg
: the Pade approximation of type
(ideg
,ideg
) is used as
4367 * an approximation
to exp
(H
).
4369 * delta
: local truncation error `safety factor
'
4371 * gamma : stepsize `shrinking factor'
4373 *----------------------------------------------------------------------|
4374 * Roger B
. Sidje
(rbs@maths
.uq
.edu
.au
)
4375 * EXPOKIT
: Software Package
for Computing Matrix Exponentials
.
4376 * ACM
- Transactions On Mathematical Software
, 24(1):130-156, 1998
4377 *----------------------------------------------------------------------|
4379 complex*16 ZERO
, ONE
4380 parameter( ZERO
=(0.0d0
,0.0d0
), ONE
=(1.0d0
,0.0d0
) )
4381 integer i
, j
, k1
, mh
, mx
, iv
, ih
, j1v
, ns
, ifree
, lfree
, iexph
,
4382 . ireject
,ibrkflag
,mbrkdwn
, nmult
, nreject
, nexph
, nscale
,
4384 double precision sgn
, t_out
, tbrkdwn
, step_min
,step_max
, err_loc
,
4385 . s_error
, x_error
, t_now
, t_new
, t_step
, t_old
,
4386 . xm
, beta
, break_tol
, p1
, p2
, p3
, eps
, rndoff
,
4387 . avnorm
, hj1j
, SQR1
4390 intrinsic AINT
,ABS
,CMPLX
,DBLE
,INT
,LOG10
,MAX
,MIN
,NINT
,SIGN
,SQRT
4392 double precision DZNRM2
4394 *--- check restrictions on input parameters
...
4397 if ( lwsp
.lt
.n*
(m
+3)+5*(m
+3)**2+ideg
+1 ) iflag
= -1
4398 if ( liwsp
.lt
.m
+3 ) iflag
= -2
4399 if ( m
.ge
.n
.or
. m
.le
.0 ) iflag
= -3
4400 if ( iflag
.ne
.0 ) stop 'bad sizes (in input of ZHPHIV)'
4402 *--- initialisations
...
4407 ih
= iv
+ n*
(m
+1) + n
4409 lfree
= lwsp
- ifree
+ 1
4431 eps
= ABS
( p3
-1.0d0
)
4432 if ( eps
.eq
.0.0d0
) go to 1
4433 if ( tol
.le
.eps
) tol
= SQRT
( eps
)
4437 *>>> break_tol
= tol
4438 *>>> break_tol
= anorm*tol
4440 sgn
= SIGN
( 1.0d0
,t
)
4441 SQR1
= SQRT
( 0.1d0
)
4442 call ZCOPY
( n
, v
,1, w
,1 )
4444 *--- step
-by
-step integration
...
4446 100 if ( t_now
.ge
.t_out
) goto 500
4449 call matvec
( w
, wsp
(iv
) )
4450 call ZAXPY
( n
, ONE
, u
,1, wsp
(iv
),1 )
4451 beta
= DZNRM2
( n
, wsp
(iv
),1 )
4452 if ( beta
.eq
.0.0d0
) goto 500
4453 call ZDSCAL
( n
, 1.0d0
/beta
, wsp
(iv
),1 )
4458 if ( nstep
.eq
.0 ) then
4459 *--- obtain the very first stepsize
...
4460 xm
= 1.0d0
/DBLE
( m
)
4461 p1
= tol*
(((m
+1)/2.72D0
)**(m
+1))*SQRT
(2.0D0*3
.14D0*
(m
+1))
4462 t_new
= (1.0d0
/anorm
)*(p1
/(4.0d0*beta*anorm
))**xm
4463 p1
= 10.0d0**
(NINT
( LOG10
( t_new
)-SQR1
)-1)
4464 t_new
= AINT
( t_new
/p1
+ 0.55d0
) * p1
4467 t_step
= MIN
( t_out
-t_now
, t_new
)
4469 *--- Lanczos loop
...
4474 call matvec
( wsp
(j1v
-n
), wsp
(j1v
) )
4476 . call ZAXPY
(n
,-wsp
(ih
+(j
-1)*mh
+j
-2),wsp
(j1v
-2*n
),1,wsp
(j1v
),1)
4477 hjj
= ZDOTC
( n
, wsp
(j1v
-n
),1, wsp
(j1v
),1 )
4478 call ZAXPY
( n
, -hjj
, wsp
(j1v
-n
),1, wsp
(j1v
),1 )
4479 hj1j
= DZNRM2
( n
, wsp
(j1v
),1 )
4480 wsp
(ih
+(j
-1)*(mh
+1)) = hjj
4481 if ( hj1j
.le
.break_tol
) then
4482 print*
,'happy breakdown: mbrkdwn =',j
,' h =',hj1j
4487 t_step
= t_out
-t_now
4490 wsp
(ih
+(j
-1)*mh
+j
) = CMPLX
( hj1j
)
4491 wsp
(ih
+j*mh
+j
-1) = CMPLX
( hj1j
)
4492 call ZDSCAL
( n
, 1.0d0
/hj1j
, wsp
(j1v
),1 )
4496 call matvec
( wsp
(j1v
-n
), wsp
(j1v
) )
4497 avnorm
= DZNRM2
( n
, wsp
(j1v
),1 )
4499 *--- set
1's for the 3-extended scheme ...
4502 wsp(ih+mh*mbrkdwn) = ONE
4503 wsp(ih+m*mh+m-1) = ZERO
4504 wsp(ih+(m-1)*mh+m) = ZERO
4506 wsp(ih+(m+i)*mh+m+i-1) = ONE
4509 *--- loop while ireject<mxreject until the tolerance is reached ...
4514 *--- compute w = beta*t_step*V*phi(t_step*H)*e1 + w
4517 mx = mbrkdwn + MAX( 1,k1 )
4519 *--- irreducible rational Pade approximation ...
4520 call ZGPADM( ideg, mx, sgn*t_step, wsp(ih),mh,
4521 . wsp(ifree),lfree, iwsp, iexph, ns, iflag )
4522 iexph = ifree + iexph - 1
4523 iphih = iexph + mbrkdwn*mx
4524 nscale = nscale + ns
4525 wsp(iphih+mbrkdwn) = hj1j*wsp(iphih+mx+mbrkdwn-1)
4526 wsp(iphih+mbrkdwn+1) = hj1j*wsp(iphih+2*mx+mbrkdwn-1)
4530 *--- error estimate ...
4535 p1 = ABS( wsp(iphih+m) ) * beta
4536 p2 = ABS( wsp(iphih+m+1) ) * beta * avnorm
4537 if ( p1.gt.10.0d0*p2 ) then
4539 xm = 1.0d0/DBLE( m+1 )
4540 elseif ( p1.gt.p2 ) then
4541 err_loc = (p1*p2)/(p1-p2)
4542 xm = 1.0d0/DBLE( m+1 )
4545 xm = 1.0d0/DBLE( m )
4549 *--- reject the step-size if the error is not acceptable ...
4551 if ( (k1.ne.0) .and. (err_loc.gt.delta*t_step*tol) .and.
4552 . (mxreject.eq.0 .or. ireject.lt.mxreject) ) then
4554 t_step = gamma * t_step * (t_step*tol/err_loc)**xm
4555 p1 = 10.0d0**(NINT( LOG10( t_step )-SQR1 )-1)
4556 t_step = AINT( t_step/p1 + 0.55d0 ) * p1
4557 if ( itrace.ne.0 ) then
4558 print*,'t_step
=',t_old
4559 print*,'err_loc
=',err_loc
4560 print*,'err_required
=',delta*t_old*tol
4561 print*,'stepsize rejected
, stepping down
to:',t_step
4563 ireject = ireject + 1
4564 nreject = nreject + 1
4565 if ( mxreject.ne.0 .and. ireject.gt.mxreject ) then
4566 print*,"Failure in ZHPHIV: ---"
4567 print*,"The requested tolerance is too high."
4568 Print*,"Rerun with a smaller value."
4575 mx = mbrkdwn + MAX( 0,k1-2 )
4577 call ZGEMV( 'n
', n,mx,hjj,wsp(iv),n,wsp(iphih),1,ONE,w,1 )
4579 *--- suggested value for the next stepsize ...
4581 t_new = gamma * t_step * (t_step*tol/err_loc)**xm
4582 p1 = 10.0d0**(NINT( LOG10( t_new )-SQR1 )-1)
4583 t_new = AINT( t_new/p1 + 0.55d0 ) * p1
4585 err_loc = MAX( err_loc,rndoff )
4587 *--- update the time covered ...
4589 t_now = t_now + t_step
4591 *--- display and keep some information ...
4593 if ( itrace.ne.0 ) then
4594 print*,'integration
',nstep,'---------------------------------'
4595 print*,'scale
-square
=',ns
4596 print*,'step_size
=',t_step
4597 print*,'err_loc
=',err_loc
4598 print*,'next_step
=',t_new
4601 step_min = MIN( step_min, t_step )
4602 step_max = MAX( step_max, t_step )
4603 s_error = s_error + err_loc
4604 x_error = MAX( x_error, err_loc )
4606 if ( mxstep.eq.0 .or. nstep.lt.mxstep ) goto 100
4619 wsp(1) = CMPLX( step_min )
4620 wsp(2) = CMPLX( step_max )
4621 wsp(3) = CMPLX( 0.0d0 )
4622 wsp(4) = CMPLX( 0.0d0 )
4623 wsp(5) = CMPLX( x_error )
4624 wsp(6) = CMPLX( s_error )
4625 wsp(7) = CMPLX( tbrkdwn )
4626 wsp(8) = CMPLX( sgn*t_now )
4628 *----------------------------------------------------------------------|