2 ! { dg-options "-O -fno-tree-fre -fno-tree-sra -ftree-loop-vectorize" }
3 ! { dg-additional-options "-mavx2" { target x86_64-*-* i?86-*-* } }
6 integer, parameter :: dp
=kind(1.d0
)
9 !***********************************************
12 !***********************************************************************
14 ! KERNEL executes 24 samples of Fortran computation *
15 ! TK(1) - total cpu time to execute only the 24 kernels. *
16 ! TK(2) - total Flops executed by the 24 Kernels *
17 !***********************************************************************
19 ! L. L. N. L. F O R T R A N K E R N E L S: M F L O P S *
21 ! These kernels measure Fortran numerical computation rates for a *
22 ! spectrum of CPU-limited computational structures. Mathematical *
23 ! through-put is measured in units of millions of floating-point *
24 ! operations executed per Second, called Mega-Flops/Sec. *
26 ! This program measures a realistic CPU performance range for the *
27 ! Fortran programming system on a given day. The CPU performance *
28 ! rates depend strongly on the maturity of the Fortran compiler's *
29 ! ability to translate Fortran code into efficient machine code. *
30 ! [ The CPU hardware capability apart from compiler maturity (or *
31 ! availability), could be measured (or simulated) by programming the *
32 ! kernels in assembly or machine code directly. These measurements *
33 ! can also serve as a framework for tracking the maturation of the *
34 ! Fortran compiler during system development.] *
36 ! Fonzi's Law: There is not now and there never will be a language *
37 ! in which it is the least bit difficult to write *
40 !***********************************************************************
42 ! l1 := param-dimension governs the size of most 1-d arrays
43 ! l2 := param-dimension governs the size of most 2-d arrays
45 ! Loop := multiple pass control to execute kernel long enough to ti
47 ! n := DO loop control for each kernel. Controls are set in subr.
50 ! ******************************************************************
52 implicit double precision (a
-h
,o
-z
)
53 !IBM IMPLICIT REAL*8 (A-H,O-Z)
55 REAL(kind
=dp
), INTENT(inout
) :: tk
56 INTEGER :: test
!!,AND
58 COMMON/alpha
/mk
,ik
,im
,ml
,il
,mruns
,nruns
,jr
,iovec
,npfs(8,3,47)
59 COMMON/beta
/tic
,times(8,3,47),see(5,3,8,3),terrs(8,3,47),csums(8,3 &
60 ,47),fopn(8,3,47),dos(8,3,47)
62 COMMON/spaces
/ion
,j5
,k2
,k3
,loop1
,laps
,loop
,m
,kr
,lp
,n13h
,ibuf
,nx
,l
, &
63 npass
,nfail
,n
,n1
,n2
,n13
,n213
,n813
,n14
,n16
,n416
,n21
,nt1
,nt2
,last
,idebug
&
64 ,mpy
,loop2
,mucho
,mpylim
,intbuf(16)
66 COMMON/spacer
/a11
,a12
,a13
,a21
,a22
,a23
,a31
,a32
,a33
,ar
,br
,c0
,cr
,di
,dk
&
67 ,dm22
,dm23
,dm24
,dm25
,dm26
,dm27
,dm28
,dn
,e3
,e6
,expmax
,flx
,q
,qa
,r
,ri
&
68 ,s
,scale
,sig
,stb5
,t
,xnc
,xnei
,xnm
70 COMMON/space0
/time(47),csum(47),ww(47),wt(47),ticks
,fr(9),terr1(47 &
71 ),sumw(7),start
,skale(47),bias(47),ws(95),total(47),flopn(47),iq(7 &
74 COMMON/spacei
/wtp(3),mul(3),ispan(47,3),ipass(47,3)
76 ! ******************************************************************
80 COMMON/ispace
/e(96),f(96),ix(1001),ir(1001),zone(300)
82 COMMON/space1
/u(1001),v(1001),w(1001),x(1001),y(1001),z(1001),g(1001) &
83 ,du1(101),du2(101),du3(101),grd(1001),dex(1001),xi(1001),ex(1001) &
84 ,ex1(1001),dex1(1001),vx(1001),xx(1001),rx(1001),rh(2048),vsp(101) &
85 ,vstp(101),vxne(101),vxnd(101),ve3(101),vlr(101),vlin(101),b5(101) &
86 ,plan(300),d(300),sa(101),sb(101)
88 COMMON/space2
/p(4,512),px(25,101),cx(25,101),vy(101,25),vh(101,7), &
89 vf(101,7),vg(101,7),vs(101,7),za(101,7),zp(101,7),zq(101,7),zr(101 &
90 ,7),zm(101,7),zb(101,7),zu(101,7),zv(101,7),zz(101,7),b(64,64),c(64,64) &
91 ,h(64,64),u1(5,101,2),u2(5,101,2),u3(5,101,2)
93 ! ******************************************************************
95 dimension zx(1023),xz(447,3),tk(6),mtmp(1)
96 EQUIVALENCE(zx(1),z(1)),(xz(1,1),x(1))
101 ! ******************************************************************
103 ! STANDARD PRODUCT COMPILER DIRECTIVES MAY BE USED FOR OPTIMIZATION
109 CALL trace('KERNEL ')
125 !***********************************************************************
126 !*** KERNEL 1 HYDRO FRAGMENT
127 !***********************************************************************
129 x(:n
)= q
+y(:n
)*(r
*zx(11:n
+10)+t
*zx(12:n
+11))
136 ! we must execute DO k= 1,n repeatedly for accurat
139 !***********************************************************************
140 !*** KERNEL 2 ICCG EXCERPT (INCOMPLETE CHOLESKY - CONJUGATE GRADIE
142 !***********************************************************************
154 x(ipntp
+2:ipntp
+ii
+1)=x(ipnt
+2:ipntp
:2)-v(ipnt
+2:ipntp
:2) &
155 &*x(ipnt
+1:ipntp
-1:2)-v(ipnt
+3:ipntp
+1:2)*x(ipnt
+3:ipntp
+1:2)
164 !***********************************************************************
165 !*** KERNEL 3 INNER PRODUCT
166 !***********************************************************************
169 q
= dot_product(z(:n
),x(:n
))
176 !***********************************************************************
177 !*** KERNEL 4 BANDED LINEAR EQUATIONS
178 !***********************************************************************
184 xz(6,:3)= y(5)*(xz(6,:3)+matmul(y(5:n
:5), xz(:n
/5,:3)))
193 !***********************************************************************
194 !*** KERNEL 5 TRI-DIAGONAL ELIMINATION, BELOW DIAGONAL (NO VECTORS
196 !***********************************************************************
211 !***********************************************************************
212 !*** KERNEL 6 GENERAL LINEAR RECURRENCE EQUATIONS
213 !***********************************************************************
217 w(i
)= 0.0100D0
+dot_product(b(i
,:i
-1),w(i
-1:1:-1))
226 !***********************************************************************
227 !*** KERNEL 7 EQUATION OF STATE FRAGMENT
228 !***********************************************************************
231 x(:n
)= u(:n
)+r
*(z(:n
)+r
*y(:n
))+t
*(u(4:n
+3)+r
*(u(3:n
+2)+r
*u(2:n
+1))+t
*( &
232 u(7:n
+6)+q
*(u(6:n
+5)+q
*u(5:n
+4))))
241 !***********************************************************************
242 !*** KERNEL 8 A.D.I. INTEGRATION
243 !***********************************************************************
251 du1ky
= u1(kx
,ky
+1,nl1
)-u1(kx
,ky
-1,nl1
)
252 du2ky
= u2(kx
,ky
+1,nl1
)-u2(kx
,ky
-1,nl1
)
253 du3ky
= u3(kx
,ky
+1,nl1
)-u3(kx
,ky
-1,nl1
)
254 u1(kx
,ky
,nl2
)= u1(kx
,ky
,nl1
)+a11
*du1ky
+a12
*du2ky
+a13
&
255 *du3ky
+sig
*(u1(kx
+1,ky
,nl1
)-fw
*u1(kx
,ky
,nl1
)+u1(kx
-1,ky
,nl1
))
256 u2(kx
,ky
,nl2
)= u2(kx
,ky
,nl1
)+a21
*du1ky
+a22
*du2ky
+a23
&
257 *du3ky
+sig
*(u2(kx
+1,ky
,nl1
)-fw
*u2(kx
,ky
,nl1
)+u2(kx
-1,ky
,nl1
))
258 u3(kx
,ky
,nl2
)= u3(kx
,ky
,nl1
)+a31
*du1ky
+a32
*du2ky
+a33
&
259 *du3ky
+sig
*(u3(kx
+1,ky
,nl1
)-fw
*u3(kx
,ky
,nl1
)+u3(kx
-1,ky
,nl1
))
269 !***********************************************************************
270 !*** KERNEL 9 INTEGRATE PREDICTORS
271 !***********************************************************************
274 px(1,:n
)= dm28
*px(13,:n
)+px(3,:n
)+dm27
*px(12,:n
)+dm26
*px(11,:n
)+dm25
*px(10 &
275 ,:n
)+dm24
*px(9,:n
)+dm23
*px(8,:n
)+dm22
*px(7,:n
)+c0
*(px(5,:n
)+px(6,:n
))
283 !***********************************************************************
284 !*** KERNEL 10 DIFFERENCE PREDICTORS
285 !***********************************************************************
305 px(14,k
)= cr
-px(13,k
)
308 IF(test(10) <= 0)THEN
315 !***********************************************************************
316 !*** KERNEL 11 FIRST SUM. PARTIAL SUMS. (NO VECTORS)
317 !***********************************************************************
325 IF(test(11) <= 0)THEN
332 !***********************************************************************
333 !*** KERNEL 12 FIRST DIFF.
334 !***********************************************************************
336 x(:n
)= y(2:n
+1)-y(:n
)
337 IF(test(12) <= 0)THEN
343 !***********************************************************************
344 !*** KERNEL 13 2-D PIC Particle In Cell
345 !***********************************************************************
350 ! rounding modes for integerizing make no difference here
352 i1
= 1+iand(int(p(1,k
)),63)
353 j1
= 1+iand(int(p(2,k
)),63)
354 p(3,k
)= p(3,k
)+b(i1
,j1
)
355 p(1,k
)= p(1,k
)+p(3,k
)
356 i2
= iand(int(p(1,k
)),63)
357 p(1,k
)= p(1,k
)+y(i2
+32)
358 p(4,k
)= p(4,k
)+c(i1
,j1
)
359 p(2,k
)= p(2,k
)+p(4,k
)
360 j2
= iand(int(p(2,k
)),63)
361 p(2,k
)= p(2,k
)+z(j2
+32)
364 h(i2
,j2
)= h(i2
,j2
)+fw
366 IF(test(13) <= 0)THEN
372 !***********************************************************************
373 !*** KERNEL 14 1-D PIC Particle In Cell
374 !***********************************************************************
382 vx(:n
)= ex(ix(:n
))-ix(:n
)*dex(ix(:n
))
384 rx(:n
)= vx(:n
)+flx
-ir(:n
)
385 ir(:n
)= iand(ir(:n
),2047)+1
386 xx(:n
)= rx(:n
)+ir(:n
)
388 rh(ir(k
))= rh(ir(k
))+fw
-rx(k
)
389 rh(ir(k
)+1)= rh(ir(k
)+1)+rx(k
)
391 IF(test(14) <= 0)THEN
398 !***********************************************************************
399 !*** KERNEL 15 CASUAL FORTRAN. DEVELOPMENT VERSION.
400 !***********************************************************************
403 ! CASUAL ORDERING OF SCALAR OPERATIONS IS TYPICAL PRACTICE.
404 ! THIS EXAMPLE DEMONSTRATES THE NON-TRIVIAL TRANSFORMATION
405 ! REQUIRED TO MAP INTO AN EFFICIENT MACHINE IMPLEMENTATION.
412 !$omp parallel do private(t,j,k,r,s,i,ltmp) if(nz>98)
415 i
= merge(k
-1,k
,vf(k
,j
) < vf((k
-1),j
))
416 t
= merge(br
,ar
,vh(k
,(j
+1)) <= vh(k
,j
))
417 r
= MAX(vh(i
,j
),vh(i
,j
+1))
419 vy(k
,j
)= t
/s
*SQRT(vg(k
,j
)**2+r
*r
)
421 ltmp
=vf(k
,j
) >= vf(k
,(j
-1))
424 r
= MAX(vg(k
,i
),vg(k
+1,i
))
426 vs(k
,j
)= t
/s
*SQRT(vh(k
,j
)**2+r
*r
)
432 IF(test(15) <= 0)THEN
438 !***********************************************************************
439 !*** KERNEL 16 MONTE CARLO SEARCH LOOP
440 !***********************************************************************
458 IF(d(j5
) < d(j5
-1)*(t
-d(j5
-2))**2+(s
-d(j5
-3))**2+ (r
-d(j5
-4))**2)THEN
461 IF(d(j5
) == d(j5
-1)*(t
-d(j5
-2))**2+(s
-d(j5
-3))**2+ (r
-d(j5
-4))**2)THEN
469 IF(plan(j5
) == t
)THEN
477 IF(plan(j5
) == s
)THEN
484 IF(plan(j5
) == r
)THEN
490 IF(zone(j4
-1) <= 0)THEN
495 200 IF(zone(j4
-1) == 0)THEN
499 IF(test(16) <= 0)THEN
503 dw
= 5.0000D0
/3.0000D0
505 !***********************************************************************
506 !*** KERNEL 17 IMPLICIT, CONDITIONAL COMPUTATION (NO VECTORS)
507 !***********************************************************************
509 ! RECURSIVE-DOUBLING VECTOR TECHNIQUES CAN NOT BE USED
510 ! BECAUSE CONDITIONAL OPERATIONS APPLY TO EACH ELEMENT.
512 fw
= 1.0000D0
/3.0000D0
513 tw
= 1.0300D0
/3.0700D0
520 e3
= rtmp
*vlr(k
)+vlin(k
)
525 IF(max(rtmp
,xnei
) <= xnc
)THEN
531 rtmp
= rtmp
*vsp(k
)+vstp(k
)
539 IF(test(17) <= 0)THEN
546 !***********************************************************************
547 !*** KERNEL 18 2-D EXPLICIT HYDRODYNAMICS FRAGMENT
548 !***********************************************************************
555 zb(2:jn
,2:kn
)=(zr(2:jn
,2:kn
)+zr(2:jn
,:kn
-1))/(zm(2:jn
,2:kn
)+zm(:jn
-1,2:kn
)) &
556 *(zp(:jn
-1,2:kn
)-zp(2:jn
,2:kn
)+(zq(:jn
-1,2:kn
)-zq(2:jn
,2:kn
)))
557 za(2:jn
,2:kn
)=(zr(2:jn
,2:kn
)+zr(:jn
-1,2:kn
))/(zm(:jn
-1,2:kn
)+zm(:jn
-1,3:kn
+1)) &
558 *(zp(:jn
-1,3:kn
+1)-zp(:jn
-1,2:kn
)+(zq(:jn
-1,3:kn
+1)-zq(:jn
-1,2:kn
)))
559 zu(2:jn
,2:kn
)= zu(2:jn
,2:kn
)+ &
560 s
*(za(2:jn
,2:kn
)*(zz(2:jn
,2:kn
)-zz(3:jn
+1,2:kn
)) &
561 -za(:jn
-1,2:kn
)*(zz(2:jn
,2:kn
)-zz(:jn
-1,2:kn
)) &
562 -zb(2:jn
,2:kn
)*(zz(2:jn
,2:kn
)-zz(2:jn
,:kn
-1))+ &
563 zb(2:jn
,3:kn
+1)*(zz(2:jn
, 2:kn
)-zz(2:jn
,3:kn
+1)))
564 zv(2:jn
,2:kn
)= zv(2:jn
,2:kn
)+ &
565 s
*(za(2:jn
,2:kn
)*(zr(2:jn
,2:kn
)-zr(3:jn
+1,2:kn
)) &
566 -za(:jn
-1,2:kn
)*(zr(2:jn
,2:kn
)-zr(:jn
-1,2:kn
)) &
567 -zb(2:jn
,2:kn
)*(zr(2:jn
,2:kn
)-zr(2:jn
,:kn
-1))+ &
568 zb(2:jn
,3:kn
+1)*(zr(2:jn
, 2:kn
)-zr(2:jn
,3:kn
+1)))
569 zr(2:jn
,2:kn
)= zr(2:jn
,2:kn
)+t
*zu(2:jn
,2:kn
)
570 zz(2:jn
,2:kn
)= zz(2:jn
,2:kn
)+t
*zv(2:jn
,2:kn
)
571 IF(test(18) <= 0)THEN
578 !***********************************************************************
579 !*** KERNEL 19 GENERAL LINEAR RECURRENCE EQUATIONS (NO VECTORS)
580 !***********************************************************************
585 b5(k
+kb5i
)= sa(k
)+stb5
*sb(k
)
586 stb5
= b5(k
+kb5i
)-stb5
589 b5(k
+kb5i
)= sa(k
)+stb5
*sb(k
)
590 stb5
= b5(k
+kb5i
)-stb5
592 IF(test(19) <= 0)THEN
598 !***********************************************************************
599 !*** KERNEL 20 DISCRETE ORDINATES TRANSPORT: RECURRENCE (NO VECTORS
600 !***********************************************************************
607 di
= y(k
)*(rtmp
+dk
)-g(k
)
608 dn
=merge( max(s
,min(z(k
)*(rtmp
+dk
)/di
,t
)),dw
,di
/= 0.0)
609 x(k
)= ((w(k
)+v(k
)*dn
)*rtmp
+u(k
))/(vx(k
)+v(k
)*dn
)
610 rtmp
= ((w(k
)-vx(k
))*rtmp
+u(k
))*DN
/(vx(k
)+v(k
)*dn
)+ rtmp
613 IF(test(20) <= 0)THEN
620 !***********************************************************************
621 !*** KERNEL 21 MATRIX*MATRIX PRODUCT
622 !***********************************************************************
624 px(:25,:n
)= px(:25,:n
)+matmul(vy(:25,:25),cx(:25,:n
))
625 IF(test(21) <= 0)THEN
632 !***********************************************************************
633 !*** KERNEL 22 PLANCKIAN DISTRIBUTION
634 !***********************************************************************
638 u(n
)= 0.99000D0
*expmax
*v(n
)
643 w(:n
)= x(:n
)/(EXP(y(:n
))-fw
)
644 IF(test(22) <= 0)THEN
650 !***********************************************************************
651 !*** KERNEL 23 2-D IMPLICIT HYDRODYNAMICS FRAGMENT
652 !***********************************************************************
659 za(k
,j
)= za(k
,j
)+fw
*(za(k
,j
+1)*zr(k
,j
)-za(k
,j
)+ &
660 & zv(k
,j
)*za(k
-1,j
)+(zz(k
,j
)+za(k
+1,j
)* &
661 & zu(k
,j
)+za(k
,j
-1)*zb(k
,j
)))
664 IF(test(23) <= 0)THEN
670 !***********************************************************************
671 !*** KERNEL 24 FIND LOCATION OF FIRST MINIMUM IN ARRAY
672 !***********************************************************************
674 ! X( n/2)= -1.000d+50
677 m
= minloc(x(:n
),DIM
=1)
679 IF(test(24) == 0)THEN
687 times(jr
,il
,k
)= time(k
)
688 terrs(jr
,il
,k
)= terr1(k
)
689 npfs(jr
,il
,k
)= npfs1(k
)
690 csums(jr
,il
,k
)= csum(k
)
691 dos(jr
,il
,k
)= total(k
)
692 fopn(jr
,il
,k
)= flopn(k
)
693 som
= som
+flopn(k
)*total(k
)
697 ! Dumpout Checksums: file "chksum"
698 ! WRITE ( 7,706) jr, il
700 ! WRITE ( 7,707) ( CSUM(k), k= 1,mk)
701 ! 707 FORMAT(5X,'&',1PE23.16,',',1PE23.16,',',1PE23.16,',')
703 CALL track('KERNEL ')
705 END SUBROUTINE kernel