* tree-ssa-loop-manip.c (lv_adjust_loop_header_phi): Use
[official-gcc.git] / libgfortran / generated / matmul_c4.c
blobfd265d8b5b1349e6561bca520f92ef0c995cfd61
1 /* Implementation of the MATMUL intrinsic
2 Copyright 2002 Free Software Foundation, Inc.
3 Contributed by Paul Brook <paul@nowt.org>
5 This file is part of the GNU Fortran 95 runtime library (libgfortran).
7 Libgfortran is free software; you can redistribute it and/or
8 modify it under the terms of the GNU Lesser General Public
9 License as published by the Free Software Foundation; either
10 version 2.1 of the License, or (at your option) any later version.
12 Libgfortran is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU Lesser General Public License for more details.
17 You should have received a copy of the GNU Lesser General Public
18 License along with libgfor; see the file COPYING.LIB. If not,
19 write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
20 Boston, MA 02111-1307, USA. */
22 #include "config.h"
23 #include <stdlib.h>
24 #include <string.h>
25 #include <assert.h>
26 #include "libgfortran.h"
28 /* This is a C version of the following fortran pseudo-code. The key
29 point is the loop order -- we access all arrays column-first, which
30 improves the performance enough to boost galgel spec score by 50%.
32 DIMENSION A(M,COUNT), B(COUNT,N), C(M,N)
33 C = 0
34 DO J=1,N
35 DO K=1,COUNT
36 DO I=1,M
37 C(I,J) = C(I,J)+A(I,K)*B(K,J)
40 void
41 __matmul_c4 (gfc_array_c4 * retarray, gfc_array_c4 * a, gfc_array_c4 * b)
43 GFC_COMPLEX_4 *abase;
44 GFC_COMPLEX_4 *bbase;
45 GFC_COMPLEX_4 *dest;
47 index_type rxstride, rystride, axstride, aystride, bxstride, bystride;
48 index_type x, y, n, count, xcount, ycount;
50 assert (GFC_DESCRIPTOR_RANK (a) == 2
51 || GFC_DESCRIPTOR_RANK (b) == 2);
53 /* C[xcount,ycount] = A[xcount, count] * B[count,ycount]
55 Either A or B (but not both) can be rank 1:
57 o One-dimensional argument A is implicitly treated as a row matrix
58 dimensioned [1,count], so xcount=1.
60 o One-dimensional argument B is implicitly treated as a column matrix
61 dimensioned [count, 1], so ycount=1.
64 if (retarray->data == NULL)
66 if (GFC_DESCRIPTOR_RANK (a) == 1)
68 retarray->dim[0].lbound = 0;
69 retarray->dim[0].ubound = b->dim[1].ubound - b->dim[1].lbound;
70 retarray->dim[0].stride = 1;
72 else if (GFC_DESCRIPTOR_RANK (b) == 1)
74 retarray->dim[0].lbound = 0;
75 retarray->dim[0].ubound = a->dim[0].ubound - a->dim[0].lbound;
76 retarray->dim[0].stride = 1;
78 else
80 retarray->dim[0].lbound = 0;
81 retarray->dim[0].ubound = a->dim[0].ubound - a->dim[0].lbound;
82 retarray->dim[0].stride = 1;
84 retarray->dim[1].lbound = 0;
85 retarray->dim[1].ubound = b->dim[1].ubound - b->dim[1].lbound;
86 retarray->dim[1].stride = retarray->dim[0].ubound+1;
89 retarray->data = internal_malloc (sizeof (GFC_COMPLEX_4) * size0 (retarray));
90 retarray->base = 0;
93 abase = a->data;
94 bbase = b->data;
95 dest = retarray->data;
97 if (retarray->dim[0].stride == 0)
98 retarray->dim[0].stride = 1;
99 if (a->dim[0].stride == 0)
100 a->dim[0].stride = 1;
101 if (b->dim[0].stride == 0)
102 b->dim[0].stride = 1;
105 if (GFC_DESCRIPTOR_RANK (retarray) == 1)
107 /* One-dimensional result may be addressed in the code below
108 either as a row or a column matrix. We want both cases to
109 work. */
110 rxstride = rystride = retarray->dim[0].stride;
112 else
114 rxstride = retarray->dim[0].stride;
115 rystride = retarray->dim[1].stride;
119 if (GFC_DESCRIPTOR_RANK (a) == 1)
121 /* Treat it as a a row matrix A[1,count]. */
122 axstride = a->dim[0].stride;
123 aystride = 1;
125 xcount = 1;
126 count = a->dim[0].ubound + 1 - a->dim[0].lbound;
128 else
130 axstride = a->dim[0].stride;
131 aystride = a->dim[1].stride;
133 count = a->dim[1].ubound + 1 - a->dim[1].lbound;
134 xcount = a->dim[0].ubound + 1 - a->dim[0].lbound;
137 assert(count == b->dim[0].ubound + 1 - b->dim[0].lbound);
139 if (GFC_DESCRIPTOR_RANK (b) == 1)
141 /* Treat it as a column matrix B[count,1] */
142 bxstride = b->dim[0].stride;
144 /* bystride should never be used for 1-dimensional b.
145 in case it is we want it to cause a segfault, rather than
146 an incorrect result. */
147 bystride = 0xDEADBEEF;
148 ycount = 1;
150 else
152 bxstride = b->dim[0].stride;
153 bystride = b->dim[1].stride;
154 ycount = b->dim[1].ubound + 1 - b->dim[1].lbound;
157 assert (a->base == 0);
158 assert (b->base == 0);
159 assert (retarray->base == 0);
161 abase = a->data;
162 bbase = b->data;
163 dest = retarray->data;
165 if (rxstride == 1 && axstride == 1 && bxstride == 1)
167 GFC_COMPLEX_4 *bbase_y;
168 GFC_COMPLEX_4 *dest_y;
169 GFC_COMPLEX_4 *abase_n;
170 GFC_COMPLEX_4 bbase_yn;
172 memset (dest, 0, (sizeof (GFC_COMPLEX_4) * size0(retarray)));
174 for (y = 0; y < ycount; y++)
176 bbase_y = bbase + y*bystride;
177 dest_y = dest + y*rystride;
178 for (n = 0; n < count; n++)
180 abase_n = abase + n*aystride;
181 bbase_yn = bbase_y[n];
182 for (x = 0; x < xcount; x++)
184 dest_y[x] += abase_n[x] * bbase_yn;
189 else
191 for (y = 0; y < ycount; y++)
192 for (x = 0; x < xcount; x++)
193 dest[x*rxstride + y*rystride] = (GFC_COMPLEX_4)0;
195 for (y = 0; y < ycount; y++)
196 for (n = 0; n < count; n++)
197 for (x = 0; x < xcount; x++)
198 /* dest[x,y] += a[x,n] * b[n,y] */
199 dest[x*rxstride + y*rystride] += abase[x*axstride + n*aystride] * bbase[n*bxstride + y*bystride];