Split g96 I/O routines from confio.cpp
[gromacs.git] / src / gromacs / linearalgebra / gmx_lapack / dlasd3.c
blob33ca0e6a4b496e9edce6109951161d8c67ebe2c7
1 #include <math.h>
2 #include "../gmx_blas.h"
3 #include "../gmx_lapack.h"
5 void
6 F77_FUNC(dlasd3,DLASD3)(int *nl,
7 int *nr,
8 int *sqre,
9 int *k,
10 double *d__,
11 double *q,
12 int *ldq,
13 double *dsigma,
14 double *u,
15 int *ldu,
16 double *u2,
17 int *ldu2,
18 double *vt,
19 int *ldvt,
20 double *vt2,
21 int *ldvt2,
22 int *idxc,
23 int *ctot,
24 double *z__,
25 int *info)
27 int q_dim1, q_offset, u_dim1, u_offset, u2_dim1, u2_offset, vt_dim1,
28 vt_offset, vt2_dim1, vt2_offset, i__1, i__2;
29 double d__2;
31 int i__, j, m, n, jc;
32 double rho;
33 int nlp1, nlp2, nrp1;
34 double temp;
35 int ctemp;
36 int ktemp;
37 int c__1 = 1;
38 int c__0 = 0;
39 double zero = 0.0;
40 double one = 1.0;
41 double *p1,*p2,t1,t2;
43 p1 = &t1;
44 p2 = &t2;
46 --d__;
47 q_dim1 = *ldq;
48 q_offset = 1 + q_dim1;
49 q -= q_offset;
50 --dsigma;
51 u_dim1 = *ldu;
52 u_offset = 1 + u_dim1;
53 u -= u_offset;
54 u2_dim1 = *ldu2;
55 u2_offset = 1 + u2_dim1;
56 u2 -= u2_offset;
57 vt_dim1 = *ldvt;
58 vt_offset = 1 + vt_dim1;
59 vt -= vt_offset;
60 vt2_dim1 = *ldvt2;
61 vt2_offset = 1 + vt2_dim1;
62 vt2 -= vt2_offset;
63 --idxc;
64 --ctot;
65 --z__;
67 /* Function Body */
68 *info = 0;
70 if (*nl < 1) {
71 *info = -1;
72 } else if (*nr < 1) {
73 *info = -2;
74 } else if (*sqre != 1 && *sqre != 0) {
75 *info = -3;
78 n = *nl + *nr + 1;
79 m = n + *sqre;
80 nlp1 = *nl + 1;
81 nlp2 = *nl + 2;
83 if (*k == 1) {
84 d__[1] = fabs(z__[1]);
85 F77_FUNC(dcopy,DCOPY)(&m, &vt2[vt2_dim1 + 1], ldvt2, &vt[vt_dim1 + 1], ldvt);
86 if (z__[1] > 0.) {
87 F77_FUNC(dcopy,DCOPY)(&n, &u2[u2_dim1 + 1], &c__1, &u[u_dim1 + 1], &c__1);
88 } else {
89 i__1 = n;
90 for (i__ = 1; i__ <= i__1; ++i__) {
91 u[i__ + u_dim1] = -u2[i__ + u2_dim1];
94 return;
97 i__1 = *k;
98 for (i__ = 1; i__ <= i__1; ++i__) {
99 t1 = dsigma[i__];
100 t2 = dsigma[i__];
101 /* force store and reload from memory */
102 t1 = (*p1) + (*p2) - dsigma[i__];
104 dsigma[i__] = t1;
107 F77_FUNC(dcopy,DCOPY)(k, &z__[1], &c__1, &q[q_offset], &c__1);
109 rho = F77_FUNC(dnrm2,DNRM2)(k, &z__[1], &c__1);
110 F77_FUNC(dlascl,DLASCL)("G", &c__0, &c__0, &rho, &one, k, &c__1, &z__[1], k, info);
111 rho *= rho;
114 i__1 = *k;
115 for (j = 1; j <= i__1; ++j) {
116 F77_FUNC(dlasd4,DLASD4)(k, &j, &dsigma[1], &z__[1], &u[j * u_dim1 + 1], &rho, &d__[j],
117 &vt[j * vt_dim1 + 1], info);
119 if (*info != 0) {
120 return;
124 i__1 = *k;
125 for (i__ = 1; i__ <= i__1; ++i__) {
126 z__[i__] = u[i__ + *k * u_dim1] * vt[i__ + *k * vt_dim1];
127 i__2 = i__ - 1;
128 for (j = 1; j <= i__2; ++j) {
129 z__[i__] *= u[i__ + j * u_dim1] * vt[i__ + j * vt_dim1] / (dsigma[
130 i__] - dsigma[j]) / (dsigma[i__] + dsigma[j]);
132 i__2 = *k - 1;
133 for (j = i__; j <= i__2; ++j) {
134 z__[i__] *= u[i__ + j * u_dim1] * vt[i__ + j * vt_dim1] / (dsigma[
135 i__] - dsigma[j + 1]) / (dsigma[i__] + dsigma[j + 1]);
137 d__2 = sqrt(fabs(z__[i__]));
138 z__[i__] = (q[i__ + q_dim1] > 0) ? d__2 : -d__2;
141 i__1 = *k;
142 for (i__ = 1; i__ <= i__1; ++i__) {
143 vt[i__ * vt_dim1 + 1] = z__[1] / u[i__ * u_dim1 + 1] / vt[i__ *
144 vt_dim1 + 1];
145 u[i__ * u_dim1 + 1] = -1.;
146 i__2 = *k;
147 for (j = 2; j <= i__2; ++j) {
148 vt[j + i__ * vt_dim1] = z__[j] / u[j + i__ * u_dim1] / vt[j + i__
149 * vt_dim1];
150 u[j + i__ * u_dim1] = dsigma[j] * vt[j + i__ * vt_dim1];
152 temp = F77_FUNC(dnrm2,DNRM2)(k, &u[i__ * u_dim1 + 1], &c__1);
153 q[i__ * q_dim1 + 1] = u[i__ * u_dim1 + 1] / temp;
154 i__2 = *k;
155 for (j = 2; j <= i__2; ++j) {
156 jc = idxc[j];
157 q[j + i__ * q_dim1] = u[jc + i__ * u_dim1] / temp;
161 if (*k == 2) {
162 F77_FUNC(dgemm,DGEMM)("N", "N", &n, k, k, &one, &u2[u2_offset], ldu2, &q[q_offset],
163 ldq, &zero, &u[u_offset], ldu);
164 goto L100;
166 if (ctot[1] > 0) {
167 F77_FUNC(dgemm,DGEMM)("N", "N", nl, k, &ctot[1], &one, &u2[(u2_dim1 << 1) + 1],
168 ldu2, &q[q_dim1 + 2], ldq, &zero, &u[u_dim1 + 1], ldu);
169 if (ctot[3] > 0) {
170 ktemp = ctot[1] + 2 + ctot[2];
171 F77_FUNC(dgemm,DGEMM)("N", "N", nl, k, &ctot[3], &one, &u2[ktemp * u2_dim1 + 1]
172 , ldu2, &q[ktemp + q_dim1], ldq, &one, &u[u_dim1 + 1],
173 ldu);
175 } else if (ctot[3] > 0) {
176 ktemp = ctot[1] + 2 + ctot[2];
177 F77_FUNC(dgemm,DGEMM)("N", "N", nl, k, &ctot[3], &one, &u2[ktemp * u2_dim1 + 1],
178 ldu2, &q[ktemp + q_dim1], ldq, &zero, &u[u_dim1 + 1], ldu);
179 } else {
180 F77_FUNC(dlacpy,DLACPY)("F", nl, k, &u2[u2_offset], ldu2, &u[u_offset], ldu);
182 F77_FUNC(dcopy,DCOPY)(k, &q[q_dim1 + 1], ldq, &u[nlp1 + u_dim1], ldu);
183 ktemp = ctot[1] + 2;
184 ctemp = ctot[2] + ctot[3];
185 F77_FUNC(dgemm,DGEMM)("N", "N", nr, k, &ctemp, &one, &u2[nlp2 + ktemp * u2_dim1], ldu2,
186 &q[ktemp + q_dim1], ldq, &zero, &u[nlp2 + u_dim1], ldu);
188 L100:
189 i__1 = *k;
190 for (i__ = 1; i__ <= i__1; ++i__) {
191 temp = F77_FUNC(dnrm2,DNRM2)(k, &vt[i__ * vt_dim1 + 1], &c__1);
192 q[i__ + q_dim1] = vt[i__ * vt_dim1 + 1] / temp;
193 i__2 = *k;
194 for (j = 2; j <= i__2; ++j) {
195 jc = idxc[j];
196 q[i__ + j * q_dim1] = vt[jc + i__ * vt_dim1] / temp;
200 if (*k == 2) {
201 F77_FUNC(dgemm,DGEMM)("N", "N", k, &m, k, &one, &q[q_offset], ldq, &vt2[vt2_offset]
202 , ldvt2, &zero, &vt[vt_offset], ldvt);
203 return;
205 ktemp = ctot[1] + 1;
206 F77_FUNC(dgemm,DGEMM)("N", "N", k, &nlp1, &ktemp, &one, &q[q_dim1 + 1], ldq, &vt2[
207 vt2_dim1 + 1], ldvt2, &zero, &vt[vt_dim1 + 1], ldvt);
208 ktemp = ctot[1] + 2 + ctot[2];
209 if (ktemp <= *ldvt2) {
210 F77_FUNC(dgemm,DGEMM)("N", "N", k, &nlp1, &ctot[3], &one, &q[ktemp * q_dim1 + 1],
211 ldq, &vt2[ktemp + vt2_dim1], ldvt2, &one, &vt[vt_dim1 + 1],
212 ldvt);
215 ktemp = ctot[1] + 1;
216 nrp1 = *nr + *sqre;
217 if (ktemp > 1) {
218 i__1 = *k;
219 for (i__ = 1; i__ <= i__1; ++i__) {
220 q[i__ + ktemp * q_dim1] = q[i__ + q_dim1];
222 i__1 = m;
223 for (i__ = nlp2; i__ <= i__1; ++i__) {
224 vt2[ktemp + i__ * vt2_dim1] = vt2[i__ * vt2_dim1 + 1];
227 ctemp = ctot[2] + 1 + ctot[3];
228 F77_FUNC(dgemm,DGEMM)("N", "N", k, &nrp1, &ctemp, &one, &q[ktemp * q_dim1 + 1], ldq, &
229 vt2[ktemp + nlp2 * vt2_dim1], ldvt2, &zero, &vt[nlp2 * vt_dim1 +
230 1], ldvt);
232 return;