2 #include "../gmx_blas.h"
3 #include "../gmx_lapack.h"
6 F77_FUNC(dlasd3
,DLASD3
)(int *nl
,
27 int q_dim1
, q_offset
, u_dim1
, u_offset
, u2_dim1
, u2_offset
, vt_dim1
,
28 vt_offset
, vt2_dim1
, vt2_offset
, i__1
, i__2
;
48 q_offset
= 1 + q_dim1
;
52 u_offset
= 1 + u_dim1
;
55 u2_offset
= 1 + u2_dim1
;
58 vt_offset
= 1 + vt_dim1
;
61 vt2_offset
= 1 + vt2_dim1
;
74 } else if (*sqre
!= 1 && *sqre
!= 0) {
84 d__
[1] = fabs(z__
[1]);
85 F77_FUNC(dcopy
,DCOPY
)(&m
, &vt2
[vt2_dim1
+ 1], ldvt2
, &vt
[vt_dim1
+ 1], ldvt
);
87 F77_FUNC(dcopy
,DCOPY
)(&n
, &u2
[u2_dim1
+ 1], &c__1
, &u
[u_dim1
+ 1], &c__1
);
90 for (i__
= 1; i__
<= i__1
; ++i__
) {
91 u
[i__
+ u_dim1
] = -u2
[i__
+ u2_dim1
];
98 for (i__
= 1; i__
<= i__1
; ++i__
) {
101 /* force store and reload from memory */
102 t1
= (*p1
) + (*p2
) - dsigma
[i__
];
107 F77_FUNC(dcopy
,DCOPY
)(k
, &z__
[1], &c__1
, &q
[q_offset
], &c__1
);
109 rho
= F77_FUNC(dnrm2
,DNRM2
)(k
, &z__
[1], &c__1
);
110 F77_FUNC(dlascl
,DLASCL
)("G", &c__0
, &c__0
, &rho
, &one
, k
, &c__1
, &z__
[1], k
, info
);
115 for (j
= 1; j
<= i__1
; ++j
) {
116 F77_FUNC(dlasd4
,DLASD4
)(k
, &j
, &dsigma
[1], &z__
[1], &u
[j
* u_dim1
+ 1], &rho
, &d__
[j
],
117 &vt
[j
* vt_dim1
+ 1], info
);
125 for (i__
= 1; i__
<= i__1
; ++i__
) {
126 z__
[i__
] = u
[i__
+ *k
* u_dim1
] * vt
[i__
+ *k
* vt_dim1
];
128 for (j
= 1; j
<= i__2
; ++j
) {
129 z__
[i__
] *= u
[i__
+ j
* u_dim1
] * vt
[i__
+ j
* vt_dim1
] / (dsigma
[
130 i__
] - dsigma
[j
]) / (dsigma
[i__
] + dsigma
[j
]);
133 for (j
= i__
; j
<= i__2
; ++j
) {
134 z__
[i__
] *= u
[i__
+ j
* u_dim1
] * vt
[i__
+ j
* vt_dim1
] / (dsigma
[
135 i__
] - dsigma
[j
+ 1]) / (dsigma
[i__
] + dsigma
[j
+ 1]);
137 d__2
= sqrt(fabs(z__
[i__
]));
138 z__
[i__
] = (q
[i__
+ q_dim1
] > 0) ? d__2
: -d__2
;
142 for (i__
= 1; i__
<= i__1
; ++i__
) {
143 vt
[i__
* vt_dim1
+ 1] = z__
[1] / u
[i__
* u_dim1
+ 1] / vt
[i__
*
145 u
[i__
* u_dim1
+ 1] = -1.;
147 for (j
= 2; j
<= i__2
; ++j
) {
148 vt
[j
+ i__
* vt_dim1
] = z__
[j
] / u
[j
+ i__
* u_dim1
] / vt
[j
+ i__
150 u
[j
+ i__
* u_dim1
] = dsigma
[j
] * vt
[j
+ i__
* vt_dim1
];
152 temp
= F77_FUNC(dnrm2
,DNRM2
)(k
, &u
[i__
* u_dim1
+ 1], &c__1
);
153 q
[i__
* q_dim1
+ 1] = u
[i__
* u_dim1
+ 1] / temp
;
155 for (j
= 2; j
<= i__2
; ++j
) {
157 q
[j
+ i__
* q_dim1
] = u
[jc
+ i__
* u_dim1
] / temp
;
162 F77_FUNC(dgemm
,DGEMM
)("N", "N", &n
, k
, k
, &one
, &u2
[u2_offset
], ldu2
, &q
[q_offset
],
163 ldq
, &zero
, &u
[u_offset
], ldu
);
167 F77_FUNC(dgemm
,DGEMM
)("N", "N", nl
, k
, &ctot
[1], &one
, &u2
[(u2_dim1
<< 1) + 1],
168 ldu2
, &q
[q_dim1
+ 2], ldq
, &zero
, &u
[u_dim1
+ 1], ldu
);
170 ktemp
= ctot
[1] + 2 + ctot
[2];
171 F77_FUNC(dgemm
,DGEMM
)("N", "N", nl
, k
, &ctot
[3], &one
, &u2
[ktemp
* u2_dim1
+ 1]
172 , ldu2
, &q
[ktemp
+ q_dim1
], ldq
, &one
, &u
[u_dim1
+ 1],
175 } else if (ctot
[3] > 0) {
176 ktemp
= ctot
[1] + 2 + ctot
[2];
177 F77_FUNC(dgemm
,DGEMM
)("N", "N", nl
, k
, &ctot
[3], &one
, &u2
[ktemp
* u2_dim1
+ 1],
178 ldu2
, &q
[ktemp
+ q_dim1
], ldq
, &zero
, &u
[u_dim1
+ 1], ldu
);
180 F77_FUNC(dlacpy
,DLACPY
)("F", nl
, k
, &u2
[u2_offset
], ldu2
, &u
[u_offset
], ldu
);
182 F77_FUNC(dcopy
,DCOPY
)(k
, &q
[q_dim1
+ 1], ldq
, &u
[nlp1
+ u_dim1
], ldu
);
184 ctemp
= ctot
[2] + ctot
[3];
185 F77_FUNC(dgemm
,DGEMM
)("N", "N", nr
, k
, &ctemp
, &one
, &u2
[nlp2
+ ktemp
* u2_dim1
], ldu2
,
186 &q
[ktemp
+ q_dim1
], ldq
, &zero
, &u
[nlp2
+ u_dim1
], ldu
);
190 for (i__
= 1; i__
<= i__1
; ++i__
) {
191 temp
= F77_FUNC(dnrm2
,DNRM2
)(k
, &vt
[i__
* vt_dim1
+ 1], &c__1
);
192 q
[i__
+ q_dim1
] = vt
[i__
* vt_dim1
+ 1] / temp
;
194 for (j
= 2; j
<= i__2
; ++j
) {
196 q
[i__
+ j
* q_dim1
] = vt
[jc
+ i__
* vt_dim1
] / temp
;
201 F77_FUNC(dgemm
,DGEMM
)("N", "N", k
, &m
, k
, &one
, &q
[q_offset
], ldq
, &vt2
[vt2_offset
]
202 , ldvt2
, &zero
, &vt
[vt_offset
], ldvt
);
206 F77_FUNC(dgemm
,DGEMM
)("N", "N", k
, &nlp1
, &ktemp
, &one
, &q
[q_dim1
+ 1], ldq
, &vt2
[
207 vt2_dim1
+ 1], ldvt2
, &zero
, &vt
[vt_dim1
+ 1], ldvt
);
208 ktemp
= ctot
[1] + 2 + ctot
[2];
209 if (ktemp
<= *ldvt2
) {
210 F77_FUNC(dgemm
,DGEMM
)("N", "N", k
, &nlp1
, &ctot
[3], &one
, &q
[ktemp
* q_dim1
+ 1],
211 ldq
, &vt2
[ktemp
+ vt2_dim1
], ldvt2
, &one
, &vt
[vt_dim1
+ 1],
219 for (i__
= 1; i__
<= i__1
; ++i__
) {
220 q
[i__
+ ktemp
* q_dim1
] = q
[i__
+ q_dim1
];
223 for (i__
= nlp2
; i__
<= i__1
; ++i__
) {
224 vt2
[ktemp
+ i__
* vt2_dim1
] = vt2
[i__
* vt2_dim1
+ 1];
227 ctemp
= ctot
[2] + 1 + ctot
[3];
228 F77_FUNC(dgemm
,DGEMM
)("N", "N", k
, &nrp1
, &ctemp
, &one
, &q
[ktemp
* q_dim1
+ 1], ldq
, &
229 vt2
[ktemp
+ nlp2
* vt2_dim1
], ldvt2
, &zero
, &vt
[nlp2
* vt_dim1
+