Updated intel syntax x86-64 asm files to also support MS win64 call convention (ifdef...
[gromacs/rigid-bodies.git] / src / gmxlib / gmx_blas / sgemv.c
blob71c2fe2ae636df77d60b6bee6600a35f789fb96d
1 #include <ctype.h>
2 #include <math.h>
4 #include <types/simple.h>
5 #include "gmx_blas.h"
7 void
8 F77_FUNC(sgemv,SGEMV)(const char *trans,
9 int *m__,
10 int *n__,
11 float *alpha__,
12 float *a,
13 int *lda__,
14 float *x,
15 int *incx__,
16 float *beta__,
17 float *y,
18 int *incy__)
20 const char ch=toupper(*trans);
21 int lenx,leny,kx,ky;
22 int i,j,jx,jy,ix,iy;
23 float temp;
25 int m = *m__;
26 int n = *n__;
27 float alpha = *alpha__;
28 float beta = *beta__;
29 int incx = *incx__;
30 int incy = *incy__;
31 int lda = *lda__;
33 if(n<=0 || m<=0 || (fabs(alpha)<GMX_FLOAT_MIN && fabs(beta-1.0)<GMX_FLOAT_EPS))
34 return;
36 if(ch=='N') {
37 lenx = n;
38 leny = m;
39 } else {
40 lenx = m;
41 leny = n;
44 if(incx>0)
45 kx = 1;
46 else
47 kx = 1 - (lenx -1)*(incx);
49 if(incy>0)
50 ky = 1;
51 else
52 ky = 1 - (leny -1)*(incy);
54 if(fabs(beta-1.0)>GMX_FLOAT_EPS) {
55 if(incy==1) {
56 if(fabs(beta)<GMX_FLOAT_MIN)
57 for(i=0;i<leny;i++)
58 y[i] = 0.0;
59 else
60 for(i=0;i<leny;i++)
61 y[i] *= beta;
62 } else {
63 /* non-unit incr. */
64 iy = ky;
65 if(fabs(beta)<GMX_FLOAT_MIN)
66 for(i=0;i<leny;i++,iy+=incy)
67 y[iy] = 0.0;
68 else
69 for(i=0;i<leny;i++,iy+=incy)
70 y[iy] *= beta;
74 if(fabs(alpha)<GMX_FLOAT_MIN)
75 return;
77 if(ch=='N') {
78 jx = kx;
79 if(incy==1) {
80 for(j=1;j<=n;j++,jx+=incx)
81 if( fabs(x[jx-1])>GMX_FLOAT_MIN) {
82 temp = alpha * x[jx-1];
83 for(i=1;i<=m;i++)
84 y[i-1] += temp * a[(j-1)*(lda)+(i-1)];
86 } else {
87 /* non-unit y incr. */
88 for(j=1;j<=n;j++,jx+=incx)
89 if( fabs(x[jx-1])>GMX_FLOAT_MIN) {
90 temp = alpha * x[jx-1];
91 iy = ky;
92 for(i=1;i<=m;i++,iy+=incy)
93 y[iy-1] += temp * a[(j-1)*(lda)+(i-1)];
96 } else {
97 /* transpose */
98 jy = ky;
99 if(incx==1) {
100 for(j=1;j<=n;j++,jy+=incy) {
101 temp = 0.0;
102 for(i=1;i<=m;i++)
103 temp += a[(j-1)*(lda)+(i-1)] * x[i-1];
104 y[jy-1] += alpha * temp;
106 } else {
107 /* non-unit y incr. */
108 for(j=1;j<=n;j++,jy+=incy) {
109 temp = 0.0;
110 ix = kx;
111 for(i=1;i<=m;i++,ix+=incx)
112 temp += a[(j-1)*(lda)+(i-1)] * x[ix-1];
113 y[jy-1] += alpha * temp;