PLASMA  2.4.5
PLASMA - Parallel Linear Algebra for Scalable Multi-core Architectures
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups
flops.h
Go to the documentation of this file.
1 
12 /*
13  * This file provide the flops formula for all Level 3 BLAS and some
14  * Lapack routines. Each macro uses the same size parameters as the
15  * function associated and provide one formula for additions and one
16  * for multiplications. Example to use these macros:
17  *
18  * FLOPS_ZGEMM( m, n, k )
19  *
20  * All the formula are reported in the LAPACK Lawn 41:
21  * http://www.netlib.org/lapack/lawns/lawn41.ps
22  */
23 #ifndef _FLOPS_H_
24 #define _FLOPS_H_
25 
26 /************************************************************************
27  * Generic formula coming from LAWN 41
28  ***********************************************************************/
29 
30 /*
31  * Level 2 BLAS
32  */
33 #define FMULS_GEMV(__m, __n) ((double)(__m) * (double)(__n) + 2. * (double)(__m))
34 #define FADDS_GEMV(__m, __n) ((double)(__m) * (double)(__n) )
35 
36 #define FMULS_SYMV(__n) FMULS_GEMV( (__n), (__n) )
37 #define FADDS_SYMV(__n) FADDS_GEMV( (__n), (__n) )
38 #define FMULS_HEMV FMULS_SYMV
39 #define FADDS_HEMV FADDS_SYMV
40 
41 /*
42  * Level 3 BLAS
43  */
44 #define FMULS_GEMM(__m, __n, __k) ((double)(__m) * (double)(__n) * (double)(__k))
45 #define FADDS_GEMM(__m, __n, __k) ((double)(__m) * (double)(__n) * (double)(__k))
46 
47 #define FMULS_SYMM(__side, __m, __n) ( ( (__side) == PlasmaLeft ) ? FMULS_GEMM((__m), (__m), (__n)) : FMULS_GEMM((__m), (__n), (__n)) )
48 #define FADDS_SYMM(__side, __m, __n) ( ( (__side) == PlasmaLeft ) ? FADDS_GEMM((__m), (__m), (__n)) : FADDS_GEMM((__m), (__n), (__n)) )
49 #define FMULS_HEMM FMULS_SYMM
50 #define FADDS_HEMM FADDS_SYMM
51 
52 #define FMULS_SYRK(__k, __n) (0.5 * (double)(__k) * (double)(__n) * ((double)(__n)+1.))
53 #define FADDS_SYRK(__k, __n) (0.5 * (double)(__k) * (double)(__n) * ((double)(__n)+1.))
54 #define FMULS_HERK FMULS_SYRK
55 #define FADDS_HERK FADDS_SYRK
56 
57 #define FMULS_SYR2K(__k, __n) ((double)(__k) * (double)(__n) * (double)(__n) )
58 #define FADDS_SYR2K(__k, __n) ((double)(__k) * (double)(__n) * (double)(__n) + (double)(__n))
59 #define FMULS_HER2K FMULS_SYR2K
60 #define FADDS_HER2K FADDS_SYR2K
61 
62 #define FMULS_TRMM_2(__m, __n) (0.5 * (double)(__n) * (double)(__m) * ((double)(__m)+1.))
63 #define FADDS_TRMM_2(__m, __n) (0.5 * (double)(__n) * (double)(__m) * ((double)(__m)-1.))
64 
65 
66 #define FMULS_TRMM(__side, __m, __n) ( ( (__side) == PlasmaLeft ) ? FMULS_TRMM_2((__m), (__n)) : FMULS_TRMM_2((__n), (__m)) )
67 #define FADDS_TRMM(__side, __m, __n) ( ( (__side) == PlasmaLeft ) ? FADDS_TRMM_2((__m), (__n)) : FADDS_TRMM_2((__n), (__m)) )
68 
69 #define FMULS_TRSM FMULS_TRMM
70 #define FADDS_TRSM FMULS_TRMM
71 
72 /*
73  * Lapack
74  */
75 #define FMULS_GETRF(__m, __n) ( ((__m) < (__n)) ? (0.5 * (double)(__m) * ((double)(__m) * ((double)(__n) - (1./3.) * (__m) - 1. ) + (double)(__n)) + (2. / 3.) * (__m)) \
76  : (0.5 * (double)(__n) * ((double)(__n) * ((double)(__m) - (1./3.) * (__n) - 1. ) + (double)(__m)) + (2. / 3.) * (__n)) )
77 #define FADDS_GETRF(__m, __n) ( ((__m) < (__n)) ? (0.5 * (double)(__m) * ((double)(__m) * ((double)(__n) - (1./3.) * (__m) ) - (double)(__n)) + (1. / 6.) * (__m)) \
78  : (0.5 * (double)(__n) * ((double)(__n) * ((double)(__m) - (1./3.) * (__n) ) - (double)(__m)) + (1. / 6.) * (__n)) )
79 
80 #define FMULS_GETRI(__n) ( (double)(__n) * ((5. / 6.) + (double)(__n) * ((2. / 3.) * (double)(__n) + 0.5)) )
81 #define FADDS_GETRI(__n) ( (double)(__n) * ((5. / 6.) + (double)(__n) * ((2. / 3.) * (double)(__n) - 1.5)) )
82 
83 #define FMULS_GETRS(__n, __nrhs) ((double)(__nrhs) * (double)(__n) * (double)(__n) )
84 #define FADDS_GETRS(__n, __nrhs) ((double)(__nrhs) * (double)(__n) * ((double)(__n) - 1. ))
85 
86 #define FMULS_POTRF(__n) ((double)(__n) * (((1. / 6.) * (double)(__n) + 0.5) * (double)(__n) + (1. / 3.)))
87 #define FADDS_POTRF(__n) ((double)(__n) * (((1. / 6.) * (double)(__n) ) * (double)(__n) - (1. / 6.)))
88 
89 #define FMULS_POTRI(__n) ( (double)(__n) * ((2. / 3.) + (double)(__n) * ((1. / 3.) * (double)(__n) + 1. )) )
90 #define FADDS_POTRI(__n) ( (double)(__n) * ((1. / 6.) + (double)(__n) * ((1. / 3.) * (double)(__n) - 0.5)) )
91 
92 #define FMULS_POTRS(__n, __nrhs) ((double)(__nrhs) * (double)(__n) * ((double)(__n) + 1. ))
93 #define FADDS_POTRS(__n, __nrhs) ((double)(__nrhs) * (double)(__n) * ((double)(__n) - 1. ))
94 
95 //SPBTRF
96 //SPBTRS
97 //SSYTRF
98 //SSYTRI
99 //SSYTRS
100 
101 #define FMULS_GEQRF(__m, __n) (((__m) > (__n)) ? ((double)(__n) * ((double)(__n) * ( 0.5-(1./3.) * (double)(__n) + (double)(__m)) + (double)(__m) + 23. / 6.)) \
102  : ((double)(__m) * ((double)(__m) * ( -0.5-(1./3.) * (double)(__m) + (double)(__n)) + 2.*(double)(__n) + 23. / 6.)) )
103 #define FADDS_GEQRF(__m, __n) (((__m) > (__n)) ? ((double)(__n) * ((double)(__n) * ( 0.5-(1./3.) * (double)(__n) + (double)(__m)) + 5. / 6.)) \
104  : ((double)(__m) * ((double)(__m) * ( -0.5-(1./3.) * (double)(__m) + (double)(__n)) + (double)(__n) + 5. / 6.)) )
105 
106 #define FMULS_GEQLF(__m, __n) FMULS_GEQRF(__m, __n)
107 #define FADDS_GEQLF(__m, __n) FADDS_GEQRF(__m, __n)
108 
109 #define FMULS_GERQF(__m, __n) (((__m) > (__n)) ? ((double)(__n) * ((double)(__n) * ( 0.5-(1./3.) * (double)(__n) + (double)(__m)) + (double)(__m) + 29. / 6.)) \
110  : ((double)(__m) * ((double)(__m) * ( -0.5-(1./3.) * (double)(__m) + (double)(__n)) + 2.*(double)(__n) + 29. / 6.)) )
111 #define FADDS_GERQF(__m, __n) (((__m) > (__n)) ? ((double)(__n) * ((double)(__n) * ( -0.5-(1./3.) * (double)(__n) + (double)(__m)) + (double)(__m) + 5. / 6.)) \
112  : ((double)(__m) * ((double)(__m) * ( 0.5-(1./3.) * (double)(__m) + (double)(__n)) + + 5. / 6.)) )
113 
114 #define FMULS_GELQF(__m, __n) FMULS_GERQF(__m, __n)
115 #define FADDS_GELQF(__m, __n) FADDS_GERQF(__m, __n)
116 
117 #define FMULS_UNGQR(__m, __n, __k) ((double)(__k) * (2.* (double)(__m) * (double)(__n) + 2. * (double)(__n) - 5./3. + (double)(__k) * ( 2./3. * (double)(__k) - ((double)(__m) + (double)(__n)) - 1.)))
118 #define FADDS_UNGQR(__m, __n, __k) ((double)(__k) * (2.* (double)(__m) * (double)(__n) + (double)(__n) - (double)(__m) + 1./3. + (double)(__k) * ( 2./3. * (double)(__k) - ((double)(__m) + (double)(__n)) )))
119 #define FMULS_UNGQL FMULS_UNGQR
120 #define FMULS_ORGQR FMULS_UNGQR
121 #define FMULS_ORGQL FMULS_UNGQR
122 #define FADDS_UNGQL FADDS_UNGQR
123 #define FADDS_ORGQR FADDS_UNGQR
124 #define FADDS_ORGQL FADDS_UNGQR
125 
126 #define FMULS_UNGRQ(__m, __n, __k) ((double)(__k) * (2.* (double)(__m) * (double)(__n) + (double)(__m) + (double)(__n) - 2./3. + (double)(__k) * ( 2./3. * (double)(__k) - ((double)(__m) + (double)(__n)) - 1.)))
127 #define FADDS_UNGRQ(__m, __n, __k) ((double)(__k) * (2.* (double)(__m) * (double)(__n) + (double)(__m) - (double)(__n) + 1./3. + (double)(__k) * ( 2./3. * (double)(__k) - ((double)(__m) + (double)(__n)) )))
128 #define FMULS_UNGLQ FMULS_UNGRQ
129 #define FMULS_ORGRQ FMULS_UNGRQ
130 #define FMULS_ORGLQ FMULS_UNGRQ
131 #define FADDS_UNGLQ FADDS_UNGRQ
132 #define FADDS_ORGRQ FADDS_UNGRQ
133 #define FADDS_ORGLQ FADDS_UNGRQ
134 
135 #define FMULS_GEQRS(__m, __n, __nrhs) ((double)(__nrhs) * ((double)(__n) * ( 2.* (double)(__m) - 0.5 * (double)(__n) + 2.5)))
136 #define FADDS_GEQRS(__m, __n, __nrhs) ((double)(__nrhs) * ((double)(__n) * ( 2.* (double)(__m) - 0.5 * (double)(__n) + 0.5)))
137 
138 //UNMQR, UNMLQ, UNMQL, UNMRQ (Left)
139 //UNMQR, UNMLQ, UNMQL, UNMRQ (Right)
140 
141 #define FMULS_TRTRI(__n) ((double)(__n) * ((double)(__n) * ( 1./6. * (double)(__n) + 0.5 ) + 1./3.))
142 #define FADDS_TRTRI(__n) ((double)(__n) * ((double)(__n) * ( 1./6. * (double)(__n) - 0.5 ) + 1./3.))
143 
144 #define FMULS_GEHRD(__n) ( (double)(__n) * ((double)(__n) * (5./3. *(double)(__n) + 0.5) - 7./6.) - 13. )
145 #define FADDS_GEHRD(__n) ( (double)(__n) * ((double)(__n) * (5./3. *(double)(__n) - 1. ) - 2./3.) - 8. )
146 
147 #define FMULS_SYTRD(__n) ( (double)(__n) * ( (double)(__n) * ( 2./3. * (double)(__n) + 2.5 ) - 1./6. ) - 15.)
148 #define FADDS_SYTRD(__n) ( (double)(__n) * ( (double)(__n) * ( 2./3. * (double)(__n) + 1. ) - 8./3. ) - 4.)
149 #define FMULS_HETRD FMULS_SYTRD
150 #define FADDS_HETRD FADDS_SYTRD
151 
152 #define FMULS_GEBRD(__m, __n) ( ((__m) >= (__n)) ? ((double)(__n) * ((double)(__n) * (2. * (double)(__m) - 2./3. * (double)(__n) + 2. ) + 20./3.)) \
153  : ((double)(__m) * ((double)(__m) * (2. * (double)(__n) - 2./3. * (double)(__m) + 2. ) + 20./3.)) )
154 #define FADDS_GEBRD(__m, __n) ( ((__m) >= (__n)) ? ((double)(__n) * ((double)(__n) * (2. * (double)(__m) - 2./3. * (double)(__n) + 1. ) - (double)(__m) + 5./3.)) \
155  : ((double)(__m) * ((double)(__m) * (2. * (double)(__n) - 2./3. * (double)(__m) + 1. ) - (double)(__n) + 5./3.)) )
156 
157 
158 /*******************************************************************************
159  * Users functions
160  ******************************************************************************/
161 
162 /*
163  * Level 2 BLAS
164  */
165 #define FLOPS_ZGEMV(__m, __n) (6. * FMULS_GEMV((__m), (__n)) + 2.0 * FADDS_GEMV((__m), (__n)) )
166 #define FLOPS_CGEMV(__m, __n) (6. * FMULS_GEMV((__m), (__n)) + 2.0 * FADDS_GEMV((__m), (__n)) )
167 #define FLOPS_DGEMV(__m, __n) ( FMULS_GEMV((__m), (__n)) + FADDS_GEMV((__m), (__n)) )
168 #define FLOPS_SGEMV(__m, __n) ( FMULS_GEMV((__m), (__n)) + FADDS_GEMV((__m), (__n)) )
169 
170 #define FLOPS_ZHEMV(__n) (6. * FMULS_HEMV((__n)) + 2.0 * FADDS_HEMV((__n)) )
171 #define FLOPS_CHEMV(__n) (6. * FMULS_HEMV((__n)) + 2.0 * FADDS_HEMV((__n)) )
172 
173 #define FLOPS_ZSYMV(__n) (6. * FMULS_SYMV((__n)) + 2.0 * FADDS_SYMV((__n)) )
174 #define FLOPS_CSYMV(__n) (6. * FMULS_SYMV((__n)) + 2.0 * FADDS_SYMV((__n)) )
175 #define FLOPS_DSYMV(__n) ( FMULS_SYMV((__n)) + FADDS_SYMV((__n)) )
176 #define FLOPS_SSYMV(__n) ( FMULS_SYMV((__n)) + FADDS_SYMV((__n)) )
177 
178 /*
179  * Level 3 BLAS
180  */
181 #define FLOPS_ZGEMM(__m, __n, __k) (6. * FMULS_GEMM((__m), (__n), (__k)) + 2.0 * FADDS_GEMM((__m), (__n), (__k)) )
182 #define FLOPS_CGEMM(__m, __n, __k) (6. * FMULS_GEMM((__m), (__n), (__k)) + 2.0 * FADDS_GEMM((__m), (__n), (__k)) )
183 #define FLOPS_DGEMM(__m, __n, __k) ( FMULS_GEMM((__m), (__n), (__k)) + FADDS_GEMM((__m), (__n), (__k)) )
184 #define FLOPS_SGEMM(__m, __n, __k) ( FMULS_GEMM((__m), (__n), (__k)) + FADDS_GEMM((__m), (__n), (__k)) )
185 
186 #define FLOPS_ZHEMM(__side, __m, __n) (6. * FMULS_HEMM(__side, (__m), (__n)) + 2.0 * FADDS_HEMM(__side, (__m), (__n)) )
187 #define FLOPS_CHEMM(__side, __m, __n) (6. * FMULS_HEMM(__side, (__m), (__n)) + 2.0 * FADDS_HEMM(__side, (__m), (__n)) )
188 
189 #define FLOPS_ZSYMM(__side, __m, __n) (6. * FMULS_SYMM(__side, (__m), (__n)) + 2.0 * FADDS_SYMM(__side, (__m), (__n)) )
190 #define FLOPS_CSYMM(__side, __m, __n) (6. * FMULS_SYMM(__side, (__m), (__n)) + 2.0 * FADDS_SYMM(__side, (__m), (__n)) )
191 #define FLOPS_DSYMM(__side, __m, __n) ( FMULS_SYMM(__side, (__m), (__n)) + FADDS_SYMM(__side, (__m), (__n)) )
192 #define FLOPS_SSYMM(__side, __m, __n) ( FMULS_SYMM(__side, (__m), (__n)) + FADDS_SYMM(__side, (__m), (__n)) )
193 
194 #define FLOPS_ZHERK(__k, __n) (6. * FMULS_HERK((__k), (__n)) + 2.0 * FADDS_HERK((__k), (__n)) )
195 #define FLOPS_CHERK(__k, __n) (6. * FMULS_HERK((__k), (__n)) + 2.0 * FADDS_HERK((__k), (__n)) )
196 
197 #define FLOPS_ZSYRK(__k, __n) (6. * FMULS_SYRK((__k), (__n)) + 2.0 * FADDS_SYRK((__k), (__n)) )
198 #define FLOPS_CSYRK(__k, __n) (6. * FMULS_SYRK((__k), (__n)) + 2.0 * FADDS_SYRK((__k), (__n)) )
199 #define FLOPS_DSYRK(__k, __n) ( FMULS_SYRK((__k), (__n)) + FADDS_SYRK((__k), (__n)) )
200 #define FLOPS_SSYRK(__k, __n) ( FMULS_SYRK((__k), (__n)) + FADDS_SYRK((__k), (__n)) )
201 
202 #define FLOPS_ZHER2K(__k, __n) (6. * FMULS_HER2K((__k), (__n)) + 2.0 * FADDS_HER2K((__k), (__n)) )
203 #define FLOPS_CHER2K(__k, __n) (6. * FMULS_HER2K((__k), (__n)) + 2.0 * FADDS_HER2K((__k), (__n)) )
204 
205 #define FLOPS_ZSYR2K(__k, __n) (6. * FMULS_SYR2K((__k), (__n)) + 2.0 * FADDS_SYR2K((__k), (__n)) )
206 #define FLOPS_CSYR2K(__k, __n) (6. * FMULS_SYR2K((__k), (__n)) + 2.0 * FADDS_SYR2K((__k), (__n)) )
207 #define FLOPS_DSYR2K(__k, __n) ( FMULS_SYR2K((__k), (__n)) + FADDS_SYR2K((__k), (__n)) )
208 #define FLOPS_SSYR2K(__k, __n) ( FMULS_SYR2K((__k), (__n)) + FADDS_SYR2K((__k), (__n)) )
209 
210 #define FLOPS_ZTRMM(__side, __m, __n) (6. * FMULS_TRMM(__side, (__m), (__n)) + 2.0 * FADDS_TRMM(__side, (__m), (__n)) )
211 #define FLOPS_CTRMM(__side, __m, __n) (6. * FMULS_TRMM(__side, (__m), (__n)) + 2.0 * FADDS_TRMM(__side, (__m), (__n)) )
212 #define FLOPS_DTRMM(__side, __m, __n) ( FMULS_TRMM(__side, (__m), (__n)) + FADDS_TRMM(__side, (__m), (__n)) )
213 #define FLOPS_STRMM(__side, __m, __n) ( FMULS_TRMM(__side, (__m), (__n)) + FADDS_TRMM(__side, (__m), (__n)) )
214 
215 #define FLOPS_ZTRSM(__side, __m, __n) (6. * FMULS_TRSM(__side, (__m), (__n)) + 2.0 * FADDS_TRSM(__side, (__m), (__n)) )
216 #define FLOPS_CTRSM(__side, __m, __n) (6. * FMULS_TRSM(__side, (__m), (__n)) + 2.0 * FADDS_TRSM(__side, (__m), (__n)) )
217 #define FLOPS_DTRSM(__side, __m, __n) ( FMULS_TRSM(__side, (__m), (__n)) + FADDS_TRSM(__side, (__m), (__n)) )
218 #define FLOPS_STRSM(__side, __m, __n) ( FMULS_TRSM(__side, (__m), (__n)) + FADDS_TRSM(__side, (__m), (__n)) )
219 
220 /*
221  * Lapack
222  */
223 #define FLOPS_ZGETRF(__m, __n) (6. * FMULS_GETRF((__m), (__n)) + 2.0 * FADDS_GETRF((__m), (__n)) )
224 #define FLOPS_CGETRF(__m, __n) (6. * FMULS_GETRF((__m), (__n)) + 2.0 * FADDS_GETRF((__m), (__n)) )
225 #define FLOPS_DGETRF(__m, __n) ( FMULS_GETRF((__m), (__n)) + FADDS_GETRF((__m), (__n)) )
226 #define FLOPS_SGETRF(__m, __n) ( FMULS_GETRF((__m), (__n)) + FADDS_GETRF((__m), (__n)) )
227 
228 #define FLOPS_ZGETRI(__n) (6. * FMULS_GETRI((__n)) + 2.0 * FADDS_GETRI((__n)) )
229 #define FLOPS_CGETRI(__n) (6. * FMULS_GETRI((__n)) + 2.0 * FADDS_GETRI((__n)) )
230 #define FLOPS_DGETRI(__n) ( FMULS_GETRI((__n)) + FADDS_GETRI((__n)) )
231 #define FLOPS_SGETRI(__n) ( FMULS_GETRI((__n)) + FADDS_GETRI((__n)) )
232 
233 #define FLOPS_ZGETRS(__n, __nrhs) (6. * FMULS_GETRS((__n), (__nrhs)) + 2.0 * FADDS_GETRS((__n), (__nrhs)) )
234 #define FLOPS_CGETRS(__n, __nrhs) (6. * FMULS_GETRS((__n), (__nrhs)) + 2.0 * FADDS_GETRS((__n), (__nrhs)) )
235 #define FLOPS_DGETRS(__n, __nrhs) ( FMULS_GETRS((__n), (__nrhs)) + FADDS_GETRS((__n), (__nrhs)) )
236 #define FLOPS_SGETRS(__n, __nrhs) ( FMULS_GETRS((__n), (__nrhs)) + FADDS_GETRS((__n), (__nrhs)) )
237 
238 #define FLOPS_ZPOTRF(__n) (6. * FMULS_POTRF((__n)) + 2.0 * FADDS_POTRF((__n)) )
239 #define FLOPS_CPOTRF(__n) (6. * FMULS_POTRF((__n)) + 2.0 * FADDS_POTRF((__n)) )
240 #define FLOPS_DPOTRF(__n) ( FMULS_POTRF((__n)) + FADDS_POTRF((__n)) )
241 #define FLOPS_SPOTRF(__n) ( FMULS_POTRF((__n)) + FADDS_POTRF((__n)) )
242 
243 #define FLOPS_ZPOTRI(__n) (6. * FMULS_POTRI((__n)) + 2.0 * FADDS_POTRI((__n)) )
244 #define FLOPS_CPOTRI(__n) (6. * FMULS_POTRI((__n)) + 2.0 * FADDS_POTRI((__n)) )
245 #define FLOPS_DPOTRI(__n) ( FMULS_POTRI((__n)) + FADDS_POTRI((__n)) )
246 #define FLOPS_SPOTRI(__n) ( FMULS_POTRI((__n)) + FADDS_POTRI((__n)) )
247 
248 #define FLOPS_ZPOTRS(__n, __nrhs) (6. * FMULS_POTRS((__n), (__nrhs)) + 2.0 * FADDS_POTRS((__n), (__nrhs)) )
249 #define FLOPS_CPOTRS(__n, __nrhs) (6. * FMULS_POTRS((__n), (__nrhs)) + 2.0 * FADDS_POTRS((__n), (__nrhs)) )
250 #define FLOPS_DPOTRS(__n, __nrhs) ( FMULS_POTRS((__n), (__nrhs)) + FADDS_POTRS((__n), (__nrhs)) )
251 #define FLOPS_SPOTRS(__n, __nrhs) ( FMULS_POTRS((__n), (__nrhs)) + FADDS_POTRS((__n), (__nrhs)) )
252 
253 #define FLOPS_ZGEQRF(__m, __n) (6. * FMULS_GEQRF((__m), (__n)) + 2.0 * FADDS_GEQRF((__m), (__n)) )
254 #define FLOPS_CGEQRF(__m, __n) (6. * FMULS_GEQRF((__m), (__n)) + 2.0 * FADDS_GEQRF((__m), (__n)) )
255 #define FLOPS_DGEQRF(__m, __n) ( FMULS_GEQRF((__m), (__n)) + FADDS_GEQRF((__m), (__n)) )
256 #define FLOPS_SGEQRF(__m, __n) ( FMULS_GEQRF((__m), (__n)) + FADDS_GEQRF((__m), (__n)) )
257 
258 #define FLOPS_ZGEQLF(__m, __n) (6. * FMULS_GEQLF((__m), (__n)) + 2.0 * FADDS_GEQLF((__m), (__n)) )
259 #define FLOPS_CGEQLF(__m, __n) (6. * FMULS_GEQLF((__m), (__n)) + 2.0 * FADDS_GEQLF((__m), (__n)) )
260 #define FLOPS_DGEQLF(__m, __n) ( FMULS_GEQLF((__m), (__n)) + FADDS_GEQLF((__m), (__n)) )
261 #define FLOPS_SGEQLF(__m, __n) ( FMULS_GEQLF((__m), (__n)) + FADDS_GEQLF((__m), (__n)) )
262 
263 #define FLOPS_ZGERQF(__m, __n) (6. * FMULS_GERQF((__m), (__n)) + 2.0 * FADDS_GERQF((__m), (__n)) )
264 #define FLOPS_CGERQF(__m, __n) (6. * FMULS_GERQF((__m), (__n)) + 2.0 * FADDS_GERQF((__m), (__n)) )
265 #define FLOPS_DGERQF(__m, __n) ( FMULS_GERQF((__m), (__n)) + FADDS_GERQF((__m), (__n)) )
266 #define FLOPS_SGERQF(__m, __n) ( FMULS_GERQF((__m), (__n)) + FADDS_GERQF((__m), (__n)) )
267 
268 #define FLOPS_ZGELQF(__m, __n) (6. * FMULS_GELQF((__m), (__n)) + 2.0 * FADDS_GELQF((__m), (__n)) )
269 #define FLOPS_CGELQF(__m, __n) (6. * FMULS_GELQF((__m), (__n)) + 2.0 * FADDS_GELQF((__m), (__n)) )
270 #define FLOPS_DGELQF(__m, __n) ( FMULS_GELQF((__m), (__n)) + FADDS_GELQF((__m), (__n)) )
271 #define FLOPS_SGELQF(__m, __n) ( FMULS_GELQF((__m), (__n)) + FADDS_GELQF((__m), (__n)) )
272 
273 #define FLOPS_ZUNGQR(__m, __n, __k) (6. * FMULS_UNGQR((__m), (__n), (__k)) + 2.0 * FADDS_UNGQR((__m), (__n), (__k)) )
274 #define FLOPS_CUNGQR(__m, __n, __k) (6. * FMULS_UNGQR((__m), (__n), (__k)) + 2.0 * FADDS_UNGQR((__m), (__n), (__k)) )
275 #define FLOPS_DUNGQR(__m, __n, __k) ( FMULS_UNGQR((__m), (__n), (__k)) + FADDS_UNGQR((__m), (__n), (__k)) )
276 #define FLOPS_SUNGQR(__m, __n, __k) ( FMULS_UNGQR((__m), (__n), (__k)) + FADDS_UNGQR((__m), (__n), (__k)) )
277 
278 #define FLOPS_ZUNGQL(__m, __n, __k) (6. * FMULS_UNGQL((__m), (__n), (__k)) + 2.0 * FADDS_UNGQL((__m), (__n), (__k)) )
279 #define FLOPS_CUNGQL(__m, __n, __k) (6. * FMULS_UNGQL((__m), (__n), (__k)) + 2.0 * FADDS_UNGQL((__m), (__n), (__k)) )
280 #define FLOPS_DUNGQL(__m, __n, __k) ( FMULS_UNGQL((__m), (__n), (__k)) + FADDS_UNGQL((__m), (__n), (__k)) )
281 #define FLOPS_SUNGQL(__m, __n, __k) ( FMULS_UNGQL((__m), (__n), (__k)) + FADDS_UNGQL((__m), (__n), (__k)) )
282 
283 #define FLOPS_ZORGQR(__m, __n, __k) (6. * FMULS_ORGQR((__m), (__n), (__k)) + 2.0 * FADDS_ORGQR((__m), (__n), (__k)) )
284 #define FLOPS_CORGQR(__m, __n, __k) (6. * FMULS_ORGQR((__m), (__n), (__k)) + 2.0 * FADDS_ORGQR((__m), (__n), (__k)) )
285 #define FLOPS_DORGQR(__m, __n, __k) ( FMULS_ORGQR((__m), (__n), (__k)) + FADDS_ORGQR((__m), (__n), (__k)) )
286 #define FLOPS_SORGQR(__m, __n, __k) ( FMULS_ORGQR((__m), (__n), (__k)) + FADDS_ORGQR((__m), (__n), (__k)) )
287 
288 #define FLOPS_ZORGQL(__m, __n, __k) (6. * FMULS_ORGQL((__m), (__n), (__k)) + 2.0 * FADDS_ORGQL((__m), (__n), (__k)) )
289 #define FLOPS_CORGQL(__m, __n, __k) (6. * FMULS_ORGQL((__m), (__n), (__k)) + 2.0 * FADDS_ORGQL((__m), (__n), (__k)) )
290 #define FLOPS_DORGQL(__m, __n, __k) ( FMULS_ORGQL((__m), (__n), (__k)) + FADDS_ORGQL((__m), (__n), (__k)) )
291 #define FLOPS_SORGQL(__m, __n, __k) ( FMULS_ORGQL((__m), (__n), (__k)) + FADDS_ORGQL((__m), (__n), (__k)) )
292 
293 #define FLOPS_ZUNGRQ(__m, __n, __k) (6. * FMULS_UNGRQ((__m), (__n), (__k)) + 2.0 * FADDS_UNGRQ((__m), (__n), (__k)) )
294 #define FLOPS_CUNGRQ(__m, __n, __k) (6. * FMULS_UNGRQ((__m), (__n), (__k)) + 2.0 * FADDS_UNGRQ((__m), (__n), (__k)) )
295 #define FLOPS_DUNGRQ(__m, __n, __k) ( FMULS_UNGRQ((__m), (__n), (__k)) + FADDS_UNGRQ((__m), (__n), (__k)) )
296 #define FLOPS_SUNGRQ(__m, __n, __k) ( FMULS_UNGRQ((__m), (__n), (__k)) + FADDS_UNGRQ((__m), (__n), (__k)) )
297 
298 #define FLOPS_ZUNGLQ(__m, __n, __k) (6. * FMULS_UNGLQ((__m), (__n), (__k)) + 2.0 * FADDS_UNGLQ((__m), (__n), (__k)) )
299 #define FLOPS_CUNGLQ(__m, __n, __k) (6. * FMULS_UNGLQ((__m), (__n), (__k)) + 2.0 * FADDS_UNGLQ((__m), (__n), (__k)) )
300 #define FLOPS_DUNGLQ(__m, __n, __k) ( FMULS_UNGLQ((__m), (__n), (__k)) + FADDS_UNGLQ((__m), (__n), (__k)) )
301 #define FLOPS_SUNGLQ(__m, __n, __k) ( FMULS_UNGLQ((__m), (__n), (__k)) + FADDS_UNGLQ((__m), (__n), (__k)) )
302 
303 #define FLOPS_ZORGRQ(__m, __n, __k) (6. * FMULS_ORGRQ((__m), (__n), (__k)) + 2.0 * FADDS_ORGRQ((__m), (__n), (__k)) )
304 #define FLOPS_CORGRQ(__m, __n, __k) (6. * FMULS_ORGRQ((__m), (__n), (__k)) + 2.0 * FADDS_ORGRQ((__m), (__n), (__k)) )
305 #define FLOPS_DORGRQ(__m, __n, __k) ( FMULS_ORGRQ((__m), (__n), (__k)) + FADDS_ORGRQ((__m), (__n), (__k)) )
306 #define FLOPS_SORGRQ(__m, __n, __k) ( FMULS_ORGRQ((__m), (__n), (__k)) + FADDS_ORGRQ((__m), (__n), (__k)) )
307 
308 #define FLOPS_ZORGLQ(__m, __n, __k) (6. * FMULS_ORGLQ((__m), (__n), (__k)) + 2.0 * FADDS_ORGLQ((__m), (__n), (__k)) )
309 #define FLOPS_CORGLQ(__m, __n, __k) (6. * FMULS_ORGLQ((__m), (__n), (__k)) + 2.0 * FADDS_ORGLQ((__m), (__n), (__k)) )
310 #define FLOPS_DORGLQ(__m, __n, __k) ( FMULS_ORGLQ((__m), (__n), (__k)) + FADDS_ORGLQ((__m), (__n), (__k)) )
311 #define FLOPS_SORGLQ(__m, __n, __k) ( FMULS_ORGLQ((__m), (__n), (__k)) + FADDS_ORGLQ((__m), (__n), (__k)) )
312 
313 #define FLOPS_ZGEQRS(__m, __n, __nrhs) (6. * FMULS_GEQRS((__m), (__n), (__nrhs)) + 2.0 * FADDS_GEQRS((__m), (__n), (__nrhs)) )
314 #define FLOPS_CGEQRS(__m, __n, __nrhs) (6. * FMULS_GEQRS((__m), (__n), (__nrhs)) + 2.0 * FADDS_GEQRS((__m), (__n), (__nrhs)) )
315 #define FLOPS_DGEQRS(__m, __n, __nrhs) ( FMULS_GEQRS((__m), (__n), (__nrhs)) + FADDS_GEQRS((__m), (__n), (__nrhs)) )
316 #define FLOPS_SGEQRS(__m, __n, __nrhs) ( FMULS_GEQRS((__m), (__n), (__nrhs)) + FADDS_GEQRS((__m), (__n), (__nrhs)) )
317 
318 #define FLOPS_ZTRTRI(__n) (6. * FMULS_TRTRI((__n)) + 2.0 * FADDS_TRTRI((__n)) )
319 #define FLOPS_CTRTRI(__n) (6. * FMULS_TRTRI((__n)) + 2.0 * FADDS_TRTRI((__n)) )
320 #define FLOPS_DTRTRI(__n) ( FMULS_TRTRI((__n)) + FADDS_TRTRI((__n)) )
321 #define FLOPS_STRTRI(__n) ( FMULS_TRTRI((__n)) + FADDS_TRTRI((__n)) )
322 
323 #define FLOPS_ZGEHRD(__n) (6. * FMULS_GEHRD((__n)) + 2.0 * FADDS_GEHRD((__n)) )
324 #define FLOPS_CGEHRD(__n) (6. * FMULS_GEHRD((__n)) + 2.0 * FADDS_GEHRD((__n)) )
325 #define FLOPS_DGEHRD(__n) ( FMULS_GEHRD((__n)) + FADDS_GEHRD((__n)) )
326 #define FLOPS_SGEHRD(__n) ( FMULS_GEHRD((__n)) + FADDS_GEHRD((__n)) )
327 
328 #define FLOPS_ZHETRD(__n) (6. * FMULS_HETRD((__n)) + 2.0 * FADDS_HETRD((__n)) )
329 #define FLOPS_CHETRD(__n) (6. * FMULS_HETRD((__n)) + 2.0 * FADDS_HETRD((__n)) )
330 
331 #define FLOPS_ZSYTRD(__n) (6. * FMULS_SYTRD((__n)) + 2.0 * FADDS_SYTRD((__n)) )
332 #define FLOPS_CSYTRD(__n) (6. * FMULS_SYTRD((__n)) + 2.0 * FADDS_SYTRD((__n)) )
333 #define FLOPS_DSYTRD(__n) ( FMULS_SYTRD((__n)) + FADDS_SYTRD((__n)) )
334 #define FLOPS_SSYTRD(__n) ( FMULS_SYTRD((__n)) + FADDS_SYTRD((__n)) )
335 
336 #define FLOPS_ZGEBRD(__m, __n) (6. * FMULS_GEBRD((__m), (__n)) + 2.0 * FADDS_GEBRD((__m), (__n)) )
337 #define FLOPS_CGEBRD(__m, __n) (6. * FMULS_GEBRD((__m), (__n)) + 2.0 * FADDS_GEBRD((__m), (__n)) )
338 #define FLOPS_DGEBRD(__m, __n) ( FMULS_GEBRD((__m), (__n)) + FADDS_GEBRD((__m), (__n)) )
339 #define FLOPS_SGEBRD(__m, __n) ( FMULS_GEBRD((__m), (__n)) + FADDS_GEBRD((__m), (__n)) )
340 
341 #endif /* _FLOPS_H_ */