PLASMA
2.4.5
PLASMA - Parallel Linear Algebra for Scalable Multi-core Architectures
Main Page
Modules
Namespaces
Data Structures
Files
File List
Globals
All
Data Structures
Namespaces
Files
Functions
Variables
Typedefs
Enumerations
Enumerator
Macros
Groups
flops.h
Go to the documentation of this file.
1
12
/*
13
* This file provide the flops formula for all Level 3 BLAS and some
14
* Lapack routines. Each macro uses the same size parameters as the
15
* function associated and provide one formula for additions and one
16
* for multiplications. Example to use these macros:
17
*
18
* FLOPS_ZGEMM( m, n, k )
19
*
20
* All the formula are reported in the LAPACK Lawn 41:
21
* http://www.netlib.org/lapack/lawns/lawn41.ps
22
*/
23
#ifndef _FLOPS_H_
24
#define _FLOPS_H_
25
26
/************************************************************************
27
* Generic formula coming from LAWN 41
28
***********************************************************************/
29
30
/*
31
* Level 2 BLAS
32
*/
33
#define FMULS_GEMV(__m, __n) ((double)(__m) * (double)(__n) + 2. * (double)(__m))
34
#define FADDS_GEMV(__m, __n) ((double)(__m) * (double)(__n) )
35
36
#define FMULS_SYMV(__n) FMULS_GEMV( (__n), (__n) )
37
#define FADDS_SYMV(__n) FADDS_GEMV( (__n), (__n) )
38
#define FMULS_HEMV FMULS_SYMV
39
#define FADDS_HEMV FADDS_SYMV
40
41
/*
42
* Level 3 BLAS
43
*/
44
#define FMULS_GEMM(__m, __n, __k) ((double)(__m) * (double)(__n) * (double)(__k))
45
#define FADDS_GEMM(__m, __n, __k) ((double)(__m) * (double)(__n) * (double)(__k))
46
47
#define FMULS_SYMM(__side, __m, __n) ( ( (__side) == PlasmaLeft ) ? FMULS_GEMM((__m), (__m), (__n)) : FMULS_GEMM((__m), (__n), (__n)) )
48
#define FADDS_SYMM(__side, __m, __n) ( ( (__side) == PlasmaLeft ) ? FADDS_GEMM((__m), (__m), (__n)) : FADDS_GEMM((__m), (__n), (__n)) )
49
#define FMULS_HEMM FMULS_SYMM
50
#define FADDS_HEMM FADDS_SYMM
51
52
#define FMULS_SYRK(__k, __n) (0.5 * (double)(__k) * (double)(__n) * ((double)(__n)+1.))
53
#define FADDS_SYRK(__k, __n) (0.5 * (double)(__k) * (double)(__n) * ((double)(__n)+1.))
54
#define FMULS_HERK FMULS_SYRK
55
#define FADDS_HERK FADDS_SYRK
56
57
#define FMULS_SYR2K(__k, __n) ((double)(__k) * (double)(__n) * (double)(__n) )
58
#define FADDS_SYR2K(__k, __n) ((double)(__k) * (double)(__n) * (double)(__n) + (double)(__n))
59
#define FMULS_HER2K FMULS_SYR2K
60
#define FADDS_HER2K FADDS_SYR2K
61
62
#define FMULS_TRMM_2(__m, __n) (0.5 * (double)(__n) * (double)(__m) * ((double)(__m)+1.))
63
#define FADDS_TRMM_2(__m, __n) (0.5 * (double)(__n) * (double)(__m) * ((double)(__m)-1.))
64
65
66
#define FMULS_TRMM(__side, __m, __n) ( ( (__side) == PlasmaLeft ) ? FMULS_TRMM_2((__m), (__n)) : FMULS_TRMM_2((__n), (__m)) )
67
#define FADDS_TRMM(__side, __m, __n) ( ( (__side) == PlasmaLeft ) ? FADDS_TRMM_2((__m), (__n)) : FADDS_TRMM_2((__n), (__m)) )
68
69
#define FMULS_TRSM FMULS_TRMM
70
#define FADDS_TRSM FMULS_TRMM
71
72
/*
73
* Lapack
74
*/
75
#define FMULS_GETRF(__m, __n) ( ((__m) < (__n)) ? (0.5 * (double)(__m) * ((double)(__m) * ((double)(__n) - (1./3.) * (__m) - 1. ) + (double)(__n)) + (2. / 3.) * (__m)) \
76
: (0.5 * (double)(__n) * ((double)(__n) * ((double)(__m) - (1./3.) * (__n) - 1. ) + (double)(__m)) + (2. / 3.) * (__n)) )
77
#define FADDS_GETRF(__m, __n) ( ((__m) < (__n)) ? (0.5 * (double)(__m) * ((double)(__m) * ((double)(__n) - (1./3.) * (__m) ) - (double)(__n)) + (1. / 6.) * (__m)) \
78
: (0.5 * (double)(__n) * ((double)(__n) * ((double)(__m) - (1./3.) * (__n) ) - (double)(__m)) + (1. / 6.) * (__n)) )
79
80
#define FMULS_GETRI(__n) ( (double)(__n) * ((5. / 6.) + (double)(__n) * ((2. / 3.) * (double)(__n) + 0.5)) )
81
#define FADDS_GETRI(__n) ( (double)(__n) * ((5. / 6.) + (double)(__n) * ((2. / 3.) * (double)(__n) - 1.5)) )
82
83
#define FMULS_GETRS(__n, __nrhs) ((double)(__nrhs) * (double)(__n) * (double)(__n) )
84
#define FADDS_GETRS(__n, __nrhs) ((double)(__nrhs) * (double)(__n) * ((double)(__n) - 1. ))
85
86
#define FMULS_POTRF(__n) ((double)(__n) * (((1. / 6.) * (double)(__n) + 0.5) * (double)(__n) + (1. / 3.)))
87
#define FADDS_POTRF(__n) ((double)(__n) * (((1. / 6.) * (double)(__n) ) * (double)(__n) - (1. / 6.)))
88
89
#define FMULS_POTRI(__n) ( (double)(__n) * ((2. / 3.) + (double)(__n) * ((1. / 3.) * (double)(__n) + 1. )) )
90
#define FADDS_POTRI(__n) ( (double)(__n) * ((1. / 6.) + (double)(__n) * ((1. / 3.) * (double)(__n) - 0.5)) )
91
92
#define FMULS_POTRS(__n, __nrhs) ((double)(__nrhs) * (double)(__n) * ((double)(__n) + 1. ))
93
#define FADDS_POTRS(__n, __nrhs) ((double)(__nrhs) * (double)(__n) * ((double)(__n) - 1. ))
94
95
//SPBTRF
96
//SPBTRS
97
//SSYTRF
98
//SSYTRI
99
//SSYTRS
100
101
#define FMULS_GEQRF(__m, __n) (((__m) > (__n)) ? ((double)(__n) * ((double)(__n) * ( 0.5-(1./3.) * (double)(__n) + (double)(__m)) + (double)(__m) + 23. / 6.)) \
102
: ((double)(__m) * ((double)(__m) * ( -0.5-(1./3.) * (double)(__m) + (double)(__n)) + 2.*(double)(__n) + 23. / 6.)) )
103
#define FADDS_GEQRF(__m, __n) (((__m) > (__n)) ? ((double)(__n) * ((double)(__n) * ( 0.5-(1./3.) * (double)(__n) + (double)(__m)) + 5. / 6.)) \
104
: ((double)(__m) * ((double)(__m) * ( -0.5-(1./3.) * (double)(__m) + (double)(__n)) + (double)(__n) + 5. / 6.)) )
105
106
#define FMULS_GEQLF(__m, __n) FMULS_GEQRF(__m, __n)
107
#define FADDS_GEQLF(__m, __n) FADDS_GEQRF(__m, __n)
108
109
#define FMULS_GERQF(__m, __n) (((__m) > (__n)) ? ((double)(__n) * ((double)(__n) * ( 0.5-(1./3.) * (double)(__n) + (double)(__m)) + (double)(__m) + 29. / 6.)) \
110
: ((double)(__m) * ((double)(__m) * ( -0.5-(1./3.) * (double)(__m) + (double)(__n)) + 2.*(double)(__n) + 29. / 6.)) )
111
#define FADDS_GERQF(__m, __n) (((__m) > (__n)) ? ((double)(__n) * ((double)(__n) * ( -0.5-(1./3.) * (double)(__n) + (double)(__m)) + (double)(__m) + 5. / 6.)) \
112
: ((double)(__m) * ((double)(__m) * ( 0.5-(1./3.) * (double)(__m) + (double)(__n)) + + 5. / 6.)) )
113
114
#define FMULS_GELQF(__m, __n) FMULS_GERQF(__m, __n)
115
#define FADDS_GELQF(__m, __n) FADDS_GERQF(__m, __n)
116
117
#define FMULS_UNGQR(__m, __n, __k) ((double)(__k) * (2.* (double)(__m) * (double)(__n) + 2. * (double)(__n) - 5./3. + (double)(__k) * ( 2./3. * (double)(__k) - ((double)(__m) + (double)(__n)) - 1.)))
118
#define FADDS_UNGQR(__m, __n, __k) ((double)(__k) * (2.* (double)(__m) * (double)(__n) + (double)(__n) - (double)(__m) + 1./3. + (double)(__k) * ( 2./3. * (double)(__k) - ((double)(__m) + (double)(__n)) )))
119
#define FMULS_UNGQL FMULS_UNGQR
120
#define FMULS_ORGQR FMULS_UNGQR
121
#define FMULS_ORGQL FMULS_UNGQR
122
#define FADDS_UNGQL FADDS_UNGQR
123
#define FADDS_ORGQR FADDS_UNGQR
124
#define FADDS_ORGQL FADDS_UNGQR
125
126
#define FMULS_UNGRQ(__m, __n, __k) ((double)(__k) * (2.* (double)(__m) * (double)(__n) + (double)(__m) + (double)(__n) - 2./3. + (double)(__k) * ( 2./3. * (double)(__k) - ((double)(__m) + (double)(__n)) - 1.)))
127
#define FADDS_UNGRQ(__m, __n, __k) ((double)(__k) * (2.* (double)(__m) * (double)(__n) + (double)(__m) - (double)(__n) + 1./3. + (double)(__k) * ( 2./3. * (double)(__k) - ((double)(__m) + (double)(__n)) )))
128
#define FMULS_UNGLQ FMULS_UNGRQ
129
#define FMULS_ORGRQ FMULS_UNGRQ
130
#define FMULS_ORGLQ FMULS_UNGRQ
131
#define FADDS_UNGLQ FADDS_UNGRQ
132
#define FADDS_ORGRQ FADDS_UNGRQ
133
#define FADDS_ORGLQ FADDS_UNGRQ
134
135
#define FMULS_GEQRS(__m, __n, __nrhs) ((double)(__nrhs) * ((double)(__n) * ( 2.* (double)(__m) - 0.5 * (double)(__n) + 2.5)))
136
#define FADDS_GEQRS(__m, __n, __nrhs) ((double)(__nrhs) * ((double)(__n) * ( 2.* (double)(__m) - 0.5 * (double)(__n) + 0.5)))
137
138
//UNMQR, UNMLQ, UNMQL, UNMRQ (Left)
139
//UNMQR, UNMLQ, UNMQL, UNMRQ (Right)
140
141
#define FMULS_TRTRI(__n) ((double)(__n) * ((double)(__n) * ( 1./6. * (double)(__n) + 0.5 ) + 1./3.))
142
#define FADDS_TRTRI(__n) ((double)(__n) * ((double)(__n) * ( 1./6. * (double)(__n) - 0.5 ) + 1./3.))
143
144
#define FMULS_GEHRD(__n) ( (double)(__n) * ((double)(__n) * (5./3. *(double)(__n) + 0.5) - 7./6.) - 13. )
145
#define FADDS_GEHRD(__n) ( (double)(__n) * ((double)(__n) * (5./3. *(double)(__n) - 1. ) - 2./3.) - 8. )
146
147
#define FMULS_SYTRD(__n) ( (double)(__n) * ( (double)(__n) * ( 2./3. * (double)(__n) + 2.5 ) - 1./6. ) - 15.)
148
#define FADDS_SYTRD(__n) ( (double)(__n) * ( (double)(__n) * ( 2./3. * (double)(__n) + 1. ) - 8./3. ) - 4.)
149
#define FMULS_HETRD FMULS_SYTRD
150
#define FADDS_HETRD FADDS_SYTRD
151
152
#define FMULS_GEBRD(__m, __n) ( ((__m) >= (__n)) ? ((double)(__n) * ((double)(__n) * (2. * (double)(__m) - 2./3. * (double)(__n) + 2. ) + 20./3.)) \
153
: ((double)(__m) * ((double)(__m) * (2. * (double)(__n) - 2./3. * (double)(__m) + 2. ) + 20./3.)) )
154
#define FADDS_GEBRD(__m, __n) ( ((__m) >= (__n)) ? ((double)(__n) * ((double)(__n) * (2. * (double)(__m) - 2./3. * (double)(__n) + 1. ) - (double)(__m) + 5./3.)) \
155
: ((double)(__m) * ((double)(__m) * (2. * (double)(__n) - 2./3. * (double)(__m) + 1. ) - (double)(__n) + 5./3.)) )
156
157
158
/*******************************************************************************
159
* Users functions
160
******************************************************************************/
161
162
/*
163
* Level 2 BLAS
164
*/
165
#define FLOPS_ZGEMV(__m, __n) (6. * FMULS_GEMV((__m), (__n)) + 2.0 * FADDS_GEMV((__m), (__n)) )
166
#define FLOPS_CGEMV(__m, __n) (6. * FMULS_GEMV((__m), (__n)) + 2.0 * FADDS_GEMV((__m), (__n)) )
167
#define FLOPS_DGEMV(__m, __n) ( FMULS_GEMV((__m), (__n)) + FADDS_GEMV((__m), (__n)) )
168
#define FLOPS_SGEMV(__m, __n) ( FMULS_GEMV((__m), (__n)) + FADDS_GEMV((__m), (__n)) )
169
170
#define FLOPS_ZHEMV(__n) (6. * FMULS_HEMV((__n)) + 2.0 * FADDS_HEMV((__n)) )
171
#define FLOPS_CHEMV(__n) (6. * FMULS_HEMV((__n)) + 2.0 * FADDS_HEMV((__n)) )
172
173
#define FLOPS_ZSYMV(__n) (6. * FMULS_SYMV((__n)) + 2.0 * FADDS_SYMV((__n)) )
174
#define FLOPS_CSYMV(__n) (6. * FMULS_SYMV((__n)) + 2.0 * FADDS_SYMV((__n)) )
175
#define FLOPS_DSYMV(__n) ( FMULS_SYMV((__n)) + FADDS_SYMV((__n)) )
176
#define FLOPS_SSYMV(__n) ( FMULS_SYMV((__n)) + FADDS_SYMV((__n)) )
177
178
/*
179
* Level 3 BLAS
180
*/
181
#define FLOPS_ZGEMM(__m, __n, __k) (6. * FMULS_GEMM((__m), (__n), (__k)) + 2.0 * FADDS_GEMM((__m), (__n), (__k)) )
182
#define FLOPS_CGEMM(__m, __n, __k) (6. * FMULS_GEMM((__m), (__n), (__k)) + 2.0 * FADDS_GEMM((__m), (__n), (__k)) )
183
#define FLOPS_DGEMM(__m, __n, __k) ( FMULS_GEMM((__m), (__n), (__k)) + FADDS_GEMM((__m), (__n), (__k)) )
184
#define FLOPS_SGEMM(__m, __n, __k) ( FMULS_GEMM((__m), (__n), (__k)) + FADDS_GEMM((__m), (__n), (__k)) )
185
186
#define FLOPS_ZHEMM(__side, __m, __n) (6. * FMULS_HEMM(__side, (__m), (__n)) + 2.0 * FADDS_HEMM(__side, (__m), (__n)) )
187
#define FLOPS_CHEMM(__side, __m, __n) (6. * FMULS_HEMM(__side, (__m), (__n)) + 2.0 * FADDS_HEMM(__side, (__m), (__n)) )
188
189
#define FLOPS_ZSYMM(__side, __m, __n) (6. * FMULS_SYMM(__side, (__m), (__n)) + 2.0 * FADDS_SYMM(__side, (__m), (__n)) )
190
#define FLOPS_CSYMM(__side, __m, __n) (6. * FMULS_SYMM(__side, (__m), (__n)) + 2.0 * FADDS_SYMM(__side, (__m), (__n)) )
191
#define FLOPS_DSYMM(__side, __m, __n) ( FMULS_SYMM(__side, (__m), (__n)) + FADDS_SYMM(__side, (__m), (__n)) )
192
#define FLOPS_SSYMM(__side, __m, __n) ( FMULS_SYMM(__side, (__m), (__n)) + FADDS_SYMM(__side, (__m), (__n)) )
193
194
#define FLOPS_ZHERK(__k, __n) (6. * FMULS_HERK((__k), (__n)) + 2.0 * FADDS_HERK((__k), (__n)) )
195
#define FLOPS_CHERK(__k, __n) (6. * FMULS_HERK((__k), (__n)) + 2.0 * FADDS_HERK((__k), (__n)) )
196
197
#define FLOPS_ZSYRK(__k, __n) (6. * FMULS_SYRK((__k), (__n)) + 2.0 * FADDS_SYRK((__k), (__n)) )
198
#define FLOPS_CSYRK(__k, __n) (6. * FMULS_SYRK((__k), (__n)) + 2.0 * FADDS_SYRK((__k), (__n)) )
199
#define FLOPS_DSYRK(__k, __n) ( FMULS_SYRK((__k), (__n)) + FADDS_SYRK((__k), (__n)) )
200
#define FLOPS_SSYRK(__k, __n) ( FMULS_SYRK((__k), (__n)) + FADDS_SYRK((__k), (__n)) )
201
202
#define FLOPS_ZHER2K(__k, __n) (6. * FMULS_HER2K((__k), (__n)) + 2.0 * FADDS_HER2K((__k), (__n)) )
203
#define FLOPS_CHER2K(__k, __n) (6. * FMULS_HER2K((__k), (__n)) + 2.0 * FADDS_HER2K((__k), (__n)) )
204
205
#define FLOPS_ZSYR2K(__k, __n) (6. * FMULS_SYR2K((__k), (__n)) + 2.0 * FADDS_SYR2K((__k), (__n)) )
206
#define FLOPS_CSYR2K(__k, __n) (6. * FMULS_SYR2K((__k), (__n)) + 2.0 * FADDS_SYR2K((__k), (__n)) )
207
#define FLOPS_DSYR2K(__k, __n) ( FMULS_SYR2K((__k), (__n)) + FADDS_SYR2K((__k), (__n)) )
208
#define FLOPS_SSYR2K(__k, __n) ( FMULS_SYR2K((__k), (__n)) + FADDS_SYR2K((__k), (__n)) )
209
210
#define FLOPS_ZTRMM(__side, __m, __n) (6. * FMULS_TRMM(__side, (__m), (__n)) + 2.0 * FADDS_TRMM(__side, (__m), (__n)) )
211
#define FLOPS_CTRMM(__side, __m, __n) (6. * FMULS_TRMM(__side, (__m), (__n)) + 2.0 * FADDS_TRMM(__side, (__m), (__n)) )
212
#define FLOPS_DTRMM(__side, __m, __n) ( FMULS_TRMM(__side, (__m), (__n)) + FADDS_TRMM(__side, (__m), (__n)) )
213
#define FLOPS_STRMM(__side, __m, __n) ( FMULS_TRMM(__side, (__m), (__n)) + FADDS_TRMM(__side, (__m), (__n)) )
214
215
#define FLOPS_ZTRSM(__side, __m, __n) (6. * FMULS_TRSM(__side, (__m), (__n)) + 2.0 * FADDS_TRSM(__side, (__m), (__n)) )
216
#define FLOPS_CTRSM(__side, __m, __n) (6. * FMULS_TRSM(__side, (__m), (__n)) + 2.0 * FADDS_TRSM(__side, (__m), (__n)) )
217
#define FLOPS_DTRSM(__side, __m, __n) ( FMULS_TRSM(__side, (__m), (__n)) + FADDS_TRSM(__side, (__m), (__n)) )
218
#define FLOPS_STRSM(__side, __m, __n) ( FMULS_TRSM(__side, (__m), (__n)) + FADDS_TRSM(__side, (__m), (__n)) )
219
220
/*
221
* Lapack
222
*/
223
#define FLOPS_ZGETRF(__m, __n) (6. * FMULS_GETRF((__m), (__n)) + 2.0 * FADDS_GETRF((__m), (__n)) )
224
#define FLOPS_CGETRF(__m, __n) (6. * FMULS_GETRF((__m), (__n)) + 2.0 * FADDS_GETRF((__m), (__n)) )
225
#define FLOPS_DGETRF(__m, __n) ( FMULS_GETRF((__m), (__n)) + FADDS_GETRF((__m), (__n)) )
226
#define FLOPS_SGETRF(__m, __n) ( FMULS_GETRF((__m), (__n)) + FADDS_GETRF((__m), (__n)) )
227
228
#define FLOPS_ZGETRI(__n) (6. * FMULS_GETRI((__n)) + 2.0 * FADDS_GETRI((__n)) )
229
#define FLOPS_CGETRI(__n) (6. * FMULS_GETRI((__n)) + 2.0 * FADDS_GETRI((__n)) )
230
#define FLOPS_DGETRI(__n) ( FMULS_GETRI((__n)) + FADDS_GETRI((__n)) )
231
#define FLOPS_SGETRI(__n) ( FMULS_GETRI((__n)) + FADDS_GETRI((__n)) )
232
233
#define FLOPS_ZGETRS(__n, __nrhs) (6. * FMULS_GETRS((__n), (__nrhs)) + 2.0 * FADDS_GETRS((__n), (__nrhs)) )
234
#define FLOPS_CGETRS(__n, __nrhs) (6. * FMULS_GETRS((__n), (__nrhs)) + 2.0 * FADDS_GETRS((__n), (__nrhs)) )
235
#define FLOPS_DGETRS(__n, __nrhs) ( FMULS_GETRS((__n), (__nrhs)) + FADDS_GETRS((__n), (__nrhs)) )
236
#define FLOPS_SGETRS(__n, __nrhs) ( FMULS_GETRS((__n), (__nrhs)) + FADDS_GETRS((__n), (__nrhs)) )
237
238
#define FLOPS_ZPOTRF(__n) (6. * FMULS_POTRF((__n)) + 2.0 * FADDS_POTRF((__n)) )
239
#define FLOPS_CPOTRF(__n) (6. * FMULS_POTRF((__n)) + 2.0 * FADDS_POTRF((__n)) )
240
#define FLOPS_DPOTRF(__n) ( FMULS_POTRF((__n)) + FADDS_POTRF((__n)) )
241
#define FLOPS_SPOTRF(__n) ( FMULS_POTRF((__n)) + FADDS_POTRF((__n)) )
242
243
#define FLOPS_ZPOTRI(__n) (6. * FMULS_POTRI((__n)) + 2.0 * FADDS_POTRI((__n)) )
244
#define FLOPS_CPOTRI(__n) (6. * FMULS_POTRI((__n)) + 2.0 * FADDS_POTRI((__n)) )
245
#define FLOPS_DPOTRI(__n) ( FMULS_POTRI((__n)) + FADDS_POTRI((__n)) )
246
#define FLOPS_SPOTRI(__n) ( FMULS_POTRI((__n)) + FADDS_POTRI((__n)) )
247
248
#define FLOPS_ZPOTRS(__n, __nrhs) (6. * FMULS_POTRS((__n), (__nrhs)) + 2.0 * FADDS_POTRS((__n), (__nrhs)) )
249
#define FLOPS_CPOTRS(__n, __nrhs) (6. * FMULS_POTRS((__n), (__nrhs)) + 2.0 * FADDS_POTRS((__n), (__nrhs)) )
250
#define FLOPS_DPOTRS(__n, __nrhs) ( FMULS_POTRS((__n), (__nrhs)) + FADDS_POTRS((__n), (__nrhs)) )
251
#define FLOPS_SPOTRS(__n, __nrhs) ( FMULS_POTRS((__n), (__nrhs)) + FADDS_POTRS((__n), (__nrhs)) )
252
253
#define FLOPS_ZGEQRF(__m, __n) (6. * FMULS_GEQRF((__m), (__n)) + 2.0 * FADDS_GEQRF((__m), (__n)) )
254
#define FLOPS_CGEQRF(__m, __n) (6. * FMULS_GEQRF((__m), (__n)) + 2.0 * FADDS_GEQRF((__m), (__n)) )
255
#define FLOPS_DGEQRF(__m, __n) ( FMULS_GEQRF((__m), (__n)) + FADDS_GEQRF((__m), (__n)) )
256
#define FLOPS_SGEQRF(__m, __n) ( FMULS_GEQRF((__m), (__n)) + FADDS_GEQRF((__m), (__n)) )
257
258
#define FLOPS_ZGEQLF(__m, __n) (6. * FMULS_GEQLF((__m), (__n)) + 2.0 * FADDS_GEQLF((__m), (__n)) )
259
#define FLOPS_CGEQLF(__m, __n) (6. * FMULS_GEQLF((__m), (__n)) + 2.0 * FADDS_GEQLF((__m), (__n)) )
260
#define FLOPS_DGEQLF(__m, __n) ( FMULS_GEQLF((__m), (__n)) + FADDS_GEQLF((__m), (__n)) )
261
#define FLOPS_SGEQLF(__m, __n) ( FMULS_GEQLF((__m), (__n)) + FADDS_GEQLF((__m), (__n)) )
262
263
#define FLOPS_ZGERQF(__m, __n) (6. * FMULS_GERQF((__m), (__n)) + 2.0 * FADDS_GERQF((__m), (__n)) )
264
#define FLOPS_CGERQF(__m, __n) (6. * FMULS_GERQF((__m), (__n)) + 2.0 * FADDS_GERQF((__m), (__n)) )
265
#define FLOPS_DGERQF(__m, __n) ( FMULS_GERQF((__m), (__n)) + FADDS_GERQF((__m), (__n)) )
266
#define FLOPS_SGERQF(__m, __n) ( FMULS_GERQF((__m), (__n)) + FADDS_GERQF((__m), (__n)) )
267
268
#define FLOPS_ZGELQF(__m, __n) (6. * FMULS_GELQF((__m), (__n)) + 2.0 * FADDS_GELQF((__m), (__n)) )
269
#define FLOPS_CGELQF(__m, __n) (6. * FMULS_GELQF((__m), (__n)) + 2.0 * FADDS_GELQF((__m), (__n)) )
270
#define FLOPS_DGELQF(__m, __n) ( FMULS_GELQF((__m), (__n)) + FADDS_GELQF((__m), (__n)) )
271
#define FLOPS_SGELQF(__m, __n) ( FMULS_GELQF((__m), (__n)) + FADDS_GELQF((__m), (__n)) )
272
273
#define FLOPS_ZUNGQR(__m, __n, __k) (6. * FMULS_UNGQR((__m), (__n), (__k)) + 2.0 * FADDS_UNGQR((__m), (__n), (__k)) )
274
#define FLOPS_CUNGQR(__m, __n, __k) (6. * FMULS_UNGQR((__m), (__n), (__k)) + 2.0 * FADDS_UNGQR((__m), (__n), (__k)) )
275
#define FLOPS_DUNGQR(__m, __n, __k) ( FMULS_UNGQR((__m), (__n), (__k)) + FADDS_UNGQR((__m), (__n), (__k)) )
276
#define FLOPS_SUNGQR(__m, __n, __k) ( FMULS_UNGQR((__m), (__n), (__k)) + FADDS_UNGQR((__m), (__n), (__k)) )
277
278
#define FLOPS_ZUNGQL(__m, __n, __k) (6. * FMULS_UNGQL((__m), (__n), (__k)) + 2.0 * FADDS_UNGQL((__m), (__n), (__k)) )
279
#define FLOPS_CUNGQL(__m, __n, __k) (6. * FMULS_UNGQL((__m), (__n), (__k)) + 2.0 * FADDS_UNGQL((__m), (__n), (__k)) )
280
#define FLOPS_DUNGQL(__m, __n, __k) ( FMULS_UNGQL((__m), (__n), (__k)) + FADDS_UNGQL((__m), (__n), (__k)) )
281
#define FLOPS_SUNGQL(__m, __n, __k) ( FMULS_UNGQL((__m), (__n), (__k)) + FADDS_UNGQL((__m), (__n), (__k)) )
282
283
#define FLOPS_ZORGQR(__m, __n, __k) (6. * FMULS_ORGQR((__m), (__n), (__k)) + 2.0 * FADDS_ORGQR((__m), (__n), (__k)) )
284
#define FLOPS_CORGQR(__m, __n, __k) (6. * FMULS_ORGQR((__m), (__n), (__k)) + 2.0 * FADDS_ORGQR((__m), (__n), (__k)) )
285
#define FLOPS_DORGQR(__m, __n, __k) ( FMULS_ORGQR((__m), (__n), (__k)) + FADDS_ORGQR((__m), (__n), (__k)) )
286
#define FLOPS_SORGQR(__m, __n, __k) ( FMULS_ORGQR((__m), (__n), (__k)) + FADDS_ORGQR((__m), (__n), (__k)) )
287
288
#define FLOPS_ZORGQL(__m, __n, __k) (6. * FMULS_ORGQL((__m), (__n), (__k)) + 2.0 * FADDS_ORGQL((__m), (__n), (__k)) )
289
#define FLOPS_CORGQL(__m, __n, __k) (6. * FMULS_ORGQL((__m), (__n), (__k)) + 2.0 * FADDS_ORGQL((__m), (__n), (__k)) )
290
#define FLOPS_DORGQL(__m, __n, __k) ( FMULS_ORGQL((__m), (__n), (__k)) + FADDS_ORGQL((__m), (__n), (__k)) )
291
#define FLOPS_SORGQL(__m, __n, __k) ( FMULS_ORGQL((__m), (__n), (__k)) + FADDS_ORGQL((__m), (__n), (__k)) )
292
293
#define FLOPS_ZUNGRQ(__m, __n, __k) (6. * FMULS_UNGRQ((__m), (__n), (__k)) + 2.0 * FADDS_UNGRQ((__m), (__n), (__k)) )
294
#define FLOPS_CUNGRQ(__m, __n, __k) (6. * FMULS_UNGRQ((__m), (__n), (__k)) + 2.0 * FADDS_UNGRQ((__m), (__n), (__k)) )
295
#define FLOPS_DUNGRQ(__m, __n, __k) ( FMULS_UNGRQ((__m), (__n), (__k)) + FADDS_UNGRQ((__m), (__n), (__k)) )
296
#define FLOPS_SUNGRQ(__m, __n, __k) ( FMULS_UNGRQ((__m), (__n), (__k)) + FADDS_UNGRQ((__m), (__n), (__k)) )
297
298
#define FLOPS_ZUNGLQ(__m, __n, __k) (6. * FMULS_UNGLQ((__m), (__n), (__k)) + 2.0 * FADDS_UNGLQ((__m), (__n), (__k)) )
299
#define FLOPS_CUNGLQ(__m, __n, __k) (6. * FMULS_UNGLQ((__m), (__n), (__k)) + 2.0 * FADDS_UNGLQ((__m), (__n), (__k)) )
300
#define FLOPS_DUNGLQ(__m, __n, __k) ( FMULS_UNGLQ((__m), (__n), (__k)) + FADDS_UNGLQ((__m), (__n), (__k)) )
301
#define FLOPS_SUNGLQ(__m, __n, __k) ( FMULS_UNGLQ((__m), (__n), (__k)) + FADDS_UNGLQ((__m), (__n), (__k)) )
302
303
#define FLOPS_ZORGRQ(__m, __n, __k) (6. * FMULS_ORGRQ((__m), (__n), (__k)) + 2.0 * FADDS_ORGRQ((__m), (__n), (__k)) )
304
#define FLOPS_CORGRQ(__m, __n, __k) (6. * FMULS_ORGRQ((__m), (__n), (__k)) + 2.0 * FADDS_ORGRQ((__m), (__n), (__k)) )
305
#define FLOPS_DORGRQ(__m, __n, __k) ( FMULS_ORGRQ((__m), (__n), (__k)) + FADDS_ORGRQ((__m), (__n), (__k)) )
306
#define FLOPS_SORGRQ(__m, __n, __k) ( FMULS_ORGRQ((__m), (__n), (__k)) + FADDS_ORGRQ((__m), (__n), (__k)) )
307
308
#define FLOPS_ZORGLQ(__m, __n, __k) (6. * FMULS_ORGLQ((__m), (__n), (__k)) + 2.0 * FADDS_ORGLQ((__m), (__n), (__k)) )
309
#define FLOPS_CORGLQ(__m, __n, __k) (6. * FMULS_ORGLQ((__m), (__n), (__k)) + 2.0 * FADDS_ORGLQ((__m), (__n), (__k)) )
310
#define FLOPS_DORGLQ(__m, __n, __k) ( FMULS_ORGLQ((__m), (__n), (__k)) + FADDS_ORGLQ((__m), (__n), (__k)) )
311
#define FLOPS_SORGLQ(__m, __n, __k) ( FMULS_ORGLQ((__m), (__n), (__k)) + FADDS_ORGLQ((__m), (__n), (__k)) )
312
313
#define FLOPS_ZGEQRS(__m, __n, __nrhs) (6. * FMULS_GEQRS((__m), (__n), (__nrhs)) + 2.0 * FADDS_GEQRS((__m), (__n), (__nrhs)) )
314
#define FLOPS_CGEQRS(__m, __n, __nrhs) (6. * FMULS_GEQRS((__m), (__n), (__nrhs)) + 2.0 * FADDS_GEQRS((__m), (__n), (__nrhs)) )
315
#define FLOPS_DGEQRS(__m, __n, __nrhs) ( FMULS_GEQRS((__m), (__n), (__nrhs)) + FADDS_GEQRS((__m), (__n), (__nrhs)) )
316
#define FLOPS_SGEQRS(__m, __n, __nrhs) ( FMULS_GEQRS((__m), (__n), (__nrhs)) + FADDS_GEQRS((__m), (__n), (__nrhs)) )
317
318
#define FLOPS_ZTRTRI(__n) (6. * FMULS_TRTRI((__n)) + 2.0 * FADDS_TRTRI((__n)) )
319
#define FLOPS_CTRTRI(__n) (6. * FMULS_TRTRI((__n)) + 2.0 * FADDS_TRTRI((__n)) )
320
#define FLOPS_DTRTRI(__n) ( FMULS_TRTRI((__n)) + FADDS_TRTRI((__n)) )
321
#define FLOPS_STRTRI(__n) ( FMULS_TRTRI((__n)) + FADDS_TRTRI((__n)) )
322
323
#define FLOPS_ZGEHRD(__n) (6. * FMULS_GEHRD((__n)) + 2.0 * FADDS_GEHRD((__n)) )
324
#define FLOPS_CGEHRD(__n) (6. * FMULS_GEHRD((__n)) + 2.0 * FADDS_GEHRD((__n)) )
325
#define FLOPS_DGEHRD(__n) ( FMULS_GEHRD((__n)) + FADDS_GEHRD((__n)) )
326
#define FLOPS_SGEHRD(__n) ( FMULS_GEHRD((__n)) + FADDS_GEHRD((__n)) )
327
328
#define FLOPS_ZHETRD(__n) (6. * FMULS_HETRD((__n)) + 2.0 * FADDS_HETRD((__n)) )
329
#define FLOPS_CHETRD(__n) (6. * FMULS_HETRD((__n)) + 2.0 * FADDS_HETRD((__n)) )
330
331
#define FLOPS_ZSYTRD(__n) (6. * FMULS_SYTRD((__n)) + 2.0 * FADDS_SYTRD((__n)) )
332
#define FLOPS_CSYTRD(__n) (6. * FMULS_SYTRD((__n)) + 2.0 * FADDS_SYTRD((__n)) )
333
#define FLOPS_DSYTRD(__n) ( FMULS_SYTRD((__n)) + FADDS_SYTRD((__n)) )
334
#define FLOPS_SSYTRD(__n) ( FMULS_SYTRD((__n)) + FADDS_SYTRD((__n)) )
335
336
#define FLOPS_ZGEBRD(__m, __n) (6. * FMULS_GEBRD((__m), (__n)) + 2.0 * FADDS_GEBRD((__m), (__n)) )
337
#define FLOPS_CGEBRD(__m, __n) (6. * FMULS_GEBRD((__m), (__n)) + 2.0 * FADDS_GEBRD((__m), (__n)) )
338
#define FLOPS_DGEBRD(__m, __n) ( FMULS_GEBRD((__m), (__n)) + FADDS_GEBRD((__m), (__n)) )
339
#define FLOPS_SGEBRD(__m, __n) ( FMULS_GEBRD((__m), (__n)) + FADDS_GEBRD((__m), (__n)) )
340
341
#endif
/* _FLOPS_H_ */
plasma_2.4.5
timing
flops.h
Generated on Mon Jul 9 2012 12:45:07 for PLASMA by
1.8.1