PLASMA
2.4.5
PLASMA - Parallel Linear Algebra for Scalable Multi-core Architectures
Main Page
Modules
Namespaces
Data Structures
Files
File List
Globals
All
Data Structures
Namespaces
Files
Functions
Variables
Typedefs
Enumerations
Enumerator
Macros
Groups
time_cgetrf_reclap.c
Go to the documentation of this file.
1
6
/*
 * Precision-specific settings for this timing driver.
 * NOTE(review): these macros are presumably consumed by the generic
 * ./timing.c included below -- that file is not visible here, confirm.
 */
/* Element type of the matrices under test. */
#define _TYPE PLASMA_Complex32_t
7
/* Real scalar type associated with _TYPE (RunTest's dparam is float). */
#define _PREC float
8
/* LAPACKE machine-parameter routine matching _PREC. */
#define _LAMCH LAPACKE_slamch_work
9
10
/* Name string identifying this timing case. */
#define _NAME "PLASMA_cgetrf_reclap"
11
/* Operation-count formulas: see Lawn 41 page 120 */
12
/* Presumably the multiplication count of an M-by-NRHS GETRF -- confirm against FMULS_GETRF. */
#define _FMULS FMULS_GETRF(M, NRHS)
13
/* Presumably the addition count of an M-by-NRHS GETRF -- confirm against FADDS_GETRF. */
#define _FADDS FADDS_GETRF(M, NRHS)
14
15
#include "../control/common.h"
16
#include "
./timing.c
"
17
18
/*
 * Forward declarations of routines used by RunTest.
 */
/* Called once by RunTest, after plasma_dynamic_spawn() and before the timed region. */
void
CORE_cgetrf_reclap_init
(
void
);
19
/* Returns the PLASMA context; RunTest reads its `quark` member to submit the task. */
extern
plasma_context_t
*
plasma_context_self
(
void
);
20
21
/*
22
* WARNING: the check is only working with LAPACK Netlib
23
* which chooses the same pivot as this code.
24
* MKL has a different code and can pick a different pivot
25
* if two elements have the same absolute value but not the
26
* same sign for example.
27
*/
28
29
static
int
30
RunTest(
int
*iparam,
float
*dparam,
real_Double_t
*t_)
31
{
32
PASTE_CODE_IPARAM_LOCALS
( iparam );
33
plasma_context_t
*
plasma
;
34
Quark_Task_Flags
task_flags =
Quark_Task_Flags_Initializer
;
35
PLASMA_sequence
*sequence = NULL;
36
PLASMA_request
request =
PLASMA_REQUEST_INITIALIZER
;
37
38
/* Allocate Data */
39
PASTE_CODE_ALLOCATE_MATRIX
(
A
, 1,
PLASMA_Complex32_t
, LDA, NRHS );
40
PASTE_CODE_ALLOCATE_MATRIX
( ipiv, 1,
int
,
max
(M, NRHS), 1 );
41
42
/* Initialiaze Data */
43
PLASMA_cplrnt
(M, NRHS,
A
, LDA, 3456);
44
45
/* Save A in lapack layout for check */
46
PASTE_CODE_ALLOCATE_COPY
( A2, check,
PLASMA_Complex32_t
,
A
, LDA, NRHS );
47
PASTE_CODE_ALLOCATE_MATRIX
( ipiv2, check,
int
,
max
(M, NRHS), 1 );
48
if
( check ) {
49
LAPACKE_cgetrf_work(LAPACK_COL_MAJOR, M, NRHS, A2, LDA, ipiv2 );
50
}
51
52
plasma =
plasma_context_self
();
53
PLASMA_Sequence_Create
(&sequence);
54
QUARK_Task_Flag_Set
(&task_flags,
TASK_SEQUENCE
, (intptr_t)sequence->
quark_sequence
);
55
QUARK_Task_Flag_Set
(&task_flags,
TASK_THREAD_COUNT
, iparam[
IPARAM_THRDNBR
] );
56
57
plasma_dynamic_spawn
();
58
CORE_cgetrf_reclap_init
();
59
60
START_TIMING
();
61
QUARK_CORE_cgetrf_reclap
(plasma->
quark
, &task_flags,
62
M, NRHS, NRHS,
63
A
, LDA, ipiv,
64
sequence, &request,
65
0, 0,
66
iparam[
IPARAM_THRDNBR
]);
67
PLASMA_Sequence_Wait
(sequence);
68
STOP_TIMING
();
69
70
PLASMA_Sequence_Destroy
(sequence);
71
72
/* Check the solution */
73
if
( check )
74
{
75
int64_t i;
76
float
*work = (
float
*)malloc(
max
(M, NRHS)*
sizeof
(float));
77
78
/* Check ipiv */
79
for
(i=0; i<NRHS; i++)
80
{
81
if
( ipiv[i] != ipiv2[i] ) {
82
fprintf(stderr,
"\nPLASMA (ipiv[%ld] = %d, A[%ld] = %e) / LAPACK (ipiv[%ld] = %d, A[%ld] = [%e])\n"
,
83
i, ipiv[i], i, crealf(
A
[ i * LDA + i ]),
84
i, ipiv2[i], i, crealf(A2[ i * LDA + i ]));
85
break
;
86
}
87
}
88
89
dparam[
IPARAM_ANORM
] = LAPACKE_clange_work(LAPACK_COL_MAJOR,
lapack_const
(
PlasmaMaxNorm
),
90
M, NRHS,
A
, LDA, work);
91
dparam[
IPARAM_XNORM
] = LAPACKE_clange_work(LAPACK_COL_MAJOR,
lapack_const
(
PlasmaMaxNorm
),
92
M, NRHS, A2, LDA, work);
93
dparam[
IPARAM_BNORM
] = 0.0;
94
95
CORE_cgeadd
( M, NRHS, -1.0,
A
, LDA, A2, LDA);
96
97
dparam[
IPARAM_RES
] = LAPACKE_clange_work(LAPACK_COL_MAJOR,
lapack_const
(
PlasmaMaxNorm
),
98
M, NRHS, A2, LDA, work);
99
100
free( A2 );
101
free( ipiv2 );
102
free( work );
103
}
104
105
free(
A
);
106
free( ipiv );
107
108
return
0;
109
}
plasma_2.4.5
timing
time_cgetrf_reclap.c
Generated on Mon Jul 9 2012 12:45:07 for PLASMA by
1.8.1