PLASMA  2.4.5
PLASMA - Parallel Linear Algebra for Scalable Multi-core Architectures
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups
time_zgetrf_reclap.c
Go to the documentation of this file.
1 
/*
 * Precision-specific settings for the zgetrf_reclap timing test.
 * None of these macros is referenced in the visible part of this file;
 * presumably the shared driver pulled in through "./timing.c" below
 * consumes them -- TODO confirm.
 */
/* Matrix element type and the matching real precision. */
6 #define _TYPE PLASMA_Complex64_t
7 #define _PREC double
/* LAPACKE routine selected for this precision (dlamch, "_work" variant). */
8 #define _LAMCH LAPACKE_dlamch_work
9 
/* Name string for this test. */
10 #define _NAME "PLASMA_zgetrf_reclap"
/* Multiplication / addition counts: the GETRF formulas applied to (M, NRHS). */
11 /* See Lawn 41 page 120 */
12 #define _FMULS FMULS_GETRF(M, NRHS)
13 #define _FADDS FADDS_GETRF(M, NRHS)
14 
/*
 * NOTE(review): timing.c is included as source text, after the macros above
 * are defined. Presumably it provides the driver that calls RunTest() --
 * confirm against timing.c.
 */
15 #include "../control/common.h"
16 #include "./timing.c"
17 
/* Declared here, defined elsewhere; no call to it is visible in this listing. */
18 void CORE_zgetrf_reclap_init(void);
20 
21 /*
22  * WARNING: the check only works with Netlib LAPACK,
23  * which chooses the same pivot as this code.
24  * MKL uses different code and can pick a different pivot,
25  * for example if two elements have the same absolute value
26  * but not the same sign.
27  */
28 
29 static int
30 RunTest(int *iparam, double *dparam, real_Double_t *t_)
31 {
32  PASTE_CODE_IPARAM_LOCALS( iparam );
35  PLASMA_sequence *sequence = NULL;
37 
38  /* Allocate Data */
40  PASTE_CODE_ALLOCATE_MATRIX( ipiv, 1, int, max(M, NRHS), 1 );
41 
42  /* Initialiaze Data */
43  PLASMA_zplrnt(M, NRHS, A, LDA, 3456);
44 
45  /* Save A in lapack layout for check */
46  PASTE_CODE_ALLOCATE_COPY( A2, check, PLASMA_Complex64_t, A, LDA, NRHS );
47  PASTE_CODE_ALLOCATE_MATRIX( ipiv2, check, int, max(M, NRHS), 1 );
48  if ( check ) {
49  LAPACKE_zgetrf_work(LAPACK_COL_MAJOR, M, NRHS, A2, LDA, ipiv2 );
50  }
51 
52  plasma = plasma_context_self();
53  PLASMA_Sequence_Create(&sequence);
54  QUARK_Task_Flag_Set(&task_flags, TASK_SEQUENCE, (intptr_t)sequence->quark_sequence);
56 
59 
60  START_TIMING();
61  QUARK_CORE_zgetrf_reclap(plasma->quark, &task_flags,
62  M, NRHS, NRHS,
63  A, LDA, ipiv,
64  sequence, &request,
65  0, 0,
66  iparam[IPARAM_THRDNBR]);
67  PLASMA_Sequence_Wait(sequence);
68  STOP_TIMING();
69 
70  PLASMA_Sequence_Destroy(sequence);
71 
72  /* Check the solution */
73  if ( check )
74  {
75  int64_t i;
76  double *work = (double *)malloc(max(M, NRHS)*sizeof(double));
77 
78  /* Check ipiv */
79  for(i=0; i<NRHS; i++)
80  {
81  if( ipiv[i] != ipiv2[i] ) {
82  fprintf(stderr, "\nPLASMA (ipiv[%ld] = %d, A[%ld] = %e) / LAPACK (ipiv[%ld] = %d, A[%ld] = [%e])\n",
83  i, ipiv[i], i, creal(A[ i * LDA + i ]),
84  i, ipiv2[i], i, creal(A2[ i * LDA + i ]));
85  break;
86  }
87  }
88 
89  dparam[IPARAM_ANORM] = LAPACKE_zlange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaMaxNorm),
90  M, NRHS, A, LDA, work);
91  dparam[IPARAM_XNORM] = LAPACKE_zlange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaMaxNorm),
92  M, NRHS, A2, LDA, work);
93  dparam[IPARAM_BNORM] = 0.0;
94 
95  CORE_zgeadd( M, NRHS, -1.0, A, LDA, A2, LDA);
96 
97  dparam[IPARAM_RES] = LAPACKE_zlange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaMaxNorm),
98  M, NRHS, A2, LDA, work);
99 
100  free( A2 );
101  free( ipiv2 );
102  free( work );
103  }
104 
105  free( A );
106  free( ipiv );
107 
108  return 0;
109 }