/*
 * PLASMA 2.4.5
 * PLASMA - Parallel Linear Algebra for Scalable Multi-core Architectures
 *
 * time_zpotri_tile.c
 */
1 
6 #define _TYPE PLASMA_Complex64_t
7 #define _PREC double
8 #define _LAMCH LAPACKE_dlamch_work
9 
10 #define _NAME "PLASMA_zpotri_Tile"
11 /* See Lawn 41 page 120 */
12 #define _FMULS (FMULS_POTRF( N ) + FMULS_POTRI( N ))
13 #define _FADDS (FADDS_POTRF( N ) + FADDS_POTRI( N ))
14 
15 //#define POTRI_SYNC
16 
17 #include "./timing.c"
18 
19 static int
20 RunTest(int *iparam, double *dparam, real_Double_t *t_)
21 {
22  PASTE_CODE_IPARAM_LOCALS( iparam );
24 
25  LDA = max(LDA, N);
26 
27  /* Allocate Data */
29 
30  /*
31  * Initialize Data
32  * It's done in static to avoid having the same sequence than one
33  * the function we want to trace
34  */
35  PLASMA_zplghe_Tile( (double)N, descA, 51 );
36 
37  /* PLASMA ZPOTRF / ZTRTRI / ZLAUUM */
38  /*
39  * Example of the different way to combine several asynchonous calls
40  */
41 #if defined(TRACE_BY_SEQUENCE)
42  {
43  PLASMA_sequence *sequence[3];
46  PLASMA_REQUEST_INITIALIZER };
47 
48  PLASMA_Sequence_Create(&sequence[0]);
49  PLASMA_Sequence_Create(&sequence[1]);
50  PLASMA_Sequence_Create(&sequence[2]);
51 
52  if ( ! iparam[IPARAM_ASYNC] ) {
53  START_TIMING();
54 
55  PLASMA_zpotrf_Tile_Async(uplo, descA, sequence[0], &request[0]);
56  PLASMA_Sequence_Wait(sequence[0]);
57 
58  PLASMA_ztrtri_Tile_Async(uplo, PlasmaNonUnit, descA, sequence[1], &request[1]);
59  PLASMA_Sequence_Wait(sequence[1]);
60 
61  PLASMA_zlauum_Tile_Async(uplo, descA, sequence[2], &request[2]);
62  PLASMA_Sequence_Wait(sequence[2]);
63  STOP_TIMING();
64 
65  } else {
66 
67  START_TIMING();
68  PLASMA_zpotrf_Tile_Async(uplo, descA, sequence[0], &request[0]);
69  PLASMA_ztrtri_Tile_Async(uplo, PlasmaNonUnit, descA, sequence[1], &request[1]);
70  PLASMA_zlauum_Tile_Async(uplo, descA, sequence[2], &request[2]);
71 
72  PLASMA_Sequence_Wait(sequence[0]);
73  PLASMA_Sequence_Wait(sequence[1]);
74  PLASMA_Sequence_Wait(sequence[2]);
75  STOP_TIMING();
76  }
77 
78  PLASMA_Sequence_Destroy(sequence[0]);
79  PLASMA_Sequence_Destroy(sequence[1]);
80  PLASMA_Sequence_Destroy(sequence[2]);
81  }
82 #else
83  {
84  if ( ! iparam[IPARAM_ASYNC] ) {
85 
86  START_TIMING();
87  PLASMA_zpotrf_Tile(uplo, descA);
88  PLASMA_ztrtri_Tile(uplo, PlasmaNonUnit, descA);
89  PLASMA_zlauum_Tile(uplo, descA);
90  STOP_TIMING();
91 
92  } else {
93 
94  /* Default: we use Asynchonous call with only one sequence */
95  PLASMA_sequence *sequence;
98 
99  START_TIMING();
100  PLASMA_Sequence_Create(&sequence);
101  PLASMA_zpotrf_Tile_Async(uplo, descA, sequence, &request[0]);
102  PLASMA_zpotri_Tile_Async(uplo, descA, sequence, &request[1]);
103  PLASMA_Sequence_Wait(sequence);
104  STOP_TIMING();
105 
106  PLASMA_Sequence_Destroy(sequence);
107  }
108  }
109 #endif
110 
111  /* Check the solution */
112  if ( check )
113  {
114  dparam[IPARAM_ANORM] = 0.0;
115  dparam[IPARAM_XNORM] = 0.0;
116  dparam[IPARAM_BNORM] = 0.0;
117  dparam[IPARAM_RES] = 0.0;
118  }
119 
120  PASTE_CODE_FREE_MATRIX( descA );
121 
122  return 0;
123 }