PLASMA  2.4.5
PLASMA - Parallel Linear Algebra for Scalable Multi-core Architectures
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups
testing_zcgels.c
Go to the documentation of this file.
1 
17 #include <stdlib.h>
18 #include <stdio.h>
19 #include <string.h>
20 #include <math.h>
21 
22 #include <plasma.h>
23 #include <cblas.h>
24 #include <lapacke.h>
25 #include <core_blas.h>
26 
27 #ifndef max
28 #define max(a, b) ((a) > (b) ? (a) : (b))
29 #endif
30 #ifndef min
31 #define min(a, b) ((a) < (b) ? (a) : (b))
32 #endif
33 
34 int check_orthogonality(int, int, int, PLASMA_Complex64_t*, double);
36 int check_solution(int, int, int, PLASMA_Complex64_t*, int, PLASMA_Complex64_t*, PLASMA_Complex64_t*, int, double);
37 
38 int IONE=1;
39 int ISEED[4] = {0,0,0,1}; /* initial seed for zlarnv() */
40 
41 int main (int argc, char **argv)
42 {
43  /* Check for number of arguments*/
44  if ( argc != 7){
45  printf(" Proper Usage is : ./testing_zcgels ncores M N LDA NRHS LDB with \n - ncores : number of cores \n - M : number of rows of the matrix A \n - N : number of columns of the matrix A \n - LDA : leading dimension of the matrix A \n - NRHS : number of RHS \n - LDB : leading dimension of the matrix B\n");
46  exit(1);
47  }
48 
49  int cores = atoi(argv[1]);
50  int M = atoi(argv[2]);
51  int N = atoi(argv[3]);
52  int LDA = atoi(argv[4]);
53  int NRHS = atoi(argv[5]);
54  int LDB = atoi(argv[6]);
55  int LDX = LDB;
56  int ITER;
57 
58  int K = min(M, N);
59  double eps;
60  int info, info_solution;
61  /* int info_ortho, info_factorization; */
62  int i,j;
63  int LDAxN = LDA*N;
64  int LDBxNRHS = LDB*NRHS;
65 
66  PLASMA_Complex64_t *A1 = (PLASMA_Complex64_t *)malloc(LDA*N*sizeof(PLASMA_Complex64_t));
67  PLASMA_Complex64_t *A2 = (PLASMA_Complex64_t *)malloc(LDA*N*sizeof(PLASMA_Complex64_t));
68  PLASMA_Complex64_t *B1 = (PLASMA_Complex64_t *)malloc(LDB*NRHS*sizeof(PLASMA_Complex64_t));
69  PLASMA_Complex64_t *B2 = (PLASMA_Complex64_t *)malloc(LDB*NRHS*sizeof(PLASMA_Complex64_t));
70  PLASMA_Complex64_t *X = (PLASMA_Complex64_t *)malloc(LDX*NRHS*sizeof(PLASMA_Complex64_t));
71  PLASMA_Complex64_t *Q = (PLASMA_Complex64_t *)malloc(LDA*N*sizeof(PLASMA_Complex64_t));
72 
73  /* Check if unable to allocate memory */
74  if ((!A1)||(!A2)||(!B1)||(!B2)||(!Q)||(!X)){
75  printf("Out of Memory \n ");
76  exit(0);
77  }
78 
79  /* Plasma Initialization */
80  PLASMA_Init(cores);
81 
82  /*
83  PLASMA_Disable(PLASMA_AUTOTUNING);
84  PLASMA_Set(PLASMA_TILE_SIZE, 6);
85  PLASMA_Set(PLASMA_INNER_BLOCK_SIZE, 3);
86  */
87 
88  eps = LAPACKE_dlamch_work('e');
89 
90  /*----------------------------------------------------------
91  * TESTING ZCGELS
92  */
93 
94  /* Initialize A1 and A2 */
95  LAPACKE_zlarnv_work(IONE, ISEED, LDAxN, A1);
96  for (i = 0; i < M; i++)
97  for (j = 0; j < N; j++)
98  A2[LDA*j+i] = A1[LDA*j+i] ;
99 
100  /* Initialize B1 and B2 */
101  LAPACKE_zlarnv_work(IONE, ISEED, LDBxNRHS, B1);
102  for (i = 0; i < M; i++)
103  for (j = 0; j < NRHS; j++)
104  B2[LDB*j+i] = B1[LDB*j+i] ;
105 
106  for (i = 0; i < K; i++)
107  Q[LDA*i+i] = 1.0;
108 
109  printf("\n");
110  printf("------ TESTS FOR PLASMA ZCGELS ROUTINE ------- \n");
111  printf(" Size of the Matrix %d by %d\n", M, N);
112  printf("\n");
113  printf(" The matrix A is randomly generated for each test.\n");
114  printf("============\n");
115  printf(" The relative machine precision (eps) is to be %e \n",eps);
116  printf(" Computational tests pass if scaled residuals are less than 60.\n");
117 
118  /* PLASMA ZGELS */
119  info = PLASMA_zcgels(PlasmaNoTrans, M, N, NRHS, A2, LDA, B2, LDB, X, LDX, &ITER);
120  if (info != PLASMA_SUCCESS ) {
121  printf("PLASMA_zcgels is not completed: info = %d\n", info);
122  info_solution = 1;
123  } else {
124  printf(" Solution obtained with %d iterations\n", ITER);
125 
126  /* PLASMA ZGELS */
127  // if (M >= N)
128  // /* Building the economy-size Q */
129  // PLASMA_zungqr(M, N, K, A2, LDA, T, Q, LDA);
130  //else
131  // /* Building the economy-size Q */
132  // PLASMA_zunglq(M, N, K, A2, LDA, T, Q, LDA);
133  /* Check the orthogonality, factorization and the solution */
134  //info_ortho = check_orthogonality(M, N, LDA, Q, eps);
135  //info_factorization = check_factorization(M, N, A1, A2, LDA, Q, eps);
136  info_solution = check_solution(M, N, NRHS, A1, LDA, B1, X, LDB, eps);
137  }
138 
139  //if ((info_solution == 0)&(info_factorization == 0)&(info_ortho == 0)) {
140  if (info_solution == 0) {
141  printf("***************************************************\n");
142  printf(" ---- TESTING ZCGELS ..................... PASSED !\n");
143  printf("***************************************************\n");
144  }
145  else {
146  printf("************************************************\n");
147  printf(" - TESTING ZCGELS .. FAILED !\n");
148  printf("************************************************\n");
149  }
150 
151 
152  free(A1); free(A2); free(B1); free(B2); free(X); free(Q);
153 
154  PLASMA_Finalize();
155 
156  exit(0);
157 }
158 
159 /*-------------------------------------------------------------------
160  * Check the orthogonality of Q
161  */
162 
163 int check_orthogonality(int M, int N, int LDQ, PLASMA_Complex64_t *Q, double eps)
164 {
165  double alpha, beta;
166  double normQ;
167  int info_ortho;
168  int i;
169  int minMN = min(M, N);
170 
171  double *work = (double *)malloc(minMN*sizeof(double));
172 
173  alpha = 1.0;
174  beta = -1.0;
175 
176  /* Build the idendity matrix USE DLASET?*/
177  PLASMA_Complex64_t *Id = (PLASMA_Complex64_t *) malloc(minMN*minMN*sizeof(PLASMA_Complex64_t));
178  memset((void*)Id, 0, minMN*minMN*sizeof(PLASMA_Complex64_t));
179  for (i = 0; i < minMN; i++)
180  Id[i*minMN+i] = (PLASMA_Complex64_t)1.0;
181 
182  /* Perform Id - Q'Q */
183  if (M >= N)
184  cblas_zherk(CblasColMajor, CblasUpper, CblasConjTrans, N, M, alpha, Q, LDQ, beta, Id, N);
185  else
186  cblas_zherk(CblasColMajor, CblasUpper, CblasNoTrans, M, N, alpha, Q, LDQ, beta, Id, M);
187 
188  normQ = LAPACKE_zlansy_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm), 'u', minMN, Id, minMN, work);
189 
190  printf("============\n");
191  printf("Checking the orthogonality of Q \n");
192  printf("||Id-Q'*Q||_oo / (N*eps) = %e \n",normQ/(minMN*eps));
193 
194  if ( isnan(normQ / (minMN * eps)) || isinf(normQ / (minMN * eps)) || (normQ / (minMN * eps) > 60.0) ) {
195  printf("-- Orthogonality is suspicious ! \n");
196  info_ortho=1;
197  }
198  else {
199  printf("-- Orthogonality is CORRECT ! \n");
200  info_ortho=0;
201  }
202 
203  free(work); free(Id);
204 
205  return info_ortho;
206 }
207 
208 /*------------------------------------------------------------
209  * Check the factorization QR
210  */
211 
212 int check_factorization(int M, int N, PLASMA_Complex64_t *A1, PLASMA_Complex64_t *A2, int LDA, PLASMA_Complex64_t *Q, double eps )
213 {
214  double Anorm, Rnorm;
215  PLASMA_Complex64_t alpha, beta;
216  int info_factorization;
217  int i,j;
218 
219  PLASMA_Complex64_t *Ql = (PLASMA_Complex64_t *)malloc(M*N*sizeof(PLASMA_Complex64_t));
220  PLASMA_Complex64_t *Residual = (PLASMA_Complex64_t *)malloc(M*N*sizeof(PLASMA_Complex64_t));
221  double *work = (double *)malloc(max(M,N)*sizeof(double));
222 
223  alpha=1.0;
224  beta=0.0;
225 
226  if (M >= N) {
227  /* Extract the R */
228  PLASMA_Complex64_t *R = (PLASMA_Complex64_t *)malloc(N*N*sizeof(PLASMA_Complex64_t));
229  memset((void*)R, 0, N*N*sizeof(PLASMA_Complex64_t));
230  LAPACKE_zlacpy_work(LAPACK_COL_MAJOR,'u', M, N, A2, LDA, R, N);
231 
232  /* Perform Ql=Q*R */
233  memset((void*)Ql, 0, M*N*sizeof(PLASMA_Complex64_t));
234  cblas_zgemm(CblasColMajor, CblasNoTrans, CblasNoTrans, M, N, N, CBLAS_SADDR(alpha), Q, LDA, R, N, CBLAS_SADDR(beta), Ql, M);
235  free(R);
236  }
237  else {
238  /* Extract the L */
239  PLASMA_Complex64_t *L = (PLASMA_Complex64_t *)malloc(M*M*sizeof(PLASMA_Complex64_t));
240  memset((void*)L, 0, M*M*sizeof(PLASMA_Complex64_t));
241  LAPACKE_zlacpy_work(LAPACK_COL_MAJOR,'l', M, N, A2, LDA, L, M);
242 
243  /* Perform Ql=LQ */
244  memset((void*)Ql, 0, M*N*sizeof(PLASMA_Complex64_t));
245  cblas_zgemm(CblasColMajor, CblasNoTrans, CblasNoTrans, M, N, M, CBLAS_SADDR(alpha), L, M, Q, LDA, CBLAS_SADDR(beta), Ql, M);
246  free(L);
247  }
248 
249  /* Compute the Residual */
250  for (i = 0; i < M; i++)
251  for (j = 0 ; j < N; j++)
252  Residual[j*M+i] = A1[j*LDA+i]-Ql[j*M+i];
253 
254  Rnorm = LAPACKE_zlange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm), M, N, Residual, M, work);
255  Anorm = LAPACKE_zlange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm), M, N, A2, LDA, work);
256 
257  if (M >= N) {
258  printf("============\n");
259  printf("Checking the QR Factorization \n");
260  printf("-- ||A-QR||_oo/(||A||_oo.N.eps) = %e \n",Rnorm/(Anorm*N*eps));
261  }
262  else {
263  printf("============\n");
264  printf("Checking the LQ Factorization \n");
265  printf("-- ||A-LQ||_oo/(||A||_oo.N.eps) = %e \n",Rnorm/(Anorm*N*eps));
266  }
267 
268  if (isnan(Rnorm / (Anorm * N *eps)) || isinf(Rnorm / (Anorm * N *eps)) || (Rnorm / (Anorm * N * eps) > 60.0) ) {
269  printf("-- Factorization is suspicious ! \n");
270  info_factorization = 1;
271  }
272  else {
273  printf("-- Factorization is CORRECT ! \n");
274  info_factorization = 0;
275  }
276 
277  free(work); free(Ql); free(Residual);
278 
279  return info_factorization;
280 }
281 
282 /*--------------------------------------------------------------
283  * Check the solution
284  */
285 
286 int check_solution(int M, int N, int NRHS, PLASMA_Complex64_t *A1, int LDA, PLASMA_Complex64_t *B1, PLASMA_Complex64_t *B2, int LDB, double eps)
287 {
288  int info_solution;
289  double Rnorm, Anorm, Xnorm, Bnorm;
290  PLASMA_Complex64_t alpha, beta;
291  double result;
292  double *work = (double *)malloc(max(M, N)* sizeof(double));
293 
294  alpha = 1.0;
295  beta = -1.0;
296 
297  Anorm = LAPACKE_zlange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm), M, N, A1, LDA, work);
298  Xnorm = LAPACKE_zlange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm), M, NRHS, B2, LDB, work);
299  Bnorm = LAPACKE_zlange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm), N, NRHS, B1, LDB, work);
300 
301  cblas_zgemm(CblasColMajor, CblasNoTrans, CblasNoTrans, M, NRHS, N, CBLAS_SADDR(alpha), A1, LDA, B2, LDB, CBLAS_SADDR(beta), B1, LDB);
302 
303  if (M >= N) {
304  PLASMA_Complex64_t *Residual = (PLASMA_Complex64_t *)malloc(M*NRHS*sizeof(PLASMA_Complex64_t));
305  memset((void*)Residual, 0, M*NRHS*sizeof(PLASMA_Complex64_t));
306  cblas_zgemm(CblasColMajor, CblasConjTrans, CblasNoTrans, N, NRHS, M, CBLAS_SADDR(alpha), A1, LDA, B1, LDB, CBLAS_SADDR(beta), Residual, M);
307  Rnorm = LAPACKE_zlange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm), M, NRHS, Residual, M, work);
308  free(Residual);
309  }
310  else {
311  PLASMA_Complex64_t *Residual = (PLASMA_Complex64_t *)malloc(N*NRHS*sizeof(PLASMA_Complex64_t));
312  memset((void*)Residual, 0, N*NRHS*sizeof(PLASMA_Complex64_t));
313  cblas_zgemm(CblasColMajor, CblasConjTrans, CblasNoTrans, N, NRHS, M, CBLAS_SADDR(alpha), A1, LDA, B1, LDB, CBLAS_SADDR(beta), Residual, N);
314  Rnorm = LAPACKE_zlange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm), N, NRHS, Residual, N, work);
315  free(Residual);
316  }
317 
318  result = Rnorm / ( (Anorm*Xnorm+Bnorm)*N*eps ) ;
319  printf("============\n");
320  printf("Checking the Residual of the solution \n");
321  printf("-- ||Ax-B||_oo/((||A||_oo||x||_oo+||B||_oo).N.eps) = %e \n", result);
322 
323  if ( isnan(Xnorm) || isinf(Xnorm) || isnan(result) || isinf(result) || (result > 60.0) ) {
324  printf("-- The solution is suspicious ! \n");
325  info_solution = 1;
326  }
327  else{
328  printf("-- The solution is CORRECT ! \n");
329  info_solution = 0;
330  }
331  free(work);
332  return info_solution;
333 }