PLASMA  2.4.5
PLASMA - Parallel Linear Algebra for Scalable Multi-core Architectures
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups
testing_cposv.c
Go to the documentation of this file.
1 
15 #include <stdlib.h>
16 #include <stdio.h>
17 #include <string.h>
18 #include <math.h>
19 
20 #include <plasma.h>
21 #include <cblas.h>
22 #include <lapacke.h>
23 #include <core_blas.h>
24 #include "testing_cmain.h"
25 
28  blas_colmajor = 102 };
29 
31  blas_base = 151,
32  blas_t = 152,
33  blas_rnd = 153,
34  blas_ieee = 154,
35  blas_emin = 155,
36  blas_emax = 156,
37  blas_eps = 157,
38  blas_prec = 158,
41  blas_sfmin = 161};
42 
52 
53 static void
54 BLAS_error(char *rname, int err, int val, int x) {
55  fprintf( stderr, "%s %d %d %d\n", rname, err, val, x );
56  abort();
57 }
58 
59 static
60 void
61 BLAS_cge_norm(enum blas_order_type order, enum blas_norm_type norm,
62  int m, int n, const PLASMA_Complex32_t *a, int lda, float *res) {
63  int i, j; float anorm, v;
64  char rname[] = "BLAS_cge_norm";
65 
66  if (order != blas_colmajor) BLAS_error( rname, -1, order, 0 );
67 
68  if (norm == blas_frobenius_norm) {
69  anorm = 0.0f;
70  for (j = n; j; --j) {
71  for (i = m; i; --i) {
72  v = a[0];
73  anorm += v * v;
74  a++;
75  }
76  a += lda - m;
77  }
78  anorm = sqrt( anorm );
79  } else if (norm == blas_inf_norm) {
80  anorm = 0.0f;
81  for (i = 0; i < m; ++i) {
82  v = 0.0f;
83  for (j = 0; j < n; ++j) {
84  v += cabsf( a[i + j * lda] );
85  }
86  if (v > anorm)
87  anorm = v;
88  }
89  } else {
90  BLAS_error( rname, -2, norm, 0 );
91  return;
92  }
93 
94  if (res) *res = anorm;
95 }
96 
97 static
98 float
99 BLAS_spow_di(float x, int n) {
100  float rv = 1.0;
101 
102  if (n < 0) {
103  n = -n;
104  x = 1.0 / x;
105  }
106 
107  for (; n; n >>= 1, x *= x) {
108  if (n & 1)
109  rv *= x;
110  }
111 
112  return rv;
113 }
114 
115 static
116 float
117 BLAS_sfpinfo(enum blas_cmach_type cmach) {
118  float eps = 1.0, r = 1.0, o = 1.0, b = 2.0;
119  int t = 53, l = 1024, m = -1021;
120  char rname[] = "BLAS_sfpinfo";
121 
122  if ((sizeof eps) == sizeof(float)) {
123  t = 24;
124  l = 128;
125  m = -125;
126  } else {
127  t = 53;
128  l = 1024;
129  m = -1021;
130  }
131 
132  /* for (i = 0; i < t; ++i) eps *= half; */
133  eps = BLAS_spow_di( b, -t );
134  /* for (i = 0; i >= m; --i) r *= half; */
135  r = BLAS_spow_di( b, m-1 );
136 
137  o -= eps;
138  /* for (i = 0; i < l; ++i) o *= b; */
139  o = (o * BLAS_spow_di( b, l-1 )) * b;
140 
141  switch (cmach) {
142  case blas_eps: return eps;
143  case blas_sfmin: return r;
144  default:
145  BLAS_error( rname, -1, cmach, 0 );
146  break;
147  }
148  return 0.0;
149 }
150 
151 static int check_factorization(int, PLASMA_Complex32_t*, PLASMA_Complex32_t*, int, int , float);
152 static int check_solution(int, int, PLASMA_Complex32_t*, int, PLASMA_Complex32_t*, PLASMA_Complex32_t*, int, float);
153 
154 int testing_cposv(int argc, char **argv)
155 {
156 
157  /* Check for number of arguments*/
158  if (argc != 4){
159  USAGE("POSV", "N LDA NRHS LDB",
160  " - N : the size of the matrix\n"
161  " - LDA : leading dimension of the matrix A\n"
162  " - NRHS : number of RHS\n"
163  " - LDB : leading dimension of the RHS B\n");
164  return -1;
165  }
166 
167  int N = atoi(argv[0]);
168  int LDA = atoi(argv[1]);
169  int NRHS = atoi(argv[2]);
170  int LDB = atoi(argv[3]);
171  float eps;
172  int uplo;
173  int info_solution, info_factorization;
174  int trans1, trans2;
175 
176  PLASMA_Complex32_t *A1 = (PLASMA_Complex32_t *)malloc(LDA*N*sizeof(PLASMA_Complex32_t));
177  PLASMA_Complex32_t *A2 = (PLASMA_Complex32_t *)malloc(LDA*N*sizeof(PLASMA_Complex32_t));
178  PLASMA_Complex32_t *B1 = (PLASMA_Complex32_t *)malloc(LDB*NRHS*sizeof(PLASMA_Complex32_t));
179  PLASMA_Complex32_t *B2 = (PLASMA_Complex32_t *)malloc(LDB*NRHS*sizeof(PLASMA_Complex32_t));
180 
181  /* Check if unable to allocate memory */
182  if ((!A1)||(!A2)||(!B1)||(!B2)){
183  printf("Out of Memory \n ");
184  return -2;
185  }
186 
187  eps = BLAS_sfpinfo( blas_eps );
188 
189  uplo = PlasmaUpper;
190  trans1 = uplo == PlasmaUpper ? PlasmaConjTrans : PlasmaNoTrans;
191  trans2 = uplo == PlasmaUpper ? PlasmaNoTrans : PlasmaConjTrans;
192 
193  /*-------------------------------------------------------------
194  * TESTING CPOSV
195  */
196 
197  /* Initialize A1 and A2 for Symmetric Positif Matrix */
198  PLASMA_cplghe( (float)N, N, A1, LDA, 51 );
199  PLASMA_clacpy( PlasmaUpperLower, N, N, A1, LDA, A2, LDA );
200 
201  /* Initialize B1 and B2 */
202  PLASMA_cplrnt( N, NRHS, B1, LDB, 371 );
203  PLASMA_clacpy( PlasmaUpperLower, N, NRHS, B1, LDB, B2, LDB );
204 
205  printf("\n");
206  printf("------ TESTS FOR PLASMA CPOSV ROUTINE ------- \n");
207  printf(" Size of the Matrix %d by %d\n", N, N);
208  printf("\n");
209  printf(" The matrix A is randomly generated for each test.\n");
210  printf("============\n");
211  printf(" The relative machine precision (eps) is to be %e \n", eps);
212  printf(" Computational tests pass if scaled residuals are less than 60.\n");
213 
214  /* PLASMA CPOSV */
215  PLASMA_cposv(uplo, N, NRHS, A2, LDA, B2, LDB);
216 
217  /* Check the factorization and the solution */
218  info_factorization = check_factorization( N, A1, A2, LDA, uplo, eps);
219  info_solution = check_solution(N, NRHS, A1, LDA, B1, B2, LDB, eps);
220 
221  if ( (info_solution == 0) && (info_factorization == 0) ) {
222  printf("***************************************************\n");
223  printf(" ---- TESTING CPOSV ...................... PASSED !\n");
224  printf("***************************************************\n");
225  }
226  else {
227  printf("***************************************************\n");
228  printf(" - TESTING CPOSV ... FAILED !\n");
229  printf("***************************************************\n");
230  }
231 
232  /*-------------------------------------------------------------
233  * TESTING CPOTRF + CPOTRS
234  */
235 
236  /* Initialize A1 and A2 for Symmetric Positif Matrix */
237  PLASMA_cplghe( (float)N, N, A1, LDA, 51 );
238  PLASMA_clacpy( PlasmaUpperLower, N, N, A1, LDA, A2, LDA );
239 
240  /* Initialize B1 and B2 */
241  PLASMA_cplrnt( N, NRHS, B1, LDB, 371 );
242  PLASMA_clacpy( PlasmaUpperLower, N, NRHS, B1, LDB, B2, LDB );
243 
244  /* Plasma routines */
245  PLASMA_cpotrf(uplo, N, A2, LDA);
246  PLASMA_cpotrs(uplo, N, NRHS, A2, LDA, B2, LDB);
247 
248  printf("\n");
249  printf("------ TESTS FOR PLASMA CPOTRF + CPOTRS ROUTINE ------- \n");
250  printf(" Size of the Matrix %d by %d\n", N, N);
251  printf("\n");
252  printf(" The matrix A is randomly generated for each test.\n");
253  printf("============\n");
254  printf(" The relative machine precision (eps) is to be %e \n", eps);
255  printf(" Computational tests pass if scaled residuals are less than 60.\n");
256 
257  /* Check the factorization and the solution */
258  info_factorization = check_factorization( N, A1, A2, LDA, uplo, eps);
259  info_solution = check_solution(N, NRHS, A1, LDA, B1, B2, LDB, eps);
260 
261  if ((info_solution == 0)&(info_factorization == 0)){
262  printf("***************************************************\n");
263  printf(" ---- TESTING CPOTRF + CPOTRS ............ PASSED !\n");
264  printf("***************************************************\n");
265  }
266  else{
267  printf("****************************************************\n");
268  printf(" - TESTING CPOTRF + CPOTRS ... FAILED !\n");
269  printf("****************************************************\n");
270  }
271 
272  /*-------------------------------------------------------------
273  * TESTING CPOTRF + ZPTRSM + CTRSM
274  */
275 
276  /* Initialize A1 and A2 for Symmetric Positif Matrix */
277  PLASMA_cplghe( (float)N, N, A1, LDA, 51 );
278  PLASMA_clacpy( PlasmaUpperLower, N, N, A1, LDA, A2, LDA );
279 
280  /* Initialize B1 and B2 */
281  PLASMA_cplrnt( N, NRHS, B1, LDB, 371 );
282  PLASMA_clacpy( PlasmaUpperLower, N, NRHS, B1, LDB, B2, LDB );
283 
284  /* PLASMA routines */
285  PLASMA_cpotrf(uplo, N, A2, LDA);
286  PLASMA_ctrsm(PlasmaLeft, uplo, trans1, PlasmaNonUnit,
287  N, NRHS, 1.0, A2, LDA, B2, LDB);
288  PLASMA_ctrsm(PlasmaLeft, uplo, trans2, PlasmaNonUnit,
289  N, NRHS, 1.0, A2, LDA, B2, LDB);
290 
291  printf("\n");
292  printf("------ TESTS FOR PLASMA CPOTRF + CTRSM + CTRSM ROUTINE ------- \n");
293  printf(" Size of the Matrix %d by %d\n", N, N);
294  printf("\n");
295  printf(" The matrix A is randomly generated for each test.\n");
296  printf("============\n");
297  printf(" The relative machine precision (eps) is to be %e \n", eps);
298  printf(" Computational tests pass if scaled residuals are less than 60.\n");
299 
300  /* Check the factorization and the solution */
301  info_factorization = check_factorization( N, A1, A2, LDA, uplo, eps);
302  info_solution = check_solution(N, NRHS, A1, LDA, B1, B2, LDB, eps);
303 
304  if ((info_solution == 0)&(info_factorization == 0)){
305  printf("***************************************************\n");
306  printf(" ---- TESTING CPOTRF + CTRSM + CTRSM ..... PASSED !\n");
307  printf("***************************************************\n");
308  }
309  else{
310  printf("***************************************************\n");
311  printf(" - TESTING CPOTRF + CTRSM + CTRSM ... FAILED !\n");
312  printf("***************************************************\n");
313  }
314 
315  free(A1); free(A2); free(B1); free(B2);
316 
317  return 0;
318 }
319 
320 
321 /*------------------------------------------------------------------------
322  * Check the factorization of the matrix A2
323  */
324 static int check_factorization(int N, PLASMA_Complex32_t *A1, PLASMA_Complex32_t *A2, int LDA, int uplo, float eps)
325 {
326  float Anorm, Rnorm;
327  PLASMA_Complex32_t alpha;
328  int info_factorization;
329  int i,j;
330 
331  PLASMA_Complex32_t *Residual = (PLASMA_Complex32_t *)malloc(N*N*sizeof(PLASMA_Complex32_t));
332  PLASMA_Complex32_t *L1 = (PLASMA_Complex32_t *)malloc(N*N*sizeof(PLASMA_Complex32_t));
333  PLASMA_Complex32_t *L2 = (PLASMA_Complex32_t *)malloc(N*N*sizeof(PLASMA_Complex32_t));
334  float *work = (float *)malloc(N*sizeof(float));
335 
336  memset((void*)L1, 0, N*N*sizeof(PLASMA_Complex32_t));
337  memset((void*)L2, 0, N*N*sizeof(PLASMA_Complex32_t));
338 
339  alpha= 1.0;
340 
341  LAPACKE_clacpy_work(LAPACK_COL_MAJOR,' ', N, N, A1, LDA, Residual, N);
342 
343  /* Dealing with L'L or U'U */
344  if (uplo == PlasmaUpper){
345  LAPACKE_clacpy_work(LAPACK_COL_MAJOR,'u', N, N, A2, LDA, L1, N);
346  LAPACKE_clacpy_work(LAPACK_COL_MAJOR,'u', N, N, A2, LDA, L2, N);
348  }
349  else{
350  LAPACKE_clacpy_work(LAPACK_COL_MAJOR,'l', N, N, A2, LDA, L1, N);
351  LAPACKE_clacpy_work(LAPACK_COL_MAJOR,'l', N, N, A2, LDA, L2, N);
353  }
354 
355  /* Compute the Residual || A -L'L|| */
356  for (i = 0; i < N; i++)
357  for (j = 0; j < N; j++)
358  Residual[j*N+i] = L2[j*N+i] - Residual[j*N+i];
359 
360  BLAS_cge_norm( blas_colmajor, blas_inf_norm, N, N, Residual, N, &Rnorm );
361  BLAS_cge_norm( blas_colmajor, blas_inf_norm, N, N, A1, LDA, &Anorm );
362 
363  printf("============\n");
364  printf("Checking the Cholesky Factorization \n");
365  printf("-- ||L'L-A||_oo/(||A||_oo.N.eps) = %e \n",Rnorm/(Anorm*N*eps));
366 
367  if ( isnan(Rnorm/(Anorm*N*eps)) || isinf(Rnorm/(Anorm*N*eps)) || (Rnorm/(Anorm*N*eps) > 60.0) ){
368  printf("-- Factorization is suspicious ! \n");
369  info_factorization = 1;
370  }
371  else{
372  printf("-- Factorization is CORRECT ! \n");
373  info_factorization = 0;
374  }
375 
376  free(Residual); free(L1); free(L2); free(work);
377 
378  return info_factorization;
379 }
380 
381 
382 /*------------------------------------------------------------------------
383  * Check the accuracy of the solution of the linear system
384  */
385 
386 static int check_solution(int N, int NRHS, PLASMA_Complex32_t *A1, int LDA, PLASMA_Complex32_t *B1, PLASMA_Complex32_t *B2, int LDB, float eps )
387 {
388  int info_solution;
389  float Rnorm, Anorm, Xnorm, Bnorm, result;
390  PLASMA_Complex32_t alpha, beta;
391  float *work = (float *)malloc(N*sizeof(float));
392 
393  alpha = 1.0;
394  beta = -1.0;
395 
396  BLAS_cge_norm( blas_colmajor, blas_inf_norm, N, NRHS, B2, LDB, &Xnorm );
397  BLAS_cge_norm( blas_colmajor, blas_inf_norm, N, N, A1, LDA, &Anorm );
398  BLAS_cge_norm( blas_colmajor, blas_inf_norm, N, NRHS, B1, LDB, &Bnorm );
399 
400  cblas_cgemm(CblasColMajor, CblasNoTrans, CblasNoTrans, N, NRHS, N, CBLAS_SADDR(alpha), A1, LDA, B2, LDB, CBLAS_SADDR(beta), B1, LDB);
401  BLAS_cge_norm( blas_colmajor, blas_inf_norm, N, NRHS, B1, LDB, &Rnorm );
402 
403  if (getenv("PLASMA_TESTING_VERBOSE"))
404  printf( "||A||_oo=%f\n||X||_oo=%f\n||B||_oo=%f\n||A X - B||_oo=%e\n", Anorm, Xnorm, Bnorm, Rnorm );
405 
406  result = Rnorm / ( (Anorm*Xnorm+Bnorm)*N*eps ) ;
407  printf("============\n");
408  printf("Checking the Residual of the solution \n");
409  printf("-- ||Ax-B||_oo/((||A||_oo||x||_oo+||B||_oo).N.eps) = %e \n", result);
410 
411  if ( isnan(Xnorm) || isinf(Xnorm) || isnan(result) || isinf(result) || (result > 60.0) ) {
412  printf("-- The solution is suspicious ! \n");
413  info_solution = 1;
414  }
415  else{
416  printf("-- The solution is CORRECT ! \n");
417  info_solution = 0;
418  }
419 
420  free(work);
421 
422  return info_solution;
423 }