PLASMA  2.4.5
PLASMA - Parallel Linear Algebra for Scalable Multi-core Architectures
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups
testing_dpotri.c
Go to the documentation of this file.
1 
15 #include <stdlib.h>
16 #include <stdio.h>
17 #include <string.h>
18 #include <math.h>
19 
20 #include <plasma.h>
21 #include <cblas.h>
22 #include <lapacke.h>
23 #include <core_blas.h>
24 #include "testing_dmain.h"
25 
28  blas_colmajor = 102 };
29 
31  blas_base = 151,
32  blas_t = 152,
33  blas_rnd = 153,
34  blas_ieee = 154,
35  blas_emin = 155,
36  blas_emax = 156,
37  blas_eps = 157,
38  blas_prec = 158,
41  blas_sfmin = 161};
42 
52 
53 static void
54 BLAS_error(char *rname, int err, int val, int x) {
55  fprintf( stderr, "%s %d %d %d\n", rname, err, val, x );
56  abort();
57 }
58 
59 static
60 void
61 BLAS_dge_norm(enum blas_order_type order, enum blas_norm_type norm,
62  int m, int n, const double *a, int lda, double *res) {
63  int i, j; float anorm, v;
64  char rname[] = "BLAS_dge_norm";
65 
66  if (order != blas_colmajor) BLAS_error( rname, -1, order, 0 );
67 
68  if (norm == blas_frobenius_norm) {
69  anorm = 0.0f;
70  for (j = n; j; --j) {
71  for (i = m; i; --i) {
72  v = a[0];
73  anorm += v * v;
74  a++;
75  }
76  a += lda - m;
77  }
78  anorm = sqrt( anorm );
79  } else if (norm == blas_inf_norm) {
80  anorm = 0.0f;
81  for (i = 0; i < m; ++i) {
82  v = 0.0f;
83  for (j = 0; j < n; ++j) {
84  v += fabs( a[i + j * lda] );
85  }
86  if (v > anorm)
87  anorm = v;
88  }
89  } else if (norm == blas_one_norm) {
90  anorm = 0.0f;
91  for (i = 0; i < m; ++i) {
92  v = 0.0f;
93  for (j = 0; j < n; ++j) {
94  v += fabs( a[i + j * lda] );
95  }
96  if (v > anorm)
97  anorm = v;
98  }
99  } else {
100  BLAS_error( rname, -2, norm, 0 );
101  return;
102  }
103 
104  if (res) *res = anorm;
105 }
106 
107 static
108 double
109 BLAS_dpow_di(double x, int n) {
110  double rv = 1.0;
111 
112  if (n < 0) {
113  n = -n;
114  x = 1.0 / x;
115  }
116 
117  for (; n; n >>= 1, x *= x) {
118  if (n & 1)
119  rv *= x;
120  }
121 
122  return rv;
123 }
124 
125 static
126 double
127 BLAS_dfpinfo(enum blas_cmach_type cmach) {
128  double eps = 1.0, r = 1.0, o = 1.0, b = 2.0;
129  int t = 53, l = 1024, m = -1021;
130  char rname[] = "BLAS_dfpinfo";
131 
132  if ((sizeof eps) == sizeof(float)) {
133  t = 24;
134  l = 128;
135  m = -125;
136  } else {
137  t = 53;
138  l = 1024;
139  m = -1021;
140  }
141 
142  /* for (i = 0; i < t; ++i) eps *= half; */
143  eps = BLAS_dpow_di( b, -t );
144  /* for (i = 0; i >= m; --i) r *= half; */
145  r = BLAS_dpow_di( b, m-1 );
146 
147  o -= eps;
148  /* for (i = 0; i < l; ++i) o *= b; */
149  o = (o * BLAS_dpow_di( b, l-1 )) * b;
150 
151  switch (cmach) {
152  case blas_eps: return eps;
153  case blas_sfmin: return r;
154  default:
155  BLAS_error( rname, -1, cmach, 0 );
156  break;
157  }
158  return 0.0;
159 }
160 
161 static int check_factorization(int, double*, double*, int, int, double);
162 static int check_inverse(int, double *, double *, int, int, double);
163 
164 int testing_dpotri(int argc, char **argv)
165 {
166 
167  /* Check for number of arguments*/
168  if (argc != 2){
169  USAGE("POTRI", "N LDA",
170  " - N : the size of the matrix\n"
171  " - LDA : leading dimension of the matrix A\n");
172  return -1;
173  }
174 
175  int N = atoi(argv[0]);
176  int LDA = atoi(argv[1]);
177  double eps;
178  int uplo;
179  int info_inverse, info_factorization;
180 
181  double *A1 = (double *)malloc(LDA*N*sizeof(double));
182  double *A2 = (double *)malloc(LDA*N*sizeof(double));
183  double *WORK = (double *)malloc(2*LDA*sizeof(double));
184 
185  /* Check if unable to allocate memory */
186  if ((!A1)||(!A2)){
187  printf("Out of Memory \n ");
188  return -2;
189  }
190 
191  eps = BLAS_dfpinfo( blas_eps );
192 
193  uplo = PlasmaUpper;
194 
195  /*-------------------------------------------------------------
196  * TESTING DPOTRI
197  */
198 
199  /* Initialize A1 and A2 for Symmetric Positif Matrix */
200  PLASMA_dplgsy( (double)N, N, A1, LDA, 51 );
201  PLASMA_dlacpy( PlasmaUpperLower, N, N, A1, LDA, A2, LDA );
202 
203  printf("\n");
204  printf("------ TESTS FOR PLASMA DPOTRI ROUTINE ------- \n");
205  printf(" Size of the Matrix %d by %d\n", N, N);
206  printf("\n");
207  printf(" The matrix A is randomly generated for each test.\n");
208  printf("============\n");
209  printf(" The relative machine precision (eps) is to be %e \n", eps);
210  printf(" Computational tests pass if scaled residuals are less than 60.\n");
211 
212  /* PLASMA DPOTRF */
213  PLASMA_dpotrf(uplo, N, A2, LDA);
214 
215  /* Check the factorization */
216  info_factorization = check_factorization( N, A1, A2, LDA, uplo, eps);
217 
218  /* PLASMA DPOTRI */
219  PLASMA_dpotri(uplo, N, A2, LDA);
220 
221  /* Check the inverse */
222  info_inverse = check_inverse(N, A1, A2, LDA, uplo, eps);
223 
224  if ( (info_inverse == 0) && (info_factorization == 0) ) {
225  printf("***************************************************\n");
226  printf(" ---- TESTING DPOTRI ..................... PASSED !\n");
227  printf("***************************************************\n");
228  }
229  else {
230  printf("***************************************************\n");
231  printf(" - TESTING DPOTRI ... FAILED !\n");
232  printf("***************************************************\n");
233  }
234 
235  free(A1); free(A2); free(WORK);
236 
237  return 0;
238 }
239 
240 
241 /*------------------------------------------------------------------------
242  * Check the factorization of the matrix A2
243  */
244 static int check_factorization(int N, double *A1, double *A2, int LDA, int uplo, double eps)
245 {
246  double Anorm, Rnorm;
247  double alpha;
248  int info_factorization;
249  int i,j;
250 
251  double *Residual = (double *)malloc(N*N*sizeof(double));
252  double *L1 = (double *)malloc(N*N*sizeof(double));
253  double *L2 = (double *)malloc(N*N*sizeof(double));
254  double *work = (double *)malloc(N*sizeof(double));
255 
256  memset((void*)L1, 0, N*N*sizeof(double));
257  memset((void*)L2, 0, N*N*sizeof(double));
258 
259  alpha= 1.0;
260 
261  LAPACKE_dlacpy_work(LAPACK_COL_MAJOR,' ', N, N, A1, LDA, Residual, N);
262 
263  /* Dealing with L'L or U'U */
264  if (uplo == PlasmaUpper){
265  LAPACKE_dlacpy_work(LAPACK_COL_MAJOR,'u', N, N, A2, LDA, L1, N);
266  LAPACKE_dlacpy_work(LAPACK_COL_MAJOR,'u', N, N, A2, LDA, L2, N);
267  cblas_dtrmm(CblasColMajor, CblasLeft, CblasUpper, CblasTrans, CblasNonUnit, N, N, (alpha), L1, N, L2, N);
268  }
269  else{
270  LAPACKE_dlacpy_work(LAPACK_COL_MAJOR,'l', N, N, A2, LDA, L1, N);
271  LAPACKE_dlacpy_work(LAPACK_COL_MAJOR,'l', N, N, A2, LDA, L2, N);
272  cblas_dtrmm(CblasColMajor, CblasRight, CblasLower, CblasTrans, CblasNonUnit, N, N, (alpha), L1, N, L2, N);
273  }
274 
275  /* Compute the Residual || A -L'L|| */
276  for (i = 0; i < N; i++)
277  for (j = 0; j < N; j++)
278  Residual[j*N+i] = L2[j*N+i] - Residual[j*N+i];
279 
280  BLAS_dge_norm( blas_colmajor, blas_inf_norm, N, N, Residual, N, &Rnorm );
281  BLAS_dge_norm( blas_colmajor, blas_inf_norm, N, N, A1, LDA, &Anorm );
282 
283  printf("============\n");
284  printf("Checking the Cholesky Factorization \n");
285  printf("-- ||L'L-A||_oo/(||A||_oo.N.eps) = %e \n",Rnorm/(Anorm*N*eps));
286 
287  if ( isnan(Rnorm/(Anorm*N*eps)) || isinf(Rnorm/(Anorm*N*eps)) || (Rnorm/(Anorm*N*eps) > 60.0) ){
288  printf("-- Factorization is suspicious ! \n");
289  info_factorization = 1;
290  }
291  else{
292  printf("-- Factorization is CORRECT ! \n");
293  info_factorization = 0;
294  }
295 
296  free(Residual); free(L1); free(L2); free(work);
297 
298  return info_factorization;
299 }
300 
301 
302 /*------------------------------------------------------------------------
303  * Check the accuracy of the computed inverse
304  */
305 
306 static int check_inverse(int N, double *A1, double *A2, int LDA, int uplo, double eps )
307 {
308  int info_inverse;
309  int i, j;
310  double Rnorm, Anorm, Ainvnorm, result;
311  double alpha, beta, zone;
312  double *work = (double *)malloc(N*N*sizeof(double));
313 
314  alpha = -1.0;
315  beta = 0.0;
316  zone = 1.0;
317 
318  /* Rebuild the other part of the inverse matrix */
319  if(uplo == PlasmaUpper){
320  for(j=0; j<N; j++)
321  for(i=0; i<j; i++)
322  *(A2+j+i*LDA) = *(A2+i+j*LDA);
323  cblas_dsymm(CblasColMajor, CblasLeft, CblasUpper, N, N, (alpha), A2, LDA, A1, LDA, (beta), work, N);
324 
325  }
326  else {
327  for(j=0; j<N; j++)
328  for(i=j; i<N; i++)
329  *(A2+j+i*LDA) = *(A2+i+j*LDA);
330  cblas_dsymm(CblasColMajor, CblasLeft, CblasLower, N, N, (alpha), A2, LDA, A1, LDA, (beta), work, N);
331  }
332 
333  /* Add the identity matrix to work */
334  for(i=0; i<N; i++)
335  *(work+i+i*N) = *(work+i+i*N) + zone;
336 
337  BLAS_dge_norm( blas_colmajor, blas_one_norm, N, N, work, N, &Rnorm );
338  BLAS_dge_norm( blas_colmajor, blas_one_norm, N, N, A1, LDA, &Anorm );
339  BLAS_dge_norm( blas_colmajor, blas_one_norm, N, N, A2, LDA, &Ainvnorm );
340 
341  if (getenv("PLASMA_TESTING_VERBOSE"))
342  printf( "||A||_1=%f\n||Ainv||_1=%f\n||Id - A*Ainv||_1=%e\n", Anorm, Ainvnorm, Rnorm );
343 
344  result = Rnorm / ( (Anorm*Ainvnorm)*N*eps ) ;
345  printf("============\n");
346  printf("Checking the Residual of the inverse \n");
347  printf("-- ||Id - A*Ainv||_1/((||A||_1||Ainv||_1).N.eps) = %e \n", result);
348 
349  if ( isnan(Ainvnorm) || isinf(Ainvnorm) || isnan(result) || isinf(result) || (result > 60.0) ) {
350  printf("-- The inverse is suspicious ! \n");
351  info_inverse = 1;
352  }
353  else{
354  printf("-- The inverse is CORRECT ! \n");
355  info_inverse = 0;
356  }
357 
358  free(work);
359 
360  return info_inverse;
361 }