PLASMA  2.4.5
PLASMA - Parallel Linear Algebra for Scalable Multi-core Architectures
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups
example_cpotrf.c
Go to the documentation of this file.
1 
17 #include <stdlib.h>
18 #include <stdio.h>
19 #include <string.h>
20 #include <math.h>
21 
22 #include <plasma.h>
23 #include <cblas.h>
24 #include <lapacke.h>
25 #include <core_blas.h>
26 
28 
29 int IONE=1;
30 int ISEED[4] = {0,0,0,1}; /* initial seed for clarnv() */
31 
32 int main ()
33 {
34 
35  int cores = 2;
36  int N = 10 ;
37  int LDA = 10 ;
38  int info_factorization;
39 
40  PLASMA_Complex32_t *A1 = (PLASMA_Complex32_t *)malloc(LDA*N*sizeof(PLASMA_Complex32_t));
41  PLASMA_Complex32_t *A2 = (PLASMA_Complex32_t *)malloc(LDA*N*sizeof(PLASMA_Complex32_t));
42 
43  /* Check if unable to allocate memory */
44  if ((!A1)||(!A2)){
45  printf("Out of Memory \n ");
46  return EXIT_SUCCESS;
47  }
48 
49  /* Plasma Initialize */
50  PLASMA_Init(cores);
51  printf("-- PLASMA is initialized to run on %d cores. \n",cores);
52 
53  /* Initialize A1 and A2 for Symmetric Positive Matrix */
54  PLASMA_cplghe( (float)N, N, A1, LDA, 51 );
55  PLASMA_clacpy( PlasmaUpperLower, N, N, A1, LDA, A2, LDA );
56 
57  /* Plasma routines */
58  PLASMA_cpotrf(PlasmaUpper, N, A2, LDA);
59 
60  /* Check the factorization */
61  info_factorization = check_factorization( N, A1, A2, LDA, PlasmaUpper);
62 
63  if ( info_factorization != 0 )
64  printf("-- Error in CPOTRF example ! \n");
65  else
66  printf("-- Run of CPOTRF example successful ! \n");
67 
68  free(A1); free(A2);
69 
71 
72  return EXIT_SUCCESS;
73 }
74 
75 
76 /*------------------------------------------------------------------------
77  * Check the factorization of the matrix A2
78  */
79 
81 {
82  float Anorm, Rnorm;
83  PLASMA_Complex32_t alpha;
84  int info_factorization;
85  int i,j;
86  float eps;
87 
88  eps = LAPACKE_slamch_work('e');
89 
90  PLASMA_Complex32_t *Residual = (PLASMA_Complex32_t *)malloc(N*N*sizeof(PLASMA_Complex32_t));
91  PLASMA_Complex32_t *L1 = (PLASMA_Complex32_t *)malloc(N*N*sizeof(PLASMA_Complex32_t));
92  PLASMA_Complex32_t *L2 = (PLASMA_Complex32_t *)malloc(N*N*sizeof(PLASMA_Complex32_t));
93  float *work = (float *)malloc(N*sizeof(float));
94 
95  memset((void*)L1, 0, N*N*sizeof(PLASMA_Complex32_t));
96  memset((void*)L2, 0, N*N*sizeof(PLASMA_Complex32_t));
97 
98  alpha= 1.0;
99 
100  LAPACKE_clacpy_work(LAPACK_COL_MAJOR,' ', N, N, A1, LDA, Residual, N);
101 
102  /* Dealing with L'L or U'U */
103  if (uplo == PlasmaUpper){
104  LAPACKE_clacpy_work(LAPACK_COL_MAJOR,'u', N, N, A2, LDA, L1, N);
105  LAPACKE_clacpy_work(LAPACK_COL_MAJOR,'u', N, N, A2, LDA, L2, N);
107  }
108  else{
109  LAPACKE_clacpy_work(LAPACK_COL_MAJOR,'l', N, N, A2, LDA, L1, N);
110  LAPACKE_clacpy_work(LAPACK_COL_MAJOR,'l', N, N, A2, LDA, L2, N);
112  }
113 
114  /* Compute the Residual || A -L'L|| */
115  for (i = 0; i < N; i++)
116  for (j = 0; j < N; j++)
117  Residual[j*N+i] = L2[j*N+i] - Residual[j*N+i];
118 
119  Rnorm = LAPACKE_clange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm), N, N, Residual, N, work);
120  Anorm = LAPACKE_clange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm), N, N, A1, LDA, work);
121 
122  printf("============\n");
123  printf("Checking the Cholesky Factorization \n");
124  printf("-- ||L'L-A||_oo/(||A||_oo.N.eps) = %e \n",Rnorm/(Anorm*N*eps));
125 
126  if ( isnan(Rnorm/(Anorm*N*eps)) || (Rnorm/(Anorm*N*eps) > 10.0) ){
127  printf("-- Factorization is suspicious ! \n");
128  info_factorization = 1;
129  }
130  else{
131  printf("-- Factorization is CORRECT ! \n");
132  info_factorization = 0;
133  }
134 
135  free(Residual); free(L1); free(L2); free(work);
136 
137  return info_factorization;
138 }