PLASMA  2.4.5
PLASMA - Parallel Linear Algebra for Scalable Multi-core Architectures
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups
testing_chegst.c
Go to the documentation of this file.
1 
15 #include <stdlib.h>
16 #include <stdio.h>
17 #include <string.h>
18 #include <math.h>
19 
20 #include <plasma.h>
21 #include <cblas.h>
22 #include <lapacke.h>
23 #include <core_blas.h>
24 #include "testing_cmain.h"
25 
26 #undef REAL
27 #define COMPLEX
28 
29 static int check_transformation(int, int, int, PLASMA_Complex32_t*, PLASMA_Complex32_t*, int, PLASMA_Complex32_t*, int, float);
30 static int check_factorization(int, PLASMA_Complex32_t*, PLASMA_Complex32_t*, int, int , float);
31 
32 int testing_chegst(int argc, char **argv)
33 {
34  /* Check for number of arguments*/
35  if (argc != 3) {
36  USAGE("HEGST", "N LDA LDB",
37  " - N : size of the matrices A and B\n"
38  " - LDA : leading dimension of the matrix A\n"
39  " - LDB : leading dimension of the matrix B\n");
40  return -1;
41  }
42 
43  float eps = LAPACKE_slamch_work('e');
44  int N = atoi(argv[0]);
45  int LDA = atoi(argv[1]);
46  int LDB = atoi(argv[2]);
47  int info_transformation, info_factorization;
48  int i, u;
49  int LDAxN = LDA*N;
50  int LDBxN = LDB*N;
51 
52  PLASMA_Complex32_t *A1 = (PLASMA_Complex32_t *)malloc(LDAxN*sizeof(PLASMA_Complex32_t));
53  PLASMA_Complex32_t *A2 = (PLASMA_Complex32_t *)malloc(LDAxN*sizeof(PLASMA_Complex32_t));
54  PLASMA_Complex32_t *B1 = (PLASMA_Complex32_t *)malloc(LDBxN*sizeof(PLASMA_Complex32_t));
55  PLASMA_Complex32_t *B2 = (PLASMA_Complex32_t *)malloc(LDBxN*sizeof(PLASMA_Complex32_t));
56  PLASMA_Complex32_t *Ainit = (PLASMA_Complex32_t *)malloc(LDAxN*sizeof(PLASMA_Complex32_t));
57  PLASMA_Complex32_t *Binit = (PLASMA_Complex32_t *)malloc(LDBxN*sizeof(PLASMA_Complex32_t));
58 
59  /* Check if unable to allocate memory */
60  if ((!A1)||(!A2)||(!B1)||(!B2)||(!Ainit)||(!Binit)){
61  printf("Out of Memory \n ");
62  return -2;
63  }
64 
65  /*----------------------------------------------------------
66  * TESTING CHEGST
67  */
68 
69  /* Initialize A1 and A2 */
70  PLASMA_cplghe(0., N, A1, LDA, 5198);
71  LAPACKE_clacpy_work(LAPACK_COL_MAJOR, 'A', N, N, A1, LDA, Ainit, LDA);
72 
73  /* Initialize B1 and B2 */
74  PLASMA_cplghe((float)N, N, B1, LDB, 4231);
75  LAPACKE_clacpy_work(LAPACK_COL_MAJOR, 'A', N, N, B1, LDB, Binit, LDB);
76 
77  printf("\n");
78  printf("------ TESTS FOR PLASMA CHEGST ROUTINE ------- \n");
79  printf(" Size of the Matrix %d by %d\n", N, N);
80  printf("\n");
81  printf(" The matrices A and B are randomly generated for each test.\n");
82  printf("============\n");
83  printf(" The relative machine precision (eps) is to be %e \n",eps);
84  printf(" Computational tests pass if scaled residuals are less than 60.\n");
85 
86  /*----------------------------------------------------------
87  * TESTING CHEGST
88  */
89 
90  for (i=0; i<3; i++) {
91  for (u=0; u<2; u++) {
92  memcpy(A2, Ainit, LDAxN*sizeof(PLASMA_Complex32_t));
93  memcpy(B2, Binit, LDBxN*sizeof(PLASMA_Complex32_t));
94 
95  PLASMA_cpotrf(uplo[u], N, B2, LDB);
96  PLASMA_chegst(itype[i], uplo[u], N, A2, LDA, B2, LDB);
97 
98  /* Check the Cholesky factorization and the transformation */
99  info_factorization = check_factorization(N, B1, B2, LDB, uplo[u], eps);
100  info_transformation = check_transformation(itype[i], uplo[u], N, A1, A2, LDA, B2, LDB, eps);
101 
102  if ( (info_transformation == 0) && (info_factorization == 0) ) {
103  printf("***************************************************\n");
104  printf(" ---- TESTING CHEGST (%s, %s) ....... PASSED !\n", itypestr[i], uplostr[u]);
105  printf("***************************************************\n");
106  }
107  else {
108  printf("************************************************\n");
109  printf(" - TESTING CHEGST (%s, %s) ... FAILED !\n", itypestr[i], uplostr[u]);
110  printf("************************************************\n");
111  }
112  }
113  }
114 
115  free(A1);
116  free(A2);
117  free(B1);
118  free(B2);
119  free(Ainit);
120  free(Binit);
121 
122  return 0;
123 }
124 /*------------------------------------------------------------------------
125  * Check the factorization of the matrix A2
126  */
127 static int check_factorization(int N, PLASMA_Complex32_t *A1, PLASMA_Complex32_t *A2, int LDA, int uplo, float eps)
128 {
129  PLASMA_Complex32_t alpha = 1.0;
130  float Anorm, Rnorm, result;
131  int info_factorization;
132  int i,j;
133 
134  PLASMA_Complex32_t *Residual = (PLASMA_Complex32_t *)malloc(N*N*sizeof(PLASMA_Complex32_t));
135  PLASMA_Complex32_t *L1 = (PLASMA_Complex32_t *)malloc(N*N*sizeof(PLASMA_Complex32_t));
136  PLASMA_Complex32_t *L2 = (PLASMA_Complex32_t *)malloc(N*N*sizeof(PLASMA_Complex32_t));
137  float *work = (float *)malloc(N*sizeof(float));
138 
139  memset((void*)L1, 0, N*N*sizeof(PLASMA_Complex32_t));
140  memset((void*)L2, 0, N*N*sizeof(PLASMA_Complex32_t));
141 
142  LAPACKE_clacpy_work(LAPACK_COL_MAJOR,' ', N, N, A1, LDA, Residual, N);
143 
144  /* Dealing with L'L or U'U */
145  LAPACKE_clacpy_work(LAPACK_COL_MAJOR, lapack_const(uplo), N, N, A2, LDA, L1, N);
146  LAPACKE_clacpy_work(LAPACK_COL_MAJOR, lapack_const(uplo), N, N, A2, LDA, L2, N);
147  if (uplo == PlasmaUpper)
149  else
151 
152  /* Compute the Residual || A - L'L|| */
153  for (i = 0; i < N; i++)
154  for (j = 0; j < N; j++)
155  Residual[j*N+i] = L2[j*N+i] - Residual[j*N+i];
156 
157  Rnorm = LAPACKE_clange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm), N, N, Residual, N, work);
158  Anorm = LAPACKE_clange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm), N, N, A1, LDA, work);
159 
160  result = Rnorm / ( Anorm * N * eps );
161  printf("============\n");
162  printf("Checking the Cholesky Factorization \n");
163  printf("-- ||L'L-A||_oo/(||A||_oo.N.eps) = %e \n", result);
164 
165  if ( isnan(result) || isinf(result) || (result > 60.0) ){
166  printf("-- Factorization is suspicious ! \n");
167  info_factorization = 1;
168  }
169  else{
170  printf("-- Factorization is CORRECT ! \n");
171  info_factorization = 0;
172  }
173 
174  free(Residual); free(L1); free(L2); free(work);
175 
176  return info_factorization;
177 }
178 
179 /*------------------------------------------------------------
180  * Check the Transformation to standard eigenvalue problem
181  */
182 
183 static int check_transformation(int itype, int uplo, int N, PLASMA_Complex32_t *A1, PLASMA_Complex32_t *A2, int LDA, PLASMA_Complex32_t *B2, int LDB, float eps)
184 {
185  PLASMA_Complex32_t alpha = 1.0;
186  float Anorm, Rnorm, result;
187  int info_transformation;
188  int i, j;
189  char *str;
190 
191  PLASMA_Complex32_t *Residual = (PLASMA_Complex32_t *)malloc(N*N*sizeof(PLASMA_Complex32_t));
192  PLASMA_Complex32_t *Aorig = (PLASMA_Complex32_t *)malloc(N*N*sizeof(PLASMA_Complex32_t));
193  float *work = (float *)malloc(N*sizeof(float));
194 
195  LAPACKE_clacpy_work(LAPACK_COL_MAJOR, 'a', N, N, A1, LDA, Residual, N);
196  LAPACKE_clacpy_work(LAPACK_COL_MAJOR, lapack_const(uplo), N, N, A2, LDA, Aorig, N);
197 
198  /* Rebuild the symmetry of A2 */
199  if (uplo == PlasmaLower) {
200  for (j = 0; j < N; j++)
201  for (i = j+1; i < N; i++)
202  Aorig[j+i*N] = conjf(Aorig[i+j*N]);
203  } else {
204  for (i = 0; i < N; i++)
205  for (j = i+1; j < N; j++)
206  Aorig[j+i*N] = conjf(Aorig[i+j*N]);
207  }
208 
209  if (itype == 1) {
210  if (uplo == PlasmaLower) {
211  str = "L*A2*L'";
212  cblas_ctrmm(CblasColMajor, CblasLeft, CblasLower, CblasNoTrans, CblasNonUnit, N, N, CBLAS_SADDR(alpha), B2, LDB, Aorig, N);
214  }
215  else{
216  str = "U'*A2*U";
218  cblas_ctrmm(CblasColMajor, CblasRight, CblasUpper, CblasNoTrans, CblasNonUnit, N, N, CBLAS_SADDR(alpha), B2, LDB, Aorig, N);
219  }
220  }
221  else {
222  if (uplo == PlasmaLower) {
223  str = "inv(L')*A2*inv(L)";
225  cblas_ctrsm(CblasColMajor, CblasRight, CblasLower, CblasNoTrans, CblasNonUnit, N, N, CBLAS_SADDR(alpha), B2, LDB, Aorig, N);
226  }
227  else{
228  str = "inv(U)*A2*inv(U')";
229  cblas_ctrsm(CblasColMajor, CblasLeft, CblasUpper, CblasNoTrans, CblasNonUnit, N, N, CBLAS_SADDR(alpha), B2, LDB, Aorig, N);
231 
232  }
233  }
234 
235  /* Compute the Residual ( A1 - W ) */
236  for (i = 0; i < N; i++)
237  for (j = 0; j < N; j++)
238  Residual[j*N+i] = Aorig[j*N+i] - Residual[j*N+i];
239 
240  Rnorm = LAPACKE_clange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm), N, N, Residual, N, work);
241  Anorm = LAPACKE_clange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm), N, N, A1, LDA, work);
242 
243  result = Rnorm / (Anorm * N *eps);
244  printf("============\n");
245  printf("Checking the global transformation \n");
246  printf("-- ||A1-%s||_oo/(||A1||_oo.N.eps) = %e \n", str, result );
247 
248  if (isnan(result) || isinf(result) || (result > 60.0) ) {
249  printf("-- Transformation is suspicious ! \n");
250  info_transformation = 1;
251  }
252  else {
253  printf("-- Transformation is CORRECT ! \n");
254  info_transformation = 0;
255  }
256 
257  free(Residual); free(Aorig); free(work);
258 
259  return info_transformation;
260 }