PLASMA  2.4.5
PLASMA - Parallel Linear Algebra for Scalable Multi-core Architectures
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups
testing_cgesvd.c
Go to the documentation of this file.
1 
15 #include <stdlib.h>
16 #include <stdio.h>
17 #include <string.h>
18 #include <math.h>
19 
20 #include <plasma.h>
21 #include <cblas.h>
22 #include <lapacke.h>
23 #include <core_blas.h>
24 #include "testing_cmain.h"
25 
26 #undef REAL
27 #define COMPLEX
28 
29 static int check_orthogonality(int, int, int, PLASMA_Complex32_t*, int, float);
30 static int check_reduction(int, int, PLASMA_Complex32_t*, PLASMA_Complex32_t*, int, PLASMA_Complex32_t*, int, PLASMA_Complex32_t*, int, float);
31 static int check_solution(int, float*, float*, float);
32 
33 int testing_cgesvd(int argc, char **argv)
34 {
35  int tree = 0;
36 
37  if ( argc < 1 ){
38  goto usage;
39  } else {
40  tree = atoi(argv[0]);
41  }
42 
43  /* Check for number of arguments*/
44  if ( ((tree == 0) && (argc != 4)) ||
45  ((tree != 0) && (argc != 5)) ){
46  usage:
47  USAGE("GESVD", "MODE M N LDA [RH]",
48  " - MODE : 0: flat, 1: tree (RH needed)\n"
49  " - M : number of rows of the matrix A\n"
50  " - N : number of columns of the matrix A\n"
51  " - LDA : leading dimension of the matrix A\n"
52  " - RH : Size of each subdomains\n");
53  return -1;
54  }
55 
56  int M = atoi(argv[1]);
57  int N = atoi(argv[2]);
58  int LDA = atoi(argv[3]);
59  int rh;
60  if ( tree ) {
61  rh = atoi(argv[4]);
64  }
65 
66  if (LDA < M){
67  printf("LDA should be >= M !\n");
68  return -1;
69  }
70 
71  float eps = LAPACKE_slamch_work('e');
72  float dmax = 1.0;
73  PLASMA_enum vecu = PlasmaNoVec;
74  PLASMA_enum vecvt = PlasmaNoVec;
75  int info_orthou = 0;
76  int info_orthovt = 0;
77  int info_solution = 0;
78  int info_reduction = 0;
79  int minMN = min(M, N);
80  int mode = 4;
81  float rcond = (float) minMN;
82 
83  PLASMA_Complex32_t *A1 = (PLASMA_Complex32_t *)malloc(LDA*N*sizeof(PLASMA_Complex32_t));
84  float *S1 = (float *) malloc(minMN*sizeof(float));
85  float *S2 = (float *) malloc(minMN*sizeof(float));
86  PLASMA_Complex32_t *work = (PLASMA_Complex32_t *)malloc(3*max(M, N)* sizeof(PLASMA_Complex32_t));
87  PLASMA_Complex32_t *A2 = NULL;
88  PLASMA_Complex32_t *U = NULL;
89  PLASMA_Complex32_t *VT = NULL;
90  PLASMA_desc *T;
91 
92  /* Check if unable to allocate memory */
93  if ( (!A1) || (!S1) || (!S2) || (!work) ) {
94  printf("Out of Memory \n ");
95  return -2;
96  }
97 
98  /*
99  PLASMA_Disable(PLASMA_AUTOTUNING);
100  PLASMA_Set(PLASMA_TILE_SIZE, 120);
101  PLASMA_Set(PLASMA_INNER_BLOCK_SIZE, 40);
102  */
103 
107 
108  /*----------------------------------------------------------
109  * TESTING CGESVD
110  */
111  /* Initialize A1 */
112  LAPACKE_clatms_work( LAPACK_COL_MAJOR, M, N,
114  lapack_const(PlasmaNonsymPosv), S1, mode, rcond,
115  dmax, M, N,
116  lapack_const(PlasmaNoPacking), A1, LDA, work );
117  free(work);
118 
119  /* Copy A1 for check */
120  if ( (vecu == PlasmaVec) && (vecvt == PlasmaVec) ) {
121  A2 = (PLASMA_Complex32_t *)malloc(LDA*N*sizeof(PLASMA_Complex32_t));
122  LAPACKE_clacpy_work(LAPACK_COL_MAJOR, 'A', M, N, A1, LDA, A2, LDA);
123  }
124  if ( vecu == PlasmaVec ) {
125  U = (PLASMA_Complex32_t *)malloc(M*M*sizeof(PLASMA_Complex32_t));
126  LAPACKE_claset_work(LAPACK_COL_MAJOR, 'A', M, M, 0., 1., U, M);
127  }
128  if ( vecvt == PlasmaVec ) {
129  VT = (PLASMA_Complex32_t *)malloc(N*N*sizeof(PLASMA_Complex32_t));
130  LAPACKE_claset_work(LAPACK_COL_MAJOR, 'A', N, N, 0., 1., VT, N);
131  }
132 
133  /* PLASMA CGESVD */
134  PLASMA_cgesvd(vecu, vecvt, M, N, A1, LDA, S2, U, M, VT, N, T);
135 
136  if (getenv("PLASMA_TESTING_VERBOSE"))
137  {
138  int i;
139  printf("Eigenvalues original\n");
140  for (i = 0; i < min(N,25); i++){
141  printf("%f ", S1[i]);
142  }
143  printf("\n");
144 
145  printf("Eigenvalues computed\n");
146  for (i = 0; i < min(N,25); i++){
147  printf("%f ", S2[i]);
148  }
149  printf("\n");
150  }
151 
152  printf("\n");
153  printf("------ TESTS FOR PLASMA CGESVD ROUTINE ------- \n");
154  printf(" Size of the Matrix %d by %d\n", M, N);
155  printf("\n");
156  printf(" The matrix A is randomly generated for each test.\n");
157  printf("============\n");
158  printf(" The relative machine precision (eps) is to be %e \n",eps);
159  printf(" Computational tests pass if scaled residuals are less than 60.\n");
160 
161 
162  /* Check the orthogonality, reduction and the singular values */
163  if ( vecu == PlasmaVec )
164  info_orthou = check_orthogonality(PlasmaLeft, M, M, U, M, eps);
165 
166  if ( vecvt == PlasmaVec )
167  info_orthovt = check_orthogonality(PlasmaRight, N, N, VT, N, eps);
168 
169  /*
170  * WARNING: For now, Q is associated to Band tridiagonal reduction and
171  * not to the final tridiagonal reduction, so we can not call the check
172  * Need to accumulate the orthogonal transformations
173  * during the bulge chasing to be able to perform the next test!
174  */
175  if ( (vecu == PlasmaVec) && (vecvt == PlasmaVec) && 0 )
176  info_reduction = check_reduction(M, N, A2, A1, LDA, U, M, VT, N, eps);
177 
178  info_solution = check_solution(minMN, S1, S2, eps);
179 
180  if ( (info_solution == 0) & (info_orthou == 0) &
181  (info_orthovt == 0) & (info_reduction == 0) ) {
182  if (M >= N) {
183  printf("***************************************************\n");
184  printf(" ---- TESTING CGESVD .. M >= N ........... PASSED !\n");
185  printf("***************************************************\n");
186  }
187  else {
188  printf("***************************************************\n");
189  printf(" ---- TESTING CGESVD .. M < N ............ PASSED !\n");
190  printf("***************************************************\n");
191  }
192  }
193  else {
194  if (M >= N) {
195  printf("************************************************\n");
196  printf(" - TESTING CGESVD .. M >= N .. FAILED !\n");
197  printf("************************************************\n");
198  }
199  else {
200  printf("************************************************\n");
201  printf(" - TESTING CGESVD .. M < N .. FAILED !\n");
202  printf("************************************************\n");
203  }
204  }
205 
206  if ( A2 != NULL ) free(A2);
207  if ( U != NULL ) free(U);
208  if ( VT != NULL ) free(VT);
209  free(A1); free(S1); free(S2);
211 
212  return 0;
213 }
214 
215 /*-------------------------------------------------------------------
216  * Check the orthogonality of Q
217  */
218 static int check_orthogonality(int side, int M, int N, PLASMA_Complex32_t *Q, int LDQ, float eps)
219 {
220  float alpha = 1.0;
221  float beta = -1.0;
222  float normQ, result;
223  int info_ortho;
224  int minMN = min(M, N);
225  float *work = (float *)malloc(minMN*sizeof(float));
226 
227  /* Build the idendity matrix */
228  PLASMA_Complex32_t *Id = (PLASMA_Complex32_t *) malloc(minMN*minMN*sizeof(PLASMA_Complex32_t));
229  LAPACKE_claset_work(LAPACK_COL_MAJOR, 'A', minMN, minMN, 0., 1., Id, minMN);
230 
231  /* Perform Id - Q'Q */
232  if (M >= N)
233  cblas_cherk(CblasColMajor, CblasUpper, CblasConjTrans, N, M, alpha, Q, LDQ, beta, Id, N);
234  else
235  cblas_cherk(CblasColMajor, CblasUpper, CblasNoTrans, M, N, alpha, Q, LDQ, beta, Id, M);
236 
237  normQ = LAPACKE_clansy_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm), 'U', minMN, Id, minMN, work);
238 
239  if (getenv("PLASMA_TESTING_VERBOSE"))
240  printf( "||Q||_oo=%f\n", normQ );
241 
242  printf("================================\n");
243  result = normQ / (M * eps);
244  if (side == PlasmaLeft)
245  {
246  printf("Checking the orthogonality of U (Left singular vectors)\n");
247  printf("||Id-U'*U||_oo / (N*eps) = %e \n", result);
248  }
249  else
250  {
251  printf("Checking the orthogonality of VT (Right singular vectors)\n");
252  printf("||Id-VT'*VT||_oo / (N*eps) = %e \n", result);
253  }
254 
255  if ( isnan(result) || isinf(result) || (result > 60.0) ) {
256  printf("-- Orthogonality is suspicious ! \n");
257  info_ortho = 1;
258  }
259  else {
260  printf("-- Orthogonality is CORRECT ! \n");
261  info_ortho = 0;
262  }
263 
264  free(work); free(Id);
265 
266  return info_ortho;
267 }
268 
269 /*------------------------------------------------------------
270  * Check the bidiagonal reduction
271  */
272 static int check_reduction(int M, int N, PLASMA_Complex32_t *A1, PLASMA_Complex32_t *A2, int LDA,
273  PLASMA_Complex32_t *U, int LDU, PLASMA_Complex32_t *VT, int LDVT, float eps )
274 {
275  PLASMA_Complex32_t alpha = 1.0;
276  PLASMA_Complex32_t beta = 0.0;
277  float Anorm, Rnorm, result;
278  int info_reduction;
279  int i, j;
280  int maxMN = max(M, N);
281 
282  PLASMA_Complex32_t *Aorig = (PLASMA_Complex32_t *)malloc(M*N*sizeof(PLASMA_Complex32_t));
283  PLASMA_Complex32_t *Residual = (PLASMA_Complex32_t *)malloc(M*N*sizeof(PLASMA_Complex32_t));
284  PLASMA_Complex32_t *B = (PLASMA_Complex32_t *)malloc(M*N*sizeof(PLASMA_Complex32_t));
285  float *work = (float *)malloc(N*sizeof(float));
286 
287  memset((void*)B, 0, M*N*sizeof(PLASMA_Complex32_t));
288 
289  /* Rebuild the B */
290  LAPACKE_clacpy_work(LAPACK_COL_MAJOR, PlasmaUpperLower, M, N, A2, LDA, B, M);
291  memset((void*)Aorig, 0, M*N*sizeof(PLASMA_Complex32_t));
292 
293  /* Compute Aorig = U * B * VT */
294  cblas_cgemm(CblasColMajor, CblasNoTrans, CblasNoTrans, M, N, M, CBLAS_SADDR(alpha), U, LDU, B, M, CBLAS_SADDR(beta), Aorig, M);
295  cblas_cgemm(CblasColMajor, CblasNoTrans, CblasNoTrans, M, N, N, CBLAS_SADDR(alpha), Aorig, M, VT, LDVT, CBLAS_SADDR(beta), B, M);
296 
297  /* Compute the Residual */
298  for (i = 0; i < M; i++)
299  for (j = 0 ; j < N; j++)
300  Residual[j*M+i] = A1[j*LDA+i] - B[j*M+i];
301 
302  Rnorm = LAPACKE_clange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm), M, N, Residual, M, work);
303  Anorm = LAPACKE_clange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm), M, N, A2, LDA, work);
304 
305  if (getenv("PLASMA_TESTING_VERBOSE"))
306  printf( "||A||_oo=%f\n||A - U*B*VT||_oo=%e\n", Anorm, Rnorm );
307 
308  result = Rnorm / ( Anorm * maxMN * eps);
309  printf("============\n");
310  printf("Checking the bidiagonal reduction \n");
311  printf("-- ||A-U*B*VT||_oo/(||A||_oo.max(M,N).eps) = %e \n", result);
312 
313  if ( isnan(result) || isinf(result) || (result > 60.0) ) {
314  printf("-- Reduction is suspicious ! \n");
315  info_reduction = 1;
316  }
317  else {
318  printf("-- Reduction is CORRECT ! \n");
319  info_reduction = 0;
320  }
321 
322  free(Aorig); free(Residual); free(B); free(work);
323 
324  return info_reduction;
325 }
326 
327 static int check_solution(int N, float *E1, float *E2, float eps)
328 {
329  int info_solution, i;
330  float resid;
331  float maxtmp;
332  float maxel = fabs( fabs(E1[0]) - fabs(E2[0]) );
333  float maxeig = max( fabs(E1[0]), fabs(E2[0]) );
334  for (i = 1; i < N; i++){
335  resid = fabs(fabs(E1[i])-fabs(E2[i]));
336  maxtmp = max(fabs(E1[i]), fabs(E2[i]));
337 
338  /* Update */
339  maxeig = max(maxtmp, maxeig);
340  maxel = max(resid, maxel );
341  }
342 
343  maxel = maxel / (maxeig * N * eps);
344  printf(" ======================================================\n");
345  printf(" Checking the singular values of A\n");
346  printf(" | D - eigcomputed | / (|D| * N * eps) : %15.3E \n", maxel );
347 
348  if ( isnan(maxel) || isinf(maxel) || (maxel > 100) ) {
349  printf("-- The singular values are suspicious ! \n");
350  info_solution = 1;
351  }
352  else{
353  printf("-- The singular values are CORRECT ! \n");
354  info_solution = 0;
355  }
356 
357  return info_solution;
358 }