PLASMA  2.4.5
PLASMA - Parallel Linear Algebra for Scalable Multi-core Architectures
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups
testing_cher2k.c
Go to the documentation of this file.
1 
15 #include <stdlib.h>
16 #include <stdio.h>
17 #include <string.h>
18 #include <math.h>
19 
20 #include <plasma.h>
21 #include <cblas.h>
22 #include <lapacke.h>
23 #include <core_blas.h>
24 #include "testing_cmain.h"
25 
26 static int check_solution(PLASMA_enum uplo, PLASMA_enum trans, int N, int K,
27  PLASMA_Complex32_t alpha, PLASMA_Complex32_t *A, int LDA,
28  PLASMA_Complex32_t *B, int LDB,
29  float beta, PLASMA_Complex32_t *Cref, PLASMA_Complex32_t *Cplasma, int LDC);
30 
31 
32 int testing_cher2k(int argc, char **argv)
33 {
34  /* Check for number of arguments*/
35  if ( argc != 7 ){
36  USAGE("HER2K", "alpha beta M N LDA LDB LDC",
37  " - alpha : alpha coefficient\n"
38  " - beta : beta coefficient\n"
39  " - N : number of columns and rows of matrix C and number of row of matrix A and B\n"
40  " - K : number of columns of matrix A and B\n"
41  " - LDA : leading dimension of matrix A\n"
42  " - LDB : leading dimension of matrix B\n"
43  " - LDC : leading dimension of matrix C\n");
44  return -1;
45  }
46 
47  PLASMA_Complex32_t alpha = (PLASMA_Complex32_t) atol(argv[0]);
48  float beta = (float) atol(argv[1]);
49  int N = atoi(argv[2]);
50  int K = atoi(argv[3]);
51  int LDA = atoi(argv[4]);
52  int LDB = atoi(argv[5]);
53  int LDC = atoi(argv[6]);
54  int NKmax = max(N, K);
55 
56  float eps;
57  int info_solution;
58  int u, t;
59  size_t LDAxK = LDA*NKmax;
60  size_t LDBxK = LDB*NKmax;
61  size_t LDCxN = LDC*N;
62 
63  PLASMA_Complex32_t *A = (PLASMA_Complex32_t *)malloc(LDAxK*sizeof(PLASMA_Complex32_t));
64  PLASMA_Complex32_t *B = (PLASMA_Complex32_t *)malloc(LDBxK*sizeof(PLASMA_Complex32_t));
65  PLASMA_Complex32_t *C = (PLASMA_Complex32_t *)malloc(LDCxN*sizeof(PLASMA_Complex32_t));
66  PLASMA_Complex32_t *Cinit = (PLASMA_Complex32_t *)malloc(LDCxN*sizeof(PLASMA_Complex32_t));
67  PLASMA_Complex32_t *Cfinal = (PLASMA_Complex32_t *)malloc(LDCxN*sizeof(PLASMA_Complex32_t));
68 
69  /* Check if unable to allocate memory */
70  if ( (!A) || (!B) || (!Cinit) || (!Cfinal) ){
71  printf("Out of Memory \n ");
72  return -2;
73  }
74 
75  eps = LAPACKE_slamch_work('e');
76 
77  printf("\n");
78  printf("------ TESTS FOR PLASMA CHER2K ROUTINE ------- \n");
79  printf(" Size of the Matrix C %d by %d\n", N, K);
80  printf("\n");
81  printf(" The matrix A is randomly generated for each test.\n");
82  printf("============\n");
83  printf(" The relative machine precision (eps) is to be %e \n",eps);
84  printf(" Computational tests pass if scaled residuals are less than 10.\n");
85 
86  /*----------------------------------------------------------
87  * TESTING CHER2K
88  */
89 
90  /* Initialize A,B */
91  LAPACKE_clarnv_work(IONE, ISEED, LDAxK, A);
92  LAPACKE_clarnv_work(IONE, ISEED, LDBxK, B);
93 
94  /* Initialize C */
95  PLASMA_cplghe( (float)0., N, C, LDC, 51 );
96 
97  for (u=0; u<2; u++) {
98  for (t=0; t<3; t++) {
99  if (trans[t] == PlasmaTrans) continue;
100 
101  memcpy(Cinit, C, LDCxN*sizeof(PLASMA_Complex32_t));
102  memcpy(Cfinal, C, LDCxN*sizeof(PLASMA_Complex32_t));
103 
104  /* PLASMA CHER2K */
105  PLASMA_cher2k(uplo[u], trans[t], N, K, alpha, A, LDA, B, LDB, beta, Cfinal, LDC);
106 
107  /* Check the solution */
108  info_solution = check_solution(uplo[u], trans[t], N, K,
109  alpha, A, LDA, B, LDB, beta, Cinit, Cfinal, LDC);
110 
111  if (info_solution == 0) {
112  printf("***************************************************\n");
113  printf(" ---- TESTING CHER2K (%5s, %s) ........... PASSED !\n", uplostr[u], transstr[t]);
114  printf("***************************************************\n");
115  }
116  else {
117  printf("************************************************\n");
118  printf(" - TESTING CHER2K (%5s, %s) ... FAILED !\n", uplostr[u], transstr[t]);
119  printf("************************************************\n");
120  }
121  }
122  }
123 
124  free(A); free(B); free(C);
125  free(Cinit); free(Cfinal);
126 
127  return 0;
128 }
129 
130 /*--------------------------------------------------------------
131  * Check the solution
132  */
133 
134 static int check_solution(PLASMA_enum uplo, PLASMA_enum trans, int N, int K,
135  PLASMA_Complex32_t alpha, PLASMA_Complex32_t *A, int LDA,
136  PLASMA_Complex32_t *B, int LDB,
137  float beta, PLASMA_Complex32_t *Cref, PLASMA_Complex32_t *Cplasma, int LDC)
138 {
139  int info_solution;
140  float Anorm, Bnorm, Cinitnorm, Cplasmanorm, Clapacknorm, Rnorm, result;
141  float eps;
142  PLASMA_Complex32_t beta_const;
143 
144  float *work = (float *)malloc(max(N, K)* sizeof(float));
145 
146  beta_const = -1.0;
147  Anorm = LAPACKE_clange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm),
148  (trans == PlasmaNoTrans) ? N : K,
149  (trans == PlasmaNoTrans) ? K : N, A, LDA, work);
150  Bnorm = LAPACKE_clange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm),
151  (trans == PlasmaNoTrans) ? N : K,
152  (trans == PlasmaNoTrans) ? K : N, B, LDB, work);
153  Cinitnorm = LAPACKE_clange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm), N, N, Cref, LDC, work);
154  Cplasmanorm = LAPACKE_clange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm), N, N, Cplasma, LDC, work);
155 
157  N, K, CBLAS_SADDR(alpha), A, LDA, B, LDB, (beta), Cref, LDC);
158 
159  Clapacknorm = LAPACKE_clange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm), N, N, Cref, LDC, work);
160 
161  cblas_caxpy(LDC*N, CBLAS_SADDR(beta_const), Cplasma, 1, Cref, 1);
162 
163  Rnorm = LAPACKE_clange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm), N, N, Cref, LDC, work);
164 
165  eps = LAPACKE_slamch_work('e');
166 
167  printf("Rnorm %e, Anorm %e, Cinitnorm %e, Cplasmanorm %e, Clapacknorm %e\n",
168  Rnorm, Anorm, Cinitnorm, Cplasmanorm, Clapacknorm);
169 
170  result = Rnorm / ((Anorm + Bnorm + Cinitnorm) * N * eps);
171  printf("============\n");
172  printf("Checking the norm of the difference against reference CHER2K \n");
173  printf("-- ||Cplasma - Clapack||_oo/((||A||_oo+||C||_oo).N.eps) = %e \n", result);
174 
175  if ( isnan(Rnorm) || isinf(Rnorm) || isnan(result) || isinf(result) || (result > 10.0) ) {
176  printf("-- The solution is suspicious ! \n");
177  info_solution = 1;
178  }
179  else {
180  printf("-- The solution is CORRECT ! \n");
181  info_solution= 0 ;
182  }
183 
184  free(work);
185 
186  return info_solution;
187 }