PLASMA  2.4.5
PLASMA - Parallel Linear Algebra for Scalable Multi-core Architectures
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups
testing_cgemm.c
Go to the documentation of this file.
1 
16 #include <stdlib.h>
17 #include <stdio.h>
18 #include <string.h>
19 #include <math.h>
20 
21 #include <plasma.h>
22 #include <cblas.h>
23 #include <lapacke.h>
24 #include <core_blas.h>
25 #include "testing_cmain.h"
26 
27 #undef REAL
28 #define COMPLEX
29 
30 static int check_solution(PLASMA_enum transA, PLASMA_enum transB, int M, int N, int K,
31  PLASMA_Complex32_t alpha, PLASMA_Complex32_t *A, int LDA,
32  PLASMA_Complex32_t *B, int LDB,
33  PLASMA_Complex32_t beta, PLASMA_Complex32_t *Cref, PLASMA_Complex32_t *Cplasma, int LDC);
34 
35 int testing_cgemm(int argc, char **argv)
36 {
37  /* Check for number of arguments*/
38  if ( argc != 8) {
39  USAGE("GEMM", "alpha beta M N K LDA LDB LDC",
40  " - alpha : alpha coefficient\n"
41  " - beta : beta coefficient\n"
42  " - M : number of rows of matrices A and C\n"
43  " - N : number of columns of matrices B and C\n"
44  " - K : number of columns of matrix A / number of rows of matrix B\n"
45  " - LDA : leading dimension of matrix A\n"
46  " - LDB : leading dimension of matrix B\n"
47  " - LDC : leading dimension of matrix C\n");
48  return -1;
49  }
50 
51  PLASMA_Complex32_t alpha = (PLASMA_Complex32_t) atol(argv[0]);
52  PLASMA_Complex32_t beta = (PLASMA_Complex32_t) atol(argv[1]);
53  int M = atoi(argv[2]);
54  int N = atoi(argv[3]);
55  int K = atoi(argv[4]);
56  int LDA = atoi(argv[5]);
57  int LDB = atoi(argv[6]);
58  int LDC = atoi(argv[7]);
59 
60  float eps;
61  int info_solution;
62  int i, j, ta, tb;
63  int LDAxK = LDA*max(M,K);
64  int LDBxN = LDB*max(K,N);
65  int LDCxN = LDC*N;
66 
67  PLASMA_Complex32_t *A = (PLASMA_Complex32_t *)malloc(LDAxK*sizeof(PLASMA_Complex32_t));
68  PLASMA_Complex32_t *B = (PLASMA_Complex32_t *)malloc(LDBxN*sizeof(PLASMA_Complex32_t));
69  PLASMA_Complex32_t *C = (PLASMA_Complex32_t *)malloc(LDCxN*sizeof(PLASMA_Complex32_t));
70  PLASMA_Complex32_t *Cinit = (PLASMA_Complex32_t *)malloc(LDCxN*sizeof(PLASMA_Complex32_t));
71  PLASMA_Complex32_t *Cfinal = (PLASMA_Complex32_t *)malloc(LDCxN*sizeof(PLASMA_Complex32_t));
72 
73  /* Check if unable to allocate memory */
74  if ((!A)||(!B)||(!Cinit)||(!Cfinal)){
75  printf("Out of Memory \n ");
76  return -2;
77  }
78 
79  eps = LAPACKE_slamch_work('e');
80 
81  printf("\n");
82  printf("------ TESTS FOR PLASMA CGEMM ROUTINE ------- \n");
83  printf(" Size of the Matrix %d by %d\n", M, N);
84  printf("\n");
85  printf(" The matrix A is randomly generated for each test.\n");
86  printf("============\n");
87  printf(" The relative machine precision (eps) is to be %e \n",eps);
88  printf(" Computational tests pass if scaled residuals are less than 10.\n");
89 
90  /*----------------------------------------------------------
91  * TESTING CGEMM
92  */
93 
94  /* Initialize A, B, C */
95  LAPACKE_clarnv_work(IONE, ISEED, LDAxK, A);
96  LAPACKE_clarnv_work(IONE, ISEED, LDBxN, B);
97  LAPACKE_clarnv_work(IONE, ISEED, LDCxN, C);
98 
99 #ifdef COMPLEX
100  for (ta=0; ta<3; ta++) {
101  for (tb=0; tb<3; tb++) {
102 #else
103  for (ta=0; ta<2; ta++) {
104  for (tb=0; tb<2; tb++) {
105 #endif
106  for ( i = 0; i < M; i++)
107  for ( j = 0; j < N; j++)
108  Cinit[LDC*j+i] = C[LDC*j+i];
109  for ( i = 0; i < M; i++)
110  for ( j = 0; j < N; j++)
111  Cfinal[LDC*j+i] = C[LDC*j+i];
112 
113  /* PLASMA CGEMM */
114  PLASMA_cgemm(trans[ta], trans[tb], M, N, K, alpha, A, LDA, B, LDB, beta, Cfinal, LDC);
115 
116  /* Check the solution */
117  info_solution = check_solution(trans[ta], trans[tb], M, N, K,
118  alpha, A, LDA, B, LDB, beta, Cinit, Cfinal, LDC);
119 
120  if (info_solution == 0) {
121  printf("***************************************************\n");
122  printf(" ---- TESTING CGEMM (%s, %s) ............... PASSED !\n", transstr[ta], transstr[tb]);
123  printf("***************************************************\n");
124  }
125  else {
126  printf("************************************************\n");
127  printf(" - TESTING CGEMM (%s, %s) ... FAILED !\n", transstr[ta], transstr[tb]);
128  printf("************************************************\n");
129  }
130  }
131  }
132 #ifdef _UNUSED_
133  }}
134 #endif
135  free(A); free(B); free(C);
136  free(Cinit); free(Cfinal);
137 
138  return 0;
139 }
140 
141 /*--------------------------------------------------------------
142  * Check the solution
143  */
144 
145 static int check_solution(PLASMA_enum transA, PLASMA_enum transB, int M, int N, int K,
146  PLASMA_Complex32_t alpha, PLASMA_Complex32_t *A, int LDA,
147  PLASMA_Complex32_t *B, int LDB,
148  PLASMA_Complex32_t beta, PLASMA_Complex32_t *Cref, PLASMA_Complex32_t *Cplasma, int LDC)
149 {
150  int info_solution;
151  float Anorm, Bnorm, Cinitnorm, Cplasmanorm, Clapacknorm, Rnorm, result;
152  float eps;
153  PLASMA_Complex32_t beta_const;
154 
155  float *work = (float *)malloc(max(K,max(M, N))* sizeof(float));
156  int Am, An, Bm, Bn;
157 
158  beta_const = -1.0;
159 
160  if (transA == PlasmaNoTrans) {
161  Am = M; An = K;
162  } else {
163  Am = K; An = M;
164  }
165  if (transB == PlasmaNoTrans) {
166  Bm = K; Bn = N;
167  } else {
168  Bm = N; Bn = K;
169  }
170 
171  Anorm = LAPACKE_clange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm), Am, An, A, LDA, work);
172  Bnorm = LAPACKE_clange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm), Bm, Bn, B, LDB, work);
173  Cinitnorm = LAPACKE_clange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm), M, N, Cref, LDC, work);
174  Cplasmanorm = LAPACKE_clange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm), M, N, Cplasma, LDC, work);
175 
176  cblas_cgemm(CblasColMajor, (CBLAS_TRANSPOSE)transA, (CBLAS_TRANSPOSE)transB, M, N, K,
177  CBLAS_SADDR(alpha), A, LDA, B, LDB, CBLAS_SADDR(beta), Cref, LDC);
178 
179  Clapacknorm = LAPACKE_clange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm), M, N, Cref, LDC, work);
180 
181  cblas_caxpy(LDC * N, CBLAS_SADDR(beta_const), Cplasma, 1, Cref, 1);
182 
183  Rnorm = LAPACKE_clange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm), M, N, Cref, LDC, work);
184 
185  eps = LAPACKE_slamch_work('e');
186 
187  printf("Rnorm %e, Anorm %e, Bnorm %e, Cinitnorm %e, Cplasmanorm %e, Clapacknorm %e\n",
188  Rnorm, Anorm, Bnorm, Cinitnorm, Cplasmanorm, Clapacknorm);
189 
190  result = Rnorm / ((Anorm + Bnorm + Cinitnorm) * N * eps);
191  printf("============\n");
192  printf("Checking the norm of the difference against reference CGEMM \n");
193  printf("-- ||Cplasma - Clapack||_oo/((||A||_oo+||B||_oo+||C||_oo).N.eps) = %e \n",
194  result);
195 
196  if ( isnan(Rnorm) || isinf(Rnorm) || isnan(result) || isinf(result) || (result > 10.0) ) {
197  printf("-- The solution is suspicious ! \n");
198  info_solution = 1;
199  }
200  else {
201  printf("-- The solution is CORRECT ! \n");
202  info_solution= 0 ;
203  }
204 
205  free(work);
206 
207  return info_solution;
208 }