30 static int check_reduction(
int,
int,
int,
int,
float*,
float*,
int,
float*,
int,
float*,
float);
32 static int check_solution(
int,
int,
int,
float*,
int,
float*,
float*,
int,
float);
40 USAGE(
"HEGV",
"N LDA LDB",
41 " - N : size of the matrices A and B\n"
42 " - LDA : leading dimension of the matrix A\n"
43 " - LDB : leading dimension of the matrix B\n");
47 float eps = LAPACKE_slamch_work(
'e');
49 int N = atoi(argv[0]);
50 int LDA = atoi(argv[1]);
51 int LDB = atoi(argv[2]);
58 int info_solution = 0;
59 int info_reduction = 0;
62 float *A1 = (
float *)malloc(LDAxN*
sizeof(
float));
63 float *A2 = (
float *)malloc(LDAxN*
sizeof(
float));
64 float *B1 = (
float *)malloc(LDBxN*
sizeof(
float));
65 float *B2 = (
float *)malloc(LDBxN*
sizeof(
float));
66 float *
Q = (
float *)malloc(LDQxN*
sizeof(
float));
67 float *Ainit = (
float *)malloc(LDAxN*
sizeof(
float));
68 float *Binit = (
float *)malloc(LDBxN*
sizeof(
float));
69 float *W1 = (
float *)malloc(N*
sizeof(
float));
70 float *W2 = (
float *)malloc(N*
sizeof(
float));
71 float *work = (
float *)malloc(3*N*
sizeof(
float));
75 if ((!A1)||(!A2)||(!B1)||(!B2)||(!Q)||(!Ainit)||(!Binit)){
76 printf(
"Out of Memory \n ");
96 LAPACKE_slacpy_work(LAPACK_COL_MAJOR,
'A', N, N, A1, LDA, Ainit, LDA);
100 LAPACKE_slacpy_work(LAPACK_COL_MAJOR,
'A', N, N, B1, LDB, Binit, LDB);
103 printf(
"------ TESTS FOR PLASMA SSYGV ROUTINE ------- \n");
104 printf(
" Size of the Matrix %d by %d\n", N, N);
106 printf(
" The matrix A is randomly generated for each test.\n");
107 printf(
"============\n");
108 printf(
" The relative machine precision (eps) is to be %e \n",eps);
109 printf(
" Computational tests pass if scaled residuals are less than 60.\n");
115 for (i=0; i<3; i++) {
116 for (u=0; u<2; u++) {
117 LAPACKE_slaset_work(LAPACK_COL_MAJOR,
'A', LDA, N, 0., 1., Q, LDA);
119 memcpy(A2, Ainit, LDAxN*
sizeof(
float));
120 memcpy(B2, Binit, LDBxN*
sizeof(
float));
122 PLASMA_ssygv(
itype[i], vec,
uplo[u], N, A2, LDA, B2, LDB, W2, T, Q, LDQ);
132 info_reduction = check_reduction(
itype[i],
uplo[u], N, 1, A1, A2, LDA, B2, LDB, Q, eps);
134 memcpy(A1, Ainit, LDAxN*
sizeof(
float));
135 memcpy(B1, Binit, LDBxN*
sizeof(
float));
137 LAPACKE_ssygv( LAPACK_COL_MAJOR,
139 N, A1, LDA, B1, LDB, W1);
144 if ( (info_ortho == 0) & (info_reduction == 0) & (info_solution == 0)) {
145 printf(
"***************************************************\n");
146 printf(
" ---- TESTING SSYGV (%s, %s) ...................... PASSED !\n",
itypestr[i],
uplostr[u]);
147 printf(
"***************************************************\n");
150 printf(
"************************************************\n");
151 printf(
" - TESTING SSYGV (%s, %s) ... FAILED !\n",
itypestr[i],
uplostr[u]);
152 printf(
"************************************************\n");
181 int minMN =
min(M, N);
182 float *work = (
float *)malloc(minMN*
sizeof(
float));
185 float *Id = (
float *) malloc(minMN*minMN*
sizeof(
float));
186 LAPACKE_slaset_work(LAPACK_COL_MAJOR,
'A', minMN, minMN, 0., 1., Id, minMN);
190 cblas_ssyrk(
CblasColMajor,
CblasUpper,
CblasTrans, N, M, alpha, Q, LDQ, beta, Id, N);
192 cblas_ssyrk(
CblasColMajor,
CblasUpper,
CblasNoTrans, M, N, alpha, Q, LDQ, beta, Id, M);
196 result = normQ / (minMN * eps);
197 printf(
"============\n");
198 printf(
"Checking the orthogonality of Q \n");
199 printf(
"||Id-Q'*Q||_oo / (minMN*eps) = %e \n", result);
201 if ( isnan(result) || isinf(result) || (result > 60.0) ) {
202 printf(
"-- Orthogonality is suspicious ! \n");
206 printf(
"-- Orthogonality is CORRECT ! \n");
210 free(work); free(Id);
219 static int check_reduction(
int itype,
int uplo,
int N,
int bw,
220 float *A1,
float *A2,
int LDA,
221 float *B2,
int LDB,
float *Q,
float eps )
225 float Anorm, Rnorm, result;
230 float *Aorig = (
float *)malloc(N*N*
sizeof(
float));
231 float *Residual = (
float *)malloc(N*N*
sizeof(
float));
232 float *
T = (
float *)malloc(N*N*
sizeof(
float));
233 float *work = (
float *)malloc(N*
sizeof(
float));
235 memset((
void*)T, 0, N*N*
sizeof(
float));
236 LAPACKE_slacpy_work(LAPACK_COL_MAJOR,
' ', N, N, A1, LDA, Residual, N);
239 LAPACKE_slacpy_work(LAPACK_COL_MAJOR,
lapack_const(uplo), N, N, A2, LDA, T, N);
243 for (i = bw+1; i < N; i++)
244 for (j = 0 ; (j < N) && (j < i-bw); j++)
248 for (i = 0; i < N; i++)
249 for (j = 0 ; j < i; j++)
250 T[i*N+j] = (T[j*N+i]);
253 for (j = bw+1; j < N; j++)
254 for (i = 0 ; (i < N) && (i < j-bw); i++)
257 for (i = 0; i < N; i++)
258 for (j = i+1 ; j < N; j++)
259 T[i*N+j] = (T[j*N+i]);
262 memset((
void*)Aorig, 0, N*N*
sizeof(
float));
269 cblas_sgemm(
CblasColMajor,
CblasNoTrans,
CblasNoTrans, N, N, N, (alpha), Q, LDA, T, N, (beta), Aorig, N);
270 cblas_sgemm(
CblasColMajor,
CblasNoTrans,
CblasTrans, N, N, N, (alpha), Aorig, N, Q, LDA, (beta), T, N);
272 LAPACKE_slacpy_work(LAPACK_COL_MAJOR,
PlasmaUpperLower, N, N, T, N, Aorig, N);
274 cblas_strmm(
CblasColMajor,
CblasLeft,
CblasLower,
CblasNoTrans,
CblasNonUnit, N, N, (alpha), B2, LDB, Aorig, N);
275 cblas_strmm(
CblasColMajor,
CblasRight,
CblasLower,
CblasTrans,
CblasNonUnit, N, N, (alpha), B2, LDB, Aorig, N);
282 cblas_sgemm(
CblasColMajor,
CblasTrans,
CblasNoTrans, N, N, N, (alpha), Q, LDA, T, N, (beta), Aorig, N);
283 cblas_sgemm(
CblasColMajor,
CblasNoTrans,
CblasNoTrans, N, N, N, (alpha), Aorig, N, Q, LDA, (beta), T, N);
285 LAPACKE_slacpy_work(LAPACK_COL_MAJOR,
'A', N, N, T, N, Aorig, N);
287 cblas_strmm(
CblasColMajor,
CblasLeft,
CblasUpper,
CblasTrans,
CblasNonUnit, N, N, (alpha), B2, LDB, Aorig, N);
288 cblas_strmm(
CblasColMajor,
CblasRight,
CblasUpper,
CblasNoTrans,
CblasNonUnit, N, N, (alpha), B2, LDB, Aorig, N);
293 str =
"inv(L')*Q*A2*Q'*inv(L)";
296 cblas_sgemm(
CblasColMajor,
CblasNoTrans,
CblasNoTrans, N, N, N, (alpha), Q, LDA, T, N, (beta), Aorig, N);
297 cblas_sgemm(
CblasColMajor,
CblasNoTrans,
CblasTrans, N, N, N, (alpha), Aorig, N, Q, LDA, (beta), T, N );
299 LAPACKE_slacpy_work(LAPACK_COL_MAJOR,
'A', N, N, T, N, Aorig, N);
301 cblas_strsm(
CblasColMajor,
CblasLeft,
CblasLower,
CblasTrans,
CblasNonUnit, N, N, (alpha), B2, LDB, Aorig, N);
302 cblas_strsm(
CblasColMajor,
CblasRight,
CblasLower,
CblasNoTrans,
CblasNonUnit, N, N, (alpha), B2, LDB, Aorig, N);
305 str =
"inv(U)*Q*A2*Q'*inv(U')";
308 cblas_sgemm(
CblasColMajor,
CblasTrans,
CblasNoTrans, N, N, N, (alpha), Q, LDA, T, N, (beta), Aorig, N);
309 cblas_sgemm(
CblasColMajor,
CblasNoTrans,
CblasNoTrans, N, N, N, (alpha), Aorig, N, Q, LDA, (beta), T, N);
311 LAPACKE_slacpy_work(LAPACK_COL_MAJOR,
'A', N, N, T, N, Aorig, N);
313 cblas_strsm(
CblasColMajor,
CblasLeft,
CblasUpper,
CblasNoTrans,
CblasNonUnit, N, N, (alpha), B2, LDB, Aorig, N);
314 cblas_strsm(
CblasColMajor,
CblasRight,
CblasUpper,
CblasTrans,
CblasNonUnit, N, N, (alpha), B2, LDB, Aorig, N);
319 for (i = 0; i < N; i++)
320 for (j = 0 ; j < N; j++)
321 Residual[j*N+i] = A1[j*LDA+i]-Aorig[j*N+i];
326 result = Rnorm / (Anorm * N *eps);
327 printf(
"============\n");
328 printf(
"Checking the tridiagonal reduction \n");
329 printf(
"-- ||A-%s||_oo/(||A||_oo.N.eps) = %e \n", str, result );
331 if (isnan(result) || isinf(result) || (result > 60.0) ) {
332 printf(
"-- Reduction is suspicious ! \n");
336 printf(
"-- Reduction is CORRECT ! \n");
340 free(Aorig); free(Residual); free(T); free(work);
342 return info_reduction;
350 int info_solution, i;
353 float maxel = fabs( fabs(E1[0]) - fabs(E2[0]) );
354 float maxeig =
max( fabs(E1[0]), fabs(E2[0]) );
355 for (i = 1; i < N; i++){
356 resid = fabs(fabs(E1[i])-fabs(E2[i]));
357 maxtmp =
max(fabs(E1[i]), fabs(E2[i]));
360 maxeig =
max(maxtmp, maxeig);
361 maxel =
max(resid, maxel );
364 maxel = maxel / (maxeig * N * eps);
365 printf(
" ======================================================\n");
366 printf(
" | D - eigcomputed | / (|D| * N * eps) : %15.3E \n", maxel );
367 printf(
" ======================================================\n");
369 printf(
"============\n");
370 printf(
"Checking the eigenvalues of A\n");
371 if ( isnan(maxel) || isinf(maxel) || (maxel > 100) ) {
372 printf(
"-- The eigenvalues are suspicious ! \n");
376 printf(
"-- The eigenvalues are CORRECT ! \n");
380 return info_solution;
384 static int check_solution(
int M,
int N,
int NRHS,
float *A1,
int LDA,
385 float *B1,
float *B2,
int LDB,
float eps)
391 int maxMN =
max(M, N);
392 float Rnorm, Anorm, Xnorm, Bnorm, result;
393 float *work = (
float *)malloc(
max(maxMN, NRHS)*
sizeof(float));
404 Residual = (
float *)malloc(maxMN*NRHS*
sizeof(
float));
405 memset((
void*)Residual, 0, maxMN*NRHS*
sizeof(
float));
409 (beta), Residual, maxMN);
415 if (getenv(
"PLASMA_TESTING_VERBOSE"))
416 printf(
"||A||_oo=%f\n||X||_oo=%f\n||B||_oo=%f\n||A X - B||_oo=%e\n", Anorm, Xnorm, Bnorm, Rnorm );
418 result = Rnorm / ( (Anorm*Xnorm+Bnorm)*N*eps ) ;
419 printf(
"============\n");
420 printf(
"Checking the Residual of the solution \n");
421 printf(
"-- ||Ax-B||_oo/((||A||_oo||x||_oo+||B||_oo).N.eps) = %e \n", result);
423 if ( isnan(Xnorm) || isinf(Xnorm) || isnan(result) || isinf(result) || (result > 60.0) ) {
424 printf(
"-- The solution is suspicious ! \n");
428 printf(
"-- The solution is CORRECT ! \n");
431 return info_solution;