62 BLAS_error(
char *rname,
int err,
int val,
int x) {
63 fprintf( stderr,
"%s %d %d %d\n", rname, err, val, x );
71 int i, j;
float anorm, v;
72 char rname[] =
"BLAS_ssy_norm";
74 if (order !=
blas_colmajor) BLAS_error( rname, -1, order, 0 );
79 for (i = 0; i < n; ++i) {
81 for (j = 0; j < i; ++j) {
82 v += fabsf( a[j + i * lda] );
84 for (j = i; j < n; ++j) {
85 v += fabsf( a[i + j * lda] );
91 BLAS_error( rname, -3, norm, 0 );
95 BLAS_error( rname, -2, norm, 0 );
99 if (res) *res = anorm;
105 int m,
int n,
const float *a,
int lda,
float *res) {
106 int i, j;
float anorm, v;
107 char rname[] =
"BLAS_sge_norm";
109 if (order !=
blas_colmajor) BLAS_error( rname, -1, order, 0 );
113 for (j = n; j; --j) {
114 for (i = m; i; --i) {
121 anorm = sqrt( anorm );
124 for (i = 0; i < m; ++i) {
126 for (j = 0; j < n; ++j) {
127 v += fabsf( a[i + j * lda] );
133 BLAS_error( rname, -2, norm, 0 );
137 if (res) *res = anorm;
142 BLAS_spow_di(
float x,
int n) {
150 for (; n; n >>= 1, x *= x) {
161 float eps = 1.0, r = 1.0, o = 1.0, b = 2.0;
162 int t = 53, l = 1024, m = -1021;
163 char rname[] =
"BLAS_sfpinfo";
165 if ((
sizeof eps) ==
sizeof(
float)) {
176 eps = BLAS_spow_di( b, -t );
178 r = BLAS_spow_di( b, m-1 );
182 o = (o * BLAS_spow_di( b, l-1 )) * b;
188 BLAS_error( rname, -1, cmach, 0 );
196 static int check_solution(
int,
int,
int,
float*,
int,
float*,
float*,
int,
float);
205 mode = atoi(argv[0]);
209 if ( ((mode == 0) && (argc != 6)) ||
210 ((mode != 0) && (argc != 7)) ){
212 USAGE(
"GELS",
"MODE M N LDA NRHS LDB [RH]",
213 " - MODE : 0: flat, 1: tree (RH needed)\n"
214 " - M : number of rows of the matrix A\n"
215 " - N : number of columns of the matrix A\n"
216 " - LDA : leading dimension of the matrix A\n"
217 " - NRHS : number of RHS\n"
218 " - LDB : leading dimension of the matrix B\n"
219 " - RH : Size of each subdomains\n");
223 int M = atoi(argv[1]);
224 int N = atoi(argv[2]);
225 int LDA = atoi(argv[3]);
226 int NRHS = atoi(argv[4]);
227 int LDB = atoi(argv[5]);
232 int info_ortho, info_solution, info_factorization;
235 int LDBxNRHS = LDB*NRHS;
237 float *A1 = (
float *)malloc(LDA*N*
sizeof(
float));
238 float *A2 = (
float *)malloc(LDA*N*
sizeof(
float));
239 float *B1 = (
float *)malloc(LDB*NRHS*
sizeof(
float));
240 float *B2 = (
float *)malloc(LDB*NRHS*
sizeof(
float));
241 float *
Q = (
float *)malloc(LDA*N*
sizeof(
float));
245 if ((!A1)||(!A2)||(!B1)||(!B2)||(!Q)){
246 printf(
"Out of Memory \n ");
265 LAPACKE_slarnv_work(
IONE,
ISEED, LDAxN, A1);
266 for (i = 0; i < M; i++)
267 for (j = 0; j < N; j++)
268 A2[LDA*j+i] = A1[LDA*j+i] ;
271 LAPACKE_slarnv_work(
IONE,
ISEED, LDBxNRHS, B1);
272 for (i = 0; i < M; i++)
273 for (j = 0; j < NRHS; j++)
274 B2[LDB*j+i] = B1[LDB*j+i] ;
276 memset((
void*)Q, 0, LDA*N*
sizeof(
float));
277 for (i = 0; i < K; i++)
292 printf(
"------ TESTS FOR PLASMA SGELS ROUTINE ------- \n");
293 printf(
" Size of the Matrix %d by %d\n", M, N);
295 printf(
" The matrix A is randomly generated for each test.\n");
296 printf(
"============\n");
297 printf(
" The relative machine precision (eps) is to be %e \n",eps);
298 printf(
" Computational tests pass if scaled residuals are less than 60.\n");
303 info_solution =
check_solution(M, N, NRHS, A1, LDA, B1, B2, LDB, eps);
305 if ((info_solution == 0)&(info_factorization == 0)&(info_ortho == 0)) {
306 printf(
"***************************************************\n");
307 printf(
" ---- TESTING SGELS ...................... PASSED !\n");
308 printf(
"***************************************************\n");
311 printf(
"************************************************\n");
312 printf(
" - TESTING SGELS ... FAILED !\n");
313 printf(
"************************************************\n");
321 LAPACKE_slarnv_work(
IONE,
ISEED, LDAxN, A1);
322 for (i = 0; i < M; i++)
323 for (j = 0; j < N; j++)
324 A2[LDA*j+i] = A1[LDA*j+i];
327 LAPACKE_slarnv_work(
IONE,
ISEED, LDBxNRHS, B1);
328 for (i = 0; i < M; i++)
329 for (j = 0; j < NRHS; j++)
330 B2[LDB*j+i] = B1[LDB*j+i];
332 memset((
void*)Q, 0, LDA*N*
sizeof(
float));
333 for (i = 0; i < K; i++)
338 printf(
"------ TESTS FOR PLASMA SGEQRF + SGEQRS ROUTINE ------- \n");
339 printf(
" Size of the Matrix %d by %d\n", M, N);
341 printf(
" The matrix A is randomly generated for each test.\n");
342 printf(
"============\n");
343 printf(
" The relative machine precision (eps) is to be %e \n", eps);
344 printf(
" Computational tests pass if scaled residuals are less than 60.\n");
354 info_solution =
check_solution(M, N, NRHS, A1, LDA, B1, B2, LDB, eps);
356 if ((info_solution == 0)&(info_factorization == 0)&(info_ortho == 0)) {
357 printf(
"***************************************************\n");
358 printf(
" ---- TESTING SGEQRF + SGEQRS ............ PASSED !\n");
359 printf(
"***************************************************\n");
362 printf(
"***************************************************\n");
363 printf(
" - TESTING SGEQRF + SGEQRS ... FAILED !\n");
364 printf(
"***************************************************\n");
369 printf(
"------ TESTS FOR PLASMA SGELQF + SGELQS ROUTINE ------- \n");
370 printf(
" Size of the Matrix %d by %d\n", M, N);
372 printf(
" The matrix A is randomly generated for each test.\n");
373 printf(
"============\n");
374 printf(
" The relative machine precision (eps) is to be %e \n", eps);
375 printf(
" Computational tests pass if scaled residuals are less than 60.\n");
385 info_solution =
check_solution(M, N, NRHS, A1, LDA, B1, B2, LDB, eps);
387 if ( (info_solution == 0) & (info_factorization == 0) & (info_ortho == 0) ) {
388 printf(
"***************************************************\n");
389 printf(
" ---- TESTING SGELQF + SGELQS ............ PASSED !\n");
390 printf(
"***************************************************\n");
393 printf(
"***************************************************\n");
394 printf(
" - TESTING SGELQF + SGELQS ... FAILED !\n");
395 printf(
"***************************************************\n");
404 LAPACKE_slarnv_work(
IONE,
ISEED, LDAxN, A1);
405 for (i = 0; i < M; i++)
406 for (j = 0; j < N; j++)
407 A2[LDA*j+i] = A1[LDA*j+i];
410 memset(B2, 0, LDB*NRHS*
sizeof(
float));
411 LAPACKE_slarnv_work(
IONE,
ISEED, LDBxNRHS, B1);
412 for (i = 0; i < M; i++)
413 for (j = 0; j < NRHS; j++)
414 B2[LDB*j+i] = B1[LDB*j+i];
417 memset((
void*)Q, 0, LDA*N*
sizeof(
float));
418 for (i = 0; i < K; i++)
423 printf(
"------ TESTS FOR PLASMA SGEQRF + SORMQR + STRSM ROUTINE ------- \n");
424 printf(
" Size of the Matrix %d by %d\n", M, N);
426 printf(
" The matrix A is randomly generated for each test.\n");
427 printf(
"============\n");
428 printf(
" The relative machine precision (eps) is to be %e \n",eps);
429 printf(
" Computational tests pass if scaled residuals are less than 60.\n");
434 PLASMA_strsm(
PlasmaLeft,
PlasmaUpper,
PlasmaNoTrans,
PlasmaNonUnit, N, NRHS, 1.0, A2, LDA, B2, LDB);
438 printf(
"------ TESTS FOR PLASMA SGELQF + SORMLQ + STRSM ROUTINE ------- \n");
439 printf(
" Size of the Matrix %d by %d\n", M, N);
441 printf(
" The matrix A is randomly generated for each test.\n");
442 printf(
"============\n");
443 printf(
" The relative machine precision (eps) is to be %e \n",eps);
444 printf(
" Computational tests pass if scaled residuals are less than 60.\n");
447 PLASMA_strsm(
PlasmaLeft,
PlasmaLower,
PlasmaNoTrans,
PlasmaNonUnit, M, NRHS, 1.0, A2, LDA, B2, LDB);
455 info_solution =
check_solution(M, N, NRHS, A1, LDA, B1, B2, LDB, eps);
457 if ( (info_solution == 0) & (info_factorization == 0) & (info_ortho == 0) ) {
459 printf(
"***************************************************\n");
460 printf(
" ---- TESTING SGEQRF + SORMQR + STRSM .... PASSED !\n");
461 printf(
"***************************************************\n");
464 printf(
"***************************************************\n");
465 printf(
" ---- TESTING SGELQF + STRSM + SORMLQ .... PASSED !\n");
466 printf(
"***************************************************\n");
471 printf(
"***************************************************\n");
472 printf(
" - TESTING SGEQRF + SORMQR + STRSM ... FAILED !\n");
473 printf(
"***************************************************\n");
476 printf(
"***************************************************\n");
477 printf(
" - TESTING SGELQF + STRSM + SORMLQ ... FAILED !\n");
478 printf(
"***************************************************\n");
482 free(A1); free(A2); free(B1); free(B2); free(Q); free(T);
497 int minMN =
min(M, N);
499 float *work = (
float *)malloc(minMN*
sizeof(
float));
505 float *Id = (
float *) malloc(minMN*minMN*
sizeof(
float));
506 memset((
void*)Id, 0, minMN*minMN*
sizeof(
float));
507 for (i = 0; i < minMN; i++)
508 Id[i*minMN+i] = (
float)1.0;
512 cblas_ssyrk(
CblasColMajor,
CblasUpper,
CblasTrans, N, M, alpha, Q, LDQ, beta, Id, N);
514 cblas_ssyrk(
CblasColMajor,
CblasUpper,
CblasNoTrans, M, N, alpha, Q, LDQ, beta, Id, M);
518 printf(
"============\n");
519 printf(
"Checking the orthogonality of Q \n");
520 printf(
"||Id-Q'*Q||_oo / (N*eps) = %e \n", normQ/(minMN*eps));
522 if ( isnan(normQ / (minMN * eps)) || isinf(normQ / (minMN * eps)) || (normQ / (minMN * eps) > 60.0) ) {
523 printf(
"-- Orthogonality is suspicious ! \n");
527 printf(
"-- Orthogonality is CORRECT ! \n");
531 free(work); free(Id);
540 static int check_factorization(
int M,
int N,
float *A1,
float *A2,
int LDA,
float *Q,
float eps )
544 int info_factorization;
547 float *Ql = (
float *)malloc(M*N*
sizeof(
float));
548 float *Residual = (
float *)malloc(M*N*
sizeof(
float));
549 float *work = (
float *)malloc(
max(M,N)*
sizeof(float));
556 float *R = (
float *)malloc(N*N*
sizeof(
float));
557 memset((
void*)R, 0, N*N*
sizeof(
float));
558 LAPACKE_slacpy_work(LAPACK_COL_MAJOR,
'u', M, N, A2, LDA, R, N);
561 memset((
void*)Ql, 0, M*N*
sizeof(
float));
562 cblas_sgemm(
CblasColMajor,
CblasNoTrans,
CblasNoTrans, M, N, N, (alpha), Q, LDA, R, N, (beta), Ql, M);
567 float *
L = (
float *)malloc(M*M*
sizeof(
float));
568 memset((
void*)L, 0, M*M*
sizeof(
float));
569 LAPACKE_slacpy_work(LAPACK_COL_MAJOR,
'l', M, N, A2, LDA, L, M);
572 memset((
void*)Ql, 0, M*N*
sizeof(
float));
573 cblas_sgemm(
CblasColMajor,
CblasNoTrans,
CblasNoTrans, M, N, M, (alpha), L, M, Q, LDA, (beta), Ql, M);
578 for (i = 0; i < M; i++)
579 for (j = 0 ; j < N; j++)
580 Residual[j*M+i] = A1[j*LDA+i]-Ql[j*M+i];
586 printf(
"============\n");
587 printf(
"Checking the QR Factorization \n");
588 printf(
"-- ||A-QR||_oo/(||A||_oo.N.eps) = %e \n",Rnorm/(Anorm*N*eps));
591 printf(
"============\n");
592 printf(
"Checking the LQ Factorization \n");
593 printf(
"-- ||A-LQ||_oo/(||A||_oo.N.eps) = %e \n",Rnorm/(Anorm*N*eps));
596 if (isnan(Rnorm / (Anorm * N *eps)) || isinf(Rnorm / (Anorm * N *eps)) || (Rnorm / (Anorm * N * eps) > 60.0) ) {
597 printf(
"-- Factorization is suspicious ! \n");
598 info_factorization = 1;
601 printf(
"-- Factorization is CORRECT ! \n");
602 info_factorization = 0;
605 free(work); free(Ql); free(Residual);
607 return info_factorization;
614 static int check_solution(
int M,
int N,
int NRHS,
float *A1,
int LDA,
float *B1,
float *B2,
int LDB,
float eps)
617 float Rnorm, Anorm, Xnorm, Bnorm;
620 float *work = (
float *)malloc(
max(M, N)*
sizeof(float));
629 cblas_sgemm(
CblasColMajor,
CblasNoTrans,
CblasNoTrans, M, NRHS, N, (alpha), A1, LDA, B2, LDB, (beta), B1, LDB);
632 float *Residual = (
float *)malloc(M*NRHS*
sizeof(
float));
633 memset((
void*)Residual, 0, M*NRHS*
sizeof(
float));
634 cblas_sgemm(
CblasColMajor,
CblasTrans,
CblasNoTrans, N, NRHS, M, (alpha), A1, LDA, B1, LDB, (beta), Residual, M);
639 float *Residual = (
float *)malloc(N*NRHS*
sizeof(
float));
640 memset((
void*)Residual, 0, N*NRHS*
sizeof(
float));
641 cblas_sgemm(
CblasColMajor,
CblasTrans,
CblasNoTrans, N, NRHS, M, (alpha), A1, LDA, B1, LDB, (beta), Residual, N);
646 if (getenv(
"PLASMA_TESTING_VERBOSE"))
647 printf(
"||A||_oo=%f\n||X||_oo=%f\n||B||_oo=%f\n||A X - B||_oo=%e\n", Anorm, Xnorm, Bnorm, Rnorm );
649 result = Rnorm / ( (Anorm*Xnorm+Bnorm)*N*eps ) ;
650 printf(
"============\n");
651 printf(
"Checking the Residual of the solution \n");
652 printf(
"-- ||Ax-B||_oo/((||A||_oo||x||_oo+||B||_oo).N.eps) = %e \n", result);
654 if ( isnan(Xnorm) || isinf(Xnorm) || isnan(result) || isinf(result) || (result > 60.0) ) {
655 printf(
"-- The solution is suspicious ! \n");
659 printf(
"-- The solution is CORRECT ! \n");
663 return info_solution;