PLASMA  2.4.5
PLASMA - Parallel Linear Algebra for Scalable Multi-core Architectures
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups
dauxiliary.c
Go to the documentation of this file.
1 
6 #include <stdlib.h>
7 #include <stdio.h>
8 #include <string.h>
9 #include <math.h>
10 #include <cblas.h>
11 #include <lapacke.h>
12 #include <plasma.h>
13 #include <core_blas.h>
14 #include "auxiliary.h"
15 
16 /*-------------------------------------------------------------------
17  * Check the orthogonality of Q
18  */
19 
20 int d_check_orthogonality(int M, int N, int LDQ, double *Q)
21 {
22  double alpha, beta;
23  double normQ;
24  int info_ortho;
25  int i;
26  int minMN = min(M, N);
27  double eps;
28  double *work = (double *)malloc(minMN*sizeof(double));
29 
30  eps = LAPACKE_dlamch_work('e');
31  alpha = 1.0;
32  beta = -1.0;
33 
34  /* Build the idendity matrix USE DLASET?*/
35  double *Id = (double *) malloc(minMN*minMN*sizeof(double));
36  memset((void*)Id, 0, minMN*minMN*sizeof(double));
37  for (i = 0; i < minMN; i++)
38  Id[i*minMN+i] = (double)1.0;
39 
40  /* Perform Id - Q'Q */
41  if (M >= N)
42  cblas_dsyrk(CblasColMajor, CblasUpper, CblasTrans, N, M, alpha, Q, LDQ, beta, Id, N);
43  else
44  cblas_dsyrk(CblasColMajor, CblasUpper, CblasNoTrans, M, N, alpha, Q, LDQ, beta, Id, M);
45 
46  normQ = LAPACKE_dlansy_work(LAPACK_COL_MAJOR, 'i', 'u', minMN, Id, minMN, work);
47 
48  printf("============\n");
49  printf("Checking the orthogonality of Q \n");
50  printf("||Id-Q'*Q||_oo / (N*eps) = %e \n",normQ/(minMN*eps));
51 
52  if ( isnan(normQ / (minMN * eps)) || (normQ / (minMN * eps) > 10.0) ) {
53  printf("-- Orthogonality is suspicious ! \n");
54  info_ortho=1;
55  }
56  else {
57  printf("-- Orthogonality is CORRECT ! \n");
58  info_ortho=0;
59  }
60 
61  free(work); free(Id);
62 
63  return info_ortho;
64 }
65 
66 /*------------------------------------------------------------
67  * Check the factorization QR
68  */
69 
70 int d_check_QRfactorization(int M, int N, double *A1, double *A2, int LDA, double *Q)
71 {
72  double Anorm, Rnorm;
73  double alpha, beta;
74  int info_factorization;
75  int i,j;
76  double eps;
77 
78  eps = LAPACKE_dlamch_work('e');
79 
80  double *Ql = (double *)malloc(M*N*sizeof(double));
81  double *Residual = (double *)malloc(M*N*sizeof(double));
82  double *work = (double *)malloc(max(M,N)*sizeof(double));
83 
84  alpha=1.0;
85  beta=0.0;
86 
87  if (M >= N) {
88  /* Extract the R */
89  double *R = (double *)malloc(N*N*sizeof(double));
90  memset((void*)R, 0, N*N*sizeof(double));
91  LAPACKE_dlacpy_work(LAPACK_COL_MAJOR,'u', M, N, A2, LDA, R, N);
92 
93  /* Perform Ql=Q*R */
94  memset((void*)Ql, 0, M*N*sizeof(double));
95  cblas_dgemm(CblasColMajor, CblasNoTrans, CblasNoTrans, M, N, N, (alpha), Q, LDA, R, N, (beta), Ql, M);
96  free(R);
97  }
98  else {
99  /* Extract the L */
100  double *L = (double *)malloc(M*M*sizeof(double));
101  memset((void*)L, 0, M*M*sizeof(double));
102  LAPACKE_dlacpy_work(LAPACK_COL_MAJOR,'l', M, N, A2, LDA, L, M);
103 
104  /* Perform Ql=LQ */
105  memset((void*)Ql, 0, M*N*sizeof(double));
106  cblas_dgemm(CblasColMajor, CblasNoTrans, CblasNoTrans, M, N, M, (alpha), L, M, Q, LDA, (beta), Ql, M);
107  free(L);
108  }
109 
110  /* Compute the Residual */
111  for (i = 0; i < M; i++)
112  for (j = 0 ; j < N; j++)
113  Residual[j*M+i] = A1[j*LDA+i]-Ql[j*M+i];
114 
115  Rnorm = LAPACKE_dlange_work(LAPACK_COL_MAJOR, 'i', M, N, Residual, M, work);
116  Anorm = LAPACKE_dlange_work(LAPACK_COL_MAJOR, 'i', M, N, A2, LDA, work);
117 
118  if (M >= N) {
119  printf("============\n");
120  printf("Checking the QR Factorization \n");
121  printf("-- ||A-QR||_oo/(||A||_oo.N.eps) = %e \n",Rnorm/(Anorm*N*eps));
122  }
123  else {
124  printf("============\n");
125  printf("Checking the LQ Factorization \n");
126  printf("-- ||A-LQ||_oo/(||A||_oo.N.eps) = %e \n",Rnorm/(Anorm*N*eps));
127  }
128 
129  if (isnan(Rnorm / (Anorm * N *eps)) || (Rnorm / (Anorm * N * eps) > 10.0) ) {
130  printf("-- Factorization is suspicious ! \n");
131  info_factorization = 1;
132  }
133  else {
134  printf("-- Factorization is CORRECT ! \n");
135  info_factorization = 0;
136  }
137 
138  free(work); free(Ql); free(Residual);
139 
140  return info_factorization;
141 }
142 
143 /*------------------------------------------------------------------------
144  * Check the factorization of the matrix A2
145  */
146 
147 int d_check_LLTfactorization(int N, double *A1, double *A2, int LDA, int uplo)
148 {
149  double Anorm, Rnorm;
150  double alpha;
151  int info_factorization;
152  int i,j;
153  double eps;
154 
155  eps = LAPACKE_dlamch_work('e');
156 
157  double *Residual = (double *)malloc(N*N*sizeof(double));
158  double *L1 = (double *)malloc(N*N*sizeof(double));
159  double *L2 = (double *)malloc(N*N*sizeof(double));
160  double *work = (double *)malloc(N*sizeof(double));
161 
162  memset((void*)L1, 0, N*N*sizeof(double));
163  memset((void*)L2, 0, N*N*sizeof(double));
164 
165  alpha= 1.0;
166 
167  LAPACKE_dlacpy_work(LAPACK_COL_MAJOR,' ', N, N, A1, LDA, Residual, N);
168 
169  /* Dealing with L'L or U'U */
170  if (uplo == PlasmaUpper){
171  LAPACKE_dlacpy_work(LAPACK_COL_MAJOR,'u', N, N, A2, LDA, L1, N);
172  LAPACKE_dlacpy_work(LAPACK_COL_MAJOR,'u', N, N, A2, LDA, L2, N);
173  cblas_dtrmm(CblasColMajor, CblasLeft, CblasUpper, CblasTrans, CblasNonUnit, N, N, (alpha), L1, N, L2, N);
174  }
175  else{
176  LAPACKE_dlacpy_work(LAPACK_COL_MAJOR,'l', N, N, A2, LDA, L1, N);
177  LAPACKE_dlacpy_work(LAPACK_COL_MAJOR,'l', N, N, A2, LDA, L2, N);
178  cblas_dtrmm(CblasColMajor, CblasRight, CblasLower, CblasTrans, CblasNonUnit, N, N, (alpha), L1, N, L2, N);
179  }
180 
181  /* Compute the Residual || A -L'L|| */
182  for (i = 0; i < N; i++)
183  for (j = 0; j < N; j++)
184  Residual[j*N+i] = L2[j*N+i] - Residual[j*N+i];
185 
186  Rnorm = LAPACKE_dlange_work(LAPACK_COL_MAJOR, 'i', N, N, Residual, N, work);
187  Anorm = LAPACKE_dlange_work(LAPACK_COL_MAJOR, 'i', N, N, A1, LDA, work);
188 
189  printf("============\n");
190  printf("Checking the Cholesky Factorization \n");
191  printf("-- ||L'L-A||_oo/(||A||_oo.N.eps) = %e \n",Rnorm/(Anorm*N*eps));
192 
193  if ( isnan(Rnorm/(Anorm*N*eps)) || (Rnorm/(Anorm*N*eps) > 10.0) ){
194  printf("-- Factorization is suspicious ! \n");
195  info_factorization = 1;
196  }
197  else{
198  printf("-- Factorization is CORRECT ! \n");
199  info_factorization = 0;
200  }
201 
202  free(Residual); free(L1); free(L2); free(work);
203 
204  return info_factorization;
205 }
206 
207 /*--------------------------------------------------------------
208  * Check the gemm
209  */
210 double d_check_gemm(PLASMA_enum transA, PLASMA_enum transB, int M, int N, int K,
211  double alpha, double *A, int LDA,
212  double *B, int LDB,
213  double beta, double *Cplasma,
214  double *Cref, int LDC,
215  double *Cinitnorm, double *Cplasmanorm, double *Clapacknorm )
216 {
217  double beta_const = -1.0;
218  double Rnorm;
219  double *work = (double *)malloc(max(K,max(M, N))* sizeof(double));
220 
221  *Cinitnorm = LAPACKE_dlange_work(LAPACK_COL_MAJOR, 'i', M, N, Cref, LDC, work);
222  *Cplasmanorm = LAPACKE_dlange_work(LAPACK_COL_MAJOR, 'i', M, N, Cplasma, LDC, work);
223 
224  cblas_dgemm(CblasColMajor, (CBLAS_TRANSPOSE)transA, (CBLAS_TRANSPOSE)transB, M, N, K,
225  (alpha), A, LDA, B, LDB, (beta), Cref, LDC);
226 
227  *Clapacknorm = LAPACKE_dlange_work(LAPACK_COL_MAJOR, 'i', M, N, Cref, LDC, work);
228 
229  cblas_daxpy(LDC * N, (beta_const), Cplasma, 1, Cref, 1);
230 
231  Rnorm = LAPACKE_dlange_work(LAPACK_COL_MAJOR, 'i', M, N, Cref, LDC, work);
232 
233  free(work);
234 
235  return Rnorm;
236 }
237 
238 /*--------------------------------------------------------------
239  * Check the trsm
240  */
242  int M, int NRHS, double alpha,
243  double *A, int LDA,
244  double *Bplasma, double *Bref, int LDB,
245  double *Binitnorm, double *Bplasmanorm, double *Blapacknorm )
246 {
247  double beta_const = -1.0;
248  double Rnorm;
249  double *work = (double *)malloc(max(M, NRHS)* sizeof(double));
250  /*double eps = LAPACKE_dlamch_work('e');*/
251 
252  *Binitnorm = LAPACKE_dlange_work(LAPACK_COL_MAJOR, 'i', M, NRHS, Bref, LDB, work);
253  *Bplasmanorm = LAPACKE_dlange_work(LAPACK_COL_MAJOR, 'm', M, NRHS, Bplasma, LDB, work);
254 
256  (CBLAS_TRANSPOSE)trans, (CBLAS_DIAG)diag, M, NRHS,
257  (alpha), A, LDA, Bref, LDB);
258 
259  *Blapacknorm = LAPACKE_dlange_work(LAPACK_COL_MAJOR, 'm', M, NRHS, Bref, LDB, work);
260 
261  cblas_daxpy(LDB * NRHS, (beta_const), Bplasma, 1, Bref, 1);
262 
263  Rnorm = LAPACKE_dlange_work(LAPACK_COL_MAJOR, 'm', M, NRHS, Bref, LDB, work);
264  Rnorm = Rnorm / *Blapacknorm;
265  /* max(M,NRHS) * eps);*/
266 
267  free(work);
268 
269  return Rnorm;
270 }
271 
272 /*--------------------------------------------------------------
273  * Check the solution
274  */
275 
276 double d_check_solution(int M, int N, int NRHS, double *A, int LDA,
277  double *B, double *X, int LDB,
278  double *anorm, double *bnorm, double *xnorm )
279 {
280 /* int info_solution; */
281  double Rnorm = -1.00;
282  double zone = 1.0;
283  double mzone = -1.0;
284  double *work = (double *)malloc(max(M, N)* sizeof(double));
285 
286  *anorm = LAPACKE_dlange_work(LAPACK_COL_MAJOR, 'i', M, N, A, LDA, work);
287  *xnorm = LAPACKE_dlange_work(LAPACK_COL_MAJOR, 'i', M, NRHS, X, LDB, work);
288  *bnorm = LAPACKE_dlange_work(LAPACK_COL_MAJOR, 'i', N, NRHS, B, LDB, work);
289 
290  cblas_dgemm(CblasColMajor, CblasNoTrans, CblasNoTrans, M, NRHS, N, (zone), A, LDA, X, LDB, (mzone), B, LDB);
291 
292  Rnorm = LAPACKE_dlange_work(LAPACK_COL_MAJOR, 'i', N, NRHS, B, M, work);
293 
294  free(work);
295 
296  return Rnorm;
297 }