PLASMA  2.4.5
PLASMA - Parallel Linear Algebra for Scalable Multi-core Architectures
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups
sauxiliary.c
Go to the documentation of this file.
1 
6 #include <stdlib.h>
7 #include <stdio.h>
8 #include <string.h>
9 #include <math.h>
10 #include <cblas.h>
11 #include <lapacke.h>
12 #include <plasma.h>
13 #include <core_blas.h>
14 #include "auxiliary.h"
15 
16 /*-------------------------------------------------------------------
17  * Check the orthogonality of Q
18  */
19 
20 int s_check_orthogonality(int M, int N, int LDQ, float *Q)
21 {
22  float alpha, beta;
23  float normQ;
24  int info_ortho;
25  int i;
26  int minMN = min(M, N);
27  float eps;
28  float *work = (float *)malloc(minMN*sizeof(float));
29 
30  eps = LAPACKE_slamch_work('e');
31  alpha = 1.0;
32  beta = -1.0;
33 
34  /* Build the idendity matrix USE DLASET?*/
35  float *Id = (float *) malloc(minMN*minMN*sizeof(float));
36  memset((void*)Id, 0, minMN*minMN*sizeof(float));
37  for (i = 0; i < minMN; i++)
38  Id[i*minMN+i] = (float)1.0;
39 
40  /* Perform Id - Q'Q */
41  if (M >= N)
42  cblas_ssyrk(CblasColMajor, CblasUpper, CblasTrans, N, M, alpha, Q, LDQ, beta, Id, N);
43  else
44  cblas_ssyrk(CblasColMajor, CblasUpper, CblasNoTrans, M, N, alpha, Q, LDQ, beta, Id, M);
45 
46  normQ = LAPACKE_slansy_work(LAPACK_COL_MAJOR, 'i', 'u', minMN, Id, minMN, work);
47 
48  printf("============\n");
49  printf("Checking the orthogonality of Q \n");
50  printf("||Id-Q'*Q||_oo / (N*eps) = %e \n",normQ/(minMN*eps));
51 
52  if ( isnan(normQ / (minMN * eps)) || (normQ / (minMN * eps) > 10.0) ) {
53  printf("-- Orthogonality is suspicious ! \n");
54  info_ortho=1;
55  }
56  else {
57  printf("-- Orthogonality is CORRECT ! \n");
58  info_ortho=0;
59  }
60 
61  free(work); free(Id);
62 
63  return info_ortho;
64 }
65 
66 /*------------------------------------------------------------
67  * Check the factorization QR
68  */
69 
70 int s_check_QRfactorization(int M, int N, float *A1, float *A2, int LDA, float *Q)
71 {
72  float Anorm, Rnorm;
73  float alpha, beta;
74  int info_factorization;
75  int i,j;
76  float eps;
77 
78  eps = LAPACKE_slamch_work('e');
79 
80  float *Ql = (float *)malloc(M*N*sizeof(float));
81  float *Residual = (float *)malloc(M*N*sizeof(float));
82  float *work = (float *)malloc(max(M,N)*sizeof(float));
83 
84  alpha=1.0;
85  beta=0.0;
86 
87  if (M >= N) {
88  /* Extract the R */
89  float *R = (float *)malloc(N*N*sizeof(float));
90  memset((void*)R, 0, N*N*sizeof(float));
91  LAPACKE_slacpy_work(LAPACK_COL_MAJOR,'u', M, N, A2, LDA, R, N);
92 
93  /* Perform Ql=Q*R */
94  memset((void*)Ql, 0, M*N*sizeof(float));
95  cblas_sgemm(CblasColMajor, CblasNoTrans, CblasNoTrans, M, N, N, (alpha), Q, LDA, R, N, (beta), Ql, M);
96  free(R);
97  }
98  else {
99  /* Extract the L */
100  float *L = (float *)malloc(M*M*sizeof(float));
101  memset((void*)L, 0, M*M*sizeof(float));
102  LAPACKE_slacpy_work(LAPACK_COL_MAJOR,'l', M, N, A2, LDA, L, M);
103 
104  /* Perform Ql=LQ */
105  memset((void*)Ql, 0, M*N*sizeof(float));
106  cblas_sgemm(CblasColMajor, CblasNoTrans, CblasNoTrans, M, N, M, (alpha), L, M, Q, LDA, (beta), Ql, M);
107  free(L);
108  }
109 
110  /* Compute the Residual */
111  for (i = 0; i < M; i++)
112  for (j = 0 ; j < N; j++)
113  Residual[j*M+i] = A1[j*LDA+i]-Ql[j*M+i];
114 
115  Rnorm = LAPACKE_slange_work(LAPACK_COL_MAJOR, 'i', M, N, Residual, M, work);
116  Anorm = LAPACKE_slange_work(LAPACK_COL_MAJOR, 'i', M, N, A2, LDA, work);
117 
118  if (M >= N) {
119  printf("============\n");
120  printf("Checking the QR Factorization \n");
121  printf("-- ||A-QR||_oo/(||A||_oo.N.eps) = %e \n",Rnorm/(Anorm*N*eps));
122  }
123  else {
124  printf("============\n");
125  printf("Checking the LQ Factorization \n");
126  printf("-- ||A-LQ||_oo/(||A||_oo.N.eps) = %e \n",Rnorm/(Anorm*N*eps));
127  }
128 
129  if (isnan(Rnorm / (Anorm * N *eps)) || (Rnorm / (Anorm * N * eps) > 10.0) ) {
130  printf("-- Factorization is suspicious ! \n");
131  info_factorization = 1;
132  }
133  else {
134  printf("-- Factorization is CORRECT ! \n");
135  info_factorization = 0;
136  }
137 
138  free(work); free(Ql); free(Residual);
139 
140  return info_factorization;
141 }
142 
143 /*------------------------------------------------------------------------
144  * Check the factorization of the matrix A2
145  */
146 
147 int s_check_LLTfactorization(int N, float *A1, float *A2, int LDA, int uplo)
148 {
149  float Anorm, Rnorm;
150  float alpha;
151  int info_factorization;
152  int i,j;
153  float eps;
154 
155  eps = LAPACKE_slamch_work('e');
156 
157  float *Residual = (float *)malloc(N*N*sizeof(float));
158  float *L1 = (float *)malloc(N*N*sizeof(float));
159  float *L2 = (float *)malloc(N*N*sizeof(float));
160  float *work = (float *)malloc(N*sizeof(float));
161 
162  memset((void*)L1, 0, N*N*sizeof(float));
163  memset((void*)L2, 0, N*N*sizeof(float));
164 
165  alpha= 1.0;
166 
167  LAPACKE_slacpy_work(LAPACK_COL_MAJOR,' ', N, N, A1, LDA, Residual, N);
168 
169  /* Dealing with L'L or U'U */
170  if (uplo == PlasmaUpper){
171  LAPACKE_slacpy_work(LAPACK_COL_MAJOR,'u', N, N, A2, LDA, L1, N);
172  LAPACKE_slacpy_work(LAPACK_COL_MAJOR,'u', N, N, A2, LDA, L2, N);
173  cblas_strmm(CblasColMajor, CblasLeft, CblasUpper, CblasTrans, CblasNonUnit, N, N, (alpha), L1, N, L2, N);
174  }
175  else{
176  LAPACKE_slacpy_work(LAPACK_COL_MAJOR,'l', N, N, A2, LDA, L1, N);
177  LAPACKE_slacpy_work(LAPACK_COL_MAJOR,'l', N, N, A2, LDA, L2, N);
178  cblas_strmm(CblasColMajor, CblasRight, CblasLower, CblasTrans, CblasNonUnit, N, N, (alpha), L1, N, L2, N);
179  }
180 
181  /* Compute the Residual || A -L'L|| */
182  for (i = 0; i < N; i++)
183  for (j = 0; j < N; j++)
184  Residual[j*N+i] = L2[j*N+i] - Residual[j*N+i];
185 
186  Rnorm = LAPACKE_slange_work(LAPACK_COL_MAJOR, 'i', N, N, Residual, N, work);
187  Anorm = LAPACKE_slange_work(LAPACK_COL_MAJOR, 'i', N, N, A1, LDA, work);
188 
189  printf("============\n");
190  printf("Checking the Cholesky Factorization \n");
191  printf("-- ||L'L-A||_oo/(||A||_oo.N.eps) = %e \n",Rnorm/(Anorm*N*eps));
192 
193  if ( isnan(Rnorm/(Anorm*N*eps)) || (Rnorm/(Anorm*N*eps) > 10.0) ){
194  printf("-- Factorization is suspicious ! \n");
195  info_factorization = 1;
196  }
197  else{
198  printf("-- Factorization is CORRECT ! \n");
199  info_factorization = 0;
200  }
201 
202  free(Residual); free(L1); free(L2); free(work);
203 
204  return info_factorization;
205 }
206 
207 /*--------------------------------------------------------------
208  * Check the gemm
209  */
210 float s_check_gemm(PLASMA_enum transA, PLASMA_enum transB, int M, int N, int K,
211  float alpha, float *A, int LDA,
212  float *B, int LDB,
213  float beta, float *Cplasma,
214  float *Cref, int LDC,
215  float *Cinitnorm, float *Cplasmanorm, float *Clapacknorm )
216 {
217  float beta_const = -1.0;
218  float Rnorm;
219  float *work = (float *)malloc(max(K,max(M, N))* sizeof(float));
220 
221  *Cinitnorm = LAPACKE_slange_work(LAPACK_COL_MAJOR, 'i', M, N, Cref, LDC, work);
222  *Cplasmanorm = LAPACKE_slange_work(LAPACK_COL_MAJOR, 'i', M, N, Cplasma, LDC, work);
223 
224  cblas_sgemm(CblasColMajor, (CBLAS_TRANSPOSE)transA, (CBLAS_TRANSPOSE)transB, M, N, K,
225  (alpha), A, LDA, B, LDB, (beta), Cref, LDC);
226 
227  *Clapacknorm = LAPACKE_slange_work(LAPACK_COL_MAJOR, 'i', M, N, Cref, LDC, work);
228 
229  cblas_saxpy(LDC * N, (beta_const), Cplasma, 1, Cref, 1);
230 
231  Rnorm = LAPACKE_slange_work(LAPACK_COL_MAJOR, 'i', M, N, Cref, LDC, work);
232 
233  free(work);
234 
235  return Rnorm;
236 }
237 
238 /*--------------------------------------------------------------
239  * Check the trsm
240  */
242  int M, int NRHS, float alpha,
243  float *A, int LDA,
244  float *Bplasma, float *Bref, int LDB,
245  float *Binitnorm, float *Bplasmanorm, float *Blapacknorm )
246 {
247  float beta_const = -1.0;
248  float Rnorm;
249  float *work = (float *)malloc(max(M, NRHS)* sizeof(float));
250  /*float eps = LAPACKE_slamch_work('e');*/
251 
252  *Binitnorm = LAPACKE_slange_work(LAPACK_COL_MAJOR, 'i', M, NRHS, Bref, LDB, work);
253  *Bplasmanorm = LAPACKE_slange_work(LAPACK_COL_MAJOR, 'm', M, NRHS, Bplasma, LDB, work);
254 
256  (CBLAS_TRANSPOSE)trans, (CBLAS_DIAG)diag, M, NRHS,
257  (alpha), A, LDA, Bref, LDB);
258 
259  *Blapacknorm = LAPACKE_slange_work(LAPACK_COL_MAJOR, 'm', M, NRHS, Bref, LDB, work);
260 
261  cblas_saxpy(LDB * NRHS, (beta_const), Bplasma, 1, Bref, 1);
262 
263  Rnorm = LAPACKE_slange_work(LAPACK_COL_MAJOR, 'm', M, NRHS, Bref, LDB, work);
264  Rnorm = Rnorm / *Blapacknorm;
265  /* max(M,NRHS) * eps);*/
266 
267  free(work);
268 
269  return Rnorm;
270 }
271 
272 /*--------------------------------------------------------------
273  * Check the solution
274  */
275 
276 float s_check_solution(int M, int N, int NRHS, float *A, int LDA,
277  float *B, float *X, int LDB,
278  float *anorm, float *bnorm, float *xnorm )
279 {
280 /* int info_solution; */
281  float Rnorm = -1.00;
282  float zone = 1.0;
283  float mzone = -1.0;
284  float *work = (float *)malloc(max(M, N)* sizeof(float));
285 
286  *anorm = LAPACKE_slange_work(LAPACK_COL_MAJOR, 'i', M, N, A, LDA, work);
287  *xnorm = LAPACKE_slange_work(LAPACK_COL_MAJOR, 'i', M, NRHS, X, LDB, work);
288  *bnorm = LAPACKE_slange_work(LAPACK_COL_MAJOR, 'i', N, NRHS, B, LDB, work);
289 
290  cblas_sgemm(CblasColMajor, CblasNoTrans, CblasNoTrans, M, NRHS, N, (zone), A, LDA, X, LDB, (mzone), B, LDB);
291 
292  Rnorm = LAPACKE_slange_work(LAPACK_COL_MAJOR, 'i', N, NRHS, B, M, work);
293 
294  free(work);
295 
296  return Rnorm;
297 }