PLASMA  2.4.5
PLASMA - Parallel Linear Algebra for Scalable Multi-core Architectures
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups
example_cunmlq.c
Go to the documentation of this file.
1 
17 #include <stdlib.h>
18 #include <stdio.h>
19 #include <string.h>
20 #include <math.h>
21 
22 #include <plasma.h>
23 #include <cblas.h>
24 #include <lapacke.h>
25 #include <core_blas.h>
26 
27 #ifndef max
28 #define max(a, b) ((a) > (b) ? (a) : (b))
29 #endif
30 #ifndef min
31 #define min(a, b) ((a) < (b) ? (a) : (b))
32 #endif
33 
35 
36 int IONE=1;
37 int ISEED[4] = {0,0,0,1}; /* initial seed for clarnv() */
38 
39 int main ()
40 {
41 
42  int cores = 2;
43  int M = 10;
44  int N = 15;
45  int LDA = 10;
46  int NRHS = 5;
47  int LDB = 15;
48 
49  int info;
50  int info_solution;
51  int i,j;
52  int LDAxN = LDA*N;
53  int LDBxNRHS = LDB*NRHS;
54 
55  PLASMA_Complex32_t *A1 = (PLASMA_Complex32_t *)malloc(LDA*N*sizeof(PLASMA_Complex32_t));
56  PLASMA_Complex32_t *A2 = (PLASMA_Complex32_t *)malloc(LDA*N*sizeof(PLASMA_Complex32_t));
57  PLASMA_Complex32_t *B1 = (PLASMA_Complex32_t *)malloc(LDB*NRHS*sizeof(PLASMA_Complex32_t));
58  PLASMA_Complex32_t *B2 = (PLASMA_Complex32_t *)malloc(LDB*NRHS*sizeof(PLASMA_Complex32_t));
60 
61  /* Check if unable to allocate memory */
62  if ((!A1)||(!A2)||(!B1)||(!B2)){
63  printf("Out of Memory \n ");
64  return EXIT_SUCCESS;
65  }
66 
67  /* Plasma Initialization */
68  PLASMA_Init(cores);
69  printf("-- PLASMA is initialized to run on %d cores. \n",cores);
70 
71  /* Allocate T */
73 
74  /* Initialize A1 and A2 */
75  LAPACKE_clarnv_work(IONE, ISEED, LDAxN, A1);
76  for (i = 0; i < M; i++)
77  for (j = 0; j < N; j++)
78  A2[LDA*j+i] = A1[LDA*j+i] ;
79 
80  /* Initialize B1 and B2 */
81  LAPACKE_clarnv_work(IONE, ISEED, LDBxNRHS, B1);
82  for (i = 0; i < M; i++)
83  for (j = 0; j < NRHS; j++)
84  B2[LDB*j+i] = B1[LDB*j+i] ;
85 
86  /* Factorization QR of the matrix A2 */
87  info = PLASMA_cgelqf(M, N, A2, LDA, T);
88 
89  /* Solve the problem */
91  M, NRHS, (PLASMA_Complex32_t)1.0, A2, LDA, B2, LDB);
92  info = PLASMA_cunmlq(PlasmaLeft, PlasmaConjTrans, N, NRHS, M, A2, LDA, T, B2, LDB);
93 
94  /* Check the solution */
95  info_solution = check_solution(M, N, NRHS, A1, LDA, B1, B2, LDB);
96 
97  if ((info_solution != 0)|(info != 0))
98  printf("-- Error in CUNMLQ example ! \n");
99  else
100  printf("-- Run of CUNMLQ example successful ! \n");
101 
102  free(A1); free(A2); free(B1); free(B2); free(T);
103 
104  PLASMA_Finalize();
105 
106  return EXIT_SUCCESS;
107 }
108 
109 /*--------------------------------------------------------------
110  * Check the solution
111  */
112 
113 int check_solution(int M, int N, int NRHS, PLASMA_Complex32_t *A1, int LDA, PLASMA_Complex32_t *B1, PLASMA_Complex32_t *B2, int LDB)
114 {
115  int info_solution;
116  float Rnorm, Anorm, Xnorm, Bnorm;
117  PLASMA_Complex32_t alpha, beta;
118  float *work = (float *)malloc(max(M, N)* sizeof(float));
119  float eps;
120 
121  eps = LAPACKE_slamch_work('e');
122 
123  alpha = 1.0;
124  beta = -1.0;
125 
126  Anorm = LAPACKE_clange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm), M, N, A1, LDA, work);
127  Xnorm = LAPACKE_clange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm), M, NRHS, B2, LDB, work);
128  Bnorm = LAPACKE_clange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm), N, NRHS, B1, LDB, work);
129 
130  cblas_cgemm(CblasColMajor, CblasNoTrans, CblasNoTrans, M, NRHS, N, CBLAS_SADDR(alpha), A1, LDA, B2, LDB, CBLAS_SADDR(beta), B1, LDB);
131 
132  if (M >= N) {
133  PLASMA_Complex32_t *Residual = (PLASMA_Complex32_t *)malloc(M*NRHS*sizeof(PLASMA_Complex32_t));
134  memset((void*)Residual, 0, M*NRHS*sizeof(PLASMA_Complex32_t));
135  cblas_cgemm(CblasColMajor, CblasConjTrans, CblasNoTrans, N, NRHS, M, CBLAS_SADDR(alpha), A1, LDA, B1, LDB, CBLAS_SADDR(beta), Residual, M);
136  Rnorm = LAPACKE_clange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm), M, NRHS, Residual, M, work);
137  free(Residual);
138  }
139  else {
140  PLASMA_Complex32_t *Residual = (PLASMA_Complex32_t *)malloc(N*NRHS*sizeof(PLASMA_Complex32_t));
141  memset((void*)Residual, 0, N*NRHS*sizeof(PLASMA_Complex32_t));
142  cblas_cgemm(CblasColMajor, CblasConjTrans, CblasNoTrans, N, NRHS, M, CBLAS_SADDR(alpha), A1, LDA, B1, LDB, CBLAS_SADDR(beta), Residual, N);
143  Rnorm = LAPACKE_clange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm), N, NRHS, Residual, N, work);
144  free(Residual);
145  }
146 
147  printf("============\n");
148  printf("Checking the Residual of the solution \n");
149  printf("-- ||Ax-B||_oo/((||A||_oo||x||_oo+||B||)_oo.N.eps) = %e \n",Rnorm/((Anorm*Xnorm+Bnorm)*N*eps));
150 
151  if (isnan(Rnorm / ((Anorm * Xnorm + Bnorm) * N * eps)) || (Rnorm / ((Anorm * Xnorm + Bnorm) * N * eps) > 10.0) ) {
152  printf("-- The solution is suspicious ! \n");
153  info_solution = 1;
154  }
155  else {
156  printf("-- The solution is CORRECT ! \n");
157  info_solution= 0 ;
158  }
159 
160  free(work);
161 
162  return info_solution;
163 }