PLASMA  2.4.5
PLASMA - Parallel Linear Algebra for Scalable Multi-core Architectures
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups
core_dssssm.c
Go to the documentation of this file.
1 
17 #include <cblas.h>
18 #include "common.h"
19 
20 /***************************************************************************/
86 #if defined(PLASMA_HAVE_WEAK)
87 #pragma weak CORE_dssssm = PCORE_dssssm
88 #define CORE_dssssm PCORE_dssssm
89 #endif
90 int CORE_dssssm(int M1, int N1, int M2, int N2, int K, int IB,
91  double *A1, int LDA1,
92  double *A2, int LDA2,
93  double *L1, int LDL1,
94  double *L2, int LDL2,
95  int *IPIV)
96 {
97  static double zone = 1.0;
98  static double mzone =-1.0;
99 
100  int i, ii, sb;
101  int im, ip;
102 
103  /* Check input arguments */
104  if (M1 < 0) {
105  coreblas_error(1, "Illegal value of M1");
106  return -1;
107  }
108  if (N1 < 0) {
109  coreblas_error(2, "Illegal value of N1");
110  return -2;
111  }
112  if (M2 < 0) {
113  coreblas_error(3, "Illegal value of M2");
114  return -3;
115  }
116  if (N2 < 0) {
117  coreblas_error(4, "Illegal value of N2");
118  return -4;
119  }
120  if (K < 0) {
121  coreblas_error(5, "Illegal value of K");
122  return -5;
123  }
124  if (IB < 0) {
125  coreblas_error(6, "Illegal value of IB");
126  return -6;
127  }
128  if (LDA1 < max(1,M1)) {
129  coreblas_error(8, "Illegal value of LDA1");
130  return -8;
131  }
132  if (LDA2 < max(1,M2)) {
133  coreblas_error(10, "Illegal value of LDA2");
134  return -10;
135  }
136  if (LDL1 < max(1,IB)) {
137  coreblas_error(12, "Illegal value of LDL1");
138  return -12;
139  }
140  if (LDL2 < max(1,M2)) {
141  coreblas_error(14, "Illegal value of LDL2");
142  return -14;
143  }
144 
145  /* Quick return */
146  if ((M1 == 0) || (N1 == 0) || (M2 == 0) || (N2 == 0) || (K == 0) || (IB == 0))
147  return PLASMA_SUCCESS;
148 
149  ip = 0;
150 
151  for(ii = 0; ii < K; ii += IB) {
152  sb = min(K-ii, IB);
153 
154  for(i = 0; i < sb; i++) {
155  im = IPIV[ip]-1;
156 
157  if (im != (ii+i)) {
158  im = im - M1;
159  cblas_dswap(N1, &A1[ii+i], LDA1, &A2[im], LDA2);
160  }
161  ip = ip + 1;
162  }
163 
164  cblas_dtrsm(
167  sb, N1, (zone),
168  &L1[LDL1*ii], LDL1,
169  &A1[ii], LDA1);
170 
171  cblas_dgemm(
173  M2, N2, sb,
174  (mzone), &L2[LDL2*ii], LDL2,
175  &A1[ii], LDA1,
176  (zone), A2, LDA2);
177  }
178  return PLASMA_SUCCESS;
179 }
180 
181 /***************************************************************************/
184 void QUARK_CORE_dssssm(Quark *quark, Quark_Task_Flags *task_flags,
185  int m1, int n1, int m2, int n2, int k, int ib, int nb,
186  double *A1, int lda1,
187  double *A2, int lda2,
188  double *L1, int ldl1,
189  double *L2, int ldl2,
190  int *IPIV)
191 {
193  QUARK_Insert_Task(quark, CORE_dssssm_quark, task_flags,
194  sizeof(int), &m1, VALUE,
195  sizeof(int), &n1, VALUE,
196  sizeof(int), &m2, VALUE,
197  sizeof(int), &n2, VALUE,
198  sizeof(int), &k, VALUE,
199  sizeof(int), &ib, VALUE,
200  sizeof(double)*nb*nb, A1, INOUT,
201  sizeof(int), &lda1, VALUE,
202  sizeof(double)*nb*nb, A2, INOUT | LOCALITY,
203  sizeof(int), &lda2, VALUE,
204  sizeof(double)*ib*nb, L1, INPUT,
205  sizeof(int), &ldl1, VALUE,
206  sizeof(double)*ib*nb, L2, INPUT,
207  sizeof(int), &ldl2, VALUE,
208  sizeof(int)*nb, IPIV, INPUT,
209  0);
210 }
211 
212 /***************************************************************************/
215 #if defined(PLASMA_HAVE_WEAK)
216 #pragma weak CORE_dssssm_quark = PCORE_dssssm_quark
217 #define CORE_dssssm_quark PCORE_dssssm_quark
218 #endif
220 {
221  int m1;
222  int n1;
223  int m2;
224  int n2;
225  int k;
226  int ib;
227  double *A1;
228  int lda1;
229  double *A2;
230  int lda2;
231  double *L1;
232  int ldl1;
233  double *L2;
234  int ldl2;
235  int *IPIV;
236 
237  quark_unpack_args_15(quark, m1, n1, m2, n2, k, ib, A1, lda1, A2, lda2, L1, ldl1, L2, ldl2, IPIV);
238  CORE_dssssm(m1, n1, m2, n2, k, ib, A1, lda1, A2, lda2, L1, ldl1, L2, ldl2, IPIV);
239 }