PLASMA  2.4.5
PLASMA - Parallel Linear Algebra for Scalable Multi-core Architectures
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups
core_dzasum.c
Go to the documentation of this file.
1 
15 #include <cblas.h>
16 #include <math.h>
17 #include "common.h"
18 
19 /***************************************************************************/
24 #if defined(PLASMA_HAVE_WEAK)
25 #pragma weak CORE_dzasum = PCORE_dzasum
26 #define CORE_dzasum PCORE_dzasum
27 #endif
28 void CORE_dzasum(int storev, int uplo, int M, int N,
29  PLASMA_Complex64_t *A, int lda, double *work)
30 {
31  PLASMA_Complex64_t *tmpA;
32  double *tmpW, sum, abs;
33  int i,j;
34 
35  switch (uplo) {
36  case PlasmaUpper:
37  for (j = 0; j < N; j++) {
38  tmpA = A+(j*lda);
39  sum = 0.0;
40  for (i = 0; i < j; i++) {
41  abs = cabs(*tmpA);
42  sum += abs;
43  work[i] += abs;
44  tmpA++;
45  }
46  work[j] += sum + cabs(*tmpA);
47  }
48  break;
49  case PlasmaLower:
50  for (j = 0; j < N; j++) {
51  tmpA = A+(j*lda)+j;
52 
53  sum = 0.0;
54  work[j] += cabs(*tmpA);
55 
56  tmpA++;
57  for (i = j+1; i < M; i++) {
58  abs = cabs(*tmpA);
59  sum += abs;
60  work[i] += abs;
61  tmpA++;
62  }
63  work[j] += sum;
64  }
65  break;
66  case PlasmaUpperLower:
67  default:
68  if (storev == PlasmaColumnwise) {
69  for (j = 0; j < N; j++) {
70  /* work[j] += cblas_dzasum(M, &(A[j*lda]), 1); */
71  tmpA = A+(j*lda);
72  for (i = 0; i < M; i++) {
73  work[j] += cabs(*tmpA);
74  tmpA++;
75  }
76  }
77  }
78  else {
79  for (j = 0; j < N; j++) {
80  tmpA = A+(j*lda);
81  tmpW = work;
82  for (i = 0; i < M; i++) {
83  /* work[i] += cabs( A[j*lda+i] );*/
84  *tmpW += cabs( *tmpA );
85  tmpA++; tmpW++;
86  }
87  }
88  }
89  }
90 }
91 
92 /***************************************************************************/
95 void QUARK_CORE_dzasum(Quark *quark, Quark_Task_Flags *task_flags,
96  PLASMA_enum storev, PLASMA_enum uplo, int M, int N,
97  PLASMA_Complex64_t *A, int lda, int szeA,
98  double *work, int szeW)
99 {
101  quark, CORE_dzasum_quark, task_flags,
102  sizeof(PLASMA_enum), &storev, VALUE,
103  sizeof(PLASMA_enum), &uplo, VALUE,
104  sizeof(int), &M, VALUE,
105  sizeof(int), &N, VALUE,
106  sizeof(PLASMA_Complex64_t)*szeA, A, INPUT,
107  sizeof(int), &lda, VALUE,
108  sizeof(double)*szeW, work, INOUT,
109  0);
110 }
111 
112 /***************************************************************************/
115 #if defined(PLASMA_HAVE_WEAK)
116 #pragma weak CORE_dzasum_quark = PCORE_dzasum_quark
117 #define CORE_dzasum_quark PCORE_dzasum_quark
118 #endif
120 {
121  int storev;
122  int uplo;
123  int M;
124  int N;
126  int lda;
127  double *work;
128 
129  quark_unpack_args_7(quark, storev, uplo, M, N, A, lda, work);
130  CORE_dzasum(storev, uplo, M, N, A, lda, work);
131 }
132 
133 /***************************************************************************/
136 void QUARK_CORE_dzasum_f1(Quark *quark, Quark_Task_Flags *task_flags,
137  PLASMA_enum storev, PLASMA_enum uplo, int M, int N,
138  PLASMA_Complex64_t *A, int lda, int szeA,
139  double *work, int szeW, double *fake, int szeF)
140 {
143  quark, CORE_dzasum_f1_quark, task_flags,
144  sizeof(PLASMA_enum), &storev, VALUE,
145  sizeof(PLASMA_enum), &uplo, VALUE,
146  sizeof(int), &M, VALUE,
147  sizeof(int), &N, VALUE,
148  sizeof(PLASMA_Complex64_t)*szeA, A, INPUT,
149  sizeof(int), &lda, VALUE,
150  sizeof(double)*szeW, work, INOUT,
151  sizeof(double)*szeF, fake, OUTPUT | GATHERV,
152  0);
153 }
154 
155 /***************************************************************************/
158 #if defined(PLASMA_HAVE_WEAK)
159 #pragma weak CORE_dzasum_f1_quark = PCORE_dzasum_f1_quark
160 #define CORE_dzasum_f1_quark PCORE_dzasum_f1_quark
161 #endif
163 {
164  int storev;
165  int uplo;
166  int M;
167  int N;
169  int lda;
170  double *work;
171  double *fake;
172 
173  quark_unpack_args_8(quark, storev, uplo, M, N, A, lda, work, fake);
174  CORE_dzasum(storev, uplo, M, N, A, lda, work);
175 }