PLASMA  2.4.5
PLASMA - Parallel Linear Algebra for Scalable Multi-core Architectures
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups
core_scasum.c File Reference
#include <cblas.h>
#include <math.h>
#include "common.h"
Include dependency graph for core_scasum.c:

Go to the source code of this file.

Functions

void CORE_scasum (int storev, int uplo, int M, int N, PLASMA_Complex32_t *A, int lda, float *work)
void QUARK_CORE_scasum (Quark *quark, Quark_Task_Flags *task_flags, PLASMA_enum storev, PLASMA_enum uplo, int M, int N, PLASMA_Complex32_t *A, int lda, int szeA, float *work, int szeW)
void CORE_scasum_quark (Quark *quark)
void QUARK_CORE_scasum_f1 (Quark *quark, Quark_Task_Flags *task_flags, PLASMA_enum storev, PLASMA_enum uplo, int M, int N, PLASMA_Complex32_t *A, int lda, int szeA, float *work, int szeW, float *fake, int szeF)
void CORE_scasum_f1_quark (Quark *quark)

Function Documentation

void CORE_scasum ( int  storev,
int  uplo,
int  M,
int  N,
PLASMA_Complex32_t *  A,
int  lda,
float *  work 
)

Definition at line 28 of file core_scasum.c.

References cabsf(), PlasmaColumnwise, PlasmaLower, PlasmaUpper, PlasmaUpperLower, and sum().

{
    /*
     * Accumulates moduli (cabsf) of the entries of A into work; work is only
     * ever incremented, never reset, so the caller supplies its initial
     * contents. A is addressed column-major: entry (i,j) is A[j*lda + i].
     *
     *   uplo == PlasmaUpper : for every column j < N, each entry strictly
     *       above the diagonal is added to BOTH work[i] and work[j]; the
     *       diagonal entry is added to work[j] only.
     *   uplo == PlasmaLower : same scheme using the diagonal and the entries
     *       strictly below it (rows j+1 .. M-1).
     *   otherwise (PlasmaUpperLower / default) : every entry of the M-by-N
     *       tile is used; storev == PlasmaColumnwise accumulates column j
     *       into work[j], any other storev accumulates row i into work[i].
     *
     * NOTE(review): the triangular cases read A(j,j) for every j < N, so they
     * presumably expect M >= N (square diagonal tiles) -- confirm with callers.
     */
    PLASMA_Complex32_t *tmpA;
    float *tmpW, sum, val;   /* val was named abs; renamed to stop shadowing abs() */
    int i, j;

    switch (uplo) {
    case PlasmaUpper:
        for (j = 0; j < N; j++) {
            tmpA = A + (j * lda);          /* top of column j */
            sum = 0.0f;
            for (i = 0; i < j; i++) {
                val = cabsf(*tmpA);
                sum += val;                /* contribution to index j */
                work[i] += val;            /* mirrored contribution to index i */
                tmpA++;
            }
            /* tmpA now points at the diagonal entry A(j,j). */
            work[j] += sum + cabsf(*tmpA);
        }
        break;

    case PlasmaLower:
        for (j = 0; j < N; j++) {
            tmpA = A + (j * lda) + j;      /* diagonal entry A(j,j) */
            sum = 0.0f;
            work[j] += cabsf(*tmpA);
            tmpA++;
            for (i = j + 1; i < M; i++) {
                val = cabsf(*tmpA);
                sum += val;                /* contribution to index j */
                work[i] += val;            /* mirrored contribution to index i */
                tmpA++;
            }
            work[j] += sum;
        }
        break;

    case PlasmaUpperLower:
    default:
        if (storev == PlasmaColumnwise) {
            /*
             * Column sums. Written as an explicit cabsf loop; note that the
             * BLAS routine cblas_scasum sums |Re| + |Im| per entry, which is
             * not the same quantity as the modulus used here.
             */
            for (j = 0; j < N; j++) {
                tmpA = A + (j * lda);
                for (i = 0; i < M; i++) {
                    work[j] += cabsf(*tmpA);
                    tmpA++;
                }
            }
        }
        else {
            /* Row sums: work[i] += |A(i,j)| for every column j. */
            for (j = 0; j < N; j++) {
                tmpA = A + (j * lda);
                tmpW = work;
                for (i = 0; i < M; i++) {
                    *tmpW += cabsf(*tmpA);
                    tmpA++;
                    tmpW++;
                }
            }
        }
    }
}

Here is the call graph for this function:

Here is the caller graph for this function:

void CORE_scasum_f1_quark ( Quark *  quark)

Definition at line 162 of file core_scasum.c.

References A, CORE_scasum(), quark_unpack_args_8, storev, and uplo.

{
    /*
     * QUARK task body for the "f1" variant: unpacks the eight task arguments
     * from the quark handle and forwards the first seven to CORE_scasum.
     */
    int storev;
    int uplo;
    int M;
    int N;
    PLASMA_Complex32_t *A;   /* declaration was missing although A is unpacked and used below */
    int lda;
    float *work;
    float *fake;             /* unpacked but never passed on to CORE_scasum */

    quark_unpack_args_8(quark, storev, uplo, M, N, A, lda, work, fake);
    CORE_scasum(storev, uplo, M, N, A, lda, work);
}

Here is the call graph for this function:

Here is the caller graph for this function:

void CORE_scasum_quark ( Quark *  quark)

Declarations of QUARK wrappers (called by QUARK) - alphabetical order

Definition at line 119 of file core_scasum.c.

References A, CORE_scasum(), quark_unpack_args_7, storev, and uplo.

{
    /*
     * QUARK task body: unpacks the seven task arguments from the quark handle
     * and calls CORE_scasum with them.
     */
    int storev;
    int uplo;
    int M;
    int N;
    PLASMA_Complex32_t *A;   /* declaration was missing although A is unpacked and used below */
    int lda;
    float *work;

    quark_unpack_args_7(quark, storev, uplo, M, N, A, lda, work);
    CORE_scasum(storev, uplo, M, N, A, lda, work);
}

Here is the call graph for this function:

Here is the caller graph for this function:

void QUARK_CORE_scasum ( Quark *  quark,
Quark_Task_Flags *  task_flags,
PLASMA_enum  storev,
PLASMA_enum  uplo,
int  M,
int  N,
PLASMA_Complex32_t *  A,
int  lda,
int  szeA,
float *  work,
int  szeW 
)

Declarations of QUARK wrappers (called by PLASMA) - alphabetical order

Definition at line 95 of file core_scasum.c.

References CORE_scasum_quark(), INOUT, INPUT, QUARK_Insert_Task(), and VALUE.

{
    /*
     * Queues CORE_scasum_quark as a QUARK task. Arguments are passed as
     * (size, pointer, flag) triples in the order the task body unpacks them:
     * storev, uplo, M, N, A, lda, work. A is declared INPUT over szeA
     * elements and work INOUT over szeW floats; everything else is passed
     * by VALUE. The trailing 0 ends the argument list.
     */
    QUARK_Insert_Task(
        quark, CORE_scasum_quark, task_flags,
        sizeof(PLASMA_enum),             &storev, VALUE,
        sizeof(PLASMA_enum),             &uplo,   VALUE,
        sizeof(int),                     &M,      VALUE,
        sizeof(int),                     &N,      VALUE,
        sizeof(PLASMA_Complex32_t)*szeA, A,       INPUT,
        sizeof(int),                     &lda,    VALUE,
        sizeof(float)*szeW,              work,    INOUT,
        0);
}

Here is the call graph for this function:

void QUARK_CORE_scasum_f1 ( Quark *  quark,
Quark_Task_Flags *  task_flags,
PLASMA_enum  storev,
PLASMA_enum  uplo,
int  M,
int  N,
PLASMA_Complex32_t *  A,
int  lda,
int  szeA,
float *  work,
int  szeW,
float *  fake,
int  szeF 
)

Definition at line 136 of file core_scasum.c.

References CORE_scasum_f1_quark(), DAG_CORE_ASUM, GATHERV, INOUT, INPUT, OUTPUT, QUARK_Insert_Task(), and VALUE.

{
    /*
     * Queues CORE_scasum_f1_quark as a QUARK task. Same argument triples as
     * QUARK_CORE_scasum (storev, uplo, M, N, A, lda, work) plus one extra
     * buffer, fake, of szeF floats flagged OUTPUT | GATHERV. The task body
     * unpacks fake but does not pass it to CORE_scasum.
     * NOTE(review): fake presumably exists only to add a scheduling
     * dependency -- confirm against the QUARK documentation.
     */

    /* NOTE(review): restored from this function's reference list; assumed to
     * be a statement-like macro -- confirm against the PLASMA headers. */
    DAG_CORE_ASUM;
    QUARK_Insert_Task(
        quark, CORE_scasum_f1_quark, task_flags,
        sizeof(PLASMA_enum),             &storev, VALUE,
        sizeof(PLASMA_enum),             &uplo,   VALUE,
        sizeof(int),                     &M,      VALUE,
        sizeof(int),                     &N,      VALUE,
        sizeof(PLASMA_Complex32_t)*szeA, A,       INPUT,
        sizeof(int),                     &lda,    VALUE,
        sizeof(float)*szeW,              work,    INOUT,
        sizeof(float)*szeF,              fake,    OUTPUT | GATHERV,
        0);
}

Here is the call graph for this function:

Here is the caller graph for this function: