PLASMA  2.4.5
PLASMA - Parallel Linear Algebra for Scalable Multi-core Architectures
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups
pcsymm.c File Reference
#include "common.h"
Include dependency graph for pcsymm.c:

Go to the source code of this file.

Macros

/*
 * Shorthands that forward (m, n) to BLKADDR for the descriptors named A, B
 * and C, with PLASMA_Complex32_t as the element type. Each macro refers to an
 * identifier with the same name as itself, so a matching A / B / C must be in
 * scope wherever it is used.
 * NOTE(review): m and n are presumably tile row / tile column indices and the
 * result a pointer to that tile -- confirm against BLKADDR in common.h.
 */
#define A(m, n)   BLKADDR(A, PLASMA_Complex32_t, m, n)
#define B(m, n)   BLKADDR(B, PLASMA_Complex32_t, m, n)
#define C(m, n)   BLKADDR(C, PLASMA_Complex32_t, m, n)

Functions

void plasma_pcsymm (plasma_context_t *plasma)
void plasma_pcsymm_quark (PLASMA_enum side, PLASMA_enum uplo, PLASMA_Complex32_t alpha, PLASMA_desc A, PLASMA_desc B, PLASMA_Complex32_t beta, PLASMA_desc C, PLASMA_sequence *sequence, PLASMA_request *request)

Detailed Description

PLASMA auxiliary routines. PLASMA is a software package provided by Univ. of Tennessee, Univ. of California Berkeley and Univ. of Colorado Denver.

Version:
2.4.5
Author:
Emmanuel Agullo
Mathieu Faverge
Date:
2010-11-15 (generated c, Tue Nov 22 14:35:39 2011)

Definition in file pcsymm.c.


Macro Definition Documentation

#define A(m, n)    BLKADDR(A, PLASMA_Complex32_t, m, n)

Definition at line 18 of file pcsymm.c.

#define B(m, n)    BLKADDR(B, PLASMA_Complex32_t, m, n)

Definition at line 19 of file pcsymm.c.

#define C(m, n)    BLKADDR(C, PLASMA_Complex32_t, m, n)

Definition at line 20 of file pcsymm.c.


Function Documentation

void plasma_pcsymm (plasma_context_t *plasma)

Parallel tile symmetric matrix-matrix multiplication - static scheduling

Definition at line 24 of file pcsymm.c.

References A, B, BLKLDD, C, CORE_cgemm(), CORE_csymm(), plasma_desc_t::m, plasma_desc_t::mb, plasma_desc_t::mt, plasma_desc_t::n, plasma_desc_t::nb, plasma_desc_t::nt, PLASMA_RANK, PLASMA_SIZE, PLASMA_SUCCESS, plasma_unpack_args_9, PlasmaLeft, PlasmaLower, PlasmaNoTrans, PlasmaTrans, side, plasma_sequence_t::status, and uplo.

{
    /*
     * Tile symmetric matrix-matrix multiply, static scheduling.
     *
     * For every tile C(m, n) handled by this caller the loop over k
     * accumulates
     *     side == PlasmaLeft :  C(m, n) = alpha * sum_k A[m, k] * B(k, n) + beta * C(m, n)
     *     otherwise          :  C(m, n) = alpha * sum_k B(m, k) * A[k, n] + beta * C(m, n)
     * where only the `uplo` triangle of A's tiles is read: an off-diagonal
     * tile from the other triangle is replaced by the transpose of its mirror
     * tile, and the diagonal tile goes through CORE_csymm.
     *
     * Arguments are obtained through plasma_unpack_args_9; nothing is
     * returned. The function exits immediately when sequence->status is not
     * PLASMA_SUCCESS.
     *
     * NOTE(review): the declarations of side .. C and zbeta / zone, the
     * `m = PLASMA_RANK` seed, and the `CORE_cgemm(` / `CORE_csymm(` call heads
     * with their transpose flags were absent from the listing this body was
     * taken from. They are restored from the symbols named in the function's
     * reference list, the parameter types of plasma_pcsymm_quark, and the
     * tile algebra above -- confirm against the shipped pcsymm.c.
     */
    PLASMA_enum side;
    PLASMA_enum uplo;
    PLASMA_Complex32_t alpha;
    PLASMA_desc A;
    PLASMA_desc B;
    PLASMA_Complex32_t beta;
    PLASMA_desc C;
    PLASMA_sequence *sequence;
    PLASMA_request *request;

    int k, m, n;
    int next_m;
    int next_n;
    int lda, ldak, ldb, ldc;
    int tempmm, tempnn, tempkn, tempkm;

    /* beta is applied on the first k step only; later steps accumulate. */
    PLASMA_Complex32_t zbeta;
    PLASMA_Complex32_t zone = (PLASMA_Complex32_t)1.0;

    plasma_unpack_args_9(side, uplo, alpha, A, B, beta, C, sequence, request);
    if (sequence->status != PLASMA_SUCCESS)
        return;

    /*
     * Starting tile: begin at row index PLASMA_RANK in column 0 and fold any
     * overshoot past the last tile row into the following columns.
     * (PLASMA_RANK / PLASMA_SIZE are presumably this worker's rank and the
     * number of workers -- confirm in common.h.)
     */
    n = 0;
    m = PLASMA_RANK;
    while (m >= C.mt && n < C.nt) {
        n++;
        m = m-C.mt;
    }

    while (n < C.nt) {
        /* Next tile for this caller: PLASMA_SIZE tiles further, same folding. */
        next_m = m;
        next_n = n;

        next_m += PLASMA_SIZE;
        while (next_m >= C.mt && next_n < C.nt) {
            next_n++;
            next_m = next_m - C.mt;
        }

        /* The last tile row / column may be smaller than mb / nb. */
        tempmm = m == C.mt-1 ? C.m-m*C.mb : C.mb;
        tempnn = n == C.nt-1 ? C.n-n*C.nb : C.nb;

        ldc = BLKLDD(C, m);
        /*
         *  PlasmaLeft / PlasmaLower
         */
        if (side == PlasmaLeft) {
            lda = BLKLDD(A, m);
            if (uplo == PlasmaLower) {
                for (k = 0; k < C.mt; k++) {
                    tempkm = k == C.mt-1 ? C.m-k*C.mb : C.mb;
                    ldak = BLKLDD(A, k);
                    ldb  = BLKLDD(B, k);
                    zbeta = k == 0 ? beta : zone;
                    if (k < m) {
                        /* A(m, k) lies in the lower triangle: use as is. */
                        CORE_cgemm(
                            PlasmaNoTrans, PlasmaNoTrans,
                            tempmm, tempnn, tempkm,
                            alpha, A(m, k), lda,
                                   B(k, n), ldb,
                            zbeta, C(m, n), ldc);
                    }
                    else {
                        if (k == m) {
                            CORE_csymm(
                                side, uplo,
                                tempmm, tempnn,
                                alpha, A(k, k), ldak,
                                       B(k, n), ldb,
                                zbeta, C(m, n), ldc);
                        }
                        else {
                            /* A[m, k] is not stored: transpose A(k, m). */
                            CORE_cgemm(
                                PlasmaTrans, PlasmaNoTrans,
                                tempmm, tempnn, tempkm,
                                alpha, A(k, m), ldak,
                                       B(k, n), ldb,
                                zbeta, C(m, n), ldc);
                        }
                    }
                }
            }
            /*
             *  PlasmaLeft / PlasmaUpper
             */
            else {
                for (k = 0; k < C.mt; k++) {
                    tempkm = k == C.mt-1 ? C.m-k*C.mb : C.mb;
                    ldak = BLKLDD(A, k);
                    ldb  = BLKLDD(B, k);
                    zbeta = k == 0 ? beta : zone;
                    if (k < m) {
                        /* A[m, k] is not stored: transpose A(k, m). */
                        CORE_cgemm(
                            PlasmaTrans, PlasmaNoTrans,
                            tempmm, tempnn, tempkm,
                            alpha, A(k, m), ldak,
                                   B(k, n), ldb,
                            zbeta, C(m, n), ldc);
                    }
                    else {
                        if (k == m) {
                            CORE_csymm(
                                side, uplo,
                                tempmm, tempnn,
                                alpha, A(k, k), ldak,
                                       B(k, n), ldb,
                                zbeta, C(m, n), ldc);
                        }
                        else {
                            /* A(m, k) lies in the upper triangle: use as is. */
                            CORE_cgemm(
                                PlasmaNoTrans, PlasmaNoTrans,
                                tempmm, tempnn, tempkm,
                                alpha, A(m, k), lda,
                                       B(k, n), ldb,
                                zbeta, C(m, n), ldc);
                        }
                    }
                }
            }
        }
        /*
         *  PlasmaRight / PlasmaLower
         */
        else {
            lda = BLKLDD(A, n);
            ldb = BLKLDD(B, m);
            if (uplo == PlasmaLower) {
                for (k = 0; k < C.nt; k++) {
                    tempkn = k == C.nt-1 ? C.n-k*C.nb : C.nb;
                    ldak = BLKLDD(A, k);
                    zbeta = k == 0 ? beta : zone;
                    if (k < n) {
                        /* A[k, n] is not stored: transpose A(n, k). */
                        CORE_cgemm(
                            PlasmaNoTrans, PlasmaTrans,
                            tempmm, tempnn, tempkn,
                            alpha, B(m, k), ldb,
                                   A(n, k), lda,
                            zbeta, C(m, n), ldc);
                    }
                    else {
                        if (n == k) {
                            CORE_csymm(
                                side, uplo,
                                tempmm, tempnn,
                                alpha, A(k, k), ldak,
                                       B(m, k), ldb,
                                zbeta, C(m, n), ldc);
                        }
                        else {
                            /* A(k, n) lies in the lower triangle: use as is. */
                            CORE_cgemm(
                                PlasmaNoTrans, PlasmaNoTrans,
                                tempmm, tempnn, tempkn,
                                alpha, B(m, k), ldb,
                                       A(k, n), ldak,
                                zbeta, C(m, n), ldc);
                        }
                    }
                }
            }
            /*
             *  PlasmaRight / PlasmaUpper
             */
            else {
                for (k = 0; k < C.nt; k++) {
                    tempkn = k == C.nt-1 ? C.n-k*C.nb : C.nb;
                    ldak = BLKLDD(A, k);
                    zbeta = k == 0 ? beta : zone;
                    if (k < n) {
                        /* A(k, n) lies in the upper triangle: use as is. */
                        CORE_cgemm(
                            PlasmaNoTrans, PlasmaNoTrans,
                            tempmm, tempnn, tempkn,
                            alpha, B(m, k), ldb,
                                   A(k, n), ldak,
                            zbeta, C(m, n), ldc);
                    }
                    else {
                        if (n == k) {
                            CORE_csymm(
                                side, uplo,
                                tempmm, tempnn,
                                alpha, A(k, k), ldak,
                                       B(m, k), ldb,
                                zbeta, C(m, n), ldc);
                        }
                        else {
                            /* A[k, n] is not stored: transpose A(n, k). */
                            CORE_cgemm(
                                PlasmaNoTrans, PlasmaTrans,
                                tempmm, tempnn, tempkn,
                                alpha, B(m, k), ldb,
                                       A(n, k), lda,
                                zbeta, C(m, n), ldc);
                        }
                    }
                }
            }
        }
        m = next_m;
        n = next_n;
    }
}

Here is the call graph for this function:

Here is the caller graph for this function:

void plasma_pcsymm_quark ( PLASMA_enum  side,
PLASMA_enum  uplo,
PLASMA_Complex32_t  alpha,
PLASMA_desc  A,
PLASMA_desc  B,
PLASMA_Complex32_t  beta,
PLASMA_desc  C,
PLASMA_sequence *sequence,
PLASMA_request *request
)

Parallel tile symmetric matrix-matrix multiplication - dynamic scheduling

Definition at line 231 of file pcsymm.c.

References A, B, BLKLDD, C, plasma_desc_t::m, plasma_desc_t::mb, plasma_desc_t::mt, plasma_desc_t::n, plasma_desc_t::nb, plasma_desc_t::nt, plasma_context_self(), PLASMA_SUCCESS, PlasmaLeft, PlasmaLower, PlasmaNoTrans, PlasmaTrans, plasma_context_struct::quark, QUARK_CORE_cgemm(), QUARK_CORE_csymm(), plasma_sequence_t::quark_sequence, QUARK_Task_Flag_Set(), Quark_Task_Flags_Initializer, plasma_sequence_t::status, and TASK_SEQUENCE.

{
    /*
     * Tile symmetric matrix-matrix multiply, dynamic scheduling.
     *
     * Walks every tile C(m, n) and, for each k, issues one QUARK_CORE_cgemm
     * or QUARK_CORE_csymm call so that
     *     side == PlasmaLeft :  C(m, n) = alpha * sum_k A[m, k] * B(k, n) + beta * C(m, n)
     *     otherwise          :  C(m, n) = alpha * sum_k B(m, k) * A[k, n] + beta * C(m, n)
     * Only the `uplo` triangle of A's tiles is read: an off-diagonal tile
     * from the other triangle is replaced by the transpose of its mirror
     * tile, and the diagonal tile goes through the symm kernel.
     *
     * Parameters (see the function signature): side, uplo, alpha, A, B, beta,
     * C, sequence, request. `request` is not used in this body. Nothing is
     * returned; the function exits immediately when sequence->status is not
     * PLASMA_SUCCESS.
     *
     * NOTE(review): the declarations of plasma, task_flags, zbeta and zone,
     * and the `QUARK_CORE_cgemm(` / `QUARK_CORE_csymm(` call heads with their
     * transpose flags / `side, uplo` arguments were absent from the listing
     * this body was taken from. They are restored from the symbols named in
     * the function's reference list and the tile algebra above -- confirm
     * against the shipped pcsymm.c.
     */
    plasma_context_t *plasma;
    Quark_Task_Flags task_flags = Quark_Task_Flags_Initializer;

    int k, m, n;
    int lda, ldak, ldb, ldc;
    int tempmm, tempnn, tempkn, tempkm;

    /* beta is applied on the first k step only; later steps accumulate. */
    PLASMA_Complex32_t zbeta;
    PLASMA_Complex32_t zone = (PLASMA_Complex32_t)1.0;

    plasma = plasma_context_self();
    if (sequence->status != PLASMA_SUCCESS)
        return;
    /* Tag every call issued below with this sequence's QUARK sequence. */
    QUARK_Task_Flag_Set(&task_flags, TASK_SEQUENCE, (intptr_t)sequence->quark_sequence);

    for (m = 0; m < C.mt; m++) {
        /* The last tile row / column may be smaller than mb / nb. */
        tempmm = m == C.mt-1 ? C.m-m*C.mb : C.mb;
        ldc = BLKLDD(C, m);
        for (n = 0; n < C.nt; n++) {
            tempnn = n == C.nt-1 ? C.n-n*C.nb : C.nb;
            /*
             *  PlasmaLeft / PlasmaLower
             */
            if (side == PlasmaLeft) {
                lda = BLKLDD(A, m);
                if (uplo == PlasmaLower) {
                    for (k = 0; k < C.mt; k++) {
                        tempkm = k == C.mt-1 ? C.m-k*C.mb : C.mb;
                        ldak = BLKLDD(A, k);
                        ldb  = BLKLDD(B, k);
                        zbeta = k == 0 ? beta : zone;
                        if (k < m) {
                            /* A(m, k) lies in the lower triangle: use as is. */
                            QUARK_CORE_cgemm(
                                plasma->quark, &task_flags,
                                PlasmaNoTrans, PlasmaNoTrans,
                                tempmm, tempnn, tempkm, A.mb,
                                alpha, A(m, k), lda,  /* lda * K */
                                       B(k, n), ldb,  /* ldb * Y */
                                zbeta, C(m, n), ldc); /* ldc * Y */
                        }
                        else {
                            if (k == m) {
                                QUARK_CORE_csymm(
                                    plasma->quark, &task_flags,
                                    side, uplo,
                                    tempmm, tempnn, A.mb,
                                    alpha, A(k, k), ldak, /* ldak * X */
                                           B(k, n), ldb,  /* ldb  * Y */
                                    zbeta, C(m, n), ldc); /* ldc  * Y */
                            }
                            else {
                                /* A[m, k] is not stored: transpose A(k, m). */
                                QUARK_CORE_cgemm(
                                    plasma->quark, &task_flags,
                                    PlasmaTrans, PlasmaNoTrans,
                                    tempmm, tempnn, tempkm, A.mb,
                                    alpha, A(k, m), ldak, /* ldak * X */
                                           B(k, n), ldb,  /* ldb  * Y */
                                    zbeta, C(m, n), ldc); /* ldc  * Y */
                            }
                        }
                    }
                }
                /*
                 *  PlasmaLeft / PlasmaUpper
                 */
                else {
                    for (k = 0; k < C.mt; k++) {
                        tempkm = k == C.mt-1 ? C.m-k*C.mb : C.mb;
                        ldak = BLKLDD(A, k);
                        ldb  = BLKLDD(B, k);
                        zbeta = k == 0 ? beta : zone;
                        if (k < m) {
                            /* A[m, k] is not stored: transpose A(k, m). */
                            QUARK_CORE_cgemm(
                                plasma->quark, &task_flags,
                                PlasmaTrans, PlasmaNoTrans,
                                tempmm, tempnn, tempkm, A.mb,
                                alpha, A(k, m), ldak, /* ldak * X */
                                       B(k, n), ldb,  /* ldb  * Y */
                                zbeta, C(m, n), ldc); /* ldc  * Y */
                        }
                        else {
                            if (k == m) {
                                QUARK_CORE_csymm(
                                    plasma->quark, &task_flags,
                                    side, uplo,
                                    tempmm, tempnn, A.mb,
                                    alpha, A(k, k), ldak, /* ldak * K */
                                           B(k, n), ldb,  /* ldb  * Y */
                                    zbeta, C(m, n), ldc); /* ldc  * Y */
                            }
                            else {
                                /* A(m, k) lies in the upper triangle: use as is. */
                                QUARK_CORE_cgemm(
                                    plasma->quark, &task_flags,
                                    PlasmaNoTrans, PlasmaNoTrans,
                                    tempmm, tempnn, tempkm, A.mb,
                                    alpha, A(m, k), lda,  /* lda * K */
                                           B(k, n), ldb,  /* ldb * Y */
                                    zbeta, C(m, n), ldc); /* ldc * Y */
                            }
                        }
                    }
                }
            }
            /*
             *  PlasmaRight / PlasmaLower
             */
            else {
                lda = BLKLDD(A, n);
                ldb = BLKLDD(B, m);
                if (uplo == PlasmaLower) {
                    for (k = 0; k < C.nt; k++) {
                        tempkn = k == C.nt-1 ? C.n-k*C.nb : C.nb;
                        ldak = BLKLDD(A, k);
                        zbeta = k == 0 ? beta : zone;
                        if (k < n) {
                            /* A[k, n] is not stored: transpose A(n, k). */
                            QUARK_CORE_cgemm(
                                plasma->quark, &task_flags,
                                PlasmaNoTrans, PlasmaTrans,
                                tempmm, tempnn, tempkn, A.mb,
                                alpha, B(m, k), ldb,  /* ldb * K */
                                       A(n, k), lda,  /* lda * K */
                                zbeta, C(m, n), ldc); /* ldc * Y */
                        }
                        else {
                            if (k == n) {
                                QUARK_CORE_csymm(
                                    plasma->quark, &task_flags,
                                    side, uplo,
                                    tempmm, tempnn, A.mb,
                                    alpha, A(k, k), ldak, /* ldak * Y */
                                           B(m, k), ldb,  /* ldb  * Y */
                                    zbeta, C(m, n), ldc); /* ldc  * Y */
                            }
                            else {
                                /* A(k, n) lies in the lower triangle: use as is. */
                                QUARK_CORE_cgemm(
                                    plasma->quark, &task_flags,
                                    PlasmaNoTrans, PlasmaNoTrans,
                                    tempmm, tempnn, tempkn, A.mb,
                                    alpha, B(m, k), ldb,  /* ldb  * K */
                                           A(k, n), ldak, /* ldak * Y */
                                    zbeta, C(m, n), ldc); /* ldc  * Y */
                            }
                        }
                    }
                }
                /*
                 *  PlasmaRight / PlasmaUpper
                 */
                else {
                    for (k = 0; k < C.nt; k++) {
                        tempkn = k == C.nt-1 ? C.n-k*C.nb : C.nb;
                        ldak = BLKLDD(A, k);
                        zbeta = k == 0 ? beta : zone;
                        if (k < n) {
                            /* A(k, n) lies in the upper triangle: use as is. */
                            QUARK_CORE_cgemm(
                                plasma->quark, &task_flags,
                                PlasmaNoTrans, PlasmaNoTrans,
                                tempmm, tempnn, tempkn, A.mb,
                                alpha, B(m, k), ldb,  /* ldb  * K */
                                       A(k, n), ldak, /* ldak * Y */
                                zbeta, C(m, n), ldc); /* ldc  * Y */
                        }
                        else {
                            if (k == n) {
                                QUARK_CORE_csymm(
                                    plasma->quark, &task_flags,
                                    side, uplo,
                                    tempmm, tempnn, A.mb,
                                    alpha, A(k, k), ldak, /* ldak * Y */
                                           B(m, k), ldb,  /* ldb  * Y */
                                    zbeta, C(m, n), ldc); /* ldc  * Y */
                            }
                            else {
                                /* A[k, n] is not stored: transpose A(n, k). */
                                QUARK_CORE_cgemm(
                                    plasma->quark, &task_flags,
                                    PlasmaNoTrans, PlasmaTrans,
                                    tempmm, tempnn, tempkn, A.mb,
                                    alpha, B(m, k), ldb,  /* ldb * K */
                                           A(n, k), lda,  /* lda * K */
                                    zbeta, C(m, n), ldc); /* ldc * Y */
                            }
                        }
                    }
                }
            }
        }
    }
}

Here is the call graph for this function: