PLASMA  2.4.5
PLASMA - Parallel Linear Algebra for Scalable Multi-core Architectures
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups
pzpotrf.c File Reference
#include "common.h"
Include dependency graph for pzpotrf.c:

Go to the source code of this file.

Macros

#define A(m, n)   BLKADDR(A, PLASMA_Complex64_t, m, n)

Functions

void plasma_pzpotrf (plasma_context_t *plasma)
void plasma_pzpotrf_quark (PLASMA_enum uplo, PLASMA_desc A, PLASMA_sequence *sequence, PLASMA_request *request)

Detailed Description

PLASMA auxiliary routines. PLASMA is a software package provided by Univ. of Tennessee, Univ. of California Berkeley and Univ. of Colorado Denver.

Version:
2.4.5
Author:
Jakub Kurzak
Hatem Ltaief
Mathieu Faverge
Date:
2010-11-15
Precisions:
normal z -> s d c

Definition in file pzpotrf.c.


Macro Definition Documentation

#define A(m, n)    BLKADDR(A, PLASMA_Complex64_t, m, n)

Definition at line 19 of file pzpotrf.c.


Function Documentation

void plasma_pzpotrf ( plasma_context_t * plasma)

Parallel tile Cholesky factorization - static scheduling

Definition at line 23 of file pzpotrf.c.

References A, BLKLDD, CORE_zgemm(), CORE_zherk(), CORE_zpotrf(), CORE_ztrsm(), plasma_desc_t::n, plasma_desc_t::nb, plasma_desc_t::nt, PLASMA_RANK, plasma_request_fail(), PLASMA_SIZE, PLASMA_SUCCESS, plasma_unpack_args_4, PlasmaConjTrans, PlasmaLeft, PlasmaLower, PlasmaNonUnit, PlasmaNoTrans, PlasmaRight, PlasmaUpper, ss_abort, ss_aborted, ss_cond_set, ss_cond_wait, ss_finalize, ss_init, plasma_sequence_t::status, and uplo.

{
    /*
     * Static-scheduling tile Cholesky factorization of the tile matrix A.
     *
     * Work is enumerated as (k, m, n) triples: k is the factorization step,
     * m is the tile being produced in panel k, and n runs 0..k over the
     * updates that tile needs (n < k) followed by its final kernel (n == k).
     * Each thread starts at m = PLASMA_RANK and advances m by PLASMA_SIZE,
     * wrapping into the next step k when m runs past the last tile.
     * Dependencies between tiles are enforced with the ss_cond_* progress
     * table.
     *
     * NOTE(review): in this listing the kernel names, their leading flag
     * arguments, the declarations of uplo/A/zone/mzone, the initial
     * "m = PLASMA_RANK", ss_abort() and ss_finalize() had been lost; they
     * are restored here from the function's reference list and the operand
     * tiles of each call. Confirm against the original pzpotrf.c.
     */
    PLASMA_enum uplo;
    PLASMA_desc A;
    PLASMA_sequence *sequence;
    PLASMA_request *request;

    int k, m, n;
    int next_k;
    int next_m;
    int next_n;
    int ldak, ldam, ldan;
    int info;
    int tempkn, tempmn;

    PLASMA_Complex64_t zone  = (PLASMA_Complex64_t) 1.0;
    PLASMA_Complex64_t mzone = (PLASMA_Complex64_t)-1.0;

    plasma_unpack_args_4(uplo, A, sequence, request);
    if (sequence->status != PLASMA_SUCCESS)
        return;
    ss_init(A.nt, A.nt, 0);

    /* Locate this thread's first (k, m) pair. */
    k = 0;
    m = PLASMA_RANK;
    while (m >= A.nt) {
        k++;
        m = m-A.nt+k;
    }
    n = 0;

    while (k < A.nt && m < A.nt && !ss_aborted()) {
        /* Compute the triple that follows (k, m, n) for this thread. */
        next_n = n;
        next_m = m;
        next_k = k;

        next_n++;
        if (next_n > next_k) {
            /* All n for this tile are done: move on to this thread's next tile. */
            next_m += PLASMA_SIZE;
            while (next_m >= A.nt && next_k < A.nt) {
                next_k++;
                next_m = next_m-A.nt+next_k;
            }
            next_n = 0;
        }

        /* The last tile may be smaller than A.nb. */
        tempkn = k == A.nt-1 ? A.n-k*A.nb : A.nb;
        tempmn = m == A.nt-1 ? A.n-m*A.nb : A.nb;

        ldak = BLKLDD(A, k);
        ldan = BLKLDD(A, n);
        ldam = BLKLDD(A, m);

        if (m == k) {
            if (n == k) {
                /* Diagonal tile, all updates applied: factorize it. */
                /*
                 *  PlasmaLower
                 */
                if (uplo == PlasmaLower) {
                    CORE_zpotrf(
                        PlasmaLower,
                        tempkn,
                        A(k, k), ldak,
                        &info);
                }
                /*
                 *  PlasmaUpper
                 */
                else {
                    CORE_zpotrf(
                        PlasmaUpper,
                        tempkn,
                        A(k, k), ldak,
                        &info);
                }
                if (info != 0) {
                    /* Report the failure offset by the tile position and stop all threads. */
                    plasma_request_fail(sequence, request, info + A.nb*k);
                    ss_abort();
                }
                ss_cond_set(k, k, 1);
            }
            else {
                /* Diagonal tile: Hermitian rank-k update with the tile from step n. */
                ss_cond_wait(k, n, 1);
                /*
                 *  PlasmaLower
                 */
                if (uplo == PlasmaLower) {
                    CORE_zherk(
                        PlasmaLower, PlasmaNoTrans,
                        tempkn, A.nb,
                        -1.0, A(k, n), ldak,
                         1.0, A(k, k), ldak);
                }
                /*
                 *  PlasmaUpper
                 */
                else {
                    CORE_zherk(
                        PlasmaUpper, PlasmaConjTrans,
                        tempkn, A.nb,
                        -1.0, A(n, k), ldan,
                         1.0, A(k, k), ldak);
                }
            }
        }
        else {
            if (n == k) {
                /* Off-diagonal tile, all updates applied: triangular solve against the diagonal tile. */
                ss_cond_wait(k, k, 1);
                /*
                 *  PlasmaLower
                 */
                if (uplo == PlasmaLower) {
                    CORE_ztrsm(
                        PlasmaRight, PlasmaLower, PlasmaConjTrans, PlasmaNonUnit,
                        tempmn, A.nb,
                        zone, A(k, k), ldak,
                              A(m, k), ldam);
                }
                /*
                 *  PlasmaUpper
                 */
                else {
                    CORE_ztrsm(
                        PlasmaLeft, PlasmaUpper, PlasmaConjTrans, PlasmaNonUnit,
                        A.nb, tempmn,
                        zone, A(k, k), ldak,
                              A(k, m), ldak);
                }
                ss_cond_set(m, k, 1);
            }
            else {
                /* Off-diagonal tile: subtract the product of the two tiles from step n. */
                ss_cond_wait(k, n, 1);
                ss_cond_wait(m, n, 1);
                /*
                 *  PlasmaLower
                 */
                if (uplo == PlasmaLower) {
                    CORE_zgemm(
                        PlasmaNoTrans, PlasmaConjTrans,
                        tempmn, A.nb, A.nb,
                        mzone, A(m, n), ldam,
                               A(k, n), ldak,
                         zone, A(m, k), ldam);
                }
                /*
                 *  PlasmaUpper
                 */
                else {
                    CORE_zgemm(
                        PlasmaConjTrans, PlasmaNoTrans,
                        A.nb, tempmn, A.nb,
                        mzone, A(n, k), ldan,
                               A(n, m), ldan,
                         zone, A(k, m), ldak);
                }
            }
        }
        n = next_n;
        m = next_m;
        k = next_k;
    }
    ss_finalize();
}

Here is the call graph for this function:

Here is the caller graph for this function:

void plasma_pzpotrf_quark ( PLASMA_enum  uplo,
PLASMA_desc  A,
PLASMA_sequence * sequence,
PLASMA_request * request 
)

Parallel tile Cholesky factorization - dynamic scheduling

Definition at line 190 of file pzpotrf.c.

References A, BLKLDD, plasma_desc_t::m, plasma_desc_t::mb, plasma_desc_t::mt, plasma_desc_t::n, plasma_desc_t::nb, plasma_desc_t::nt, plasma_context_self(), PLASMA_SUCCESS, PlasmaConjTrans, PlasmaLeft, PlasmaLower, PlasmaNonUnit, PlasmaNoTrans, PlasmaRight, PlasmaUpper, plasma_context_struct::quark, QUARK_CORE_zgemm(), QUARK_CORE_zherk(), QUARK_CORE_zpotrf(), QUARK_CORE_ztrsm(), plasma_sequence_t::quark_sequence, QUARK_Task_Flag_Set(), Quark_Task_Flags_Initializer, plasma_sequence_t::status, and TASK_SEQUENCE.

{
    /*
     * Dynamic-scheduling tile Cholesky factorization of the tile matrix A.
     *
     * For each step k this inserts into the QUARK runtime: the factorization
     * of the diagonal tile, a triangular solve for every remaining tile of
     * panel k, then a Hermitian rank-k update of each trailing diagonal tile
     * and a matrix-multiply update of each trailing off-diagonal tile.
     * All tasks are tagged with the caller's sequence through task_flags.
     *
     * NOTE(review): in this listing the QUARK_CORE_* kernel names, their
     * flag arguments, and the declarations of plasma/task_flags/zone/mzone
     * had been lost; they are restored here from the function's reference
     * list and the operand tiles of each call. Confirm against the original
     * pzpotrf.c.
     */
    plasma_context_t *plasma;
    Quark_Task_Flags task_flags = Quark_Task_Flags_Initializer;

    int k, m, n;
    int ldak, ldam;
    int tempkm, tempmm;

    PLASMA_Complex64_t zone  = (PLASMA_Complex64_t) 1.0;
    PLASMA_Complex64_t mzone = (PLASMA_Complex64_t)-1.0;

    plasma = plasma_context_self();
    if (sequence->status != PLASMA_SUCCESS)
        return;
    QUARK_Task_Flag_Set(&task_flags, TASK_SEQUENCE, (intptr_t)sequence->quark_sequence);

    /*
     *  PlasmaLower
     */
    if (uplo == PlasmaLower) {
        for (k = 0; k < A.mt; k++) {
            /* The last tile row may be smaller than A.mb. */
            tempkm = k == A.mt-1 ? A.m-k*A.mb : A.mb;
            ldak = BLKLDD(A, k);
            QUARK_CORE_zpotrf(
                plasma->quark, &task_flags,
                PlasmaLower, tempkm, A.mb,
                A(k, k), ldak,
                sequence, request, A.nb*k);

            /* Solve every tile below the diagonal in column k. */
            for (m = k+1; m < A.mt; m++) {
                tempmm = m == A.mt-1 ? A.m-m*A.mb : A.mb;
                ldam = BLKLDD(A, m);
                QUARK_CORE_ztrsm(
                    plasma->quark, &task_flags,
                    PlasmaRight, PlasmaLower, PlasmaConjTrans, PlasmaNonUnit,
                    tempmm, A.mb, A.mb,
                    zone, A(k, k), ldak,
                          A(m, k), ldam);
            }
            /* Update the trailing submatrix with column k. */
            for (m = k+1; m < A.mt; m++) {
                tempmm = m == A.mt-1 ? A.m-m*A.mb : A.mb;
                ldam = BLKLDD(A, m);
                QUARK_CORE_zherk(
                    plasma->quark, &task_flags,
                    PlasmaLower, PlasmaNoTrans,
                    tempmm, A.mb, A.mb,
                    -1.0, A(m, k), ldam,
                     1.0, A(m, m), ldam);
                for (n = k+1; n < m; n++) {
                    /* n < m <= A.mt-1, so tile row n is never the last one and A.mb is passed as its leading dimension. */
                    QUARK_CORE_zgemm(
                        plasma->quark, &task_flags,
                        PlasmaNoTrans, PlasmaConjTrans,
                        tempmm, A.mb, A.mb, A.mb,
                        mzone, A(m, k), ldam,
                               A(n, k), A.mb,
                         zone, A(m, n), ldam);
                }
            }
        }
    }
    /*
     *  PlasmaUpper
     */
    else {
        for (k = 0; k < A.nt; k++) {
            /* The last tile column may be smaller than A.nb. */
            tempkm = k == A.nt-1 ? A.n-k*A.nb : A.nb;
            ldak = BLKLDD(A, k);
            QUARK_CORE_zpotrf(
                plasma->quark, &task_flags,
                PlasmaUpper,
                tempkm, A.mb,
                A(k, k), ldak,
                sequence, request, A.nb*k);

            /* Solve every tile to the right of the diagonal in row k. */
            for (m = k+1; m < A.nt; m++) {
                tempmm = m == A.nt-1 ? A.n-m*A.nb : A.nb;
                QUARK_CORE_ztrsm(
                    plasma->quark, &task_flags,
                    PlasmaLeft, PlasmaUpper, PlasmaConjTrans, PlasmaNonUnit,
                    A.nb, tempmm, A.mb,
                    zone, A(k, k), ldak,
                          A(k, m), ldak);
            }
            /* Update the trailing submatrix with row k. */
            for (m = k+1; m < A.nt; m++) {
                tempmm = m == A.nt-1 ? A.n-m*A.nb : A.nb;
                ldam = BLKLDD(A, m);
                QUARK_CORE_zherk(
                    plasma->quark, &task_flags,
                    PlasmaUpper, PlasmaConjTrans,
                    tempmm, A.mb, A.mb,
                    -1.0, A(k, m), ldak,
                     1.0, A(m, m), ldam);
                for (n = k+1; n < m; n++) {
                    /* n < m <= A.nt-1, so tile row n is never the last one and A.mb is passed as its leading dimension. */
                    QUARK_CORE_zgemm(
                        plasma->quark, &task_flags,
                        PlasmaConjTrans, PlasmaNoTrans,
                        A.mb, tempmm, A.mb, A.mb,
                        mzone, A(k, n), ldak,
                               A(k, m), ldak,
                         zone, A(n, m), A.mb);
                }
            }
        }
    }
}

Here is the call graph for this function: