PLASMA  2.4.5
PLASMA - Parallel Linear Algebra for Scalable Multi-core Architectures
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups
pstrtri.c File Reference
#include "common.h"
Include dependency graph for pstrtri.c:

Go to the source code of this file.

Macros

/* Shorthand for BLKADDR on the descriptor named A with element type float;
 * presumably yields the address of tile (m, n) -- confirm against BLKADDR. */
#define A(m, n)   BLKADDR(A, float, m, n)

Functions

void plasma_pstrtri_quark (PLASMA_enum uplo, PLASMA_enum diag, PLASMA_desc A, PLASMA_sequence *sequence, PLASMA_request *request)

Detailed Description

PLASMA auxiliary routines. PLASMA is a software package provided by Univ. of Tennessee, Univ. of California Berkeley, and Univ. of Colorado Denver.

Version:
2.4.5
Author:
Julien Langou
Henricus Bouwmeester
Mathieu Faverge
Date:
2010-11-15 (generated s Tue Nov 22 14:35:38 2011)

Definition in file pstrtri.c.


Macro Definition Documentation

#define A(m, n)    BLKADDR(A, float, m, n)

Definition at line 19 of file pstrtri.c.


Function Documentation

void plasma_pstrtri_quark ( PLASMA_enum  uplo,
PLASMA_enum  diag,
PLASMA_desc  A,
PLASMA_sequence *  sequence,
PLASMA_request *  request 
)

Parallel tile triangular matrix inverse - dynamic scheduling

Definition at line 23 of file pstrtri.c.

References A, BLKLDD, plasma_desc_t::m, plasma_desc_t::mb, plasma_desc_t::mt, plasma_desc_t::n, plasma_desc_t::nb, plasma_desc_t::nt, plasma_context_self(), PLASMA_SUCCESS, PlasmaLeft, PlasmaLower, PlasmaNoTrans, PlasmaRight, plasma_context_struct::quark, QUARK_CORE_sgemm(), QUARK_CORE_strsm(), QUARK_CORE_strtri(), plasma_sequence_t::quark_sequence, QUARK_Task_Flag_Set(), Quark_Task_Flags_Initializer, plasma_sequence_t::status, and TASK_SEQUENCE.

{
    /*
     * Body of plasma_pstrtri_quark(uplo, diag, A, sequence, request).
     *
     * Walks the tiles of the triangular matrix described by A and submits one
     * QUARK task per tile operation (TRSM, GEMM, TRTRI). Nothing is computed
     * here directly; the work is carried out by the submitted tasks.
     *
     * NOTE(review): the side/uplo/trans/diag argument line of each call was
     * reconstructed from the QUARK_CORE_* signatures and the tile dimensions
     * passed alongside them -- confirm against the upstream pstrtri.c.
     */
    plasma_context_t *plasma;
    Quark_Task_Flags task_flags = Quark_Task_Flags_Initializer;

    int k, m, n;
    int ldam, ldan;
    int tempkn, tempmm, tempnn;

    float zone  = (float) 1.0;
    float mzone = (float)-1.0;

    plasma = plasma_context_self();

    /* Do not submit anything if the sequence is not in the success state. */
    if (sequence->status != PLASMA_SUCCESS)
        return;

    /* Attach every task submitted below to this sequence. */
    QUARK_Task_Flag_Set(&task_flags, TASK_SEQUENCE,
                        (intptr_t)sequence->quark_sequence);

    /*
     * PlasmaLower
     */
    if (uplo == PlasmaLower) {
        for (n = 0; n < A.nt; n++) {
            /* The last tile column may be narrower than A.nb. */
            tempnn = (n == A.nt-1) ? A.n - n*A.nb : A.nb;
            ldan   = BLKLDD(A, n);

            /* Tiles below the diagonal tile: scaled by -1 and solved from
             * the right against the diagonal tile A(n, n). */
            for (m = n+1; m < A.mt; m++) {
                tempmm = (m == A.mt-1) ? A.m - m*A.mb : A.mb;
                ldam   = BLKLDD(A, m);
                QUARK_CORE_strsm(
                    plasma->quark, &task_flags,
                    PlasmaRight, uplo, PlasmaNoTrans, diag,
                    tempmm, tempnn, A.mb,
                    mzone, A(n, n), ldan,
                           A(m, n), ldam);
            }

            /* Accumulate A(m, n) * A(n, k) into the tiles A(m, k) that lie
             * to the left of column n. */
            for (m = n+1; m < A.mt; m++) {
                tempmm = (m == A.mt-1) ? A.m - m*A.mb : A.mb;
                ldam   = BLKLDD(A, m);
                for (k = 0; k < n; k++) {
                    tempkn = (k == A.nt-1) ? A.n - k*A.nb : A.nb;
                    QUARK_CORE_sgemm(
                        plasma->quark, &task_flags,
                        PlasmaNoTrans, PlasmaNoTrans,
                        tempmm, tempkn, tempnn, A.mb,
                        zone, A(m, n), ldam,
                              A(n, k), ldan,
                        zone, A(m, k), ldam);
                }
            }

            /* Tiles to the left of the diagonal in tile row n: solved from
             * the left against A(n, n). */
            for (m = 0; m < n; m++) {
                tempmm = (m == A.mt-1) ? A.m - m*A.mb : A.mb;
                QUARK_CORE_strsm(
                    plasma->quark, &task_flags,
                    PlasmaLeft, uplo, PlasmaNoTrans, diag,
                    tempnn, tempmm, A.mb,
                    zone, A(n, n), ldan,
                          A(n, m), ldan);
            }

            /* Diagonal tile; A.nb*n is passed as the offset argument that
             * accompanies sequence/request. */
            QUARK_CORE_strtri(
                plasma->quark, &task_flags,
                uplo, diag,
                tempnn, A.mb,
                A(n, n), ldan,
                sequence, request, A.nb*n);
        }
    }
    /*
     * PlasmaUpper
     */
    else {
        for (m = 0; m < A.mt; m++) {
            /* The last tile row may be shorter than A.mb. */
            tempmm = (m == A.mt-1) ? A.m - m*A.mb : A.mb;
            ldam   = BLKLDD(A, m);

            /* Tiles to the right of the diagonal tile: scaled by -1 and
             * solved from the left against the diagonal tile A(m, m). */
            for (n = m+1; n < A.nt; n++) {
                tempnn = (n == A.nt-1) ? A.n - n*A.nb : A.nb;
                QUARK_CORE_strsm(
                    plasma->quark, &task_flags,
                    PlasmaLeft, uplo, PlasmaNoTrans, diag,
                    tempmm, tempnn, A.mb,
                    mzone, A(m, m), ldam,
                           A(m, n), ldam);
            }

            for (n = 0; n < m; n++) {
                tempnn = (n == A.nt-1) ? A.n - n*A.nb : A.nb;
                ldan   = BLKLDD(A, n);

                /* Accumulate A(n, m) * A(m, k) into the tiles A(n, k) that
                 * lie to the right of column m. */
                for (k = m+1; k < A.nt; k++) {
                    tempkn = (k == A.nt-1) ? A.n - k*A.nb : A.nb;
                    QUARK_CORE_sgemm(
                        plasma->quark, &task_flags,
                        PlasmaNoTrans, PlasmaNoTrans,
                        tempnn, tempkn, tempmm, A.mb,
                        zone, A(n, m), ldan,
                              A(m, k), ldam,
                        zone, A(n, k), ldan);
                }

                /* Tile above the diagonal in tile column m: solved from the
                 * right against A(m, m). */
                QUARK_CORE_strsm(
                    plasma->quark, &task_flags,
                    PlasmaRight, uplo, PlasmaNoTrans, diag,
                    tempnn, tempmm, A.mb,
                    zone, A(m, m), ldam,
                          A(n, m), ldan);
            }

            /* Diagonal tile; A.mb*m is passed as the offset argument that
             * accompanies sequence/request. */
            QUARK_CORE_strtri(
                plasma->quark, &task_flags,
                uplo, diag,
                tempmm, A.mb,
                A(m, m), ldam,
                sequence, request, A.mb*m);
        }
    }
}

Here is the call graph for this function: