PLASMA  2.4.5
PLASMA - Parallel Linear Algebra for Scalable Multi-core Architectures
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups
pztile.c File Reference
#include "common.h"
#include "auxiliary.h"
#include "tile.h"
#include "quark.h"
Include dependency graph for pztile.c:

Go to the source code of this file.

Macros

/* AF77(m, n): address inside the flat array Af77 at element offset
 * A.nb*lda*n + A.mb*m. Expects Af77, lda and a descriptor A with fields
 * nb and mb to be in scope at the point of use. The first term is
 * computed in 64-bit arithmetic; the A.mb*(m) product is formed in the
 * operands' own type and only then cast to int64_t. */
#define AF77(m, n)   &(Af77[ ((int64_t)A.nb*(int64_t)lda*(int64_t)(n)) + (int64_t)(A.mb*(m)) ])
/* ABDL(m, n): forwards to BLKADDR with descriptor A and element type
 * PLASMA_Complex64_t. BLKADDR is defined outside this page; presumably it
 * yields the address of tile (m, n) in tile layout -- confirm. */
#define ABDL(m, n)   BLKADDR(A, PLASMA_Complex64_t, m, n)

Functions

void CORE_ztile_zero_quark (Quark *quark)
void plasma_pzlapack_to_tile (plasma_context_t *plasma)
void plasma_pzlapack_to_tile_quark (PLASMA_Complex64_t *Af77, int lda, PLASMA_desc A, PLASMA_sequence *sequence, PLASMA_request *request)
void plasma_pztile_to_lapack (plasma_context_t *plasma)
void plasma_pztile_to_lapack_quark (PLASMA_desc A, PLASMA_Complex64_t *Af77, int lda, PLASMA_sequence *sequence, PLASMA_request *request)
void plasma_pztile_zero (plasma_context_t *plasma)
void plasma_pztile_zero_quark (PLASMA_desc A, PLASMA_sequence *sequence, PLASMA_request *request)

Macro Definition Documentation

#define ABDL(m, n)    BLKADDR(A, PLASMA_Complex64_t, m, n)

Definition at line 22 of file pztile.c.

#define AF77(m, n)    &(Af77[ ((int64_t)A.nb*(int64_t)lda*(int64_t)(n)) + (int64_t)(A.mb*(m)) ])

Definition at line 21 of file pztile.c.


Function Documentation

void CORE_ztile_zero_quark (Quark *quark)

Definition at line 321 of file pztile.c.

References A, and quark_unpack_args_6.

{
/* Task body: zeroes the entries A[lda*x + y] for x in [X1, X2) and
 * y in [Y1, Y2). All six values are obtained from the task through
 * quark_unpack_args_6 rather than from direct parameters.
 * NOTE(review): the declaration of A is not present in this listing;
 * presumably a PLASMA_Complex64_t pointer -- confirm against the full
 * source. */
int X1;
int X2;
int Y1;
int Y2;
int lda;
int x, y;
/* Unpack order: X1, X2, Y1, Y2, A, lda. */
quark_unpack_args_6(quark, X1, X2, Y1, Y2, A, lda);
/* x is scaled by lda, y is the offset within one stride of lda. */
for (x = X1; x < X2; x++)
for (y = Y1; y < Y2; y++)
A[lda*x+y] = 0.0;
}

Here is the caller graph for this function:

void plasma_pzlapack_to_tile (plasma_context_t *plasma)

Conversion from LAPACK F77 matrix layout to tile layout - static scheduling

Definition at line 29 of file pztile.c.

References A, ABDL, AF77, BLKLDD, CORE_zlacpy(), plasma_desc_t::i, plasma_desc_t::j, plasma_desc_t::m, plasma_desc_t::mb, plasma_desc_t::mt, plasma_desc_t::n, plasma_desc_t::nb, plasma_desc_t::nt, PLASMA_RANK, PLASMA_SIZE, PLASMA_SUCCESS, plasma_unpack_args_5, PlasmaUpperLower, and plasma_sequence_t::status.

{
/* Statically scheduled copy from the LAPACK-layout array Af77 (leading
 * dimension lda) into the tiles of descriptor A. Arguments are taken
 * from the context via plasma_unpack_args_5; nothing is done when the
 * sequence status is not PLASMA_SUCCESS.
 * NOTE(review): this listing omits the declarations of Af77, A, f77 and
 * bdl -- confirm against the full source. */
int lda;
PLASMA_sequence *sequence;
PLASMA_request *request;
int X1, Y1;
int X2, Y2;
int n, m, ldt;
int next_m;
int next_n;
plasma_unpack_args_5(Af77, lda, A, sequence, request);
if (sequence->status != PLASMA_SUCCESS)
return;
n = 0;
/* NOTE(review): m is read here without any visible assignment. The
 * page's References list names PLASMA_RANK, which does not appear in
 * this listing, so an "m = PLASMA_RANK;" line was presumably dropped --
 * confirm. */
/* Fold the starting index into a (row m, column n) tile coordinate. */
while (m >= A.mt && n < A.nt) {
n++;
m = m-A.mt;
}
/* One tile per iteration; the loop ends once n runs past the last tile
 * column. */
while (n < A.nt) {
/* Next tile is PLASMA_SIZE tiles further on, wrapped by A.mt rows. */
next_m = m;
next_n = n;
next_m += PLASMA_SIZE;
while (next_m >= A.mt && next_n < A.nt) {
next_n++;
next_m = next_m-A.mt;
}
/* Sub-range of the tile to copy: [X1, X2) is scaled by the leading
 * dimension below, [Y1, Y2) is added directly. The first tile row/column
 * starts at the A.i / A.j remainder; the last one ends at the remainder
 * of the final index plus one; every other tile spans the full
 * A.mb / A.nb. */
X1 = n == 0 ? A.j%A.nb : 0;
X2 = n == A.nt-1 ? (A.j+A.n-1)%A.nb+1 : A.nb;
Y1 = m == 0 ? A.i%A.mb : 0;
Y2 = m == A.mt-1 ? (A.i+A.m-1)%A.mb+1 : A.mb;
f77 = AF77(m, n);
bdl = ABDL(m, n);
ldt = BLKLDD(A, m);
/* NOTE(review): the callee name for the argument list below is missing
 * from this listing; the References list names CORE_zlacpy() -- confirm. */
/* NOTE(review): bdl is passed with leading dimension ldt but is indexed
 * with X1*lda+Y1. plasma_pztile_zero indexes the same kind of pointer as
 * bdl[ldt*x+y], so X1*ldt+Y1 looks intended; the two differ whenever
 * X1 != 0 and lda != ldt -- confirm. */
PlasmaUpperLower, (Y2-Y1), (X2-X1),
&(f77[X1*lda+Y1]), lda,
&(bdl[X1*lda+Y1]), ldt);
m = next_m;
n = next_n;
}
}

Here is the call graph for this function:

Here is the caller graph for this function:

void plasma_pzlapack_to_tile_quark ( PLASMA_Complex64_t *Af77,
int  lda,
PLASMA_desc  A,
PLASMA_sequence *sequence,
PLASMA_request *request
)

Conversion from LAPACK F77 matrix layout to tile layout - dynamic scheduling

Definition at line 88 of file pztile.c.

References ABDL, AF77, BLKLDD, plasma_desc_t::i, plasma_desc_t::j, plasma_desc_t::m, plasma_desc_t::mb, plasma_desc_t::mt, plasma_desc_t::n, plasma_desc_t::nb, plasma_desc_t::nt, plasma_context_self(), PLASMA_SUCCESS, PlasmaUpperLower, plasma_context_struct::quark, QUARK_CORE_zlacpy(), plasma_sequence_t::quark_sequence, QUARK_Task_Flag_Set(), Quark_Task_Flags_Initializer, plasma_sequence_t::status, and TASK_SEQUENCE.

{
/* Dynamically scheduled counterpart: walks every tile (m, n) of A and
 * hands one copy per tile, from the LAPACK-layout array Af77 to the
 * tile, to the QUARK instance of the current context. Returns
 * immediately when the sequence status is not PLASMA_SUCCESS.
 * NOTE(review): this listing omits the declarations of plasma,
 * task_flags, f77 and bdl; the References list names
 * Quark_Task_Flags_Initializer, presumably the initialiser of
 * task_flags -- confirm. */
int X1, Y1;
int X2, Y2;
int n, m, ldt;
plasma = plasma_context_self();
if (sequence->status != PLASMA_SUCCESS)
return;
/* Attach the tasks created below to the caller's QUARK sequence. */
QUARK_Task_Flag_Set(&task_flags, TASK_SEQUENCE, (intptr_t)sequence->quark_sequence);
for (m = 0; m < A.mt; m++)
{
/* Leading dimension of the tiles in tile row m. */
ldt = BLKLDD(A, m);
for (n = 0; n < A.nt; n++)
{
/* Sub-range of the tile to copy: first tile row/column starts at the
 * A.i / A.j remainder, last one ends at the remainder of the final
 * index plus one, all others span the full A.mb / A.nb. */
X1 = n == 0 ? A.j%A.nb : 0;
Y1 = m == 0 ? A.i%A.mb : 0;
X2 = n == A.nt-1 ? (A.j+A.n-1)%A.nb+1 : A.nb;
Y2 = m == A.mt-1 ? (A.i+A.m-1)%A.mb+1 : A.mb;
f77 = AF77(m, n);
bdl = ABDL(m, n);
/* NOTE(review): the callee name for the argument list below is missing
 * from this listing; the References list names QUARK_CORE_zlacpy() --
 * confirm. */
/* NOTE(review): bdl is passed with leading dimension ldt but indexed
 * with X1*lda+Y1; X1*ldt+Y1 looks intended when X1 != 0 -- confirm. */
plasma->quark, &task_flags,
PlasmaUpperLower, (Y2-Y1), (X2-X1), A.mb,
&(f77[X1*lda+Y1]), lda,
&(bdl[X1*lda+Y1]), ldt);
}
}
}

Here is the call graph for this function:

void plasma_pztile_to_lapack (plasma_context_t *plasma)

Conversion from tile layout to LAPACK F77 matrix layout - static scheduling

Definition at line 128 of file pztile.c.

References A, ABDL, AF77, BLKLDD, CORE_zlacpy(), plasma_desc_t::i, plasma_desc_t::j, plasma_desc_t::m, plasma_desc_t::mb, plasma_desc_t::mt, plasma_desc_t::n, plasma_desc_t::nb, plasma_desc_t::nt, PLASMA_RANK, PLASMA_SIZE, PLASMA_SUCCESS, plasma_unpack_args_5, PlasmaUpperLower, and plasma_sequence_t::status.

{
/* Statically scheduled copy from the tiles of descriptor A into the
 * LAPACK-layout array Af77 (leading dimension lda): in the argument list
 * at the end of the loop the tile pointer comes first and the Af77
 * pointer second, the reverse of plasma_pzlapack_to_tile. Arguments are
 * taken from the context via plasma_unpack_args_5; nothing is done when
 * the sequence status is not PLASMA_SUCCESS.
 * NOTE(review): this listing omits the declarations of A, Af77, f77 and
 * bdl -- confirm against the full source. */
int lda;
PLASMA_sequence *sequence;
PLASMA_request *request;
int X1, Y1;
int X2, Y2;
int n, m, ldt;
int next_m;
int next_n;
plasma_unpack_args_5(A, Af77, lda, sequence, request);
if (sequence->status != PLASMA_SUCCESS)
return;
n = 0;
/* NOTE(review): m is read here without any visible assignment. The
 * page's References list names PLASMA_RANK, which does not appear in
 * this listing, so an "m = PLASMA_RANK;" line was presumably dropped --
 * confirm. */
/* Fold the starting index into a (row m, column n) tile coordinate. */
while (m >= A.mt && n < A.nt) {
n++;
m = m-A.mt;
}
/* One tile per iteration; the loop ends once n runs past the last tile
 * column. */
while (n < A.nt) {
/* Next tile is PLASMA_SIZE tiles further on, wrapped by A.mt rows. */
next_m = m;
next_n = n;
next_m += PLASMA_SIZE;
while (next_m >= A.mt && next_n < A.nt) {
next_n++;
next_m = next_m-A.mt;
}
/* Sub-range of the tile to copy: first tile row/column starts at the
 * A.i / A.j remainder, last one ends at the remainder of the final
 * index plus one, all others span the full A.mb / A.nb. */
X1 = n == 0 ? A.j%A.nb : 0;
Y1 = m == 0 ? A.i%A.mb : 0;
X2 = n == A.nt-1 ? (A.j+A.n-1)%A.nb+1 : A.nb;
Y2 = m == A.mt-1 ? (A.i+A.m-1)%A.mb+1 : A.mb;
f77 = AF77(m, n);
bdl = ABDL(m, n);
ldt = BLKLDD(A, m);
/* NOTE(review): the callee name for the argument list below is missing
 * from this listing; the References list names CORE_zlacpy() -- confirm. */
/* NOTE(review): bdl is passed with leading dimension ldt but is indexed
 * with X1*lda+Y1. plasma_pztile_zero indexes the same kind of pointer as
 * bdl[ldt*x+y], so X1*ldt+Y1 looks intended; the two differ whenever
 * X1 != 0 and lda != ldt -- confirm. */
PlasmaUpperLower, (Y2-Y1), (X2-X1),
&(bdl[X1*lda+Y1]), ldt,
&(f77[X1*lda+Y1]), lda);
m = next_m;
n = next_n;
}
}

Here is the call graph for this function:

Here is the caller graph for this function:

void plasma_pztile_to_lapack_quark ( PLASMA_desc  A,
PLASMA_Complex64_t *Af77,
int  lda,
PLASMA_sequence *sequence,
PLASMA_request *request
)

Conversion from tile layout to LAPACK F77 matrix layout - dynamic scheduling

Definition at line 187 of file pztile.c.

References ABDL, AF77, BLKLDD, plasma_desc_t::i, plasma_desc_t::j, plasma_desc_t::m, plasma_desc_t::mb, plasma_desc_t::mt, plasma_desc_t::n, plasma_desc_t::nb, plasma_desc_t::nt, plasma_context_self(), PLASMA_SUCCESS, PlasmaUpperLower, plasma_context_struct::quark, QUARK_CORE_zlacpy(), plasma_sequence_t::quark_sequence, QUARK_Task_Flag_Set(), Quark_Task_Flags_Initializer, plasma_sequence_t::status, and TASK_SEQUENCE.

{
/* Dynamically scheduled counterpart: walks every tile (m, n) of A and
 * hands one copy per tile, from the tile to the LAPACK-layout array
 * Af77, to the QUARK instance of the current context. Returns
 * immediately when the sequence status is not PLASMA_SUCCESS.
 * NOTE(review): this listing omits the declarations of plasma,
 * task_flags, f77 and bdl; the References list names
 * Quark_Task_Flags_Initializer, presumably the initialiser of
 * task_flags -- confirm. */
int X1, Y1;
int X2, Y2;
int n, m, ldt;
plasma = plasma_context_self();
if (sequence->status != PLASMA_SUCCESS)
return;
/* Attach the tasks created below to the caller's QUARK sequence. */
QUARK_Task_Flag_Set(&task_flags, TASK_SEQUENCE, (intptr_t)sequence->quark_sequence);
for (m = 0; m < A.mt; m++)
{
/* Leading dimension of the tiles in tile row m. */
ldt = BLKLDD(A, m);
for (n = 0; n < A.nt; n++)
{
/* Sub-range of the tile to copy: first tile row/column starts at the
 * A.i / A.j remainder, last one ends at the remainder of the final
 * index plus one, all others span the full A.mb / A.nb. */
X1 = n == 0 ? A.j%A.nb : 0;
Y1 = m == 0 ? A.i%A.mb : 0;
X2 = n == A.nt-1 ? (A.j+A.n-1)%A.nb+1 : A.nb;
Y2 = m == A.mt-1 ? (A.i+A.m-1)%A.mb+1 : A.mb;
f77 = AF77(m, n);
bdl = ABDL(m, n);
/* NOTE(review): the callee name for the argument list below is missing
 * from this listing; the References list names QUARK_CORE_zlacpy() --
 * confirm. */
/* NOTE(review): bdl is passed with leading dimension ldt but indexed
 * with X1*lda+Y1; X1*ldt+Y1 looks intended when X1 != 0 -- confirm. */
plasma->quark, &task_flags,
PlasmaUpperLower, (Y2-Y1), (X2-X1), A.mb,
&(bdl[X1*lda+Y1]), ldt,
&(f77[X1*lda+Y1]), lda);
}
}
}

Here is the call graph for this function:

void plasma_pztile_zero (plasma_context_t *plasma)

Zeroes a submatrix in tile layout - static scheduling

Definition at line 227 of file pztile.c.

References A, ABDL, BLKLDD, plasma_desc_t::i, plasma_desc_t::j, plasma_desc_t::m, plasma_desc_t::mb, plasma_desc_t::mt, plasma_desc_t::n, plasma_desc_t::nb, plasma_desc_t::nt, PLASMA_RANK, PLASMA_SIZE, PLASMA_SUCCESS, plasma_unpack_args_3, and plasma_sequence_t::status.

{
/* Statically scheduled zeroing of the tiles of descriptor A, restricted
 * in the first and last tile row/column to the sub-range computed below.
 * Arguments are taken from the context via plasma_unpack_args_3; nothing
 * is done when the sequence status is not PLASMA_SUCCESS.
 * NOTE(review): this listing omits the declarations of A and bdl --
 * confirm against the full source. */
PLASMA_sequence *sequence;
PLASMA_request *request;
int x, y;
int X1, Y1;
int X2, Y2;
int n, m, ldt;
int next_m;
int next_n;
plasma_unpack_args_3(A, sequence, request);
if (sequence->status != PLASMA_SUCCESS)
return;
n = 0;
/* NOTE(review): m is read here without any visible assignment. The
 * page's References list names PLASMA_RANK, which does not appear in
 * this listing, so an "m = PLASMA_RANK;" line was presumably dropped --
 * confirm. */
/* Fold the starting index into a (row m, column n) tile coordinate. */
while (m >= A.mt && n < A.nt) {
n++;
m = m-A.mt;
}
/* One tile per iteration; the loop ends once n runs past the last tile
 * column. */
while (n < A.nt) {
/* Next tile is PLASMA_SIZE tiles further on, wrapped by A.mt rows. */
next_m = m;
next_n = n;
next_m += PLASMA_SIZE;
while (next_m >= A.mt && next_n < A.nt) {
next_n++;
next_m = next_m-A.mt;
}
/* Sub-range of the tile to clear: first tile row/column starts at the
 * A.i / A.j remainder, last one ends at the remainder of the final
 * index plus one, all others span the full A.mb / A.nb. */
X1 = n == 0 ? A.j%A.nb : 0;
Y1 = m == 0 ? A.i%A.mb : 0;
X2 = n == A.nt-1 ? (A.j+A.n-1)%A.nb+1 : A.nb;
Y2 = m == A.mt-1 ? (A.i+A.m-1)%A.mb+1 : A.mb;
bdl = ABDL(m, n);
ldt = BLKLDD(A, m);
/* The tile is addressed with its own leading dimension ldt. */
for (x = X1; x < X2; x++)
for (y = Y1; y < Y2; y++)
bdl[ldt*x+y] = 0.0;
m = next_m;
n = next_n;
}
}

Here is the caller graph for this function:

void plasma_pztile_zero_quark ( PLASMA_desc  A,
PLASMA_sequence *sequence,
PLASMA_request *request
)

Zeroes a submatrix in tile layout - dynamic scheduling

Definition at line 281 of file pztile.c.

References ABDL, BLKLDD, plasma_desc_t::bsiz, CORE_ztile_zero_quark(), plasma_desc_t::i, plasma_desc_t::j, LOCALITY, plasma_desc_t::m, plasma_desc_t::mb, plasma_desc_t::mt, plasma_desc_t::n, plasma_desc_t::nb, plasma_desc_t::nt, OUTPUT, plasma_context_self(), PLASMA_SUCCESS, plasma_context_struct::quark, QUARK_Insert_Task(), plasma_sequence_t::quark_sequence, QUARK_Task_Flag_Set(), Quark_Task_Flags_Initializer, plasma_sequence_t::status, TASK_SEQUENCE, and VALUE.

{
/* Dynamically scheduled counterpart of plasma_pztile_zero: walks every
 * tile (m, n) of A and passes the per-tile bounds X1, X2, Y1, Y2 and the
 * leading dimension ldt by value to a task. Returns immediately when the
 * sequence status is not PLASMA_SUCCESS.
 * NOTE(review): this listing omits the declarations of plasma,
 * task_flags and bdl; the References list names
 * Quark_Task_Flags_Initializer, presumably the initialiser of
 * task_flags -- confirm. */
int X1, Y1;
int X2, Y2;
int n, m, ldt;
plasma = plasma_context_self();
if (sequence->status != PLASMA_SUCCESS)
return;
/* Attach the tasks created below to the caller's QUARK sequence. */
QUARK_Task_Flag_Set(&task_flags, TASK_SEQUENCE, (intptr_t)sequence->quark_sequence);
for (m = 0; m < A.mt; m++)
{
/* Leading dimension of the tiles in tile row m. */
ldt = BLKLDD(A, m);
for (n = 0; n < A.nt; n++)
{
/* Sub-range of the tile to clear: first tile row/column starts at the
 * A.i / A.j remainder, last one ends at the remainder of the final
 * index plus one, all others span the full A.mb / A.nb. */
X1 = n == 0 ? A.j%A.nb : 0;
Y1 = m == 0 ? A.i%A.mb : 0;
X2 = n == A.nt-1 ? (A.j+A.n-1)%A.nb+1 : A.nb;
Y2 = m == A.mt-1 ? (A.i+A.m-1)%A.mb+1 : A.mb;
bdl = ABDL(m, n);
/* NOTE(review): the head of the call that owns the argument list below
 * is missing from this listing. The References list names
 * QUARK_Insert_Task(), CORE_ztile_zero_quark(), plasma_desc_t::bsiz,
 * OUTPUT and LOCALITY, none of which appear here, and bdl is assigned
 * but never used in the visible lines -- presumably a tile-pointer
 * argument between Y2 and ldt was dropped, which would match the
 * X1, X2, Y1, Y2, A, lda unpack order in CORE_ztile_zero_quark.
 * Confirm against the full source. */
sizeof(int), &X1, VALUE,
sizeof(int), &X2, VALUE,
sizeof(int), &Y1, VALUE,
sizeof(int), &Y2, VALUE,
sizeof(int), &ldt, VALUE,
/* Trailing 0 closes the argument list. */
0);
}
}
}

Here is the call graph for this function: