PLASMA  2.4.5
PLASMA - Parallel Linear Algebra for Scalable Multi-core Architectures
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups
dgecfi.c File Reference
#include <stdlib.h>
#include <sys/types.h>
#include "common.h"
#include "dgecfi2.h"
Include dependency graph for dgecfi.c:

Go to the source code of this file.

Functions

int PLASMA_dgecfi (int m, int n, double *A, PLASMA_enum f_in, int imb, int inb, PLASMA_enum f_out, int omb, int onb)
int PLASMA_dgecfi_Async (int m, int n, double *A, PLASMA_enum f_in, int imb, int inb, PLASMA_enum f_out, int omb, int onb, PLASMA_sequence *sequence, PLASMA_request *request)

Function Documentation

int PLASMA_dgecfi ( int  m,
int  n,
double *  A,
PLASMA_enum  f_in,
int  imb,
int  inb,
PLASMA_enum  f_out,
int  omb,
int  onb 
)

PLASMA_dgecfi converts the matrix A in place from format f_in to format f_out.

Parameters:
[in] m: Number of rows of matrix A
[in] n: Number of columns of matrix A
[in,out] A: Matrix of size L*m*n
[in] f_in: Original format of the matrix A. Must be one of (PlasmaCM, PlasmaRM, PlasmaCCRB, PlasmaCRRB, PlasmaRCRB, PlasmaRRRB)
[in] imb: Number of rows of each block in original format
[in] inb: Number of columns of each block in original format
[in] f_out: Format requested for the matrix A. Must be one of (PlasmaCM, PlasmaRM, PlasmaCCRB, PlasmaCRRB, PlasmaRCRB, PlasmaRRRB)
[in] omb: Number of rows of each block in requested format
[in] onb: Number of columns of each block in requested format
See also:
PLASMA_dgecfi_Async

Definition at line 69 of file dgecfi.c.

References plasma_context_self(), PLASMA_dgecfi_Async(), plasma_dynamic_sync, PLASMA_ERR_NOT_INITIALIZED, plasma_fatal_error(), PLASMA_REQUEST_INITIALIZER, plasma_sequence_create(), plasma_sequence_destroy(), and plasma_sequence_t::status.

{
    /*
     * Synchronous wrapper around PLASMA_dgecfi_Async: creates a private
     * sequence, submits the in-place conversion of A from f_in (imb x inb
     * blocks) to f_out (omb x onb blocks), waits for it, and returns the
     * status recorded in the sequence.
     *
     * Returns PLASMA_ERR_NOT_INITIALIZED when no PLASMA context is attached
     * to the calling thread; otherwise the sequence status.
     */
    plasma_context_t *plasma;
    PLASMA_sequence *sequence = NULL;
    PLASMA_request request = PLASMA_REQUEST_INITIALIZER;
    int status;

    plasma = plasma_context_self();
    if (plasma == NULL) {
        plasma_fatal_error(__func__, "PLASMA not initialized");
        /* Bail out: everything below dereferences the context. */
        return PLASMA_ERR_NOT_INITIALIZED;
    }

    plasma_sequence_create(plasma, &sequence);

    PLASMA_dgecfi_Async(m, n, A,
                        f_in, imb, inb,
                        f_out, omb, onb,
                        sequence, &request);

    /* Wait for the submitted work before reading the sequence status. */
    plasma_dynamic_sync();

    status = sequence->status;
    plasma_sequence_destroy(plasma, sequence);
    return status;
}

Here is the call graph for this function:

Here is the caller graph for this function:

int PLASMA_dgecfi_Async ( int  m,
int  n,
double *  A,
PLASMA_enum  f_in,
int  imb,
int  inb,
PLASMA_enum  f_out,
int  omb,
int  onb,
PLASMA_sequence *  sequence,
PLASMA_request *  request
)

PLASMA_dgecfi_Async converts the matrix A in place from format f_in to format f_out.

Parameters:
[in] m: Number of rows of matrix A
[in] n: Number of columns of matrix A
[in,out] A: Matrix of size L*m*n
[in] f_in: Original format of the matrix A. Must be one of (PlasmaCM, PlasmaRM, PlasmaCCRB, PlasmaCRRB, PlasmaRCRB, PlasmaRRRB)
[in] imb: Number of rows of each block in original format
[in] inb: Number of columns of each block in original format
[in] f_out: Format requested for the matrix A. Must be one of (PlasmaCM, PlasmaRM, PlasmaCCRB, PlasmaCRRB, PlasmaRCRB, PlasmaRRRB)
[in] omb: Number of rows of each block in requested format
[in] onb: Number of columns of each block in requested format
[in] sequence: Identifies the sequence of function calls that this call belongs to (for completion checks and exception handling purposes).
[out] request: Identifies this function call (for exception handling purposes).
See also:
PLASMA_dgecfi

Definition at line 148 of file dgecfi.c.

References CORE_dswpab(), ipt_cal2, ipt_call, max, plasma_context_self(), PLASMA_dgecfi_Async(), PLASMA_ERR_NOT_INITIALIZED, plasma_error(), plasma_fatal_error(), PLASMA_NB, plasma_pdpack(), plasma_pdunpack(), plasma_static_call_6, PLASMA_SUCCESS, PlasmaCCRB, PlasmaCM, PlasmaCRRB, PlasmaRCRB, PlasmaRM, PlasmaRRRB, plasma_context_struct::quark, QUARK_Barrier(), and W.

{
/*
 * In-place conversion of A from layout f_in (blocks of imb x inb) to layout
 * f_out (blocks of omb x onb), submitted on the caller's sequence/request.
 *
 * Visible flow: check the PLASMA context, validate f_in and f_out, normalise
 * the block sizes, round m and n down to whole blocks, "separate" the four
 * sub-matrices when the input is PlasmaCM/PlasmaRM, dispatch on the
 * (f_in, f_out) pair, swap the A12/A21 regions when their order differs
 * between input and output, and "unseparate" when the output is
 * PlasmaCM/PlasmaRM.
 *
 * Visible return values: -4 for an unknown f_in and -7 for an unknown f_out
 * (the negated 1-based positions of those arguments).
 *
 * NOTE(review): this listing looks truncated. `plasma` is used but never
 * declared here, several statements have lost their first line, and no
 * success return is visible even though the function returns int. Each gap
 * is flagged inline; confirm against the real dgecfi.c before relying on it.
 */
double *W = NULL;
int im1, in1, om1, on1;
size_t A11, A21, A12, A22;
/* Check Plasma context */
plasma = plasma_context_self();
if (plasma == NULL) {
plasma_fatal_error(__func__, "PLASMA not initialized");
/* NOTE(review): no return is visible in this branch, so as listed a NULL
 * context falls through and is dereferenced later (plasma->quark).
 * Presumably `return PLASMA_ERR_NOT_INITIALIZED;` was dropped - TODO confirm. */
}
/* Check arguments */
/* f_in must be one of the six supported layouts. */
if( ( f_in != PlasmaCM ) && ( f_in != PlasmaRM )
&& ( f_in != PlasmaCCRB ) && ( f_in != PlasmaRRRB )
&& ( f_in != PlasmaCRRB ) && ( f_in != PlasmaRCRB ) )
{
plasma_error(__func__, "Input format unknown");
return -4;
}
/* f_out must be one of the six supported layouts. */
if( ( f_out != PlasmaCM ) && ( f_out != PlasmaRM )
&& ( f_out != PlasmaCCRB ) && ( f_out != PlasmaRRRB )
&& ( f_out != PlasmaCRRB ) && ( f_out != PlasmaRCRB ) )
{
/* NOTE(review): this check is on f_out but the message says "Input";
 * it probably should read "Output format unknown". */
plasma_error(__func__, "Input format unknown");
return -7;
}
/* quick return */
/* NOTE(review): the body of this `if` is empty in the listing, so as shown
 * it has no effect. Presumably a `return PLASMA_SUCCESS;` was dropped -
 * TODO confirm. */
if( (f_in == f_out) && ( (f_in == PlasmaCM) || (f_in == PlasmaRM))
&& (imb == omb) && ( inb == onb ) ) {
}
/*
 * Normalise block sizes so both sides describe the same tiling:
 *  - unblocked input and unblocked output: all four sizes become PLASMA_NB;
 *  - unblocked input, blocked output: the input sizes take the output's;
 *  - blocked input, unblocked output: the output sizes take the input's.
 */
if ( (f_in == PlasmaCM) || (f_in == PlasmaRM) )
{
if ( (f_out == PlasmaCM) || (f_out == PlasmaRM) ){
imb = omb = PLASMA_NB;
inb = onb = PLASMA_NB;
} else {
imb = omb;
inb = onb;
}
}
else if ( (f_out == PlasmaCM) || (f_out == PlasmaRM) )
{
omb = imb;
onb = inb;
}
/* calculate number of full blocks */
/* Each value is m or n rounded down to a multiple of the block size.
 * NOTE(review): no check that imb/inb/omb/onb are non-zero is visible
 * before these divisions. */
im1 = (m / imb) * imb;
in1 = (n / inb) * inb;
om1 = (m / omb) * omb;
on1 = (n / onb) * onb;
/* separate the four submatrices A11, A12, A21, A22 */
/* NOTE(review): in the four fragments below only the argument tails
 * survived; the opening of each call is missing. Presumably each began with
 * plasma_static_call_6(<plasma_pdpack or plasma_pdunpack>, ... - both appear
 * in this function's reference list, but the listing does not show which
 * one is used here. TODO confirm. */
if( f_in == PlasmaCM ) {
if( om1 < m ) {
int, m,
int, on1,
double*, A,
int, (m-om1),
PLASMA_sequence*, sequence,
PLASMA_request*, request);
if ( on1 < n) {
int, m,
int, (n-on1),
double*, &(A[m*on1]),
int, (m-om1),
PLASMA_sequence*, sequence,
PLASMA_request*, request);
}
}
}
else if ( f_in == PlasmaRM ) {
if( on1 < n ) {
int, n,
int, om1,
double*, A,
int, (n-on1),
PLASMA_sequence*, sequence,
PLASMA_request*, request);
if( om1 < m ) {
int, n,
int, (m-om1),
double*, &(A[n*om1]),
int, (n-on1),
PLASMA_sequence*, sequence,
PLASMA_request*, request);
}
}
}
/* blocked format to blocked format conversion with different block sizes */
/* Done in two recursive steps through an unblocked layout with 1x1 blocks:
 * via PlasmaRM when either side is PlasmaRRRB, otherwise via PlasmaCM.
 * NOTE(review): no return is visible after the two-step conversion, so as
 * listed execution continues into the generic path below - TODO confirm
 * against the original source. */
if( (f_in != PlasmaCM) && (f_in != PlasmaRM) &&
(f_out != PlasmaCM) && (f_out != PlasmaRM) ) {
if( (imb != omb) || (inb != onb) ) {
if( (f_in == PlasmaRRRB) || (f_out == PlasmaRRRB ) ) {
PLASMA_dgecfi_Async(m, n, A, f_in, imb, inb, PlasmaRM, 1, 1 , sequence, request);
PLASMA_dgecfi_Async(m, n, A, PlasmaRM, 1, 1, f_out, omb, onb, sequence, request);
}
else {
PLASMA_dgecfi_Async(m, n, A, f_in, imb, inb, PlasmaCM, 1, 1 , sequence, request);
PLASMA_dgecfi_Async(m, n, A, PlasmaCM, 1, 1, f_out, omb, onb, sequence, request);
}
}
}
/* Element offsets of the four sub-matrices inside A. They depend on the
 * input layout: A21 precedes A12 for PlasmaCM/PlasmaCCRB/PlasmaCRRB, and
 * A12 precedes A21 for the other layouts.
 * NOTE(review): the products are evaluated in int (m, n, im1 and in1 are
 * int) before being stored in size_t, so they may overflow for very large
 * matrices. */
if( (f_in == PlasmaCM) || (f_in == PlasmaCCRB) || (f_in == PlasmaCRRB) )
{
A11 = 0;
A21 = im1*in1;
A12 = m *in1;
A22 = m *in1 + im1*(n-in1);
}
else
{
A11 = 0;
A12 = im1*in1;
A21 = im1*n;
A22 = im1*n + in1*(m-im1);
}
/* Dispatch on (f_in, f_out). Unblocked inputs (PlasmaCM/PlasmaRM) pass the
 * output geometry (om1, on1, omb, onb); blocked inputs pass the input
 * geometry (im1, in1, imb, inb). Identical formats do nothing.
 * NOTE(review): ipt_call and ipt_cal2 are not defined in this listing;
 * presumably macros from dgecfi2.h - verify there how the conversion name
 * and the sequence/request are used. */
switch ( f_in ) {
case PlasmaCM :
switch ( f_out ) {
case PlasmaCM : break;
case PlasmaCCRB : ipt_call(cm2ccrb, om1, on1, omb, onb); break;
case PlasmaCRRB : ipt_call(cm2crrb, om1, on1, omb, onb); break;
case PlasmaRCRB : ipt_call(cm2rcrb, om1, on1, omb, onb); break;
case PlasmaRRRB : ipt_call(cm2rrrb, om1, on1, omb, onb); break;
case PlasmaRM : ipt_call(cm2rm, om1, on1, omb, onb); break;
default: ;
}
break;
case PlasmaCCRB:
switch ( f_out ) {
case PlasmaCM : ipt_call(ccrb2cm, im1, in1, imb, inb); break;
case PlasmaCCRB : break;
case PlasmaCRRB : ipt_cal2(ccrb2crrb, im1, in1, imb, inb); break;
case PlasmaRCRB : ipt_call(ccrb2rcrb, im1, in1, imb, inb); break;
case PlasmaRRRB : ipt_call(ccrb2rrrb, im1, in1, imb, inb); break;
case PlasmaRM : ipt_call(ccrb2rm, im1, in1, imb, inb); break;
default: ;
}
break;
case PlasmaCRRB:
switch ( f_out ) {
case PlasmaCM : ipt_call(crrb2cm, im1, in1, imb, inb); break;
case PlasmaCCRB : ipt_cal2(crrb2ccrb, im1, in1, imb, inb); break;
case PlasmaCRRB : break;
case PlasmaRCRB : ipt_call(crrb2rcrb, im1, in1, imb, inb); break;
case PlasmaRRRB : ipt_call(crrb2rrrb, im1, in1, imb, inb); break;
case PlasmaRM : ipt_call(crrb2rm, im1, in1, imb, inb); break;
default: ;
}
break;
case PlasmaRCRB:
switch ( f_out ) {
case PlasmaCM : ipt_call(rcrb2cm, im1, in1, imb, inb); break;
case PlasmaCCRB : ipt_call(rcrb2ccrb, im1, in1, imb, inb); break;
case PlasmaCRRB : ipt_call(rcrb2crrb, im1, in1, imb, inb); break;
case PlasmaRCRB : break;
case PlasmaRRRB : ipt_cal2(rcrb2rrrb, im1, in1, imb, inb); break;
case PlasmaRM : ipt_call(rcrb2rm, im1, in1, imb, inb); break;
default: ;
}
break;
case PlasmaRRRB:
switch ( f_out ) {
case PlasmaCM : ipt_call(rrrb2cm, im1, in1, imb, inb); break;
case PlasmaCCRB : ipt_call(rrrb2ccrb, im1, in1, imb, inb); break;
case PlasmaCRRB : ipt_call(rrrb2crrb, im1, in1, imb, inb); break;
case PlasmaRCRB : ipt_cal2(rrrb2rcrb, im1, in1, imb, inb); break;
case PlasmaRRRB : break;
case PlasmaRM : ipt_call(rrrb2rm, im1, in1, imb, inb); break;
default: ;
}
break;
case PlasmaRM:
switch ( f_out ) {
case PlasmaCM : ipt_call(rm2cm, om1, on1, omb, onb); break;
case PlasmaCCRB : ipt_call(rm2ccrb, om1, on1, omb, onb); break;
case PlasmaCRRB : ipt_call(rm2crrb, om1, on1, omb, onb); break;
case PlasmaRCRB : ipt_call(rm2rcrb, om1, on1, omb, onb); break;
case PlasmaRRRB : ipt_call(rm2rrrb, om1, on1, omb, onb); break;
case PlasmaRM : break;
default: ;
}
break;
default: ;
}
/* reorder block */
/* The A12/A21 offsets above follow the input ordering. When the output
 * family is the opposite one, the two adjacent regions are exchanged by
 * CORE_dswpab, using a temporary buffer of max(sze1, sze2) doubles. */
if( (f_out == PlasmaCM) || (f_out == PlasmaCCRB) || (f_out == PlasmaCRRB) )
{
/* We need to swap A21 and A12 */
if ( A21 > A12 ) {
size_t sze1 = A21-A12;
size_t sze2 = A22-A21;
/* Barrier on the QUARK scheduler before the swap is run directly. */
QUARK_Barrier(plasma->quark);
//plasma_malloc(W, max( in1, on1), double);
/* NOTE(review): the malloc result is not checked before use. */
W = (double*)malloc( max( sze1, sze2 ) * sizeof(double) );
CORE_dswpab(0, sze1, sze2, &(A[A12]), W);
free(W);
}
}
else {
/* We need to swap A21 and A12 */
if ( A12 > A21 ) {
size_t sze1 = A12-A21;
size_t sze2 = A22-A12;
/* Barrier on the QUARK scheduler before the swap is run directly. */
QUARK_Barrier(plasma->quark);
//plasma_malloc(W, max( in1, on1), double);
/* NOTE(review): the malloc result is not checked before use. */
W = (double*)malloc( max( sze1, sze2 ) * sizeof(double) );
CORE_dswpab(0, sze1, sze2, &(A[A21]), W);
free(W);
}
}
/* unseparate if output is not blocked */
/* NOTE(review): as in the "separate" step, the opening line of each of the
 * four calls below is missing from the listing. These fragments use the
 * input-side counts (im1, in1) where the earlier ones use om1/on1.
 * TODO confirm the called routine against the original source. */
if( f_out == PlasmaCM ) {
if( im1 < m ) {
int, m,
int, in1,
double*, A,
int, (m-im1),
PLASMA_sequence*, sequence,
PLASMA_request*, request);
if ( in1 < n) {
int, m,
int, (n-in1),
double*, &(A[m*in1]),
int, (m-im1),
PLASMA_sequence*, sequence,
PLASMA_request*, request);
}
}
}
else if( f_out == PlasmaRM ) {
if( in1 < n ) {
int, n,
int, im1,
double*, A,
int, (n-in1),
PLASMA_sequence*, sequence,
PLASMA_request*, request);
if( im1 < m ) {
int, n,
int, (m-im1),
double*, &(A[n*im1]),
int, (n-in1),
PLASMA_sequence*, sequence,
PLASMA_request*, request);
}
}
}
/* NOTE(review): no final return is visible although the function returns
 * int; presumably `return PLASMA_SUCCESS;` was dropped - TODO confirm. */
}

Here is the call graph for this function:

Here is the caller graph for this function: