#include "common.h"

Include dependency graph for pcgetmi2.c:

Functions
void	plasma_pcgetmi2 (plasma_context_t *plasma)
void	plasma_pcgetmi2_quark (PLASMA_enum idep, PLASMA_enum odep, PLASMA_enum storev, int m, int n, int mb, int nb, PLASMA_Complex32_t A, PLASMA_sequence sequence, PLASMA_request *request)

Detailed Description

PLASMA InPlaceTransformation module PLASMA is a software package provided by Univ. of Tennessee, Univ. of California Berkeley and Univ. of Colorado Denver

This work is the implementation of an inplace transformation based on the GKK algorithm by Gustavson, Karlsson, Kagstrom and its fortran implementation.

Version:: 2.4.5

Author:: Mathieu Faverge

Date:: 2010-11-15

c Tue Nov 22 14:35:42 2011

Definition in file pcgetmi2.c.

Function Documentation

void plasma_pcgetmi2 ( plasma_context_t * plasma )

plasma_pcgetmi2 - realises nprob independant transpositions. Each subproblem is a tile of mb-by-nb elements. This function use an extra space of PLASMA_SIZE*(mb*nb).

Parameters:

[in] plasma Plasma context to which this call belong to.

See also:: plasma_pcgetmi2_quark

Definition at line 40 of file pcgetmi2.c.

References A, CORE_cgetrip(), plasma_private_alloc(), plasma_private_free(), PLASMA_RANK, PLASMA_SIZE, PLASMA_SUCCESS, plasma_unpack_args_10, PlasmaComplexFloat, plasma_sequence_t::status, and storev.

                                               {
    PLASMA_sequence *sequence;
    PLASMA_request *request;
    PLASMA_Complex32_t *A, *Al, *work;
    PLASMA_enum storev, idep, odep;
    int         i, m, n, mb, nb, nprob;
    int         size, bsiz;
    plasma_unpack_args_10(idep, odep, storev, m, n, mb, nb, A, sequence, request);
    if (sequence->status != PLASMA_SUCCESS)
        return;
    /* quick return */
    if( (mb < 2) || (nb < 2) ) {
        return ;
    }
    size  = PLASMA_SIZE;
    bsiz  = mb*nb;
    nprob = ( m / mb ) * ( n / nb );
    work = (PLASMA_Complex32_t*)plasma_private_alloc(plasma, mb*nb, PlasmaComplexFloat);
    for (i=PLASMA_RANK; i<nprob; i+=size) {
        Al = &(A[ i * bsiz]);
        CORE_cgetrip(mb, nb, Al, work);
    }
    plasma_private_free(plasma, work);
}

Here is the call graph for this function:

void plasma_pcgetmi2_quark	(	PLASMA_enum	idep,
		PLASMA_enum	odep,
		PLASMA_enum	storev,
		int	m,
		int	n,
		int	mb,
		int	nb,
		PLASMA_Complex32_t *	A,
		PLASMA_sequence *	sequence,
		PLASMA_request *	request
	)

plasma_pcgetmi2_quark - realises nprob independant transpositions. Each subproblem is a tile of mb-by-nb elements. This function use an extra space of PLASMA_SIZE*(mb*nb). This is a maximum in case of dynamic scheduling.

Parameters:

[in]	idep	PlasmaIPT_Nodep: No fake dependencies are added. PlasmaIPT_Panel: A gatherv is added on each panel and panel size is m*nb. PlasmaIPT_All: A gatherv is added on the whole matrix.
[in]	odep	PlasmaIPT_Nodep: No fake dependencies are added. PlasmaIPT_Panel: A gatherv is added on each panel and panel size is m*nb. PlasmaIPT_All: A gatherv is added on the whole matrix.
[in]	storev	PlasmaColumnWise: Data stored in column major. PlasmaRowWise: Data stored in row major.
[in]	m	Number of row of A if tiles are sorted in column major format, number of columns otherwise.
[in]	n	Number of columns of A if tiles are sorted in column major format, number of rows otherwise.
[in]	mb	Number of rows in each individual subproblem if storev == PlasmaColumnWise, number of columns otherwise. mmb must be 0.
[in]	nb	Number of columns in each individual subproblem if storev == PlasmaColumnWise, number of rows otherwise. nnb must be 0.
[in,out]	A	Matrix of size m*n.
[in]	sequence	Identifies the sequence of function calls that this call belongs to (for completion checks and exception handling purposes).
[out]	request	Identifies this function call (for exception handling purposes).

See also:: plasma_pcgetmi2

Definition at line 128 of file pcgetmi2.c.

References GATHERV, INOUT, INPUT, plasma_context_self(), PLASMA_SUCCESS, PlasmaColumnwise, PlasmaIPT_All, PlasmaIPT_NoDep, PlasmaIPT_Panel, plasma_context_struct::quark, QUARK_CORE_cgetrip(), QUARK_CORE_cgetrip_f1(), QUARK_CORE_cgetrip_f2(), plasma_sequence_t::quark_sequence, QUARK_Task_Flag_Set(), Quark_Task_Flags_Initializer, plasma_sequence_t::status, and TASK_SEQUENCE.

{
    plasma_context_t *plasma;
    Quark_Task_Flags task_flags = Quark_Task_Flags_Initializer;
    PLASMA_Complex32_t *Al, *Ap;
    int i, j, nprob, mt, nt;
    int bsiz, psiz, size;
    plasma = plasma_context_self();
    if (sequence->status != PLASMA_SUCCESS)
        return;
    /* quick return */
    if( (mb < 2) || (nb < 2) ) {
        return ;
    }
    QUARK_Task_Flag_Set(&task_flags, TASK_SEQUENCE, (intptr_t)sequence->quark_sequence);
    bsiz  = mb*nb;
    if ( storev == PlasmaColumnwise ) {
        psiz = m*nb;
        mt = ( m / mb );
        nt = ( n / nb );
    } else {
        psiz = n*mb;
        mt = ( n / nb );
        nt = ( m / mb );    
    }
    size = m*n;
    switch ( idep ) {
/*
 * Dependencies on each panel as input
 */
    case PlasmaIPT_Panel:
        switch ( odep ) {
        case PlasmaIPT_Panel:
            for (j=0; j<nt; j++) {
                Ap = A + (psiz*j);
                for (i=0; i<mt; i++) {
                    Al = Ap + i*bsiz;
                    QUARK_CORE_cgetrip_f1(
                        plasma->quark, &task_flags,
                        mb, nb, Al, bsiz, 
                        Ap, psiz, INOUT|GATHERV);
                }
            }
            break;
              
        case PlasmaIPT_All:
            for (j=0; j<nt; j++) {
                Ap = A + (psiz*j);
                for (i=0; i<mt; i++) {
                    Al = Ap + i*bsiz;
                    
                    QUARK_CORE_cgetrip_f2(plasma->quark, &task_flags,
                                          mb, nb, Al, bsiz, 
                                          Ap, size, INPUT,
                                          A,  size, INOUT|GATHERV);
                }
            }
            break;
        case PlasmaIPT_NoDep:
        default:
            for (j=0; j<nt; j++) {
                Ap = A + (psiz*j);
                for (i=0; i<mt; i++) {
                    Al = Ap + i*bsiz;
                    QUARK_CORE_cgetrip_f1(
                        plasma->quark, &task_flags,
                        mb, nb, Al, bsiz, 
                        Ap, psiz, INPUT);
                }
            }
        }
        break;
        
/*
 * Dependency on all the matrix as input
 */
    case PlasmaIPT_All:
        switch ( odep ) {
        case PlasmaIPT_Panel:
            for (j=0; j<nt; j++) {
                Ap = A + (psiz*j);
                for (i=0; i<mt; i++) {
                    Al = Ap + i*bsiz;
                    QUARK_CORE_cgetrip_f2(
                        plasma->quark, &task_flags,
                        mb, nb, Al, bsiz, 
                        A,  size, INPUT,
                        Ap, psiz, INOUT|GATHERV);
                }
            }
            break;
              
        case PlasmaIPT_All:
            nprob = mt*nt;
            for (i=0; i<nprob; i++) {
                QUARK_CORE_cgetrip_f1(plasma->quark, &task_flags,
                                      mb, nb, &(A[ i*bsiz ]), bsiz, 
                                      A, size, INOUT|GATHERV);
            }
            break;
            
        case PlasmaIPT_NoDep:
        default:
            nprob = mt*nt;
            for (i=0; i<nprob; i++) {
                QUARK_CORE_cgetrip_f1(plasma->quark, &task_flags,
                                      mb, nb, &(A[ i*bsiz ]), bsiz, 
                                      A, size, INPUT);
            }
        }
        break;
        
/*
 * No Dependencies as input
 */
    case PlasmaIPT_NoDep:
    default:
        switch ( odep ) {
        case PlasmaIPT_Panel:
            for (j=0; j<nt; j++) {
                Ap = A + (psiz*j);
                for (i=0; i<mt; i++) {
                    Al = Ap + i*bsiz;
                    QUARK_CORE_cgetrip_f1(
                        plasma->quark, &task_flags,
                        mb, nb, Al, bsiz, 
                        Ap, psiz, INOUT|GATHERV);
                }
            }
            break;
              
        case PlasmaIPT_All:
            nprob = mt*nt;
            for (i=0; i<nprob; i++) {
                QUARK_CORE_cgetrip_f1(plasma->quark, &task_flags,
                                      mb, nb, &(A[ i*bsiz ]), bsiz, 
                                      A, size, INOUT|GATHERV);
            }
            break;
        case PlasmaIPT_NoDep:
        default:
            nprob = mt*nt;
            for (i=0; i<nprob; i++) {
                QUARK_CORE_cgetrip(plasma->quark, &task_flags,
                                   mb, nb,  &(A[ i*bsiz ]), bsiz);
            }
        }
    }
}

Here is the call graph for this function:

Functions

Detailed Description

Function Documentation