#include <stdlib.h>
#include <sys/types.h>
#include "common.h"

Include dependency graph for pdpack.c:

Functions
void	plasma_pdpack (plasma_context_t *plasma)
void	plasma_pdunpack (plasma_context_t *plasma)

Detailed Description

PLASMA InPlaceTransformation module. PLASMA is a software package provided by Univ. of Tennessee, Univ. of California Berkeley and Univ. of Colorado Denver.

This work is the implementation of an in-place transformation based on the GKK algorithm by Gustavson, Karlsson, and Kagstrom, and on its Fortran implementation.

Version:: 2.4.5

Author:: Mathieu Faverge

Date:: 2010-11-15

Generated: d Tue Nov 22 14:35:42 2011

Definition in file pdpack.c.

Function Documentation

void plasma_pdpack ( plasma_context_t * plasma )

plasma_pdpack packs all extra elements at the end of the matrix.

 +---------------+
 |               |
 |               |
 |     A11       |
 |               |
 |               |
 +---------------+
 |     A21       |
 +---------------+

This matrix is initially stored as follows (the example is column major; row major is handled the same way by considering the transposed matrix): A11(:,0), A21(:,0), A11(:,1), A21(:,1), ...

On exit, it is stored as follows: A11(:,:), A21(:,:)

Parameters:

[in]	plasma	Plasma context
[in]	m	Number of rows in matrix A
[in]	n	Number of columns in matrix A
[in,out]	A	Matrix A to pack. (see above for entry and exit format)
[in]	m0	Number of rows of A21

Definition at line 65 of file pdpack.c.

References A, CORE_dlacpy(), min, plasma_barrier(), plasma_private_alloc(), plasma_private_free(), PLASMA_RANK, PLASMA_SIZE, PLASMA_SUCCESS, plasma_unpack_args_6, PlasmaRealDouble, PlasmaUpperLower, plasma_sequence_t::status, and W.

{
    /*
     * Pack the m1 = m - m0 leading rows (A11) of every column contiguously at
     * the front of A, and move the m0 trailing rows (A21) of every column
     * behind them.
     *
     * Arguments m, n, A, m0, sequence and request are unpacked from the
     * plasma context. The n-1 first columns are split between the PLASMA_SIZE
     * ranks; each rank owns `bs` consecutive columns beginning at `start`.
     *
     * All element offsets into A are computed in int64_t: products such as
     * i*m or start*m0 can exceed INT_MAX for large matrices, and signed int
     * overflow is undefined behavior.
     *
     * NOTE(review): the results of plasma_private_alloc() are not checked
     * here; an early return on failure would skip the barriers below, so any
     * error handling has to be coordinated across ranks -- confirm the
     * project convention before adding it.
     */
    double *A, *W, *Wl;
    PLASMA_sequence *sequence;
    PLASMA_request *request;
    int m, n, m0;
    int i, m1, size, rank, start, end, bs, mod;

    plasma_unpack_args_6(m, n, A, m0, sequence, request);
    if (sequence->status != PLASMA_SUCCESS)
        return;

    /* Quick return: with at most one column there is nothing to move. */
    if ( n <= 1 )
      return;

    m1 = m - m0;

    size = PLASMA_SIZE;
    rank = PLASMA_RANK;

    /*
     * Distribute the n-1 first columns over the ranks: every rank gets
     * (n-1)/size columns and the first `mod` ranks get one more.
     */
    mod   = (n-1) % size;
    bs    = (n-1) / size;
    start = rank * bs;
    if ( rank < mod ) {
        bs++;
    }
    start += min( mod, rank );

    /* W holds this rank's A21 pieces (m0 x bs); Wl holds one A11 column. */
    W  = (double*)plasma_private_alloc(plasma, (m0*bs), PlasmaRealDouble);
    Wl = (double*)plasma_private_alloc(plasma, m1,      PlasmaRealDouble);

    /* Save leftover pieces that are otherwise going to be overwritten */
    CORE_dlacpy( PlasmaUpperLower, m0, bs, &(A[(int64_t)start*m+m1]), m, W, m0 );

    /*
     * Pack A: columns 1 .. end-1 are processed in rounds of `size` columns,
     * one column per rank and per round. Each rank copies its column to Wl,
     * waits on the barrier, then writes the column to its packed position.
     */
    end = ((n-1) / size) * size + 1;
    for(i=rank+1; i<end; i+=size) {
        memcpy( Wl, &(A[(int64_t)i*m]), m1*sizeof(double));
        plasma_barrier(plasma);
        memcpy( &(A[(int64_t)i*m1]), Wl, m1*sizeof(double));
    }

    /*
     * Remaining n - end columns: only the first ranks have a column to move,
     * but every rank still reaches the barrier exactly once.
     */
    if ( rank < (n - end)) {
        i = end + rank;
        memcpy( Wl, &(A[(int64_t)i*m]), m1*sizeof(double));
        plasma_barrier(plasma);
        memcpy( &(A[(int64_t)i*m1]), Wl, m1*sizeof(double));
    }
    else
        plasma_barrier(plasma);

    /* Restore leftover pieces behind the packed A11 part (offset m1*n). */
    CORE_dlacpy( PlasmaUpperLower, m0, bs, W, m0,
                 &(A[(int64_t)m1*n + (int64_t)start*m0]), m0 );

    plasma_private_free(plasma, W);
    plasma_private_free(plasma, Wl);
}

Here is the call graph for this function:

Here is the caller graph for this function:

void plasma_pdunpack ( plasma_context_t * plasma )

plasma_pdunpack unpacks all extra elements from the end of the matrix.

 +---------------+
 |               |
 |               |
 |     A11       |
 |               |
 |               |
 +---------------+
 |     A21       |
 +---------------+

This matrix is initially stored as follows (the example is column major; row major is handled the same way by considering the transposed matrix): A11(:,:), A21(:,:)

On exit, it is stored as follows: A11(:,0), A21(:,0), A11(:,1), A21(:,1), ...