/* ///////////////////////////// P /// L /// A /// S /// M /// A /////////////////////////////// */
/* ///                    PLASMA auxiliary routines (version 2.1.0)                          ///
 * ///                    Author: Jakub Kurzak                                               ///
 * ///                    Release Date: November, 15th 2009                                  ///
 * ///                    PLASMA is a software package provided by Univ. of Tennessee,       ///
 * ///                    Univ. of California Berkeley and Univ. of Colorado Denver          /// */
/* ///////////////////////////////////////////////////////////////////////////////////////////// */
#include "common.h"
#include "auxiliary.h"
#include "tile.h"

/* ///////////////////////////////////////////////////////////////////////////////////////////// */
//  Conversion from LAPACK F77 matrix layout to tile layout.
//
//  Arguments (Af77, lda, A) are obtained through plasma_unpack_args_3.
//  Tiles are treated as a linear sequence running down each tile column
//  (A.mt tiles per column, A.nt columns).  This routine starts at linear
//  position PLASMA_RANK and advances by PLASMA_SIZE positions per step,
//  copying each visited tile from Af77 into A.mat.
void plasma_lapack_to_tile(plasma_context_t *plasma)
{
    PLASMA_desc A;
    void *Af77;
    int lda;

    int row, col;           /* element coordinates inside the current tile */
    int col_lo, row_lo;     /* first column / row to copy (inclusive)      */
    int col_hi, row_hi;     /* last column / row to copy (exclusive)       */
    int tn, tm;             /* tile column / tile row being processed      */
    int tm_next;
    int tn_next;

    plasma_unpack_args_3(Af77, lda, A);

    /* Split the starting linear position into (tile row, tile column). */
    tn = 0;
    for (tm = PLASMA_RANK; tm >= A.mt && tn < A.nt; tm = tm-A.mt) {
        tn++;
    }

    while (tn < A.nt) {
        /* Determine the following tile up front: move PLASMA_SIZE positions
           ahead and wrap into later tile columns as needed. */
        tn_next = tn;
        for (tm_next = tm + (PLASMA_SIZE); tm_next >= A.mt && tn_next < A.nt; tm_next = tm_next-A.mt) {
            tn_next++;
        }

        /* Clip the copied region on the first and last tile column / row,
           based on the submatrix offsets (A.i, A.j) and sizes (A.m, A.n). */
        if (tn == 0)
            col_lo = A.j%A.nb;
        else
            col_lo = 0;

        if (tm == 0)
            row_lo = A.i%A.nb;
        else
            row_lo = 0;

        if (tn == A.nt-1)
            col_hi = (A.j+A.n-1)%A.nb+1;
        else
            col_hi = A.nb;

        if (tm == A.mt-1)
            row_hi = (A.i+A.m-1)%A.nb+1;
        else
            row_hi = A.nb;

        /* Same copy for every precision; only the element type differs. */
        switch (A.dtyp) {
            case PlasmaComplexDouble: {
                PLASMA_Complex64_t *src = (PLASMA_Complex64_t*)Af77 + A.nb*(lda*tn+tm);
                PLASMA_Complex64_t *dst = (PLASMA_Complex64_t*)A.mat + A.bsiz*(A.lmt*(tn+A.j/A.nb)+(tm+A.i/A.nb));
                for (col = col_lo; col < col_hi; col++) {
                    for (row = row_lo; row < row_hi; row++) {
                        dst[A.nb*col+row] = src[lda*col+row];
                    }
                }
                break;
            }
            case PlasmaComplexFloat: {
                PLASMA_Complex32_t *src = (PLASMA_Complex32_t*)Af77 + A.nb*(lda*tn+tm);
                PLASMA_Complex32_t *dst = (PLASMA_Complex32_t*)A.mat + A.bsiz*(A.lmt*(tn+A.j/A.nb)+(tm+A.i/A.nb));
                for (col = col_lo; col < col_hi; col++) {
                    for (row = row_lo; row < row_hi; row++) {
                        dst[A.nb*col+row] = src[lda*col+row];
                    }
                }
                break;
            }
            case PlasmaRealDouble: {
                double *src = (double*)Af77 + A.nb*(lda*tn+tm);
                double *dst = (double*)A.mat + A.bsiz*(A.lmt*(tn+A.j/A.nb)+(tm+A.i/A.nb));
                for (col = col_lo; col < col_hi; col++) {
                    for (row = row_lo; row < row_hi; row++) {
                        dst[A.nb*col+row] = src[lda*col+row];
                    }
                }
                break;
            }
            case PlasmaRealFloat: {
                float *src = (float*)Af77 + A.nb*(lda*tn+tm);
                float *dst = (float*)A.mat + A.bsiz*(A.lmt*(tn+A.j/A.nb)+(tm+A.i/A.nb));
                for (col = col_lo; col < col_hi; col++) {
                    for (row = row_lo; row < row_hi; row++) {
                        dst[A.nb*col+row] = src[lda*col+row];
                    }
                }
                break;
            }
            default:
                /* Any other A.dtyp value: nothing is copied for this tile. */
                break;
        }

        tm = tm_next;
        tn = tn_next;
    }
}

/* ///////////////////////////////////////////////////////////////////////////////////////////// */
//  Conversion from tile layout to LAPACK F77 matrix layout.
//
//  Arguments (A, Af77, lda) are obtained through plasma_unpack_args_3.
//  Tiles are treated as a linear sequence running down each tile column
//  (A.mt tiles per column, A.nt columns).  This routine starts at linear
//  position PLASMA_RANK and advances by PLASMA_SIZE positions per step,
//  copying each visited tile from A.mat into Af77.
void plasma_tile_to_lapack(plasma_context_t *plasma)
{
    PLASMA_desc A;
    void *Af77;
    int lda;

    int row, col;           /* element coordinates inside the current tile */
    int col_lo, row_lo;     /* first column / row to copy (inclusive)      */
    int col_hi, row_hi;     /* last column / row to copy (exclusive)       */
    int tn, tm;             /* tile column / tile row being processed      */
    int tm_next;
    int tn_next;

    plasma_unpack_args_3(A, Af77, lda);

    /* Split the starting linear position into (tile row, tile column). */
    tn = 0;
    for (tm = PLASMA_RANK; tm >= A.mt && tn < A.nt; tm = tm-A.mt) {
        tn++;
    }

    while (tn < A.nt) {
        /* Determine the following tile up front: move PLASMA_SIZE positions
           ahead and wrap into later tile columns as needed. */
        tn_next = tn;
        for (tm_next = tm + (PLASMA_SIZE); tm_next >= A.mt && tn_next < A.nt; tm_next = tm_next-A.mt) {
            tn_next++;
        }

        /* Clip the copied region on the first and last tile column / row,
           based on the submatrix offsets (A.i, A.j) and sizes (A.m, A.n). */
        if (tn == 0)
            col_lo = A.j%A.nb;
        else
            col_lo = 0;

        if (tm == 0)
            row_lo = A.i%A.nb;
        else
            row_lo = 0;

        if (tn == A.nt-1)
            col_hi = (A.j+A.n-1)%A.nb+1;
        else
            col_hi = A.nb;

        if (tm == A.mt-1)
            row_hi = (A.i+A.m-1)%A.nb+1;
        else
            row_hi = A.nb;

        /* Same copy for every precision; only the element type differs. */
        switch (A.dtyp) {
            case PlasmaComplexDouble: {
                PLASMA_Complex64_t *dst = (PLASMA_Complex64_t*)Af77 + A.nb*(lda*tn+tm);
                PLASMA_Complex64_t *src = (PLASMA_Complex64_t*)A.mat + A.bsiz*(A.lmt*(tn+A.j/A.nb)+(tm+A.i/A.nb));
                for (col = col_lo; col < col_hi; col++) {
                    for (row = row_lo; row < row_hi; row++) {
                        dst[lda*col+row] = src[A.nb*col+row];
                    }
                }
                break;
            }
            case PlasmaComplexFloat: {
                PLASMA_Complex32_t *dst = (PLASMA_Complex32_t*)Af77 + A.nb*(lda*tn+tm);
                PLASMA_Complex32_t *src = (PLASMA_Complex32_t*)A.mat + A.bsiz*(A.lmt*(tn+A.j/A.nb)+(tm+A.i/A.nb));
                for (col = col_lo; col < col_hi; col++) {
                    for (row = row_lo; row < row_hi; row++) {
                        dst[lda*col+row] = src[A.nb*col+row];
                    }
                }
                break;
            }
            case PlasmaRealDouble: {
                double *dst = (double*)Af77 + A.nb*(lda*tn+tm);
                double *src = (double*)A.mat + A.bsiz*(A.lmt*(tn+A.j/A.nb)+(tm+A.i/A.nb));
                for (col = col_lo; col < col_hi; col++) {
                    for (row = row_lo; row < row_hi; row++) {
                        dst[lda*col+row] = src[A.nb*col+row];
                    }
                }
                break;
            }
            case PlasmaRealFloat: {
                float *dst = (float*)Af77 + A.nb*(lda*tn+tm);
                float *src = (float*)A.mat + A.bsiz*(A.lmt*(tn+A.j/A.nb)+(tm+A.i/A.nb));
                for (col = col_lo; col < col_hi; col++) {
                    for (row = row_lo; row < row_hi; row++) {
                        dst[lda*col+row] = src[A.nb*col+row];
                    }
                }
                break;
            }
            default:
                /* Any other A.dtyp value: nothing is copied for this tile. */
                break;
        }

        tm = tm_next;
        tn = tn_next;
    }
}

/* ///////////////////////////////////////////////////////////////////////////////////////////// */
//  Zeroes a submatrix in tile layout.
//
//  The descriptor A is obtained through plasma_unpack_args_1.
//  Tiles are treated as a linear sequence running down each tile column
//  (A.mt tiles per column, A.nt columns).  This routine starts at linear
//  position PLASMA_RANK and advances by PLASMA_SIZE positions per step,
//  storing 0.0 into the selected elements of each visited tile of A.mat.
void plasma_tile_zero(plasma_context_t *plasma)
{
    PLASMA_desc A;

    int row, col;           /* element coordinates inside the current tile */
    int col_lo, row_lo;     /* first column / row to clear (inclusive)     */
    int col_hi, row_hi;     /* last column / row to clear (exclusive)      */
    int tn, tm;             /* tile column / tile row being processed      */
    int tm_next;
    int tn_next;

    plasma_unpack_args_1(A);

    /* Split the starting linear position into (tile row, tile column). */
    tn = 0;
    for (tm = PLASMA_RANK; tm >= A.mt && tn < A.nt; tm = tm-A.mt) {
        tn++;
    }

    while (tn < A.nt) {
        /* Determine the following tile up front: move PLASMA_SIZE positions
           ahead and wrap into later tile columns as needed. */
        tn_next = tn;
        for (tm_next = tm + (PLASMA_SIZE); tm_next >= A.mt && tn_next < A.nt; tm_next = tm_next-A.mt) {
            tn_next++;
        }

        /* Clip the cleared region on the first and last tile column / row,
           based on the submatrix offsets (A.i, A.j) and sizes (A.m, A.n). */
        if (tn == 0)
            col_lo = A.j%A.nb;
        else
            col_lo = 0;

        if (tm == 0)
            row_lo = A.i%A.nb;
        else
            row_lo = 0;

        if (tn == A.nt-1)
            col_hi = (A.j+A.n-1)%A.nb+1;
        else
            col_hi = A.nb;

        if (tm == A.mt-1)
            row_hi = (A.i+A.m-1)%A.nb+1;
        else
            row_hi = A.nb;

        /* Same store for every precision; only the element type differs. */
        switch (A.dtyp) {
            case PlasmaComplexDouble: {
                PLASMA_Complex64_t *tile = (PLASMA_Complex64_t*)A.mat + A.bsiz*(A.lmt*(tn+A.j/A.nb)+(tm+A.i/A.nb));
                for (col = col_lo; col < col_hi; col++) {
                    for (row = row_lo; row < row_hi; row++) {
                        tile[A.nb*col+row] = 0.0;
                    }
                }
                break;
            }
            case PlasmaComplexFloat: {
                PLASMA_Complex32_t *tile = (PLASMA_Complex32_t*)A.mat + A.bsiz*(A.lmt*(tn+A.j/A.nb)+(tm+A.i/A.nb));
                for (col = col_lo; col < col_hi; col++) {
                    for (row = row_lo; row < row_hi; row++) {
                        tile[A.nb*col+row] = 0.0;
                    }
                }
                break;
            }
            case PlasmaRealDouble: {
                double *tile = (double*)A.mat + A.bsiz*(A.lmt*(tn+A.j/A.nb)+(tm+A.i/A.nb));
                for (col = col_lo; col < col_hi; col++) {
                    for (row = row_lo; row < row_hi; row++) {
                        tile[A.nb*col+row] = 0.0;
                    }
                }
                break;
            }
            case PlasmaRealFloat: {
                float *tile = (float*)A.mat + A.bsiz*(A.lmt*(tn+A.j/A.nb)+(tm+A.i/A.nb));
                for (col = col_lo; col < col_hi; col++) {
                    for (row = row_lo; row < row_hi; row++) {
                        tile[A.nb*col+row] = 0.0;
                    }
                }
                break;
            }
            default:
                /* Any other A.dtyp value: nothing is written for this tile. */
                break;
        }

        tm = tm_next;
        tn = tn_next;
    }
}

/* /////////////////////////// P /// U /// R /// P /// O /// S /// E /////////////////////////// */
// PLASMA_Lapack_to_Tile - Conversion from LAPACK layout to tile layout.

/* ///////////////////// A /// R /// G /// U /// M /// E /// N /// T /// S ///////////////////// */
// Af77     void* (IN)
//          LAPACK matrix.
//
// LDA      int (IN)
//          The leading dimension of the matrix Af77.
//
// A        PLASMA_desc* (OUT)
//          Descriptor of the PLASMA matrix in tile layout.

/* ///////////// R /// E /// T /// U /// R /// N /////// V /// A /// L /// U /// E ///////////// */
//          = PLASMA_SUCCESS: successful exit

/* //////////////////////////////////// C /// O /// D /// E //////////////////////////////////// */
// Checks the PLASMA context and the descriptor, then runs plasma_lapack_to_tile
// through plasma_parallel_call_3 with (Af77, LDA, a copy of *A) as arguments.
// Returns:
//   PLASMA_ERR_NOT_INITIALIZED  no PLASMA context is available
//   PLASMA_ERR_ILLEGAL_VALUE    A is NULL or fails plasma_desc_check
//   PLASMA_SUCCESS              otherwise
int PLASMA_Lapack_to_Tile(void *Af77, int LDA, PLASMA_desc *A)
{
    PLASMA_desc descA;
    plasma_context_t *plasma;

    plasma = plasma_context_self();
    if (plasma == NULL) {
        plasma_fatal_error("PLASMA_Lapack_to_Tile", "PLASMA not initialized");
        return PLASMA_ERR_NOT_INITIALIZED;
    }
    /* Reject a NULL descriptor before it is dereferenced */
    if (A == NULL) {
        plasma_error("PLASMA_Lapack_to_Tile", "NULL descriptor");
        return PLASMA_ERR_ILLEGAL_VALUE;
    }
    /* Work on a local copy; the copy is what gets passed to the worker */
    descA = *A;

    /* Check descriptor for correctness */
    if (plasma_desc_check(&descA) != PLASMA_SUCCESS) {
        plasma_error("PLASMA_Lapack_to_Tile", "invalid descriptor");
        return PLASMA_ERR_ILLEGAL_VALUE;
    }
    /* Argument order must match plasma_unpack_args_3 in plasma_lapack_to_tile */
    plasma_parallel_call_3(plasma_lapack_to_tile,
        void*, Af77,
        int, LDA,
        PLASMA_desc, descA);

    return PLASMA_SUCCESS;
}

/* /////////////////////////// P /// U /// R /// P /// O /// S /// E /////////////////////////// */
// PLASMA_Tile_to_Lapack - Conversion from tile layout to LAPACK layout

/* ///////////////////// A /// R /// G /// U /// M /// E /// N /// T /// S ///////////////////// */
// A        PLASMA_desc* (IN)
//          Descriptor of the PLASMA matrix in tile layout.
//
// Af77     void* (OUT)
//          LAPACK matrix.
//
// LDA      int (IN)
//          The leading dimension of the matrix Af77.

/* ///////////// R /// E /// T /// U /// R /// N /////// V /// A /// L /// U /// E ///////////// */
//          = PLASMA_SUCCESS: successful exit

/* //////////////////////////////////// C /// O /// D /// E //////////////////////////////////// */
// Checks the PLASMA context and the descriptor, then runs plasma_tile_to_lapack
// through plasma_parallel_call_3 with (a copy of *A, Af77, LDA) as arguments.
// Returns:
//   PLASMA_ERR_NOT_INITIALIZED  no PLASMA context is available
//   PLASMA_ERR_ILLEGAL_VALUE    A is NULL or fails plasma_desc_check
//   PLASMA_SUCCESS              otherwise
int PLASMA_Tile_to_Lapack(PLASMA_desc *A, void *Af77, int LDA)
{
    PLASMA_desc descA;
    plasma_context_t *plasma;

    plasma = plasma_context_self();
    if (plasma == NULL) {
        plasma_fatal_error("PLASMA_Tile_to_Lapack", "PLASMA not initialized");
        return PLASMA_ERR_NOT_INITIALIZED;
    }
    /* Reject a NULL descriptor before it is dereferenced */
    if (A == NULL) {
        plasma_error("PLASMA_Tile_to_Lapack", "NULL descriptor");
        return PLASMA_ERR_ILLEGAL_VALUE;
    }
    /* Work on a local copy; the copy is what gets passed to the worker */
    descA = *A;

    /* Check descriptor for correctness */
    if (plasma_desc_check(&descA) != PLASMA_SUCCESS) {
        plasma_error("PLASMA_Tile_to_Lapack", "invalid descriptor");
        return PLASMA_ERR_ILLEGAL_VALUE;
    }
    /* Argument order must match plasma_unpack_args_3 in plasma_tile_to_lapack.
       Af77 is passed as void*, matching both this function's parameter type
       and the void* it is unpacked into by the worker. */
    plasma_parallel_call_3(plasma_tile_to_lapack,
        PLASMA_desc, descA,
        void*, Af77,
        int, LDA);

    return PLASMA_SUCCESS;
}
