/**
 *
 * @file tile.c
 *
 *  PLASMA auxiliary routines
 *  PLASMA is a software package provided by Univ. of Tennessee,
 *  Univ. of California Berkeley and Univ. of Colorado Denver
 *
 * @version 2.2.0
 * @author Jakub Kurzak
 * @date 2009-11-15
 *
 **/
#include "common.h"
#include "auxiliary.h"
#include "tile.h"

/*
 * AF77(type, m, n): address of the first element of tile (m, n) inside the
 * LAPACK-layout array.  The element offset is A.nb*(lda*n + m), i.e. row
 * A.nb*m of column A.nb*n for leading dimension lda.
 * Relies on `Af77`, `lda` and the descriptor `A` being in scope at the point
 * of use.  All arithmetic is done in int64_t.
 */
#define AF77(type, m, n) \
    (&(((type*)Af77)[(int64_t)A.nb*((int64_t)lda*(int64_t)(n)+(int64_t)(m))]))

/*
 * ABDL(type, m, n): address of the first element of tile (m, n) inside the
 * tile-layout storage A.mat.  The tile indices are shifted by the origin of
 * the described region expressed in whole tiles (A.i/A.nb, A.j/A.nb); tiles
 * are A.bsiz elements apart and consecutive tile columns are A.lmt tiles
 * apart.
 * Relies on the descriptor `A` being in scope at the point of use.
 */
#define ABDL(type, m, n) \
    (&(((type*)A.mat)[(int64_t)A.bsiz*((int64_t)A.lmt*(int64_t)((n)+A.j/A.nb)+(int64_t)((m)+A.i/A.nb))]))

/***************************************************************************//**
 *  Conversion from LAPACK F77 matrix layout to tile layout
 *
 *  Unpacks (Af77, lda, A, sequence, request) with plasma_unpack_args_5 and
 *  copies elements from the LAPACK array Af77 (leading dimension lda) into
 *  the tiles described by A.
 *
 *  Tiles of the A.mt x A.nt grid are numbered down the columns
 *  (index = n*A.mt + m).  This call handles the tiles with index
 *  PLASMA_RANK, PLASMA_RANK + PLASMA_SIZE, PLASMA_RANK + 2*PLASMA_SIZE, ...
 *
 *  Nothing is copied when sequence->status is not PLASMA_SUCCESS, and an
 *  A.dtyp value other than the four handled below is silently ignored.
 *  `request` is unpacked but not used here.
 **/
void plasma_lapack_to_tile(plasma_context_t *plasma)
{
    PLASMA_desc A;
    void *Af77;
    int lda;
    PLASMA_sequence *sequence;
    PLASMA_request *request;

    int x, y;       /* column / row inside the current tile */
    int X1, Y1;     /* first column / row to copy           */
    int X2, Y2;     /* one past the last column / row       */
    int n, m;       /* tile column / tile row               */

    plasma_unpack_args_5(Af77, lda, A, sequence, request);
    if (sequence->status != PLASMA_SUCCESS)
        return;

    /* Turn the linear start index PLASMA_RANK into tile coordinates (m, n) */
    n = 0;
    m = PLASMA_RANK;
    while (m >= A.mt && n < A.nt) {
        n++;
        m = m-A.mt;
    }

    while (n < A.nt) {
        /* Clip the first and last tile row/column to the region (A.i, A.j, A.m, A.n) */
        X1 = n == 0 ? A.j%A.nb : 0;
        Y1 = m == 0 ? A.i%A.nb : 0;
        X2 = n == A.nt-1 ? (A.j+A.n-1)%A.nb+1 : A.nb;
        Y2 = m == A.mt-1 ? (A.i+A.m-1)%A.nb+1 : A.nb;

        /*
         * Element offsets are formed in int64_t, like the tile addresses in
         * AF77/ABDL, so that lda*x cannot overflow int for a very large
         * leading dimension.
         */
        switch (A.dtyp) {
            case PlasmaComplexDouble: {
                const PLASMA_Complex64_t *f77 = AF77(PLASMA_Complex64_t, m, n);
                PLASMA_Complex64_t *bdl = ABDL(PLASMA_Complex64_t, m, n);
                for (x = X1; x < X2; x++)
                    for (y = Y1; y < Y2; y++)
                        bdl[(int64_t)A.nb*x+y] = f77[(int64_t)lda*x+y];
            }
            break;
            case PlasmaComplexFloat: {
                const PLASMA_Complex32_t *f77 = AF77(PLASMA_Complex32_t, m, n);
                PLASMA_Complex32_t *bdl = ABDL(PLASMA_Complex32_t, m, n);
                for (x = X1; x < X2; x++)
                    for (y = Y1; y < Y2; y++)
                        bdl[(int64_t)A.nb*x+y] = f77[(int64_t)lda*x+y];
            }
            break;
            case PlasmaRealDouble: {
                const double *f77 = AF77(double, m, n);
                double *bdl = ABDL(double, m, n);
                for (x = X1; x < X2; x++)
                    for (y = Y1; y < Y2; y++)
                        bdl[(int64_t)A.nb*x+y] = f77[(int64_t)lda*x+y];
            }
            break;
            case PlasmaRealFloat: {
                const float *f77 = AF77(float, m, n);
                float *bdl = ABDL(float, m, n);
                for (x = X1; x < X2; x++)
                    for (y = Y1; y < Y2; y++)
                        bdl[(int64_t)A.nb*x+y] = f77[(int64_t)lda*x+y];
            }
            break;
            default:
                /* Unhandled element type: nothing is copied */
                break;
        }

        /* Advance PLASMA_SIZE tiles along the column-wise numbering */
        m += PLASMA_SIZE;
        while (m >= A.mt && n < A.nt) {
            n++;
            m = m-A.mt;
        }
    }
}

/***************************************************************************//**
 *  Conversion from tile layout to LAPACK F77 matrix layout
 *
 *  Unpacks (A, Af77, lda, sequence, request) with plasma_unpack_args_5 and
 *  copies elements from the tiles described by A into the LAPACK array Af77
 *  (leading dimension lda).
 *
 *  Tiles of the A.mt x A.nt grid are numbered down the columns
 *  (index = n*A.mt + m).  This call handles the tiles with index
 *  PLASMA_RANK, PLASMA_RANK + PLASMA_SIZE, PLASMA_RANK + 2*PLASMA_SIZE, ...
 *
 *  Nothing is copied when sequence->status is not PLASMA_SUCCESS, and an
 *  A.dtyp value other than the four handled below is silently ignored.
 *  `request` is unpacked but not used here.
 **/
void plasma_tile_to_lapack(plasma_context_t *plasma)
{
    PLASMA_desc A;
    void *Af77;
    int lda;
    PLASMA_sequence *sequence;
    PLASMA_request *request;

    int x, y;       /* column / row inside the current tile */
    int X1, Y1;     /* first column / row to copy           */
    int X2, Y2;     /* one past the last column / row       */
    int n, m;       /* tile column / tile row               */

    plasma_unpack_args_5(A, Af77, lda, sequence, request);
    if (sequence->status != PLASMA_SUCCESS)
        return;

    /* Turn the linear start index PLASMA_RANK into tile coordinates (m, n) */
    n = 0;
    m = PLASMA_RANK;
    while (m >= A.mt && n < A.nt) {
        n++;
        m = m-A.mt;
    }

    while (n < A.nt) {
        /* Clip the first and last tile row/column to the region (A.i, A.j, A.m, A.n) */
        X1 = n == 0 ? A.j%A.nb : 0;
        Y1 = m == 0 ? A.i%A.nb : 0;
        X2 = n == A.nt-1 ? (A.j+A.n-1)%A.nb+1 : A.nb;
        Y2 = m == A.mt-1 ? (A.i+A.m-1)%A.nb+1 : A.nb;

        /*
         * Element offsets are formed in int64_t, like the tile addresses in
         * AF77/ABDL, so that lda*x cannot overflow int for a very large
         * leading dimension.
         */
        switch (A.dtyp) {
            case PlasmaComplexDouble: {
                PLASMA_Complex64_t *f77 = AF77(PLASMA_Complex64_t, m, n);
                const PLASMA_Complex64_t *bdl = ABDL(PLASMA_Complex64_t, m, n);
                for (x = X1; x < X2; x++)
                    for (y = Y1; y < Y2; y++)
                        f77[(int64_t)lda*x+y] = bdl[(int64_t)A.nb*x+y];
            }
            break;
            case PlasmaComplexFloat: {
                PLASMA_Complex32_t *f77 = AF77(PLASMA_Complex32_t, m, n);
                const PLASMA_Complex32_t *bdl = ABDL(PLASMA_Complex32_t, m, n);
                for (x = X1; x < X2; x++)
                    for (y = Y1; y < Y2; y++)
                        f77[(int64_t)lda*x+y] = bdl[(int64_t)A.nb*x+y];
            }
            break;
            case PlasmaRealDouble: {
                double *f77 = AF77(double, m, n);
                const double *bdl = ABDL(double, m, n);
                for (x = X1; x < X2; x++)
                    for (y = Y1; y < Y2; y++)
                        f77[(int64_t)lda*x+y] = bdl[(int64_t)A.nb*x+y];
            }
            break;
            case PlasmaRealFloat: {
                float *f77 = AF77(float, m, n);
                const float *bdl = ABDL(float, m, n);
                for (x = X1; x < X2; x++)
                    for (y = Y1; y < Y2; y++)
                        f77[(int64_t)lda*x+y] = bdl[(int64_t)A.nb*x+y];
            }
            break;
            default:
                /* Unhandled element type: nothing is copied */
                break;
        }

        /* Advance PLASMA_SIZE tiles along the column-wise numbering */
        m += PLASMA_SIZE;
        while (m >= A.mt && n < A.nt) {
            n++;
            m = m-A.mt;
        }
    }
}

/***************************************************************************//**
 *  Zeroes a submatrix in tile layout
 *
 *  Unpacks (A, sequence, request) with plasma_unpack_args_3 and stores 0.0
 *  into every element of the region described by A that lies in a tile
 *  handled by this call.  Tiles are numbered down the columns of the
 *  A.mt x A.nt grid; the call starts at index PLASMA_RANK and steps by
 *  PLASMA_SIZE.  Nothing is written when sequence->status is not
 *  PLASMA_SUCCESS.  `request` is unpacked but not used here.
 **/
void plasma_tile_zero(plasma_context_t *plasma)
{
    PLASMA_desc A;
    PLASMA_sequence *sequence;
    PLASMA_request *request;

    int tile_row, tile_col;     /* coordinates of the current tile   */
    int row, col;               /* element position inside the tile  */
    int row_begin, row_end;     /* half-open row range to clear      */
    int col_begin, col_end;     /* half-open column range to clear   */

    plasma_unpack_args_3(A, sequence, request);
    if (sequence->status != PLASMA_SUCCESS)
        return;

    /* Locate the first tile: linear index PLASMA_RANK, counted column-wise */
    tile_col = 0;
    tile_row = PLASMA_RANK;
    while (tile_row >= A.mt && tile_col < A.nt) {
        tile_row -= A.mt;
        tile_col++;
    }

    while (tile_col < A.nt) {
        /* Only the first tile column/row can start past element 0 ... */
        col_begin = 0;
        if (tile_col == 0)
            col_begin = A.j % A.nb;
        row_begin = 0;
        if (tile_row == 0)
            row_begin = A.i % A.nb;

        /* ... and only the last tile column/row can stop before A.nb */
        col_end = A.nb;
        if (tile_col == A.nt - 1)
            col_end = (A.j + A.n - 1) % A.nb + 1;
        row_end = A.nb;
        if (tile_row == A.mt - 1)
            row_end = (A.i + A.m - 1) % A.nb + 1;

        switch (A.dtyp) {
        case PlasmaComplexDouble: {
            PLASMA_Complex64_t *tile = ABDL(PLASMA_Complex64_t, tile_row, tile_col);
            for (col = col_begin; col < col_end; col++) {
                PLASMA_Complex64_t *column = tile + A.nb * col;
                for (row = row_begin; row < row_end; row++)
                    column[row] = 0.0;
            }
            break;
        }
        case PlasmaComplexFloat: {
            PLASMA_Complex32_t *tile = ABDL(PLASMA_Complex32_t, tile_row, tile_col);
            for (col = col_begin; col < col_end; col++) {
                PLASMA_Complex32_t *column = tile + A.nb * col;
                for (row = row_begin; row < row_end; row++)
                    column[row] = 0.0;
            }
            break;
        }
        case PlasmaRealDouble: {
            double *tile = ABDL(double, tile_row, tile_col);
            for (col = col_begin; col < col_end; col++) {
                double *column = tile + A.nb * col;
                for (row = row_begin; row < row_end; row++)
                    column[row] = 0.0;
            }
            break;
        }
        case PlasmaRealFloat: {
            float *tile = ABDL(float, tile_row, tile_col);
            for (col = col_begin; col < col_end; col++) {
                float *column = tile + A.nb * col;
                for (row = row_begin; row < row_end; row++)
                    column[row] = 0.0;
            }
            break;
        }
        }

        /* Step PLASMA_SIZE tiles forward, wrapping into the following tile columns */
        tile_row += PLASMA_SIZE;
        while (tile_row >= A.mt && tile_col < A.nt) {
            tile_row -= A.mt;
            tile_col++;
        }
    }
}

/***************************************************************************//**
 *
 * @ingroup Auxiliary
 *
 *  PLASMA_Lapack_to_Tile - Conversion from LAPACK layout to tile layout.
 *
 *******************************************************************************
 *
 * @param[in] Af77
 *          LAPACK matrix.
 *
 * @param[in] LDA
 *          The leading dimension of the matrix Af77.
 *
 * @param[out] A
 *          Descriptor of the PLASMA matrix in tile layout.
 *          Must not be NULL.
 *
 *******************************************************************************
 *
 * @return
 *          \retval PLASMA_SUCCESS successful exit
 *          \retval PLASMA_ERR_NOT_INITIALIZED no PLASMA context is available
 *          \retval PLASMA_ERR_ILLEGAL_VALUE A is NULL or fails plasma_desc_check()
 *          \retval other the non-success status of plasma_sequence_create(),
 *                  or the status left in the sequence by the conversion
 *
 ******************************************************************************/
int PLASMA_Lapack_to_Tile(void *Af77, int LDA, PLASMA_desc *A)
{
    PLASMA_desc descA;
    plasma_context_t *plasma;
    PLASMA_sequence *sequence = NULL;
    PLASMA_request request;
    int status;

    plasma = plasma_context_self();
    if (plasma == NULL) {
        plasma_fatal_error("PLASMA_Lapack_to_Tile", "PLASMA not initialized");
        return PLASMA_ERR_NOT_INITIALIZED;
    }
    /* Reject a NULL descriptor before it is copied */
    if (A == NULL) {
        plasma_error("PLASMA_Lapack_to_Tile", "NULL descriptor");
        return PLASMA_ERR_ILLEGAL_VALUE;
    }
    descA = *A;
    /* Check descriptor for correctness */
    if (plasma_desc_check(&descA) != PLASMA_SUCCESS) {
        plasma_error("PLASMA_Lapack_to_Tile", "invalid descriptor");
        return PLASMA_ERR_ILLEGAL_VALUE;
    }
    /* Do not go on with an unusable sequence: its status is read below */
    status = plasma_sequence_create(plasma, &sequence);
    if (status != PLASMA_SUCCESS)
        return status;

    plasma_static_call_5(plasma_lapack_to_tile,
        void*, Af77,
        int, LDA,
        PLASMA_desc, descA,
        PLASMA_sequence*, sequence,
        PLASMA_request*, &request);
    plasma_dynamic_sync();
    /* Read the status before the sequence is destroyed */
    status = sequence->status;
    plasma_sequence_destroy(plasma, sequence);
    return status;
}

/***************************************************************************//**
 *
 * @ingroup Auxiliary
 *
 *  PLASMA_Tile_to_Lapack - Conversion from tile layout to LAPACK layout.
 *
 *******************************************************************************
 *
 * @param[in] A
 *          Descriptor of the PLASMA matrix in tile layout.
 *          Must not be NULL.
 *
 * @param[out] Af77
 *          LAPACK matrix.
 *
 * @param[in] LDA
 *          The leading dimension of the matrix Af77.
 *
 *******************************************************************************
 *
 * @return
 *          \retval PLASMA_SUCCESS successful exit
 *          \retval PLASMA_ERR_NOT_INITIALIZED no PLASMA context is available
 *          \retval PLASMA_ERR_ILLEGAL_VALUE A is NULL or fails plasma_desc_check()
 *          \retval other the non-success status of plasma_sequence_create(),
 *                  or the status left in the sequence by the conversion
 *
 ******************************************************************************/
int PLASMA_Tile_to_Lapack(PLASMA_desc *A, void *Af77, int LDA)
{
    PLASMA_desc descA;
    plasma_context_t *plasma;
    PLASMA_sequence *sequence = NULL;
    PLASMA_request request;
    int status;

    plasma = plasma_context_self();
    if (plasma == NULL) {
        plasma_fatal_error("PLASMA_Tile_to_Lapack", "PLASMA not initialized");
        return PLASMA_ERR_NOT_INITIALIZED;
    }
    /* Reject a NULL descriptor before it is copied */
    if (A == NULL) {
        plasma_error("PLASMA_Tile_to_Lapack", "NULL descriptor");
        return PLASMA_ERR_ILLEGAL_VALUE;
    }
    descA = *A;
    /* Check descriptor for correctness */
    if (plasma_desc_check(&descA) != PLASMA_SUCCESS) {
        plasma_error("PLASMA_Tile_to_Lapack", "invalid descriptor");
        return PLASMA_ERR_ILLEGAL_VALUE;
    }
    /* Do not go on with an unusable sequence: its status is read below */
    status = plasma_sequence_create(plasma, &sequence);
    if (status != PLASMA_SUCCESS)
        return status;

    /*
     * Af77 is packed as void*, its declared type and the type the worker
     * plasma_tile_to_lapack unpacks it into; the element type is selected
     * from the descriptor, not from this pointer.
     */
    plasma_static_call_5(plasma_tile_to_lapack,
        PLASMA_desc, descA,
        void*, Af77,
        int, LDA,
        PLASMA_sequence*, sequence,
        PLASMA_request*, &request);
    plasma_dynamic_sync();
    /* Read the status before the sequence is destroyed */
    status = sequence->status;
    plasma_sequence_destroy(plasma, sequence);
    return status;
}
