/**
 *
 * @file core_zssssm.c
 *
 *  PLASMA core_blas kernel
 *  PLASMA is a software package provided by Univ. of Tennessee,
 *  Univ. of California Berkeley and Univ. of Colorado Denver
 *
 * @version 2.2.0
 * @author Hatem Ltaief
 * @author Mathieu Faverge
 * @date 2009-11-15
 *
 **/
#include <cblas.h>
#include "common.h"

/***************************************************************************//**
 *
 * @ingroup CORE_PLASMA_Complex64_t
 *
 *  CORE_zssssm applies the row interchanges and the L factors returned by
 *  CORE_ztstrf to a pair of stacked tiles: the M1-by-NN tile A1 on top of
 *  the M2-by-NN tile A2.
 *
 *  The K pivots are processed in panels of at most IB pivots.  For the panel
 *  starting at row ii and holding sb = min(K-ii, IB) pivots:
 *    - each row ii+i of A1 (0 <= i < sb) whose pivot differs from itself is
 *      swapped with the row of A2 designated by IPIV;
 *    - rows ii .. ii+sb-1 of A1 are overwritten by the solution of the unit
 *      lower triangular system whose sb-by-sb matrix starts at column ii
 *      of L1;
 *    - A2 is updated as A2 := A2 - L2(:, ii:ii+sb-1) * A1(ii:ii+sb-1, :).
 *
 *******************************************************************************
 *
 * @param[in] M1
 *         The number of rows of the tile A1.  M1 >= 0.
 *
 * @param[in] M2
 *         The number of rows of the tile A2.  M2 >= 0.
 *
 * @param[in] NN
 *         The number of columns of the tiles A1 and A2.  NN >= 0.
 *
 * @param[in] IB
 *         The inner-blocking size.  IB >= 0.
 *
 * @param[in] K
 *         The number of pivots to apply, i.e. the number of entries read
 *         from IPIV, the number of leading rows of A1 that are updated and
 *         the number of columns read from L1 and L2.  K >= 0.
 *         Rows 0 .. K-1 of A1 are addressed, so K is expected not to
 *         exceed M1 (not checked here).
 *
 * @param[in,out] A1
 *         On entry, the M1-by-NN tile A1.
 *         On exit, A1 is overwritten by the application of L.
 *
 * @param[in] LDA1
 *         The leading dimension of the array A1.  LDA1 >= max(1,M1).
 *
 * @param[in,out] A2
 *         On entry, the M2-by-NN tile A2.
 *         On exit, A2 is overwritten by the application of L.
 *
 * @param[in] LDA2
 *         The leading dimension of the array A2.  LDA2 >= max(1,M2).
 *
 * @param[in] L1
 *         The unit lower triangular factors as returned by CORE_ztstrf.
 *         The sb-by-sb triangle used for the panel starting at ii is read
 *         from rows 0 .. sb-1 of columns ii .. ii+sb-1.
 *
 * @param[in] LDL1
 *         The leading dimension of the array L1.  LDL1 >= max(1,IB).
 *
 * @param[in] L2
 *         The M2-by-K tile as returned by CORE_ztstrf.
 *
 * @param[in] LDL2
 *         The leading dimension of the array L2.  LDL2 >= max(1,M2).
 *
 * @param[in] IPIV
 *         The K pivot indices as returned by CORE_ztstrf.  Entries are
 *         1-based row indices in the stacked matrix [A1; A2]: the value
 *         ii+i+1 means "no interchange" for row ii+i of A1, any other value
 *         p designates row p-1-M1 of A2.
 *
 *******************************************************************************
 *
 * @return
 *         \retval PLASMA_SUCCESS successful exit
 *         \retval <0 if INFO = -k, the k-th argument had an illegal value
 *                 (only M1, M2, NN, IB and K are validated)
 *
 ******************************************************************************/
int CORE_zssssm(int M1, int M2, int NN, int IB, int K,
                PLASMA_Complex64_t *A1, int LDA1,
                PLASMA_Complex64_t *A2, int LDA2,
                PLASMA_Complex64_t *L1, int LDL1,
                PLASMA_Complex64_t *L2, int LDL2,
                int *IPIV)
{
    static PLASMA_Complex64_t zone  = 1.0;
    static PLASMA_Complex64_t mzone =-1.0;

    int i, ii, sb;
    int im;

    /*
     * Check input arguments
     */
    if (M1 < 0) {
        plasma_error("CORE_zssssm", "illegal value of M1");
        return -1;
    }
    if (M2 < 0) {
        plasma_error("CORE_zssssm", "illegal value of M2");
        return -2;
    }
    if (NN < 0) {
        plasma_error("CORE_zssssm", "illegal value of NN");
        return -3;
    }
    if (IB < 0) {
        plasma_error("CORE_zssssm", "illegal value of IB");
        return -4;
    }
    if (K < 0) {
        plasma_error("CORE_zssssm", "illegal value of K");
        return -5;
    }

    /*
     * Quick return
     */
    if ( (M1 == 0) || (M2 == 0) || (NN == 0) || (IB == 0) || (K == 0) )
        return PLASMA_SUCCESS;

    for(ii=0; ii<K; ii+=IB) {
        /* The last panel is narrower than IB when K is not a multiple of IB */
        sb = min( K-ii, IB );

        /*
         * Apply the interchanges of this panel.  Only the sb pivots that
         * belong to the panel are visited: iterating up to IB would read
         * IPIV past its K valid entries and could swap rows of A1 that the
         * ztrsm/zgemm below never touch.
         */
        for(i=0; i<sb; i++) {
            im = IPIV[ii+i]-1;

            if (im != (ii+i)) {
                /* Convert the stacked-matrix row index into a row of A2 */
                im = im - M1;
                cblas_zswap(NN, &A1[ii+i], LDA1,
                            &A2[im], LDA2 );
            }
        }

        /* A1(ii:ii+sb-1, :) := inv(L1 panel) * A1(ii:ii+sb-1, :) */
        cblas_ztrsm(CblasColMajor, CblasLeft, CblasLower,
                    CblasNoTrans, CblasUnit,
                    sb, NN, CBLAS_SADDR(zone),
                    &L1[LDL1*ii], LDL1,
                    &A1[ii],      LDA1);

        /* A2 := A2 - L2(:, ii:ii+sb-1) * A1(ii:ii+sb-1, :) */
        cblas_zgemm(CblasColMajor, CblasNoTrans, CblasNoTrans,
                    M2, NN, sb,
                    CBLAS_SADDR(mzone), &L2[LDL2*ii], LDL2,
                    &A1[ii], LDA1,
                    CBLAS_SADDR(zone), A2, LDA2);
    }
    return PLASMA_SUCCESS;
}

/***************************************************************************//**
 *
 *  Task-style entry point for CORE_zssssm.
 *
 *  Fills fourteen locals through quark_unpack_args_14 (presumably from the
 *  argument list packed into the task; the macro is defined outside this
 *  file) and forwards them, in the same order, to CORE_zssssm.
 *  The status returned by CORE_zssssm is discarded.
 *
 **/
void CORE_zssssm_quark(Quark* quark)
{
    /* Tile dimensions, inner-blocking size and pivot count */
    int M1, M2, NN, IB, K;
    /* Leading dimensions of A1, A2, L1 and L2 */
    int LDA1, LDA2, LDL1, LDL2;
    /* Tiles to update and the factors applied to them */
    PLASMA_Complex64_t *A1, *A2;
    PLASMA_Complex64_t *L1, *L2;
    /* Pivot indices */
    int *IPIV;

    /* The unpack order must match the kernel's parameter order */
    quark_unpack_args_14(quark,
                         M1, M2, NN, IB, K,
                         A1, LDA1,
                         A2, LDA2,
                         L1, LDL1,
                         L2, LDL2,
                         IPIV);

    CORE_zssssm(M1, M2, NN, IB, K,
                A1, LDA1,
                A2, LDA2,
                L1, LDL1,
                L2, LDL2,
                IPIV);
}
