/*////////////////////////////////////////////////////////////////////////////////////////
 *  -- PLASMA --
 *     University of Tennessee
 */
#include "common.h"
#include "allocate.h"
#include "auxiliary.h"

#include <malloc.h>
#include <fcntl.h>
#include <sys/mman.h>
#include <unistd.h>
#include <stdio.h>

/*////////////////////////////////////////////////////////////////////////////////////////
 *  Allocate auxiliary structures WORK[cores_max][NB_max^2] and TAU[cores_max][NB_max]
 *
 *  One contiguous block is obtained with memalign() for WORK and another one for TAU.
 *  The per-core size is padded up to a multiple of CACHE_LINE_SIZE and the per-core
 *  pointers are spaced by that padded size, so every slice starts on a cache line
 *  boundary.
 *
 *  Returns PLASMA_SUCCESS, or PLASMA_ERR_OUT_OF_MEMORY if either allocation fails.
 *  When the TAU allocation fails, the WORK block obtained earlier is released and
 *  the WORK pointers are reset to NULL, so nothing stays allocated on failure.
 */
int plasma_alloc_aux_work_tau()
{
    size_t size_bytes;
    size_t stride_elems;
    double *work_block;
    double *tau_block;
    double *slice;
    int core;

    /* Allocate cache line aligned workspace of size NB_max^2 for each core.
     * Sizes are computed in size_t so that large NB_max / cores_max values do not
     * overflow a signed int. */
    size_bytes = (size_t)plasma_cntrl.NB_max * (size_t)plasma_cntrl.NB_max * sizeof(double);
    size_bytes = roundup(size_bytes, CACHE_LINE_SIZE);
    stride_elems = size_bytes / sizeof(double);

    work_block = (double*)memalign(CACHE_LINE_SIZE, size_bytes * (size_t)plasma_cntrl.cores_max);
    if (work_block == NULL) {
        plasma_error("plasma_alloc_aux", "out of memory");
        return PLASMA_ERR_OUT_OF_MEMORY;
    }
    /* Step by the padded slice size, not by the raw element count; otherwise the
     * padding reserved above is never used and slices lose their alignment. */
    slice = work_block;
    for (core = 0; core < plasma_cntrl.cores_max; core++) {
        plasma_aux.WORK[core] = slice;
        slice += stride_elems;
    }

    /* Allocate cache line aligned workspace of size NB_max for each core */
    size_bytes = (size_t)plasma_cntrl.NB_max * sizeof(double);
    size_bytes = roundup(size_bytes, CACHE_LINE_SIZE);
    stride_elems = size_bytes / sizeof(double);

    tau_block = (double*)memalign(CACHE_LINE_SIZE, size_bytes * (size_t)plasma_cntrl.cores_max);
    if (tau_block == NULL) {
        plasma_error("plasma_alloc_aux", "out of memory");
        /* Do not leak the WORK block and do not leave dangling pointers behind */
        free(work_block);
        for (core = 0; core < plasma_cntrl.cores_max; core++)
            plasma_aux.WORK[core] = NULL;
        return PLASMA_ERR_OUT_OF_MEMORY;
    }
    slice = tau_block;
    for (core = 0; core < plasma_cntrl.cores_max; core++) {
        plasma_aux.TAU[core] = slice;
        slice += stride_elems;
    }
    return PLASMA_SUCCESS;
}

/*////////////////////////////////////////////////////////////////////////////////////////
 *  Free auxiliary structures WORK[cores_max][NB_max^2] and TAU[cores_max][NB_max]
 *
 *  WORK[0] and TAU[0] are the base addresses of the two blocks handed out by
 *  plasma_alloc_aux_work_tau(); the other per-core entries point inside those blocks.
 *  After each block is released its base pointer is reset to NULL, so that a repeated
 *  call is caught by the NULL checks below instead of freeing the same block twice.
 *  The remaining per-core entries are not touched and must not be used after this call.
 *
 *  Returns PLASMA_SUCCESS, or PLASMA_ERR_UNALLOCATED if WORK[0] or TAU[0] is NULL.
 *  Note that a NULL WORK[0] causes an early return before TAU is examined.
 */
int plasma_free_aux_work_tau()
{
    if (plasma_aux.WORK[0] == NULL) {
        plasma_error("plasma_free_aux_work_tau", "attempting to free null pointer");
        return PLASMA_ERR_UNALLOCATED;
    }
    free(plasma_aux.WORK[0]);
    plasma_aux.WORK[0] = NULL;

    if (plasma_aux.TAU[0] == NULL) {
        plasma_error("plasma_free_aux_work_tau", "attempting to free null pointer");
        return PLASMA_ERR_UNALLOCATED;
    }
    free(plasma_aux.TAU[0]);
    plasma_aux.TAU[0] = NULL;
    return PLASMA_SUCCESS;
}

/*////////////////////////////////////////////////////////////////////////////////////////
 *  Allocate auxiliary structure progress
 *
 *  size_elems is the requested number of int entries. The byte size is rounded up to
 *  a multiple of plasma_cntrl.page_size and the block is aligned to the same value.
 *  On success the block is stored in plasma_aux.progress and the padded capacity,
 *  expressed in ints, is stored in plasma_cntrl.progress_size_elems.
 *
 *  Returns PLASMA_SUCCESS or PLASMA_ERR_OUT_OF_MEMORY.
 */
int plasma_alloc_aux_progress(PLASMA_long size_elems)
{
    PLASMA_long nbytes = size_elems * sizeof(int);
    int *table;

    /* Pad the request to a whole number of pages */
    nbytes = roundup(nbytes, plasma_cntrl.page_size);

    table = (int*)memalign(plasma_cntrl.page_size, nbytes);
    if (table != NULL) {
        plasma_aux.progress = table;
        plasma_cntrl.progress_size_elems = nbytes / sizeof(int);
        return PLASMA_SUCCESS;
    }

    plasma_error("plasma_alloc_aux_progress", "out of memory");
    return PLASMA_ERR_OUT_OF_MEMORY;
}

/*////////////////////////////////////////////////////////////////////////////////////////
 *  Free auxiliary structure progress
 *
 *  Releases the block stored in plasma_aux.progress, resets the pointer to NULL and
 *  sets plasma_cntrl.progress_size_elems to 0. Resetting the pointer makes the NULL
 *  check effective on a repeated call, which would otherwise free the block twice.
 *
 *  Returns PLASMA_SUCCESS, or PLASMA_ERR_UNALLOCATED if the pointer is NULL.
 */
int plasma_free_aux_progress()
{
    if (plasma_aux.progress == NULL) {
        plasma_error("plasma_free_aux_progress", "attempting to free null pointer");
        return PLASMA_ERR_UNALLOCATED;
    }
    free((void*)plasma_aux.progress);
    plasma_aux.progress = NULL;
    plasma_cntrl.progress_size_elems = 0;
    return PLASMA_SUCCESS;
}

/*////////////////////////////////////////////////////////////////////////////////////////
 *  Allocate storage for Block Data Layout
 *
 *  size_elems is the requested number of doubles. The routine first tries to map the
 *  storage from HUGE_PAGE_FILE_NAME; if any step of that attempt fails it falls back
 *  to a page-aligned memalign() allocation. When warning is true, the reason for not
 *  using huge pages is reported; failures of open() and mmap() are always reported.
 *
 *  On success plasma_aux.bdl_mem holds the storage, plasma_cntrl.bdl_size_elems holds
 *  its padded capacity in doubles and plasma_cntrl.bdl_huge_pages tells which kind of
 *  memory was obtained (for huge pages the descriptor is kept in huge_page_fmem).
 *
 *  Returns PLASMA_SUCCESS, or PLASMA_ERR_OUT_OF_MEMORY if the fallback fails too.
 */
int plasma_alloc_aux_bdl(PLASMA_long size_elems, PLASMA_bool warning)
{
    PLASMA_long nbytes;
    PLASMA_long offset;
    double *storage;
    int rc;
    int fd;

    /* Attempt allocation in huge pages; every failure jumps to the fallback */
    if (plasma_cntrl.huge_pages_total <= 0) {
        if (warning)
            plasma_warning("plasma_alloc_aux_bdl", "huge pages not available");
        goto standard_pages;
    }

    nbytes = size_elems * sizeof(double);
    nbytes = roundup(nbytes, plasma_cntrl.huge_page_size);

    if (nbytes > plasma_cntrl.huge_pages_total * plasma_cntrl.huge_page_size) {
        if (warning)
            plasma_warning("plasma_alloc_aux_bdl", "not enough total huge pages");
        goto standard_pages;
    }
    if (nbytes > plasma_cntrl.huge_pages_free * plasma_cntrl.huge_page_size) {
        if (warning)
            plasma_warning("plasma_alloc_aux_bdl", "not enough free huge pages");
        goto standard_pages;
    }

    fd = open(HUGE_PAGE_FILE_NAME, O_CREAT | O_RDWR, 0755);
    if (fd == -1) {
        plasma_warning("plasma_alloc_aux_bdl", "failed to open huge page file");
        goto standard_pages;
    }

    storage = (double*)mmap(0, nbytes, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
    if (storage == MAP_FAILED) {
        plasma_warning("plasma_alloc_aux_bdl", "mmap() failed");
        /* Undo the open(): close the descriptor and, if that worked, remove the file */
        rc = close(fd);
        if (rc != 0) {
            plasma_error("plasma_alloc_aux_bdl", "close() failed");
        }
        else {
            rc = remove(HUGE_PAGE_FILE_NAME);
            if (rc != 0) {
                plasma_error("plasma_alloc_aux_bdl", "remove() failed");
            }
        }
        goto standard_pages;
    }

    /* Huge page mapping obtained: publish it */
    plasma_aux.bdl_mem = storage;
    plasma_cntrl.huge_page_fmem = fd;
    plasma_cntrl.bdl_huge_pages = PLASMA_TRUE;
    plasma_cntrl.bdl_size_elems = nbytes / sizeof(double);

    /* Fault-in huge pages by writing one byte in each of them */
    offset = 0;
    while (offset < nbytes) {
        ((char*)storage)[offset] = 0;
        offset += plasma_cntrl.huge_page_size;
    }
    return PLASMA_SUCCESS;

standard_pages:
    if (warning)
        plasma_warning("plasma_alloc_aux_bdl", "failed to allocate huge pages");

    /* Allocate in standard pages */
    plasma_cntrl.bdl_huge_pages = PLASMA_FALSE;
    nbytes = size_elems * sizeof(double);
    nbytes = roundup(nbytes, plasma_cntrl.page_size);

    storage = (double*)memalign(plasma_cntrl.page_size, nbytes);
    if (storage == NULL)
        return PLASMA_ERR_OUT_OF_MEMORY;

    plasma_aux.bdl_mem = storage;
    plasma_cntrl.bdl_size_elems = nbytes / sizeof(double);
    return PLASMA_SUCCESS;
}

/*////////////////////////////////////////////////////////////////////////////////////////
 *  Free storage of Block Data Layout
 *
 *  Depending on plasma_cntrl.bdl_huge_pages the storage in plasma_aux.bdl_mem is
 *  either unmapped (followed by closing the huge page descriptor and removing
 *  HUGE_PAGE_FILE_NAME) or released with free(). Once the storage has been released,
 *  plasma_aux.bdl_mem is reset to NULL so that a repeated call is rejected instead of
 *  unmapping or freeing the same address a second time.
 *
 *  Returns PLASMA_ERR_UNALLOCATED if there is no storage to release. Failures of
 *  munmap(), close() and remove() are reported through plasma_error() only; the
 *  function still sets bdl_size_elems to 0 and returns PLASMA_SUCCESS in that case.
 */
int plasma_free_aux_bdl()
{
    int status;

    if (plasma_cntrl.bdl_huge_pages) {
        /* NULL is tested as well: it is the value left behind by a previous release */
        if (plasma_aux.bdl_mem == MAP_FAILED || plasma_aux.bdl_mem == NULL) {
            plasma_error("plasma_free_aux_bdl", "attempting to free unmapped memory");
            return PLASMA_ERR_UNALLOCATED;
        }
        status = munmap(plasma_aux.bdl_mem, plasma_cntrl.bdl_size_elems * sizeof(double));
        if (status != 0) {
            plasma_error("plasma_free_aux_bdl", "munmap() failed");
        }
        else {
            /* The mapping is gone; forget its address before the remaining cleanup */
            plasma_aux.bdl_mem = NULL;
            status = close(plasma_cntrl.huge_page_fmem);
            if (status != 0) {
                plasma_error("plasma_free_aux_bdl", "close() failed");
            }
            else {
                status = remove(HUGE_PAGE_FILE_NAME);
                if (status != 0)
                    plasma_error("plasma_free_aux_bdl", "remove() failed");
            }
        }
    }
    else {
        if (plasma_aux.bdl_mem == NULL) {
            plasma_error("plasma_free_aux_bdl", "attempting to free null pointer");
            return PLASMA_ERR_UNALLOCATED;
        }
        free(plasma_aux.bdl_mem);
        plasma_aux.bdl_mem = NULL;
    }
    plasma_cntrl.bdl_size_elems = 0;
    return PLASMA_SUCCESS;
}

/*////////////////////////////////////////////////////////////////////////////////////////
 *  Allocate user's storage for T
 *
 *  M, N are the matrix dimensions passed to plasma_tune() with PLASMA_TUNE_DGELS.
 *  The block holds MT*NT*IBNBSIZE doubles, where MT and NT are M and N divided by
 *  plasma_cntrl.NB and rounded up, and it is aligned to plasma_cntrl.page_size.
 *
 *  Returns the allocated block, or NULL if PLASMA is not initialized, plasma_tune()
 *  fails or memory is exhausted. The block can be released with plasma_Free_T().
 */
double *plasma_Allocate_T(int M, int N)
{
    int status;
    int NB, MT, NT;
    size_t size_bytes;
    double *T;

    /* Check if initialized */
    if (!plasma_cntrl.initialized) {
        plasma_warning("plasma_allocate_T", "PLASMA not initialized");
        return NULL;
    }

    /* Tune NB & IB depending on M, N & NRHS; Set NBNBSIZE */
    status = plasma_tune(PLASMA_TUNE_DGELS, M, N, 0);
    if (status != PLASMA_SUCCESS) {
        plasma_error("plasma_allocate_T", "plasma_tune() failed");
        return NULL;
    }

    /* Set MT & NT (number of tile rows / columns, rounded up) */
    NB = plasma_cntrl.NB;
    NT = (N%NB==0) ? (N/NB) : (N/NB+1);
    MT = (M%NB==0) ? (M/NB) : (M/NB+1);

    /* Widen to size_t before multiplying: the product of the three int factors can
     * exceed INT_MAX for large problems, which would be signed overflow. */
    size_bytes = (size_t)MT * (size_t)NT * (size_t)plasma_cntrl.IBNBSIZE * sizeof(double);

    T = (double*)memalign(plasma_cntrl.page_size, size_bytes);
    if (T == NULL) {
        plasma_error("plasma_allocate_T", "out of memory");
        return NULL;
    }
    return T;
}

/*////////////////////////////////////////////////////////////////////////////////////////
 *  Free user's storage for T
 *
 *  T is handed to free(), so it must be a block that may be released that way, such
 *  as one returned by plasma_Allocate_T().
 *
 *  Returns PLASMA_SUCCESS, PLASMA_ERR_NOT_INITIALIZED if PLASMA is not initialized,
 *  or PLASMA_ERR_UNALLOCATED if T is NULL.
 */
int plasma_Free_T(double *T)
{
    if (plasma_cntrl.initialized) {
        if (T != NULL) {
            free((void*)T);
            return PLASMA_SUCCESS;
        }
        /* A NULL argument is reported as an error rather than silently ignored */
        plasma_error("plasma_free_T", "attempting to free null pointer");
        return PLASMA_ERR_UNALLOCATED;
    }

    /* Library not initialized: nothing is released */
    plasma_warning("plasma_free_T", "PLASMA not initialized");
    return PLASMA_ERR_NOT_INITIALIZED;
}

/*////////////////////////////////////////////////////////////////////////////////////////
 *  Allocate user's storage for L
 *
 *  M, N are the matrix dimensions passed to plasma_tune() with PLASMA_TUNE_DGESV.
 *  The block holds MT*NT*IBNBSIZE doubles, where MT and NT are M and N divided by
 *  plasma_cntrl.NB and rounded up, and it is aligned to plasma_cntrl.page_size.
 *
 *  Returns the allocated block, or NULL if PLASMA is not initialized, plasma_tune()
 *  fails or memory is exhausted. The block can be released with plasma_Free_L().
 */
double *plasma_Allocate_L(int M, int N)
{
    int status;
    int NB, MT, NT;
    size_t size_bytes;
    double *L;

    /* Check if initialized */
    if (!plasma_cntrl.initialized) {
        plasma_warning("plasma_allocate_L", "PLASMA not initialized");
        return NULL;
    }

    /* Tune NB & IB depending on M, N & NRHS; Set NBNBSIZE */
    status = plasma_tune(PLASMA_TUNE_DGESV, M, N, 0);
    if (status != PLASMA_SUCCESS) {
        plasma_error("plasma_allocate_L", "plasma_tune() failed");
        return NULL;
    }

    /* Set MT & NT (number of tile rows / columns, rounded up) */
    NB = plasma_cntrl.NB;
    NT = (N%NB==0) ? (N/NB) : (N/NB+1);
    MT = (M%NB==0) ? (M/NB) : (M/NB+1);

    /* Widen to size_t before multiplying: the product of the three int factors can
     * exceed INT_MAX for large problems, which would be signed overflow. */
    size_bytes = (size_t)MT * (size_t)NT * (size_t)plasma_cntrl.IBNBSIZE * sizeof(double);

    L = (double*)memalign(plasma_cntrl.page_size, size_bytes);
    if (L == NULL) {
        plasma_error("plasma_allocate_L", "out of memory");
        return NULL;
    }
    return L;
}

/*////////////////////////////////////////////////////////////////////////////////////////
 *  Free user's storage for L
 *
 *  L is handed to free(), so it must be a block that may be released that way, such
 *  as one returned by plasma_Allocate_L().
 *
 *  Returns PLASMA_SUCCESS, PLASMA_ERR_NOT_INITIALIZED if PLASMA is not initialized,
 *  or PLASMA_ERR_UNALLOCATED if L is NULL.
 */
int plasma_Free_L(double *L)
{
    if (plasma_cntrl.initialized) {
        if (L != NULL) {
            free((void*)L);
            return PLASMA_SUCCESS;
        }
        /* A NULL argument is reported as an error rather than silently ignored */
        plasma_error("plasma_free_L", "attempting to free null pointer");
        return PLASMA_ERR_UNALLOCATED;
    }

    /* Library not initialized: nothing is released */
    plasma_warning("plasma_free_L", "PLASMA not initialized");
    return PLASMA_ERR_NOT_INITIALIZED;
}

/*////////////////////////////////////////////////////////////////////////////////////////
 *  Allocate user's storage for IPIV
 *
 *  M, N are the matrix dimensions passed to plasma_tune() with PLASMA_TUNE_DGESV.
 *  The block holds (MT*NB)*NT ints, where MT and NT are M and N divided by
 *  plasma_cntrl.NB and rounded up, and it is aligned to plasma_cntrl.page_size.
 *
 *  Returns the allocated block, or NULL if PLASMA is not initialized, plasma_tune()
 *  fails or memory is exhausted. The block can be released with plasma_Free_IPIV().
 */
int *plasma_Allocate_IPIV(int M, int N)
{
    int status;
    int NB, MT, NT;
    size_t size_bytes;
    int *IPIV;

    /* Check if initialized */
    if (!plasma_cntrl.initialized) {
        plasma_warning("plasma_allocate_IPIV", "PLASMA not initialized");
        return NULL;
    }

    /* Tune NB & IB depending on M, N & NRHS; Set NBNBSIZE */
    status = plasma_tune(PLASMA_TUNE_DGESV, M, N, 0);
    if (status != PLASMA_SUCCESS) {
        plasma_error("plasma_allocate_IPIV", "plasma_tune() failed");
        return NULL;
    }

    /* Set MT & NT (number of tile rows / columns, rounded up) */
    NB = plasma_cntrl.NB;
    NT = (N%NB==0) ? (N/NB) : (N/NB+1);
    MT = (M%NB==0) ? (M/NB) : (M/NB+1);

    /* Widen to size_t before multiplying: the product of the three int factors can
     * exceed INT_MAX for large problems, which would be signed overflow. */
    size_bytes = (size_t)MT * (size_t)NB * (size_t)NT * sizeof(int);

    IPIV = (int*)memalign(plasma_cntrl.page_size, size_bytes);
    if (IPIV == NULL) {
        plasma_error("plasma_allocate_IPIV", "out of memory");
        return NULL;
    }
    return IPIV;
}

/*////////////////////////////////////////////////////////////////////////////////////////
 *  Free user's storage for IPIV
 *
 *  IPIV is handed to free(), so it must be a block that may be released that way,
 *  such as one returned by plasma_Allocate_IPIV().
 *
 *  Returns PLASMA_SUCCESS, PLASMA_ERR_NOT_INITIALIZED if PLASMA is not initialized,
 *  or PLASMA_ERR_UNALLOCATED if IPIV is NULL.
 */
int plasma_Free_IPIV(int *IPIV)
{
    if (plasma_cntrl.initialized) {
        if (IPIV != NULL) {
            free((void*)IPIV);
            return PLASMA_SUCCESS;
        }
        /* A NULL argument is reported as an error rather than silently ignored */
        plasma_error("plasma_free_IPIV", "attempting to free null pointer");
        return PLASMA_ERR_UNALLOCATED;
    }

    /* Library not initialized: nothing is released */
    plasma_warning("plasma_free_IPIV", "PLASMA not initialized");
    return PLASMA_ERR_NOT_INITIALIZED;
}
