/*////////////////////////////////////////////////////////////////////////////////////////
 *  -- PLASMA --
 *     University of Tennessee
 */
#include "common.h"
#include "initialize.h"
#include "auxiliary.h"
#include "allocate.h"
#include "barrier.h"
#include "core_control.h"

#include <stdio.h>
#include <unistd.h>
#include <stdlib.h>
#include <pthread.h>

/*////////////////////////////////////////////////////////////////////////////////////////
 *  Run a shell command and parse the first line of its output as an integer
 *
 *  cmd   - command line handed to popen()
 *  value - receives atoi() of the first output line on success; untouched on failure
 *
 *  Returns 0 on success and -1 if popen(), fgets() or pclose() fails.
 *  The stream is only read and closed when popen() actually returned one.
 */
static int plasma_shell_read_int(const char *cmd, int *value)
{
    FILE *shell_cmd_fp;
    char sh_cmd_out[SHELL_CMD_OUT_MAX];
    char *fgets_status;
    int pclose_status;

    shell_cmd_fp = popen(cmd, "r");
    if (shell_cmd_fp == NULL)
        return -1;

    fgets_status = fgets(sh_cmd_out, SHELL_CMD_OUT_MAX, shell_cmd_fp);
    /* Always close the pipe, even when nothing could be read */
    pclose_status = pclose(shell_cmd_fp);
    if (fgets_status == NULL || pclose_status == -1)
        return -1;

    *value = atoi(sh_cmd_out);
    return 0;
}

/*////////////////////////////////////////////////////////////////////////////////////////
 *  Get memory information
 *
 *  Fills in the following fields of plasma_cntrl:
 *    cache_line_size  - set to CACHE_LINE_SIZE
 *    page_size        - sysconf(_SC_PAGESIZE)
 *    huge_pages_total - HugePages_Total from /proc/meminfo (0 if no "Huge" entries)
 *    huge_pages_free  - HugePages_Free from /proc/meminfo  (0 if no "Huge" entries)
 *    huge_page_size   - Hugepagesize from /proc/meminfo times 1024
 *                       (0 if no "Huge" entries)
 *
 *  Returns:
 *    PLASMA_SUCCESS            - all values obtained
 *    PLASMA_ERR_NOT_FOUND      - page size or one of the huge page values unavailable
 *    PLASMA_ERR_NOT_SUPPORTED  - the initial popen() probe failed
 *    PLASMA_ERR_FILESYSTEM     - pclose() of the initial probe failed
 *    PLASMA_ERR_ILLEGAL_VALUE  - huge page size is not positive
 */
int plasma_get_meminfo()
{
    FILE *shell_cmd_fp;
    char sh_cmd_out[SHELL_CMD_OUT_MAX];
    char *fgets_status;
    int pclose_status;
    int shell_value;

    /* Set cache line size */
    plasma_cntrl.cache_line_size = CACHE_LINE_SIZE;

    /* Get standard page size */
    plasma_cntrl.page_size = sysconf(_SC_PAGESIZE);
    if (plasma_cntrl.page_size <= 0) {
        plasma_error("plasma_get_meminfo", "failed to get page size");
        return PLASMA_ERR_NOT_FOUND;
    }

    /* Find out if huge pages are supported:
     * any output line from the probe means "Huge" entries exist */
    shell_cmd_fp = popen("cat /proc/meminfo | grep Huge", "r");
    if (shell_cmd_fp == NULL) {
        plasma_error("plasma_get_meminfo", "popen() failed");
        return PLASMA_ERR_NOT_SUPPORTED;
    }
    fgets_status = fgets(sh_cmd_out, SHELL_CMD_OUT_MAX, shell_cmd_fp);
    pclose_status = pclose(shell_cmd_fp);
    if (pclose_status == -1) {
        plasma_error("plasma_get_meminfo", "pclose() failed");
        return PLASMA_ERR_FILESYSTEM;
    }

    /* No "Huge" entries: report huge pages as unavailable */
    if (fgets_status == NULL) {
        plasma_cntrl.huge_pages_total = 0;
        plasma_cntrl.huge_pages_free = 0;
        plasma_cntrl.huge_page_size = 0;
        return PLASMA_SUCCESS;
    }

    /* Get total number of huge pages */
    if (plasma_shell_read_int(
            "cat /proc/meminfo | awk '/HugePages_Total/ {print $2}'", &shell_value) != 0) {
        plasma_error("plasma_get_meminfo", "failed to get total huge pages");
        return PLASMA_ERR_NOT_FOUND;
    }
    plasma_cntrl.huge_pages_total = shell_value;

    /* Get number of free huge pages */
    if (plasma_shell_read_int(
            "cat /proc/meminfo | awk '/HugePages_Free/ {print $2}'", &shell_value) != 0) {
        plasma_error("plasma_get_meminfo", "failed to get free huge pages");
        return PLASMA_ERR_NOT_FOUND;
    }
    plasma_cntrl.huge_pages_free = shell_value;

    /* Get huge page size */
    if (plasma_shell_read_int(
            "cat /proc/meminfo | awk '/Hugepagesize/ {print $2}'", &shell_value) != 0) {
        plasma_error("plasma_get_meminfo", "failed to get huge page size");
        return PLASMA_ERR_NOT_FOUND;
    }
    /* /proc/meminfo reports the size in kB; store it scaled by 1024 */
    plasma_cntrl.huge_page_size = shell_value * 1024;
    if (plasma_cntrl.huge_page_size <= 0) {
        plasma_error("plasma_get_meminfo", "invalid huge page size");
        return PLASMA_ERR_ILLEGAL_VALUE;
    }

    return PLASMA_SUCCESS;
}

/*////////////////////////////////////////////////////////////////////////////////////////
 *  Parallel (per-core) initialization
 *
 *  Stores the calling thread's pthread handle in plasma_cntrl.thread_id[my_core_id].
 *  my_core_id is used directly as the array index; it is not range-checked here.
 *  Always returns PLASMA_SUCCESS.
 */
int plasma_parallel_init(int my_core_id)
{
    pthread_t caller = pthread_self();

    plasma_cntrl.thread_id[my_core_id] = caller;

    return PLASMA_SUCCESS;
}

/*////////////////////////////////////////////////////////////////////////////////////////
 *  PLASMA initialization
 */
int plasma_Init(int M, int N, int NRHS)
{
    PLASMA_bool huge_page_warning = PLASMA_TRUE;
    PLASMA_long size_elems;
    PLASMA_long size_bytes;
    int NB, IB, MT, NT, NTRHS;
    int status;
    int core;

    /* Check if not initialized */
    if (plasma_cntrl.initialized) {
        plasma_warning("plasma_init", "PLASMA re-initialized");
        return PLASMA_ERR_REINITIALIZED;
    }

    /* Get system size (number of cores)
       Set number of cores to system size */
    plasma_cntrl.cores_max = sysconf(_SC_NPROCESSORS_ONLN);
    plasma_cntrl.cores_num = plasma_cntrl.cores_max;
    if (plasma_cntrl.cores_max <= 0) {
        plasma_error("plasma_init", "failed to get system size");
        return PLASMA_ERR_NOT_FOUND;
    }
    /* Check if not more cores than the hard limit */
    if (plasma_cntrl.cores_max > CORES_MAX) {
        plasma_error("plasma_init", "not supporting so many cores");
        return PLASMA_ERR_INTERNAL_LIMIT;
    }

    status = plasma_get_meminfo();
    if (status != PLASMA_SUCCESS) {
        plasma_error("plasma_init", "plasma_get_meminfo() failed");
    }

    /* Allocate temporary kernel workspace */
    status = plasma_alloc_aux_work_tau();
    if (status != PLASMA_SUCCESS) {
        plasma_error("plasma_init", "plasma_alloc_work_tau() failed");
        return status;
    }

    /* Allocate progress table using hinted problem size values
     * On failure recursively decrease the size by 25% */
    NB = plasma_cntrl.NB_min;
    MT = (M%NB==0) ? (M/NB) : (M/NB+1);
    NT = (N%NB==0) ? (N/NB) : (N/NB+1);
    NTRHS = (NRHS%NB==0) ? (NRHS/NB) : (NRHS/NB+1);
    size_elems = max(MT, NT)*max(NT, NTRHS);
    do {
        status = plasma_alloc_aux_progress(size_elems);
        if (status != PLASMA_SUCCESS) {
            size_elems = size_elems / 4 * 3;
            if (size_elems == 0) {
                plasma_error("plasma_init", "plasma_alloc_aux_progress() failed");
                return PLASMA_ERR_OUT_OF_MEMORY;
            }
        }
    }
    while (status != PLASMA_SUCCESS);

    /* Allocate bdl memory using hinted problem size values
     * On failure recursively decrease the size by 25% */
    NB = plasma_cntrl.NB_max;
    IB = plasma_cntrl.IB_max;
    MT = (M%NB==0) ? (M/NB) : (M/NB+1);
    NT = (N%NB==0) ? (N/NB) : (N/NB+1);
    NTRHS = (NRHS%NB==0) ? (NRHS/NB) : (NRHS/NB+1);
    size_elems  = (MT*NT + MT*NTRHS)*NB*NB + (MT*NT)*IB*NB;
    size_bytes = size_elems * sizeof(double);
    /* Warn if free huge pages is less than total */
    if (plasma_cntrl.huge_pages_free < plasma_cntrl.huge_pages_total)
        plasma_warning("plasma_init", "not all huge pages are free");
    /* If huge pages available but more requested than free
     * attempt to allocate the number of huge pages free */
    if (plasma_cntrl.huge_pages_free > 0) {
        size_bytes = min(size_bytes, plasma_cntrl.huge_pages_free * plasma_cntrl.huge_page_size);
        size_elems = size_bytes / sizeof(double);
    }
    do {
        status = plasma_alloc_aux_bdl(size_elems, huge_page_warning);
        if (status != PLASMA_SUCCESS) {
            huge_page_warning = PLASMA_FALSE;
            size_elems = size_elems / 4 * 3;
            if (size_elems == 0) {
                plasma_error("plasma_init", "plasma_alloc_aux_bld() failed");
                return PLASMA_ERR_OUT_OF_MEMORY;
            }
        }
    }
    while (status != PLASMA_SUCCESS);

    /* Initialize barrier */
    plasma_barrier_init();

    /* Initialize default thread attributes */
    status = pthread_attr_init(&plasma_cntrl.core_attr);
    if (status != 0) {
        plasma_error("plasma_init()", "pthread_attr_init() failed");
        return status;
    }

    /* Set scope to system */
    status = pthread_attr_setscope(&plasma_cntrl.core_attr, PTHREAD_SCOPE_SYSTEM);
    if (status != 0) {
        plasma_error("plasma_init()", "pthread_attr_setscope() failed");
        return status;
    }

    /* Set concurrency */
    status = pthread_setconcurrency(plasma_cntrl.cores_num);
    if (status != 0) {
        plasma_error("plasma_init()", "pthread_setconcurrency() failed");
        return status;
    }

    /*  Launch threads */
    for (core = 1; core < plasma_cntrl.cores_num; core++) {
        plasma_cntrl.core_num[core] = core;
        pthread_create(
            &plasma_cntrl.core_id[core],
            &plasma_cntrl.core_attr,
             plasma_parallel_section,
            &plasma_cntrl.core_num[core]);
    }
    plasma_cntrl.core_num[0] = 0;

    /* Parallel initializations for core 0 */
    status = plasma_parallel_init(0);
    if (status != PLASMA_SUCCESS) {
        plasma_error("plasma_init", "plasma_parallel_init() failed");
        return status;
    }

    plasma_barrier(0, plasma_cntrl.cores_num);
    plasma_cntrl.initialized = PLASMA_TRUE;
    return PLASMA_SUCCESS;
}

/*////////////////////////////////////////////////////////////////////////////////////////
 *  PLASMA completion
 *
 *  Posts the PLASMA_ACT_FINALIZE action, waits on the barrier, joins the threads of
 *  cores 1..cores_num-1, destroys the thread attributes and releases the auxiliary
 *  storage (bdl memory, progress table, kernel workspace).
 *
 *  Returns:
 *    PLASMA_SUCCESS              - finalization completed; failures while releasing
 *                                  auxiliary storage are reported but do not change
 *                                  the return value
 *    PLASMA_ERR_NOT_INITIALIZED  - plasma_cntrl.initialized was not set
 *    otherwise the non-zero status of pthread_join() or pthread_attr_destroy()
 */
int plasma_Finalize()
{
    void *thread_result;
    int rc;
    int worker;

    /* Nothing to do unless PLASMA has been initialized */
    if (!plasma_cntrl.initialized) {
        plasma_warning("plasma_finalize", "PLASMA not initialized");
        return PLASMA_ERR_NOT_INITIALIZED;
    }

    /* Post the termination action under the action mutex, then signal the condition */
    pthread_mutex_lock(&plasma_cntrl.action_mutex);
    plasma_cntrl.action = PLASMA_ACT_FINALIZE;
    pthread_mutex_unlock(&plasma_cntrl.action_mutex);
    pthread_cond_broadcast(&plasma_cntrl.action_condt);

    /* Wait on the barrier as core 0, then reset the action */
    plasma_barrier(0, plasma_cntrl.cores_num);
    plasma_cntrl.action = PLASMA_ACT_STAND_BY;

    /* Join every thread except core 0; stop at the first join failure */
    worker = 1;
    while (worker < plasma_cntrl.cores_num) {
        rc = pthread_join(plasma_cntrl.core_id[worker], &thread_result);
        if (rc != 0) {
            plasma_error("plasma_finalize", "pthread_join() failed");
            return rc;
        }
        worker++;
    }

    /* Destroy thread attributes */
    rc = pthread_attr_destroy(&plasma_cntrl.core_attr);
    if (rc != 0) {
        plasma_error("plasma_finalize()", "pthread_attr_destroy() failed");
        return rc;
    }

    /* Release auxiliary storage: bdl memory, then progress table, then kernel
     * workspace. Each failure is reported and the remaining releases still run. */
    if (plasma_free_aux_bdl() != PLASMA_SUCCESS)
        plasma_error("plasma_finalize", "plasma_free_aux_bdl() failed");

    if (plasma_free_aux_progress() != PLASMA_SUCCESS)
        plasma_error("plasma_finalize", "plasma_free_aux_progress() failed");

    if (plasma_free_aux_work_tau() != PLASMA_SUCCESS)
        plasma_error("plasma_finalize", "plasma_free_aux_work_tau() failed");

    plasma_cntrl.initialized = PLASMA_FALSE;
    return PLASMA_SUCCESS;
}
