/* ///////////////////////////// P /// L /// A /// S /// M /// A /////////////////////////////// */
/* ///                    PLASMA auxiliary routines (version 2.1.0)                          ///
 * ///                    Author: Jakub Kurzak, Piotr Luszczek                               ///
 * ///                    Release Date: November, 15th 2009                                  ///
 * ///                    PLASMA is a software package provided by Univ. of Tennessee,       ///
 * ///                    Univ. of California Berkeley and Univ. of Colorado Denver          /// */
/* ///////////////////////////////////////////////////////////////////////////////////////////// */

/* ///////////////////////////////////////////////////////////////////////////////////////////// */
//  PLASMA internals of interest to PLASMA core developers, but not necessarily of interest
//  to PLASMA community contributors.
#ifndef _PLASMA_GLOBAL_H_
#define _PLASMA_GLOBAL_H_

#include <plasma.h>

#include <string.h>

#if defined( _WIN32 ) || defined( _WIN64 )
#include "plasmawinthread.h"
#else
#include <pthread.h>
#endif

/* ///////////////////////////////////////////////////////////////////////////////////////////// */
//  Configuration
// Maximum number of contexts.
#define CONTEXTS_MAX         256
// Maximum number of cores per context (the macro name counts threads;
// NOTE(review): presumably one thread per core - confirm).
#define CONTEXT_THREADS_MAX  256
// Size of the parallel-function argument buffer. The plasma_pack_args_N macros
// in this file compare the summed sizeof() of the packed types against this
// value, so it is expressed in bytes.
#define ARGS_BUFF_SIZE       256
// Cache line size (NOTE(review): presumably in bytes - not used in this file).
#define CACHE_LINE_SIZE      128
// Standard page size (NOTE(review): presumably in bytes - not used in this file).
#define STANDARD_PAGE_SIZE  4096

/* ///////////////////////////////////////////////////////////////////////////////////////////// */
//  Action commands
// Values stored in plasma->action.
// plasma_parallel_call (in this file) sets PLASMA_ACT_PARALLEL before waking the
// waiters on action_condt and resets the field to PLASMA_ACT_STAND_BY after the
// first barrier. PLASMA_ACT_FINALIZE is not referenced in this file
// (NOTE(review): presumably tells the worker threads to terminate - confirm).
#define PLASMA_ACT_STAND_BY     0
#define PLASMA_ACT_PARALLEL     1
#define PLASMA_ACT_FINALIZE     2

/* ///////////////////////////////////////////////////////////////////////////////////////////// */
//  Numerical operations
// Integer identifiers of numerical operations, numbered consecutively from 1 to 18.
// None of them is referenced in this file.
// NOTE(review): the S/D/C/Z prefixes presumably denote single real, double real,
// single complex and double complex variants of GELS, POSV, GESV and GEMM - confirm.
#define PLASMA_FUNC_SGELS    1
#define PLASMA_FUNC_SPOSV    2
#define PLASMA_FUNC_SGESV    3
#define PLASMA_FUNC_DGELS    4
#define PLASMA_FUNC_DPOSV    5
#define PLASMA_FUNC_DGESV    6
#define PLASMA_FUNC_CGELS    7
#define PLASMA_FUNC_CPOSV    8
#define PLASMA_FUNC_CGESV    9
#define PLASMA_FUNC_ZGELS   10
#define PLASMA_FUNC_ZPOSV   11
#define PLASMA_FUNC_ZGESV   12
// These two use a TUNE prefix instead of FUNC
// (NOTE(review): presumably the mixed-precision GESV solvers - confirm).
#define PLASMA_TUNE_ZCGESV  13
#define PLASMA_TUNE_DSGESV  14
#define PLASMA_FUNC_SGEMM   15
#define PLASMA_FUNC_DGEMM   16
#define PLASMA_FUNC_CGEMM   17
#define PLASMA_FUNC_ZGEMM   18

/* ///////////////////////////////////////////////////////////////////////////////////////////// */
//  Parallel function call - packing of arguments
//  Packs one argument into plasma->args_buff as raw bytes.
//    type1 - type of the argument (must be usable in a plain declaration)
//    arg1  - expression converted to type1; evaluated exactly once
//  A variable named `plasma` with an `args_buff` member must be in scope where the
//  macro is expanded. The value occupies the first sizeof(type1) bytes of the buffer.
//  If sizeof(type1) exceeds ARGS_BUFF_SIZE, plasma_fatal_error() is called and nothing
//  is copied, so the buffer is never overrun even if that call returns.
//  The temporary carries a plasma_pack_ prefix so that a caller expression using an
//  ordinary name such as `var1` is not captured by the macro's own local.
//  The expansion is a brace block (not do/while) because plasma_parallel_call_N
//  places it directly in front of another block.
#define plasma_pack_args_1( \
    type1, arg1) \
{ \
    type1 plasma_pack_val1_ = (arg1); \
    if (sizeof(type1) > ARGS_BUFF_SIZE) { \
        plasma_fatal_error("plasma_pack_args_1", "arguments buffer too small"); \
    } else { \
        unsigned char *plasma_pack_dst_ = plasma->args_buff; \
        memcpy(plasma_pack_dst_, &plasma_pack_val1_, sizeof(type1)); \
    } \
}

//  Packs two arguments into plasma->args_buff as raw bytes.
//    typeK - type of argument K (must be usable in a plain declaration)
//    argK  - expression converted to typeK; each is evaluated exactly once, in order
//  A variable named `plasma` with an `args_buff` member must be in scope where the
//  macro is expanded. Values are stored back to back, without padding, starting at
//  the beginning of the buffer; plasma_unpack_args_2 reads them in the same order.
//  If the summed sizes exceed ARGS_BUFF_SIZE, plasma_fatal_error() is called and
//  nothing is copied, so the buffer is never overrun even if that call returns.
//  Temporaries carry a plasma_pack_ prefix so that caller expressions using ordinary
//  names such as `var1`/`var2` are not captured by the macro's own locals.
//  The expansion is a brace block (not do/while) because plasma_parallel_call_N
//  places it directly in front of another block.
#define plasma_pack_args_2( \
    type1, arg1, \
    type2, arg2) \
{ \
    type1 plasma_pack_val1_ = (arg1); \
    type2 plasma_pack_val2_ = (arg2); \
    if (sizeof(type1) + sizeof(type2) > ARGS_BUFF_SIZE) { \
        plasma_fatal_error("plasma_pack_args_2", "arguments buffer too small"); \
    } else { \
        unsigned char *plasma_pack_dst_ = plasma->args_buff; \
        memcpy(plasma_pack_dst_, &plasma_pack_val1_, sizeof(type1)); \
        plasma_pack_dst_ += sizeof(type1); \
        memcpy(plasma_pack_dst_, &plasma_pack_val2_, sizeof(type2)); \
    } \
}

//  Packs three arguments into plasma->args_buff as raw bytes.
//    typeK - type of argument K (must be usable in a plain declaration)
//    argK  - expression converted to typeK; each is evaluated exactly once, in order
//  A variable named `plasma` with an `args_buff` member must be in scope where the
//  macro is expanded. Values are stored back to back, without padding, starting at
//  the beginning of the buffer; plasma_unpack_args_3 reads them in the same order.
//  If the summed sizes exceed ARGS_BUFF_SIZE, plasma_fatal_error() is called and
//  nothing is copied, so the buffer is never overrun even if that call returns.
//  Temporaries carry a plasma_pack_ prefix so that caller expressions using ordinary
//  names such as `var1`..`var3` are not captured by the macro's own locals.
//  The expansion is a brace block (not do/while) because plasma_parallel_call_N
//  places it directly in front of another block.
#define plasma_pack_args_3( \
    type1, arg1, \
    type2, arg2, \
    type3, arg3) \
{ \
    type1 plasma_pack_val1_ = (arg1); \
    type2 plasma_pack_val2_ = (arg2); \
    type3 plasma_pack_val3_ = (arg3); \
    if (sizeof(type1) + sizeof(type2) + sizeof(type3) > ARGS_BUFF_SIZE) { \
        plasma_fatal_error("plasma_pack_args_3", "arguments buffer too small"); \
    } else { \
        unsigned char *plasma_pack_dst_ = plasma->args_buff; \
        memcpy(plasma_pack_dst_, &plasma_pack_val1_, sizeof(type1)); \
        plasma_pack_dst_ += sizeof(type1); \
        memcpy(plasma_pack_dst_, &plasma_pack_val2_, sizeof(type2)); \
        plasma_pack_dst_ += sizeof(type2); \
        memcpy(plasma_pack_dst_, &plasma_pack_val3_, sizeof(type3)); \
    } \
}

//  Packs four arguments into plasma->args_buff as raw bytes.
//    typeK - type of argument K (must be usable in a plain declaration)
//    argK  - expression converted to typeK; each is evaluated exactly once, in order
//  A variable named `plasma` with an `args_buff` member must be in scope where the
//  macro is expanded. Values are stored back to back, without padding, starting at
//  the beginning of the buffer; plasma_unpack_args_4 reads them in the same order.
//  If the summed sizes exceed ARGS_BUFF_SIZE, plasma_fatal_error() is called and
//  nothing is copied, so the buffer is never overrun even if that call returns.
//  Temporaries carry a plasma_pack_ prefix so that caller expressions using ordinary
//  names such as `var1`..`var4` are not captured by the macro's own locals.
//  The expansion is a brace block (not do/while) because plasma_parallel_call_N
//  places it directly in front of another block.
#define plasma_pack_args_4( \
    type1, arg1, \
    type2, arg2, \
    type3, arg3, \
    type4, arg4) \
{ \
    type1 plasma_pack_val1_ = (arg1); \
    type2 plasma_pack_val2_ = (arg2); \
    type3 plasma_pack_val3_ = (arg3); \
    type4 plasma_pack_val4_ = (arg4); \
    if (sizeof(type1) + sizeof(type2) + \
        sizeof(type3) + sizeof(type4) > ARGS_BUFF_SIZE) { \
        plasma_fatal_error("plasma_pack_args_4", "arguments buffer too small"); \
    } else { \
        unsigned char *plasma_pack_dst_ = plasma->args_buff; \
        memcpy(plasma_pack_dst_, &plasma_pack_val1_, sizeof(type1)); \
        plasma_pack_dst_ += sizeof(type1); \
        memcpy(plasma_pack_dst_, &plasma_pack_val2_, sizeof(type2)); \
        plasma_pack_dst_ += sizeof(type2); \
        memcpy(plasma_pack_dst_, &plasma_pack_val3_, sizeof(type3)); \
        plasma_pack_dst_ += sizeof(type3); \
        memcpy(plasma_pack_dst_, &plasma_pack_val4_, sizeof(type4)); \
    } \
}

//  Packs seven arguments into plasma->args_buff as raw bytes.
//    typeK - type of argument K (must be usable in a plain declaration)
//    argK  - expression converted to typeK; each is evaluated exactly once, in order
//  A variable named `plasma` with an `args_buff` member must be in scope where the
//  macro is expanded. Values are stored back to back, without padding, starting at
//  the beginning of the buffer; plasma_unpack_args_7 reads them in the same order.
//  If the summed sizes exceed ARGS_BUFF_SIZE, plasma_fatal_error() is called and
//  nothing is copied, so the buffer is never overrun even if that call returns.
//  Temporaries carry a plasma_pack_ prefix so that caller expressions using ordinary
//  names such as `var1`..`var7` are not captured by the macro's own locals.
//  The expansion is a brace block (not do/while) because plasma_parallel_call_N
//  places it directly in front of another block.
#define plasma_pack_args_7( \
    type1, arg1, \
    type2, arg2, \
    type3, arg3, \
    type4, arg4, \
    type5, arg5, \
    type6, arg6, \
    type7, arg7) \
{ \
    type1 plasma_pack_val1_ = (arg1); \
    type2 plasma_pack_val2_ = (arg2); \
    type3 plasma_pack_val3_ = (arg3); \
    type4 plasma_pack_val4_ = (arg4); \
    type5 plasma_pack_val5_ = (arg5); \
    type6 plasma_pack_val6_ = (arg6); \
    type7 plasma_pack_val7_ = (arg7); \
    if (sizeof(type1) + sizeof(type2) + sizeof(type3) + sizeof(type4) + \
        sizeof(type5) + sizeof(type6) + sizeof(type7) > ARGS_BUFF_SIZE) { \
        plasma_fatal_error("plasma_pack_args_7", "arguments buffer too small"); \
    } else { \
        unsigned char *plasma_pack_dst_ = plasma->args_buff; \
        memcpy(plasma_pack_dst_, &plasma_pack_val1_, sizeof(type1)); \
        plasma_pack_dst_ += sizeof(type1); \
        memcpy(plasma_pack_dst_, &plasma_pack_val2_, sizeof(type2)); \
        plasma_pack_dst_ += sizeof(type2); \
        memcpy(plasma_pack_dst_, &plasma_pack_val3_, sizeof(type3)); \
        plasma_pack_dst_ += sizeof(type3); \
        memcpy(plasma_pack_dst_, &plasma_pack_val4_, sizeof(type4)); \
        plasma_pack_dst_ += sizeof(type4); \
        memcpy(plasma_pack_dst_, &plasma_pack_val5_, sizeof(type5)); \
        plasma_pack_dst_ += sizeof(type5); \
        memcpy(plasma_pack_dst_, &plasma_pack_val6_, sizeof(type6)); \
        plasma_pack_dst_ += sizeof(type6); \
        memcpy(plasma_pack_dst_, &plasma_pack_val7_, sizeof(type7)); \
    } \
}

/* ///////////////////////////////////////////////////////////////////////////////////////////// */
//  Parallel function call - thread control
//  Runs parallel_function through the PLASMA context `plasma`, which must be a
//  variable in scope where the macro is expanded.
//  Sequence performed by the calling thread:
//    1. under plasma->action_mutex: set plasma->action to PLASMA_ACT_PARALLEL and
//       store the address of parallel_function in plasma->parallel_func_ptr;
//    2. after unlocking, broadcast plasma->action_condt
//       (NOTE(review): presumably wakes worker threads waiting for work - the
//       worker loop is not visible in this file);
//    3. plasma_barrier(plasma);
//    4. reset plasma->action to PLASMA_ACT_STAND_BY (written without the mutex);
//    5. call parallel_function(plasma) on the calling thread itself;
//    6. plasma_barrier(plasma) again.
//  parallel_function must be a function name: it is used both with unary & and
//  as the callee of a call expression.
//  The expansion is a bare brace block, not do { } while (0): a `;` written after
//  the invocation is a separate empty statement, so the macro cannot be followed
//  by `else` inside an unbraced `if`.
#define plasma_parallel_call(parallel_function) \
{ \
    pthread_mutex_lock(&plasma->action_mutex); \
    plasma->action = PLASMA_ACT_PARALLEL; \
    plasma->parallel_func_ptr = &parallel_function; \
    pthread_mutex_unlock(&plasma->action_mutex); \
    pthread_cond_broadcast(&plasma->action_condt); \
    plasma_barrier(plasma); \
    plasma->action = PLASMA_ACT_STAND_BY; \
    parallel_function(plasma); \
    plasma_barrier(plasma); \
}

/* ///////////////////////////////////////////////////////////////////////////////////////////// */
//  Parallel function call - packing of arguments and thread control
//  Packs one argument with plasma_pack_args_1 and then runs parallel_function
//  through plasma_parallel_call.
//    parallel_function - function handed to plasma_parallel_call
//    type1, arg1       - forwarded unchanged to plasma_pack_args_1
//  Both steps are enclosed in a single brace block so the macro behaves as one
//  statement: under an unbraced `if` or loop the packing and the call are guarded
//  together, instead of only the packing. The inner `;` are harmless empty
//  statements that keep the body valid whether the two helper macros expand to
//  brace blocks or to expressions.
#define plasma_parallel_call_1( \
           parallel_function, \
    type1, arg1) \
{ \
    plasma_pack_args_1( \
        type1, (arg1)); \
    plasma_parallel_call(parallel_function); \
}

//  Packs two arguments with plasma_pack_args_2 and then runs parallel_function
//  through plasma_parallel_call.
//    parallel_function - function handed to plasma_parallel_call
//    typeK, argK       - forwarded unchanged to plasma_pack_args_2
//  Both steps are enclosed in a single brace block so the macro behaves as one
//  statement: under an unbraced `if` or loop the packing and the call are guarded
//  together, instead of only the packing. The inner `;` are harmless empty
//  statements that keep the body valid whether the two helper macros expand to
//  brace blocks or to expressions.
#define plasma_parallel_call_2( \
           parallel_function, \
    type1, arg1, \
    type2, arg2) \
{ \
    plasma_pack_args_2( \
        type1, (arg1), \
        type2, (arg2)); \
    plasma_parallel_call(parallel_function); \
}

//  Packs three arguments with plasma_pack_args_3 and then runs parallel_function
//  through plasma_parallel_call.
//    parallel_function - function handed to plasma_parallel_call
//    typeK, argK       - forwarded unchanged to plasma_pack_args_3
//  Both steps are enclosed in a single brace block so the macro behaves as one
//  statement: under an unbraced `if` or loop the packing and the call are guarded
//  together, instead of only the packing. The inner `;` are harmless empty
//  statements that keep the body valid whether the two helper macros expand to
//  brace blocks or to expressions.
#define plasma_parallel_call_3( \
           parallel_function, \
    type1, arg1, \
    type2, arg2, \
    type3, arg3) \
{ \
    plasma_pack_args_3( \
        type1, (arg1), \
        type2, (arg2), \
        type3, (arg3)); \
    plasma_parallel_call(parallel_function); \
}

//  Packs four arguments with plasma_pack_args_4 and then runs parallel_function
//  through plasma_parallel_call.
//    parallel_function - function handed to plasma_parallel_call
//    typeK, argK       - forwarded unchanged to plasma_pack_args_4
//  Both steps are enclosed in a single brace block so the macro behaves as one
//  statement: under an unbraced `if` or loop the packing and the call are guarded
//  together, instead of only the packing. The inner `;` are harmless empty
//  statements that keep the body valid whether the two helper macros expand to
//  brace blocks or to expressions.
#define plasma_parallel_call_4( \
           parallel_function, \
    type1, arg1, \
    type2, arg2, \
    type3, arg3, \
    type4, arg4) \
{ \
    plasma_pack_args_4( \
        type1, (arg1), \
        type2, (arg2), \
        type3, (arg3), \
        type4, (arg4)); \
    plasma_parallel_call(parallel_function); \
}

//  Packs seven arguments with plasma_pack_args_7 and then runs parallel_function
//  through plasma_parallel_call.
//    parallel_function - function handed to plasma_parallel_call
//    typeK, argK       - forwarded unchanged to plasma_pack_args_7
//  Both steps are enclosed in a single brace block so the macro behaves as one
//  statement: under an unbraced `if` or loop the packing and the call are guarded
//  together, instead of only the packing. The inner `;` are harmless empty
//  statements that keep the body valid whether the two helper macros expand to
//  brace blocks or to expressions.
#define plasma_parallel_call_7( \
           parallel_function, \
    type1, arg1, \
    type2, arg2, \
    type3, arg3, \
    type4, arg4, \
    type5, arg5, \
    type6, arg6, \
    type7, arg7) \
{ \
    plasma_pack_args_7( \
        type1, (arg1), \
        type2, (arg2), \
        type3, (arg3), \
        type4, (arg4), \
        type5, (arg5), \
        type6, (arg6), \
        type7, (arg7)); \
    plasma_parallel_call(parallel_function); \
}

/* ///////////////////////////////////////////////////////////////////////////////////////////// */
//  Parallel function call - unpacking of arguments
//  Reads one argument back from plasma->args_buff.
//    arg1 - lvalue that receives sizeof(arg1) bytes taken from the start of the buffer
//  A variable named `plasma` with an `args_buff` member must be in scope where the
//  macro is expanded. No bounds check is performed here.
#define plasma_unpack_args_1( \
    arg1) \
{ \
    const unsigned char *plasma_src_ = plasma->args_buff; \
    memcpy(&(arg1), plasma_src_, sizeof(arg1)); \
}

//  Reads two arguments back from plasma->args_buff.
//    argK - lvalues filled in order; each receives sizeof(argK) bytes, taken back
//           to back from the start of the buffer
//  A variable named `plasma` with an `args_buff` member must be in scope where the
//  macro is expanded. No bounds check is performed here.
#define plasma_unpack_args_2( \
    arg1, \
    arg2) \
{ \
    const unsigned char *plasma_src_ = plasma->args_buff; \
    memcpy(&(arg1), plasma_src_, sizeof(arg1)); \
    plasma_src_ += sizeof(arg1); \
    memcpy(&(arg2), plasma_src_, sizeof(arg2)); \
}

//  Reads three arguments back from plasma->args_buff.
//    argK - lvalues filled in order; each receives sizeof(argK) bytes, taken back
//           to back from the start of the buffer
//  A variable named `plasma` with an `args_buff` member must be in scope where the
//  macro is expanded. No bounds check is performed here.
#define plasma_unpack_args_3( \
    arg1, \
    arg2, \
    arg3) \
{ \
    const unsigned char *plasma_src_ = plasma->args_buff; \
    memcpy(&(arg1), plasma_src_, sizeof(arg1)); \
    plasma_src_ += sizeof(arg1); \
    memcpy(&(arg2), plasma_src_, sizeof(arg2)); \
    plasma_src_ += sizeof(arg2); \
    memcpy(&(arg3), plasma_src_, sizeof(arg3)); \
}

//  Reads four arguments back from plasma->args_buff.
//    argK - lvalues filled in order; each receives sizeof(argK) bytes, taken back
//           to back from the start of the buffer
//  A variable named `plasma` with an `args_buff` member must be in scope where the
//  macro is expanded. No bounds check is performed here.
#define plasma_unpack_args_4( \
    arg1, \
    arg2, \
    arg3, \
    arg4) \
{ \
    const unsigned char *plasma_src_ = plasma->args_buff; \
    memcpy(&(arg1), plasma_src_, sizeof(arg1)); \
    plasma_src_ += sizeof(arg1); \
    memcpy(&(arg2), plasma_src_, sizeof(arg2)); \
    plasma_src_ += sizeof(arg2); \
    memcpy(&(arg3), plasma_src_, sizeof(arg3)); \
    plasma_src_ += sizeof(arg3); \
    memcpy(&(arg4), plasma_src_, sizeof(arg4)); \
}

//  Reads seven arguments back from plasma->args_buff.
//    argK - lvalues filled in order; each receives sizeof(argK) bytes, taken back
//           to back from the start of the buffer
//  A variable named `plasma` with an `args_buff` member must be in scope where the
//  macro is expanded. No bounds check is performed here.
#define plasma_unpack_args_7( \
    arg1, \
    arg2, \
    arg3, \
    arg4, \
    arg5, \
    arg6, \
    arg7) \
{ \
    const unsigned char *plasma_src_ = plasma->args_buff; \
    memcpy(&(arg1), plasma_src_, sizeof(arg1)); \
    plasma_src_ += sizeof(arg1); \
    memcpy(&(arg2), plasma_src_, sizeof(arg2)); \
    plasma_src_ += sizeof(arg2); \
    memcpy(&(arg3), plasma_src_, sizeof(arg3)); \
    plasma_src_ += sizeof(arg3); \
    memcpy(&(arg4), plasma_src_, sizeof(arg4)); \
    plasma_src_ += sizeof(arg4); \
    memcpy(&(arg5), plasma_src_, sizeof(arg5)); \
    plasma_src_ += sizeof(arg5); \
    memcpy(&(arg6), plasma_src_, sizeof(arg6)); \
    plasma_src_ += sizeof(arg6); \
    memcpy(&(arg7), plasma_src_, sizeof(arg7)); \
}

#endif
