//==============================================================================================
//
//  Innovative Computing Laboratory - Computer Science Department - University of Tennessee
//  Written by Jakub Kurzak
//
//==============================================================================================

#include <cbe_mfc.h>
#include <spu_mfcio.h>

#include "spu_blas.h"
#include "spu_barrier.h"

//----------------------------------------------------------------------------------------------

// Symbols defined elsewhere in the SPU program and used by the barrier below:
//   spus_num          - modulus used when selecting the neighbour SPU to signal;
//                       the barrier is skipped entirely unless it is > 1.
//   my_spu_id         - this SPU's position in the ring; id 0 starts each lap.
//   spu_global_params - its local_store[] entry for the neighbour is added to the
//                       local address of `barrier` to form the DMA target address
//                       (presumably the base address of that SPU's local store --
//                       confirm against the GlobalParams definition).
extern int spus_num;
extern int my_spu_id;
extern GlobalParams spu_global_params;

//----------------------------------------------------------

// `barrier` is the flag each SPU polls; it is cleared locally and set to non-zero
// by the previous SPU's mfc_put. `one_ack` is the constant value 1 used as the
// source of that put. Both are volatile because they are read/written outside the
// compiler's view (by DMA) and are polled in empty loops.
// NOTE(review): the 16-byte alignment presumably keeps source and destination at
// the same quadword offset, as MFC DMA requires for transfers shorter than
// 16 bytes -- confirm before changing either declaration.
volatile int barrier __attribute__ ((aligned (16))) = 0;
volatile int one_ack __attribute__ ((aligned (16))) = 1;

//----------------------------------------------------------------------------------------------

// Busy-wait barrier over the ring of SPUs (no event logging).
//
// A flag is passed around the ring twice. SPU 0 starts each lap by clearing its
// own `barrier` flag, setting the flag of SPU 1 via DMA and then spinning until
// the flag comes back to it. Every other SPU spins until its flag is set, clears
// it, and sets the flag of the next SPU (id + 1, wrapping modulo spus_num).
// Returns immediately when spus_num <= 1.
void spu_barrier_()
{
    // Tag group for the DMA put. Completion of the tag is never waited on here;
    // the source word `one_ack` is not modified anywhere in this file.
    const unsigned int dma_tag = 10;
    int lap;

    if (spus_num <= 1)
        return;

    for (lap = 0; lap != 2; ++lap)
    {
        const int starts_lap = (my_spu_id == 0);
        const int next_spu = (my_spu_id + 1) % spus_num;

        // Everyone except SPU 0 must first receive the flag from its predecessor.
        if (!starts_lap)
        {
            while (barrier == 0)
                ;
        }

        // Re-arm the local flag before signalling, so the next incoming put is seen.
        barrier = 0;

        // Write the value 1 into the neighbour's copy of `barrier`.
        mfc_put((void*)&one_ack,
                spu_global_params.local_store[next_spu] + (unsigned int)(&barrier),
                sizeof(unsigned int), dma_tag, 0, 0);

        // SPU 0 finishes the lap once the flag has travelled the whole ring.
        if (starts_lap)
        {
            while (barrier == 0)
                ;
        }
    }
}

//----------------------------------------------------------------------------------------------

// Busy-wait barrier over the ring of SPUs, with timing instrumentation.
//
// Performs the same two-lap flag passing as spu_barrier_(): SPU 0 starts each lap
// by clearing its own `barrier` flag, setting the flag of SPU 1 via DMA and then
// spinning until the flag comes back; every other SPU spins until its flag is
// set, clears it, and sets the flag of the next SPU (id + 1, modulo spus_num).
// The ring exchange is skipped when spus_num <= 1.
//
// The decrementer is sampled before and after the exchange and the pair is
// appended to the event log with event code 0xC0C000. The record is written even
// when the ring exchange is skipped.
void spu_barrier()
{
    // Tag group for the DMA put. Completion of the tag is never waited on here;
    // the source word `one_ack` is not modified anywhere in this file.
    const unsigned int tag = 10;
    int i;

    //----------------------------------------------------------

    extern int spu_event_num;
    extern int spu_event_log[];

    // Append one (start, end, event) record at the log cursor. The cursor advances
    // by 4 ints per record although only 3 are written, and wraps at 1024.
    // NOTE(review): assumes spu_event_log holds at least 1024 ints -- confirm at
    // its definition.
    // Wrapped in do/while(0) so the macro expands to exactly one statement.
    #define spu_log_event(start, end, event)\
        do {\
            spu_event_log[spu_event_num+0] = (start);\
            spu_event_log[spu_event_num+1] = (end);\
            spu_event_log[spu_event_num+2] = (event);\
            spu_event_num += 4;\
            spu_event_num &= 1024-1;\
        } while (0)

    //----------------------------------------------------------

    int start = spu_read_decrementer();

    if (spus_num > 1)
    {
        for (i = 0; i < 2; i++)
        {
            const int starts_lap = (my_spu_id == 0);
            const int next_spu = (my_spu_id + 1) % spus_num;

            // Everyone except SPU 0 must first receive the flag from its predecessor.
            if (!starts_lap)
            {
                while (barrier == 0)
                    ;
            }

            // Re-arm the local flag before signalling, so the next incoming put is seen.
            barrier = 0;

            // Write the value 1 into the neighbour's copy of `barrier`.
            mfc_put((void*)&one_ack,
                    spu_global_params.local_store[next_spu] + (unsigned int)(&barrier),
                    sizeof(unsigned int), tag, 0, 0);

            // SPU 0 finishes the lap once the flag has travelled the whole ring.
            if (starts_lap)
            {
                while (barrier == 0)
                    ;
            }
        }
    }

    int end = spu_read_decrementer();
    spu_log_event(start, end, 0xC0C000);
}

//----------------------------------------------------------------------------------------------
