// html/doxygen/prt__packet_8c_source.html

#include "prt_packet.h"


/**
 * Create a packet whose payload lives in host memory.
 *
 * @param size  Payload size in bytes.
 * @param data  Existing payload buffer to store in the packet, or NULL to
 *              have a buffer of `size` bytes allocated here.
 * @return      A new packet with a reference count of 1 and location
 *              PRT_LOCATION_HOST.
 *
 * A non-NULL `data` pointer is stored as-is (no copy is made).
 * prt_packet_release_host() passes the stored pointer to free() when the
 * last reference is dropped.
 */
prt_packet_t *prt_packet_new_host(size_t size, void *data)
{
    // Allocate the packet zero-initialized, so that fields which are not
    // set below (e.g. the device-only ones) never hold indeterminate values.
    prt_packet_t *packet = calloc(1, sizeof *packet);
    prt_assert(packet != NULL, "malloc failed");

    // Initialize the packet.
    packet->size = size;
    packet->num_refs = 1;
    packet->location = PRT_LOCATION_HOST;

    if (data == NULL) {
        // No buffer supplied: allocate host memory.
        packet->data = malloc(size);
        // malloc(0) is allowed to return NULL, so only a failed non-empty
        // allocation counts as an error.
        prt_assert(packet->data != NULL || size == 0, "malloc failed");
    }
    else {
        // Use the caller's buffer.
        packet->data = data;
    }

    // Register memory usage and return.
    svg_trace_memory_host(size);
    return packet;
}


/**
 * Create a packet whose payload lives in device memory.
 *
 * @param size  Payload size in bytes.
 * @param data  Existing device buffer to store in the packet, or NULL to
 *              have `size` bytes obtained from gpu_malloc().
 * @param vdp   VDP whose device supplies the packet's device rank and
 *              device memory handle.
 * @return      A new packet with a reference count of 1 and location
 *              PRT_LOCATION_DEVICE.
 */
prt_packet_t *prt_packet_new_device(size_t size, void *data, prt_vdp_t *vdp)
{
    // Get storage for the packet descriptor itself (always on the host).
    prt_packet_t *packet = malloc(sizeof *packet);
    prt_assert(packet != NULL, "malloc failed");

    // Fill in the bookkeeping fields.
    packet->size = size;
    packet->num_refs = 1;
    packet->location = PRT_LOCATION_DEVICE;

    // Record which device the payload belongs to and the matching
    // device memory handle, looked up by the device's rank.
    packet->device_rank = vdp->device->rank;
    packet->devmem = vdp->device->vsa->devmem[packet->device_rank];

    if (data != NULL) {
        // The caller supplied the payload buffer: just store the pointer.
        packet->data = data;
    }
    else {
        // No buffer supplied: take one from the device memory handle.
        packet->data = gpu_malloc(packet->devmem, size);
        prt_assert(packet->data != NULL, "gpu_malloc failed");
    }

    // Account for the device memory, then hand the packet back.
    svg_trace_memory_device(size);
    return packet;
}


/**
 * Change the payload size of a host packet.
 *
 * @param packet  Packet to resize; its location must be PRT_LOCATION_HOST.
 * @param size    New payload size in bytes.
 *
 * The payload buffer is reallocated, so packet->data may change.
 */
void prt_packet_resize_host(prt_packet_t *packet, size_t size)
{
    // Check packet location.
    prt_assert(packet->location == PRT_LOCATION_HOST, "wrong packet location");

    // Register the change in memory usage as a signed delta. Computing it
    // in size_t would wrap to a huge positive value when shrinking, which
    // only yields the intended result if the callee's parameter is an
    // integer no wider than size_t.
    // NOTE(review): the prototype of svg_trace_memory_host is not visible
    // here - confirm it accepts a signed value.
    svg_trace_memory_host((long long)size - (long long)packet->size);

    if (size == 0) {
        // realloc(ptr, 0) is not portable (it may free the block and return
        // NULL, or return NULL without freeing), so release the buffer
        // explicitly instead of treating the NULL result as a failure.
        free(packet->data);
        packet->data = NULL;
    }
    else {
        // Reallocate the data buffer.
        packet->data = realloc(packet->data, size);
        prt_assert(packet->data != NULL, "realloc failed");
    }
    packet->size = size;
}


/**
 * Drop one reference to a host packet.
 *
 * @param packet  Packet whose reference count is decremented.
 *
 * The decrement is done with an atomic builtin. When the count reaches
 * zero, the payload and the packet itself are passed to free(); the packet
 * must not be used afterwards.
 */
void prt_packet_release_host(prt_packet_t *packet)
{
    int num_refs = __sync_sub_and_fetch(&packet->num_refs, 1);
    prt_assert(num_refs >= 0, "negative number of data references");

    if (num_refs == 0) {
        // Register memory usage as a signed negative amount. Negating the
        // size_t directly would wrap to a huge positive value.
        // NOTE(review): the prototype of svg_trace_memory_host is not
        // visible here - confirm it accepts a signed value.
        svg_trace_memory_host(-(long long)packet->size);

        // Free the payload and the packet.
        free(packet->data);
        free(packet);
    }
}


/**
 * Drop one reference to a device packet.
 *
 * @param packet  Packet whose reference count is decremented.
 *
 * The decrement is done with an atomic builtin. When the count reaches
 * zero, the payload is returned through gpu_free() and the packet
 * descriptor is passed to free(); the packet must not be used afterwards.
 */
void prt_packet_release_device(prt_packet_t *packet)
{
    int num_refs = __sync_sub_and_fetch(&packet->num_refs, 1);
    prt_assert(num_refs >= 0, "negative number of data references");

    if (num_refs == 0) {
        // Make the packet's device current before releasing its payload.
        cudaError_t error = cudaSetDevice(packet->device_rank);
        prt_assert(error == cudaSuccess, cudaGetErrorString(error));

        // Free the payload.
        int retval = gpu_free(packet->devmem, packet->data);
        prt_assert(retval == 0, "gpu_free failed");

        // Register memory usage as a signed negative amount. Negating the
        // size_t directly would wrap to a huge positive value.
        // NOTE(review): the prototype of svg_trace_memory_device is not
        // visible here - confirm it accepts a signed value.
        svg_trace_memory_device(-(long long)packet->size);

        // Free the packet.
        free(packet);
    }
}


/**
 * Start an asynchronous copy of a host packet to the channel's destination
 * device.
 *
 * @param src_packet  Host packet holding the data to copy.
 * @param channel     Channel providing the destination VDP, the input
 *                    stream used for the copy, and the proxy whose callback
 *                    counter is incremented.
 *
 * A new device packet of the same size is created, the copy is enqueued on
 * channel->in_stream, and prt_callback_finish_handler is registered on the
 * same stream after the copy. Every CUDA call is checked with prt_assert.
 */
void prt_packet_host_to_device(prt_packet_t *src_packet, prt_channel_t *channel)
{
    // Set device to the destination device.
    cudaError_t error = cudaSetDevice(channel->dst_vdp->device->rank);
    prt_assert(error == cudaSuccess, cudaGetErrorString(error));

    // Create a new device packet.
    prt_packet_t *dst_packet = prt_packet_new_device(
        src_packet->size, NULL, channel->dst_vdp);

    // Put the copy in the channel stream.
    svg_trace_start_dma(channel->in_stream);
    error = cudaMemcpyAsync(
        dst_packet->data, src_packet->data, src_packet->size,
        cudaMemcpyHostToDevice, channel->in_stream);
    svg_trace_stop_dma(channel->in_stream, Silver);
    prt_assert(error == cudaSuccess, cudaGetErrorString(error));

    // Set up the callback to put the new device packet in the channel.
    prt_callback_finish_t *callback =
        prt_callback_finish_new(src_packet, dst_packet, channel);

    // The counter is incremented before the callback is registered. A
    // failed registration must therefore not go unnoticed: the callback
    // would never run although it has already been counted.
    __sync_fetch_and_add(&channel->proxy->num_callbacks, 1);
    error = cudaStreamAddCallback(
        channel->in_stream, prt_callback_finish_handler, (void*)callback, 0);
    prt_assert(error == cudaSuccess, cudaGetErrorString(error));
}


/**
 * Start an asynchronous copy of a device packet to host memory.
 *
 * @param src_packet  Device packet holding the data to copy.
 * @param channel     Channel providing the output stream used for the copy
 *                    and the proxy whose callback counter is incremented.
 *
 * A new host packet of the same size is created, the copy is enqueued on
 * channel->out_stream, and prt_callback_finish_handler is registered on the
 * same stream after the copy. Every CUDA call is checked with prt_assert.
 */
void prt_packet_device_to_host(prt_packet_t *src_packet, prt_channel_t *channel)
{
    // Set device to the source device.
    cudaError_t error = cudaSetDevice(src_packet->device_rank);
    prt_assert(error == cudaSuccess, cudaGetErrorString(error));

    // Create new host packet.
    prt_packet_t *dst_packet = prt_packet_new_host(src_packet->size, NULL);

    // Put the copy in the channel stream.
    svg_trace_start_dma(channel->out_stream);
    error = cudaMemcpyAsync(
        dst_packet->data, src_packet->data, src_packet->size,
        cudaMemcpyDeviceToHost, channel->out_stream);
    svg_trace_stop_dma(channel->out_stream, Silver);
    prt_assert(error == cudaSuccess, cudaGetErrorString(error));

    // Set up the callback to put the new host packet in the channel.
    prt_callback_finish_t *callback =
        prt_callback_finish_new(src_packet, dst_packet, channel);

    // The counter is incremented before the callback is registered. A
    // failed registration must therefore not go unnoticed: the callback
    // would never run although it has already been counted.
    __sync_fetch_and_add(&channel->proxy->num_callbacks, 1);
    error = cudaStreamAddCallback(
        channel->out_stream, prt_callback_finish_handler, (void*)callback, 0);
    prt_assert(error == cudaSuccess, cudaGetErrorString(error));
}


/**
 * Start the first leg of a device-to-device transfer staged through host
 * memory.
 *
 * @param src_packet  Device packet holding the data to copy.
 * @param channel     Channel providing the output stream used for the copy
 *                    and the proxy whose callback counter is incremented.
 *
 * A new host packet of the same size is created and a device-to-host copy
 * is enqueued on channel->out_stream. prt_callback_queue_handler is then
 * registered on the same stream with a request built from
 * PRT_HOST_TO_DEVICE and -1 as the last argument. Every CUDA call is
 * checked with prt_assert.
 */
void prt_packet_device_to_device(prt_packet_t *src_packet, prt_channel_t *channel)
{
    // Set device to the source device.
    cudaError_t error = cudaSetDevice(src_packet->device_rank);
    prt_assert(error == cudaSuccess, cudaGetErrorString(error));

    // Create new host packet (the staging buffer).
    prt_packet_t *dst_packet = prt_packet_new_host(src_packet->size, NULL);

    // Put the copy in the channel stream.
    svg_trace_start_dma(channel->out_stream);
    error = cudaMemcpyAsync(dst_packet->data, src_packet->data, src_packet->size,
        cudaMemcpyDeviceToHost, channel->out_stream);
    svg_trace_stop_dma(channel->out_stream, Silver);
    prt_assert(error == cudaSuccess, cudaGetErrorString(error));

    // Set up the callback to queue the host to device transfer.
    prt_callback_queue_t *callback =
        prt_callback_queue_new(
            src_packet, dst_packet, channel, PRT_HOST_TO_DEVICE, -1);

    // The counter is incremented before the callback is registered. A
    // failed registration must therefore not go unnoticed: the callback
    // would never run although it has already been counted.
    __sync_fetch_and_add(&channel->proxy->num_callbacks, 1);
    error = cudaStreamAddCallback(
        channel->out_stream, prt_callback_queue_handler, (void*)callback, 0);
    prt_assert(error == cudaSuccess, cudaGetErrorString(error));
}


/**
 * Start an asynchronous peer-to-peer copy of a device packet to the
 * channel's destination device.
 *
 * @param src_packet  Device packet holding the data to copy.
 * @param channel     Channel providing the destination VDP, the output
 *                    stream used for the copy, and the proxy whose callback
 *                    counter is incremented.
 *
 * A new device packet of the same size is created for the destination VDP,
 * the peer copy is enqueued on channel->out_stream, and
 * prt_callback_finish_handler is registered on the same stream after the
 * copy. Every CUDA call is checked with prt_assert.
 */
void prt_packet_device_to_device_direct(
    prt_packet_t *src_packet, prt_channel_t *channel)
{
    // Set device to the source device.
    cudaError_t error = cudaSetDevice(src_packet->device_rank);
    prt_assert(error == cudaSuccess, cudaGetErrorString(error));

    // Create new device packet on the destination VDP.
    // NOTE(review): the current device is the source device at this point,
    // whereas prt_packet_host_to_device selects the destination device
    // before allocating - confirm gpu_malloc does not depend on the
    // current device.
    prt_packet_t *dst_packet = prt_packet_new_device(
        src_packet->size, NULL, channel->dst_vdp);

    // Put the copy in the channel stream.
    svg_trace_start_dma(channel->out_stream);
    error = cudaMemcpyPeerAsync(
        dst_packet->data, channel->dst_vdp->device->rank,
        src_packet->data, src_packet->device_rank,
        src_packet->size, channel->out_stream);
    svg_trace_stop_dma(channel->out_stream, Silver);
    prt_assert(error == cudaSuccess, cudaGetErrorString(error));

    // Set up the callback to put the new device packet in the channel.
    prt_callback_finish_t *callback =
        prt_callback_finish_new(src_packet, dst_packet, channel);

    // The counter is incremented before the callback is registered. A
    // failed registration must therefore not go unnoticed: the callback
    // would never run although it has already been counted.
    __sync_fetch_and_add(&channel->proxy->num_callbacks, 1);
    error = cudaStreamAddCallback(
        channel->out_stream, prt_callback_finish_handler, (void*)callback, 0);
    prt_assert(error == cudaSuccess, cudaGetErrorString(error));
}


/**
 * Start an asynchronous copy of a device packet to host memory ahead of a
 * send from the host.
 *
 * @param src_packet  Device packet holding the data to copy.
 * @param channel     Channel providing the output stream used for the copy
 *                    and the proxy whose callback counter is incremented.
 * @param agent       Value passed through unchanged to
 *                    prt_callback_queue_new().
 *
 * A new host packet of the same size is created and a device-to-host copy
 * is enqueued on channel->out_stream. prt_callback_queue_handler is then
 * registered on the same stream with a request built from
 * PRT_DEVICE_MPI_FROM_HOST and `agent`. Every CUDA call is checked with
 * prt_assert.
 */
void prt_packet_device_mpi_to_host(
    prt_packet_t *src_packet, prt_channel_t *channel, int agent)
{
    // Set device to the source device.
    cudaError_t error = cudaSetDevice(src_packet->device_rank);
    prt_assert(error == cudaSuccess, cudaGetErrorString(error));

    // Create new host packet.
    prt_packet_t *dst_packet = prt_packet_new_host(src_packet->size, NULL);

    // Put the copy in the channel stream.
    svg_trace_start_dma(channel->out_stream);
    error = cudaMemcpyAsync(dst_packet->data, src_packet->data, src_packet->size,
        cudaMemcpyDeviceToHost, channel->out_stream);
    svg_trace_stop_dma(channel->out_stream, Silver);
    prt_assert(error == cudaSuccess, cudaGetErrorString(error));

    // Set up the callback to queue send from host.
    prt_callback_queue_t *callback =
        prt_callback_queue_new(
            src_packet, dst_packet, channel, PRT_DEVICE_MPI_FROM_HOST, agent);

    // The counter is incremented before the callback is registered. A
    // failed registration must therefore not go unnoticed: the callback
    // would never run although it has already been counted.
    __sync_fetch_and_add(&channel->proxy->num_callbacks, 1);
    error = cudaStreamAddCallback(
        channel->out_stream, prt_callback_queue_handler, (void*)callback, 0);
    prt_assert(error == cudaSuccess, cudaGetErrorString(error));
}