29 int *tuple,
int counter,
31 size_t local_store_size,
32 int num_inputs,
int num_outputs,
int color)
35 prt_assert(tuple != NULL,
"NULL tuple");
36 prt_assert(counter > 0,
"counter not larger than zero");
37 prt_assert(
function != NULL,
"NULL VDP function");
38 prt_assert(num_inputs >= 0,
"negative number of inputs");
39 prt_assert(num_outputs >= 0,
"negative number of outputs");
43 prt_assert(vdp != NULL,
"malloc failed");
50 vdp->counter = counter;
51 vdp->function =
function;
55 vdp->num_inputs = num_inputs;
56 if (vdp->num_inputs > 0) {
59 prt_assert(vdp->input != NULL,
"malloc failed");
62 vdp->num_outputs = num_outputs;
63 if (vdp->num_outputs > 0) {
66 prt_assert(vdp->output != NULL,
"malloc failed");
69 if (local_store_size > 0) {
70 vdp->local_store = (
void*)malloc(local_store_size);
71 prt_assert(vdp->local_store != NULL,
"malloc failed");
90 prt_assert(vdp != NULL,
"NULL VDP");
93 prt_assert(vdp->tuple != NULL,
"NULL tuple");
98 for (i = 0; i < vdp->num_inputs; i++) {
104 for (i = 0; i < vdp->num_outputs; i++) {
108 if (vdp->location == PRT_LOCATION_HOST &&
109 channel->dst_node != vdp->thread->vsa->node_rank)
113 else if (vdp->location == PRT_LOCATION_DEVICE &&
114 channel->dst_node != vdp->device->vsa->node_rank)
119 if (vdp->num_inputs > 0)
123 if (vdp->num_outputs > 0)
127 if (vdp->local_store != NULL)
128 free(vdp->local_store);
130 if (vdp->location == PRT_LOCATION_DEVICE) {
133 error = cudaSetDevice(vdp->device->rank);
134 prt_assert(error == cudaSuccess, cudaGetErrorString(error));
136 error = cudaStreamDestroy(vdp->stream);
137 prt_assert(error == cudaSuccess, cudaGetErrorString(error));
155 prt_assert(vdp != NULL,
"NULL VDP");
158 prt_assert(vdp->tuple != NULL,
"NULL tuple");
163 for (i = 0; i < vdp->num_inputs; i++) {
169 for (i = 0; i < vdp->num_outputs; i++) {
175 if (vdp->num_inputs > 0)
179 if (vdp->num_outputs > 0)
183 if (vdp->local_store != NULL)
184 free(vdp->local_store);
205 prt_assert(vdp != NULL,
"NULL VDP");
206 prt_assert(channel != NULL,
"NULL channel");
207 prt_assert(direction == PRT_INPUT_CHANNEL || direction == PRT_OUTPUT_CHANNEL,
208 "undefined direction");
209 prt_assert(slot >= 0,
"negative slot");
212 if (direction == PRT_INPUT_CHANNEL) {
215 "input channel destination tuple does not match VDP tuple");
217 prt_assert(vdp->input[slot] == NULL,
218 "inserting channel in occupied input slot");
221 prt_assert(slot >= 0 && slot < vdp->num_inputs,
"slot out of range");
222 vdp->input[slot] = channel;
224 channel->dst_vdp = vdp;
227 else if (direction == PRT_OUTPUT_CHANNEL) {
230 "output channel source tuple does not match VDP tuple");
232 prt_assert(vdp->output[slot] == NULL,
233 "inserting channel in occupied output slot");
236 prt_assert(slot >= 0 && slot < vdp->num_outputs,
"slot out of range");
237 vdp->output[slot] = channel;
239 channel->src_vdp = vdp;
261 prt_assert(vdp != NULL,
"NULL VDP");
262 prt_assert(size > 0,
"packet size equals zero");
263 prt_assert(size <= INT_MAX,
"packet size larger than INT_MAX");
266 prt_assert(vdp != NULL,
"NULL VDP");
269 if (vdp->location == PRT_LOCATION_HOST)
274 if (vdp->location == PRT_LOCATION_DEVICE)
278 prt_error(
"NULL thread and device");
301 prt_assert(vdp != NULL,
"NULL VDP");
302 prt_assert(size > 0,
"packet size equals zero");
303 prt_assert(size <= INT_MAX,
"packet size larger than INT_MAX");
304 prt_assert(data != NULL,
"NULL data pointer");
312 packet->data, data, size, cudaMemcpyHostToDevice, vdp->stream);
333 prt_assert(vdp != NULL,
"NULL VDP");
334 prt_assert(packet != NULL,
"NULL packet");
337 if (vdp->location == PRT_LOCATION_HOST) {
345 __sync_fetch_and_add(&vdp->vsa->proxy->num_callbacks, 1);
346 cudaStreamAddCallback(
363 prt_assert(vdp != NULL,
"NULL VDP");
364 prt_assert(channel_num >= 0 && channel_num < vdp->num_outputs,
365 "channel number out of range");
366 prt_assert(packet != NULL,
"NULL packet");
367 prt_assert(packet->size <= vdp->output[channel_num]->size,
368 "packet size larger than channel size");
371 if (vdp->location == PRT_LOCATION_HOST)
375 else if (vdp->location == PRT_LOCATION_DEVICE)
380 prt_error(
"NULL thread and device");
396 prt_assert(vdp != NULL,
"NULL VDP");
397 prt_assert(channel_num >= 0 && channel_num < vdp->num_inputs,
398 "channel number out of range");
399 prt_assert(vdp->input[channel_num]->active,
400 "popping from an inactive channel");
417 prt_assert(vdp != NULL,
"NULL VDP");
418 prt_assert(channel_num >= 0 && channel_num < vdp->num_inputs,
419 "channel number out of range");
436 prt_assert(vdp != NULL,
"NULL VDP");
437 prt_assert(channel_num >= 0 && channel_num < vdp->num_inputs,
438 "channel number out of range");
459 prt_assert(vdp != NULL,
"NULL VDP");
463 for (i = 0; i < vdp->num_inputs; i++)
467 if (vdp->input[i] != NULL &&
468 vdp->input[i]->active &&