30 int *src_tuple,
int src_slot,
31 int *dst_tuple,
int dst_slot)
34 prt_assert(size > 0,
"channel size equals zero");
35 prt_assert(size <= INT_MAX,
"channel size larger than INT_MAX");
36 prt_assert(src_tuple != NULL,
"NULL source tuple");
37 prt_assert(src_slot >= 0,
"negative source slot");
38 prt_assert(dst_tuple != NULL,
"NULL destination tuple");
39 prt_assert(dst_slot >= 0,
"negative destination slot");
43 prt_assert(channel != NULL,
"malloc failed");
46 channel->dst_vdp = NULL;
47 channel->src_vdp = NULL;
48 channel->proxy = NULL;
50 channel->src_tuple = src_tuple;
51 channel->dst_tuple = dst_tuple;
52 channel->src_slot = src_slot;
53 channel->dst_slot = dst_slot;
58 prt_assert(channel->packets != NULL,
"icl_deque_new() failed");
73 prt_assert(channel != NULL,
"NULL channel");
76 prt_assert(channel->src_tuple != NULL,
"NULL tuple");
77 free(channel->src_tuple);
80 prt_assert(channel->dst_tuple != NULL,
"NULL tuple");
81 free(channel->dst_tuple);
86 prt_assert(channel->packets != NULL,
"NULL packets list");
/* The value tested here must be zero; per the diagnostic it is presumably the
   number of packets still queued on the channel -- TODO confirm against the
   full file. The message previously read "non-epty" (typo). */
89 prt_assert(size == 0,
"non-empty packet list");
110 __sync_fetch_and_add(&packet->num_refs, 1);
113 if (channel->src_node != channel->dst_node) {
117 packet, packet->size, channel->dst_node, channel->tag);
120 channel->proxy->sends_requested[vdp->thread->agent_rank], request);
124 if (channel->dst_vdp->location == PRT_LOCATION_HOST) {
157 __sync_fetch_and_add(&packet->num_refs, 1);
160 if (channel->src_node != channel->dst_node) {
165 NULL, packet, channel,
166 PRT_DEVICE_MPI_TO_HOST,
167 vdp->device->agent_rank);
168 __sync_fetch_and_add(&channel->proxy->num_callbacks, 1);
169 cudaStreamAddCallback(
174 if (channel->dst_vdp->location == PRT_LOCATION_HOST) {
179 NULL, packet, channel, PRT_DEVICE_TO_HOST, -1);
180 __sync_fetch_and_add(&channel->proxy->num_callbacks, 1);
181 cudaStreamAddCallback(
186 if (packet->device_rank == channel->dst_vdp->device->rank) {
191 __sync_fetch_and_add(&channel->proxy->num_callbacks, 1);
192 cudaStreamAddCallback(
200 NULL, packet, channel, PRT_DEVICE_TO_DEVICE, -1);
201 __sync_fetch_and_add(&channel->proxy->num_callbacks, 1);
202 cudaStreamAddCallback(
222 prt_assert(channel->packets != NULL,
"NULL list of packets");
225 prt_assert(node != NULL,
"empty list of packets");
228 prt_assert(packet != NULL,
"NULL packet");
246 prt_assert(channel != NULL,
"NULL channel");
247 prt_assert(channel->packets != NULL,
"NULL list of packets");
272 if (c1->src_slot < c2->src_slot)
return -1;
273 if (c1->src_slot > c2->src_slot)
return 1;
278 if (c1->dst_slot < c2->dst_slot)
return -1;
279 if (c1->dst_slot > c2->dst_slot)
return 1;