PULSAR  2.0.0
Parallel Ultra-Light Systolic Array Runtime
 All Data Structures Files Functions Typedefs Enumerations Macros Groups
gpu_malloc.c
Go to the documentation of this file.
1 
13 #include "gpu_malloc.h"
14 
16 
24 gpu_malloc_t* gpu_malloc_init(int _max_segment, size_t _unit_size)
25 {
26  gpu_malloc_t *gdata = (gpu_malloc_t*)malloc(sizeof(gpu_malloc_t));
27  void *ptr = NULL;
28  segment_t *s;
29  cudaError_t rc;
30  int i;
31 
32  gdata->base = NULL;
33  gdata->allocated_segments = NULL;
34  gdata->free_segments = NULL;
35  gdata->unit_size = _unit_size;
36  gdata->max_segment = _max_segment+2;
37 
38  rc = (cudaError_t)cudaMalloc(&ptr, (_max_segment * gdata->unit_size));
39  gdata->base = ptr;
40  if ((cudaSuccess != rc) || (NULL == gdata->base)) {
41  free(gdata);
42  return NULL;
43  }
44  for(i = 0 ; i < _max_segment; i++) {
45  s = (segment_t*)malloc(sizeof(segment_t));
46  s->next = gdata->free_segments;
47  gdata->free_segments = s;
48  }
49  // First and last segments are persistent. Simplifies the algorithm.
50  gdata->allocated_segments = (segment_t*)malloc(sizeof(segment_t));
51  gdata->allocated_segments->start_index = 0;
52  gdata->allocated_segments->nb_units = 1;
53  gdata->allocated_segments->nb_free = _max_segment;
54 
55  gdata->allocated_segments->next = (segment_t*)malloc(sizeof(segment_t));
56  gdata->allocated_segments->next->start_index = _max_segment+1;
57  gdata->allocated_segments->next->nb_units = 1;
58  gdata->allocated_segments->next->nb_free = 0;
59  gdata->allocated_segments->next->next = NULL;
60 
61  return gdata;
62 }
63 
65 
74 {
    /*
     * NOTE(review): the signature line of this function is missing from this
     * listing; the body only shows that it operates on a pool descriptor
     * named `gdata` and returns an int status.
     *
     * Tears down a pool: frees every descriptor on both segment lists, then
     * releases the device buffer with cudaFree().
     *
     * Returns 0 on success, -1 if cudaFree() fails. On the failure path both
     * lists have already been emptied, while max_segment, unit_size and base
     * are left unchanged. The `gdata` structure itself is not freed here.
     */
75  segment_t *s;
76  cudaError_t rc;
77 
    /* Release every descriptor on the allocated list. */
78  while (NULL != gdata->allocated_segments) {
79  s = gdata->allocated_segments->next;
80  free(gdata->allocated_segments);
81  gdata->allocated_segments = s;
82  }
    /* Release every unused descriptor on the free list. */
83  while( NULL != gdata->free_segments ) {
84  s = gdata->free_segments->next;
85  free(gdata->free_segments);
86  gdata->free_segments = s;
87  }
    /* Release the device buffer; report failure without resetting fields. */
88  rc = (cudaError_t)cudaFree(gdata->base);
89  if (cudaSuccess != rc)
90  return -1;
    /* Reset the remaining fields so the descriptor no longer refers to the buffer. */
91  gdata->max_segment = 0;
92  gdata->unit_size = 0;
93  gdata->base = NULL;
94  return 0;
95 }
96 
98 
106 void* gpu_malloc(gpu_malloc_t *gdata, size_t size)
107 {
108  int nb_units = size/gdata->unit_size;
109  if (size % gdata->unit_size != 0)
110  nb_units++;
111 
112  segment_t *s, *n;
113  for(s = gdata->allocated_segments; s->next != NULL; s = s->next) {
114  if ( s->nb_free > nb_units ) {
115  if (nb_units <= 0)
116  return NULL;
117 
118  n = gdata->free_segments;
119  gdata->free_segments = gdata->free_segments->next;
120 
121  n->start_index = s->start_index + s->nb_units;
122  n->nb_units = nb_units;
123  n->nb_free = s->nb_free - n->nb_units;
124  n->next = s->next;
125  s->nb_free = 0;
126  s->next = n;
127  return (void*)(gdata->base + (n->start_index * gdata->unit_size));
128  }
129  }
130  return NULL;
131 }
132 
134 
143 int gpu_free(gpu_malloc_t *gdata, void *add)
144 {
145  segment_t *s, *p;
146  int tid;
147 
148  p = gdata->allocated_segments;
149  tid = ((char*)add - gdata->base) / gdata->unit_size;
150 
151  for (s = gdata->allocated_segments->next; s->next != NULL; s = s->next) {
152  if (s->start_index == tid) {
153  p->next = s->next;
154  p->nb_free += s->nb_units + s->nb_free;
155 
156  s->next = gdata->free_segments;
157  gdata->free_segments = s;
158 
159  return 0;
160  }
161  p = s;
162  }
163  return -1;
164 }