The ROme OpTimistic Simulator 2.0.0
A General-Purpose Multithreaded Parallel/Distributed Simulation Platform
binding.c
#include <stdbool.h>
#include <string.h>
#include <stdlib.h>

#include <arch/atomic.h>
#include <core/core.h>
#include <core/timer.h>
#include <datatypes/list.h>
#include <scheduler/binding.h>
#include <scheduler/process.h>
#include <scheduler/scheduler.h>
#include <statistics/statistics.h>
#include <gvt/gvt.h>

#include <arch/thread.h>

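/// Wall-clock seconds between two successive LP rebindings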
#define REBIND_INTERVAL 10.0

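/// Couples an LP id with the workload factor estimated for that LP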
struct lp_cost_id {
    double workload_factor;
    unsigned int id;
};

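/// Per-LP workload estimates, posted by worker threads and consumed by the knapsack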
struct lp_cost_id *lp_cost;

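/// A guard to know whether this is the first invocation or not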
static __thread bool first_lp_binding = true;

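/// The new LP-to-thread binding computed by the master thread, picked up by all workers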
static unsigned int *new_LPS_binding;
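/// Timer used to trigger a rebinding every REBIND_INTERVAL seconds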
static timer rebinding_timer;

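/*
 * Phase counters implementing the rebinding handshake: the master thread
 * advances the global counters, and each worker thread compares them
 * against its thread-local copies to detect that a new phase has started.
 */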
#ifdef HAVE_LP_REBINDING
static int binding_acquire_phase = 0;
static __thread int local_binding_acquire_phase = 0;

static int binding_phase = 0;
static __thread int local_binding_phase = 0;
#endif

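/// Counts the worker threads that still have to post their local workload reduction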
static atomic_t worker_thread_reduction;

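/**
 * Performs a deterministic block binding of LPs to worker threads: the
 * first block_leftover threads receive ceil(n_prc / n_cores) consecutive
 * LPs each, the remaining threads floor(n_prc / n_cores).
 *
 * Example: with n_prc = 10 and n_cores = 4, threads 0 and 1 are bound to
 * 3 LPs each, threads 2 and 3 to 2 LPs each.
 */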
static inline void LPs_block_binding(void)
{
    unsigned int i, j;
    unsigned int buf1;
    unsigned int offset;
    unsigned int block_leftover;
    struct lp_struct *lp;

    buf1 = (n_prc / n_cores);
    block_leftover = n_prc - buf1 * n_cores;

    if (block_leftover > 0) {
        buf1++;
    }

    n_prc_per_thread = 0;
    i = 0;
    offset = 0;

    while (i < n_prc) {
        j = 0;
        while (j < buf1) {
            if (offset == local_tid) {
                lp = lps_blocks[i];
                LPS_bound_set(n_prc_per_thread++, lp);
                lp->worker_thread = local_tid;
            }
            i++;
            j++;
        }
        offset++;
        block_leftover--;
        if (block_leftover == 0) {
            buf1--;
        }
    }
}

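/**
 * qsort() comparator ordering lp_cost entries by decreasing
 * workload_factor, so that the heaviest LPs are assigned first.
 */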
static int compare_lp_cost(const void *a, const void *b)
{
    const struct lp_cost_id *A = (const struct lp_cost_id *)a;
    const struct lp_cost_id *B = (const struct lp_cost_id *)b;

    // Compare explicitly rather than returning B - A: the implicit
    // double-to-int conversion would truncate any difference in (-1, 1)
    // to zero, making unequal workloads compare as equal.
    return (B->workload_factor > A->workload_factor)
        - (B->workload_factor < A->workload_factor);
}

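/**
 * Master-thread-only greedy approximation of the knapsack problem: the
 * reference per-thread load is the total workload divided by n_cores,
 * each thread is seeded with one of the heaviest LPs, and every remaining
 * LP goes to the first thread whose load stays within the reference
 * value. LPs that fit nowhere are spread round-robin.
 */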
static inline void LP_knapsack(void)
{
    register unsigned int i, j;
    double reference_knapsack = 0;
    bool assigned;
    double assignments[n_cores];

    if (!master_thread())
        return;

    // Estimate the reference knapsack
    for (i = 0; i < n_prc; i++) {
        reference_knapsack += lp_cost[i].workload_factor;
    }
    reference_knapsack /= n_cores;

    // Sort the expected times
    qsort(lp_cost, n_prc, sizeof(struct lp_cost_id), compare_lp_cost);

    // At least one LP per thread
    bzero(assignments, sizeof(double) * n_cores);
    j = 0;
    for (i = 0; i < n_cores; i++) {
        assignments[j] += lp_cost[i].workload_factor;
        new_LPS_binding[i] = j;
        j++;
    }

    // Very suboptimal approximation of knapsack
    for (; i < n_prc; i++) {
        assigned = false;

        for (j = 0; j < n_cores; j++) {
            // Simulate assignment
            if (assignments[j] + lp_cost[i].workload_factor <= reference_knapsack) {
                assignments[j] += lp_cost[i].workload_factor;
                new_LPS_binding[i] = j;
                assigned = true;
                break;
            }
        }

        if (assigned == false)
            break;
    }

    // Check for leftovers
    if (i < n_prc) {
        j = 0;
        for (; i < n_prc; i++) {
            new_LPS_binding[i] = j;
            j = (j + 1) % n_cores;
        }
    }
}

#ifdef HAVE_LP_REBINDING

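/**
 * Estimates the workload factor of every LP bound to this thread as the
 * number of queued events times the average event execution time, divided
 * by the timestamp span of the input queue.
 */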
static void post_local_reduction(void)
{
    unsigned int i = 0;
    msg_t *first_evt, *last_evt;

    foreach_bound_lp(lp) {
        first_evt = list_head(lp->queue_in);
        last_evt = list_tail(lp->queue_in);

        lp_cost[lp->lid.to_int].id = i++;	// TODO: do we really need this?
        lp_cost[lp->lid.to_int].workload_factor = list_sizeof(lp->queue_in);
        lp_cost[lp->lid.to_int].workload_factor *= statistics_get_lp_data(lp, STAT_GET_EVENT_TIME_LP);
        lp_cost[lp->lid.to_int].workload_factor /= (last_evt->timestamp - first_evt->timestamp);
    }
}

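/**
 * Makes the binding computed by LP_knapsack() effective for this thread,
 * rebuilding its bound-LP array and taking ownership of newly acquired LPs.
 */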
static void install_binding(void)
{
    unsigned int i = 0;

    n_prc_per_thread = 0;

    foreach_lp(lp) {
        if (new_LPS_binding[i++] == local_tid) {
            LPS_bound_set(n_prc_per_thread++, lp);

            if (local_tid != lp->worker_thread) {
                lp->worker_thread = local_tid;
            }
        }
    }
}

#endif

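/**
 * Periodically reassesses the LP-to-thread binding. On the very first
 * invocation every thread installs the static block binding; afterwards,
 * every REBIND_INTERVAL seconds the master thread opens a new binding
 * phase, each worker posts its local workload estimates, the master
 * solves the knapsack, and all threads install the new binding.
 */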
void rebind_LPs(void)
{
    if (unlikely(first_lp_binding)) {
        first_lp_binding = false;

        initialize_binding_blocks();

        // Set up the initial deterministic block binding
        LPs_block_binding();

        timer_start(rebinding_timer);

        if (master_thread()) {
            new_LPS_binding = rsalloc(sizeof(int) * n_prc);
            lp_cost = rsalloc(sizeof(struct lp_cost_id) * n_prc);

            atomic_set(&worker_thread_reduction, n_cores);
        }

        return;
    }
#ifdef HAVE_LP_REBINDING
    if (master_thread()) {
        if (unlikely(timer_value_seconds(rebinding_timer) >= REBIND_INTERVAL)) {
            timer_restart(rebinding_timer);
            binding_phase++;
        }

        if (atomic_read(&worker_thread_reduction) == 0) {
            LP_knapsack();

            binding_acquire_phase++;
        }
    }

    if (local_binding_phase < binding_phase) {
        local_binding_phase = binding_phase;
        post_local_reduction();
        atomic_dec(&worker_thread_reduction);
    }

    if (local_binding_acquire_phase < binding_acquire_phase) {
        local_binding_acquire_phase = binding_acquire_phase;

        install_binding();

#ifdef HAVE_PREEMPTION
        reset_min_in_transit(local_tid);
#endif

        // Only the last thread through the barrier rearms the
        // reduction counter for the next rebinding round
        if (thread_barrier(&all_thread_barrier)) {
            atomic_set(&worker_thread_reduction, n_cores);
        }
    }
#endif
}