The ROme OpTimistic Simulator  2.0.0
A General-Purpose Multithreaded Parallel/Distributed Simulation Platform
state.c
Go to the documentation of this file.
1 
34 #include <stdlib.h>
35 #include <math.h>
36 #include <string.h>
37 
38 #include <core/core.h>
39 #include <core/init.h>
40 #include <core/timer.h>
41 #include <datatypes/list.h>
42 #include <scheduler/process.h>
43 #include <scheduler/scheduler.h>
44 #include <mm/state.h>
46 #include <mm/mm.h>
47 #include <statistics/statistics.h>
48 
55 bool LogState(struct lp_struct *lp)
56 {
57  bool take_snapshot = false;
58  state_t *new_state;
59 
60  if (unlikely(is_blocked_state(lp->state))) {
61  return take_snapshot;
62  }
63  // Keep track of the invocations to LogState
64  lp->from_last_ckpt++;
65 
66  if (lp->state_log_forced) {
67  lp->state_log_forced = false;
68  lp->from_last_ckpt = 0;
69 
70  take_snapshot = true;
71  goto skip_switch;
72  }
73  // Switch on the checkpointing mode
74  switch (rootsim_config.checkpointing) {
75 
76  case STATE_SAVING_COPY:
77  take_snapshot = true;
78  break;
79 
81  if (lp->from_last_ckpt >= lp->ckpt_period) {
82  take_snapshot = true;
83  lp->from_last_ckpt = 0;
84  }
85  break;
86 
87  default:
88  rootsim_error(true, "State saving mode not supported.");
89  }
90 
91  skip_switch:
92 
93  // Shall we take a log?
94  if (take_snapshot) {
95 
96  // Allocate the state buffer
97  new_state = rsalloc(sizeof(*new_state));
98 
99  // Associate the checkpoint with current LVT and last-executed event
100  new_state->lvt = lvt(lp);
101  new_state->last_event = lp->bound;
102 
103  // Log simulation model buffers
104  new_state->log = log_state(lp);
105 
106  // Log members of lp_struct which must be restored
107  new_state->state = lp->state;
108  new_state->base_pointer = lp->current_base_pointer;
109 
110  // Log library-related states
111  memcpy(&new_state->numerical, &lp->numerical,
112  sizeof(numerical_state_t));
113 
114  if(&topology_settings && topology_settings.write_enabled){
115  new_state->topology = rsalloc(topology_global.chkp_size);
116  memcpy(new_state->topology, lp->topology,
117  topology_global.chkp_size);
118  }
119 
120  if(&abm_settings){
121  new_state->region_data = abm_do_checkpoint(lp->region);
122  }
123 
124  // Link the new checkpoint to the state chain
125  list_insert_tail(lp->queue_states, new_state);
126 
127  }
128 
129  return take_snapshot;
130 }
131 
132 void RestoreState(struct lp_struct *lp, state_t * restore_state)
133 {
134  // Restore simulation model buffers
135  log_restore(lp, restore_state);
136 
137  // Restore members of lp_struct which have been checkpointed
138  lp->current_base_pointer = restore_state->base_pointer;
139  lp->state = restore_state->state;
140 
141  // Restore library-related states
142  memcpy(&lp->numerical, &restore_state->numerical,
143  sizeof(numerical_state_t));
144 
145  if(&topology_settings && topology_settings.write_enabled){
146  memcpy(lp->topology, restore_state->topology,
147  topology_global.chkp_size);
148  }
149 
150  if(&abm_settings)
151  abm_restore_checkpoint(restore_state->region_data, lp->region);
152 
153 #ifdef HAVE_CROSS_STATE
154  lp->ECS_index = 0;
155  lp->wait_on_rendezvous = 0;
156  lp->wait_on_object = 0;
157 #endif
158 }
159 
173 unsigned int silent_execution(struct lp_struct *lp, msg_t *evt, msg_t *final_evt)
174 {
175  unsigned int events = 0;
176  unsigned short int old_state;
177 
178  // current state can be either idle READY, BLOCKED or ROLLBACK, so we save it and then put it back in place
179  old_state = lp->state;
180  lp->state = LP_STATE_SILENT_EXEC;
181 
182  // This is true if the restored state was taken exactly after the new bound
183  if (evt == final_evt)
184  goto out;
185 
186  evt = list_next(evt);
187  final_evt = list_next(final_evt);
188 
189  // Reprocess events. Outgoing messages are explicitly discarded, as this part of
190  // the simulation has been already executed at least once
191  while (evt != NULL && evt != final_evt) {
192 
193  if (unlikely(!reprocess_control_msg(evt))) {
194  evt = list_next(evt);
195  continue;
196  }
197 
198  events++;
199  activate_LP(lp, evt);
200  evt = list_next(evt);
201  }
202 
203  out:
204  lp->state = old_state;
205  return events;
206 }
207 
220 void rollback(struct lp_struct *lp)
221 {
222  state_t *restore_state, *s;
223  msg_t *last_correct_event;
224  msg_t *last_restored_event;
225  unsigned int reprocessed_events;
226 
227  // Sanity check
228  if (unlikely(lp->state != LP_STATE_ROLLBACK)) {
229  rootsim_error(false, "I'm asked to roll back LP %d's execution, but rollback_bound is not set. Ignoring...\n",
230  lp->gid.to_int);
231  return;
232  }
233 
234  // Discard any possible execution state related to a blocked execution
235  memcpy(&lp->context, &lp->default_context, sizeof(LP_context_t));
236 
237  statistics_post_data(lp, STAT_ROLLBACK, 1.0);
238 
239  last_correct_event = lp->bound;
240  // Send antimessages
241  send_antimessages(lp, last_correct_event->timestamp);
242 
243  // Find the state to be restored, and prune the wrongly computed states
244  restore_state = list_tail(lp->queue_states);
245  while (restore_state != NULL && restore_state->lvt > last_correct_event->timestamp) { // It's > rather than >= because we have already taken into account simultaneous events
246  s = restore_state;
247  restore_state = list_prev(restore_state);
248  log_delete(s->log);
249 #ifndef NDEBUG
250  s->last_event = (void *)0xBABEBEEF;
251 #endif
252  list_delete_by_content(lp->queue_states, s);
253  }
254  // Restore the simulation state and correct the state base pointer
255  RestoreState(lp, restore_state);
256 
257  last_restored_event = restore_state->last_event;
258  reprocessed_events = silent_execution(lp, last_restored_event, last_correct_event);
259  statistics_post_data(lp, STAT_SILENT, (double)reprocessed_events);
260 
261  // TODO: silent execution resets the LP state to the previous
262  // value, so it should be the last function to be called within rollback()
263  // Control messages must be rolled back as well
264  rollback_control_message(lp, last_correct_event->timestamp);
265 }
266 
283 {
284  state_t *barrier_state;
285 
286  if (unlikely(D_EQUAL(simtime, 0.0))) {
287  return list_head(lp->queue_states);
288  }
289 
290  barrier_state = list_tail(lp->queue_states);
291 
292  // Must point to the state with lvt immediately before the GVT
293  while (barrier_state != NULL && barrier_state->lvt >= simtime) {
294  barrier_state = list_prev(barrier_state);
295  }
296  if (barrier_state == NULL) {
297  barrier_state = list_head(lp->queue_states);
298  }
299 
300 /*
301  // TODO Search for the first full log before the gvt
302  while(true) {
303  if(is_incremental(current->log) == false)
304  break;
305  current = list_prev(current);
306  }
307 */
308 
309  return barrier_state;
310 }
311 
321 void SetState(void *new_state)
322 {
323  current->current_base_pointer = new_state;
324 }
325 
334 void set_checkpoint_mode(int ckpt_mode)
335 {
336  rootsim_config.checkpointing = ckpt_mode;
337 }
338 
349 void set_checkpoint_period(struct lp_struct *lp, int period)
350 {
351  lp->ckpt_period = period;
352 }
353 
365 {
366  lp->state_log_forced = true;
367 }
Communication Routines.
#define lvt(lp)
Definition: process.h:168
msg_t * bound
Pointer to the last correctly processed event.
Definition: process.h:106
unsigned int silent_execution(struct lp_struct *lp, msg_t *evt, msg_t *final_evt)
Definition: state.c:173
void force_LP_checkpoint(struct lp_struct *lp)
Definition: state.c:364
Initialization routines.
Structure for LP's state.
Definition: state.h:49
void log_delete(void *ckpt)
Definition: checkpoints.c:374
simtime_t lvt
Simulation time associated with the state log.
Definition: state.h:55
Core ROOT-Sim functionalities.
void set_checkpoint_mode(int ckpt_mode)
Definition: state.c:334
unsigned chkp_size
Definition: topology.h:18
int checkpointing
Type of checkpointing scheme (e.g., PSS, CSS, ...)
Definition: init.h:65
unsigned char * abm_do_checkpoint(region_abm_t *region)
Definition: abm_layer.c:161
bool state_log_forced
If this variable is set, the next invocation to LogState() takes a new state log, independently of th...
Definition: process.h:97
unsigned int to_int
The GID numerical value.
Definition: core.h:133
Timers.
Statistics module.
The ROOT-Sim scheduler main module header.
short unsigned int state
Execution state.
Definition: state.h:64
#define list_prev(ptr)
Definition: list.h:95
__thread struct lp_struct * current
This is a per-thread variable pointing to the block state of the LP currently scheduled.
Definition: scheduler.c:72
void log_restore(struct lp_struct *lp, state_t *state_queue_node)
Definition: checkpoints.c:358
void * log_state(struct lp_struct *lp)
Definition: checkpoints.c:195
double simtime_t
This defines the type with which timestamps are represented.
Definition: ROOT-Sim.h:55
simulation_configuration rootsim_config
This global variable holds the configuration for the current simulation.
Definition: core.c:70
unsigned int from_last_ckpt
Counts how many events executed from the last checkpoint (to support PSS)
Definition: process.h:94
void * base_pointer
This is a pointer used to keep track of changes to simulation states via SetState() ...
Definition: state.h:66
Memory Manager main header.
topology_t * topology
pointer to the topology struct
Definition: process.h:152
Generic Lists.
Message Type definition.
Definition: core.h:164
void rollback(struct lp_struct *lp)
Definition: state.c:220
msg_t * last_event
This log has been taken after the execution of this event.
Definition: state.h:59
void * log
A pointer to the actual log.
Definition: state.h:57
LP control blocks.
void activate_LP(struct lp_struct *next, msg_t *evt)
Definition: scheduler.c:260
#define list_head(list)
Definition: list.h:74
#define D_EQUAL(a, b)
Equality condition for doubles.
Definition: core.h:94
region_abm_t * region
pointer to the region struct
Definition: process.h:155
GID_t gid
Global ID of the LP.
Definition: process.h:82
state_t * find_time_barrier(struct lp_struct *lp, simtime_t simtime)
Definition: state.c:282
short unsigned int state
Current execution state of the LP.
Definition: process.h:88
LP state management.
LP_context_t context
LP execution state.
Definition: process.h:67
LP_context_t default_context
LP execution state when blocked during the execution of an event.
Definition: process.h:70
void send_antimessages(struct lp_struct *lp, simtime_t after_simtime)
Send all antimessages for a certain LP.
#define list_next(ptr)
Definition: list.h:88
void abm_restore_checkpoint(unsigned char *data, region_abm_t *region)
Definition: abm_layer.c:198
void SetState(void *new_state)
Definition: state.c:321
void set_checkpoint_period(struct lp_struct *lp, int period)
Definition: state.c:349
#define list_tail(list)
Definition: list.h:81
unsigned int ckpt_period
This variable maintains the current checkpointing interval for the LP.
Definition: process.h:91
void * current_base_pointer
The current state base pointer (updated by SetState())
Definition: process.h:100
#define unlikely(exp)
Optimize the branch as likely not taken.
Definition: core.h:74
This structure describes the CPU context of a User-Level Thread.
Definition: jmp.h:42
bool LogState(struct lp_struct *lp)
Definition: state.c:55