The ROme OpTimistic Simulator  2.0.0
A General-Purpose Multithreaded Parallel/Distributed Simulation Platform
state.c
Go to the documentation of this file.
1 
34 #include <stdlib.h>
35 #include <math.h>
36 #include <string.h>
37 
38 #include <core/core.h>
39 #include <core/init.h>
40 #include <core/timer.h>
41 #include <gvt/ccgs.h>
42 #include <datatypes/list.h>
43 #include <scheduler/process.h>
44 #include <scheduler/scheduler.h>
45 #include <mm/state.h>
47 #include <mm/mm.h>
48 #include <statistics/statistics.h>
49 
56 bool LogState(struct lp_struct *lp)
57 {
58  bool take_snapshot = false;
59  state_t *new_state;
60 
61  if (unlikely(is_blocked_state(lp->state))) {
62  return take_snapshot;
63  }
64  // Keep track of the invocations to LogState
65  lp->from_last_ckpt++;
66 
67  if (lp->state_log_forced) {
68  lp->state_log_forced = false;
69  lp->from_last_ckpt = 0;
70 
71  take_snapshot = true;
72  goto skip_switch;
73  }
74  // Switch on the checkpointing mode
75  switch (rootsim_config.checkpointing) {
76 
77  case STATE_SAVING_COPY:
78  take_snapshot = true;
79  break;
80 
82  if (lp->from_last_ckpt >= lp->ckpt_period) {
83  take_snapshot = true;
84  lp->from_last_ckpt = 0;
85  }
86  break;
87 
88  default:
89  rootsim_error(true, "State saving mode not supported.");
90  }
91 
92  skip_switch:
93 
94  // Shall we take a log?
95  if (take_snapshot) {
96 
97  // Allocate the state buffer
98  new_state = rsalloc(sizeof(*new_state));
99 
100  // Associate the checkpoint with current LVT and last-executed event
101  new_state->lvt = lvt(lp);
102  new_state->last_event = lp->bound;
103 
104  // Log simulation model buffers
105  new_state->log = log_state(lp);
106 
107  // Log members of lp_struct which must be restored
108  new_state->state = lp->state;
109  new_state->base_pointer = lp->current_base_pointer;
110 
111  // Early evaluation of simulation termination.
112  new_state->simulation_completed = ccgs_lp_can_halt(lp);
113 
114  // Log library-related states
115  memcpy(&new_state->numerical, &lp->numerical, sizeof(numerical_state_t));
116 
117  if(&topology_settings && topology_settings.write_enabled) {
118  new_state->topology = rsalloc(topology_global.chkp_size);
119  memcpy(new_state->topology, lp->topology, topology_global.chkp_size);
120  }
121 
122  if(&abm_settings) {
123  new_state->region_data = abm_do_checkpoint(lp->region);
124  }
125 
126  // Link the new checkpoint to the state chain
127  list_insert_tail(lp->queue_states, new_state);
128 
129  }
130 
131  return take_snapshot;
132 }
133 
134 void RestoreState(struct lp_struct *lp, state_t * restore_state)
135 {
136  // Restore simulation model buffers
137  log_restore(lp, restore_state);
138 
139  // Restore members of lp_struct which have been checkpointed
140  lp->current_base_pointer = restore_state->base_pointer;
141  lp->state = restore_state->state;
142 
143  // Restore library-related states
144  memcpy(&lp->numerical, &restore_state->numerical,
145  sizeof(numerical_state_t));
146 
147  if(&topology_settings && topology_settings.write_enabled){
148  memcpy(lp->topology, restore_state->topology,
149  topology_global.chkp_size);
150  }
151 
152  if(&abm_settings)
153  abm_restore_checkpoint(restore_state->region_data, lp->region);
154 
155 #ifdef HAVE_CROSS_STATE
156  lp->ECS_index = 0;
157  lp->wait_on_rendezvous = 0;
158  lp->wait_on_object = 0;
159 #endif
160 }
161 
175 unsigned int silent_execution(struct lp_struct *lp, msg_t *evt, msg_t *final_evt)
176 {
177  unsigned int events = 0;
178  unsigned short int old_state;
179 
180  // current state can be either idle READY, BLOCKED or ROLLBACK, so we save it and then put it back in place
181  old_state = lp->state;
182  lp->state = LP_STATE_SILENT_EXEC;
183 
184  // This is true if the restored state was taken exactly after the new bound
185  if (evt == final_evt)
186  goto out;
187 
188  evt = list_next(evt);
189  final_evt = list_next(final_evt);
190 
191  // Reprocess events. Outgoing messages are explicitly discarded, as this part of
192  // the simulation has been already executed at least once
193  while (evt != NULL && evt != final_evt) {
194 
195  if (unlikely(!reprocess_control_msg(evt))) {
196  evt = list_next(evt);
197  continue;
198  }
199 
200  events++;
201  activate_LP(lp, evt);
202  evt = list_next(evt);
203  }
204 
205  out:
206  lp->state = old_state;
207  return events;
208 }
209 
222 void rollback(struct lp_struct *lp)
223 {
224  state_t *restore_state, *s;
225  msg_t *last_correct_event;
226  msg_t *last_restored_event;
227  unsigned int reprocessed_events;
228 
229  // Sanity check
230  if (unlikely(lp->state != LP_STATE_ROLLBACK)) {
231  rootsim_error(false, "I'm asked to roll back LP %d's execution, but rollback_bound is not set. Ignoring...\n",
232  lp->gid.to_int);
233  return;
234  }
235 
236  // Discard any possible execution state related to a blocked execution
237  memcpy(&lp->context, &lp->default_context, sizeof(LP_context_t));
238 
239  statistics_post_data(lp, STAT_ROLLBACK, 1.0);
240 
241  last_correct_event = lp->bound;
242  // Send antimessages
243  send_antimessages(lp, last_correct_event->timestamp);
244 
245  // Find the state to be restored, and prune the wrongly computed states
246  restore_state = list_tail(lp->queue_states);
247  while (restore_state != NULL && restore_state->lvt > last_correct_event->timestamp) { // It's > rather than >= because we have already taken into account simultaneous events
248  s = restore_state;
249  restore_state = list_prev(restore_state);
250  log_delete(s->log);
251 #ifndef NDEBUG
252  s->last_event = (void *)0xBABEBEEF;
253 #endif
254  list_delete_by_content(lp->queue_states, s);
255  }
256  // Restore the simulation state and correct the state base pointer
257  RestoreState(lp, restore_state);
258 
259  last_restored_event = restore_state->last_event;
260  reprocessed_events = silent_execution(lp, last_restored_event, last_correct_event);
261  statistics_post_data(lp, STAT_SILENT, (double)reprocessed_events);
262 
263  // TODO: silent execution resets the LP state to the previous
264  // value, so it should be the last function to be called within rollback()
265  // Control messages must be rolled back as well
266  rollback_control_message(lp, last_correct_event->timestamp);
267 }
268 
285 {
286  state_t *barrier_state;
287 
288  if (unlikely(D_EQUAL(simtime, 0.0))) {
289  return list_head(lp->queue_states);
290  }
291 
292  barrier_state = list_tail(lp->queue_states);
293 
294  // Must point to the state with lvt immediately before the GVT
295  while (barrier_state != NULL && barrier_state->lvt >= simtime) {
296  barrier_state = list_prev(barrier_state);
297  }
298  if (barrier_state == NULL) {
299  barrier_state = list_head(lp->queue_states);
300  }
301 
302 /*
303  // TODO Search for the first full log before the gvt
304  while(true) {
305  if(is_incremental(current->log) == false)
306  break;
307  current = list_prev(current);
308  }
309 */
310 
311  return barrier_state;
312 }
313 
323 void SetState(void *new_state)
324 {
325  current->current_base_pointer = new_state;
326 }
327 
336 void set_checkpoint_mode(int ckpt_mode)
337 {
338  rootsim_config.checkpointing = ckpt_mode;
339 }
340 
351 void set_checkpoint_period(struct lp_struct *lp, int period)
352 {
353  lp->ckpt_period = period;
354 }
355 
367 {
368  lp->state_log_forced = true;
369 }
Communication Routines.
#define lvt(lp)
Definition: process.h:168
msg_t * bound
Pointer to the last correctly processed event.
Definition: process.h:106
unsigned int silent_execution(struct lp_struct *lp, msg_t *evt, msg_t *final_evt)
Definition: state.c:175
void force_LP_checkpoint(struct lp_struct *lp)
Definition: state.c:366
Initialization routines.
Structure for LP's state.
Definition: state.h:49
void log_delete(void *ckpt)
Definition: checkpoints.c:371
simtime_t lvt
Simulation time associated with the state log.
Definition: state.h:55
Core ROOT-Sim functionalities.
void set_checkpoint_mode(int ckpt_mode)
Definition: state.c:336
unsigned chkp_size
Definition: topology.h:18
int checkpointing
Type of checkpointing scheme (e.g., PSS, CSS, ...)
Definition: init.h:64
bool ccgs_lp_can_halt(struct lp_struct *lp)
Evaluate if the simulation can be halted according to some LP.
Definition: ccgs.c:153
unsigned char * abm_do_checkpoint(region_abm_t *region)
Definition: abm_layer.c:161
bool state_log_forced
If this variable is set, the next invocation to LogState() takes a new state log, independently of th...
Definition: process.h:97
unsigned int to_int
The GID numerical value.
Definition: core.h:133
Timers.
Statistics module.
The ROOT-Sim scheduler main module header.
short unsigned int state
Execution state.
Definition: state.h:64
#define list_prev(ptr)
Definition: list.h:95
__thread struct lp_struct * current
This is a per-thread variable pointing to the block state of the LP currently scheduled.
Definition: scheduler.c:72
Consistent and Committed Global State.
void log_restore(struct lp_struct *lp, state_t *state_queue_node)
Definition: checkpoints.c:355
void * log_state(struct lp_struct *lp)
Definition: checkpoints.c:195
double simtime_t
This defines the type with which timestamps are represented.
Definition: ROOT-Sim.h:56
simulation_configuration rootsim_config
This global variable holds the configuration for the current simulation.
Definition: core.c:70
unsigned int from_last_ckpt
Counts how many events executed from the last checkpoint (to support PSS)
Definition: process.h:94
void * base_pointer
This is a pointer used to keep track of changes to simulation states via SetState() ...
Definition: state.h:67
Memory Manager main header.
topology_t * topology
pointer to the topology struct
Definition: process.h:152
Generic Lists.
Message Type definition.
Definition: core.h:164
void rollback(struct lp_struct *lp)
Definition: state.c:222
msg_t * last_event
This log has been taken after the execution of this event.
Definition: state.h:59
void * log
A pointer to the actual log.
Definition: state.h:57
LP control blocks.
void activate_LP(struct lp_struct *next, msg_t *evt)
Definition: scheduler.c:260
#define list_head(list)
Definition: list.h:74
#define D_EQUAL(a, b)
Equality condition for doubles.
Definition: core.h:94
region_abm_t * region
pointer to the region struct
Definition: process.h:155
GID_t gid
Global ID of the LP.
Definition: process.h:82
state_t * find_time_barrier(struct lp_struct *lp, simtime_t simtime)
Definition: state.c:284
short unsigned int state
Current execution state of the LP.
Definition: process.h:88
LP state management.
bool simulation_completed
Termination predicate for this LP.
Definition: state.h:70
LP_context_t context
LP execution state.
Definition: process.h:67
LP_context_t default_context
LP execution state when blocked during the execution of an event.
Definition: process.h:70
void send_antimessages(struct lp_struct *lp, simtime_t after_simtime)
Send all antimessages for a certain LP.
#define list_next(ptr)
Definition: list.h:88
void abm_restore_checkpoint(unsigned char *data, region_abm_t *region)
Definition: abm_layer.c:198
void SetState(void *new_state)
Definition: state.c:323
void set_checkpoint_period(struct lp_struct *lp, int period)
Definition: state.c:351
#define list_tail(list)
Definition: list.h:81
unsigned int ckpt_period
This variable maintains the current checkpointing interval for the LP.
Definition: process.h:91
void * current_base_pointer
The current state base pointer (updated by SetState())
Definition: process.h:100
#define unlikely(exp)
Optimize the branch as likely not taken.
Definition: core.h:74
This structure describes the CPU context of a User-Level Thread.
Definition: jmp.h:42
bool LogState(struct lp_struct *lp)
Definition: state.c:56