/*
 * The ROme OpTimistic Simulator 2.0.0
 * A General-Purpose Multithreaded Parallel/Distributed Simulation Platform
 *
 * schedule.c
 */
#define EXPORT_SYMTAB
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/fs.h>
#include <linux/cdev.h>
#include <linux/errno.h>
#include <linux/device.h>
#include <linux/kprobes.h>
#include <linux/mutex.h>
#include <linux/mm.h>
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/version.h>
#include <linux/interrupt.h>
#include <linux/time.h>
#include <linux/string.h>
#include <linux/vmalloc.h>
#include <linux/preempt.h>
#include <asm/atomic.h>
#include "ld.h"
#include "lend.h"
//#include <asm/page.h>
//#include <asm/cacheflush.h>
//#include <asm/apic.h>

// This gives access to read_cr0() and write_cr0()
#if LINUX_VERSION_CODE > KERNEL_VERSION(3,3,0)
#include <asm/switch_to.h>
#else
#include <asm/system.h>
#endif

#ifndef X86_CR0_WP
#define X86_CR0_WP 0x00010000
#endif

// This macro was added in kernel 3.5
#if LINUX_VERSION_CODE < KERNEL_VERSION(3,5,0)
#define APIC_EOI_ACK 0x0 /* Docs say 0 for future compat. */
#endif

#include "schedule.h"

#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,25)
#error Unsupported Kernel Version
#endif

#define DEBUG if(1) // change to if(0) to compile out module debug code
#define DEBUG_SCHEDULE_HOOK if(1) // change to if(0) to compile out schedule_hook debug code

unsigned long the_hook = 0;
#define PERMISSION_MASK (S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP | S_IROTH)
module_param(the_hook, ulong, PERMISSION_MASK);
unsigned int audit_counter = 0;
#undef PERMISSION_MASK
#define PERMISSION_MASK (S_IRUSR | S_IRGRP | S_IROTH)
module_param(audit_counter, uint, PERMISSION_MASK);

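/*
 * Usage sketch (assuming the module is loaded as "timestretch"; adjust the
 * name to the actual .ko): since the_hook is writable, the hook target can
 * be installed at run time through the standard module parameter interface:
 *
 *   # echo 0xffffffffa0001234 > /sys/module/timestretch/parameters/the_hook
 *
 * The address above is purely illustrative.
 */
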
static atomic_t synch_leave;
static atomic_t synch_enter;

atomic_t count;
atomic_t reference_count;
ulong phase = 0; // used to implement a phase-based retry protocol for unloading this module

static void synchronize_all_slaves(void *);
static int schedule_hook_init(void);
static void schedule_unpatch(void);
static int schedule_patch(void);
static void print_bytes(char *str, unsigned char *ptr, size_t s);
extern void schedule_hook(void);
extern void schedule_hook_2(void);

MODULE_LICENSE("GPL");
MODULE_AUTHOR("Alessandro Pellegrini <pellegrini@dis.uniroma1.it>, Francesco Quaglia <quaglia@dis.uniroma1.it>");
MODULE_DESCRIPTION("Run-time patch of the Linux kernel scheduler to support the execution of a generic custom function upon thread reschedule");
module_init(schedule_hook_init);
module_exit(schedule_unpatch);

/* MODULE VARIABLES */

void *finish_task_switch = (void *)FTS_ADDR;
void *finish_task_switch_next = (void *)FTS_ADDR_NEXT;

typedef struct backup {
	char bytes[128];
	void *init_addr;
	unsigned short int len;
} backup_t;
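
/*
 * One backup_t entry is recorded per patched ret site: `bytes` holds the
 * original instructions that the 5-byte JMP overwrites, `init_addr` is the
 * address they were taken from, and `len` is how many bytes were saved.
 * schedule_unpatch() copies these bytes back verbatim at unload time.
 */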

backup_t *b;

unsigned short int backup_count = 0;

typedef struct instr {
	void *ptr;
	unsigned char bytecode[16];
	short unsigned int size;
} instr_t;

// These symbols are defined in hook.S
extern void schedule_hook_end(void);
extern void schedule_hook_patch_point(void);
extern void schedule_hook_patch_point_2(void);

static void synchronize_all_slaves(void *info) {
	(void)info;

	printk(KERN_DEBUG "%s: cpu %d entering synchronize_all_slaves\n", KBUILD_MODNAME, smp_processor_id());
	atomic_dec(&synch_enter);
	preempt_disable();
	while (atomic_read(&synch_leave) > 0)
		cpu_relax();
	preempt_enable();
	printk(KERN_DEBUG "%s: cpu %d leaving synchronize_all_slaves\n", KBUILD_MODNAME, smp_processor_id());
}

#define synchronize_all() do { \
		printk("cpu %d asking for non-preemptive synchronization\n", smp_processor_id()); \
		atomic_set(&synch_enter, num_online_cpus() - 1); \
		atomic_set(&synch_leave, 1); \
		preempt_disable(); \
		smp_call_function_many(cpu_online_mask, synchronize_all_slaves, NULL, false); /* false because we synchronize manually */ \
		while (atomic_read(&synch_enter) > 0); \
		printk("cpu %d: all kernel threads synchronized\n", smp_processor_id()); \
	} while(0)

#define unsynchronize_all() do { \
		printk("cpu %d freeing the other kernel threads\n", smp_processor_id()); \
		atomic_set(&synch_leave, 0); \
		preempt_enable(); \
	} while(0)
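
/*
 * Rendezvous protocol, as implemented above: the patching CPU sets
 * synch_enter to the number of other online CPUs and synch_leave to 1, then
 * IPIs all of them via smp_call_function_many(). Each slave decrements
 * synch_enter and spins with preemption disabled until synch_leave drops to
 * zero. Once synch_enter reaches zero, every other CPU is parked and the
 * kernel text can be rewritten safely; unsynchronize_all() then releases
 * the slaves.
 */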

static void print_bytes(char *str, unsigned char *ptr, size_t s) {
	size_t i;

	printk(KERN_DEBUG "%s: %s: ", KBUILD_MODNAME, str);
	for (i = 0; i < s; i++)
		printk(KERN_CONT "%02x ", ptr[i]);
	printk(KERN_CONT "\n");
}


static int schedule_patch(void) {

	int pos = 0;
	long displacement;
	unsigned long cr0;
	unsigned char bytes_to_redirect[6];
	int i = 0;
	int j;
	int k;
	int count;
	int size;
	int patch_size, patch_offset;
	void *temp;
	void *upper_bound, *lower_bound;

	instr_t *v = (instr_t *)kmalloc(((unsigned char *)finish_task_switch_next - (unsigned char *)finish_task_switch) * sizeof(instr_t), GFP_KERNEL);
	if(!v) {
		printk(KERN_ERR "%s: unable to allocate the instruction array\n", KBUILD_MODNAME);
		return -ENOMEM;
	}
	memset(v, 0, ((unsigned char *)finish_task_switch_next - (unsigned char *)finish_task_switch) * sizeof(instr_t));

	b = (backup_t *)kmalloc(20 * sizeof(backup_t), GFP_KERNEL); // 20 is the maximum number of ret sites this module can handle
	if(!b) {
		printk(KERN_ERR "%s: unable to allocate the backup array\n", KBUILD_MODNAME);
		kfree(v);
		return -ENOMEM;
	}

	memset(b, 0, 20 * sizeof(backup_t));

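	/*
	 * length_disasm() comes from the bundled length disassembler (ld.h/lend.h):
	 * it returns only the size in bytes of the instruction at the given
	 * address, which is all the loop below needs to walk finish_task_switch
	 * one instruction at a time, recording each instruction's address,
	 * bytes and length.
	 */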
	// v will contain all the instructions between finish_task_switch and finish_task_switch_next
	temp = (void *)finish_task_switch; // work copy of finish_task_switch for the disassembler
	while (temp < finish_task_switch_next) {
		size = length_disasm(temp, MODE_X64);
		v[i].size = size;
		v[i].ptr = temp;
		memcpy(v[i].bytecode, (unsigned char *)temp, size);
		i++;
		temp = (unsigned char *)temp + size;
	}
	// v populated successfully
	print_bytes("finish_task_switch before patch", finish_task_switch, finish_task_switch_next - finish_task_switch);

	for (j = 0; j < i; j++) {
		print_bytes("instruction:", v[j].bytecode, v[j].size);
		//printk("\t address: %p\n", v[j].ptr);
	}

	// disable memory protection
	cr0 = read_cr0();
	write_cr0(cr0 & ~X86_CR0_WP);
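
	/*
	 * Note: with the CR0.WP bit cleared, ring-0 writes are allowed even to
	 * pages mapped read-only, which is what permits rewriting the kernel
	 * text of finish_task_switch in place. The bit is restored further
	 * below, once patching is complete.
	 */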

	// Find every ret (0xc3), back up the instructions preceding it, and patch them with a jump
	print_bytes("finish_task_switch_next before self patching", (unsigned char *)finish_task_switch, finish_task_switch_next - finish_task_switch);
	for (j = 0; j < i; j++) {
		if (v[j].size == 1 && v[j].bytecode[0] == 0xc3) { // return found
			printk(KERN_DEBUG "%s: return found at address %p, offset %p\n", KBUILD_MODNAME, (unsigned char *)v[j].ptr, (void *)(v[j].ptr - finish_task_switch));
			// walk backwards until at least 5 bytes (the jump size) are gathered
			k = 0;
			count = 1;
			do {
				k += v[j - count].size;
				count++;
			} while (k < 5);
			count--;
			print_bytes("upper bound instruction: ", v[j - count].bytecode, v[j - count].size);
			upper_bound = v[j - count].ptr;
			lower_bound = v[j].ptr;
			size = lower_bound - upper_bound;
			printk(KERN_DEBUG "%s: size=%d\n", KBUILD_MODNAME, size);
			b[backup_count].init_addr = upper_bound;
			b[backup_count].len = size;
			memcpy(b[backup_count].bytes, upper_bound, size);
			print_bytes("Backup bytes: ", b[backup_count].bytes, size);

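			/*
			 * The detour is a 5-byte near jump: opcode 0xe9 followed by
			 * a 32-bit little-endian displacement, relative to the first
			 * byte after the jump instruction itself (hence the -5).
			 * Each ret site is diverted to its own copy of the hook,
			 * the copies being laid out (schedule_hook_2 - schedule_hook)
			 * bytes apart.
			 */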
			// Assemble the JMP
			displacement = ((long)((long)schedule_hook + (long)backup_count * ((long)schedule_hook_2 - (long)schedule_hook)) - (long)upper_bound) - 5;
			//displacement = ((long)schedule_hook - (long)finish_task_switch_next);
			pos = 0;
			bytes_to_redirect[pos++] = 0xe9;
			bytes_to_redirect[pos++] = (unsigned char)(displacement & 0xff);
			bytes_to_redirect[pos++] = (unsigned char)(displacement >> 8 & 0xff);
			bytes_to_redirect[pos++] = (unsigned char)(displacement >> 16 & 0xff);
			bytes_to_redirect[pos++] = (unsigned char)(displacement >> 24 & 0xff);

			print_bytes("assembled jump is", bytes_to_redirect, 5);

			backup_count++;

			// Patch schedule()
			//if (backup_count != -1)
			memcpy(upper_bound, bytes_to_redirect, 5);
		}
	}

	print_bytes("schedule after patch: ", finish_task_switch, finish_task_switch_next - finish_task_switch);

	// here all the displaced bytes have been backed up

	patch_size = schedule_hook_2 - schedule_hook_patch_point;
	patch_offset = schedule_hook_patch_point_2 - schedule_hook_patch_point;
	printk(KERN_DEBUG "%s: schedule_hook is at address %p\n", KBUILD_MODNAME, schedule_hook);
	print_bytes("schedule_hook_patch_point: ", (unsigned char *)schedule_hook_patch_point, patch_size);

	// patch each schedule_hook_patch_point copy with the backed-up bytes
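	/*
	 * hook.S lays out one identical hook stub per possible ret site,
	 * (schedule_hook_2 - schedule_hook) bytes apart; each stub contains a
	 * patch point sized to receive the bytes displaced by the JMP above,
	 * presumably so the original instructions are still executed from
	 * within the stub.
	 */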
	for (j = 0; j < backup_count; j++) {
		memcpy(schedule_hook_patch_point + j * patch_offset, b[j].bytes, b[j].len);
		print_bytes("schedule_hook_patch_point after patch: ", (unsigned char *)schedule_hook + j * (schedule_hook_2 - schedule_hook), schedule_hook_2 - schedule_hook);
	}

	write_cr0(cr0);
	kfree(v);
	return 0;
}


static void schedule_unpatch(void) {

	unsigned long cr0;
	int i;

	synchronize_all();
	printk(KERN_DEBUG "%s: restoring the standard schedule function...\n", KBUILD_MODNAME);
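
	/*
	 * While the slaves are parked in synchronize_all_slaves() with
	 * preemption disabled, no other CPU can be running through the patched
	 * postamble, so the original bytes can be copied back without racing
	 * against concurrent execution of the code being rewritten.
	 */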

	// To unpatch, simply place back the original bytes of finish_task_switch
	cr0 = read_cr0();
	write_cr0(cr0 & ~X86_CR0_WP);

	// Restore the original bytes of the finish_task_switch postamble
	for (i = 0; i < backup_count; i++) {
		memcpy(b[i].init_addr, b[i].bytes, b[i].len);
	}

	write_cr0(cr0);
	kfree(b);
	print_bytes("Schedule after restore: ", finish_task_switch, finish_task_switch_next - finish_task_switch);
	printk(KERN_INFO "%s: standard schedule function correctly restored\n", KBUILD_MODNAME);
	unsynchronize_all();
}


static int schedule_hook_init(void) {

	int ret = 0;

	atomic_set(&count, 0);
	atomic_set(&reference_count, 0);

	printk(KERN_INFO "%s: mounting the module\n", KBUILD_MODNAME);

	ret = schedule_patch();

	return ret;
}
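
/*
 * Typical lifecycle (a sketch; the module/object name may differ):
 *
 *   # insmod timestretch.ko      <- patches finish_task_switch at load time
 *   # rmmod timestretch          <- restores the original bytes
 *
 * FTS_ADDR and FTS_ADDR_NEXT are expected to be supplied at build time
 * (e.g. looked up in System.map), since finish_task_switch is not an
 * exported symbol that the module could resolve at run time.
 */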