#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/cdev.h>
#include <linux/errno.h>
#include <linux/device.h>
#include <linux/kprobes.h>
#include <linux/mutex.h>
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/version.h>
#include <asm/tlbflush.h>
#include <asm/cacheflush.h>
#include <linux/uaccess.h>

#define AUXILIARY_FRAMES 256

#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,25)
#error Unsupported Kernel Version
#endif

#define SET_BIT(p,n) ((*(ulong *)(p)) |= (1LL << (n)))
#define CLR_BIT(p,n) ((*(ulong *)(p)) &= ~((1LL) << (n)))
#define GET_BIT(p,n) ((*(ulong *)(p)) & (1LL << (n)))

typedef void (*do_page_fault_t)(struct pt_regs *, unsigned long);
static int rs_ktblmgr_init(void);
static void rs_ktblmgr_cleanup(void);
static int rs_ktblmgr_open(struct inode *, struct file *);
static int rs_ktblmgr_release(struct inode *, struct file *);
static long rs_ktblmgr_ioctl(struct file *, unsigned int, unsigned long);
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Alessandro Pellegrini <pellegrini@dis.uniroma1.it>, Francesco Quaglia <quaglia@dis.uniroma1.it>");
MODULE_DESCRIPTION("ROOT-Sim Multiple Page Table Kernel Module");
module_init(rs_ktblmgr_init);
module_exit(rs_ktblmgr_cleanup);
void (*rootsim_pager)(void) = NULL;

struct mutex pgd_get_mutex;
struct mm_struct *mm_struct_addr[SIBLING_PGD];
void *pgd_addr[SIBLING_PGD];
unsigned int managed_pgds = 0;
struct mm_struct *original_view[SIBLING_PGD];

int stack_index = AUXILIARY_FRAMES - 1;
void *auxiliary_frames[AUXILIARY_FRAMES];

int root_sim_processes[SIBLING_PGD] = {[0 ... (SIBLING_PGD-1)] = -1};

int currently_open[SIBLING_PGD][MAX_CROSS_STATE_DEPENDENCIES];
int open_index[SIBLING_PGD] = {[0 ... (SIBLING_PGD-1)] = -1};

void **ancestor_pml4;
int restore_pml4_entries;
int mapped_processes;

struct vm_area_struct *changed_mode_mmap;
struct vm_operations_struct *original_vm_ops;
struct vm_operations_struct auxiliary_vm_ops_table;
struct vm_area_struct *target_vma;
int (*original_fault_handler)(struct vm_area_struct *vma, struct vm_fault *vmf);

static DEVICE_ATTR(multimap, S_IRUSR|S_IRGRP|S_IROTH, NULL, NULL);
struct file_operations fops = {
	.open = rs_ktblmgr_open,
	.unlocked_ioctl = rs_ktblmgr_ioctl,
	.compat_ioctl = rs_ktblmgr_ioctl,
	.release = rs_ktblmgr_release
};
static inline void rootsim_load_cr3(pgd_t *pgdir) {
	__asm__ __volatile__ ("mov %0, %%cr3" :: "r" (__pa(pgdir)));
}
static void set_single_pte_sticky_flag(void *target_address) {
	pdp = (void **)__va((ulong)pgd[PML4(target_address)] & 0xfffffffffffff000);
	pde = (void **)__va((ulong)pdp[PDP(target_address)] & 0xfffffffffffff000);
	pte = (void **)__va((ulong)pde[PDE(target_address)] & 0xfffffffffffff000);

	SET_BIT(&pte[PTE(target_address)], 9);
static void set_pte_sticky_flags(ioctl_info *info) {
	pdp = (void **)__va((ulong)pgd[PML4(info->base_address)] & 0xfffffffffffff000);
	pde = (void **)__va((ulong)pdp[PDP(info->base_address)] & 0xfffffffffffff000);

	SET_BIT(&pdp[PDP(info->base_address)], 11);

	for(i = 0; i < 512; i++) {
		pte = (void **)__va((ulong)pde[i] & 0xfffffffffffff000);

		for(j = 0; j < 512; j++) {
			if(GET_BIT(&pte[j], 0)) {
static int get_pde_sticky_bit(void *target_address) {
	pdp = (void **)__va((ulong)pgd[PML4(target_address)] & 0xfffffffffffff000);

	return GET_BIT(&pdp[PDP(target_address)], 11);
static int get_pte_sticky_bit(void *target_address) {
	pdp = (void **)__va((ulong)pgd[PML4(target_address)] & 0xfffffffffffff000);
	pde = (void **)__va((ulong)pdp[PDP(target_address)] & 0xfffffffffffff000);

	if(pde[PDE(target_address)] == NULL) {

	return GET_BIT(&pde[PDE(target_address)], 9);
static int get_presence_bit(void *target_address) {
	pdp = (void **)__va((ulong)pgd[PML4(target_address)] & 0xfffffffffffff000);
	pde = (void **)__va((ulong)pdp[PDP(target_address)] & 0xfffffffffffff000);

	if(pde[PDE(target_address)] == NULL) {

	pte = (void **)__va((ulong)pde[PDE(target_address)] & 0xfffffffffffff000);

	return GET_BIT(&pte[PTE(target_address)], 0);
static void set_presence_bit(void *target_address) {
	pdp = (void **)__va((ulong)pgd[PML4(target_address)] & 0xfffffffffffff000);
	pde = (void **)__va((ulong)pdp[PDP(target_address)] & 0xfffffffffffff000);
	pte = (void **)__va((ulong)pde[PDE(target_address)] & 0xfffffffffffff000);

	if(GET_BIT(&pte[PTE(target_address)], 9)) {
		SET_BIT(&pte[PTE(target_address)], 0);
	} else {
		printk("Unexpectedly reached the else branch!\n");
static void set_page_privilege(ioctl_info *info) {
	void *base_address = info->base_address;
	void *final_address = info->base_address + info->count * PAGE_SIZE;

	pdp = (void **)__va((ulong)pgd[PML4(info->base_address)] & 0xfffffffffffff000);
	pde = (void **)__va((ulong)pdp[PDP(info->base_address)] & 0xfffffffffffff000);

	for(i = PDE(base_address); i <= PDE(final_address); i++) {
		pte = (void **)__va((ulong)pde[i] & 0xfffffffffffff000);

		for(j = 0; j < 512; j++) {

			if(info->write_mode) {
static void set_single_page_privilege(ioctl_info *info) {
	pdp = (void **)__va((ulong)pgd[PML4(info->base_address)] & 0xfffffffffffff000);
	pde = (void **)__va((ulong)pdp[PDP(info->base_address)] & 0xfffffffffffff000);
	pte = (void **)__va((ulong)pde[PDE(info->base_address)] & 0xfffffffffffff000);

	if(info->write_mode) {
		SET_BIT(&pte[PTE(info->base_address)], 1);
	} else {
		CLR_BIT(&pte[PTE(info->base_address)], 1);
void root_sim_page_fault(struct pt_regs *regs, long error_code, do_page_fault_t kernel_handler) {
	void *target_address;
	ulong *auxiliary_stack_pointer;

	kernel_handler(regs, error_code);

	for(i = 0; i < SIBLING_PGD; i++) {
		if (root_sim_processes[i] == current->pid) {

			target_address = (void *)read_cr2();

			if(PML4(target_address) < restore_pml4 || PML4(target_address) >= restore_pml4 + restore_pml4_entries) {
				kernel_handler(regs, error_code);

			my_pgd = (void **)pgd_addr[i];
			my_pdp = (void *)my_pgd[PML4(target_address)];
			my_pdp = __va((ulong)my_pdp & 0xfffffffffffff000);

			root_sim_fault_info[i]->rcx = regs->cx;
			root_sim_fault_info[i]->rip = regs->ip;
			root_sim_fault_info[i]->target_address = (long long)target_address;
			root_sim_fault_info[i]->target_gid = (PML4(target_address) - restore_pml4) * 512 + PDP(target_address);

			if((ulong)my_pdp[PDP(target_address)] == NULL) {
				printk("ECS Major Fault at %p\n", target_address);
				root_sim_fault_info[i]->fault_type = ECS_MAJOR_FAULT;

			if(get_pte_sticky_bit(target_address) != 0) {
				printk("ECS Minor Fault (1) at %p\n", target_address);
				root_sim_fault_info[i]->fault_type = ECS_MINOR_FAULT;
				set_presence_bit(target_address);

			if(get_presence_bit(target_address) == 0) {
				printk("Materializing page for %p\n", target_address);
				kernel_handler(regs, error_code);
				if(get_pde_sticky_bit(target_address)) {
					printk("ECS Minor Fault (2) at %p\n", target_address);
					set_single_pte_sticky_flag(target_address);

				root_sim_fault_info[i]->fault_type = ECS_CHANGE_PAGE_PRIVILEGE;

				info.base_address = (void *)((long long)target_address & (~(PAGE_SIZE-1)));

				set_single_page_privilege(&info);

				rootsim_load_cr3(pgd_addr[i]);

			printk("Activating userspace handler\n");

			rs_ktblmgr_ioctl(NULL, IOCTL_UNSCHEDULE_ON_PGD, (int)i);

			auxiliary_stack_pointer = (ulong *)regs->sp;
			auxiliary_stack_pointer--;
			copy_to_user((void *)auxiliary_stack_pointer, (void *)&regs->ip, 8);
			regs->sp = (long)auxiliary_stack_pointer;

	kernel_handler(regs, error_code);
}
EXPORT_SYMBOL(root_sim_page_fault);
int rs_ktblmgr_open(struct inode *inode, struct file *filp) {

	if (((filp->f_flags & O_ACCMODE) == O_WRONLY)
	    || ((filp->f_flags & O_ACCMODE) == O_RDWR)) {

	if (!mutex_trylock(&rs_ktblmgr_mutex)) {
		printk(KERN_INFO "%s: Trying to open an already-opened special device file\n", KBUILD_MODNAME);
int rs_ktblmgr_release(struct inode *inode, struct file *filp) {

	involved_pml4 = restore_pml4_entries;

	for (j = 0; j < SIBLING_PGD; j++) {

		original_view[j] = NULL;

		if (original_view[j] != NULL) {

			pgd_entry = (void **)pgd_addr[j];
			for (i = 0; i < involved_pml4; i++) {

				printk("\tPML4 ENTRY FOR CLOSE DEVICE IS %d\n", pml4);

				temp = pgd_entry[pml4];
				temp = (void *)((ulong)temp & 0xfffffffffffff000);
				printk("temp is %p\n", temp);
				address = (void *)__va(temp);

				printk("would free address at %p\n", address);

				pgd_entry[pml4] = ancestor_pml4[pml4];

			original_view[j] = NULL;

	mutex_unlock(&rs_ktblmgr_mutex);
static long rs_ktblmgr_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) {

	int scheduled_object;
	int *scheduled_objects;
	int scheduled_objects_count;

	case IOCTL_SET_ANCESTOR_PGD:
		ancestor_pml4 = (void **)current->mm->pgd;

		printk("Entering IOCTL_GET_PGD\n");

		mutex_lock(&pgd_get_mutex);
		for (i = 0; i < SIBLING_PGD; i++) {
			if (original_view[i] == NULL) {
				memcpy((void *)pgd_addr[i], (void *)(current->mm->pgd), 4096);

		mutex_unlock(&pgd_get_mutex);

		if(descriptor != -1) {

			involved_pml4 = restore_pml4_entries;

			pgd_entry = (void **)pgd_addr[descriptor];

			for (i = 0; i < involved_pml4; i++) {

				address = (void *)__get_free_pages(GFP_KERNEL, 0);
				memset(address, 0, 4096);

				temp = pgd_entry[pml4];

				temp = (void *)((ulong)temp & 0x0000000000000fff);
				address = (void *)__pa(address);
				temp = (void *)((ulong)address | (ulong)temp);
				pgd_entry[pml4] = temp;

		printk("Leaving IOCTL_GET_PGD\n");
	case IOCTL_SCHEDULE_ON_PGD:

		scheduled_objects_count = ((ioctl_info *)arg)->count;
		scheduled_objects = ((ioctl_info *)arg)->objects;

		if (original_view[descriptor] != NULL) {

			for(i = 0; i < scheduled_objects_count; i++) {

				copy_from_user((void *)&scheduled_object, (void *)&scheduled_objects[i], sizeof(int));
				open_index[descriptor]++;
				currently_open[descriptor][open_index[descriptor]] = scheduled_object;

				pml4 = restore_pml4 + OBJECT_TO_PML4(scheduled_object);
				my_pgd = (void **)pgd_addr[descriptor];
				my_pdp = (void *)my_pgd[pml4];
				my_pdp = __va((ulong)my_pdp & 0xfffffffffffff000);

				ancestor_pdp = (void *)ancestor_pml4[pml4];
				ancestor_pdp = __va((ulong)ancestor_pdp & 0xfffffffffffff000);

				my_pdp[OBJECT_TO_PDP(scheduled_object)] = ancestor_pdp[OBJECT_TO_PDP(scheduled_object)];

			root_sim_processes[descriptor] = current->pid;
			rootsim_load_cr3(pgd_addr[descriptor]);
	case IOCTL_UNSCHEDULE_ON_PGD:

		if ((original_view[descriptor] != NULL) && (current->mm->pgd != NULL)) {

			root_sim_processes[descriptor] = -1;

			for(i = open_index[descriptor]; i >= 0; i--) {

				object_to_close = currently_open[descriptor][i];

				pml4 = restore_pml4 + OBJECT_TO_PML4(object_to_close);
				my_pgd = (void **)pgd_addr[descriptor];
				my_pdp = (void *)my_pgd[pml4];
				my_pdp = __va((ulong)my_pdp & 0xfffffffffffff000);

				my_pdp[OBJECT_TO_PDP(object_to_close)] = NULL;

			open_index[descriptor] = -1;
	case IOCTL_SET_VM_RANGE:

		mapped_processes = (((ioctl_info *)arg)->mapped_processes);
		involved_pml4 = (((ioctl_info *)arg)->mapped_processes) >> 9;
		if ((unsigned)((ioctl_info *)arg)->mapped_processes & 0x00000000000001ff)
			involved_pml4++;

		printk("LOGGING CHANGE VIEW INVOLVING %u PROCESSES AND %d PML4 ENTRIES STARTING FROM ENTRY %d (address %p)\n",
		       ((ioctl_info *)arg)->mapped_processes, involved_pml4, pml4, ((ioctl_info *)arg)->addr);

		restore_pml4_entries = involved_pml4;

	case IOCTL_SET_PAGE_PRIVILEGE:

	case IOCTL_PROTECT_REMOTE_LP:
void the_pager(void) {

	for(i = 0; i < SIBLING_PGD; i++) {
		if ((root_sim_processes[i]) == (current->pid)) {
			rootsim_load_cr3(pgd_addr[i]);
static int rs_ktblmgr_init(void) {

	rootsim_pager = the_pager;

	mutex_init(&pgd_get_mutex);

	major = register_chrdev(0, "rs_ktblmgr", &fops);
		goto failed_chrdevreg;

	printk("major for ktblmgr is %d\n", major);

	dev_cl = class_create(THIS_MODULE, "rootsim");
		goto failed_classreg;

	ret = device_create_file(device, &dev_attr_multimap);
		printk(KERN_WARNING "rs_ktblmgr: failed to create write /sys endpoint - continuing without\n");

	mutex_init(&rs_ktblmgr_mutex);

	for (i = 0; i < SIBLING_PGD; i++) {

		original_view[i] = NULL;

		if (!(mm_struct_addr[i] = kmalloc(sizeof(struct mm_struct), GFP_KERNEL)))

		if (!(pgd_addr[i] = (void *)__get_free_pages(GFP_KERNEL, 0))) {
			kfree(mm_struct_addr[i]);

		mm_struct_addr[i]->pgd = pgd_addr[i];
		if ((void *)pgd_addr[i] != (void *)((struct mm_struct *)mm_struct_addr[i])->pgd) {
			printk("bad referencing between mm_struct and pgd\n");

	printk(KERN_INFO "Correctly allocated %d sibling pgds\n", managed_pgds);

	memset(&kp, 0, sizeof(kp));
	kp.symbol_name = "flush_tlb_all";
	if (!register_kprobe(&kp)) {
		unregister_kprobe(&kp);

	unregister_chrdev(major, "rs_ktblmgr");

	printk(KERN_ERR "rs_ktblmgr: something wrong while preallocating pgds\n");
static void rs_ktblmgr_cleanup(void) {

	rootsim_pager = NULL;
	unregister_chrdev(major, "rs_ktblmgr");

	for (; managed_pgds > 0; managed_pgds--) {
		free_pages((ulong)pgd_addr[managed_pgds-1], 0);	/* order-0 page from __get_free_pages() in init */
		kfree(mm_struct_addr[managed_pgds-1]);		/* mm_struct shell from kmalloc() in init */
struct file_operations fops
	File operations for the module.

struct memory_map *mm
	Memory map of the LP.

__thread struct lp_struct *current
	Per-thread variable pointing to the block state of the LP currently scheduled.

static int major
	Device major number.

static DEFINE_MUTEX(rs_ktblmgr_mutex)
	Only one process can access this device (before spawning threads!).

static struct device *device
	Device being created.

void (*flush_tlb_all_lookup)(void)
	Used to reach the actual flush_tlb_all through a kernel probe (see the sketch after this list).

static struct class *dev_cl
	Device class being created.