The ROme OpTimistic Simulator  2.0.0
A General-Purpose Multithreaded Parallel/Distributed Simulation Platform
jmp.S
1 /**
2 * @file arch/x86/jmp.S
3 *
4 * @brief x86 ULT support: assembly implementation
5 *
6 * This file implements, in assembly, the core facilities for User-Level
7 * Threads on x86: _set_jmp, _long_jmp and context_create.
8 *
9 * The code is only assembled on Linux for x86_64 (see the preprocessor
10 * guards below). Each function is preceded by a comment which explains
11 * its arguments and how it behaves.
12 *
13 * @copyright
14 * Copyright (C) 2008-2019 HPDCS Group
15 * https://hpdcs.github.io
16 *
17 * This file is part of ROOT-Sim (ROme OpTimistic Simulator).
18 *
19 * ROOT-Sim is free software; you can redistribute it and/or modify it under the
20 * terms of the GNU General Public License as published by the Free Software
21 * Foundation; only version 3 of the License applies.
22 *
23 * ROOT-Sim is distributed in the hope that it will be useful, but WITHOUT ANY
24 * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
25 * A PARTICULAR PURPOSE. See the GNU General Public License for more details.
26 *
27 * You should have received a copy of the GNU General Public License along with
28 * ROOT-Sim; if not, write to the Free Software Foundation, Inc.,
29 * 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
30 *
31 * @author Alessandro Pellegrini
32 *
33 * @date December, 2015
34 */
35 
36 .file "jmp.S"
37 
38 #ifdef OS_LINUX
39 #if defined(__x86_64__)
40 
41 #include <arch/asm_defines.h>
42 
43 .text
44 
45 # _set_jmp: create a snapshot of the currently running ULT, for later restore by _long_jmp.
46 #
47 # Stack organization of this function after its first three push instructions
48 # (lowest address, i.e. the current RSP, at the top):
49 # _______________
50 # | FLAGS |
51 # |-------------|
52 # | R11 |
53 # |-------------| <-- Saved RSP points here (current RSP + 16, i.e. the RAX slot)
54 # | RAX |
55 # |-------------|
56 # | Ret. Addr. |
57 # |-------------|
58 # | Old RDI | <-- This is pushed by the wrapping macro (which is not part of this file)
59 # |-------------|
60 #
61 # Arguments to this function:
62 # - RDI: pointer to the CPU context where to store the current (before call) context
63 # Returns 0 in RAX. RDX, R11 and the status flags are left modified as well.
64 # The following defines name the offsets, relative to the stack pointer,
65 # of the slots described in the picture above. They are needed because this
66 # function does not actually create a valid stack frame, to save some registers
67 
68 #define old_flags 0
69 #define old_r11 8
70 #define old_rax 16
71 #define ret_addr 24
72 #define old_rdi 32
73 
74 .align 8
75 .globl _set_jmp
76 .type _set_jmp, @function
77 _set_jmp:
78  pushq %rax # save the original rax: rax is reused below to point to the context
79  pushq %r11 # save the original r11: r11 is reused below as a scratch register
80 
81  # Save only status flags: lahf copies SF, ZF, AF, PF and CF into AH, seto sets AL to 1 when OF is set (0 otherwise)
82  lahf
83  seto %al
84  pushq %rax # this quadword is the old_flags slot: AH and AL encode the status flags
85 
86  # Save the context
87  movq %rdi, %rax # rax points to the context
88  movq old_rax(%rsp), %r11 # r11 keeps the 'old' rax
89  # Save GP registers at appropriate offsets
90  movq %r11, offsetof_exec_context_t_rax(%rax)
91  movq %rdx, offsetof_exec_context_t_rdx(%rax)
92  movq %rcx, offsetof_exec_context_t_rcx(%rax)
93  movq %rbx, offsetof_exec_context_t_rbx(%rax)
94  movq %rsp, offsetof_exec_context_t_rsp(%rax)
95  # rsp currently points to the FLAGS slot: adding 16 makes the saved rsp point to the RAX slot, the quadword right below the return address
96  addq $16, offsetof_exec_context_t_rsp(%rax)
97  movq %rbp, offsetof_exec_context_t_rbp(%rax)
98  movq %rsi, offsetof_exec_context_t_rsi(%rax)
99  # old 'rdi' was pushed by the surrounding macro
100  movq old_rdi(%rsp), %r11
101  movq %r11, offsetof_exec_context_t_rdi(%rax)
102  movq %r8, offsetof_exec_context_t_r8(%rax)
103  movq %r9, offsetof_exec_context_t_r9(%rax)
104  movq %r10, offsetof_exec_context_t_r10(%rax)
105  # make r11 keep the 'old' r11
106  movq old_r11(%rsp), %r11
107  movq %r11, offsetof_exec_context_t_r11(%rax)
108  movq %r12, offsetof_exec_context_t_r12(%rax)
109  movq %r13, offsetof_exec_context_t_r13(%rax)
110  movq %r14, offsetof_exec_context_t_r14(%rax)
111  movq %r15, offsetof_exec_context_t_r15(%rax)
112  # save flags, pushed at the beginning of this function
113  movq old_flags(%rsp), %rdx # rdx is free to use as scratch: its original value is already in the context
114  movq %rdx, offsetof_exec_context_t_flags(%rax)
115 
116  # Save the original return address
117  movq ret_addr(%rsp), %r11
118  movq %r11, offsetof_exec_context_t_rip(%rax)
119 
120  # Now save the remaining state with fxsave, which wants memory aligned to 16 bytes.
121  # NOTE(review): this relies on the context structure being aligned to 16 bytes and on 18 8-byte
122  # registers preceding this buffer; the structure is not visible here, confirm against exec_context_t.
123  fxsave offsetof_exec_context_t_fpu(%rax)
124 
125  addq $24, %rsp # drop the three quadwords pushed on entry (flags, r11, rax) without reloading them
126  xorq %rax, %rax # return 0 because the context is being created
127  ret
128 
129 .size _set_jmp, .-_set_jmp
130 
131 
132 #undef old_flags
133 #undef old_r11
134 #undef old_rax
135 #undef ret_addr
136 #undef old_rdi
137 
138 
139 
140 # _long_jmp: restore a previously saved ULT context and resume execution from it.
141 #
142 # This long_jmp version does not restore RAX. In fact, the common usage is to
143 # call longjmp(context, val); and RAX is set to val. This function respects this,
144 # so that if RAX should be restored as well, it is possible to call
145 # long_jmp(context, context->rax);
146 #
147 # Arguments to this function:
148 # - RDI: pointer to the CPU context to restore
149 # - RSI: value which is found in RAX when execution resumes at the saved RIP
150 #
151 # The ret at the end of the function does not return control to the caller,
152 # but to the RIP saved in the context, e.g. the instruction immediately after the _set_jmp call.
153 # Two quadwords (the return value and the saved RIP) are written at the saved RSP before switching to it.
154 # The organization of this long_jmp implementation works as well if the stack of
155 # the caller and the stack of the destination of the long jump are different.
156 
157 
158 .align 8
159 .globl _long_jmp
160 .type _long_jmp, @function
161 _long_jmp:
162  movq %rdi, %rax # rax now points to the context
163 
164  movq offsetof_exec_context_t_rip(%rax), %r10 # This is the old return address
165  movq offsetof_exec_context_t_rsp(%rax), %r11 # r11 is the old rsp
166  movq %r10, 8(%r11) # restore the old return address: this is the quadword consumed by the final ret
167 
168  movq %rsi, (%r11) # Put on the old stack the desired return value: it is popped into rax at the end
169 
170  # Restore GP registers from the saved context (rdi and r15 follow below, rax is not restored)
171  movq offsetof_exec_context_t_rdx(%rax), %rdx
172  movq offsetof_exec_context_t_rcx(%rax), %rcx
173  movq offsetof_exec_context_t_rbx(%rax), %rbx
174  movq offsetof_exec_context_t_rsp(%rax), %rsp # already switches to the saved stack; the same value is loaded again before the final pop
175  movq offsetof_exec_context_t_rbp(%rax), %rbp
176  movq offsetof_exec_context_t_rsi(%rax), %rsi
177  movq offsetof_exec_context_t_r8(%rax), %r8
178  movq offsetof_exec_context_t_r9(%rax), %r9
179  movq offsetof_exec_context_t_r10(%rax), %r10
180  movq offsetof_exec_context_t_r11(%rax), %r11
181  movq offsetof_exec_context_t_r12(%rax), %r12
182  movq offsetof_exec_context_t_r13(%rax), %r13
183  movq offsetof_exec_context_t_r14(%rax), %r14
184 
185  # Restore FLAGS (rax is temporarily overwritten, the context pointer is still available in rdi)
186  movq offsetof_exec_context_t_flags(%rax), %rax # this is flags: as stored by _set_jmp, AH is the lahf image and AL is 1 when OF was set
187  addb $0x7f, %al # Overflows if OF was set
188  sahf # reload SF, ZF, AF, PF and CF from AH; the mov, fxrstor, pop and ret below leave the status flags untouched
189 
190  # Restore remaining rdi and r15
191  movq %rdi, %rax # rax now points again to context
192  movq offsetof_exec_context_t_rdi(%rax), %rdi
193  movq offsetof_exec_context_t_r15(%rax), %r15
194 
195  # Restore other registers (the state stored in the context with fxsave)
196  fxrstor offsetof_exec_context_t_fpu(%rax)
197 
198  # (possibly) change stack
199  movq offsetof_exec_context_t_rsp(%rax), %rsp
200  # Set the desired return value. This was RSI passed to the function,
201  # which was moved on top of the new stack (now restored) at the
202  # beginning of this function
203  popq %rax
204  ret # do the long jump
205 
206 .size _long_jmp, .-_long_jmp
207 
208 
209 
210 
211 
212 # context_create: create a context for a new ULT. The new ULT is assumed
213 # to never return from the first function it starts in (no valid return
214 # address is stored in the initial context).
215 # Only the fpu, rsp, rip and rdi fields of the context are written here.
216 # Parameters to this function are as follows:
217 # - RDI: context to be setup (1st argument)
218 # - RSI: function to be activated in the new ULT (2nd argument)
219 # - RDX: arguments to be passed to the above function (3rd argument)
220 # - RCX: a pointer to the newly allocated stack (4th argument)
221 # - R8: the size of the newly allocated stack (5th argument)
222 #
223 # We do not clobber any callee-save register here: only RCX and the status flags are modified.
224 
225 .align 8
226 .globl context_create
227 .type context_create, @function
228 context_create:
229 
230  # Initialize the FPU context to the values kept in this function
231  # (we do not care about the values, but FPU configuration is saved)
232  fxsave offsetof_exec_context_t_fpu(%rdi)
233 
234  # Compute and store the stack pointer
235  addq %r8, %rcx # rcx = stack pointer + stack size, i.e. the address right past the end of the stack
236  # subq $8 would theoretically be enough to have a valid stack
237  # but _set_jmp and _long_jmp expect a caller to already
238  # have populated it: _long_jmp writes the return value at (rsp) and the saved rip at 8(rsp), and 8 more
239  # bytes are reserved for the fake return address. NOTE(review): 16 bytes alignment holds only when the end of the stack is 16-byte aligned, which is not checked here.
240  subq $24, %rcx
241  movq %rcx, offsetof_exec_context_t_rsp(%rdi)
242 
243  # Set the function to be activated and its parameter
244  movq %rsi, offsetof_exec_context_t_rip(%rdi) # _long_jmp returns to the saved rip, so the new ULT starts here
245  movq %rdx, offsetof_exec_context_t_rdi(%rdi) # _long_jmp reloads rdi from the context, so this becomes the first argument
246 
247  ret
248 
249 .size context_create, .-context_create
250 
251 #endif /* defined(__x86_64__) */
252 #endif /* OS_LINUX */
253