4 * @brief x86 ULT support header
6 * This header defines all the facilities to implement User-Level Threads
9 * The core of the implementation is found in jmp.S which is written in
10 * assembly. jmp.S is undocumented here, but looking at its source will
11 * give an explanation of all the functions and how they behave.
14 * Copyright (C) 2008-2019 HPDCS Group
15 * https://hpdcs.github.io
17 * This file is part of ROOT-Sim (ROme OpTimistic Simulator).
19 * ROOT-Sim is free software; you can redistribute it and/or modify it under the
20 * terms of the GNU General Public License as published by the Free Software
21 * Foundation; only version 3 of the License applies.
23 * ROOT-Sim is distributed in the hope that it will be useful, but WITHOUT ANY
24 * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
25 * A PARTICULAR PURPOSE. See the GNU General Public License for more details.
27 * You should have received a copy of the GNU General Public License along with
28 * ROOT-Sim; if not, write to the Free Software Foundation, Inc.,
29 * 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
31 * @author Alessandro Pellegrini
33 * @date December, 2015
39 #if defined(__x86_64__)
41 #include <arch/asm_defines.h>
45 # _set_jmp: create a snapshot of the current running ULT, for later restore.
47 # Stack organization of this function after the first three push instructions:
53 # |-------------| <-- Saved RSP points here
58 # | Old RDI | <-- This is pushed by wrapping macro
61 # Arguments to this function:
62 # - RDI: pointer to the CPU context where to store the current (before call) context
64 # The following defines keep the offsets described in the picture above,
65 # starting from the stack pointer. This is because this function does
66 # not actually create a valid stack frame, in order to save some registers
76 .type _set_jmp, @function # _set_jmp stores the caller's CPU context into the buffer pointed to by RDI
78 pushq %rax # preserve the caller's rax: rax is reused below as the context pointer
79 pushq %r11 # preserve the caller's r11: r11 is reused below as a scratch register
81 # Save only status flags
87 movq %rdi, %rax # rax = pointer to the context being filled
88 movq old_rax(%rsp), %r11 # r11 = caller's rax, reloaded from its stack slot
89 # Save the general-purpose registers at their offsets inside the context
90 movq %r11, offsetof_exec_context_t_rax(%rax) # caller's rax (rax itself now holds the context pointer)
91 movq %rdx, offsetof_exec_context_t_rdx(%rax)
92 movq %rcx, offsetof_exec_context_t_rcx(%rax)
93 movq %rbx, offsetof_exec_context_t_rbx(%rax)
94 movq %rsp, offsetof_exec_context_t_rsp(%rax) # current rsp, corrected by the addq below
95 # saved rsp must point one quadword above the old return address
96 addq $16, offsetof_exec_context_t_rsp(%rax) # adjusts only the stored copy; rsp itself is not modified
97 movq %rbp, offsetof_exec_context_t_rbp(%rax)
98 movq %rsi, offsetof_exec_context_t_rsi(%rax)
99 # the 'old' rdi was pushed by the surrounding macro, because rdi carries the context pointer
100 movq old_rdi(%rsp), %r11 # r11 = caller's rdi, read from its stack slot
101 movq %r11, offsetof_exec_context_t_rdi(%rax)
102 movq %r8, offsetof_exec_context_t_r8(%rax)
103 movq %r9, offsetof_exec_context_t_r9(%rax)
104 movq %r10, offsetof_exec_context_t_r10(%rax)
105 # make r11 hold the 'old' r11 again (it was used as scratch above)
106 movq old_r11(%rsp), %r11 # r11 = caller's r11, read from its stack slot
107 movq %r11, offsetof_exec_context_t_r11(%rax)
108 movq %r12, offsetof_exec_context_t_r12(%rax)
109 movq %r13, offsetof_exec_context_t_r13(%rax)
110 movq %r14, offsetof_exec_context_t_r14(%rax)
111 movq %r15, offsetof_exec_context_t_r15(%rax)
112 # save the flags, which were pushed on the stack at the beginning of this function
113 movq old_flags(%rsp), %rdx # rdx is free as scratch: its value was already stored in the context above
114 movq %rdx, offsetof_exec_context_t_flags(%rax)
116 # Save the original return address: _long_jmp uses it as the resume point
117 movq ret_addr(%rsp), %r11 # r11 = return address of this call, read from the stack
118 movq %r11, offsetof_exec_context_t_rip(%rax)
120 # Now save the other registers. fxsave wants memory aligned to 16 bytes.
121 # The context structure is aligned to 16 bytes. We have 18 8-byte
122 # registers (144 bytes, a multiple of 16), so the next address is exactly the 'others' buffer.
123 fxsave offsetof_exec_context_t_fpu(%rax) # store the x87/MMX/SSE state into the context's fpu area
126 xorq %rax, %rax # return 0 because the context is being created
129 .size _set_jmp, .-_set_jmp # symbol size = bytes from the _set_jmp symbol to this point
140 # _long_jmp: restore a previously saved ULT context.
142 # This long_jmp version does not restore RAX. In fact, the common usage is to
143 # call longjmp(context, val); and RAX is set to val. This function respects this,
144 # so that if RAX should be restored as well, it is possible to call
145 # long_jmp(context, context->rax);
147 # Arguments to this function:
148 # - RDI: pointer to the CPU context to restore
149 # - RSI: return value of this function call
151 # The return address at the end of the function does not return control to the
152 # caller, rather to the instruction immediately after the set_jmp call.
154 # The organization of this long_jmp implementation works as well if the stack of
155 # the caller and the stack of the destination of the long jump are different.
160 .type _long_jmp, @function # _long_jmp reloads the CPU context pointed to by RDI and resumes it
162 movq %rdi, %rax # rax now points to the context
164 movq offsetof_exec_context_t_rip(%rax), %r10 # This is the old return address (r10 is scratch here, restored below)
165 movq offsetof_exec_context_t_rsp(%rax), %r11 # r11 is the old rsp (r11 is scratch here, restored below)
166 movq %r10, 8(%r11) # restore the old return address, one quadword past the saved rsp
168 movq %rsi, (%r11) # Put on the old stack, at the saved rsp, the desired return value
170 # Restore GP registers from the saved context
171 movq offsetof_exec_context_t_rdx(%rax), %rdx
172 movq offsetof_exec_context_t_rcx(%rax), %rcx
173 movq offsetof_exec_context_t_rbx(%rax), %rbx
174 movq offsetof_exec_context_t_rsp(%rax), %rsp # from here on rsp is the saved stack pointer
175 movq offsetof_exec_context_t_rbp(%rax), %rbp
176 movq offsetof_exec_context_t_rsi(%rax), %rsi # the return value in rsi was already copied to the stack above
177 movq offsetof_exec_context_t_r8(%rax), %r8
178 movq offsetof_exec_context_t_r9(%rax), %r9
179 movq offsetof_exec_context_t_r10(%rax), %r10
180 movq offsetof_exec_context_t_r11(%rax), %r11
181 movq offsetof_exec_context_t_r12(%rax), %r12
182 movq offsetof_exec_context_t_r13(%rax), %r13
183 movq offsetof_exec_context_t_r14(%rax), %r14
186 movq offsetof_exec_context_t_flags(%rax), %rax # this is flags; rax stops being the context pointer here
187 addb $0x7f, %al # Overflows if OF was set
190 # Restore the remaining rdi and r15
191 movq %rdi, %rax # rax now points again to context (rdi still holds the pointer passed in)
192 movq offsetof_exec_context_t_rdi(%rax), %rdi # rdi is restored last among the GP registers because it held the context pointer
193 movq offsetof_exec_context_t_r15(%rax), %r15
195 # Restore other registers
196 fxrstor offsetof_exec_context_t_fpu(%rax) # reload the x87/MMX/SSE state from the context's fpu area
198 # (possibly) change stack
199 movq offsetof_exec_context_t_rsp(%rax), %rsp # NOTE(review): rsp was already loaded from this same field above; confirm whether this second load is still needed
200 # Set the desired return value. This was RSI passed to the function,
201 # which was moved on top of the new stack (now restored) at the
202 # beginning of this function
204 ret # do the long jump
206 .size _long_jmp, .-_long_jmp # symbol size = bytes from the _long_jmp symbol to this point
212 # context_create: create a context for a new ULT. The new ULT is assumed
213 # to never return from the first function it starts in (no valid return
214 # address is stored in the initial context).
216 # Parameters to this function are as follows:
217 # - RDI: context to be set up (1st integer/pointer argument)
218 # - RSI: function to be activated in the new ULT (2nd integer/pointer argument)
219 # - RDX: arguments to be passed to the above function (3rd integer/pointer argument)
220 # - RCX: a pointer to the newly allocated stack (4th integer/pointer argument)
221 # - R8: the size of the newly allocated stack (5th integer/pointer argument)
223 # We do not clobber any callee-save register here.
226 .globl context_create # export the symbol so that other object files can reference it
227 .type context_create, @function # context_create fills the context pointed to by RDI for a new ULT
230 # Seed the FPU area of the new context with the FPU/SSE state that is current
231 # in this function (we do not care about the values, but the FPU configuration is saved)
232 fxsave offsetof_exec_context_t_fpu(%rdi) # rdi = context being set up
234 # Compute and store the stack pointer
236 # subq $8 would theoretically be enough to have a valid stack
237 # but _set_jmp and _long_jmp expect a caller to already
238 # have populated it: we need to reserve 8 bytes for the fake
239 # return address and 8 more to keep 16 bytes alignment.
241 movq %rcx, offsetof_exec_context_t_rsp(%rdi) # the context's rsp field takes the value held in rcx
243 # Set the function to be activated and its parameter
244 movq %rsi, offsetof_exec_context_t_rip(%rdi) # the context's rip field = entry function; _long_jmp returns to the saved rip
245 movq %rdx, offsetof_exec_context_t_rdi(%rdi) # the context's rdi field = argument; _long_jmp reloads rdi from it
249 .size context_create, .-context_create # symbol size = bytes from the context_create symbol to this point
251 #endif /* defined(__x86_64__) */
252 #endif /* OS_LINUX */