6 #include "exec/exec-all.h"
7 #include "exec/cpu_ldst.h"
11 #endif /* DEBUG_REMAP */
13 #include "exec/user/abitypes.h"
15 #include "exec/user/thunk.h"
16 #include "syscall_defs.h"
17 #include "target_syscall.h"
18 #include "exec/gdbstub.h"
20 /* This is the size of the host kernel's sigset_t, needed where we make
21 * direct system calls that take a sigset_t pointer and a size.
23 #define SIGSET_T_SIZE (_NSIG / 8)
25 /* This struct is used to hold certain information about the image.
26 * Basically, it replicates in user space what would be certain
27 * task_struct fields in the kernel
38 abi_ulong reserve_brk
;
40 abi_ulong start_stack
;
41 abi_ulong stack_limit
;
43 abi_ulong code_offset
;
44 abi_ulong data_offset
;
49 abi_ulong arg_strings
;
50 abi_ulong env_strings
;
51 abi_ulong file_string
;
56 /* The fields below are used in FDPIC mode. */
57 abi_ulong loadmap_addr
;
60 abi_ulong pt_dynamic_addr
;
61 abi_ulong interpreter_loadmap_addr
;
62 abi_ulong interpreter_pt_dynamic_addr
;
63 struct image_info
*other_info
;
71 /* Information about the current linux thread */
72 struct vm86_saved_state
{
73 uint32_t eax
; /* return code */
83 uint16_t cs
, ss
, ds
, es
, fs
, gs
;
87 #if defined(TARGET_ARM) && defined(TARGET_ABI32)
89 #include "nwfpe/fpa11.h"
92 #define MAX_SIGQUEUE_SIZE 1024
94 struct emulated_sigtable
{
95 int pending
; /* true if signal is pending */
96 target_siginfo_t info
;
99 /* NOTE: we force a big alignment so that the stack stored after is
101 typedef struct TaskState
{
102 pid_t ts_tid
; /* tid (or pid) of this task */
110 #if defined(TARGET_I386) && !defined(TARGET_X86_64)
111 abi_ulong target_v86
;
112 struct vm86_saved_state vm86_saved_regs
;
113 struct target_vm86plus_struct vm86plus
;
117 abi_ulong child_tidptr
;
121 #if defined(TARGET_ARM) || defined(TARGET_M68K)
122 /* Extra fields for semihosted binaries. */
124 abi_ulong heap_limit
;
126 abi_ulong stack_base
;
127 int used
; /* non zero if used */
128 struct image_info
*info
;
129 struct linux_binprm
*bprm
;
131 struct emulated_sigtable sync_signal
;
132 struct emulated_sigtable sigtab
[TARGET_NSIG
];
133 /* This thread's signal mask, as requested by the guest program.
134 * The actual signal mask of this thread may differ:
135 * + we don't let SIGSEGV and SIGBUS be blocked while running guest code
136 * + sometimes we block all signals to avoid races
138 sigset_t signal_mask
;
139 /* The signal mask imposed by a guest sigsuspend syscall, if we are
140 * currently in the middle of such a syscall
142 sigset_t sigsuspend_mask
;
143 /* Nonzero if we're leaving a sigsuspend and sigsuspend_mask is valid. */
146 /* Nonzero if process_pending_signals() needs to do something (either
147 * handle a pending signal or unblock signals).
148 * This flag is written from a signal handler so should be accessed via
149 * the atomic_read() and atomic_set() functions. (It is not accessed
150 * from multiple threads.)
154 /* This thread's sigaltstack, if it has one */
155 struct target_sigaltstack sigaltstack_used
;
156 } __attribute__((aligned(16))) TaskState
;
158 extern char *exec_path
;
159 void init_task_state(TaskState
*ts
);
160 void task_settid(TaskState
*);
161 void stop_all_tasks(void);
162 extern const char *qemu_uname_release
;
163 extern unsigned long mmap_min_addr
;
165 /* ??? See if we can avoid exposing so much of the loader internals. */
167 /* Read a good amount of data initially, to hopefully get all the
168 program headers loaded. */
169 #define BPRM_BUF_SIZE 1024
172 * This structure is used to hold the arguments that are
173 * used when loading binaries.
175 struct linux_binprm
{
176 char buf
[BPRM_BUF_SIZE
] __attribute__((aligned
));
183 char * filename
; /* Name of binary */
184 int (*core_dump
)(int, const CPUArchState
*); /* coredump routine */
187 void do_init_thread(struct target_pt_regs
*regs
, struct image_info
*infop
);
188 abi_ulong
loader_build_argptr(int envc
, int argc
, abi_ulong sp
,
189 abi_ulong stringp
, int push_ptr
);
190 int loader_exec(int fdexec
, const char *filename
, char **argv
, char **envp
,
191 struct target_pt_regs
* regs
, struct image_info
*infop
,
192 struct linux_binprm
*);
194 /* Returns true if the image uses the FDPIC ABI. If this is the case,
195 * we have to provide some information (loadmap, pt_dynamic_info) such
196 * that the program can be relocated adequately. This is also useful
197 * when handling signals.
199 int info_is_fdpic(struct image_info
*info
);
201 uint32_t get_elf_eflags(int fd
);
202 int load_elf_binary(struct linux_binprm
*bprm
, struct image_info
*info
);
203 int load_flt_binary(struct linux_binprm
*bprm
, struct image_info
*info
);
205 abi_long
memcpy_to_target(abi_ulong dest
, const void *src
,
207 void target_set_brk(abi_ulong new_brk
);
208 abi_long
do_brk(abi_ulong new_brk
);
209 void syscall_init(void);
210 abi_long
do_syscall(void *cpu_env
, int num
, abi_long arg1
,
211 abi_long arg2
, abi_long arg3
, abi_long arg4
,
212 abi_long arg5
, abi_long arg6
, abi_long arg7
,
214 void gemu_log(const char *fmt
, ...) GCC_FMT_ATTR(1, 2);
215 extern __thread CPUState
*thread_cpu
;
216 void cpu_loop(CPUArchState
*env
);
217 const char *target_strerror(int err
);
218 int get_osversion(void);
219 void init_qemu_uname_release(void);
220 void fork_start(void);
221 void fork_end(int child
);
223 /* Creates the initial guest address space in the host memory space using
224 * the given host start address hint and size. The guest_start parameter
225 * specifies the start address of the guest space. guest_base will be the
226 * difference between the host start address computed by this function and
227 * guest_start. If fixed is specified, then the mapped address space must
228 * start at host_start. The real start address of the mapped memory space is
229 * returned or -1 if there was an error.
231 unsigned long init_guest_space(unsigned long host_start
,
232 unsigned long host_size
,
233 unsigned long guest_start
,
236 #include "qemu/log.h"
242 * @number: number of system call to make
243 * ...: arguments to the system call
245 * Call a system call if guest signal not pending.
246 * This has the same API as the libc syscall() function, except that it
247 * may return -1 with errno == TARGET_ERESTARTSYS if a signal was pending.
249 * Returns: the system call result, or -1 with an error code in errno
250 * (Errnos are host errnos; we rely on TARGET_ERESTARTSYS not clashing
251 * with any of the host errno values.)
254 /* A guide to using safe_syscall() to handle interactions between guest
255 * syscalls and guest signals:
257 * Guest syscalls come in two flavours:
259 * (1) Non-interruptible syscalls
261 * These are guest syscalls that never get interrupted by signals and
262 * so never return EINTR. They can be implemented straightforwardly in
263 * QEMU: just make sure that if the implementation code has to make any
264 * blocking calls that those calls are retried if they return EINTR.
265 * It's also OK to implement these with safe_syscall, though it will be
266 * a little less efficient if a signal is delivered at the 'wrong' moment.
268 * Some non-interruptible syscalls need to be handled using block_signals()
269 * to block signals for the duration of the syscall. This mainly applies
270 * to code which needs to modify the data structures used by the
271 * host_signal_handler() function and the functions it calls, including
272 * all syscalls which change the thread's signal mask.
274 * (2) Interruptible syscalls
276 * These are guest syscalls that can be interrupted by signals and
277 * for which we need to either return EINTR or arrange for the guest
278 * syscall to be restarted. This category includes both syscalls which
279 * always restart (and in the kernel return -ERESTARTNOINTR), ones
280 * which only restart if there is no handler (kernel returns -ERESTARTNOHAND
281 * or -ERESTART_RESTARTBLOCK), and the most common kind which restart
282 * if the handler was registered with SA_RESTART (kernel returns
283 * -ERESTARTSYS). System calls which are only interruptible in some
284 * situations (like 'open') also need to be handled this way.
286 * Here it is important that the host syscall is made
287 * via this safe_syscall() function, and *not* via the host libc.
288 * If the host libc is used then the implementation will appear to work
289 * most of the time, but there will be a race condition where a
290 * signal could arrive just before we make the host syscall inside libc,
291 * and then the guest syscall will not correctly be interrupted.
292 * Instead the implementation of the guest syscall can use the safe_syscall
293 * function but otherwise just return the result or errno in the usual
294 * way; the main loop code will take care of restarting the syscall
297 * (If the implementation needs to make multiple host syscalls this is
298 * OK; any which might really block must be via safe_syscall(); for those
299 * which are only technically blocking (ie which we know in practice won't
300 * stay in the host kernel indefinitely) it's OK to use libc if necessary.
301 * You must be able to cope with backing out correctly if some safe_syscall
302 * you make in the implementation returns either -TARGET_ERESTARTSYS or
305 * block_signals() cannot be used for interruptible syscalls.
308 * How and why the safe_syscall implementation works:
310 * The basic setup is that we make the host syscall via a known
311 * section of host native assembly. If a signal occurs, our signal
312 * handler checks the interrupted host PC against the address of that
313 * known section. If the PC is before or at the address of the syscall
314 * instruction then we change the PC to point at a "return
315 * -TARGET_ERESTARTSYS" code path instead, and then exit the signal handler
316 * (causing the safe_syscall() call to immediately return that value).
317 * Then in the main.c loop if we see this magic return value we adjust
318 * the guest PC to wind it back to before the system call, and invoke
319 * the guest signal handler as usual.
321 * This winding-back will happen in two cases:
322 * (1) signal came in just before we took the host syscall (a race);
323 * in this case we'll take the guest signal and have another go
324 * at the syscall afterwards, and this is indistinguishable for the
325 * guest from the timing having been different such that the guest
326 * signal really did win the race
327 * (2) signal came in while the host syscall was blocking, and the
328 * host kernel decided the syscall should be restarted;
329 * in this case we want to restart the guest syscall also, and so
330 * rewinding is the right thing. (Note that "restart" semantics mean
331 * "first call the signal handler, then reattempt the syscall".)
332 * The other situation to consider is when a signal came in while the
333 * host syscall was blocking, and the host kernel decided that the syscall
334 * should not be restarted; in this case QEMU's host signal handler will
335 * be invoked with the PC pointing just after the syscall instruction,
336 * with registers indicating an EINTR return; the special code in the
337 * handler will not kick in, and we will return EINTR to the guest as
340 * Notice that we can leave the host kernel to make the decision for
341 * us about whether to do a restart of the syscall or not; we do not
342 * need to check SA_RESTART flags in QEMU or distinguish the various
343 * kinds of restartability.
345 #ifdef HAVE_SAFE_SYSCALL
346 /* The core part of this function is implemented in assembly */
347 extern long safe_syscall_base(int *pending
, long number
, ...);
349 #define safe_syscall(...) \
352 int *psp_ = &((TaskState *)thread_cpu->opaque)->signal_pending; \
353 ret_ = safe_syscall_base(psp_, __VA_ARGS__); \
354 if (is_error(ret_)) { \
363 /* Fallback for architectures which don't yet provide a safe-syscall assembly
364 * fragment; note that this is racy!
365 * This should go away when all host architectures have been updated.
367 #define safe_syscall syscall
372 int host_to_target_waitstatus(int status
);
375 void print_syscall(int num
,
376 abi_long arg1
, abi_long arg2
, abi_long arg3
,
377 abi_long arg4
, abi_long arg5
, abi_long arg6
);
378 void print_syscall_ret(int num
, abi_long arg1
);
380 * print_taken_signal:
381 * @target_signum: target signal being taken
382 * @tinfo: target_siginfo_t which will be passed to the guest for the signal
384 * Print strace output indicating that this signal is being taken by the guest,
385 * in a format similar to:
386 * --- SIGSEGV {si_signo=SIGSEGV, si_code=SI_KERNEL, si_addr=0} ---
388 void print_taken_signal(int target_signum
, const target_siginfo_t
*tinfo
);
389 extern int do_strace
;
392 void process_pending_signals(CPUArchState
*cpu_env
);
393 void signal_init(void);
394 int queue_signal(CPUArchState
*env
, int sig
, int si_type
,
395 target_siginfo_t
*info
);
396 void host_to_target_siginfo(target_siginfo_t
*tinfo
, const siginfo_t
*info
);
397 void target_to_host_siginfo(siginfo_t
*info
, const target_siginfo_t
*tinfo
);
398 int target_to_host_signal(int sig
);
399 int host_to_target_signal(int sig
);
400 long do_sigreturn(CPUArchState
*env
);
401 long do_rt_sigreturn(CPUArchState
*env
);
402 abi_long
do_sigaltstack(abi_ulong uss_addr
, abi_ulong uoss_addr
, abi_ulong sp
);
403 int do_sigprocmask(int how
, const sigset_t
*set
, sigset_t
*oldset
);
404 abi_long
do_swapcontext(CPUArchState
*env
, abi_ulong uold_ctx
,
405 abi_ulong unew_ctx
, abi_long ctx_size
);
407 * block_signals: block all signals while handling this guest syscall
409 * Block all signals, and arrange that the signal mask is returned to
410 * its correct value for the guest before we resume execution of guest code.
411 * If this function returns non-zero, then the caller should immediately
412 * return -TARGET_ERESTARTSYS to the main loop, which will take the pending
413 * signal and restart execution of the syscall.
414 * If block_signals() returns zero, then the caller can continue with
415 * emulation of the system call knowing that no signals can be taken
416 * (and therefore that no race conditions will result).
417 * This should only be called once, because if it is called a second time
418 * it will always return non-zero. (Think of it like a mutex that can't
419 * be recursively locked.)
420 * Signals will be unblocked again by process_pending_signals().
422 * Return value: non-zero if there was a pending signal, zero if not.
424 int block_signals(void); /* Returns non zero if signal pending */
428 void save_v86_state(CPUX86State
*env
);
429 void handle_vm86_trap(CPUX86State
*env
, int trapno
);
430 void handle_vm86_fault(CPUX86State
*env
);
431 int do_vm86(CPUX86State
*env
, long subfunction
, abi_ulong v86_addr
);
432 #elif defined(TARGET_SPARC64)
433 void sparc64_set_context(CPUSPARCState
*env
);
434 void sparc64_get_context(CPUSPARCState
*env
);
438 int target_mprotect(abi_ulong start
, abi_ulong len
, int prot
);
439 abi_long
target_mmap(abi_ulong start
, abi_ulong len
, int prot
,
440 int flags
, int fd
, abi_ulong offset
);
441 int target_munmap(abi_ulong start
, abi_ulong len
);
442 abi_long
target_mremap(abi_ulong old_addr
, abi_ulong old_size
,
443 abi_ulong new_size
, unsigned long flags
,
445 extern unsigned long last_brk
;
446 extern abi_ulong mmap_next_start
;
447 abi_ulong
mmap_find_vma(abi_ulong
, abi_ulong
, abi_ulong
);
448 void mmap_fork_start(void);
449 void mmap_fork_end(int child
);
452 extern unsigned long guest_stack_size
;
456 #define VERIFY_READ 0
457 #define VERIFY_WRITE 1 /* implies read access */
459 static inline int access_ok(int type
, abi_ulong addr
, abi_ulong size
)
461 return guest_addr_valid(addr
) &&
462 (size
== 0 || guest_addr_valid(addr
+ size
- 1)) &&
463 page_check_range((target_ulong
)addr
, size
,
464 (type
== VERIFY_READ
) ? PAGE_READ
: (PAGE_READ
| PAGE_WRITE
)) == 0;
467 /* NOTE __get_user and __put_user use host pointers and don't check access.
468 These are usually used to access struct data members once the struct has
469 been locked - usually with lock_user_struct. */
473 * - Use __builtin_choose_expr to avoid type promotion from ?:,
474 * - Invalid sizes result in a compile time error stemming from
475 * the fact that abort has no parameters.
476 * - It's easier to use the endian-specific unaligned load/store
477 * functions than host-endian unaligned load/store plus tswapN.
478 * - The pragmas are necessary only to silence a clang false-positive
479 * warning: see https://bugs.llvm.org/show_bug.cgi?id=39113 .
480 * - gcc has bugs in its _Pragma() support in some versions, eg
481 * https://gcc.gnu.org/bugzilla/show_bug.cgi?id=83256 -- so we only
482 * include the warning-suppression pragmas for clang
484 #if defined(__clang__) && __has_warning("-Waddress-of-packed-member")
485 #define PRAGMA_DISABLE_PACKED_WARNING \
486 _Pragma("GCC diagnostic push"); \
487 _Pragma("GCC diagnostic ignored \"-Waddress-of-packed-member\"")
489 #define PRAGMA_REENABLE_PACKED_WARNING \
490 _Pragma("GCC diagnostic pop")
493 #define PRAGMA_DISABLE_PACKED_WARNING
494 #define PRAGMA_REENABLE_PACKED_WARNING
497 #define __put_user_e(x, hptr, e) \
499 PRAGMA_DISABLE_PACKED_WARNING; \
500 (__builtin_choose_expr(sizeof(*(hptr)) == 1, stb_p, \
501 __builtin_choose_expr(sizeof(*(hptr)) == 2, stw_##e##_p, \
502 __builtin_choose_expr(sizeof(*(hptr)) == 4, stl_##e##_p, \
503 __builtin_choose_expr(sizeof(*(hptr)) == 8, stq_##e##_p, abort)))) \
504 ((hptr), (x)), (void)0); \
505 PRAGMA_REENABLE_PACKED_WARNING; \
508 #define __get_user_e(x, hptr, e) \
510 PRAGMA_DISABLE_PACKED_WARNING; \
511 ((x) = (typeof(*hptr))( \
512 __builtin_choose_expr(sizeof(*(hptr)) == 1, ldub_p, \
513 __builtin_choose_expr(sizeof(*(hptr)) == 2, lduw_##e##_p, \
514 __builtin_choose_expr(sizeof(*(hptr)) == 4, ldl_##e##_p, \
515 __builtin_choose_expr(sizeof(*(hptr)) == 8, ldq_##e##_p, abort)))) \
517 PRAGMA_REENABLE_PACKED_WARNING; \
521 #ifdef TARGET_WORDS_BIGENDIAN
522 # define __put_user(x, hptr) __put_user_e(x, hptr, be)
523 # define __get_user(x, hptr) __get_user_e(x, hptr, be)
525 # define __put_user(x, hptr) __put_user_e(x, hptr, le)
526 # define __get_user(x, hptr) __get_user_e(x, hptr, le)
529 /* put_user()/get_user() take a guest address and check access */
530 /* These are usually used to access an atomic data type, such as an int,
531 * that has been passed by address. These internally perform locking
532 * and unlocking on the data type.
534 #define put_user(x, gaddr, target_type) \
536 abi_ulong __gaddr = (gaddr); \
537 target_type *__hptr; \
538 abi_long __ret = 0; \
539 if ((__hptr = lock_user(VERIFY_WRITE, __gaddr, sizeof(target_type), 0))) { \
540 __put_user((x), __hptr); \
541 unlock_user(__hptr, __gaddr, sizeof(target_type)); \
543 __ret = -TARGET_EFAULT; \
547 #define get_user(x, gaddr, target_type) \
549 abi_ulong __gaddr = (gaddr); \
550 target_type *__hptr; \
551 abi_long __ret = 0; \
552 if ((__hptr = lock_user(VERIFY_READ, __gaddr, sizeof(target_type), 1))) { \
553 __get_user((x), __hptr); \
554 unlock_user(__hptr, __gaddr, 0); \
556 /* avoid warning */ \
558 __ret = -TARGET_EFAULT; \
563 #define put_user_ual(x, gaddr) put_user((x), (gaddr), abi_ulong)
564 #define put_user_sal(x, gaddr) put_user((x), (gaddr), abi_long)
565 #define put_user_u64(x, gaddr) put_user((x), (gaddr), uint64_t)
566 #define put_user_s64(x, gaddr) put_user((x), (gaddr), int64_t)
567 #define put_user_u32(x, gaddr) put_user((x), (gaddr), uint32_t)
568 #define put_user_s32(x, gaddr) put_user((x), (gaddr), int32_t)
569 #define put_user_u16(x, gaddr) put_user((x), (gaddr), uint16_t)
570 #define put_user_s16(x, gaddr) put_user((x), (gaddr), int16_t)
571 #define put_user_u8(x, gaddr) put_user((x), (gaddr), uint8_t)
572 #define put_user_s8(x, gaddr) put_user((x), (gaddr), int8_t)
574 #define get_user_ual(x, gaddr) get_user((x), (gaddr), abi_ulong)
575 #define get_user_sal(x, gaddr) get_user((x), (gaddr), abi_long)
576 #define get_user_u64(x, gaddr) get_user((x), (gaddr), uint64_t)
577 #define get_user_s64(x, gaddr) get_user((x), (gaddr), int64_t)
578 #define get_user_u32(x, gaddr) get_user((x), (gaddr), uint32_t)
579 #define get_user_s32(x, gaddr) get_user((x), (gaddr), int32_t)
580 #define get_user_u16(x, gaddr) get_user((x), (gaddr), uint16_t)
581 #define get_user_s16(x, gaddr) get_user((x), (gaddr), int16_t)
582 #define get_user_u8(x, gaddr) get_user((x), (gaddr), uint8_t)
583 #define get_user_s8(x, gaddr) get_user((x), (gaddr), int8_t)
585 /* copy_from_user() and copy_to_user() are usually used to copy data
586 * buffers between the target and host. These internally perform
587 * locking/unlocking of the memory.
589 abi_long
copy_from_user(void *hptr
, abi_ulong gaddr
, size_t len
);
590 abi_long
copy_to_user(abi_ulong gaddr
, void *hptr
, size_t len
);
592 /* Functions for accessing guest memory. The tget and tput functions
593 read/write single values, byteswapping as necessary. The lock_user function
594 gets a pointer to a contiguous area of guest memory, but does not perform
595 any byteswapping. lock_user may return either a pointer to the guest
596 memory, or a temporary buffer. */
598 /* Lock an area of guest memory into the host. If copy is true then the
599 host area will have the same contents as the guest. */
600 static inline void *lock_user(int type
, abi_ulong guest_addr
, long len
, int copy
)
602 if (!access_ok(type
, guest_addr
, len
))
607 addr
= g_malloc(len
);
609 memcpy(addr
, g2h(guest_addr
), len
);
611 memset(addr
, 0, len
);
615 return g2h(guest_addr
);
619 /* Unlock an area of guest memory. The first LEN bytes must be
620 flushed back to guest memory. host_ptr = NULL is explicitly
621 allowed and does nothing. */
622 static inline void unlock_user(void *host_ptr
, abi_ulong guest_addr
,
629 if (host_ptr
== g2h(guest_addr
))
632 memcpy(g2h(guest_addr
), host_ptr
, len
);
637 /* Return the length of a string in target memory or -TARGET_EFAULT if
639 abi_long
target_strlen(abi_ulong gaddr
);
641 /* Like lock_user but for null terminated strings. */
642 static inline void *lock_user_string(abi_ulong guest_addr
)
645 len
= target_strlen(guest_addr
);
648 return lock_user(VERIFY_READ
, guest_addr
, (long)(len
+ 1), 1);
651 /* Helper macros for locking/unlocking a target struct. */
652 #define lock_user_struct(type, host_ptr, guest_addr, copy) \
653 (host_ptr = lock_user(type, guest_addr, sizeof(*host_ptr), copy))
654 #define unlock_user_struct(host_ptr, guest_addr, copy) \
655 unlock_user(host_ptr, guest_addr, (copy) ? sizeof(*host_ptr) : 0)
659 static inline int is_error(abi_long ret
)
661 return (abi_ulong
)ret
>= (abi_ulong
)(-4096);
665 * preexit_cleanup: housekeeping before the guest exits
668 * code: the exit code
670 void preexit_cleanup(CPUArchState
*env
, int code
);
672 /* Include target-specific struct and function definitions;
673 * they may need access to the target-independent structures
674 * above, so include them last.
676 #include "target_cpu.h"
677 #include "target_structs.h"