block: fix deadlock in bdrv_co_flush
[qemu/kevin.git] / linux-user / qemu.h
blobbef465de4d9337dead893a97bf65d3769895b37d
1 #ifndef QEMU_H
2 #define QEMU_H
4 #include "hostdep.h"
5 #include "cpu.h"
6 #include "exec/exec-all.h"
7 #include "exec/cpu_ldst.h"
9 #undef DEBUG_REMAP
10 #ifdef DEBUG_REMAP
11 #endif /* DEBUG_REMAP */
13 #include "exec/user/abitypes.h"
15 #include "exec/user/thunk.h"
16 #include "syscall_defs.h"
17 #include "target_syscall.h"
18 #include "exec/gdbstub.h"
19 #include "qemu/queue.h"
21 #define THREAD __thread
/* This is the size of the host kernel's sigset_t, needed where we make
 * direct system calls that take a sigset_t pointer and a size.
 * Computed as the host signal count (_NSIG) divided by 8 bits per byte.
 */
#define SIGSET_T_SIZE (_NSIG / 8)
28 /* This struct is used to hold certain information about the image.
29 * Basically, it replicates in user space what would be certain
30 * task_struct fields in the kernel
32 struct image_info {
33 abi_ulong load_bias;
34 abi_ulong load_addr;
35 abi_ulong start_code;
36 abi_ulong end_code;
37 abi_ulong start_data;
38 abi_ulong end_data;
39 abi_ulong start_brk;
40 abi_ulong brk;
41 abi_ulong start_mmap;
42 abi_ulong start_stack;
43 abi_ulong stack_limit;
44 abi_ulong entry;
45 abi_ulong code_offset;
46 abi_ulong data_offset;
47 abi_ulong saved_auxv;
48 abi_ulong auxv_len;
49 abi_ulong arg_start;
50 abi_ulong arg_end;
51 uint32_t elf_flags;
52 int personality;
53 #ifdef CONFIG_USE_FDPIC
54 abi_ulong loadmap_addr;
55 uint16_t nsegs;
56 void *loadsegs;
57 abi_ulong pt_dynamic_addr;
58 struct image_info *other_info;
59 #endif
#ifdef TARGET_I386
/* Information about the current linux thread */
/* Register values saved for vm86 handling (i386 targets only). */
struct vm86_saved_state {
    uint32_t eax; /* return code */
    uint32_t ebx;
    uint32_t ecx;
    uint32_t edx;
    uint32_t esi;
    uint32_t edi;
    uint32_t ebp;
    uint32_t esp;
    uint32_t eflags;
    uint32_t eip;
    uint16_t cs, ss, ds, es, fs, gs;
};
#endif
79 #if defined(TARGET_ARM) && defined(TARGET_ABI32)
80 /* FPU emulator */
81 #include "nwfpe/fpa11.h"
82 #endif
84 #define MAX_SIGQUEUE_SIZE 1024
86 struct emulated_sigtable {
87 int pending; /* true if signal is pending */
88 target_siginfo_t info;
91 /* NOTE: we force a big alignment so that the stack stored after is
92 aligned too */
93 typedef struct TaskState {
94 pid_t ts_tid; /* tid (or pid) of this task */
95 #ifdef TARGET_ARM
96 # ifdef TARGET_ABI32
97 /* FPA state */
98 FPA11 fpa;
99 # endif
100 int swi_errno;
101 #endif
102 #ifdef TARGET_UNICORE32
103 int swi_errno;
104 #endif
105 #if defined(TARGET_I386) && !defined(TARGET_X86_64)
106 abi_ulong target_v86;
107 struct vm86_saved_state vm86_saved_regs;
108 struct target_vm86plus_struct vm86plus;
109 uint32_t v86flags;
110 uint32_t v86mask;
111 #endif
112 abi_ulong child_tidptr;
113 #ifdef TARGET_M68K
114 int sim_syscalls;
115 abi_ulong tp_value;
116 #endif
117 #if defined(TARGET_ARM) || defined(TARGET_M68K) || defined(TARGET_UNICORE32)
118 /* Extra fields for semihosted binaries. */
119 abi_ulong heap_base;
120 abi_ulong heap_limit;
121 #endif
122 abi_ulong stack_base;
123 int used; /* non zero if used */
124 struct image_info *info;
125 struct linux_binprm *bprm;
127 struct emulated_sigtable sync_signal;
128 struct emulated_sigtable sigtab[TARGET_NSIG];
129 /* This thread's signal mask, as requested by the guest program.
130 * The actual signal mask of this thread may differ:
131 * + we don't let SIGSEGV and SIGBUS be blocked while running guest code
132 * + sometimes we block all signals to avoid races
134 sigset_t signal_mask;
135 /* The signal mask imposed by a guest sigsuspend syscall, if we are
136 * currently in the middle of such a syscall
138 sigset_t sigsuspend_mask;
139 /* Nonzero if we're leaving a sigsuspend and sigsuspend_mask is valid. */
140 int in_sigsuspend;
142 /* Nonzero if process_pending_signals() needs to do something (either
143 * handle a pending signal or unblock signals).
144 * This flag is written from a signal handler so should be accessed via
145 * the atomic_read() and atomic_write() functions. (It is not accessed
146 * from multiple threads.)
148 int signal_pending;
150 } __attribute__((aligned(16))) TaskState;
152 extern char *exec_path;
153 void init_task_state(TaskState *ts);
154 void task_settid(TaskState *);
155 void stop_all_tasks(void);
156 extern const char *qemu_uname_release;
157 extern unsigned long mmap_min_addr;
159 /* ??? See if we can avoid exposing so much of the loader internals. */
161 /* Read a good amount of data initially, to hopefully get all the
162 program headers loaded. */
163 #define BPRM_BUF_SIZE 1024
166 * This structure is used to hold the arguments that are
167 * used when loading binaries.
169 struct linux_binprm {
170 char buf[BPRM_BUF_SIZE] __attribute__((aligned));
171 abi_ulong p;
172 int fd;
173 int e_uid, e_gid;
174 int argc, envc;
175 char **argv;
176 char **envp;
177 char * filename; /* Name of binary */
178 int (*core_dump)(int, const CPUArchState *); /* coredump routine */
181 void do_init_thread(struct target_pt_regs *regs, struct image_info *infop);
182 abi_ulong loader_build_argptr(int envc, int argc, abi_ulong sp,
183 abi_ulong stringp, int push_ptr);
184 int loader_exec(int fdexec, const char *filename, char **argv, char **envp,
185 struct target_pt_regs * regs, struct image_info *infop,
186 struct linux_binprm *);
188 int load_elf_binary(struct linux_binprm *bprm, struct image_info *info);
189 int load_flt_binary(struct linux_binprm *bprm, struct image_info *info);
191 abi_long memcpy_to_target(abi_ulong dest, const void *src,
192 unsigned long len);
193 void target_set_brk(abi_ulong new_brk);
194 abi_long do_brk(abi_ulong new_brk);
195 void syscall_init(void);
196 abi_long do_syscall(void *cpu_env, int num, abi_long arg1,
197 abi_long arg2, abi_long arg3, abi_long arg4,
198 abi_long arg5, abi_long arg6, abi_long arg7,
199 abi_long arg8);
200 void gemu_log(const char *fmt, ...) GCC_FMT_ATTR(1, 2);
201 extern THREAD CPUState *thread_cpu;
202 void cpu_loop(CPUArchState *env);
203 const char *target_strerror(int err);
204 int get_osversion(void);
205 void init_qemu_uname_release(void);
206 void fork_start(void);
207 void fork_end(int child);
209 /* Creates the initial guest address space in the host memory space using
210 * the given host start address hint and size. The guest_start parameter
211 * specifies the start address of the guest space. guest_base will be the
212 * difference between the host start address computed by this function and
213 * guest_start. If fixed is specified, then the mapped address space must
214 * start at host_start. The real start address of the mapped memory space is
215 * returned or -1 if there was an error.
217 unsigned long init_guest_space(unsigned long host_start,
218 unsigned long host_size,
219 unsigned long guest_start,
220 bool fixed);
222 #include "qemu/log.h"
224 /* safe_syscall.S */
227 * safe_syscall:
228 * @int number: number of system call to make
229 * ...: arguments to the system call
231 * Call a system call if guest signal not pending.
232 * This has the same API as the libc syscall() function, except that it
233 * may return -1 with errno == TARGET_ERESTARTSYS if a signal was pending.
235 * Returns: the system call result, or -1 with an error code in errno
236 * (Errnos are host errnos; we rely on TARGET_ERESTARTSYS not clashing
237 * with any of the host errno values.)
240 /* A guide to using safe_syscall() to handle interactions between guest
241 * syscalls and guest signals:
243 * Guest syscalls come in two flavours:
245 * (1) Non-interruptible syscalls
247 * These are guest syscalls that never get interrupted by signals and
248 * so never return EINTR. They can be implemented straightforwardly in
249 * QEMU: just make sure that if the implementation code has to make any
250 * blocking calls that those calls are retried if they return EINTR.
251 * It's also OK to implement these with safe_syscall, though it will be
252 * a little less efficient if a signal is delivered at the 'wrong' moment.
254 * Some non-interruptible syscalls need to be handled using block_signals()
255 * to block signals for the duration of the syscall. This mainly applies
256 * to code which needs to modify the data structures used by the
257 * host_signal_handler() function and the functions it calls, including
258 * all syscalls which change the thread's signal mask.
260 * (2) Interruptible syscalls
262 * These are guest syscalls that can be interrupted by signals and
263 * for which we need to either return EINTR or arrange for the guest
264 * syscall to be restarted. This category includes both syscalls which
265 * always restart (and in the kernel return -ERESTARTNOINTR), ones
266 * which only restart if there is no handler (kernel returns -ERESTARTNOHAND
267 * or -ERESTART_RESTARTBLOCK), and the most common kind which restart
268 * if the handler was registered with SA_RESTART (kernel returns
269 * -ERESTARTSYS). System calls which are only interruptible in some
270 * situations (like 'open') also need to be handled this way.
272 * Here it is important that the host syscall is made
273 * via this safe_syscall() function, and *not* via the host libc.
274 * If the host libc is used then the implementation will appear to work
275 * most of the time, but there will be a race condition where a
276 * signal could arrive just before we make the host syscall inside libc,
277 * and then the guest syscall will not correctly be interrupted.
278 * Instead the implementation of the guest syscall can use the safe_syscall
279 * function but otherwise just return the result or errno in the usual
280 * way; the main loop code will take care of restarting the syscall
281 * if appropriate.
283 * (If the implementation needs to make multiple host syscalls this is
284 * OK; any which might really block must be via safe_syscall(); for those
285 * which are only technically blocking (ie which we know in practice won't
286 * stay in the host kernel indefinitely) it's OK to use libc if necessary.
287 * You must be able to cope with backing out correctly if some safe_syscall
288 * you make in the implementation returns either -TARGET_ERESTARTSYS or
289 * EINTR though.)
291 * block_signals() cannot be used for interruptible syscalls.
294 * How and why the safe_syscall implementation works:
296 * The basic setup is that we make the host syscall via a known
297 * section of host native assembly. If a signal occurs, our signal
298 * handler checks the interrupted host PC against the address of that
299 * known section. If the PC is before or at the address of the syscall
300 * instruction then we change the PC to point at a "return
301 * -TARGET_ERESTARTSYS" code path instead, and then exit the signal handler
302 * (causing the safe_syscall() call to immediately return that value).
303 * Then in the main.c loop if we see this magic return value we adjust
304 * the guest PC to wind it back to before the system call, and invoke
305 * the guest signal handler as usual.
307 * This winding-back will happen in two cases:
308 * (1) signal came in just before we took the host syscall (a race);
309 * in this case we'll take the guest signal and have another go
310 * at the syscall afterwards, and this is indistinguishable for the
311 * guest from the timing having been different such that the guest
312 * signal really did win the race
313 * (2) signal came in while the host syscall was blocking, and the
314 * host kernel decided the syscall should be restarted;
315 * in this case we want to restart the guest syscall also, and so
316 * rewinding is the right thing. (Note that "restart" semantics mean
317 * "first call the signal handler, then reattempt the syscall".)
318 * The other situation to consider is when a signal came in while the
319 * host syscall was blocking, and the host kernel decided that the syscall
320 * should not be restarted; in this case QEMU's host signal handler will
321 * be invoked with the PC pointing just after the syscall instruction,
322 * with registers indicating an EINTR return; the special code in the
323 * handler will not kick in, and we will return EINTR to the guest as
324 * we should.
326 * Notice that we can leave the host kernel to make the decision for
327 * us about whether to do a restart of the syscall or not; we do not
328 * need to check SA_RESTART flags in QEMU or distinguish the various
329 * kinds of restartability.
#ifdef HAVE_SAFE_SYSCALL
/* The core part of this function is implemented in assembly */
extern long safe_syscall_base(int *pending, long number, ...);

/* Wrap safe_syscall_base(): pass it the address of the current task's
 * signal_pending flag, and convert an error return (as judged by
 * is_error()) into the libc convention of -1 with errno set.
 */
#define safe_syscall(...)                                               \
    ({                                                                  \
        long ret_;                                                      \
        int *psp_ = &((TaskState *)thread_cpu->opaque)->signal_pending; \
        ret_ = safe_syscall_base(psp_, __VA_ARGS__);                    \
        if (is_error(ret_)) {                                           \
            errno = -ret_;                                              \
            ret_ = -1;                                                  \
        }                                                               \
        ret_;                                                           \
    })

#else

/* Fallback for architectures which don't yet provide a safe-syscall assembly
 * fragment; note that this is racy!
 * This should go away when all host architectures have been updated.
 */
#define safe_syscall syscall

#endif
357 /* syscall.c */
358 int host_to_target_waitstatus(int status);
360 /* strace.c */
361 void print_syscall(int num,
362 abi_long arg1, abi_long arg2, abi_long arg3,
363 abi_long arg4, abi_long arg5, abi_long arg6);
364 void print_syscall_ret(int num, abi_long arg1);
365 extern int do_strace;
367 /* signal.c */
368 void process_pending_signals(CPUArchState *cpu_env);
369 void signal_init(void);
370 int queue_signal(CPUArchState *env, int sig, target_siginfo_t *info);
371 void host_to_target_siginfo(target_siginfo_t *tinfo, const siginfo_t *info);
372 void target_to_host_siginfo(siginfo_t *info, const target_siginfo_t *tinfo);
373 int target_to_host_signal(int sig);
374 int host_to_target_signal(int sig);
375 long do_sigreturn(CPUArchState *env);
376 long do_rt_sigreturn(CPUArchState *env);
377 abi_long do_sigaltstack(abi_ulong uss_addr, abi_ulong uoss_addr, abi_ulong sp);
378 int do_sigprocmask(int how, const sigset_t *set, sigset_t *oldset);
380 * block_signals: block all signals while handling this guest syscall
382 * Block all signals, and arrange that the signal mask is returned to
383 * its correct value for the guest before we resume execution of guest code.
384 * If this function returns non-zero, then the caller should immediately
385 * return -TARGET_ERESTARTSYS to the main loop, which will take the pending
386 * signal and restart execution of the syscall.
387 * If block_signals() returns zero, then the caller can continue with
388 * emulation of the system call knowing that no signals can be taken
389 * (and therefore that no race conditions will result).
390 * This should only be called once, because if it is called a second time
391 * it will always return non-zero. (Think of it like a mutex that can't
392 * be recursively locked.)
393 * Signals will be unblocked again by process_pending_signals().
395 * Return value: non-zero if there was a pending signal, zero if not.
397 int block_signals(void); /* Returns non zero if signal pending */
399 #ifdef TARGET_I386
400 /* vm86.c */
401 void save_v86_state(CPUX86State *env);
402 void handle_vm86_trap(CPUX86State *env, int trapno);
403 void handle_vm86_fault(CPUX86State *env);
404 int do_vm86(CPUX86State *env, long subfunction, abi_ulong v86_addr);
405 #elif defined(TARGET_SPARC64)
406 void sparc64_set_context(CPUSPARCState *env);
407 void sparc64_get_context(CPUSPARCState *env);
408 #endif
410 /* mmap.c */
411 int target_mprotect(abi_ulong start, abi_ulong len, int prot);
412 abi_long target_mmap(abi_ulong start, abi_ulong len, int prot,
413 int flags, int fd, abi_ulong offset);
414 int target_munmap(abi_ulong start, abi_ulong len);
415 abi_long target_mremap(abi_ulong old_addr, abi_ulong old_size,
416 abi_ulong new_size, unsigned long flags,
417 abi_ulong new_addr);
418 int target_msync(abi_ulong start, abi_ulong len, int flags);
419 extern unsigned long last_brk;
420 extern abi_ulong mmap_next_start;
421 abi_ulong mmap_find_vma(abi_ulong, abi_ulong);
422 void mmap_fork_start(void);
423 void mmap_fork_end(int child);
425 /* main.c */
426 extern unsigned long guest_stack_size;
428 /* user access */
430 #define VERIFY_READ 0
431 #define VERIFY_WRITE 1 /* implies read access */
433 static inline int access_ok(int type, abi_ulong addr, abi_ulong size)
435 return page_check_range((target_ulong)addr, size,
436 (type == VERIFY_READ) ? PAGE_READ : (PAGE_READ | PAGE_WRITE)) == 0;
439 /* NOTE __get_user and __put_user use host pointers and don't check access.
440 These are usually used to access struct data members once the struct has
441 been locked - usually with lock_user_struct. */
443 /* Tricky points:
444 - Use __builtin_choose_expr to avoid type promotion from ?:,
445 - Invalid sizes result in a compile time error stemming from
446 the fact that abort has no parameters.
447 - It's easier to use the endian-specific unaligned load/store
448 functions than host-endian unaligned load/store plus tswapN. */
/* Store x through host pointer hptr, picking the store helper from
 * sizeof(*(hptr)) (1, 2, 4 or 8 bytes) with endianness suffix e.
 * Any other size selects abort, which fails to compile because abort
 * takes no parameters.  Evaluates to (void)0.
 */
#define __put_user_e(x, hptr, e)                                        \
  (__builtin_choose_expr(sizeof(*(hptr)) == 1, stb_p,                   \
   __builtin_choose_expr(sizeof(*(hptr)) == 2, stw_##e##_p,             \
   __builtin_choose_expr(sizeof(*(hptr)) == 4, stl_##e##_p,             \
   __builtin_choose_expr(sizeof(*(hptr)) == 8, stq_##e##_p, abort))))   \
     ((hptr), (x)), (void)0)
/* Load a value through host pointer hptr into x, picking the load helper
 * from sizeof(*(hptr)) (1, 2, 4 or 8 bytes) with endianness suffix e, and
 * casting the result to the pointed-to type.  Any other size selects
 * abort, which fails to compile because abort takes no parameters.
 * Evaluates to (void)0.
 */
#define __get_user_e(x, hptr, e)                                        \
  ((x) = (typeof(*(hptr)))(                                             \
   __builtin_choose_expr(sizeof(*(hptr)) == 1, ldub_p,                  \
   __builtin_choose_expr(sizeof(*(hptr)) == 2, lduw_##e##_p,            \
   __builtin_choose_expr(sizeof(*(hptr)) == 4, ldl_##e##_p,             \
   __builtin_choose_expr(sizeof(*(hptr)) == 8, ldq_##e##_p, abort))))   \
     (hptr)), (void)0)
465 #ifdef TARGET_WORDS_BIGENDIAN
466 # define __put_user(x, hptr) __put_user_e(x, hptr, be)
467 # define __get_user(x, hptr) __get_user_e(x, hptr, be)
468 #else
469 # define __put_user(x, hptr) __put_user_e(x, hptr, le)
470 # define __get_user(x, hptr) __get_user_e(x, hptr, le)
471 #endif
/* put_user()/get_user() take a guest address and check access */
/* These are usually used to access an atomic data type, such as an int,
 * that has been passed by address.  These internally perform locking
 * and unlocking on the data type.
 */
/* put_user: store x as target_type at guest address gaddr.
 * Evaluates to 0 on success, or -TARGET_EFAULT if lock_user() fails.
 */
#define put_user(x, gaddr, target_type)                                 \
({                                                                      \
    abi_ulong __gaddr = (gaddr);                                        \
    target_type *__hptr;                                                \
    abi_long __ret = 0;                                                 \
    if ((__hptr = lock_user(VERIFY_WRITE, __gaddr, sizeof(target_type), 0))) { \
        __put_user((x), __hptr);                                        \
        unlock_user(__hptr, __gaddr, sizeof(target_type));              \
    } else                                                              \
        __ret = -TARGET_EFAULT;                                         \
    __ret;                                                              \
})
/* get_user: load a target_type from guest address gaddr into x.
 * Evaluates to 0 on success; if lock_user() fails, x is set to 0 and the
 * result is -TARGET_EFAULT.
 */
#define get_user(x, gaddr, target_type)                                 \
({                                                                      \
    abi_ulong __gaddr = (gaddr);                                        \
    target_type *__hptr;                                                \
    abi_long __ret = 0;                                                 \
    if ((__hptr = lock_user(VERIFY_READ, __gaddr, sizeof(target_type), 1))) { \
        __get_user((x), __hptr);                                        \
        /* length 0: nothing needs to be written back after a read */   \
        unlock_user(__hptr, __gaddr, 0);                                \
    } else {                                                            \
        /* avoid warning */                                             \
        (x) = 0;                                                        \
        __ret = -TARGET_EFAULT;                                         \
    }                                                                   \
    __ret;                                                              \
})
507 #define put_user_ual(x, gaddr) put_user((x), (gaddr), abi_ulong)
508 #define put_user_sal(x, gaddr) put_user((x), (gaddr), abi_long)
509 #define put_user_u64(x, gaddr) put_user((x), (gaddr), uint64_t)
510 #define put_user_s64(x, gaddr) put_user((x), (gaddr), int64_t)
511 #define put_user_u32(x, gaddr) put_user((x), (gaddr), uint32_t)
512 #define put_user_s32(x, gaddr) put_user((x), (gaddr), int32_t)
513 #define put_user_u16(x, gaddr) put_user((x), (gaddr), uint16_t)
514 #define put_user_s16(x, gaddr) put_user((x), (gaddr), int16_t)
515 #define put_user_u8(x, gaddr) put_user((x), (gaddr), uint8_t)
516 #define put_user_s8(x, gaddr) put_user((x), (gaddr), int8_t)
518 #define get_user_ual(x, gaddr) get_user((x), (gaddr), abi_ulong)
519 #define get_user_sal(x, gaddr) get_user((x), (gaddr), abi_long)
520 #define get_user_u64(x, gaddr) get_user((x), (gaddr), uint64_t)
521 #define get_user_s64(x, gaddr) get_user((x), (gaddr), int64_t)
522 #define get_user_u32(x, gaddr) get_user((x), (gaddr), uint32_t)
523 #define get_user_s32(x, gaddr) get_user((x), (gaddr), int32_t)
524 #define get_user_u16(x, gaddr) get_user((x), (gaddr), uint16_t)
525 #define get_user_s16(x, gaddr) get_user((x), (gaddr), int16_t)
526 #define get_user_u8(x, gaddr) get_user((x), (gaddr), uint8_t)
527 #define get_user_s8(x, gaddr) get_user((x), (gaddr), int8_t)
529 /* copy_from_user() and copy_to_user() are usually used to copy data
530 * buffers between the target and host. These internally perform
531 * locking/unlocking of the memory.
533 abi_long copy_from_user(void *hptr, abi_ulong gaddr, size_t len);
534 abi_long copy_to_user(abi_ulong gaddr, void *hptr, size_t len);
536 /* Functions for accessing guest memory. The tget and tput functions
537 read/write single values, byteswapping as necessary. The lock_user function
538 gets a pointer to a contiguous area of guest memory, but does not perform
539 any byteswapping. lock_user may return either a pointer to the guest
540 memory, or a temporary buffer. */
542 /* Lock an area of guest memory into the host. If copy is true then the
543 host area will have the same contents as the guest. */
544 static inline void *lock_user(int type, abi_ulong guest_addr, long len, int copy)
546 if (!access_ok(type, guest_addr, len))
547 return NULL;
548 #ifdef DEBUG_REMAP
550 void *addr;
551 addr = malloc(len);
552 if (copy)
553 memcpy(addr, g2h(guest_addr), len);
554 else
555 memset(addr, 0, len);
556 return addr;
558 #else
559 return g2h(guest_addr);
560 #endif
563 /* Unlock an area of guest memory. The first LEN bytes must be
564 flushed back to guest memory. host_ptr = NULL is explicitly
565 allowed and does nothing. */
566 static inline void unlock_user(void *host_ptr, abi_ulong guest_addr,
567 long len)
570 #ifdef DEBUG_REMAP
571 if (!host_ptr)
572 return;
573 if (host_ptr == g2h(guest_addr))
574 return;
575 if (len > 0)
576 memcpy(g2h(guest_addr), host_ptr, len);
577 free(host_ptr);
578 #endif
581 /* Return the length of a string in target memory or -TARGET_EFAULT if
582 access error. */
583 abi_long target_strlen(abi_ulong gaddr);
585 /* Like lock_user but for null terminated strings. */
586 static inline void *lock_user_string(abi_ulong guest_addr)
588 abi_long len;
589 len = target_strlen(guest_addr);
590 if (len < 0)
591 return NULL;
592 return lock_user(VERIFY_READ, guest_addr, (long)(len + 1), 1);
595 /* Helper macros for locking/unlocking a target struct. */
596 #define lock_user_struct(type, host_ptr, guest_addr, copy) \
597 (host_ptr = lock_user(type, guest_addr, sizeof(*host_ptr), copy))
598 #define unlock_user_struct(host_ptr, guest_addr, copy) \
599 unlock_user(host_ptr, guest_addr, (copy) ? sizeof(*host_ptr) : 0)
601 #include <pthread.h>
603 /* Include target-specific struct and function definitions;
604 * they may need access to the target-independent structures
605 * above, so include them last.
607 #include "target_cpu.h"
608 #include "target_signal.h"
609 #include "target_structs.h"
611 #endif /* QEMU_H */