winebuild: Don't include any Windows header to avoid data types dependencies.
[wine/multimedia.git] / tools / winebuild / relay.c
blob50b47d977f71979db8829a04111e03f495c5a316
1 /*
2 * Relay calls helper routines
4 * Copyright 1993 Robert J. Amstadt
5 * Copyright 1995 Martin von Loewis
6 * Copyright 1995, 1996, 1997 Alexandre Julliard
7 * Copyright 1997 Eric Youngdale
8 * Copyright 1999 Ulrich Weigand
10 * This library is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU Lesser General Public
12 * License as published by the Free Software Foundation; either
13 * version 2.1 of the License, or (at your option) any later version.
15 * This library is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 * Lesser General Public License for more details.
20 * You should have received a copy of the GNU Lesser General Public
21 * License along with this library; if not, write to the Free Software
22 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
25 #include "config.h"
26 #include "wine/port.h"
28 #include <ctype.h>
29 #include <stdarg.h>
31 #include "build.h"
33 /* offset of the stack pointer relative to %fs:(0) */
34 #define STACKOFFSET 0xc0 /* FIELD_OFFSET(TEB,WOW32Reserved) */
36 /* fix this if the ntdll_thread_regs structure is changed */
37 #define GS_OFFSET 0x1d8 /* FIELD_OFFSET(TEB,SystemReserved2) + FIELD_OFFSET(ntdll_thread_data,gs) */
39 #define DPMI_VIF_OFFSET (0x1fc + 0) /* FIELD_OFFSET(TEB,GdiTebBatch) + FIELD_OFFSET(WINE_VM86_TEB_INFO,dpmi_vif) */
40 #define VM86_PENDING_OFFSET (0x1fc + 4) /* FIELD_OFFSET(TEB,GdiTebBatch) + FIELD_OFFSET(WINE_VM86_TEB_INFO,vm86_pending) */
/* Emit the common start of an assembly routine for symbol 'name':
 * an alignment directive, followed by whatever func_declaration()
 * and asm_globl() produce for that symbol. */
static void function_header( const char *name )
{
    const char *text;

    output( "\n\t.align %d\n", get_alignment(4) );

    /* each helper result is emitted before the next helper is called */
    text = func_declaration( name );
    output( "\t%s\n", text );

    text = asm_globl( name );
    output( "%s\n", text );
}
50 /*******************************************************************
51 * BuildCallFrom16Core
53 * This routine builds the core routines used in 16->32 thunks:
54 * CallFrom16Word, CallFrom16Long, CallFrom16Register, and CallFrom16Thunk.
56 * These routines are intended to be called via a far call (with 32-bit
57 * operand size) from 16-bit code. The 16-bit code stub must push %bp,
58 * the 32-bit entry point to be called, and the argument conversion
59 * routine to be used (see stack layout below).
61 * The core routine completes the STACK16FRAME on the 16-bit stack and
62 * switches to the 32-bit stack. Then, the argument conversion routine
63 * is called; it gets passed the 32-bit entry point and a pointer to the
64 * 16-bit arguments (on the 16-bit stack) as parameters. (You can either
65 * use conversion routines automatically generated by BuildCallFrom16,
66 * or write your own for special purposes.)
68 * The conversion routine must call the 32-bit entry point, passing it
69 * the converted arguments, and return its return value to the core.
70 * After the conversion routine has returned, the core switches back
71 * to the 16-bit stack, converts the return value to the DX:AX format
72 * (CallFrom16Long), and returns to the 16-bit call stub. All parameters,
73 * including %bp, are popped off the stack.
75 * The 16-bit call stub now returns to the caller, popping the 16-bit
76 * arguments if necessary (pascal calling convention).
78 * In the case of a 'register' function, CallFrom16Register fills a
79 * CONTEXT86 structure with the values all registers had at the point
80 * the first instruction of the 16-bit call stub was about to be
81 * executed. A pointer to this CONTEXT86 is passed as third parameter
82 * to the argument conversion routine, which typically passes it on
83 * to the called 32-bit entry point.
85 * CallFrom16Thunk is a special variant used by the implementation of
86 * the Win95 16->32 thunk functions C16ThkSL and C16ThkSL01 and is
87 * implemented as follows:
88 * On entry, the EBX register is set up to contain a flat pointer to the
89 * 16-bit stack such that EBX+22 points to the first argument.
90 * Then, the entry point is called, while EBP is set up to point
91 * to the return address (on the 32-bit stack).
92 * The called function returns with CX set to the number of bytes
93 * to be popped of the caller's stack.
95 * Stack layout upon entry to the core routine (STACK16FRAME):
96 * ... ...
97 * (sp+24) word first 16-bit arg
98 * (sp+22) word cs
99 * (sp+20) word ip
100 * (sp+18) word bp
101 * (sp+14) long 32-bit entry point (reused for Win16 mutex recursion count)
102 * (sp+12) word ip of actual entry point (necessary for relay debugging)
103 * (sp+8) long relay (argument conversion) function entry point
104 * (sp+4) long cs of 16-bit entry point
105 * (sp) long ip of 16-bit entry point
107 * Added on the stack:
108 * (sp-2) word saved gs
109 * (sp-4) word saved fs
110 * (sp-6) word saved es
111 * (sp-8) word saved ds
112 * (sp-12) long saved ebp
113 * (sp-16) long saved ecx
114 * (sp-20) long saved edx
115 * (sp-24) long saved previous stack
117 static void BuildCallFrom16Core( int reg_func, int thunk )
119 /* Function header */
120 if (thunk) function_header( "__wine_call_from_16_thunk" );
121 else if (reg_func) function_header( "__wine_call_from_16_regs" );
122 else function_header( "__wine_call_from_16" );
124 /* Create STACK16FRAME (except STACK32FRAME link) */
125 output( "\tpushw %%gs\n" );
126 output( "\tpushw %%fs\n" );
127 output( "\tpushw %%es\n" );
128 output( "\tpushw %%ds\n" );
129 output( "\tpushl %%ebp\n" );
130 output( "\tpushl %%ecx\n" );
131 output( "\tpushl %%edx\n" );
133 /* Save original EFlags register */
134 if (reg_func) output( "\tpushfl\n" );
136 if ( UsePIC )
138 output( "\tcall 1f\n" );
139 output( "1:\tpopl %%ecx\n" );
140 output( "\t.byte 0x2e\n\tmovl %s-1b(%%ecx),%%edx\n", asm_name("CallTo16_DataSelector") );
142 else
143 output( "\t.byte 0x2e\n\tmovl %s,%%edx\n", asm_name("CallTo16_DataSelector") );
145 /* Load 32-bit segment registers */
146 output( "\tmovw %%dx, %%ds\n" );
147 output( "\tmovw %%dx, %%es\n" );
149 if ( UsePIC )
150 output( "\tmovw %s-1b(%%ecx), %%fs\n", asm_name("CallTo16_TebSelector") );
151 else
152 output( "\tmovw %s, %%fs\n", asm_name("CallTo16_TebSelector") );
154 output( "\t.byte 0x64\n\tmov (%d),%%gs\n", GS_OFFSET );
156 /* Translate STACK16FRAME base to flat offset in %edx */
157 output( "\tmovw %%ss, %%dx\n" );
158 output( "\tandl $0xfff8, %%edx\n" );
159 output( "\tshrl $1, %%edx\n" );
160 if (UsePIC)
162 output( "\taddl wine_ldt_copy_ptr-1b(%%ecx),%%edx\n" );
163 output( "\tmovl (%%edx), %%edx\n" );
165 else
166 output( "\tmovl %s(%%edx), %%edx\n", asm_name("wine_ldt_copy") );
167 output( "\tmovzwl %%sp, %%ebp\n" );
168 output( "\tleal %d(%%ebp,%%edx), %%edx\n", reg_func ? 0 : -4 );
170 /* Get saved flags into %ecx */
171 if (reg_func) output( "\tpopl %%ecx\n" );
173 /* Get the 32-bit stack pointer from the TEB and complete STACK16FRAME */
174 output( "\t.byte 0x64\n\tmovl (%d), %%ebp\n", STACKOFFSET );
175 output( "\tpushl %%ebp\n" );
177 /* Switch stacks */
178 output( "\t.byte 0x64\n\tmovw %%ss, (%d)\n", STACKOFFSET + 2 );
179 output( "\t.byte 0x64\n\tmovw %%sp, (%d)\n", STACKOFFSET );
180 output( "\tpushl %%ds\n" );
181 output( "\tpopl %%ss\n" );
182 output( "\tmovl %%ebp, %%esp\n" );
183 output( "\taddl $0x20,%%ebp\n"); /* FIELD_OFFSET(STACK32FRAME,ebp) */
186 /* At this point:
187 STACK16FRAME is completely set up
188 DS, ES, SS: flat data segment
189 FS: current TEB
190 ESP: points to last STACK32FRAME
191 EBP: points to ebp member of last STACK32FRAME
192 EDX: points to current STACK16FRAME
193 ECX: contains saved flags
194 all other registers: unchanged */
196 /* Special case: C16ThkSL stub */
197 if ( thunk )
199 /* Set up registers as expected and call thunk */
200 output( "\tleal 0x1a(%%edx),%%ebx\n" ); /* sizeof(STACK16FRAME)-22 */
201 output( "\tleal -4(%%esp), %%ebp\n" );
203 output( "\tcall *0x26(%%edx)\n"); /* FIELD_OFFSET(STACK16FRAME,entry_point) */
205 /* Switch stack back */
206 output( "\t.byte 0x64\n\tmovw (%d), %%ss\n", STACKOFFSET+2 );
207 output( "\t.byte 0x64\n\tmovzwl (%d), %%esp\n", STACKOFFSET );
208 output( "\t.byte 0x64\n\tpopl (%d)\n", STACKOFFSET );
210 /* Restore registers and return directly to caller */
211 output( "\taddl $8, %%esp\n" );
212 output( "\tpopl %%ebp\n" );
213 output( "\tpopw %%ds\n" );
214 output( "\tpopw %%es\n" );
215 output( "\tpopw %%fs\n" );
216 output( "\tpopw %%gs\n" );
217 output( "\taddl $20, %%esp\n" );
219 output( "\txorb %%ch, %%ch\n" );
220 output( "\tpopl %%ebx\n" );
221 output( "\taddw %%cx, %%sp\n" );
222 output( "\tpush %%ebx\n" );
224 output( "\t.byte 0x66\n" );
225 output( "\tlret\n" );
227 return;
231 /* Build register CONTEXT */
232 if ( reg_func )
234 output( "\tsubl $0x2cc,%%esp\n" ); /* sizeof(CONTEXT86) */
236 output( "\tmovl %%ecx,0xc0(%%esp)\n" ); /* EFlags */
238 output( "\tmovl %%eax,0xb0(%%esp)\n" ); /* Eax */
239 output( "\tmovl %%ebx,0xa4(%%esp)\n" ); /* Ebx */
240 output( "\tmovl %%esi,0xa0(%%esp)\n" ); /* Esi */
241 output( "\tmovl %%edi,0x9c(%%esp)\n" ); /* Edi */
243 output( "\tmovl 0x0c(%%edx),%%eax\n"); /* FIELD_OFFSET(STACK16FRAME,ebp) */
244 output( "\tmovl %%eax,0xb4(%%esp)\n" ); /* Ebp */
245 output( "\tmovl 0x08(%%edx),%%eax\n"); /* FIELD_OFFSET(STACK16FRAME,ecx) */
246 output( "\tmovl %%eax,0xac(%%esp)\n" ); /* Ecx */
247 output( "\tmovl 0x04(%%edx),%%eax\n"); /* FIELD_OFFSET(STACK16FRAME,edx) */
248 output( "\tmovl %%eax,0xa8(%%esp)\n" ); /* Edx */
250 output( "\tmovzwl 0x10(%%edx),%%eax\n"); /* FIELD_OFFSET(STACK16FRAME,ds) */
251 output( "\tmovl %%eax,0x98(%%esp)\n" ); /* SegDs */
252 output( "\tmovzwl 0x12(%%edx),%%eax\n"); /* FIELD_OFFSET(STACK16FRAME,es) */
253 output( "\tmovl %%eax,0x94(%%esp)\n" ); /* SegEs */
254 output( "\tmovzwl 0x14(%%edx),%%eax\n"); /* FIELD_OFFSET(STACK16FRAME,fs) */
255 output( "\tmovl %%eax,0x90(%%esp)\n" ); /* SegFs */
256 output( "\tmovzwl 0x16(%%edx),%%eax\n"); /* FIELD_OFFSET(STACK16FRAME,gs) */
257 output( "\tmovl %%eax,0x8c(%%esp)\n" ); /* SegGs */
259 output( "\tmovzwl 0x2e(%%edx),%%eax\n"); /* FIELD_OFFSET(STACK16FRAME,cs) */
260 output( "\tmovl %%eax,0xbc(%%esp)\n" ); /* SegCs */
261 output( "\tmovzwl 0x2c(%%edx),%%eax\n"); /* FIELD_OFFSET(STACK16FRAME,ip) */
262 output( "\tmovl %%eax,0xb8(%%esp)\n" ); /* Eip */
264 output( "\t.byte 0x64\n\tmovzwl (%d), %%eax\n", STACKOFFSET+2 );
265 output( "\tmovl %%eax,0xc8(%%esp)\n" ); /* SegSs */
266 output( "\t.byte 0x64\n\tmovzwl (%d), %%eax\n", STACKOFFSET );
267 output( "\taddl $0x2c,%%eax\n"); /* FIELD_OFFSET(STACK16FRAME,ip) */
268 output( "\tmovl %%eax,0xc4(%%esp)\n" ); /* Esp */
269 #if 0
270 output( "\tfsave 0x1c(%%esp)\n" ); /* FloatSave */
271 #endif
273 /* Push address of CONTEXT86 structure -- popped by the relay routine */
274 output( "\tmovl %%esp,%%eax\n" );
275 output( "\tandl $~15,%%esp\n" );
276 output( "\tsubl $4,%%esp\n" );
277 output( "\tpushl %%eax\n" );
279 else
281 output( "\tsubl $8,%%esp\n" );
282 output( "\tandl $~15,%%esp\n" );
283 output( "\taddl $8,%%esp\n" );
286 /* Call relay routine (which will call the API entry point) */
287 output( "\tleal 0x30(%%edx),%%eax\n" ); /* sizeof(STACK16FRAME) */
288 output( "\tpushl %%eax\n" );
289 output( "\tpushl 0x26(%%edx)\n"); /* FIELD_OFFSET(STACK16FRAME,entry_point) */
290 output( "\tcall *0x20(%%edx)\n"); /* FIELD_OFFSET(STACK16FRAME,relay) */
292 if ( reg_func )
294 output( "\tleal -748(%%ebp),%%ebx\n" ); /* sizeof(CONTEXT) + FIELD_OFFSET(STACK32FRAME,ebp) */
296 /* Switch stack back */
297 output( "\t.byte 0x64\n\tmovw (%d), %%ss\n", STACKOFFSET+2 );
298 output( "\t.byte 0x64\n\tmovzwl (%d), %%esp\n", STACKOFFSET );
299 output( "\t.byte 0x64\n\tpopl (%d)\n", STACKOFFSET );
301 /* Get return address to CallFrom16 stub */
302 output( "\taddw $0x14,%%sp\n" ); /* FIELD_OFFSET(STACK16FRAME,callfrom_ip)-4 */
303 output( "\tpopl %%eax\n" );
304 output( "\tpopl %%edx\n" );
306 /* Restore all registers from CONTEXT */
307 output( "\tmovw 0xc8(%%ebx),%%ss\n"); /* SegSs */
308 output( "\tmovl 0xc4(%%ebx),%%esp\n"); /* Esp */
309 output( "\taddl $4, %%esp\n" ); /* room for final return address */
311 output( "\tpushw 0xbc(%%ebx)\n"); /* SegCs */
312 output( "\tpushw 0xb8(%%ebx)\n"); /* Eip */
313 output( "\tpushl %%edx\n" );
314 output( "\tpushl %%eax\n" );
315 output( "\tpushl 0xc0(%%ebx)\n"); /* EFlags */
316 output( "\tpushl 0x98(%%ebx)\n"); /* SegDs */
318 output( "\tpushl 0x94(%%ebx)\n"); /* SegEs */
319 output( "\tpopl %%es\n" );
320 output( "\tpushl 0x90(%%ebx)\n"); /* SegFs */
321 output( "\tpopl %%fs\n" );
322 output( "\tpushl 0x8c(%%ebx)\n"); /* SegGs */
323 output( "\tpopl %%gs\n" );
325 output( "\tmovl 0xb4(%%ebx),%%ebp\n"); /* Ebp */
326 output( "\tmovl 0xa0(%%ebx),%%esi\n"); /* Esi */
327 output( "\tmovl 0x9c(%%ebx),%%edi\n"); /* Edi */
328 output( "\tmovl 0xb0(%%ebx),%%eax\n"); /* Eax */
329 output( "\tmovl 0xa8(%%ebx),%%edx\n"); /* Edx */
330 output( "\tmovl 0xac(%%ebx),%%ecx\n"); /* Ecx */
331 output( "\tmovl 0xa4(%%ebx),%%ebx\n"); /* Ebx */
333 output( "\tpopl %%ds\n" );
334 output( "\tpopfl\n" );
335 output( "\tlret\n" );
337 else
339 /* Switch stack back */
340 output( "\t.byte 0x64\n\tmovw (%d), %%ss\n", STACKOFFSET+2 );
341 output( "\t.byte 0x64\n\tmovzwl (%d), %%esp\n", STACKOFFSET );
342 output( "\t.byte 0x64\n\tpopl (%d)\n", STACKOFFSET );
344 /* Restore registers */
345 output( "\tpopl %%edx\n" );
346 output( "\tpopl %%ecx\n" );
347 output( "\tpopl %%ebp\n" );
348 output( "\tpopw %%ds\n" );
349 output( "\tpopw %%es\n" );
350 output( "\tpopw %%fs\n" );
351 output( "\tpopw %%gs\n" );
353 /* Return to return stub which will return to caller */
354 output( "\tlret $12\n" );
356 if (thunk) output_function_size( "__wine_call_from_16_thunk" );
357 else if (reg_func) output_function_size( "__wine_call_from_16_regs" );
358 else output_function_size( "__wine_call_from_16" );
362 /*******************************************************************
363 * BuildCallTo16Core
365 * This routine builds the core routines used in 32->16 thunks:
367 * extern DWORD WINAPI wine_call_to_16( FARPROC16 target, DWORD cbArgs, PEXCEPTION_HANDLER handler );
368 * extern void WINAPI wine_call_to_16_regs( CONTEXT86 *context, DWORD cbArgs, PEXCEPTION_HANDLER handler );
370 * These routines can be called directly from 32-bit code.
372 * All routines expect that the 16-bit stack contents (arguments) and the
373 * return address (segptr to CallTo16_Ret) were already set up by the
374 * caller; nb_args must contain the number of bytes to be conserved. The
375 * 16-bit SS:SP will be set accordingly.
377 * All other registers are either taken from the CONTEXT86 structure
378 * or else set to default values. The target routine address is either
379 * given directly or taken from the CONTEXT86.
381 static void BuildCallTo16Core( int reg_func )
383 const char *name = reg_func ? "wine_call_to_16_regs" : "wine_call_to_16";
385 /* Function header */
386 function_header( name );
388 /* Function entry sequence */
389 output( "\tpushl %%ebp\n" );
390 output( "\tmovl %%esp, %%ebp\n" );
392 /* Save the 32-bit registers */
393 output( "\tpushl %%ebx\n" );
394 output( "\tpushl %%esi\n" );
395 output( "\tpushl %%edi\n" );
396 output( "\t.byte 0x64\n\tmov %%gs,(%d)\n", GS_OFFSET );
398 /* Setup exception frame */
399 output( "\t.byte 0x64\n\tpushl (%d)\n", STACKOFFSET );
400 output( "\tpushl 16(%%ebp)\n" ); /* handler */
401 output( "\t.byte 0x64\n\tpushl (0)\n" );
402 output( "\t.byte 0x64\n\tmovl %%esp,(0)\n" );
404 /* Call the actual CallTo16 routine (simulate a lcall) */
405 output( "\tpushl %%cs\n" );
406 output( "\tcall .L%s\n", name );
408 /* Remove exception frame */
409 output( "\t.byte 0x64\n\tpopl (0)\n" );
410 output( "\taddl $4, %%esp\n" );
411 output( "\t.byte 0x64\n\tpopl (%d)\n", STACKOFFSET );
413 if ( !reg_func )
415 /* Convert return value */
416 output( "\tandl $0xffff,%%eax\n" );
417 output( "\tshll $16,%%edx\n" );
418 output( "\torl %%edx,%%eax\n" );
420 else
423 * Modify CONTEXT86 structure to contain new values
425 * NOTE: We restore only EAX, EBX, EDX, EDX, EBP, and ESP.
426 * The segment registers as well as ESI and EDI should
427 * not be modified by a well-behaved 16-bit routine in
428 * any case. [If necessary, we could restore them as well,
429 * at the cost of a somewhat less efficient return path.]
432 output( "\tmovl 0x14(%%esp),%%edi\n" ); /* FIELD_OFFSET(STACK32FRAME,target) - FIELD_OFFSET(STACK32FRAME,edi) */
433 /* everything above edi has been popped already */
435 output( "\tmovl %%eax,0xb0(%%edi)\n"); /* Eax */
436 output( "\tmovl %%ebx,0xa4(%%edi)\n"); /* Ebx */
437 output( "\tmovl %%ecx,0xac(%%edi)\n"); /* Ecx */
438 output( "\tmovl %%edx,0xa8(%%edi)\n"); /* Edx */
439 output( "\tmovl %%ebp,0xb4(%%edi)\n"); /* Ebp */
440 output( "\tmovl %%esi,0xc4(%%edi)\n"); /* Esp */
441 /* The return glue code saved %esp into %esi */
444 /* Restore the 32-bit registers */
445 output( "\tpopl %%edi\n" );
446 output( "\tpopl %%esi\n" );
447 output( "\tpopl %%ebx\n" );
449 /* Function exit sequence */
450 output( "\tpopl %%ebp\n" );
451 output( "\tret $12\n" );
454 /* Start of the actual CallTo16 routine */
456 output( ".L%s:\n", name );
458 /* Switch to the 16-bit stack */
459 output( "\tmovl %%esp,%%edx\n" );
460 output( "\t.byte 0x64\n\tmovw (%d),%%ss\n", STACKOFFSET + 2);
461 output( "\t.byte 0x64\n\tmovw (%d),%%sp\n", STACKOFFSET );
462 output( "\t.byte 0x64\n\tmovl %%edx,(%d)\n", STACKOFFSET );
464 /* Make %bp point to the previous stackframe (built by CallFrom16) */
465 output( "\tmovzwl %%sp,%%ebp\n" );
466 output( "\tleal 0x2a(%%ebp),%%ebp\n"); /* FIELD_OFFSET(STACK16FRAME,bp) */
468 /* Add the specified offset to the new sp */
469 output( "\tsubw 0x2c(%%edx), %%sp\n"); /* FIELD_OFFSET(STACK32FRAME,nb_args) */
471 if (reg_func)
473 /* Push the called routine address */
474 output( "\tmovl 0x28(%%edx),%%edx\n"); /* FIELD_OFFSET(STACK32FRAME,target) */
475 output( "\tpushw 0xbc(%%edx)\n"); /* SegCs */
476 output( "\tpushw 0xb8(%%edx)\n"); /* Eip */
478 /* Get the registers */
479 output( "\tpushw 0x98(%%edx)\n"); /* SegDs */
480 output( "\tpushl 0x94(%%edx)\n"); /* SegEs */
481 output( "\tpopl %%es\n" );
482 output( "\tpushl 0x90(%%edx)\n"); /* SegFs */
483 output( "\tpopl %%fs\n" );
484 output( "\tpushl 0x8c(%%edx)\n"); /* SegGs */
485 output( "\tpopl %%gs\n" );
486 output( "\tmovl 0xb4(%%edx),%%ebp\n"); /* Ebp */
487 output( "\tmovl 0xa0(%%edx),%%esi\n"); /* Esi */
488 output( "\tmovl 0x9c(%%edx),%%edi\n"); /* Edi */
489 output( "\tmovl 0xb0(%%edx),%%eax\n"); /* Eax */
490 output( "\tmovl 0xa4(%%edx),%%ebx\n"); /* Ebx */
491 output( "\tmovl 0xac(%%edx),%%ecx\n"); /* Ecx */
492 output( "\tmovl 0xa8(%%edx),%%edx\n"); /* Edx */
494 /* Get the 16-bit ds */
495 output( "\tpopw %%ds\n" );
497 else /* not a register function */
499 /* Push the called routine address */
500 output( "\tpushl 0x28(%%edx)\n"); /* FIELD_OFFSET(STACK32FRAME,target) */
502 /* Set %fs and %gs to the value saved by the last CallFrom16 */
503 output( "\tpushw -22(%%ebp)\n" ); /* FIELD_OFFSET(STACK16FRAME,fs)-FIELD_OFFSET(STACK16FRAME,bp) */
504 output( "\tpopw %%fs\n" );
505 output( "\tpushw -20(%%ebp)\n" ); /* FIELD_OFFSET(STACK16FRAME,gs)-FIELD_OFFSET(STACK16FRAME,bp) */
506 output( "\tpopw %%gs\n" );
508 /* Set %ds and %es (and %ax just in case) equal to %ss */
509 output( "\tmovw %%ss,%%ax\n" );
510 output( "\tmovw %%ax,%%ds\n" );
511 output( "\tmovw %%ax,%%es\n" );
514 /* Jump to the called routine */
515 output( "\t.byte 0x66\n" );
516 output( "\tlret\n" );
518 /* Function footer */
519 output_function_size( name );
523 /*******************************************************************
524 * BuildRet16Func
526 * Build the return code for 16-bit callbacks
528 static void BuildRet16Func(void)
530 function_header( "__wine_call_to_16_ret" );
532 /* Save %esp into %esi */
533 output( "\tmovl %%esp,%%esi\n" );
535 /* Restore 32-bit segment registers */
537 output( "\t.byte 0x2e\n\tmovl %s", asm_name("CallTo16_DataSelector") );
538 output( "-%s,%%edi\n", asm_name("__wine_call16_start") );
539 output( "\tmovw %%di,%%ds\n" );
540 output( "\tmovw %%di,%%es\n" );
542 output( "\t.byte 0x2e\n\tmov %s", asm_name("CallTo16_TebSelector") );
543 output( "-%s,%%fs\n", asm_name("__wine_call16_start") );
545 output( "\t.byte 0x64\n\tmov (%d),%%gs\n", GS_OFFSET );
547 /* Restore the 32-bit stack */
549 output( "\tmovw %%di,%%ss\n" );
550 output( "\t.byte 0x64\n\tmovl (%d),%%esp\n", STACKOFFSET );
552 /* Return to caller */
554 output( "\tlret\n" );
555 output_function_size( "__wine_call_to_16_ret" );
/*******************************************************************
 *         BuildCallTo32CBClient
 *
 * Call a CBClient relay stub from 32-bit code (KERNEL.620).
 *
 * Since the relay stub is itself 32-bit, this should not be a problem;
 * unfortunately, the relay stubs are expected to switch back to a
 * 16-bit stack (and 16-bit code) after completion :-(
 *
 * This would conflict with our 16- vs. 32-bit stack handling, so
 * we simply switch *back* to our 32-bit stack before returning to
 * the caller ...
 *
 * The CBClient relay stub expects to be called with the following
 * 16-bit stack layout, and with ebp and ebx pointing into the 16-bit
 * stack at the designated places:
 *
 *    ...
 *  (ebp+14) original arguments to the callback routine
 *  (ebp+10) far return address to original caller
 *  (ebp+6)  Thunklet target address
 *  (ebp+2)  Thunklet relay ID code
 *  (ebp)    BP (saved by CBClientGlueSL)
 *  (ebp-2)  SI (saved by CBClientGlueSL)
 *  (ebp-4)  DI (saved by CBClientGlueSL)
 *  (ebp-6)  DS (saved by CBClientGlueSL)
 *
 *   ...     buffer space used by the 16-bit side glue for temp copies
 *
 *  (ebx+4)  far return address to 16-bit side glue code
 *  (ebx)    saved 16-bit ss:sp (pointing to ebx+4)
 *
 * The 32-bit side glue code accesses both the original arguments (via ebp)
 * and the temporary copies prepared by the 16-bit side glue (via ebx).
 * After completion, the stub will load ss:sp from the buffer at ebx
 * and perform a far return to 16-bit code.
 *
 * To trick the relay stub into returning to us, we replace the 16-bit
 * return address to the glue code by a cs:ip pair pointing to our
 * return entry point (the original return address is saved first).
 * Our return stub thus called will then reload the 32-bit ss:esp and
 * return to 32-bit code (by using an ss:esp value that we have also
 * pushed onto the 16-bit stack before and a cs:eip value found at
 * that position on the 32-bit stack). The ss:esp to be restored is
 * found relative to the 16-bit stack pointer at:
 *
 *  (ebx-4)   ss  (flat)
 *  (ebx-8)   sp  (32-bit stack pointer)
 *
 * The second variant of this routine, CALL32_CBClientEx, which is used
 * to implement KERNEL.621, has to cope with yet another problem: Here,
 * the 32-bit side directly returns to the caller of the CBClient thunklet,
 * restoring registers saved by CBClientGlueSL and cleaning up the stack.
 * As we have to return to our 32-bit code first, we have to adapt the
 * layout of our temporary area so as to include values for the registers
 * that are to be restored, and later (in the implementation of KERNEL.621)
 * we *really* restore them. The return stub restores DS, DI, SI, and BP
 * from the stack, skips the next 8 bytes (CBClient relay code / target),
 * and then performs a lret NN, where NN is the number of arguments to be
 * removed. Thus, we prepare our temporary area as follows:
 *
 *     (ebx+22) 16-bit cs  (this segment)
 *     (ebx+20) 16-bit ip  ('16-bit' return entry point)
 *     (ebx+16) 32-bit ss  (flat)
 *     (ebx+12) 32-bit sp  (32-bit stack pointer)
 *     (ebx+10) 16-bit bp  (points to ebx+24)
 *     (ebx+8)  16-bit si  (ignored)
 *     (ebx+6)  16-bit di  (ignored)
 *     (ebx+4)  16-bit ds  (we actually use the flat DS here)
 *     (ebx+2)  16-bit ss  (16-bit stack segment)
 *     (ebx+0)  16-bit sp  (points to ebx+4)
 *
 * Note that we ensure that DS is not changed and remains the flat segment,
 * and the 32-bit stack pointer our own return stub needs fits just
 * perfectly into the 8 bytes that are skipped by the Windows stub.
 * One problem is that we have to determine the number of removed arguments,
 * as these have to be really removed in KERNEL.621. Thus, the BP value
 * that we place in the temporary area to be restored, contains the value
 * that SP would have if no arguments were removed. By comparing the actual
 * value of SP with this value in our return stub we can compute the number
 * of removed arguments. This is then returned to KERNEL.621.
 *
 * The stack layout of this function, as read by the generated code
 * (NOTE(review): derived from the offsets used below -- confirm against
 * the C prototypes of CALL32_CBClient / CALL32_CBClientEx):
 * (ebp+24)  nArgs  pointer to variable receiving nr. of args (Ex only)
 * (ebp+20)  esi    pointer to caller's esi value
 * (ebp+16)  temp   pointer to the temporary area (loaded into ebx)
 * (ebp+12)  arg    ebp value to be set for relay stub
 * (ebp+8)   func   CBClient relay stub address
 * (ebp+4)   ret addr
 * (ebp)     ebp
 *
 * Parameters:
 *   isEx  non-zero to generate CALL32_CBClientEx (and its _Ret stub),
 *         zero to generate CALL32_CBClient
 */
static void BuildCallTo32CBClient( int isEx )
{
    const char *call_name;
    const char *stub_name;

    if (isEx)
    {
        call_name = "CALL32_CBClientEx";
        stub_name = "CALL32_CBClientEx_Ret";
    }
    else
    {
        call_name = "CALL32_CBClient";
        stub_name = "CALL32_CBClient_Ret";
    }

    function_header( call_name );

    /* Entry code */

    output( "\tpushl %%ebp\n" );
    output( "\tmovl %%esp,%%ebp\n" );
    output( "\tpushl %%edi\n" );
    output( "\tpushl %%esi\n" );
    output( "\tpushl %%ebx\n" );

    /* Get pointer to temporary area and save the 32-bit stack pointer */

    output( "\tmovl 16(%%ebp), %%ebx\n" );
    output( "\tleal -8(%%esp), %%eax\n" );

    /* the slot for the saved stack pointer differs between the two layouts */
    if ( isEx )
        output( "\tmovl %%eax, 12(%%ebx)\n" );
    else
        output( "\tmovl %%eax, -8(%%ebx)\n" );

    /* Set up registers and call CBClient relay stub (simulating a far call) */

    output( "\tmovl 20(%%ebp), %%esi\n" );
    output( "\tmovl (%%esi), %%esi\n" );

    output( "\tmovl 8(%%ebp), %%eax\n" );
    output( "\tmovl 12(%%ebp), %%ebp\n" );

    output( "\tpushl %%cs\n" );
    output( "\tcall *%%eax\n" );

    /* Return new esi value to caller */

    output( "\tmovl 32(%%esp), %%edi\n" );
    output( "\tmovl %%esi, (%%edi)\n" );

    /* Return argument size to caller */
    if ( isEx )
    {
        output( "\tmovl 36(%%esp), %%ebx\n" );
        output( "\tmovl %%ebp, (%%ebx)\n" );
    }

    /* Restore registers and return */

    output( "\tpopl %%ebx\n" );
    output( "\tpopl %%esi\n" );
    output( "\tpopl %%edi\n" );
    output( "\tpopl %%ebp\n" );
    output( "\tret\n" );
    output_function_size( call_name );

    /* '16-bit' return stub */

    function_header( stub_name );
    if ( isEx )
    {
        /* leave the difference between SP and the expected BP value in %ebp */
        output( "\tmovzwl %%bp, %%ebx\n" );
        output( "\tsubw %%bp, %%sp\n" );
        output( "\tmovzwl %%sp, %%ebp\n" );
        output( "\tlssl %%ss:-12(%%ebx), %%esp\n" );
    }
    else
    {
        output( "\tmovzwl %%sp, %%ebx\n" );
        output( "\tlssl %%ss:-16(%%ebx), %%esp\n" );
    }
    output( "\tlret\n" );
    output_function_size( stub_name );
}
/*******************************************************************
 *         BuildCallFrom32Regs
 *
 * Build a 32-bit-to-Wine call-back function for a 'register' function.
 * 'args' is the number of dword arguments.
 *
 * Stack layout (STACK_SPACE = 128 + sizeof(CONTEXT86) = 128 + 0x2cc):
 *   ...
 * (ebp+20)  first arg
 * (ebp+16)  ret addr to user code
 * (ebp+12)  func to call (relative to relay code ret addr)
 * (ebp+8)   number of args
 * (ebp+4)   ret addr to relay code
 * (ebp+0)   saved ebp
 * (ebp-128) buffer area to allow stack frame manipulation
 * (ebp-STACK_SPACE) CONTEXT86 struct
 *   ...     padding for stack alignment
 *   ...     CONTEXT86 *argument (stored right after the copied args)
 * (esp)     arguments copied from (ebp+20)
 *
 * The entry point routine is called with a CONTEXT* extra argument,
 * following the normal args. In this context structure, EIP_reg
 * contains the return address to user code, and ESP_reg the stack
 * pointer on return (with the return address and arguments already
 * removed).
 *
 * Also emits __wine_call_from_32_restore_regs, which loads the context
 * pointer from 4(%esp) and jumps into the register-restore sequence
 * of __wine_call_from_32_regs.
 */
static void BuildCallFrom32Regs(void)
{
    static const int STACK_SPACE = 128 + 0x2cc /* sizeof(CONTEXT86) */;

    /* Function header */

    function_header( "__wine_call_from_32_regs" );

    /* Allocate some buffer space on the stack */

    output( "\tpushl %%ebp\n" );
    output( "\tmovl %%esp,%%ebp\n" );
    output( "\tleal -%d(%%esp),%%esp\n", STACK_SPACE );

    /* Build the context structure */

    output( "\tmovl %%eax,0xb0(%%esp)\n" );  /* Eax */
    output( "\tpushfl\n" );
    output( "\tpopl %%eax\n" );
    output( "\tmovl %%eax,0xc0(%%esp)\n" );  /* EFlags */
    output( "\tmovl 0(%%ebp),%%eax\n" );
    output( "\tmovl %%eax,0xb4(%%esp)\n" );  /* Ebp */
    output( "\tmovl %%ebx,0xa4(%%esp)\n" );  /* Ebx */
    output( "\tmovl %%ecx,0xac(%%esp)\n" );  /* Ecx */
    output( "\tmovl %%edx,0xa8(%%esp)\n" );  /* Edx */
    output( "\tmovl %%esi,0xa0(%%esp)\n" );  /* Esi */
    output( "\tmovl %%edi,0x9c(%%esp)\n" );  /* Edi */

    /* %eax is cleared once so each 16-bit selector is stored zero-extended */
    output( "\txorl %%eax,%%eax\n" );
    output( "\tmovw %%cs,%%ax\n" );
    output( "\tmovl %%eax,0xbc(%%esp)\n" );  /* SegCs */
    output( "\tmovw %%es,%%ax\n" );
    output( "\tmovl %%eax,0x94(%%esp)\n" );  /* SegEs */
    output( "\tmovw %%fs,%%ax\n" );
    output( "\tmovl %%eax,0x90(%%esp)\n" );  /* SegFs */
    output( "\tmovw %%gs,%%ax\n" );
    output( "\tmovl %%eax,0x8c(%%esp)\n" );  /* SegGs */
    output( "\tmovw %%ss,%%ax\n" );
    output( "\tmovl %%eax,0xc8(%%esp)\n" );  /* SegSs */
    output( "\tmovw %%ds,%%ax\n" );
    output( "\tmovl %%eax,0x98(%%esp)\n" );  /* SegDs */
    output( "\tmovw %%ax,%%es\n" );  /* set %es equal to %ds just in case */

    output( "\tmovl $0x10007,0(%%esp)\n" );  /* ContextFlags */

    output( "\tmovl 16(%%ebp),%%eax\n" );    /* Get %eip at time of call */
    output( "\tmovl %%eax,0xb8(%%esp)\n" );  /* Eip */

    /* Transfer the arguments */

    output( "\tmovl 8(%%ebp),%%ecx\n" );     /* fetch number of args to copy */
    output( "\tleal 4(,%%ecx,4),%%edx\n" );  /* add 4 for context arg */
    output( "\tsubl %%edx,%%esp\n" );
    output( "\tandl $~15,%%esp\n" );
    output( "\tleal 20(%%ebp),%%esi\n" );    /* get %esp at time of call */
    output( "\tmovl %%esp,%%edi\n" );
    output( "\ttest %%ecx,%%ecx\n" );
    output( "\tjz 1f\n" );
    output( "\tcld\n" );
    output( "\trep\n\tmovsl\n" );            /* copy args */
    output( "1:\tleal %d(%%ebp),%%eax\n", -STACK_SPACE );  /* get addr of context struct */
    output( "\tmovl %%eax,(%%edi)\n" );      /* and pass it as extra arg */
    /* %esi now points past the last argument: the stack pointer on return */
    output( "\tmovl %%esi,%d(%%ebp)\n", 0xc4 /* Esp */ - STACK_SPACE );

    /* Call the entry point */

    output( "\tmovl 4(%%ebp),%%eax\n" );     /* get relay code addr */
    output( "\taddl 12(%%ebp),%%eax\n" );
    output( "\tcall *%%eax\n" );
    output( "\tleal -%d(%%ebp),%%ecx\n", STACK_SPACE );

    /* Restore the context structure */

    /* label 2 is also the jump target of __wine_call_from_32_restore_regs */
    output( "2:\tpushl 0x94(%%ecx)\n" );     /* SegEs */
    output( "\tpopl %%es\n" );
    output( "\tpushl 0x90(%%ecx)\n" );       /* SegFs */
    output( "\tpopl %%fs\n" );
    output( "\tpushl 0x8c(%%ecx)\n" );       /* SegGs */
    output( "\tpopl %%gs\n" );

    output( "\tmovl 0x9c(%%ecx),%%edi\n" );  /* Edi */
    output( "\tmovl 0xa0(%%ecx),%%esi\n" );  /* Esi */
    output( "\tmovl 0xa8(%%ecx),%%edx\n" );  /* Edx */
    output( "\tmovl 0xa4(%%ecx),%%ebx\n" );  /* Ebx */
    output( "\tmovl 0xb0(%%ecx),%%eax\n" );  /* Eax */
    output( "\tmovl 0xb4(%%ecx),%%ebp\n" );  /* Ebp */

    output( "\tpushl 0xc8(%%ecx)\n" );       /* SegSs */
    output( "\tpopl %%ss\n" );
    output( "\tmovl 0xc4(%%ecx),%%esp\n" );  /* Esp */

    output( "\tpushl 0xc0(%%ecx)\n" );       /* EFlags */
    output( "\tpushl 0xbc(%%ecx)\n" );       /* SegCs */
    output( "\tpushl 0xb8(%%ecx)\n" );       /* Eip */
    output( "\tpushl 0x98(%%ecx)\n" );       /* SegDs */
    output( "\tmovl 0xac(%%ecx),%%ecx\n" );  /* Ecx (last: %ecx was the context pointer) */

    output( "\tpopl %%ds\n" );
    output( "\tiret\n" );
    output_function_size( "__wine_call_from_32_regs" );

    function_header( "__wine_call_from_32_restore_regs" );
    output( "\tmovl 4(%%esp),%%ecx\n" );
    output( "\tjmp 2b\n" );
    output_function_size( "__wine_call_from_32_restore_regs" );
}
857 /*******************************************************************
858 * BuildPendingEventCheck
860 * Build a function that checks whether there are any
861 * pending DPMI events.
863 * Stack layout:
865 * (sp+12) long eflags
866 * (sp+6) long cs
867 * (sp+2) long ip
868 * (sp) word fs
870 * On entry to function, fs register points to a valid TEB.
871 * On exit from function, stack will be popped.
873 static void BuildPendingEventCheck(void)
875 /* Function header */
877 function_header( "DPMI_PendingEventCheck" );
879 /* Check for pending events. */
881 output( "\t.byte 0x64\n\ttestl $0xffffffff,(%d)\n", VM86_PENDING_OFFSET );
882 output( "\tje %s\n", asm_name("DPMI_PendingEventCheck_Cleanup") );
883 output( "\t.byte 0x64\n\ttestl $0xffffffff,(%d)\n", DPMI_VIF_OFFSET );
884 output( "\tje %s\n", asm_name("DPMI_PendingEventCheck_Cleanup") );
886 /* Process pending events. */
888 output( "\tsti\n" );
890 /* Start cleanup. Restore fs register. */
892 output( "%s\n", asm_globl("DPMI_PendingEventCheck_Cleanup") );
893 output( "\tpopw %%fs\n" );
895 /* Return from function. */
897 output( "%s\n", asm_globl("DPMI_PendingEventCheck_Return") );
898 output( "\tiret\n" );
900 output_function_size( "DPMI_PendingEventCheck" );
904 /*******************************************************************
905 * BuildRelays16
907 * Build all the 16-bit relay callbacks
909 void BuildRelays16(void)
911 if (target_cpu != CPU_x86)
913 output( "/* File not used with this architecture. Do not edit! */\n\n" );
914 return;
917 /* File header */
919 output( "/* File generated automatically. Do not edit! */\n\n" );
920 output( "\t.text\n" );
922 output( "%s:\n\n", asm_name("__wine_spec_thunk_text_16") );
924 output( "%s\n", asm_globl("__wine_call16_start") );
926 /* Standard CallFrom16 routine */
927 BuildCallFrom16Core( 0, 0 );
929 /* Register CallFrom16 routine */
930 BuildCallFrom16Core( 1, 0 );
932 /* C16ThkSL CallFrom16 routine */
933 BuildCallFrom16Core( 0, 1 );
935 /* Standard CallTo16 routine */
936 BuildCallTo16Core( 0 );
938 /* Register CallTo16 routine */
939 BuildCallTo16Core( 1 );
941 /* Standard CallTo16 return stub */
942 BuildRet16Func();
944 /* CBClientThunkSL routine */
945 BuildCallTo32CBClient( 0 );
947 /* CBClientThunkSLEx routine */
948 BuildCallTo32CBClient( 1 );
950 /* Pending DPMI events check stub */
951 BuildPendingEventCheck();
953 output( "%s\n", asm_globl("__wine_call16_end") );
954 output_function_size( "__wine_spec_thunk_text_16" );
956 /* Declare the return address and data selector variables */
957 output( "\n\t.data\n\t.align %d\n", get_alignment(4) );
958 output( "%s\n\t.long 0\n", asm_globl("CallTo16_DataSelector") );
959 output( "%s\n\t.long 0\n", asm_globl("CallTo16_TebSelector") );
960 if (UsePIC) output( "wine_ldt_copy_ptr:\t.long %s\n", asm_name("wine_ldt_copy") );
962 output( "\t.text\n" );
963 output( "%s:\n\n", asm_name("__wine_spec_thunk_text_32") );
964 BuildCallFrom32Regs();
965 output_function_size( "__wine_spec_thunk_text_32" );
967 output_gnu_stack_note();
/*******************************************************************
 *         build_call_from_regs_x86_64
 *
 * Build the register saving code for a 'register' entry point.
 * Emits __wine_call_from_regs, and after it __wine_restore_regs,
 * which copies %rcx into %rbx and jumps back into the restore
 * sequence of __wine_call_from_regs.
 *
 * Stack layout:
 *     ...
 * (rsp+16)  first arg
 * (rsp+8)   ret addr to user code
 * (rsp)     ret addr to relay code
 * (rsp-128) buffer area to allow stack frame manipulation
 *
 * Parameters:
 * %rcx  number of args
 * %rdx  entry point
 */
static void build_call_from_regs_x86_64(void)
{
    /* unsigned so that the value matches the %u conversions it is printed with */
    static const unsigned int STACK_SPACE = 128 + 0x4d0;  /* size of x86_64 context */

    /* registers stored in consecutive 8-byte context slots starting at 0xa0 */
    static const char * const int_regs[] =
    {
        "rbp", "rsi", "rdi", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15"
    };
    /* segment registers stored in consecutive 2-byte context slots starting at 0x38 */
    static const char * const seg_regs[] = { "cs", "ds", "es", "fs", "gs", "ss" };

    const unsigned int nb_int_regs = sizeof(int_regs) / sizeof(int_regs[0]);
    const unsigned int nb_seg_regs = sizeof(seg_regs) / sizeof(seg_regs[0]);
    const unsigned int nb_xmm_regs = 16;  /* %xmm0 - %xmm15, 16 bytes each from 0x1a0 */
    unsigned int i;

    /* Function header */

    function_header( "__wine_call_from_regs" );

    output( "\tsubq $%u,%%rsp\n", STACK_SPACE );

    /* save registers into the context */

    output( "\tmovq %%rax,0x78(%%rsp)\n" );
    output( "\tmovq %u(%%rsp),%%rax\n", STACK_SPACE + 16 );  /* saved %rcx on stack */
    output( "\tmovq %%rax,0x80(%%rsp)\n" );
    output( "\tmovq %u(%%rsp),%%rax\n", STACK_SPACE + 24 );  /* saved %rdx on stack */
    output( "\tmovq %%rax,0x88(%%rsp)\n" );
    output( "\tmovq %%rbx,0x90(%%rsp)\n" );
    output( "\tleaq %u(%%rsp),%%rax\n", STACK_SPACE + 16 );  /* value later restored as stack pointer */
    output( "\tmovq %%rax,0x98(%%rsp)\n" );
    for (i = 0; i < nb_int_regs; i++)
        output( "\tmovq %%%s,0x%x(%%rsp)\n", int_regs[i], 0xa0 + 8 * i );
    output( "\tmovq %u(%%rsp),%%rax\n", STACK_SPACE + 8 );   /* ret addr to user code */
    output( "\tmovq %%rax,0xf8(%%rsp)\n" );

    output( "\tstmxcsr 0x34(%%rsp)\n" );
    output( "\tfxsave 0x100(%%rsp)\n" );
    for (i = 0; i < nb_xmm_regs; i++)
        output( "\tmovdqa %%xmm%u,0x%x(%%rsp)\n", i, 0x1a0 + 16 * i );

    for (i = 0; i < nb_seg_regs; i++)
        output( "\tmovw %%%s,0x%x(%%rsp)\n", seg_regs[i], 0x38 + 2 * i );
    output( "\tpushfq\n" );
    output( "\tpopq %%rax\n" );
    output( "\tmovl %%eax,0x44(%%rsp)\n" );  /* flags register, low 32 bits */

    /* NOTE(review): slot 0x30 presumably holds the context flags - confirm
     * against the x86_64 CONTEXT layout */
    output( "\tmovl $0x%x,0x30(%%rsp)\n", 0x0010000fU );

    /* transfer the arguments */

    output( "\tmovq %%r8,%u(%%rsp)\n", STACK_SPACE + 32 );
    output( "\tmovq %%r9,%u(%%rsp)\n", STACK_SPACE + 40 );
    output( "\tmovq $4,%%rax\n" );
    output( "\tleaq %u(%%rsp),%%rsi\n", STACK_SPACE + 16 );
    output( "\tcmpq %%rax,%%rcx\n" );
    output( "\tcmovgq %%rcx,%%rax\n" );       /* %rax = max(4, number of args) */
    output( "\tmovq %%rsp,%%rbx\n" );         /* %rbx = address of the context */
    output( "\tleaq 16(,%%rax,8),%%rax\n" );  /* add 8 for context arg and 8 for rounding */
    output( "\tandq $~15,%%rax\n" );
    output( "\tsubq %%rax,%%rsp\n" );
    output( "\tmovq %%rsp,%%rdi\n" );
    output( "\tjrcxz 1f\n" );                 /* nothing to copy when there are no args */
    output( "\tcld\n" );
    output( "\trep\n\tmovsq\n" );
    output( "1:\tmovq %%rbx,0(%%rdi)\n" );    /* context arg */

    /* call the entry point */

    output( "\tmovq %%rdx,%%rax\n" );
    output( "\tmovq 0(%%rsp),%%rcx\n" );
    output( "\tmovq 8(%%rsp),%%rdx\n" );
    output( "\tmovq 16(%%rsp),%%r8\n" );
    output( "\tmovq 24(%%rsp),%%r9\n" );
    output( "\tcallq *%%rax\n" );

    /* restore the context structure; this second "1:" label is the target
     * of the "jmp 1b" emitted for __wine_restore_regs below */

    output( "1:\tmovq 0x80(%%rbx),%%rcx\n" );
    output( "\tmovq 0x88(%%rbx),%%rdx\n" );
    for (i = 0; i < nb_int_regs; i++)
        output( "\tmovq 0x%x(%%rbx),%%%s\n", 0xa0 + 8 * i, int_regs[i] );

    for (i = 0; i < nb_xmm_regs; i++)
        output( "\tmovdqa 0x%x(%%rbx),%%xmm%u\n", 0x1a0 + 16 * i, i );
    output( "\tfxrstor 0x100(%%rbx)\n" );
    output( "\tldmxcsr 0x34(%%rbx)\n" );

    output( "\tmovl 0x44(%%rbx),%%eax\n" );
    output( "\tpushq %%rax\n" );
    output( "\tpopfq\n" );

    output( "\tmovq 0x98(%%rbx),%%rax\n" );  /* stack pointer */
    output( "\tpushq 0xf8(%%rbx)\n" );       /* return address */
    output( "\tpopq -8(%%rax)\n" );
    output( "\tpushq 0x78(%%rbx)\n" );       /* rax */
    output( "\tpopq -16(%%rax)\n" );
    output( "\tmovq 0x90(%%rbx),%%rbx\n" );
    output( "\tleaq -16(%%rax),%%rsp\n" );
    output( "\tpopq %%rax\n" );
    output( "\tret\n" );

    output_function_size( "__wine_call_from_regs" );

    function_header( "__wine_restore_regs" );
    output( "\tmovq %%rcx,%%rbx\n" );
    output( "\tjmp 1b\n" );
    output_function_size( "__wine_restore_regs" );
}
1137 /*******************************************************************
1138 * BuildRelays32
1140 * Build all the 32-bit relay callbacks
1142 void BuildRelays32(void)
1144 switch (target_cpu)
1146 case CPU_x86:
1147 output( "/* File generated automatically. Do not edit! */\n\n" );
1148 output( "\t.text\n" );
1149 output( "%s:\n\n", asm_name("__wine_spec_thunk_text_32") );
1151 /* 32-bit register entry point */
1152 BuildCallFrom32Regs();
1154 output_function_size( "__wine_spec_thunk_text_32" );
1155 output_gnu_stack_note();
1156 break;
1158 case CPU_x86_64:
1159 output( "/* File generated automatically. Do not edit! */\n\n" );
1160 output( "\t.text\n" );
1161 build_call_from_regs_x86_64();
1162 output_gnu_stack_note();
1163 break;
1165 default:
1166 output( "/* File not used with this architecture. Do not edit! */\n\n" );
1167 return;