msvcrt: Import floorf implementation from musl.
[wine.git] / tools / winebuild / relay.c
blob0544aa6cbe9d52556172508df7f9c0365e3f868f
1 /*
2 * Relay calls helper routines
4 * Copyright 1993 Robert J. Amstadt
5 * Copyright 1995 Martin von Loewis
6 * Copyright 1995, 1996, 1997 Alexandre Julliard
7 * Copyright 1997 Eric Youngdale
8 * Copyright 1999 Ulrich Weigand
10 * This library is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU Lesser General Public
12 * License as published by the Free Software Foundation; either
13 * version 2.1 of the License, or (at your option) any later version.
15 * This library is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 * Lesser General Public License for more details.
20 * You should have received a copy of the GNU Lesser General Public
21 * License along with this library; if not, write to the Free Software
22 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
25 #include "config.h"
26 #include "wine/port.h"
28 #include <ctype.h>
29 #include <stdarg.h>
31 #include "build.h"
33 /* offset of the stack pointer relative to %fs:(0) */
34 #define STACKOFFSET 0x10c /* FIELD_OFFSET(TEB,SystemReserved1) */
36 /* fix this if the x86_thread_data structure is changed */
37 #define GS_OFFSET 0x1d8 /* FIELD_OFFSET(TEB,SystemReserved2) + FIELD_OFFSET(struct x86_thread_data,gs) */
/* Emit the common start of an assembly routine called `name`: an alignment
 * directive, the text returned by func_declaration() and the text returned
 * by asm_globl(). */
static void function_header( const char *name )
{
    output( "\n\t.align %d\n", get_alignment(4) );
    output( "\t%s\n", func_declaration(name) );
    output( "%s\n", asm_globl(name) );
}
48 /*******************************************************************
49 * BuildCallFrom16Core
51 * This routine builds the core routines used in 16->32 thunks:
52 * CallFrom16Word, CallFrom16Long, CallFrom16Register, and CallFrom16Thunk.
54 * These routines are intended to be called via a far call (with 32-bit
55 * operand size) from 16-bit code. The 16-bit code stub must push %bp,
56 * the 32-bit entry point to be called, and the argument conversion
57 * routine to be used (see stack layout below).
59 * The core routine completes the STACK16FRAME on the 16-bit stack and
60 * switches to the 32-bit stack. Then, the argument conversion routine
61 * is called; it gets passed the 32-bit entry point and a pointer to the
62 * 16-bit arguments (on the 16-bit stack) as parameters. (You can either
63 * use conversion routines automatically generated by BuildCallFrom16,
64 * or write your own for special purposes.)
66 * The conversion routine must call the 32-bit entry point, passing it
67 * the converted arguments, and return its return value to the core.
68 * After the conversion routine has returned, the core switches back
69 * to the 16-bit stack, converts the return value to the DX:AX format
70 * (CallFrom16Long), and returns to the 16-bit call stub. All parameters,
71 * including %bp, are popped off the stack.
73 * The 16-bit call stub now returns to the caller, popping the 16-bit
74 * arguments if necessary (pascal calling convention).
76 * In the case of a 'register' function, CallFrom16Register fills a
77 * CONTEXT86 structure with the values all registers had at the point
78 * the first instruction of the 16-bit call stub was about to be
79 * executed. A pointer to this CONTEXT86 is passed as third parameter
80 * to the argument conversion routine, which typically passes it on
81 * to the called 32-bit entry point.
83 * CallFrom16Thunk is a special variant used by the implementation of
84 * the Win95 16->32 thunk functions C16ThkSL and C16ThkSL01 and is
85 * implemented as follows:
86 * On entry, the EBX register is set up to contain a flat pointer to the
87 * 16-bit stack such that EBX+22 points to the first argument.
88 * Then, the entry point is called, while EBP is set up to point
89 * to the return address (on the 32-bit stack).
90 * The called function returns with CX set to the number of bytes
91 * to be popped of the caller's stack.
93 * Stack layout upon entry to the core routine (STACK16FRAME):
94 * ... ...
95 * (sp+24) word first 16-bit arg
96 * (sp+22) word cs
97 * (sp+20) word ip
98 * (sp+18) word bp
99 * (sp+14) long 32-bit entry point (reused for Win16 mutex recursion count)
100 * (sp+12) word ip of actual entry point (necessary for relay debugging)
101 * (sp+8) long relay (argument conversion) function entry point
102 * (sp+4) long cs of 16-bit entry point
103 * (sp) long ip of 16-bit entry point
105 * Added on the stack:
106 * (sp-2) word saved gs
107 * (sp-4) word saved fs
108 * (sp-6) word saved es
109 * (sp-8) word saved ds
110 * (sp-12) long saved ebp
111 * (sp-16) long saved ecx
112 * (sp-20) long saved edx
113 * (sp-24) long saved previous stack
115 static void BuildCallFrom16Core( int reg_func, int thunk )
117 /* Function header */
118 if (thunk) function_header( "__wine_call_from_16_thunk" );
119 else if (reg_func) function_header( "__wine_call_from_16_regs" );
120 else function_header( "__wine_call_from_16" );
122 /* Create STACK16FRAME (except STACK32FRAME link) */
123 output( "\tpushw %%gs\n" );
124 output( "\tpushw %%fs\n" );
125 output( "\tpushw %%es\n" );
126 output( "\tpushw %%ds\n" );
127 output( "\tpushl %%ebp\n" );
128 output( "\tpushl %%ecx\n" );
129 output( "\tpushl %%edx\n" );
131 /* Save original EFlags register */
132 if (reg_func) output( "\tpushfl\n" );
134 if ( UsePIC )
136 output( "\tcall 1f\n" );
137 output( "1:\tpopl %%ecx\n" );
138 output( "\t.byte 0x2e\n\tmovl %s-1b(%%ecx),%%edx\n", asm_name("CallTo16_DataSelector") );
140 else
141 output( "\t.byte 0x2e\n\tmovl %s,%%edx\n", asm_name("CallTo16_DataSelector") );
143 /* Load 32-bit segment registers */
144 output( "\tmovw %%dx, %%ds\n" );
145 output( "\tmovw %%dx, %%es\n" );
147 if ( UsePIC )
148 output( "\tmovw %s-1b(%%ecx), %%fs\n", asm_name("CallTo16_TebSelector") );
149 else
150 output( "\tmovw %s, %%fs\n", asm_name("CallTo16_TebSelector") );
152 output( "\t.byte 0x64\n\tmov (%d),%%gs\n", GS_OFFSET );
154 /* Translate STACK16FRAME base to flat offset in %edx */
155 output( "\tmovw %%ss, %%dx\n" );
156 output( "\tandl $0xfff8, %%edx\n" );
157 output( "\tshrl $1, %%edx\n" );
158 if (UsePIC)
159 output( "\taddl .Lwine_ldt_copy_ptr-1b(%%ecx),%%edx\n" );
160 else
161 output( "\taddl .Lwine_ldt_copy_ptr,%%edx\n" );
162 output( "\tmovl (%%edx), %%edx\n" );
163 output( "\tmovzwl %%sp, %%ebp\n" );
164 output( "\tleal %d(%%ebp,%%edx), %%edx\n", reg_func ? 0 : -4 );
166 /* Get saved flags into %ecx */
167 if (reg_func) output( "\tpopl %%ecx\n" );
169 /* Get the 32-bit stack pointer from the TEB and complete STACK16FRAME */
170 output( "\t.byte 0x64\n\tmovl (%d), %%ebp\n", STACKOFFSET );
171 output( "\tpushl %%ebp\n" );
173 /* Switch stacks */
174 output( "\t.byte 0x64\n\tmovw %%ss, (%d)\n", STACKOFFSET + 2 );
175 output( "\t.byte 0x64\n\tmovw %%sp, (%d)\n", STACKOFFSET );
176 output( "\tpushl %%ds\n" );
177 output( "\tpopl %%ss\n" );
178 output( "\tmovl %%ebp, %%esp\n" );
179 output( "\taddl $0x20,%%ebp\n"); /* FIELD_OFFSET(STACK32FRAME,ebp) */
182 /* At this point:
183 STACK16FRAME is completely set up
184 DS, ES, SS: flat data segment
185 FS: current TEB
186 ESP: points to last STACK32FRAME
187 EBP: points to ebp member of last STACK32FRAME
188 EDX: points to current STACK16FRAME
189 ECX: contains saved flags
190 all other registers: unchanged */
192 /* Special case: C16ThkSL stub */
193 if ( thunk )
195 /* Set up registers as expected and call thunk */
196 output( "\tleal 0x1a(%%edx),%%ebx\n" ); /* sizeof(STACK16FRAME)-22 */
197 output( "\tleal -4(%%esp), %%ebp\n" );
199 output( "\tcall *0x26(%%edx)\n"); /* FIELD_OFFSET(STACK16FRAME,entry_point) */
201 /* Switch stack back */
202 output( "\t.byte 0x64\n\tmovw (%d), %%ss\n", STACKOFFSET+2 );
203 output( "\t.byte 0x64\n\tmovzwl (%d), %%esp\n", STACKOFFSET );
204 output( "\t.byte 0x64\n\tpopl (%d)\n", STACKOFFSET );
206 /* Restore registers and return directly to caller */
207 output( "\taddl $8, %%esp\n" );
208 output( "\tpopl %%ebp\n" );
209 output( "\tpopw %%ds\n" );
210 output( "\tpopw %%es\n" );
211 output( "\tpopw %%fs\n" );
212 output( "\tpopw %%gs\n" );
213 output( "\taddl $20, %%esp\n" );
215 output( "\txorb %%ch, %%ch\n" );
216 output( "\tpopl %%ebx\n" );
217 output( "\taddw %%cx, %%sp\n" );
218 output( "\tpush %%ebx\n" );
220 output( "\t.byte 0x66\n" );
221 output( "\tlret\n" );
223 output_function_size( "__wine_call_from_16_thunk" );
224 return;
228 /* Build register CONTEXT */
229 if ( reg_func )
231 output( "\tsubl $0x2cc,%%esp\n" ); /* sizeof(CONTEXT86) */
233 output( "\tmovl %%ecx,0xc0(%%esp)\n" ); /* EFlags */
235 output( "\tmovl %%eax,0xb0(%%esp)\n" ); /* Eax */
236 output( "\tmovl %%ebx,0xa4(%%esp)\n" ); /* Ebx */
237 output( "\tmovl %%esi,0xa0(%%esp)\n" ); /* Esi */
238 output( "\tmovl %%edi,0x9c(%%esp)\n" ); /* Edi */
240 output( "\tmovl 0x0c(%%edx),%%eax\n"); /* FIELD_OFFSET(STACK16FRAME,ebp) */
241 output( "\tmovl %%eax,0xb4(%%esp)\n" ); /* Ebp */
242 output( "\tmovl 0x08(%%edx),%%eax\n"); /* FIELD_OFFSET(STACK16FRAME,ecx) */
243 output( "\tmovl %%eax,0xac(%%esp)\n" ); /* Ecx */
244 output( "\tmovl 0x04(%%edx),%%eax\n"); /* FIELD_OFFSET(STACK16FRAME,edx) */
245 output( "\tmovl %%eax,0xa8(%%esp)\n" ); /* Edx */
247 output( "\tmovzwl 0x10(%%edx),%%eax\n"); /* FIELD_OFFSET(STACK16FRAME,ds) */
248 output( "\tmovl %%eax,0x98(%%esp)\n" ); /* SegDs */
249 output( "\tmovzwl 0x12(%%edx),%%eax\n"); /* FIELD_OFFSET(STACK16FRAME,es) */
250 output( "\tmovl %%eax,0x94(%%esp)\n" ); /* SegEs */
251 output( "\tmovzwl 0x14(%%edx),%%eax\n"); /* FIELD_OFFSET(STACK16FRAME,fs) */
252 output( "\tmovl %%eax,0x90(%%esp)\n" ); /* SegFs */
253 output( "\tmovzwl 0x16(%%edx),%%eax\n"); /* FIELD_OFFSET(STACK16FRAME,gs) */
254 output( "\tmovl %%eax,0x8c(%%esp)\n" ); /* SegGs */
256 output( "\tmovzwl 0x2e(%%edx),%%eax\n"); /* FIELD_OFFSET(STACK16FRAME,cs) */
257 output( "\tmovl %%eax,0xbc(%%esp)\n" ); /* SegCs */
258 output( "\tmovzwl 0x2c(%%edx),%%eax\n"); /* FIELD_OFFSET(STACK16FRAME,ip) */
259 output( "\tmovl %%eax,0xb8(%%esp)\n" ); /* Eip */
261 output( "\t.byte 0x64\n\tmovzwl (%d), %%eax\n", STACKOFFSET+2 );
262 output( "\tmovl %%eax,0xc8(%%esp)\n" ); /* SegSs */
263 output( "\t.byte 0x64\n\tmovzwl (%d), %%eax\n", STACKOFFSET );
264 output( "\taddl $0x2c,%%eax\n"); /* FIELD_OFFSET(STACK16FRAME,ip) */
265 output( "\tmovl %%eax,0xc4(%%esp)\n" ); /* Esp */
266 #if 0
267 output( "\tfsave 0x1c(%%esp)\n" ); /* FloatSave */
268 #endif
270 /* Push address of CONTEXT86 structure -- popped by the relay routine */
271 output( "\tmovl %%esp,%%eax\n" );
272 output( "\tandl $~15,%%esp\n" );
273 output( "\tsubl $4,%%esp\n" );
274 output( "\tpushl %%eax\n" );
276 else
278 output( "\tsubl $8,%%esp\n" );
279 output( "\tandl $~15,%%esp\n" );
280 output( "\taddl $8,%%esp\n" );
283 /* Call relay routine (which will call the API entry point) */
284 output( "\tleal 0x30(%%edx),%%eax\n" ); /* sizeof(STACK16FRAME) */
285 output( "\tpushl %%eax\n" );
286 output( "\tpushl 0x26(%%edx)\n"); /* FIELD_OFFSET(STACK16FRAME,entry_point) */
287 output( "\tcall *0x20(%%edx)\n"); /* FIELD_OFFSET(STACK16FRAME,relay) */
289 if ( reg_func )
291 output( "\tleal -748(%%ebp),%%ebx\n" ); /* sizeof(CONTEXT) + FIELD_OFFSET(STACK32FRAME,ebp) */
293 /* Switch stack back */
294 output( "\t.byte 0x64\n\tmovw (%d), %%ss\n", STACKOFFSET+2 );
295 output( "\t.byte 0x64\n\tmovzwl (%d), %%esp\n", STACKOFFSET );
296 output( "\t.byte 0x64\n\tpopl (%d)\n", STACKOFFSET );
298 /* Get return address to CallFrom16 stub */
299 output( "\taddw $0x14,%%sp\n" ); /* FIELD_OFFSET(STACK16FRAME,callfrom_ip)-4 */
300 output( "\tpopl %%eax\n" );
301 output( "\tpopl %%edx\n" );
303 /* Restore all registers from CONTEXT */
304 output( "\tmovw 0xc8(%%ebx),%%ss\n"); /* SegSs */
305 output( "\tmovl 0xc4(%%ebx),%%esp\n"); /* Esp */
306 output( "\taddl $4, %%esp\n" ); /* room for final return address */
308 output( "\tpushw 0xbc(%%ebx)\n"); /* SegCs */
309 output( "\tpushw 0xb8(%%ebx)\n"); /* Eip */
310 output( "\tpushl %%edx\n" );
311 output( "\tpushl %%eax\n" );
312 output( "\tpushl 0xc0(%%ebx)\n"); /* EFlags */
313 output( "\tpushl 0x98(%%ebx)\n"); /* SegDs */
315 output( "\tpushl 0x94(%%ebx)\n"); /* SegEs */
316 output( "\tpopl %%es\n" );
317 output( "\tpushl 0x90(%%ebx)\n"); /* SegFs */
318 output( "\tpopl %%fs\n" );
319 output( "\tpushl 0x8c(%%ebx)\n"); /* SegGs */
320 output( "\tpopl %%gs\n" );
322 output( "\tmovl 0xb4(%%ebx),%%ebp\n"); /* Ebp */
323 output( "\tmovl 0xa0(%%ebx),%%esi\n"); /* Esi */
324 output( "\tmovl 0x9c(%%ebx),%%edi\n"); /* Edi */
325 output( "\tmovl 0xb0(%%ebx),%%eax\n"); /* Eax */
326 output( "\tmovl 0xa8(%%ebx),%%edx\n"); /* Edx */
327 output( "\tmovl 0xac(%%ebx),%%ecx\n"); /* Ecx */
328 output( "\tmovl 0xa4(%%ebx),%%ebx\n"); /* Ebx */
330 output( "\tpopl %%ds\n" );
331 output( "\tpopfl\n" );
332 output( "\tlret\n" );
334 output_function_size( "__wine_call_from_16_regs" );
336 else
338 /* Switch stack back */
339 output( "\t.byte 0x64\n\tmovw (%d), %%ss\n", STACKOFFSET+2 );
340 output( "\t.byte 0x64\n\tmovzwl (%d), %%esp\n", STACKOFFSET );
341 output( "\t.byte 0x64\n\tpopl (%d)\n", STACKOFFSET );
343 /* Restore registers */
344 output( "\tpopl %%edx\n" );
345 output( "\tpopl %%ecx\n" );
346 output( "\tpopl %%ebp\n" );
347 output( "\tpopw %%ds\n" );
348 output( "\tpopw %%es\n" );
349 output( "\tpopw %%fs\n" );
350 output( "\tpopw %%gs\n" );
352 /* Return to return stub which will return to caller */
353 output( "\tlret $12\n" );
355 output_function_size( "__wine_call_from_16" );
360 /*******************************************************************
361 * BuildCallTo16Core
363 * This routine builds the core routines used in 32->16 thunks:
365 * extern DWORD WINAPI wine_call_to_16( FARPROC16 target, DWORD cbArgs, PEXCEPTION_HANDLER handler );
366 * extern void WINAPI wine_call_to_16_regs( CONTEXT86 *context, DWORD cbArgs, PEXCEPTION_HANDLER handler );
368 * These routines can be called directly from 32-bit code.
370 * All routines expect that the 16-bit stack contents (arguments) and the
371 * return address (segptr to CallTo16_Ret) were already set up by the
372 * caller; nb_args must contain the number of bytes to be conserved. The
373 * 16-bit SS:SP will be set accordingly.
375 * All other registers are either taken from the CONTEXT86 structure
376 * or else set to default values. The target routine address is either
377 * given directly or taken from the CONTEXT86.
379 static void BuildCallTo16Core( int reg_func )
381 const char *name = reg_func ? "wine_call_to_16_regs" : "wine_call_to_16";
382 const char *func_name = is_pe() ? strmake( "%s@12", name ) : name;
384 /* Function header */
385 function_header( func_name );
387 /* Function entry sequence */
388 output_cfi( ".cfi_startproc" );
389 output( "\tpushl %%ebp\n" );
390 output_cfi( ".cfi_adjust_cfa_offset 4" );
391 output_cfi( ".cfi_rel_offset %%ebp,0" );
392 output( "\tmovl %%esp, %%ebp\n" );
393 output_cfi( ".cfi_def_cfa_register %%ebp" );
395 /* Save the 32-bit registers */
396 output( "\tpushl %%ebx\n" );
397 output_cfi( ".cfi_rel_offset %%ebx,-4" );
398 output( "\tpushl %%esi\n" );
399 output_cfi( ".cfi_rel_offset %%esi,-8" );
400 output( "\tpushl %%edi\n" );
401 output_cfi( ".cfi_rel_offset %%edi,-12" );
402 output( "\t.byte 0x64\n\tmov %%gs,(%d)\n", GS_OFFSET );
404 /* Setup exception frame */
405 output( "\t.byte 0x64\n\tpushl (%d)\n", STACKOFFSET );
406 output( "\tpushl 16(%%ebp)\n" ); /* handler */
407 output( "\t.byte 0x64\n\tpushl (0)\n" );
408 output( "\t.byte 0x64\n\tmovl %%esp,(0)\n" );
410 /* Call the actual CallTo16 routine (simulate a lcall) */
411 output( "\tpushl %%cs\n" );
412 output( "\tcall .L%s\n", name );
414 /* Remove exception frame */
415 output( "\t.byte 0x64\n\tpopl (0)\n" );
416 output( "\taddl $4, %%esp\n" );
417 output( "\t.byte 0x64\n\tpopl (%d)\n", STACKOFFSET );
419 if ( !reg_func )
421 /* Convert return value */
422 output( "\tandl $0xffff,%%eax\n" );
423 output( "\tshll $16,%%edx\n" );
424 output( "\torl %%edx,%%eax\n" );
426 else
429 * Modify CONTEXT86 structure to contain new values
431 * NOTE: We restore only EAX, EBX, EDX, EDX, EBP, and ESP.
432 * The segment registers as well as ESI and EDI should
433 * not be modified by a well-behaved 16-bit routine in
434 * any case. [If necessary, we could restore them as well,
435 * at the cost of a somewhat less efficient return path.]
438 output( "\tmovl 0x14(%%esp),%%edi\n" ); /* FIELD_OFFSET(STACK32FRAME,target) - FIELD_OFFSET(STACK32FRAME,edi) */
439 /* everything above edi has been popped already */
441 output( "\tmovl %%eax,0xb0(%%edi)\n"); /* Eax */
442 output( "\tmovl %%ebx,0xa4(%%edi)\n"); /* Ebx */
443 output( "\tmovl %%ecx,0xac(%%edi)\n"); /* Ecx */
444 output( "\tmovl %%edx,0xa8(%%edi)\n"); /* Edx */
445 output( "\tmovl %%ebp,0xb4(%%edi)\n"); /* Ebp */
446 output( "\tmovl %%esi,0xc4(%%edi)\n"); /* Esp */
447 /* The return glue code saved %esp into %esi */
450 /* Restore the 32-bit registers */
451 output( "\tpopl %%edi\n" );
452 output_cfi( ".cfi_same_value %%edi" );
453 output( "\tpopl %%esi\n" );
454 output_cfi( ".cfi_same_value %%esi" );
455 output( "\tpopl %%ebx\n" );
456 output_cfi( ".cfi_same_value %%ebx" );
458 /* Function exit sequence */
459 output( "\tpopl %%ebp\n" );
460 output_cfi( ".cfi_def_cfa %%esp,4" );
461 output_cfi( ".cfi_same_value %%ebp" );
462 output( "\tret $12\n" );
463 output_cfi( ".cfi_endproc" );
466 /* Start of the actual CallTo16 routine */
468 output( ".L%s:\n", name );
470 /* Switch to the 16-bit stack */
471 output( "\tmovl %%esp,%%edx\n" );
472 output( "\t.byte 0x64\n\tmovw (%d),%%ss\n", STACKOFFSET + 2);
473 output( "\t.byte 0x64\n\tmovw (%d),%%sp\n", STACKOFFSET );
474 output( "\t.byte 0x64\n\tmovl %%edx,(%d)\n", STACKOFFSET );
476 /* Make %bp point to the previous stackframe (built by CallFrom16) */
477 output( "\tmovzwl %%sp,%%ebp\n" );
478 output( "\tleal 0x2a(%%ebp),%%ebp\n"); /* FIELD_OFFSET(STACK16FRAME,bp) */
480 /* Add the specified offset to the new sp */
481 output( "\tsubw 0x2c(%%edx), %%sp\n"); /* FIELD_OFFSET(STACK32FRAME,nb_args) */
483 if (reg_func)
485 /* Push the called routine address */
486 output( "\tmovl 0x28(%%edx),%%edx\n"); /* FIELD_OFFSET(STACK32FRAME,target) */
487 output( "\tpushw 0xbc(%%edx)\n"); /* SegCs */
488 output( "\tpushw 0xb8(%%edx)\n"); /* Eip */
490 /* Get the registers */
491 output( "\tpushw 0x98(%%edx)\n"); /* SegDs */
492 output( "\tpushl 0x94(%%edx)\n"); /* SegEs */
493 output( "\tpopl %%es\n" );
494 output( "\tmovl 0xb4(%%edx),%%ebp\n"); /* Ebp */
495 output( "\tmovl 0xa0(%%edx),%%esi\n"); /* Esi */
496 output( "\tmovl 0x9c(%%edx),%%edi\n"); /* Edi */
497 output( "\tmovl 0xb0(%%edx),%%eax\n"); /* Eax */
498 output( "\tmovl 0xa4(%%edx),%%ebx\n"); /* Ebx */
499 output( "\tmovl 0xac(%%edx),%%ecx\n"); /* Ecx */
500 output( "\tmovl 0xa8(%%edx),%%edx\n"); /* Edx */
502 /* Get the 16-bit ds */
503 output( "\tpopw %%ds\n" );
505 else /* not a register function */
507 /* Push the called routine address */
508 output( "\tpushl 0x28(%%edx)\n"); /* FIELD_OFFSET(STACK32FRAME,target) */
510 /* Set %fs and %gs to the value saved by the last CallFrom16 */
511 output( "\tpushw -22(%%ebp)\n" ); /* FIELD_OFFSET(STACK16FRAME,fs)-FIELD_OFFSET(STACK16FRAME,bp) */
512 output( "\tpopw %%fs\n" );
513 output( "\tpushw -20(%%ebp)\n" ); /* FIELD_OFFSET(STACK16FRAME,gs)-FIELD_OFFSET(STACK16FRAME,bp) */
514 output( "\tpopw %%gs\n" );
516 /* Set %ds and %es (and %ax just in case) equal to %ss */
517 output( "\tmovw %%ss,%%ax\n" );
518 output( "\tmovw %%ax,%%ds\n" );
519 output( "\tmovw %%ax,%%es\n" );
522 /* Jump to the called routine */
523 output( "\t.byte 0x66\n" );
524 output( "\tlret\n" );
526 /* Function footer */
527 output_function_size( func_name );
531 /*******************************************************************
532 * BuildRet16Func
534 * Build the return code for 16-bit callbacks
536 static void BuildRet16Func(void)
538 function_header( "__wine_call_to_16_ret" );
540 /* Save %esp into %esi */
541 output( "\tmovl %%esp,%%esi\n" );
543 /* Restore 32-bit segment registers */
545 output( "\t.byte 0x2e\n\tmovl %s", asm_name("CallTo16_DataSelector") );
546 output( "-%s,%%edi\n", asm_name("__wine_call16_start") );
547 output( "\tmovw %%di,%%ds\n" );
548 output( "\tmovw %%di,%%es\n" );
550 output( "\t.byte 0x2e\n\tmov %s", asm_name("CallTo16_TebSelector") );
551 output( "-%s,%%fs\n", asm_name("__wine_call16_start") );
553 output( "\t.byte 0x64\n\tmov (%d),%%gs\n", GS_OFFSET );
555 /* Restore the 32-bit stack */
557 output( "\tmovw %%di,%%ss\n" );
558 output( "\t.byte 0x64\n\tmovl (%d),%%esp\n", STACKOFFSET );
560 /* Return to caller */
562 output( "\tlret\n" );
563 output_function_size( "__wine_call_to_16_ret" );
/*******************************************************************
 *         BuildCallTo32CBClient
 *
 * Call a CBClient relay stub from 32-bit code (KERNEL.620).
 *
 * Since the relay stub is itself 32-bit, this should not be a problem;
 * unfortunately, the relay stubs are expected to switch back to a
 * 16-bit stack (and 16-bit code) after completion :-(
 *
 * This would conflict with our 16- vs. 32-bit stack handling, so
 * we simply switch *back* to our 32-bit stack before returning to
 * the caller ...
 *
 * The CBClient relay stub expects to be called with the following
 * 16-bit stack layout, and with ebp and ebx pointing into the 16-bit
 * stack at the designated places:
 *
 *    ...
 *  (ebp+14) original arguments to the callback routine
 *  (ebp+10) far return address to original caller
 *  (ebp+6)  Thunklet target address
 *  (ebp+2)  Thunklet relay ID code
 *  (ebp)    BP (saved by CBClientGlueSL)
 *  (ebp-2)  SI (saved by CBClientGlueSL)
 *  (ebp-4)  DI (saved by CBClientGlueSL)
 *  (ebp-6)  DS (saved by CBClientGlueSL)
 *
 *   ...     buffer space used by the 16-bit side glue for temp copies
 *
 *  (ebx+4)  far return address to 16-bit side glue code
 *  (ebx)    saved 16-bit ss:sp (pointing to ebx+4)
 *
 * The 32-bit side glue code accesses both the original arguments (via ebp)
 * and the temporary copies prepared by the 16-bit side glue (via ebx).
 * After completion, the stub will load ss:sp from the buffer at ebx
 * and perform a far return to 16-bit code.
 *
 * To trick the relay stub into returning to us, we replace the 16-bit
 * return address to the glue code by a cs:ip pair pointing to our
 * return entry point (the original return address is saved first).
 * Our return stub thus called will then reload the 32-bit ss:esp and
 * return to 32-bit code (by using an ss:esp value that we have also
 * pushed onto the 16-bit stack before and a cs:eip value found at
 * that position on the 32-bit stack). The ss:esp to be restored is
 * found relative to the 16-bit stack pointer at:
 *
 *  (ebx-4)   ss  (flat)
 *  (ebx-8)   sp  (32-bit stack pointer)
 *
 * The second variant of this routine, CALL32_CBClientEx, which is used
 * to implement KERNEL.621, has to cope with yet another problem: Here,
 * the 32-bit side directly returns to the caller of the CBClient thunklet,
 * restoring registers saved by CBClientGlueSL and cleaning up the stack.
 * As we have to return to our 32-bit code first, we have to adapt the
 * layout of our temporary area so as to include values for the registers
 * that are to be restored, and later (in the implementation of KERNEL.621)
 * we *really* restore them. The return stub restores DS, DI, SI, and BP
 * from the stack, skips the next 8 bytes (CBClient relay code / target),
 * and then performs a lret NN, where NN is the number of arguments to be
 * removed. Thus, we prepare our temporary area as follows:
 *
 *     (ebx+22) 16-bit cs  (this segment)
 *     (ebx+20) 16-bit ip  ('16-bit' return entry point)
 *     (ebx+16) 32-bit ss  (flat)
 *     (ebx+12) 32-bit sp  (32-bit stack pointer)
 *     (ebx+10) 16-bit bp  (points to ebx+24)
 *     (ebx+8)  16-bit si  (ignored)
 *     (ebx+6)  16-bit di  (ignored)
 *     (ebx+4)  16-bit ds  (we actually use the flat DS here)
 *     (ebx+2)  16-bit ss  (16-bit stack segment)
 *     (ebx+0)  16-bit sp  (points to ebx+4)
 *
 * Note that we ensure that DS is not changed and remains the flat segment,
 * and the 32-bit stack pointer our own return stub needs fits just
 * perfectly into the 8 bytes that are skipped by the Windows stub.
 * One problem is that we have to determine the number of removed arguments,
 * as these have to be really removed in KERNEL.621. Thus, the BP value
 * that we place in the temporary area to be restored, contains the value
 * that SP would have if no arguments were removed. By comparing the actual
 * value of SP with this value in our return stub we can compute the number
 * of removed arguments. This is then returned to KERNEL.621.
 *
 * The stack layout of this function (offsets as used by the code below):
 * (ebp+24)  nArgs     pointer to variable receiving nr. of args (Ex only)
 * (ebp+20)  esi       pointer to caller's esi value
 * (ebp+16)  stack     pointer to the temporary area (loaded into ebx)
 * (ebp+12)  arg       ebp value to be set for relay stub
 * (ebp+8)   func      CBClient relay stub address
 * (ebp+4)   ret addr
 * (ebp)     ebp
 *
 * Parameters:
 *   isEx  non-zero to build CALL32_CBClientEx and its return stub,
 *         zero to build CALL32_CBClient and its return stub
 */
static void BuildCallTo32CBClient( int isEx )
{
    const char *name     = isEx ? "CALL32_CBClientEx" : "CALL32_CBClient";
    const char *ret_name = isEx ? "CALL32_CBClientEx_Ret" : "CALL32_CBClient_Ret";

    function_header( name );

    /* Entry code */

    output_cfi( ".cfi_startproc" );
    output( "\tpushl %%ebp\n" );
    output_cfi( ".cfi_adjust_cfa_offset 4" );
    output_cfi( ".cfi_rel_offset %%ebp,0" );
    output( "\tmovl %%esp,%%ebp\n" );
    output_cfi( ".cfi_def_cfa_register %%ebp" );
    output( "\tpushl %%edi\n" );
    output_cfi( ".cfi_rel_offset %%edi,-4" );
    output( "\tpushl %%esi\n" );
    output_cfi( ".cfi_rel_offset %%esi,-8" );
    output( "\tpushl %%ebx\n" );
    output_cfi( ".cfi_rel_offset %%ebx,-12" );

    /* Get pointer to temporary area and save the 32-bit stack pointer */

    output( "\tmovl 16(%%ebp), %%ebx\n" );
    /* -8: the value %esp will have after the "pushl %cs" / "call" pair below */
    output( "\tleal -8(%%esp), %%eax\n" );

    if ( !isEx )
        output( "\tmovl %%eax, -8(%%ebx)\n" );
    else
        output( "\tmovl %%eax, 12(%%ebx)\n" );

    /* Set up registers and call CBClient relay stub (simulating a far call) */

    output( "\tmovl 20(%%ebp), %%esi\n" );
    output( "\tmovl (%%esi), %%esi\n" );

    /* %ebp is overwritten last since the other arguments are read through it */
    output( "\tmovl 8(%%ebp), %%eax\n" );
    output( "\tmovl 12(%%ebp), %%ebp\n" );

    output( "\tpushl %%cs\n" );
    output( "\tcall *%%eax\n" );

    /* Return new esi value to caller */

    /* %ebp no longer points to our frame here, so the arguments are
     * addressed relative to %esp: 32(%esp) is the esi pointer */
    output( "\tmovl 32(%%esp), %%edi\n" );
    output( "\tmovl %%esi, (%%edi)\n" );

    /* Return argument size to caller */
    if ( isEx )
    {
        /* 36(%esp) is the nArgs pointer; the return stub left the value in %ebp */
        output( "\tmovl 36(%%esp), %%ebx\n" );
        output( "\tmovl %%ebp, (%%ebx)\n" );
    }

    /* Restore registers and return */

    output( "\tpopl %%ebx\n" );
    output_cfi( ".cfi_same_value %%ebx" );
    output( "\tpopl %%esi\n" );
    output_cfi( ".cfi_same_value %%esi" );
    output( "\tpopl %%edi\n" );
    output_cfi( ".cfi_same_value %%edi" );
    output( "\tpopl %%ebp\n" );
    output_cfi( ".cfi_def_cfa %%esp,4" );
    output_cfi( ".cfi_same_value %%ebp" );
    output( "\tret\n" );
    output_cfi( ".cfi_endproc" );
    output_function_size( name );

    /* '16-bit' return stub */

    function_header( ret_name );
    if ( !isEx )
    {
        output( "\tmovzwl %%sp, %%ebx\n" );
        output( "\tlssl %%ss:-16(%%ebx), %%esp\n" );
    }
    else
    {
        /* sp - bp is left in %ebp before the 32-bit stack is reloaded */
        output( "\tmovzwl %%bp, %%ebx\n" );
        output( "\tsubw %%bp, %%sp\n" );
        output( "\tmovzwl %%sp, %%ebp\n" );
        output( "\tlssl %%ss:-12(%%ebx), %%esp\n" );
    }
    output( "\tlret\n" );
    output_function_size( ret_name );
}
744 /*******************************************************************
745 * output_asm_relays16
747 * Build all the 16-bit relay callbacks
749 void output_asm_relays16(void)
751 /* File header */
753 output( "\t.text\n" );
754 output( "%s:\n\n", asm_name("__wine_spec_thunk_text_16") );
756 output( "%s\n", asm_globl("__wine_call16_start") );
758 /* Standard CallFrom16 routine */
759 BuildCallFrom16Core( 0, 0 );
761 /* Register CallFrom16 routine */
762 BuildCallFrom16Core( 1, 0 );
764 /* C16ThkSL CallFrom16 routine */
765 BuildCallFrom16Core( 0, 1 );
767 /* Standard CallTo16 routine */
768 BuildCallTo16Core( 0 );
770 /* Register CallTo16 routine */
771 BuildCallTo16Core( 1 );
773 /* Standard CallTo16 return stub */
774 BuildRet16Func();
776 /* CBClientThunkSL routine */
777 BuildCallTo32CBClient( 0 );
779 /* CBClientThunkSLEx routine */
780 BuildCallTo32CBClient( 1 );
782 output( "%s\n", asm_globl("__wine_call16_end") );
783 output_function_size( "__wine_spec_thunk_text_16" );
785 /* Declare the return address and data selector variables */
786 output( "\n\t.data\n\t.align %d\n", get_alignment(4) );
787 output( "%s\n\t.long 0\n", asm_globl("CallTo16_DataSelector") );
788 output( "%s\n\t.long 0\n", asm_globl("CallTo16_TebSelector") );