/**
 * \file
 * gsharedvt support code for arm64
 *
 * Authors:
 *   Zoltan Varga <vargaz@gmail.com>
 *
 * Copyright 2013 Xamarin, Inc (http://www.xamarin.com)
 * Licensed under the MIT license. See LICENSE file in the project root for full license information.
 */
#include <mono/metadata/abi-details.h>

#include "mini.h"
#include "mini-arm64.h"
#include "mini-arm64-gsharedvt.h"

/*
 * GSHAREDVT
 */
#ifdef MONO_ARCH_GSHAREDVT_SUPPORTED

/*
 * mono_arch_get_gsharedvt_arg_trampoline:
 *
 *   See tramp-x86.c for documentation.
 */
gpointer
mono_arch_get_gsharedvt_arg_trampoline (MonoDomain *domain, gpointer arg, gpointer addr)
{
	guint8 *code, *buf;
	int buf_len = 40;

	/*
	 * Return a trampoline which calls ADDR passing in ARG.
	 * Pass the argument in ip1, clobbering ip0.
	 */
	buf = code = mono_global_codeman_reserve (buf_len);

	code = mono_arm_emit_imm64 (code, ARMREG_IP1, (guint64)arg);
	code = mono_arm_emit_imm64 (code, ARMREG_IP0, (guint64)addr);

	arm_brx (code, ARMREG_IP0);
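
	/*
	 * The emitted sequence is roughly (assuming mono_arm_emit_imm64 expands
	 * to a movz/movk sequence):
	 *
	 *     mov ip1, #<arg>
	 *     mov ip0, #<addr>
	 *     br  ip0
	 *
	 * i.e. a tail jump to ADDR with ARG preloaded into ip1.
	 */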

	g_assert ((code - buf) < buf_len);
	mono_arch_flush_icache (buf, code - buf);

	return buf;
}
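
/*
 * mono_arm_start_gsharedvt_call:
 *
 *   Called from the gsharedvt trampoline created below. Remaps the caller's
 * argument registers/stack slots (CALLER) into the callee's layout (CALLEE)
 * as described by INFO->map, and returns the address for the trampoline to
 * call: the vtable slot or delegate invoke_impl for virtual calls, MRGCTX_REG
 * for calli, or INFO->addr otherwise. (Summary derived from the body below.)
 */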
gpointer
mono_arm_start_gsharedvt_call (GSharedVtCallInfo *info, gpointer *caller, gpointer *callee, gpointer mrgctx_reg)
{
	int i;

	/* Set vtype ret arg */
	if (info->vret_slot != -1) {
		g_assert (info->vret_slot);
		callee [info->vret_arg_reg] = &callee [info->vret_slot];
	}

	for (i = 0; i < info->map_count; ++i) {
		int src = info->map [i * 2];
		int dst = info->map [(i * 2) + 1];
		int arg_marshal = (src >> 18) & 0xf;
		int arg_size = (src >> 22) & 0xf;
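
		/*
		 * Layout of a map entry, as implied by the shifts and masks used in
		 * this function (the encoding itself is produced elsewhere):
		 *   bits  0-15: slot index (only bits 0-5 for the by-ref cases,
		 *               with an nslots count in bits 6-13)
		 *   bits 18-21: arg_marshal (GSHAREDVT_ARG_*)
		 *   bits 22-25: arg_size   (GSHAREDVT_ARG_SIZE_*)
		 *   bits 26-29: byte offset within the stack slot (partial slots)
		 */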

		if (G_UNLIKELY (arg_size)) {
			int src_offset = (src >> 26) & 0xf;
			int dst_offset = (dst >> 26) & 0xf;
			int src_slot, dst_slot;
			guint8 *src_ptr, *dst_ptr;

			/*
			 * Argument passed in part of a stack slot on iOS.
			 * src_offset/dst_offset is the offset within the stack slot.
			 */
			switch (arg_marshal) {
			case GSHAREDVT_ARG_NONE:
				src_slot = src & 0xffff;
				dst_slot = dst & 0xffff;
				src_ptr = (guint8*)(caller + src_slot) + src_offset;
				dst_ptr = (guint8*)(callee + dst_slot) + dst_offset;
				break;
			case GSHAREDVT_ARG_BYREF_TO_BYVAL:
				src_slot = src & 0x3f;
				dst_slot = dst & 0xffff;
				src_ptr = (guint8*)caller [src_slot];
				dst_ptr = (guint8*)(callee + dst_slot) + dst_offset;
				break;
			case GSHAREDVT_ARG_BYVAL_TO_BYREF_HFAR4:
			case GSHAREDVT_ARG_BYREF_TO_BYVAL_HFAR4:
			case GSHAREDVT_ARG_BYREF_TO_BYREF:
				g_assert_not_reached ();
				break;
			case GSHAREDVT_ARG_BYVAL_TO_BYREF:
				src_slot = src & 0x3f;
				src_ptr = (guint8*)(caller + src_slot) + src_offset;
				callee [dst] = src_ptr;
				break;
			default:
				NOT_IMPLEMENTED;
				break;
			}

			if (arg_marshal == GSHAREDVT_ARG_BYVAL_TO_BYREF)
				continue;

			switch (arg_size) {
			case GSHAREDVT_ARG_SIZE_I1:
				*(gint8*)dst_ptr = *(gint8*)src_ptr;
				break;
			case GSHAREDVT_ARG_SIZE_U1:
				*(guint8*)dst_ptr = *(guint8*)src_ptr;
				break;
			case GSHAREDVT_ARG_SIZE_I2:
				*(gint16*)dst_ptr = *(gint16*)src_ptr;
				break;
			case GSHAREDVT_ARG_SIZE_U2:
				*(guint16*)dst_ptr = *(guint16*)src_ptr;
				break;
			case GSHAREDVT_ARG_SIZE_I4:
				*(gint32*)dst_ptr = *(gint32*)src_ptr;
				break;
			case GSHAREDVT_ARG_SIZE_U4:
				*(guint32*)dst_ptr = *(guint32*)src_ptr;
				break;
			default:
				g_assert_not_reached ();
			}
			continue;
		}

		switch (arg_marshal) {
		case GSHAREDVT_ARG_NONE:
			callee [dst] = caller [src];
			break;
		case GSHAREDVT_ARG_BYVAL_TO_BYREF:
			/* gsharedvt argument passed by addr in reg/stack slot */
			src = src & 0x3f;
			callee [dst] = caller + src;
			break;
		case GSHAREDVT_ARG_BYVAL_TO_BYREF_HFAR4: {
			int nslots = (src >> 6) & 0xff;
			int src_slot = src & 0x3f;
			int j;
			float *dst_arr = (float*)(caller + src_slot);

			/* The r4 hfa is in separate slots, need to compress them together in place */
			for (j = 0; j < nslots; ++j)
				dst_arr [j] = *(float*)(caller + src_slot + j);

			callee [dst] = caller + src_slot;
			break;
		}
		case GSHAREDVT_ARG_BYREF_TO_BYVAL: {
			int nslots = (src >> 6) & 0xff;
			int src_slot = src & 0x3f;
			int j;
			gpointer *addr = (gpointer*)caller [src_slot];

			for (j = 0; j < nslots; ++j)
				callee [dst + j] = addr [j];
			break;
		}
		case GSHAREDVT_ARG_BYREF_TO_BYVAL_HFAR4: {
			int nslots = (src >> 6) & 0xff;
			int src_slot = src & 0x3f;
			int j;
			guint32 *addr = (guint32*)(caller [src_slot]);

			/* addr points to an array of floats, need to load them into registers */
			for (j = 0; j < nslots; ++j)
				callee [dst + j] = GUINT_TO_POINTER (addr [j]);
			break;
		}
		case GSHAREDVT_ARG_BYREF_TO_BYREF: {
			int src_slot = src & 0x3f;

			callee [dst] = caller [src_slot];
			break;
		}
		default:
			g_assert_not_reached ();
			break;
		}
	}

	if (info->vcall_offset != -1) {
		MonoObject *this_obj = (MonoObject*)caller [0];

		if (G_UNLIKELY (!this_obj))
			return NULL;
		if (info->vcall_offset == MONO_GSHAREDVT_DEL_INVOKE_VT_OFFSET)
			/* delegate invoke */
			return ((MonoDelegate*)this_obj)->invoke_impl;
		else
			return *(gpointer*)((char*)this_obj->vtable + info->vcall_offset);
	} else if (info->calli) {
		/* The address to call is passed in the mrgctx reg */
		return mrgctx_reg;
	} else {
		return info->addr;
	}
}

#ifndef DISABLE_JIT
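
/*
 * mono_arch_get_gsharedvt_trampoline:
 *
 *   Create the gsharedvt trampoline (see the code below): save the argument
 * registers, call mono_arm_start_gsharedvt_call () to remap the arguments
 * into the callee's layout and obtain the address to call, make the real
 * call, then marshal the return value for the IN/OUT cases before returning
 * to the original caller.
 */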
gpointer
mono_arch_get_gsharedvt_trampoline (MonoTrampInfo **info, gboolean aot)
{
	guint8 *code, *buf;
	int buf_len, cfa_offset;
	GSList *unwind_ops = NULL;
	MonoJumpInfo *ji = NULL;
	guint8 *br_out, *br [64], *br_ret [64], *bcc_ret [64];
	int i, n_arg_regs, n_arg_fregs, offset, arg_reg, info_offset, rgctx_arg_reg_offset;
	int caller_reg_area_offset, callee_reg_area_offset, callee_stack_area_offset;
	int br_ret_index, bcc_ret_index;
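
	/*
	 * br_out/br/br_ret/bcc_ret record the addresses of forward branches
	 * emitted with a 0 displacement; they are fixed up with mono_arm_patch ()
	 * once their targets have been emitted further down.
	 */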

	buf_len = 2048;
	buf = code = mono_global_codeman_reserve (buf_len);

	/*
	 * We are being called by a gsharedvt arg trampoline, the info argument is in IP1.
	 */
	arg_reg = ARMREG_IP1;
	n_arg_regs = NUM_GSHAREDVT_ARG_GREGS;
	n_arg_fregs = NUM_GSHAREDVT_ARG_FREGS;

	/* Compute stack frame size and offsets */
	offset = 0;
	/* frame block */
	offset += 2 * 8;
	/* info argument */
	info_offset = offset;
	offset += 8;
	/* saved rgctx */
	rgctx_arg_reg_offset = offset;
	offset += 8;
	/* alignment */
	offset += 8;
	/* argument regs */
	caller_reg_area_offset = offset;
	offset += (n_arg_regs + n_arg_fregs) * 8;

	/* We need the argument regs to be saved at the top of the frame */
	g_assert (offset % MONO_ARCH_FRAME_ALIGNMENT == 0);

	cfa_offset = offset;

	/* Setup frame */
	arm_stpx_pre (code, ARMREG_FP, ARMREG_LR, ARMREG_SP, -cfa_offset);
	mono_add_unwind_op_def_cfa (unwind_ops, code, buf, ARMREG_SP, cfa_offset);
	mono_add_unwind_op_offset (unwind_ops, code, buf, ARMREG_FP, -cfa_offset + 0);
	mono_add_unwind_op_offset (unwind_ops, code, buf, ARMREG_LR, -cfa_offset + 8);
	arm_movspx (code, ARMREG_FP, ARMREG_SP);
	mono_add_unwind_op_def_cfa_reg (unwind_ops, code, buf, ARMREG_FP);
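
	/*
	 * From here on FP == SP (until the callee area is allocated below), so
	 * info_offset, rgctx_arg_reg_offset and caller_reg_area_offset can be
	 * used relative to either register.
	 */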

	/* Save info argument */
	arm_strx (code, arg_reg, ARMREG_FP, info_offset);

	/* Save rgctx */
	arm_strx (code, MONO_ARCH_RGCTX_REG, ARMREG_FP, rgctx_arg_reg_offset);

	/* Save argument regs below the stack arguments */
	for (i = 0; i < n_arg_regs; ++i)
		arm_strx (code, i, ARMREG_SP, caller_reg_area_offset + (i * 8));
	// FIXME: Only do this if fp regs are used
	for (i = 0; i < n_arg_fregs; ++i)
		arm_strfpx (code, i, ARMREG_SP, caller_reg_area_offset + ((n_arg_regs + i) * 8));

	/* Allocate callee area */
	arm_ldrw (code, ARMREG_IP0, arg_reg, MONO_STRUCT_OFFSET (GSharedVtCallInfo, stack_usage));
	arm_movspx (code, ARMREG_LR, ARMREG_SP);
	arm_subx (code, ARMREG_LR, ARMREG_LR, ARMREG_IP0);
	arm_movspx (code, ARMREG_SP, ARMREG_LR);
	/* Allocate callee register area just below the callee area so it can be accessed from start_gsharedvt_call using negative offsets */
	/* The + 8 is for alignment */
	callee_reg_area_offset = 8;
	callee_stack_area_offset = callee_reg_area_offset + (n_arg_regs * sizeof (target_mgreg_t));
	arm_subx_imm (code, ARMREG_SP, ARMREG_SP, ((n_arg_regs + n_arg_fregs) * sizeof (target_mgreg_t)) + 8);

	/*
	 * The stack now looks like this:
	 * <caller frame>
	 * <saved r0-r8>
	 * <our frame>
	 * <saved fp, lr> <- fp
	 * <callee area> <- sp
	 */

	/* Call start_gsharedvt_call () */
	/* arg1 = info */
	arm_ldrx (code, ARMREG_R0, ARMREG_FP, info_offset);
	/* arg2 = caller stack area */
	arm_addx_imm (code, ARMREG_R1, ARMREG_FP, caller_reg_area_offset);
	/* arg3 = callee stack area */
	arm_addx_imm (code, ARMREG_R2, ARMREG_SP, callee_reg_area_offset);
	/* arg4 = mrgctx reg */
	arm_ldrx (code, ARMREG_R3, ARMREG_FP, rgctx_arg_reg_offset);

	if (aot)
		code = mono_arm_emit_aotconst (&ji, code, buf, ARMREG_IP0, MONO_PATCH_INFO_JIT_ICALL_ADDR, "mono_arm_start_gsharedvt_call");
	else
		code = mono_arm_emit_imm64 (code, ARMREG_IP0, (guint64)mono_arm_start_gsharedvt_call);
	arm_blrx (code, ARMREG_IP0);

	/* Make the real method call */
	/* R0 contains the addr to call */
	arm_movx (code, ARMREG_IP1, ARMREG_R0);
	/* Load rgctx */
	arm_ldrx (code, MONO_ARCH_RGCTX_REG, ARMREG_FP, rgctx_arg_reg_offset);
	/* Load argument registers */
	// FIXME:
	for (i = 0; i < n_arg_regs; ++i)
		arm_ldrx (code, i, ARMREG_SP, callee_reg_area_offset + (i * 8));
	// FIXME: Only do this if needed
	for (i = 0; i < n_arg_fregs; ++i)
		arm_ldrfpx (code, i, ARMREG_SP, callee_reg_area_offset + ((n_arg_regs + i) * 8));
	/* Clear callee reg area */
	arm_addx_imm (code, ARMREG_SP, ARMREG_SP, ((n_arg_regs + n_arg_fregs) * sizeof (target_mgreg_t)) + 8);
	/* Make the call */
	arm_blrx (code, ARMREG_IP1);

	br_ret_index = 0;
	bcc_ret_index = 0;

	// FIXME: Use a switch
	/* Branch between IN/OUT cases */
	arm_ldrx (code, ARMREG_IP1, ARMREG_FP, info_offset);
	arm_ldrw (code, ARMREG_IP1, ARMREG_IP1, MONO_STRUCT_OFFSET (GSharedVtCallInfo, gsharedvt_in));
	br_out = code;
	arm_cbzx (code, ARMREG_IP1, 0);
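
	/*
	 * The cbzx above falls through to the IN path when gsharedvt_in is
	 * non-zero and branches to the OUT path (the mono_arm_patch () target
	 * below) when it is zero.
	 */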

	/* IN CASE */

	/* IP1 == return marshalling type */
	arm_ldrx (code, ARMREG_IP1, ARMREG_FP, info_offset);
	arm_ldrw (code, ARMREG_IP1, ARMREG_IP1, MONO_STRUCT_OFFSET (GSharedVtCallInfo, ret_marshal));

	/* Continue if no marshalling required */
	// FIXME: Use cmpx_imm
	code = mono_arm_emit_imm64 (code, ARMREG_IP0, GSHAREDVT_RET_NONE);
	arm_cmpx (code, ARMREG_IP0, ARMREG_IP1);
	bcc_ret [bcc_ret_index ++] = code;
	arm_bcc (code, ARMCOND_EQ, 0);

	/* Compute vret area address in LR */
	arm_ldrx (code, ARMREG_LR, ARMREG_FP, info_offset);
	arm_ldrw (code, ARMREG_LR, ARMREG_LR, MONO_STRUCT_OFFSET (GSharedVtCallInfo, vret_slot));
	arm_subx_imm (code, ARMREG_LR, ARMREG_LR, n_arg_regs + n_arg_fregs);
	arm_lslx (code, ARMREG_LR, ARMREG_LR, 3);
	arm_movspx (code, ARMREG_IP0, ARMREG_SP);
	arm_addx (code, ARMREG_LR, ARMREG_IP0, ARMREG_LR);

	/* Branch to specific marshalling code */
	for (i = GSHAREDVT_RET_NONE; i < GSHAREDVT_RET_NUM; ++i) {
		code = mono_arm_emit_imm64 (code, ARMREG_IP0, i);
		arm_cmpx (code, ARMREG_IP0, ARMREG_IP1);
		br [i] = code;
		arm_bcc (code, ARMCOND_EQ, 0);
	}

	arm_brk (code, 0);

	/*
	 * The address of the return value area is in LR; the return value has to
	 * be loaded from there into registers.
	 */
	for (i = GSHAREDVT_RET_NONE; i < GSHAREDVT_RET_NUM; ++i) {
		mono_arm_patch (br [i], code, MONO_R_ARM64_BCC);
		switch (i) {
		case GSHAREDVT_RET_NONE:
			break;
		case GSHAREDVT_RET_I8:
			arm_ldrx (code, ARMREG_R0, ARMREG_LR, 0);
			break;
		case GSHAREDVT_RET_I1:
			arm_ldrsbx (code, ARMREG_R0, ARMREG_LR, 0);
			break;
		case GSHAREDVT_RET_U1:
			arm_ldrb (code, ARMREG_R0, ARMREG_LR, 0);
			break;
		case GSHAREDVT_RET_I2:
			arm_ldrshx (code, ARMREG_R0, ARMREG_LR, 0);
			break;
		case GSHAREDVT_RET_U2:
			arm_ldrh (code, ARMREG_R0, ARMREG_LR, 0);
			break;
		case GSHAREDVT_RET_I4:
			arm_ldrswx (code, ARMREG_R0, ARMREG_LR, 0);
			break;
		case GSHAREDVT_RET_U4:
			arm_ldrw (code, ARMREG_R0, ARMREG_LR, 0);
			break;
		case GSHAREDVT_RET_R8:
			arm_ldrfpx (code, ARMREG_D0, ARMREG_LR, 0);
			break;
		case GSHAREDVT_RET_R4:
			arm_ldrfpw (code, ARMREG_D0, ARMREG_LR, 0);
			break;
		case GSHAREDVT_RET_IREGS_1:
		case GSHAREDVT_RET_IREGS_2:
		case GSHAREDVT_RET_IREGS_3:
		case GSHAREDVT_RET_IREGS_4:
		case GSHAREDVT_RET_IREGS_5:
		case GSHAREDVT_RET_IREGS_6:
		case GSHAREDVT_RET_IREGS_7:
		case GSHAREDVT_RET_IREGS_8: {
			int j;

			for (j = 0; j < i - GSHAREDVT_RET_IREGS_1 + 1; ++j)
				arm_ldrx (code, j, ARMREG_LR, j * 8);
			break;
		}
		case GSHAREDVT_RET_HFAR8_1:
		case GSHAREDVT_RET_HFAR8_2:
		case GSHAREDVT_RET_HFAR8_3:
		case GSHAREDVT_RET_HFAR8_4: {
			int j;

			for (j = 0; j < i - GSHAREDVT_RET_HFAR8_1 + 1; ++j)
				arm_ldrfpx (code, j, ARMREG_LR, j * 8);
			break;
		}
		case GSHAREDVT_RET_HFAR4_1:
		case GSHAREDVT_RET_HFAR4_2:
		case GSHAREDVT_RET_HFAR4_3:
		case GSHAREDVT_RET_HFAR4_4: {
			int j;

			for (j = 0; j < i - GSHAREDVT_RET_HFAR4_1 + 1; ++j)
				arm_ldrfpw (code, j, ARMREG_LR, j * 4);
			break;
		}
		default:
			g_assert_not_reached ();
			break;
		}
		br_ret [br_ret_index ++] = code;
		arm_b (code, 0);
	}

	/* OUT CASE */
	mono_arm_patch (br_out, code, MONO_R_ARM64_CBZ);

	/* Compute vret area address in LR */
	arm_ldrx (code, ARMREG_LR, ARMREG_FP, caller_reg_area_offset + (ARMREG_R8 * 8));

	/* IP1 == return marshalling type */
	arm_ldrx (code, ARMREG_IP1, ARMREG_FP, info_offset);
	arm_ldrw (code, ARMREG_IP1, ARMREG_IP1, MONO_STRUCT_OFFSET (GSharedVtCallInfo, ret_marshal));

	/* Branch to specific marshalling code */
	for (i = GSHAREDVT_RET_NONE; i < GSHAREDVT_RET_NUM; ++i) {
		code = mono_arm_emit_imm64 (code, ARMREG_IP0, i);
		arm_cmpx (code, ARMREG_IP0, ARMREG_IP1);
		br [i] = code;
		arm_bcc (code, ARMCOND_EQ, 0);
	}

	/*
	 * The return value is in registers; it needs to be saved to the return
	 * area whose address was passed by the caller in R8.
	 */
	for (i = GSHAREDVT_RET_NONE; i < GSHAREDVT_RET_NUM; ++i) {
		mono_arm_patch (br [i], code, MONO_R_ARM64_BCC);
		switch (i) {
		case GSHAREDVT_RET_NONE:
			break;
		case GSHAREDVT_RET_I8:
			arm_strx (code, ARMREG_R0, ARMREG_LR, 0);
			break;
		case GSHAREDVT_RET_I1:
		case GSHAREDVT_RET_U1:
			arm_strb (code, ARMREG_R0, ARMREG_LR, 0);
			break;
		case GSHAREDVT_RET_I2:
		case GSHAREDVT_RET_U2:
			arm_strh (code, ARMREG_R0, ARMREG_LR, 0);
			break;
		case GSHAREDVT_RET_I4:
		case GSHAREDVT_RET_U4:
			arm_strw (code, ARMREG_R0, ARMREG_LR, 0);
			break;
		case GSHAREDVT_RET_R8:
			arm_strfpx (code, ARMREG_D0, ARMREG_LR, 0);
			break;
		case GSHAREDVT_RET_R4:
			arm_strfpw (code, ARMREG_D0, ARMREG_LR, 0);
			break;
		case GSHAREDVT_RET_IREGS_1:
		case GSHAREDVT_RET_IREGS_2:
		case GSHAREDVT_RET_IREGS_3:
		case GSHAREDVT_RET_IREGS_4:
		case GSHAREDVT_RET_IREGS_5:
		case GSHAREDVT_RET_IREGS_6:
		case GSHAREDVT_RET_IREGS_7:
		case GSHAREDVT_RET_IREGS_8: {
			int j;

			for (j = 0; j < i - GSHAREDVT_RET_IREGS_1 + 1; ++j)
				arm_strx (code, j, ARMREG_LR, j * 8);
			break;
		}
		case GSHAREDVT_RET_HFAR8_1:
		case GSHAREDVT_RET_HFAR8_2:
		case GSHAREDVT_RET_HFAR8_3:
		case GSHAREDVT_RET_HFAR8_4: {
			int j;

			for (j = 0; j < i - GSHAREDVT_RET_HFAR8_1 + 1; ++j)
				arm_strfpx (code, j, ARMREG_LR, j * 8);
			break;
		}
		case GSHAREDVT_RET_HFAR4_1:
		case GSHAREDVT_RET_HFAR4_2:
		case GSHAREDVT_RET_HFAR4_3:
		case GSHAREDVT_RET_HFAR4_4: {
			int j;

			for (j = 0; j < i - GSHAREDVT_RET_HFAR4_1 + 1; ++j)
				arm_strfpw (code, j, ARMREG_LR, j * 4);
			break;
		}
		default:
			arm_brk (code, i);
			break;
		}
		br_ret [br_ret_index ++] = code;
		arm_b (code, 0);
	}

	arm_brk (code, 0);

	for (i = 0; i < br_ret_index; ++i)
		mono_arm_patch (br_ret [i], code, MONO_R_ARM64_B);
	for (i = 0; i < bcc_ret_index; ++i)
		mono_arm_patch (bcc_ret [i], code, MONO_R_ARM64_BCC);

	/* Normal return */
	arm_movspx (code, ARMREG_SP, ARMREG_FP);
	arm_ldpx_post (code, ARMREG_FP, ARMREG_LR, ARMREG_SP, offset);
	arm_retx (code, ARMREG_LR);

	g_assert ((code - buf) < buf_len);

	if (info)
		*info = mono_tramp_info_create ("gsharedvt_trampoline", buf, code - buf, ji, unwind_ops);

	mono_arch_flush_icache (buf, code - buf);
	return buf;
}

#else

gpointer
mono_arch_get_gsharedvt_trampoline (MonoTrampInfo **info, gboolean aot)
{
	g_assert_not_reached ();
	return NULL;
}

#endif

#endif /* MONO_ARCH_GSHAREDVT_SUPPORTED */