2 Copyright (C) 2004-2007, The Perl Foundation.
7 src/pic.c - Polymorphic Inline Cache
11 The PIC supports inline caching for MMD and object method lookups in
12 prederefed run cores. Additionally opcodes that do some kind of lookup
13 like C<new_p_sc> are changed to faster variants.
15 For non-prederefed run-cores there's a less efficient variant which
18 * the bytecode segment has an index per cached opcode (code->pic_index)
19 * this index points into pic_store
20 * TODO use the cache in opcodes
22 =head1 OPERATION SCHEME
27 +--------------+---------------+----+----+-----------------+----------+
28 | infix_ic_p_p | .MMD_SUBTRACT | P5 | P6 | callmethodcc_sc | "method" |
29 +--------------+---------------+----+----+-----------------+----------+
31 In init_prederef the opcodes are replaced with prederef__, operands
32 are replaced with their addresses (&) in the const table or offsets
33 (+) in the register frame:
36 +--------------+---------------+----+----+-----------------+----------+
37 | prederef__ | .MMD_SUBTRACT | +P5| +P6| prederef__ |&"method" |
38 +--------------+---------------+----+----+-----------------+----------+
40 we have code->pic_index with an index into pic_store - the pic_index is
41 half the size of the bytecode and addressed with pc_offset/2:
48 During predereferencing the opcode gets rewritten to the PIC variant,
49 the constant infix operation number is replaced with a pointer to the MIC
50 in the pic_store at the index pic_index:
53 +--------------------+-----+----+----+-----------------------+-----+
54 | pic_infix___ic_p_p | MIC1|+P5 |+P6 | pic_callmethodcc___sc | MIC2|
55 +--------------------+-----+----+----+-----------------------+-----+
57 This can be further optimized due to static inlining:
60 +--------------------+-----+----+----+-----------------------+-----+
61 | pic_inline_sub_p_p | MIC1|+P5 |+P6 | pic_callmethodcc___sc | MIC2|
62 +--------------------+-----+----+----+-----------------------+-----+
64 The opcode is an opcode number for the switched core or the actual code address
65 for the direct-threaded CGP core. With a little help of the JIT system we could
66 also dynamicall create inlined code.
68 Runcores with r/o (mmaped) bytecode can't be rewritten in this way, the
69 lookup of the cache has to be done in the opcode itself.
79 #include "parrot/parrot.h"
80 #include "parrot/oplib/ops.h"
82 #ifdef HAVE_COMPUTED_GOTO
83 # include "parrot/oplib/core_ops_cgp.h"
87 # include "parrot/exec.h"
93 /* needs a Makefile dependency */
94 /* #include "pmc/pmc_integer.h" */
96 extern void Parrot_Integer_i_subtract_Integer(Interp
* , PMC
* pmc
, PMC
* value
);
98 #define OP_AS_OFFS(o) (_reg_base + ((opcode_t*)cur_opcode)[o])
101 * hack to turn on inlining - just sub_p_p for mops done
104 #define ENABLE_INLINING 0
108 =item C<void parrot_PIC_alloc_store(Interp *, PackFile_ByteCode *, size_t n);>
110 Initialize the PIC storage for the given code segment with the capacitiy of
111 holding at least C<n> MIC entries. The PIC_store itself, room for C<n> MICs and
112 some space for PICs is allocated as one piece. MICs are returned from the start
113 of usable memory, PICs from the rear.
115 =item C<void parrot_PIC_destroy(Interp *, PackFile_ByteCode *);>
117 Free memory for the PIC storage.
124 parrot_PIC_alloc_store(Interp
*interp
, PackFile_ByteCode
*cs
, size_t n
)
127 Parrot_PIC_store
*store
;
130 * estimated 95% of calls are monomorphic, 5% are polymorphic
133 #define POLYMORPHIC 0.05
135 poly
= (size_t)(n
* POLYMORPHIC
) * sizeof (Parrot_PIC
);
137 poly
= 2 * sizeof (Parrot_PIC
);
138 size
= n
* sizeof (Parrot_MIC
) + poly
+ sizeof (Parrot_PIC_store
);
140 store
= (Parrot_PIC_store
*)mem_sys_allocate_zeroed(size
);
142 cs
->pic_store
= store
;
144 store
->pic
= (Parrot_PIC
*)((char *)store
+ size
);
145 store
->usable
= poly
;
146 store
->mic
= (Parrot_MIC
*)((char*)store
+ sizeof (Parrot_PIC_store
));
151 parrot_PIC_destroy(Interp
*interp
, PackFile_ByteCode
*cs
)
153 Parrot_PIC_store
*store
;
155 for (store
= cs
->pic_store
; store
;) {
156 Parrot_PIC_store
* const prev
= store
->prev
;
160 cs
->pic_store
= NULL
;
165 =item C<int parrot_PIC_op_is_cached(Interp *, int op_code);>
167 Return true, if the opcode needs a PIC slot.
172 parrot_PIC_op_is_cached(Interp
*interp
, int op_code
)
175 case PARROT_OP_infix_ic_p_p
:
176 case PARROT_OP_get_params_pc
:
177 case PARROT_OP_set_returns_pc
:
178 case PARROT_OP_set_args_pc
:
185 =item C<Parrot_MIC* parrot_PIC_alloc_mic(Interp*, size_t n);>
187 =item C<Parrot_MIC* parrot_PIC_alloc_pic(Interp*);>
189 Allocate a new PIC or MIC structure for the C<n>th cached opcode in this
197 parrot_PIC_alloc_mic(Interp
*interp
, size_t n
)
199 Parrot_PIC_store
* const store
= interp
->code
->pic_store
;
200 assert(n
< store
->n_mics
);
201 return store
->mic
+ n
;
205 parrot_PIC_alloc_pic(Interp
*interp
)
207 Parrot_PIC_store
*store
= interp
->code
->pic_store
;
208 Parrot_PIC_store
*new_store
;
210 if (store
->usable
< sizeof (Parrot_PIC
)) {
212 (size_t)(store
->n_mics
* POLYMORPHIC
) * sizeof (Parrot_PIC
);
214 size
= 2 * sizeof (Parrot_PIC
);
215 new_store
= (Parrot_PIC_store
*)mem_sys_allocate_zeroed(size
+ sizeof (Parrot_PIC_store
));
216 new_store
->prev
= store
;
217 interp
->code
->pic_store
= new_store
;
219 new_store
->pic
= (Parrot_PIC
*)((char *)new_store
+ size
+
220 sizeof (Parrot_PIC_store
));
221 new_store
->usable
= size
;
223 * the addon store has only poly-morphic slots
224 * point the monomorphic to the old store
226 new_store
->mic
= store
->mic
;
227 new_store
->n_mics
= store
->n_mics
;
230 store
->usable
-= sizeof (Parrot_PIC
);
236 =item C<void parrot_PIC_prederef(Interp *, opcode_t op,
237 void **pc_pred, int type)>
239 Define either the normal prederef function or the PIC stub, if PIC for
240 this opcode function is available. Called from C<do_prederef>.
248 parrot_pic_opcode(Interp
*interp
, INTVAL op
)
250 const int core
= interp
->run_core
;
251 #ifdef HAVE_COMPUTED_GOTO
255 if (core
== PARROT_SWITCH_CORE
|| core
== PARROT_SWITCH_JIT_CORE
)
257 #ifdef HAVE_COMPUTED_GOTO
258 cg_lib
= PARROT_CORE_CGP_OPLIB_INIT(1);
259 return ((void**)cg_lib
->op_func_table
)[op
];
266 pass_int(Interp
*interp
, PMC
*sig
, char *src_base
, void **src
,
267 char *dest_base
, void **dest
)
269 int i
, n
= SIG_ELEMS(sig
);
270 for (i
= 2 ; n
; ++i
, --n
) {
271 const INTVAL arg
= *(INTVAL
*)(src_base
+ ((opcode_t
*)src
)[i
]);
272 *(INTVAL
*)(dest_base
+ ((opcode_t
*)dest
)[i
])= arg
;
278 pass_num(Interp
*interp
, PMC
*sig
, char *src_base
, void **src
,
279 char *dest_base
, void **dest
)
281 int i
, n
= SIG_ELEMS(sig
);
282 for (i
= 2 ; n
; ++i
, --n
) {
283 const FLOATVAL arg
= *(FLOATVAL
*)(src_base
+ ((opcode_t
*)src
)[i
]);
284 *(FLOATVAL
*)(dest_base
+ ((opcode_t
*)dest
)[i
])= arg
;
290 pass_str(Interp
*interp
, PMC
*sig
, char *src_base
, void **src
,
291 char *dest_base
, void **dest
)
293 int i
, n
= SIG_ELEMS(sig
);
294 for (i
= 2 ; n
; ++i
, --n
) {
295 STRING
* const arg
= *(STRING
* *)(src_base
+ ((opcode_t
*)src
)[i
]);
296 *(STRING
* *)(dest_base
+ ((opcode_t
*)dest
)[i
])= arg
;
302 pass_pmc(Interp
*interp
, PMC
*sig
, char *src_base
, void **src
,
303 char *dest_base
, void **dest
)
305 int i
, n
= SIG_ELEMS(sig
);
306 for (i
= 2 ; n
; ++i
, --n
) {
307 PMC
* const arg
= *(PMC
* *)(src_base
+ ((opcode_t
*)src
)[i
]);
308 *(PMC
* *)(dest_base
+ ((opcode_t
*)dest
)[i
])= arg
;
314 pass_mixed(Interp
*interp
, PMC
*sig
, char *src_base
, void **src
,
315 char *dest_base
, void **dest
)
318 int i
, n
= SIG_ELEMS(sig
);
325 bitp
= SIG_ARRAY(sig
);
326 for (i
= 2 ; n
; ++i
, --n
) {
327 const INTVAL bits
= *bitp
++;
329 case PARROT_ARG_INTVAL
:
330 argI
= *(INTVAL
*)(src_base
+ ((opcode_t
*)src
)[i
]);
331 *(INTVAL
*)(dest_base
+ ((opcode_t
*)dest
)[i
])= argI
;
333 case PARROT_ARG_INTVAL
|PARROT_ARG_CONSTANT
:
334 argI
= (INTVAL
)(src
)[i
];
335 *(INTVAL
*)(dest_base
+ ((opcode_t
*)dest
)[i
])= argI
;
337 case PARROT_ARG_FLOATVAL
:
338 argN
= *(FLOATVAL
*)(src_base
+ ((opcode_t
*)src
)[i
]);
339 *(FLOATVAL
*)(dest_base
+ ((opcode_t
*)dest
)[i
])= argN
;
341 case PARROT_ARG_FLOATVAL
|PARROT_ARG_CONSTANT
:
342 argN
= *(FLOATVAL
*)(src
)[i
];
343 *(FLOATVAL
*)(dest_base
+ ((opcode_t
*)dest
)[i
])= argN
;
345 case PARROT_ARG_STRING
:
346 argS
= *(STRING
**)(src_base
+ ((opcode_t
*)src
)[i
]);
347 *(STRING
* *)(dest_base
+ ((opcode_t
*)dest
)[i
])= argS
;
349 case PARROT_ARG_STRING
|PARROT_ARG_CONSTANT
:
350 argS
= (STRING
*)(src
)[i
];
351 *(STRING
* *)(dest_base
+ ((opcode_t
*)dest
)[i
])= argS
;
354 argP
= *(PMC
**)(src_base
+ ((opcode_t
*)src
)[i
]);
355 *(PMC
* *)(dest_base
+ ((opcode_t
*)dest
)[i
])= argP
;
357 case PARROT_ARG_PMC
|PARROT_ARG_CONSTANT
:
358 argP
= (PMC
*)(src
)[i
];
359 *(PMC
* *)(dest_base
+ ((opcode_t
*)dest
)[i
])= argP
;
362 internal_exception(1, "bogus signature 0x%x", bits
);
369 * return argument count and type of the signature or -1 if not pic-able
370 * the type PARROT_ARG_CONSTANT stands for mixed types or constants
373 parrot_pic_check_sig(Interp
*interp
, const PMC
*sig1
, const PMC
*sig2
, int *type
/*NN*/)
375 int i
, n
, t0
, t1
, t2
;
376 t0
= 0; /* silence compiler uninit warning */
378 ASSERT_SIG_PMC(sig1
);
379 ASSERT_SIG_PMC(sig2
);
381 if (n
!= SIG_ELEMS(sig2
))
387 for (i
= 0; i
< n
; ++i
) {
388 t1
= SIG_ITEM(sig1
, i
);
389 t2
= SIG_ITEM(sig2
, i
);
391 t0
= t1
& PARROT_ARG_TYPE_MASK
;
394 if (t1
& PARROT_ARG_CONSTANT
) {
395 *type
= PARROT_ARG_CONSTANT
;
396 t1
&= ~PARROT_ARG_CONSTANT
;
398 if (t1
& ~PARROT_ARG_TYPE_MASK
)
400 if (t2
& PARROT_ARG_CONSTANT
) {
401 *type
= PARROT_ARG_CONSTANT
;
402 t2
&= ~PARROT_ARG_CONSTANT
;
404 if (t2
& ~PARROT_ARG_TYPE_MASK
)
409 *type
= PARROT_ARG_CONSTANT
;
415 is_pic_param(Interp
*interp
, void **pc
, Parrot_MIC
* const mic
, opcode_t op
)
419 parrot_context_t
*caller_ctx
;
421 PMC
* const sig1
= (PMC
*)(pc
[1]);
422 parrot_context_t
* const ctx
= CONTEXT(interp
->ctx
);
426 if (op
== PARROT_OP_set_returns_pc
) {
427 PMC
* const ccont
= ctx
->current_cont
;
428 if (!PMC_cont(ccont
)->address
)
430 caller_ctx
= PMC_cont(ccont
)->to_ctx
;
431 args
= caller_ctx
->current_results
;
434 caller_ctx
= ctx
->caller_ctx
;
435 args
= interp
->current_args
;
438 const INTVAL const_nr
= args
[1];
439 /* check current_args signature */
440 sig2
= caller_ctx
->constants
[const_nr
]->u
.key
;
441 n
= parrot_pic_check_sig(interp
, sig1
, sig2
, &type
);
446 if (SIG_ELEMS(sig1
) == 0) {
454 case PARROT_ARG_INTVAL
:
455 mic
->lru
.f
.real_function
= (funcptr_t
)pass_int
;
457 case PARROT_ARG_FLOATVAL
:
458 mic
->lru
.f
.real_function
= (funcptr_t
)pass_num
;
460 case PARROT_ARG_STRING
:
461 mic
->lru
.f
.real_function
= (funcptr_t
)pass_str
;
464 mic
->lru
.f
.real_function
= (funcptr_t
)pass_pmc
;
466 case PARROT_ARG_CONSTANT
:
467 mic
->lru
.f
.real_function
= (funcptr_t
)pass_mixed
;
473 /* remember this sig2 - it has to match the other end at call time */
474 mic
->lru
.u
.signature
= sig2
;
480 is_pic_func(Interp
*interp
, void **pc
, Parrot_MIC
*mic
, int core_type
)
483 * if we have these opcodes
485 * set_args '(..)' ...
487 * get_results '(..)' ...
490 * and all args are matching the called sub and we don't have
491 * too many args, and only INTVAL or FLOATVAL, the
492 * whole sequence is replaced by the C<callr> pic opcode.
494 * Oh, I forgot to mention - the to-be-called C function is of
495 * course compiled on-the-fly by the JIT compiler ;)
500 PMC
*sub
, *sig_results
;
504 parrot_context_t
* const ctx
= CONTEXT(interp
->ctx
);
505 PMC
* const sig_args
= (PMC
*)(pc
[1]);
507 ASSERT_SIG_PMC(sig_args
);
508 n
= SIG_ELEMS(sig_args
);
509 interp
->current_args
= (opcode_t
*)pc
+ ctx
->pred_offset
;
511 op
= (opcode_t
*)pc
+ ctx
->pred_offset
;
512 if (*op
!= PARROT_OP_set_p_pc
)
514 do_prederef(pc
, interp
, core_type
);
516 assert(PObj_is_PMC_TEST(sub
));
517 if (sub
->vtable
->base_type
!= enum_class_Sub
)
519 pc
+= 3; /* results */
520 op
= (opcode_t
*)pc
+ ctx
->pred_offset
;
521 if (*op
!= PARROT_OP_get_results_pc
)
523 do_prederef(pc
, interp
, core_type
);
524 sig_results
= (PMC
*)(pc
[1]);
525 ASSERT_SIG_PMC(sig_results
);
527 ctx
->current_results
= (opcode_t
*)pc
+ ctx
->pred_offset
;
528 if (!parrot_pic_is_safe_to_jit(interp
, sub
,
529 sig_args
, sig_results
, &flags
))
531 mic
->lru
.f
.real_function
= parrot_pic_JIT_sub(interp
, sub
, flags
);
532 mic
->m
.sig
= sig_args
;
537 parrot_PIC_prederef(Interp
*interp
, opcode_t op
, void **pc_pred
, int core
)
539 op_func_t
* const prederef_op_func
= interp
->op_lib
->op_func_table
;
540 opcode_t
* const cur_opcode
= (opcode_t
*)pc_pred
;
541 Parrot_MIC
*mic
= NULL
;
543 if (parrot_PIC_op_is_cached(interp
, op
)) {
544 PackFile_ByteCode
*cs
= interp
->code
;
545 size_t n
= cur_opcode
- (opcode_t
*)cs
->prederef
.code
;
547 * pic_index is half the size of the code
548 * XXX if it's there - pbc_merge needs updates
550 assert(cs
->pic_index
);
551 n
= cs
->pic_index
->data
[n
/ 2];
552 mic
= parrot_PIC_alloc_mic(interp
, n
);
555 case PARROT_OP_new_p_sc
:
558 STRING
* const _class
= (STRING
*)cur_opcode
[2];
559 type
= pmc_type(interp
, _class
);
561 type
= pmc_type(interp
, _class
);
563 real_exception(interp
, NULL
, NO_CLASS
,
564 "Class '%Ss' not found", _class
);
565 pc_pred
[2] = (void*)type
;
566 op
= PARROT_OP_new_p_ic
;
569 case PARROT_OP_infix_ic_p_p
:
570 mic
->m
.func_nr
= (INTVAL
) cur_opcode
[1];
571 pc_pred
[1] = (void*) mic
;
572 op
= PARROT_OP_pic_infix___ic_p_p
;
574 case PARROT_OP_get_params_pc
:
575 if (is_pic_param(interp
, pc_pred
, mic
, op
)) {
576 pc_pred
[1] = (void*) mic
;
577 op
= PARROT_OP_pic_get_params___pc
;
580 case PARROT_OP_set_returns_pc
:
581 if (is_pic_param(interp
, pc_pred
, mic
, op
)) {
582 pc_pred
[1] = (void*) mic
;
583 op
= PARROT_OP_pic_set_returns___pc
;
586 case PARROT_OP_set_args_pc
:
587 if (is_pic_func(interp
, pc_pred
, mic
, core
)) {
588 pc_pred
[1] = (void*) mic
;
589 op
= PARROT_OP_pic_callr___pc
;
596 if (core
== PARROT_SWITCH_CORE
|| core
== PARROT_SWITCH_JIT_CORE
)
597 *pc_pred
= (void**) op
;
599 *pc_pred
= ((void **)prederef_op_func
)[op
];
603 parrot_pic_move(Interp
* interp
, Parrot_MIC
*mic
)
606 * MIC slot is empty - use it
608 if (!mic
->lru
.u
.type
)
611 * need more cache slots - allocate one PIC
614 mic
->pic
= parrot_PIC_alloc_pic(interp
);
618 * PIC was already used - shift slots up
620 Parrot_PIC
* const pic
= mic
->pic
;
622 pic
->lru
[2].u
.type
= pic
->lru
[1].u
.type
;
623 pic
->lru
[2].f
.sub
= pic
->lru
[1].f
.sub
;
624 pic
->lru
[1].u
.type
= pic
->lru
[0].u
.type
;
625 pic
->lru
[1].f
.sub
= pic
->lru
[0].f
.sub
;
626 pic
->lru
[0].u
.type
= mic
->lru
.u
.type
;
627 pic
->lru
[0].f
.sub
= mic
->lru
.f
.sub
;
633 parrot_pic_find_infix_v_pp(Interp
*interp
, PMC
*left
, PMC
*right
,
634 Parrot_MIC
*mic
, opcode_t
*cur_opcode
)
638 INTVAL left_type
, right_type
;
640 * if 2 threads are entering here, there is a chance
641 * that one moves the lru structure under the other thread
642 * and vv - just lock in case
646 * if (TRY_LOCK_INTERPRETER(i) == EBUSY)
649 LOCK_INTERPRETER(interp
);
651 * move entries back and set topmost entry
653 parrot_pic_move(interp
, mic
);
655 * get real dispatch function
657 left_type
= VTABLE_type(interp
, left
);
658 right_type
= VTABLE_type(interp
, right
);
659 func
= get_mmd_dispatch_type(interp
,
660 mic
->m
.func_nr
, left_type
, right_type
, &is_pmc
);
662 const size_t offs
= cur_opcode
- (opcode_t
*)interp
->code
->prederef
.code
;
663 opcode_t
* real_op
= interp
->code
->base
.data
+ offs
+ 1;
664 /* set prederef code address to orig slot for now
666 ((void**)cur_opcode
)[0] =
667 parrot_pic_opcode(interp
, PARROT_OP_infix_ic_p_p
);
668 /* restore 1st operand i.e. .MMD_func_nr */
669 ((void**)cur_opcode
)[1] = (void*)*real_op
;
670 mic
->lru
.f
.sub
= (PMC
*)F2DPTR(func
);
673 INTVAL op
= PARROT_OP_pic_infix___ic_p_p
;
676 if (func
== (funcptr_t
)Parrot_Integer_i_subtract_Integer
&& !mic
->pic
)
677 op
= PARROT_OP_pic_inline_sub___ic_p_p
;
679 ((void**)cur_opcode
)[0] =
680 parrot_pic_opcode(interp
, op
);
681 mic
->lru
.f
.real_function
= func
;
683 mic
->lru
.u
.type
= (left_type
<< 16) | right_type
;
684 UNLOCK_INTERPRETER(interp
);
693 Leopold Toetsch with many hints from Ken Fox.
697 F<src/mmd.c>, F<src/object.c>, F<src/interpreter.c>, F<ops/core_ops_cgp.c>,
698 F<include/parrot/pic.h>, F<ops/pic.ops>
707 * c-file-style: "parrot"
709 * vim: expandtab shiftwidth=4: