nanojit / LIR.h
1 /* -*- Mode: C++; c-basic-offset: 4; indent-tabs-mode: nil; tab-width: 4 -*- */
2 /* vi: set ts=4 sw=4 expandtab: (add to ~/.vimrc: set modeline modelines=5) */
3 /* ***** BEGIN LICENSE BLOCK *****
4 * Version: MPL 1.1/GPL 2.0/LGPL 2.1
6 * The contents of this file are subject to the Mozilla Public License Version
7 * 1.1 (the "License"); you may not use this file except in compliance with
8 * the License. You may obtain a copy of the License at
9 * http://www.mozilla.org/MPL/
11 * Software distributed under the License is distributed on an "AS IS" basis,
12 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
13 * for the specific language governing rights and limitations under the
14 * License.
16 * The Original Code is [Open Source Virtual Machine].
18 * The Initial Developer of the Original Code is
19 * Adobe System Incorporated.
20 * Portions created by the Initial Developer are Copyright (C) 2004-2007
21 * the Initial Developer. All Rights Reserved.
23 * Contributor(s):
24 * Adobe AS3 Team
26 * Alternatively, the contents of this file may be used under the terms of
27 * either the GNU General Public License Version 2 or later (the "GPL"), or
28 * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
29 * in which case the provisions of the GPL or the LGPL are applicable instead
30 * of those above. If you wish to allow use of your version of this file only
31 * under the terms of either the GPL or the LGPL, and not to allow others to
32 * use your version of this file under the terms of the MPL, indicate your
33 * decision by deleting the provisions above and replace them with the notice
34 * and other provisions required by the GPL or the LGPL. If you do not delete
35 * the provisions above, a recipient may use your version of this file under
36 * the terms of any one of the MPL, the GPL or the LGPL.
38 * ***** END LICENSE BLOCK ***** */
40 #ifndef __nanojit_LIR__
41 #define __nanojit_LIR__
43 namespace nanojit
45 enum LOpcode
46 #if defined(_MSC_VER) && _MSC_VER >= 1400
47 #pragma warning(disable:4480) // nonstandard extension used: specifying underlying type for enum
48 : unsigned
49 #endif
51 #define OP___(op, number, repKind, retType, isCse) \
52 LIR_##op = (number),
53 #include "LIRopcode.tbl"
54 LIR_sentinel,
55 #undef OP___
57 #ifdef NANOJIT_64BIT
58 # define PTR_SIZE(a,b) b
59 #else
60 # define PTR_SIZE(a,b) a
61 #endif
63 // Pointer-sized synonyms.
65 LIR_paramp = PTR_SIZE(LIR_parami, LIR_paramq),
67 LIR_retp = PTR_SIZE(LIR_reti, LIR_retq),
69 LIR_livep = PTR_SIZE(LIR_livei, LIR_liveq),
71 LIR_ldp = PTR_SIZE(LIR_ldi, LIR_ldq),
73 LIR_stp = PTR_SIZE(LIR_sti, LIR_stq),
75 LIR_callp = PTR_SIZE(LIR_calli, LIR_callq),
77 LIR_eqp = PTR_SIZE(LIR_eqi, LIR_eqq),
78 LIR_ltp = PTR_SIZE(LIR_lti, LIR_ltq),
79 LIR_gtp = PTR_SIZE(LIR_gti, LIR_gtq),
80 LIR_lep = PTR_SIZE(LIR_lei, LIR_leq),
81 LIR_gep = PTR_SIZE(LIR_gei, LIR_geq),
82 LIR_ltup = PTR_SIZE(LIR_ltui, LIR_ltuq),
83 LIR_gtup = PTR_SIZE(LIR_gtui, LIR_gtuq),
84 LIR_leup = PTR_SIZE(LIR_leui, LIR_leuq),
85 LIR_geup = PTR_SIZE(LIR_geui, LIR_geuq),
87 LIR_addp = PTR_SIZE(LIR_addi, LIR_addq),
88 LIR_subp = PTR_SIZE(LIR_subi, LIR_subq),
89 LIR_addjovp = PTR_SIZE(LIR_addjovi, LIR_addjovq),
91 LIR_andp = PTR_SIZE(LIR_andi, LIR_andq),
92 LIR_orp = PTR_SIZE(LIR_ori, LIR_orq),
93 LIR_xorp = PTR_SIZE(LIR_xori, LIR_xorq),
95 LIR_lshp = PTR_SIZE(LIR_lshi, LIR_lshq),
96 LIR_rshp = PTR_SIZE(LIR_rshi, LIR_rshq),
97 LIR_rshup = PTR_SIZE(LIR_rshui, LIR_rshuq),
99 LIR_cmovp = PTR_SIZE(LIR_cmovi, LIR_cmovq)
102 // 32-bit integer comparisons must be contiguous, as must 64-bit integer
103 // comparisons and 64-bit float comparisons.
104 NanoStaticAssert(LIR_eqi + 1 == LIR_lti &&
105 LIR_eqi + 2 == LIR_gti &&
106 LIR_eqi + 3 == LIR_lei &&
107 LIR_eqi + 4 == LIR_gei &&
108 LIR_eqi + 5 == LIR_ltui &&
109 LIR_eqi + 6 == LIR_gtui &&
110 LIR_eqi + 7 == LIR_leui &&
111 LIR_eqi + 8 == LIR_geui);
112 #ifdef NANOJIT_64BIT
113 NanoStaticAssert(LIR_eqq + 1 == LIR_ltq &&
114 LIR_eqq + 2 == LIR_gtq &&
115 LIR_eqq + 3 == LIR_leq &&
116 LIR_eqq + 4 == LIR_geq &&
117 LIR_eqq + 5 == LIR_ltuq &&
118 LIR_eqq + 6 == LIR_gtuq &&
119 LIR_eqq + 7 == LIR_leuq &&
120 LIR_eqq + 8 == LIR_geuq);
121 #endif
122 NanoStaticAssert(LIR_eqd + 1 == LIR_ltd &&
123 LIR_eqd + 2 == LIR_gtd &&
124 LIR_eqd + 3 == LIR_led &&
125 LIR_eqd + 4 == LIR_ged);
127 // Various opcodes must be changeable to their opposite with op^1
128 // (although we use invertXyz() when possible, ie. outside static
129 // assertions).
130 NanoStaticAssert((LIR_jt^1) == LIR_jf && (LIR_jf^1) == LIR_jt);
132 NanoStaticAssert((LIR_xt^1) == LIR_xf && (LIR_xf^1) == LIR_xt);
134 NanoStaticAssert((LIR_lti^1) == LIR_gti && (LIR_gti^1) == LIR_lti);
135 NanoStaticAssert((LIR_lei^1) == LIR_gei && (LIR_gei^1) == LIR_lei);
136 NanoStaticAssert((LIR_ltui^1) == LIR_gtui && (LIR_gtui^1) == LIR_ltui);
137 NanoStaticAssert((LIR_leui^1) == LIR_geui && (LIR_geui^1) == LIR_leui);
139 #ifdef NANOJIT_64BIT
140 NanoStaticAssert((LIR_ltq^1) == LIR_gtq && (LIR_gtq^1) == LIR_ltq);
141 NanoStaticAssert((LIR_leq^1) == LIR_geq && (LIR_geq^1) == LIR_leq);
142 NanoStaticAssert((LIR_ltuq^1) == LIR_gtuq && (LIR_gtuq^1) == LIR_ltuq);
143 NanoStaticAssert((LIR_leuq^1) == LIR_geuq && (LIR_geuq^1) == LIR_leuq);
144 #endif
146 NanoStaticAssert((LIR_ltd^1) == LIR_gtd && (LIR_gtd^1) == LIR_ltd);
147 NanoStaticAssert((LIR_led^1) == LIR_ged && (LIR_ged^1) == LIR_led);
150 struct GuardRecord;
151 struct SideExit;
153 enum AbiKind {
154 ABI_FASTCALL,
155 ABI_THISCALL,
156 ABI_STDCALL,
157 ABI_CDECL
160 // This is much the same as LTy, but we need to distinguish signed and
161 // unsigned 32-bit ints so that they will be extended to 64-bits correctly
162 // on 64-bit platforms.
164 // All values must fit into three bits. See CallInfo for details.
165 enum ArgType {
166 ARGTYPE_V = 0, // void
167 ARGTYPE_I = 1, // int32_t
168 ARGTYPE_UI = 2, // uint32_t
169 #ifdef NANOJIT_64BIT
170 ARGTYPE_Q = 3, // uint64_t
171 #endif
172 ARGTYPE_D = 4, // double
174 // aliases
175 ARGTYPE_P = PTR_SIZE(ARGTYPE_I, ARGTYPE_Q), // pointer
176 ARGTYPE_B = ARGTYPE_I // bool
179 enum IndirectCall {
180 CALL_INDIRECT = 0
183 //-----------------------------------------------------------------------
184 // Aliasing
185 // --------
186 // *Aliasing* occurs when a single memory location can be accessed through
187 // multiple names. For example, consider this code:
189 // ld a[0]
190 // sti b[0]
191 // ld a[0]
193 // In general, it's possible that a[0] and b[0] may refer to the same
194 // memory location. This means, for example, that you cannot safely
195 // perform CSE on the two loads. However, if you know that 'a' cannot be
196 // an alias of 'b' (ie. the two loads do not alias with the store) then
197 // you can safely perform CSE.
199 // Access regions
200 // --------------
201 // Doing alias analysis precisely is difficult. But it turns out that
202 // keeping track of aliasing at a coarse level is enough to help with many
203 // optimisations. So we conceptually divide the memory that is accessible
204 // from LIR into a small number of "access regions" (aka. "Acc"). An
205 // access region may be non-contiguous. No two access regions can
206 // overlap. The union of all access regions covers all memory accessible
207 // from LIR.
209 // In general a (static) load or store may be executed more than once, and
210 // thus may access multiple regions; however, in practice almost all
211 // loads and stores will obviously access only a single region. A
212 // function called from LIR may load and/or store multiple access regions
213 // (even if executed only once).
215 // If two loads/stores/calls are known to not access the same region(s),
216 // then they do not alias.
218 // All regions are defined by the embedding. It makes sense to add new
219 // embedding-specific access regions when doing so will help with one or
220 // more optimisations.
222 // Access region sets and instruction markings
223 // -------------------------------------------
224 // Each load/store is marked with an "access region set" (aka. "AccSet"),
225 // which is a set of one or more access regions. This indicates which
226 // parts of LIR-accessible memory the load/store may touch.
228 // Each function called from LIR is also marked with an access region set
229 // for memory stored to by the function. (We could also have a marking
230 // for memory loads done by the function, but there's no need at the
231 // moment.) These markings apply to the function itself, not the call
232 // site, ie. they're not context-sensitive.
234 // These load/store/call markings MUST BE ACCURATE -- if not then invalid
235 // optimisations might occur that change the meaning of the code.
236 // However, they can safely be imprecise (ie. conservative), ie. a
237 // load/store/call can be marked with an access region set that is a
238 // superset of the actual access region set. Such imprecision is safe but
239 // may reduce optimisation opportunities.
241 // Optimisations that use access region info
242 // -----------------------------------------
243 // Currently only CseFilter uses this, and only for determining whether
244 // loads can be CSE'd. Note that CseFilter treats loads that are marked
245 // with a single access region precisely, but all loads marked with
246 // multiple access regions get lumped together. So if you can't mark a
 247     //   load with a single access region, you might as well use ACCSET_LOAD_ANY.
248 //-----------------------------------------------------------------------
250 // An access region set is represented as a bitset. Using a uint32_t
251 // restricts us to at most 32 alias regions for the moment. This could be
252 // expanded to a uint64_t easily if needed.
253 typedef uint32_t AccSet;
254 static const int NUM_ACCS = sizeof(AccSet) * 8;
256 // Some common (non-singleton) access region sets. ACCSET_NONE does not make
 257     // sense for loads or stores (which must access at least one region); it
258 // only makes sense for calls.
260 static const AccSet ACCSET_NONE = 0x0;
261 static const AccSet ACCSET_ALL = 0xffffffff;
262 static const AccSet ACCSET_LOAD_ANY = ACCSET_ALL; // synonym
263 static const AccSet ACCSET_STORE_ANY = ACCSET_ALL; // synonym
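    // Illustrative sketch (an addition, not from the original header): an
    // embedder might define its access regions as single bits and build
    // multi-region sets from them. The region names below are hypothetical.
    //
    //     static const AccSet ACCSET_STACK  = 1 << 0;  // LIR-visible stack slots
    //     static const AccSet ACCSET_OBJECT = 1 << 1;  // object fields
    //     static const AccSet ACCSET_OTHER  = 1 << 2;  // everything else
    //
    //     AccSet storeSet = ACCSET_OBJECT | ACCSET_OTHER;  // store that may hit either
    //
    // A load marked ACCSET_STACK and a store marked ACCSET_OBJECT|ACCSET_OTHER
    // are then known not to alias, because their region sets are disjoint.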
265 inline bool isSingletonAccSet(AccSet accSet) {
266 // This is a neat way of testing if a value has only one bit set.
267 return (accSet & (accSet - 1)) == 0;
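    // Worked example (an addition, not from the original header):
    //     accSet = 0b0100:  0b0100 & 0b0011 == 0       -> singleton (one region)
    //     accSet = 0b0110:  0b0110 & 0b0101 == 0b0100  -> not a singleton
    // Note that the test also returns true for ACCSET_NONE (the empty set).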
270 // Full AccSets don't fit into load and store instructions. But
271 // load/store AccSets almost always contain a single access region. We
272 // take advantage of this to create a compressed AccSet, MiniAccSet, that
273 // does fit.
275 // The 32 single-region AccSets get compressed into a number in the range
276 // 0..31 (according to the position of the set bit), and all other
277 // (multi-region) AccSets get converted into MINI_ACCSET_MULTIPLE. So the
278 // representation is lossy in the latter case, but that case is rare for
279 // loads/stores. We use a full AccSet for the storeAccSets of calls, for
280 // which multi-region AccSets are common.
 282     // We wrap the uint8_t inside a struct to avoid the possibility of subtle
283 // bugs caused by mixing up AccSet and MiniAccSet, which is easy to do.
284 // However, the struct gets padded inside LInsLd in an inconsistent way on
285 // Windows, so we actually store a MiniAccSetVal inside LInsLd. Sigh.
286 // But we use MiniAccSet everywhere else.
288 typedef uint8_t MiniAccSetVal;
289 struct MiniAccSet { MiniAccSetVal val; };
290 static const MiniAccSet MINI_ACCSET_MULTIPLE = { 99 };
292 static MiniAccSet compressAccSet(AccSet accSet) {
293 if (isSingletonAccSet(accSet)) {
294 MiniAccSet ret = { uint8_t(msbSet32(accSet)) };
295 return ret;
298 // If we got here, it must be a multi-region AccSet.
299 return MINI_ACCSET_MULTIPLE;
302 static AccSet decompressMiniAccSet(MiniAccSet miniAccSet) {
303 return (miniAccSet.val == MINI_ACCSET_MULTIPLE.val) ? ACCSET_ALL : (1 << miniAccSet.val);
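    // Round-trip sketch (an addition, not from the original header):
    //     compressAccSet(1 << 5).val                    == 5          (bit index)
    //     decompressMiniAccSet(compressAccSet(1 << 5))  == 1 << 5     (exact)
    //     compressAccSet((1 << 5) | (1 << 7)).val       == MINI_ACCSET_MULTIPLE.val
    //     decompressMiniAccSet(MINI_ACCSET_MULTIPLE)    == ACCSET_ALL (lossy but conservative)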
306 // The LoadQual affects how a load can be optimised:
308 // - CONST: These loads are guaranteed to always return the same value
309 // during a single execution of a fragment (but the value is allowed to
310 // change between executions of the fragment). This means that the
311 // location is never stored to by the LIR, and is never modified by an
312 // external entity while the fragment is running.
314 // - NORMAL: These loads may be stored to by the LIR, but are never
315 // modified by an external entity while the fragment is running.
317 // - VOLATILE: These loads may be stored to by the LIR, and may be
318 // modified by an external entity while the fragment is running.
320 // This gives a lattice with the ordering: CONST < NORMAL < VOLATILE.
321 // As usual, it's safe to mark a load with a value higher (less precise)
 322     // than actual, but it may result in fewer optimisations occurring.
324 // Generally CONST loads are highly amenable to optimisation (eg. CSE),
325 // VOLATILE loads are entirely unoptimisable, and NORMAL loads are in
326 // between and require some alias analysis to optimise.
 328     // Note that CONST has a stronger meaning than "const" in C and C++; in C
329 // and C++ a "const" variable may be modified by an external entity, such
330 // as hardware. Hence "const volatile" makes sense in C and C++, but
331 // CONST+VOLATILE doesn't make sense in LIR.
333 // Note also that a 2-bit bitfield in LInsLd is used to hold LoadQual
 334     // values, so you can add only one more value without expanding it.
336 enum LoadQual {
337 LOAD_CONST = 0,
338 LOAD_NORMAL = 1,
339 LOAD_VOLATILE = 2
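    // Illustrative examples (an addition, not from the original header; the
    // scenarios are hypothetical):
    //     LOAD_CONST:    a field the fragment never stores to and that nothing
    //                    else modifies while the fragment runs, e.g. an
    //                    object's type/shape pointer.
    //     LOAD_NORMAL:   a slot that the fragment itself may also store to.
    //     LOAD_VOLATILE: a flag that another thread or a signal handler may
    //                    change while the fragment is running.
    // When in doubt, picking the higher (less precise) qualifier is safe, at
    // the cost of fewer optimisations.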
342 struct CallInfo
344 private:
345 // In CallInfo::_typesig, each entry is three bits.
346 static const int TYPESIG_FIELDSZB = 3;
347 static const int TYPESIG_FIELDMASK = 7;
349 public:
350 uintptr_t _address;
 351         uint32_t _typesig:27;       // 9 3-bit fields giving the ret and arg types as per ARGTYPE above, packed (high bits to low) as: a1 a2 ... a8 ret
352 AbiKind _abi:3;
353 uint32_t _isPure:1; // _isPure=1 means no side-effects, result only depends on args
354 AccSet _storeAccSet; // access regions stored by the function
355 verbose_only ( const char* _name; )
357 // The following encode 'r func()' through to 'r func(a1, a2, a3, a4, a5, a6, a7, a8)'.
358 static inline uint32_t typeSig0(ArgType r) {
359 return r;
361 static inline uint32_t typeSig1(ArgType r, ArgType a1) {
362 return a1 << TYPESIG_FIELDSZB*1 | typeSig0(r);
364 static inline uint32_t typeSig2(ArgType r, ArgType a1, ArgType a2) {
365 return a1 << TYPESIG_FIELDSZB*2 | typeSig1(r, a2);
367 static inline uint32_t typeSig3(ArgType r, ArgType a1, ArgType a2, ArgType a3) {
368 return a1 << TYPESIG_FIELDSZB*3 | typeSig2(r, a2, a3);
370 static inline uint32_t typeSig4(ArgType r, ArgType a1, ArgType a2, ArgType a3, ArgType a4) {
371 return a1 << TYPESIG_FIELDSZB*4 | typeSig3(r, a2, a3, a4);
373 static inline uint32_t typeSig5(ArgType r, ArgType a1, ArgType a2, ArgType a3,
374 ArgType a4, ArgType a5) {
375 return a1 << TYPESIG_FIELDSZB*5 | typeSig4(r, a2, a3, a4, a5);
377 static inline uint32_t typeSig6(ArgType r, ArgType a1, ArgType a2, ArgType a3,
378 ArgType a4, ArgType a5, ArgType a6) {
379 return a1 << TYPESIG_FIELDSZB*6 | typeSig5(r, a2, a3, a4, a5, a6);
381 static inline uint32_t typeSig7(ArgType r, ArgType a1, ArgType a2, ArgType a3,
382 ArgType a4, ArgType a5, ArgType a6, ArgType a7) {
383 return a1 << TYPESIG_FIELDSZB*7 | typeSig6(r, a2, a3, a4, a5, a6, a7);
385 static inline uint32_t typeSig8(ArgType r, ArgType a1, ArgType a2, ArgType a3, ArgType a4,
386 ArgType a5, ArgType a6, ArgType a7, ArgType a8) {
387 return a1 << TYPESIG_FIELDSZB*8 | typeSig7(r, a2, a3, a4, a5, a6, a7, a8);
 389         // Encode 'r func(a1, ..., aN)'
390 static inline uint32_t typeSigN(ArgType r, int N, ArgType a[]) {
391 uint32_t typesig = r;
392 for (int i = 0; i < N; i++) {
393 typesig |= a[i] << TYPESIG_FIELDSZB*(N-i);
395 return typesig;
398 uint32_t count_args() const;
399 uint32_t count_int32_args() const;
 400         // Nb: uses right-to-left order, eg. types[0] is the type of the right-most arg.
401 // XXX: See bug 525815 for fixing this.
402 uint32_t getArgTypes(ArgType* types) const;
404 inline ArgType returnType() const {
405 return ArgType(_typesig & TYPESIG_FIELDMASK);
408 inline bool isIndirect() const {
409 return _address < 256;
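    // Illustrative sketch (an addition, not from the original header). For a
    // hypothetical pure helper 'double hypot2(double, double)', the signature
    // is built with the return type first, then the arguments left to right:
    //
    //     uint32_t sig = CallInfo::typeSig2(ARGTYPE_D, ARGTYPE_D, ARGTYPE_D);
    //     // The return type sits in the low 3 bits, which is exactly what
    //     // returnType() extracts:  ArgType(sig & 7) == ARGTYPE_D.
    //
    // Its CallInfo would also carry _isPure=1 (no side effects) and
    // _storeAccSet=ACCSET_NONE, since it stores to no LIR-accessible memory.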
414 * Record for extra data used to compile switches as jump tables.
416 struct SwitchInfo
418 NIns** table; // Jump table; a jump address is NIns*
419 uint32_t count; // Number of table entries
420 // Index value at last execution of the switch. The index value
421 // is the offset into the jump table. Thus it is computed as
422 // (switch expression) - (lowest case value).
423 uint32_t index;
426 // Array holding the 'isCse' field from LIRopcode.tbl.
427 extern const int8_t isCses[]; // cannot be uint8_t, some values are negative
429 inline bool isCseOpcode(LOpcode op) {
430 NanoAssert(isCses[op] != -1); // see LIRopcode.tbl to understand this
431 return isCses[op] == 1;
433 inline bool isLiveOpcode(LOpcode op) {
434 return
435 #if defined NANOJIT_64BIT
436 op == LIR_liveq ||
437 #endif
438 op == LIR_livei || op == LIR_lived;
440 inline bool isRetOpcode(LOpcode op) {
441 return
442 #if defined NANOJIT_64BIT
443 op == LIR_retq ||
444 #endif
445 op == LIR_reti || op == LIR_retd;
447 inline bool isCmovOpcode(LOpcode op) {
448 return
449 #if defined NANOJIT_64BIT
450 op == LIR_cmovq ||
451 #endif
452 op == LIR_cmovi ||
453 op == LIR_cmovd;
455 inline bool isCmpIOpcode(LOpcode op) {
456 return LIR_eqi <= op && op <= LIR_geui;
458 inline bool isCmpSIOpcode(LOpcode op) {
459 return LIR_eqi <= op && op <= LIR_gei;
461 inline bool isCmpUIOpcode(LOpcode op) {
462 return LIR_eqi == op || (LIR_ltui <= op && op <= LIR_geui);
464 #ifdef NANOJIT_64BIT
465 inline bool isCmpQOpcode(LOpcode op) {
466 return LIR_eqq <= op && op <= LIR_geuq;
468 inline bool isCmpSQOpcode(LOpcode op) {
469 return LIR_eqq <= op && op <= LIR_geq;
471 inline bool isCmpUQOpcode(LOpcode op) {
472 return LIR_eqq == op || (LIR_ltuq <= op && op <= LIR_geuq);
474 #endif
475 inline bool isCmpDOpcode(LOpcode op) {
476 return LIR_eqd <= op && op <= LIR_ged;
478 inline bool isCmpOpcode(LOpcode op) {
479 return isCmpIOpcode(op) ||
480 #if defined NANOJIT_64BIT
481 isCmpQOpcode(op) ||
482 #endif
483 isCmpDOpcode(op);
486 inline LOpcode invertCondJmpOpcode(LOpcode op) {
487 NanoAssert(op == LIR_jt || op == LIR_jf);
488 return LOpcode(op ^ 1);
490 inline LOpcode invertCondGuardOpcode(LOpcode op) {
491 NanoAssert(op == LIR_xt || op == LIR_xf);
492 return LOpcode(op ^ 1);
494 inline LOpcode invertCmpOpcode(LOpcode op) {
495 NanoAssert(isCmpOpcode(op));
496 return LOpcode(op ^ 1);
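    // For example (illustrative), per the static assertions near the top of
    // this file:
    //     invertCmpOpcode(LIR_lti)  == LIR_gti
    //     invertCmpOpcode(LIR_leui) == LIR_geui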
499 inline LOpcode getCallOpcode(const CallInfo* ci) {
500 LOpcode op = LIR_callp;
501 switch (ci->returnType()) {
502 case ARGTYPE_V: op = LIR_callv; break;
503 case ARGTYPE_I:
504 case ARGTYPE_UI: op = LIR_calli; break;
505 #ifdef NANOJIT_64BIT
506 case ARGTYPE_Q: op = LIR_callq; break;
507 #endif
508 case ARGTYPE_D: op = LIR_calld; break;
509 default: NanoAssert(0); break;
511 return op;
514 LOpcode arithOpcodeD2I(LOpcode op);
515 #ifdef NANOJIT_64BIT
516 LOpcode cmpOpcodeI2Q(LOpcode op);
517 #endif
518 LOpcode cmpOpcodeD2I(LOpcode op);
519 LOpcode cmpOpcodeD2UI(LOpcode op);
521 // Array holding the 'repKind' field from LIRopcode.tbl.
522 extern const uint8_t repKinds[];
524 enum LTy {
525 LTy_V, // void: no value/no type
526 LTy_I, // int: 32-bit integer
527 #ifdef NANOJIT_64BIT
528 LTy_Q, // quad: 64-bit integer
529 #endif
530 LTy_D, // double: 64-bit float
532 LTy_P = PTR_SIZE(LTy_I, LTy_Q) // word-sized integer
535 // Array holding the 'retType' field from LIRopcode.tbl.
536 extern const LTy retTypes[];
538 inline RegisterMask rmask(Register r)
540 return RegisterMask(1) << REGNUM(r);
543 //-----------------------------------------------------------------------
544 // Low-level instructions. This is a bit complicated, because we have a
545 // variable-width representation to minimise space usage.
547 // - Instruction size is always an integral multiple of word size.
549 // - Every instruction has at least one word, holding the opcode and the
550 // reservation info ("SharedFields"). That word is in class LIns.
552 // - Beyond that, most instructions have 1, 2 or 3 extra words. These
553 // extra words are in classes LInsOp1, LInsOp2, etc (collectively called
554 // "LInsXYZ" in what follows). Each LInsXYZ class also contains an LIns,
555 // accessible by the 'ins' member, which holds the LIns data.
557 // - LIR is written forward, but read backwards. When reading backwards,
 558     //   in order to find the opcode, it must be in a predictable place in the
 559     //   LInsXYZ, a place that isn't affected by instruction width.  Therefore, the LIns
560 // word (which contains the opcode) is always the *last* word in an
561 // instruction.
563 // - Each instruction is created by casting pre-allocated bytes from a
564 // LirBuffer to the LInsXYZ type. Therefore there are no constructors
565 // for LIns or LInsXYZ.
567 // - The standard handle for an instruction is a LIns*. This actually
568 // points to the LIns word, ie. to the final word in the instruction.
569 // This is a bit odd, but it allows the instruction's opcode to be
570 // easily accessed. Once you've looked at the opcode and know what kind
571 // of instruction it is, if you want to access any of the other words,
572 // you need to use toLInsXYZ(), which takes the LIns* and gives you an
573 // LInsXYZ*, ie. the pointer to the actual start of the instruction's
574 // bytes. From there you can access the instruction-specific extra
575 // words.
577 // - However, from outside class LIns, LInsXYZ isn't visible, nor is
578 // toLInsXYZ() -- from outside LIns, all LIR instructions are handled
579 // via LIns pointers and get/set methods are used for all LIns/LInsXYZ
580 // accesses. In fact, all data members in LInsXYZ are private and can
581 // only be accessed by LIns, which is a friend class. The only thing
582 // anyone outside LIns can do with a LInsXYZ is call getLIns().
584 // - An example Op2 instruction and the likely pointers to it (each line
585 // represents a word, and pointers to a line point to the start of the
586 // word on that line):
588 // [ oprnd_2 <-- LInsOp2* insOp2 == toLInsOp2(ins)
589 // oprnd_1
590 // opcode + resv ] <-- LIns* ins
592 // - LIR_skip instructions are used to link code chunks. If the first
593 // instruction on a chunk isn't a LIR_start, it will be a skip, and the
594 // skip's operand will point to the last LIns on the preceding chunk.
595 // LInsSk has the same layout as LInsOp1, but we represent it as a
596 // different class because there are some places where we treat
597 // skips specially and so having it separate seems like a good idea.
599 // - Various things about the size and layout of LIns and LInsXYZ are
600 // statically checked in staticSanityCheck(). In particular, this is
601 // worthwhile because there's nothing that guarantees that all the
602 // LInsXYZ classes have a size that is a multiple of word size (but in
603 // practice all sane compilers use a layout that results in this). We
604 // also check that every LInsXYZ is word-aligned in
605 // LirBuffer::makeRoom(); this seems sensible to avoid potential
606 // slowdowns due to misalignment. It relies on chunks themselves being
607 // word-aligned, which is extremely likely.
609 // - There is an enum, LInsRepKind, with one member for each of the
610 // LInsXYZ kinds. Each opcode is categorised with its LInsRepKind value
611 // in LIRopcode.tbl, and this is used in various places.
612 //-----------------------------------------------------------------------
614 enum LInsRepKind {
615 // LRK_XYZ corresponds to class LInsXYZ.
616 LRK_Op0,
617 LRK_Op1,
618 LRK_Op2,
619 LRK_Op3,
620 LRK_Ld,
621 LRK_St,
622 LRK_Sk,
623 LRK_C,
624 LRK_P,
625 LRK_I,
626 LRK_QorD,
627 LRK_Jtbl,
628 LRK_None // this one is used for unused opcode numbers
631 class LInsOp0;
632 class LInsOp1;
633 class LInsOp2;
634 class LInsOp3;
635 class LInsLd;
636 class LInsSt;
637 class LInsSk;
638 class LInsC;
639 class LInsP;
640 class LInsI;
641 class LInsQorD;
642 class LInsJtbl;
644 class LIns
646 private:
647 // SharedFields: fields shared by all LIns kinds.
649 // The .inReg, .regnum, .inAr and .arIndex fields form a "reservation"
650 // that is used temporarily during assembly to record information
651 // relating to register allocation. See class RegAlloc for more
652 // details. Note: all combinations of .inReg/.inAr are possible, ie.
653 // 0/0, 0/1, 1/0, 1/1.
655 // The .isResultLive field is only used for instructions that return
656 // results. It indicates if the result is live. It's set (if
657 // appropriate) and used only during the codegen pass.
659 struct SharedFields {
660 uint32_t inReg:1; // if 1, 'reg' is active
661 uint32_t regnum:7;
662 uint32_t inAr:1; // if 1, 'arIndex' is active
663 uint32_t isResultLive:1; // if 1, the instruction's result is live
665 uint32_t arIndex:14; // index into stack frame; displ is -4*arIndex
667 LOpcode opcode:8; // instruction's opcode
670 union {
671 SharedFields sharedFields;
672 // Force sizeof(LIns)==8 and 8-byte alignment on 64-bit machines.
673 // This is necessary because sizeof(SharedFields)==4 and we want all
674 // instances of LIns to be pointer-aligned.
675 void* wholeWord;
678 inline void initSharedFields(LOpcode opcode)
 680             // We must zero .inReg, .inAr and .isResultLive, but zeroing the
681 // whole word is easier. Then we set the opcode.
682 wholeWord = 0;
683 sharedFields.opcode = opcode;
686 // LIns-to-LInsXYZ converters.
687 inline LInsOp0* toLInsOp0() const;
688 inline LInsOp1* toLInsOp1() const;
689 inline LInsOp2* toLInsOp2() const;
690 inline LInsOp3* toLInsOp3() const;
691 inline LInsLd* toLInsLd() const;
692 inline LInsSt* toLInsSt() const;
693 inline LInsSk* toLInsSk() const;
694 inline LInsC* toLInsC() const;
695 inline LInsP* toLInsP() const;
696 inline LInsI* toLInsI() const;
697 inline LInsQorD* toLInsQorD() const;
698 inline LInsJtbl*toLInsJtbl()const;
700 void staticSanityCheck();
702 public:
703 // LIns initializers.
704 inline void initLInsOp0(LOpcode opcode);
705 inline void initLInsOp1(LOpcode opcode, LIns* oprnd1);
706 inline void initLInsOp2(LOpcode opcode, LIns* oprnd1, LIns* oprnd2);
707 inline void initLInsOp3(LOpcode opcode, LIns* oprnd1, LIns* oprnd2, LIns* oprnd3);
708 inline void initLInsLd(LOpcode opcode, LIns* val, int32_t d, AccSet accSet, LoadQual loadQual);
709 inline void initLInsSt(LOpcode opcode, LIns* val, LIns* base, int32_t d, AccSet accSet);
710 inline void initLInsSk(LIns* prevLIns);
711 // Nb: args[] must be allocated and initialised before being passed in;
712 // initLInsC() just copies the pointer into the LInsC.
713 inline void initLInsC(LOpcode opcode, LIns** args, const CallInfo* ci);
714 inline void initLInsP(int32_t arg, int32_t kind);
715 inline void initLInsI(LOpcode opcode, int32_t immI);
716 inline void initLInsQorD(LOpcode opcode, uint64_t immQorD);
717 inline void initLInsJtbl(LIns* index, uint32_t size, LIns** table);
719 LOpcode opcode() const { return sharedFields.opcode; }
721 // Generally, void instructions (statements) are always live and
722 // non-void instructions (expressions) are live if used by another
723 // live instruction. But there are some trickier cases.
724 // Any non-void instruction can be marked isResultLive=1 even
725 // when it is unreachable, e.g. due to an always-taken branch.
726 // The assembler marks it live if it sees any uses, regardless of
727 // whether those uses are in reachable code or not.
728 bool isLive() const {
729 return isV() ||
730 sharedFields.isResultLive ||
731 (isCall() && !callInfo()->_isPure) || // impure calls are always live
732 isop(LIR_paramp); // LIR_paramp is always live
734 void setResultLive() {
735 NanoAssert(!isV());
736 sharedFields.isResultLive = 1;
739 // XXX: old reservation manipulating functions. See bug 538924.
740 // Replacement strategy:
741 // - deprecated_markAsClear() --> clearReg() and/or clearArIndex()
742 // - deprecated_hasKnownReg() --> isInReg()
743 // - deprecated_getReg() --> getReg() after checking isInReg()
745 void deprecated_markAsClear() {
746 sharedFields.inReg = 0;
747 sharedFields.inAr = 0;
749 bool deprecated_hasKnownReg() {
750 NanoAssert(isExtant());
751 return isInReg();
753 Register deprecated_getReg() {
754 NanoAssert(isExtant());
755 if (isInReg()) {
756 Register r = { sharedFields.regnum };
757 return r;
758 } else {
759 return deprecated_UnknownReg;
762 uint32_t deprecated_getArIndex() {
763 NanoAssert(isExtant());
764 return ( isInAr() ? sharedFields.arIndex : 0 );
767 // Reservation manipulation.
769 // "Extant" mean "in existence, still existing, surviving". In other
770 // words, has the value been computed explicitly (not folded into
771 // something else) and is it still available (in a register or spill
772 // slot) for use?
773 bool isExtant() {
774 return isInReg() || isInAr();
776 bool isInReg() {
777 return sharedFields.inReg;
779 bool isInRegMask(RegisterMask allow) {
780 return isInReg() && (rmask(getReg()) & allow);
782 Register getReg() {
783 NanoAssert(isInReg());
784 Register r = { sharedFields.regnum };
785 return r;
787 void setReg(Register r) {
788 sharedFields.inReg = 1;
789 sharedFields.regnum = REGNUM(r);
791 void clearReg() {
792 sharedFields.inReg = 0;
794 bool isInAr() {
795 return sharedFields.inAr;
797 uint32_t getArIndex() {
798 NanoAssert(isInAr());
799 return sharedFields.arIndex;
801 void setArIndex(uint32_t arIndex) {
802 sharedFields.inAr = 1;
803 sharedFields.arIndex = arIndex;
805 void clearArIndex() {
806 sharedFields.inAr = 0;
809 // For various instruction kinds.
810 inline LIns* oprnd1() const;
811 inline LIns* oprnd2() const;
812 inline LIns* oprnd3() const;
814 // For branches.
815 inline LIns* getTarget() const;
816 inline void setTarget(LIns* label);
818 // For guards.
819 inline GuardRecord* record() const;
821 // For loads.
822 inline LoadQual loadQual() const;
824 // For loads/stores.
825 inline int32_t disp() const;
826 inline MiniAccSet miniAccSet() const;
827 inline AccSet accSet() const;
829 // For LInsSk.
830 inline LIns* prevLIns() const;
832 // For LInsP.
833 inline uint8_t paramArg() const;
834 inline uint8_t paramKind() const;
836 // For LInsI.
837 inline int32_t immI() const;
839 // For LInsQorD.
840 #ifdef NANOJIT_64BIT
841 inline int32_t immQlo() const;
842 inline uint64_t immQ() const;
843 #endif
844 inline int32_t immDlo() const;
845 inline int32_t immDhi() const;
846 inline double immD() const;
847 inline uint64_t immDasQ() const;
849 // For LIR_allocp.
850 inline int32_t size() const;
851 inline void setSize(int32_t nbytes);
853 // For LInsC.
854 inline LIns* arg(uint32_t i) const; // right-to-left-order: arg(0) is rightmost
855 inline uint32_t argc() const;
856 inline LIns* callArgN(uint32_t n) const;
857 inline const CallInfo* callInfo() const;
859 // For LIR_jtbl
860 inline uint32_t getTableSize() const;
861 inline LIns* getTarget(uint32_t index) const;
862 inline void setTarget(uint32_t index, LIns* label) const;
864 // isLInsXYZ() returns true if the instruction has the LInsXYZ form.
865 // Note that there is some overlap with other predicates, eg.
866 // isStore()==isLInsSt(), isCall()==isLInsC(), but that's ok; these
867 // ones are used mostly to check that opcodes are appropriate for
868 // instruction layouts, the others are used for non-debugging
869 // purposes.
870 bool isLInsOp0() const {
871 NanoAssert(LRK_None != repKinds[opcode()]);
872 return LRK_Op0 == repKinds[opcode()];
874 bool isLInsOp1() const {
875 NanoAssert(LRK_None != repKinds[opcode()]);
876 return LRK_Op1 == repKinds[opcode()];
878 bool isLInsOp2() const {
879 NanoAssert(LRK_None != repKinds[opcode()]);
880 return LRK_Op2 == repKinds[opcode()];
882 bool isLInsOp3() const {
883 NanoAssert(LRK_None != repKinds[opcode()]);
884 return LRK_Op3 == repKinds[opcode()];
886 bool isLInsLd() const {
887 NanoAssert(LRK_None != repKinds[opcode()]);
888 return LRK_Ld == repKinds[opcode()];
890 bool isLInsSt() const {
891 NanoAssert(LRK_None != repKinds[opcode()]);
892 return LRK_St == repKinds[opcode()];
894 bool isLInsSk() const {
895 NanoAssert(LRK_None != repKinds[opcode()]);
896 return LRK_Sk == repKinds[opcode()];
898 bool isLInsC() const {
899 NanoAssert(LRK_None != repKinds[opcode()]);
900 return LRK_C == repKinds[opcode()];
902 bool isLInsP() const {
903 NanoAssert(LRK_None != repKinds[opcode()]);
904 return LRK_P == repKinds[opcode()];
906 bool isLInsI() const {
907 NanoAssert(LRK_None != repKinds[opcode()]);
908 return LRK_I == repKinds[opcode()];
910 bool isLInsQorD() const {
911 NanoAssert(LRK_None != repKinds[opcode()]);
912 return LRK_QorD == repKinds[opcode()];
914 bool isLInsJtbl() const {
915 NanoAssert(LRK_None != repKinds[opcode()]);
916 return LRK_Jtbl == repKinds[opcode()];
919 // LIns predicates.
920 bool isop(LOpcode o) const {
921 return opcode() == o;
923 bool isRet() const {
924 return isRetOpcode(opcode());
926 bool isCmp() const {
927 return isCmpOpcode(opcode());
929 bool isCall() const {
930 return isop(LIR_callv) ||
931 isop(LIR_calli) ||
932 #if defined NANOJIT_64BIT
933 isop(LIR_callq) ||
934 #endif
935 isop(LIR_calld);
937 bool isCmov() const {
938 return isCmovOpcode(opcode());
940 bool isStore() const {
941 return isLInsSt();
943 bool isLoad() const {
944 return isLInsLd();
946 bool isGuard() const {
947 return isop(LIR_x) || isop(LIR_xf) || isop(LIR_xt) ||
948 isop(LIR_xbarrier) || isop(LIR_xtbl) ||
949 isop(LIR_addxovi) || isop(LIR_subxovi) || isop(LIR_mulxovi);
951 bool isJov() const {
952 return
953 #ifdef NANOJIT_64BIT
954 isop(LIR_addjovq) || isop(LIR_subjovq) ||
955 #endif
956 isop(LIR_addjovi) || isop(LIR_subjovi) || isop(LIR_muljovi);
958 // True if the instruction is a 32-bit integer immediate.
959 bool isImmI() const {
960 return isop(LIR_immi);
962 // True if the instruction is a 32-bit integer immediate and
963 // has the value 'val' when treated as a 32-bit signed integer.
964 bool isImmI(int32_t val) const {
965 return isImmI() && immI()==val;
967 #ifdef NANOJIT_64BIT
968 // True if the instruction is a 64-bit integer immediate.
969 bool isImmQ() const {
970 return isop(LIR_immq);
972 #endif
973 // True if the instruction is a pointer-sized integer immediate.
974 bool isImmP() const
976 #ifdef NANOJIT_64BIT
977 return isImmQ();
978 #else
979 return isImmI();
980 #endif
982 // True if the instruction is a 64-bit float immediate.
983 bool isImmD() const {
984 return isop(LIR_immd);
986 // True if the instruction is a 64-bit integer or float immediate.
987 bool isImmQorD() const {
988 return
989 #ifdef NANOJIT_64BIT
990 isImmQ() ||
991 #endif
992 isImmD();
 994         // True if the instruction is any type of immediate.
995 bool isImmAny() const {
996 return isImmI() || isImmQorD();
999 bool isBranch() const {
1000 return isop(LIR_jt) || isop(LIR_jf) || isop(LIR_j) || isop(LIR_jtbl) || isJov();
1003 LTy retType() const {
1004 return retTypes[opcode()];
1006 bool isV() const {
1007 return retType() == LTy_V;
1009 bool isI() const {
1010 return retType() == LTy_I;
1012 #ifdef NANOJIT_64BIT
1013 bool isQ() const {
1014 return retType() == LTy_Q;
1016 #endif
1017 bool isD() const {
1018 return retType() == LTy_D;
1020 bool isQorD() const {
1021 return
1022 #ifdef NANOJIT_64BIT
1023 isQ() ||
1024 #endif
1025 isD();
1027 bool isP() const {
1028 #ifdef NANOJIT_64BIT
1029 return isQ();
1030 #else
1031 return isI();
1032 #endif
1035 inline void* immP() const
1037 #ifdef NANOJIT_64BIT
1038 return (void*)immQ();
1039 #else
1040 return (void*)immI();
1041 #endif
1045 typedef SeqBuilder<LIns*> InsList;
1046 typedef SeqBuilder<char*> StringList;
1049 // 0-operand form. Used for LIR_start and LIR_label.
1050 class LInsOp0
1052 private:
1053 friend class LIns;
1055 LIns ins;
1057 public:
1058 LIns* getLIns() { return &ins; };
1061 // 1-operand form. Used for LIR_reti, unary arithmetic/logic ops, etc.
1062 class LInsOp1
1064 private:
1065 friend class LIns;
1067 LIns* oprnd_1;
1069 LIns ins;
1071 public:
1072 LIns* getLIns() { return &ins; };
1075 // 2-operand form. Used for guards, branches, comparisons, binary
1076 // arithmetic/logic ops, etc.
1077 class LInsOp2
1079 private:
1080 friend class LIns;
1082 LIns* oprnd_2;
1084 LIns* oprnd_1;
1086 LIns ins;
1088 public:
1089 LIns* getLIns() { return &ins; };
1092 // 3-operand form. Used for conditional moves, jov branches, and xov guards.
1093 class LInsOp3
1095 private:
1096 friend class LIns;
1098 LIns* oprnd_3;
1100 LIns* oprnd_2;
1102 LIns* oprnd_1;
1104 LIns ins;
1106 public:
1107 LIns* getLIns() { return &ins; };
1110 // Used for all loads.
1111 class LInsLd
1113 private:
1114 friend class LIns;
1116 // Nb: the LIR writer pipeline handles things if a displacement
1117 // exceeds 16 bits. This is rare, but does happen occasionally. We
1118 // could go to 24 bits but then it would happen so rarely that the
1119 // handler code would be difficult to test and thus untrustworthy.
1121 // Nb: the types of these bitfields are all 32-bit integers to ensure
1122 // they are fully packed on Windows, sigh. Also, 'loadQual' is
1123 // unsigned to ensure the values 0, 1, and 2 all fit in 2 bits.
1125 // Nb: explicit signed keyword for bitfield types is required,
1126 // some compilers may treat them as unsigned without it.
1127 // See Bugzilla 584219 comment #18
1128 signed int disp:16;
1129 signed int miniAccSetVal:8;
1130 uint32_t loadQual:2;
1132 LIns* oprnd_1;
1134 LIns ins;
1136 public:
1137 LIns* getLIns() { return &ins; };
1140 // Used for all stores.
1141 class LInsSt
1143 private:
1144 friend class LIns;
1146 int16_t disp;
1147 MiniAccSetVal miniAccSetVal;
1149 LIns* oprnd_2;
1151 LIns* oprnd_1;
1153 LIns ins;
1155 public:
1156 LIns* getLIns() { return &ins; };
1159 // Used for LIR_skip.
1160 class LInsSk
1162 private:
1163 friend class LIns;
1165 LIns* prevLIns;
1167 LIns ins;
1169 public:
1170 LIns* getLIns() { return &ins; };
1173 // Used for all variants of LIR_call.
1174 class LInsC
1176 private:
1177 friend class LIns;
1179 // Arguments in reverse order, just like insCall() (ie. args[0] holds
1180 // the rightmost arg). The array should be allocated by the same
1181 // allocator as the LIR buffers, because it has the same lifetime.
1182 LIns** args;
1184 const CallInfo* ci;
1186 LIns ins;
1188 public:
1189 LIns* getLIns() { return &ins; };
1192 // Used for LIR_paramp.
1193 class LInsP
1195 private:
1196 friend class LIns;
1198 uintptr_t arg:8;
1199 uintptr_t kind:8;
1201 LIns ins;
1203 public:
1204 LIns* getLIns() { return &ins; };
1207 // Used for LIR_immi and LIR_allocp.
1208 class LInsI
1210 private:
1211 friend class LIns;
1213 int32_t immI;
1215 LIns ins;
1217 public:
1218 LIns* getLIns() { return &ins; };
1221 // Used for LIR_immq and LIR_immd.
1222 class LInsQorD
1224 private:
1225 friend class LIns;
1227 int32_t immQorDlo;
1229 int32_t immQorDhi;
1231 LIns ins;
1233 public:
1234 LIns* getLIns() { return &ins; };
1237 // Used for LIR_jtbl. 'oprnd_1' must be a uint32_t index in
1238 // the range 0 <= index < size; no range check is performed.
1239 // 'table' is an array of labels.
1240 class LInsJtbl
1242 private:
1243 friend class LIns;
1245 uint32_t size; // number of entries in table
1246 LIns** table; // pointer to table[size] with same lifetime as this LInsJtbl
1247 LIns* oprnd_1; // uint32_t index expression
1249 LIns ins;
1251 public:
1252 LIns* getLIns() { return &ins; }
1255 // Used only as a placeholder for OP___ macros for unused opcodes in
1256 // LIRopcode.tbl.
1257 class LInsNone
1261 LInsOp0* LIns::toLInsOp0() const { return (LInsOp0* )(uintptr_t(this+1) - sizeof(LInsOp0 )); }
1262 LInsOp1* LIns::toLInsOp1() const { return (LInsOp1* )(uintptr_t(this+1) - sizeof(LInsOp1 )); }
1263 LInsOp2* LIns::toLInsOp2() const { return (LInsOp2* )(uintptr_t(this+1) - sizeof(LInsOp2 )); }
1264 LInsOp3* LIns::toLInsOp3() const { return (LInsOp3* )(uintptr_t(this+1) - sizeof(LInsOp3 )); }
1265 LInsLd* LIns::toLInsLd() const { return (LInsLd* )(uintptr_t(this+1) - sizeof(LInsLd )); }
1266 LInsSt* LIns::toLInsSt() const { return (LInsSt* )(uintptr_t(this+1) - sizeof(LInsSt )); }
1267 LInsSk* LIns::toLInsSk() const { return (LInsSk* )(uintptr_t(this+1) - sizeof(LInsSk )); }
1268 LInsC* LIns::toLInsC() const { return (LInsC* )(uintptr_t(this+1) - sizeof(LInsC )); }
1269 LInsP* LIns::toLInsP() const { return (LInsP* )(uintptr_t(this+1) - sizeof(LInsP )); }
1270 LInsI* LIns::toLInsI() const { return (LInsI* )(uintptr_t(this+1) - sizeof(LInsI )); }
1271 LInsQorD* LIns::toLInsQorD() const { return (LInsQorD*)(uintptr_t(this+1) - sizeof(LInsQorD)); }
1272 LInsJtbl* LIns::toLInsJtbl() const { return (LInsJtbl*)(uintptr_t(this+1) - sizeof(LInsJtbl)); }
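    // Worked example (an addition, not from the original header). On a 32-bit
    // build with no struct padding, LInsOp2 is three words: oprnd_2, oprnd_1,
    // ins. An LIns* 'p' points at the trailing 'ins' word, so
    //     toLInsOp2() == (LInsOp2*)(uintptr_t(p + 1) - sizeof(LInsOp2))
    // steps to the end of the instruction and backs up over the whole struct,
    // landing on its first word (oprnd_2). staticSanityCheck() statically
    // checks the size/layout assumptions these conversions rely on.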
1274 void LIns::initLInsOp0(LOpcode opcode) {
1275 initSharedFields(opcode);
1276 NanoAssert(isLInsOp0());
1278 void LIns::initLInsOp1(LOpcode opcode, LIns* oprnd1) {
1279 initSharedFields(opcode);
1280 toLInsOp1()->oprnd_1 = oprnd1;
1281 NanoAssert(isLInsOp1());
1283 void LIns::initLInsOp2(LOpcode opcode, LIns* oprnd1, LIns* oprnd2) {
1284 initSharedFields(opcode);
1285 toLInsOp2()->oprnd_1 = oprnd1;
1286 toLInsOp2()->oprnd_2 = oprnd2;
1287 NanoAssert(isLInsOp2());
1289 void LIns::initLInsOp3(LOpcode opcode, LIns* oprnd1, LIns* oprnd2, LIns* oprnd3) {
1290 initSharedFields(opcode);
1291 toLInsOp3()->oprnd_1 = oprnd1;
1292 toLInsOp3()->oprnd_2 = oprnd2;
1293 toLInsOp3()->oprnd_3 = oprnd3;
1294 NanoAssert(isLInsOp3());
1296 void LIns::initLInsLd(LOpcode opcode, LIns* val, int32_t d, AccSet accSet, LoadQual loadQual) {
1297 initSharedFields(opcode);
1298 toLInsLd()->oprnd_1 = val;
1299 NanoAssert(d == int16_t(d));
1300 toLInsLd()->disp = int16_t(d);
1301 toLInsLd()->miniAccSetVal = compressAccSet(accSet).val;
1302 toLInsLd()->loadQual = loadQual;
1303 NanoAssert(isLInsLd());
1305 void LIns::initLInsSt(LOpcode opcode, LIns* val, LIns* base, int32_t d, AccSet accSet) {
1306 initSharedFields(opcode);
1307 toLInsSt()->oprnd_1 = val;
1308 toLInsSt()->oprnd_2 = base;
1309 NanoAssert(d == int16_t(d));
1310 toLInsSt()->disp = int16_t(d);
1311 toLInsSt()->miniAccSetVal = compressAccSet(accSet).val;
1312 NanoAssert(isLInsSt());
1314 void LIns::initLInsSk(LIns* prevLIns) {
1315 initSharedFields(LIR_skip);
1316 toLInsSk()->prevLIns = prevLIns;
1317 NanoAssert(isLInsSk());
1319 void LIns::initLInsC(LOpcode opcode, LIns** args, const CallInfo* ci) {
1320 initSharedFields(opcode);
1321 toLInsC()->args = args;
1322 toLInsC()->ci = ci;
1323 NanoAssert(isLInsC());
1325 void LIns::initLInsP(int32_t arg, int32_t kind) {
1326 initSharedFields(LIR_paramp);
1327 NanoAssert(isU8(arg) && isU8(kind));
1328 toLInsP()->arg = arg;
1329 toLInsP()->kind = kind;
1330 NanoAssert(isLInsP());
1332 void LIns::initLInsI(LOpcode opcode, int32_t immI) {
1333 initSharedFields(opcode);
1334 toLInsI()->immI = immI;
1335 NanoAssert(isLInsI());
1337 void LIns::initLInsQorD(LOpcode opcode, uint64_t immQorD) {
1338 initSharedFields(opcode);
1339 toLInsQorD()->immQorDlo = int32_t(immQorD);
1340 toLInsQorD()->immQorDhi = int32_t(immQorD >> 32);
1341 NanoAssert(isLInsQorD());
1343 void LIns::initLInsJtbl(LIns* index, uint32_t size, LIns** table) {
1344 initSharedFields(LIR_jtbl);
1345 toLInsJtbl()->oprnd_1 = index;
1346 toLInsJtbl()->table = table;
1347 toLInsJtbl()->size = size;
1348 NanoAssert(isLInsJtbl());
1351 LIns* LIns::oprnd1() const {
1352 NanoAssert(isLInsOp1() || isLInsOp2() || isLInsOp3() || isLInsLd() || isLInsSt() || isLInsJtbl());
1353 return toLInsOp2()->oprnd_1;
1355 LIns* LIns::oprnd2() const {
1356 NanoAssert(isLInsOp2() || isLInsOp3() || isLInsSt());
1357 return toLInsOp2()->oprnd_2;
1359 LIns* LIns::oprnd3() const {
1360 NanoAssert(isLInsOp3());
1361 return toLInsOp3()->oprnd_3;
1364 LIns* LIns::getTarget() const {
1365 NanoAssert(isBranch() && !isop(LIR_jtbl));
1366 if (isJov())
1367 return oprnd3();
1368 else
1369 return oprnd2();
1372 void LIns::setTarget(LIns* label) {
1373 NanoAssert(label && label->isop(LIR_label));
1374 NanoAssert(isBranch() && !isop(LIR_jtbl));
1375 if (isJov())
1376 toLInsOp3()->oprnd_3 = label;
1377 else
1378 toLInsOp2()->oprnd_2 = label;
1381 LIns* LIns::getTarget(uint32_t index) const {
1382 NanoAssert(isop(LIR_jtbl));
1383 NanoAssert(index < toLInsJtbl()->size);
1384 return toLInsJtbl()->table[index];
1387 void LIns::setTarget(uint32_t index, LIns* label) const {
1388 NanoAssert(label && label->isop(LIR_label));
1389 NanoAssert(isop(LIR_jtbl));
1390 NanoAssert(index < toLInsJtbl()->size);
1391 toLInsJtbl()->table[index] = label;
1394 GuardRecord *LIns::record() const {
1395 NanoAssert(isGuard());
1396 switch (opcode()) {
1397 case LIR_x:
1398 case LIR_xt:
1399 case LIR_xf:
1400 case LIR_xtbl:
1401 case LIR_xbarrier:
1402 return (GuardRecord*)oprnd2();
1404 case LIR_addxovi:
1405 case LIR_subxovi:
1406 case LIR_mulxovi:
1407 return (GuardRecord*)oprnd3();
1409 default:
1410 NanoAssert(0);
1411 return NULL;
1415 LoadQual LIns::loadQual() const {
1416 NanoAssert(isLInsLd());
1417 return (LoadQual)toLInsLd()->loadQual;
1420 int32_t LIns::disp() const {
1421 if (isLInsSt()) {
1422 return toLInsSt()->disp;
1423 } else {
1424 NanoAssert(isLInsLd());
1425 return toLInsLd()->disp;
1429 MiniAccSet LIns::miniAccSet() const {
1430 MiniAccSet miniAccSet;
1431 if (isLInsSt()) {
1432 miniAccSet.val = toLInsSt()->miniAccSetVal;
1433 } else {
1434 NanoAssert(isLInsLd());
1435 miniAccSet.val = toLInsLd()->miniAccSetVal;
1437 return miniAccSet;
1440 AccSet LIns::accSet() const {
1441 return decompressMiniAccSet(miniAccSet());
1444 LIns* LIns::prevLIns() const {
1445 NanoAssert(isLInsSk());
1446 return toLInsSk()->prevLIns;
1449 inline uint8_t LIns::paramArg() const { NanoAssert(isop(LIR_paramp)); return toLInsP()->arg; }
1450 inline uint8_t LIns::paramKind() const { NanoAssert(isop(LIR_paramp)); return toLInsP()->kind; }
1452 inline int32_t LIns::immI() const { NanoAssert(isImmI()); return toLInsI()->immI; }
1454 #ifdef NANOJIT_64BIT
1455 inline int32_t LIns::immQlo() const { NanoAssert(isImmQ()); return toLInsQorD()->immQorDlo; }
1456 uint64_t LIns::immQ() const {
1457 NanoAssert(isImmQ());
1458 return (uint64_t(toLInsQorD()->immQorDhi) << 32) | uint32_t(toLInsQorD()->immQorDlo);
1460 #endif
1461 inline int32_t LIns::immDlo() const { NanoAssert(isImmD()); return toLInsQorD()->immQorDlo; }
1462 inline int32_t LIns::immDhi() const { NanoAssert(isImmD()); return toLInsQorD()->immQorDhi; }
1463 double LIns::immD() const {
1464 NanoAssert(isImmD());
1465 union {
1466 double f;
1467 uint64_t q;
1468 } u;
1469 u.q = immDasQ();
1470 return u.f;
1472 uint64_t LIns::immDasQ() const {
1473 NanoAssert(isImmD());
1474 return (uint64_t(toLInsQorD()->immQorDhi) << 32) | uint32_t(toLInsQorD()->immQorDlo);
1477 int32_t LIns::size() const {
1478 NanoAssert(isop(LIR_allocp));
1479 return toLInsI()->immI << 2;
1482 void LIns::setSize(int32_t nbytes) {
1483 NanoAssert(isop(LIR_allocp));
1484 NanoAssert(nbytes > 0);
1485 toLInsI()->immI = (nbytes+3)>>2; // # of required 32bit words
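    // Worked example (an addition): setSize(10) stores (10+3)>>2 == 3, and
    // size() then reports 3<<2 == 12 bytes -- the request is rounded up to a
    // whole number of 32-bit words.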
1488 // Index args in reverse order, i.e. arg(0) returns the rightmost arg.
1489 // Nb: this must be kept in sync with insCall().
1490 LIns* LIns::arg(uint32_t i) const
1492 NanoAssert(isCall());
1493 NanoAssert(i < callInfo()->count_args());
1494 return toLInsC()->args[i]; // args[] is in right-to-left order as well
1497 uint32_t LIns::argc() const {
1498 return callInfo()->count_args();
1501 LIns* LIns::callArgN(uint32_t n) const
1503 return arg(argc()-n-1);
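    // Worked example (an addition): for a call 'f(a, b, c)', args[] is held
    // right-to-left, so arg(0) == c and arg(2) == a, while callArgN(0) == a
    // and callArgN(2) == c (left-to-right).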
1506 const CallInfo* LIns::callInfo() const
1508 NanoAssert(isCall());
1509 return toLInsC()->ci;
1512 uint32_t LIns::getTableSize() const
1514 NanoAssert(isLInsJtbl());
1515 return toLInsJtbl()->size;
1518 class LirWriter
1520 public:
1521 LirWriter *out;
1523 LirWriter(LirWriter* out)
1524 : out(out) {}
1525 virtual ~LirWriter() {}
1527 virtual LIns* ins0(LOpcode v) {
1528 return out->ins0(v);
1530 virtual LIns* ins1(LOpcode v, LIns* a) {
1531 return out->ins1(v, a);
1533 virtual LIns* ins2(LOpcode v, LIns* a, LIns* b) {
1534 return out->ins2(v, a, b);
1536 virtual LIns* ins3(LOpcode v, LIns* a, LIns* b, LIns* c) {
1537 return out->ins3(v, a, b, c);
1539 virtual LIns* insGuard(LOpcode v, LIns *c, GuardRecord *gr) {
1540 return out->insGuard(v, c, gr);
1542 virtual LIns* insGuardXov(LOpcode v, LIns *a, LIns* b, GuardRecord *gr) {
1543 return out->insGuardXov(v, a, b, gr);
1545 virtual LIns* insBranch(LOpcode v, LIns* condition, LIns* to) {
1546 return out->insBranch(v, condition, to);
1548 virtual LIns* insBranchJov(LOpcode v, LIns* a, LIns* b, LIns* to) {
1549 return out->insBranchJov(v, a, b, to);
1551 // arg: 0=first, 1=second, ...
1552 // kind: 0=arg 1=saved-reg
1553 virtual LIns* insParam(int32_t arg, int32_t kind) {
1554 return out->insParam(arg, kind);
1556 virtual LIns* insImmI(int32_t imm) {
1557 return out->insImmI(imm);
1559 #ifdef NANOJIT_64BIT
1560 virtual LIns* insImmQ(uint64_t imm) {
1561 return out->insImmQ(imm);
1563 #endif
1564 virtual LIns* insImmD(double d) {
1565 return out->insImmD(d);
1567 virtual LIns* insLoad(LOpcode op, LIns* base, int32_t d, AccSet accSet, LoadQual loadQual) {
1568 return out->insLoad(op, base, d, accSet, loadQual);
1570 virtual LIns* insStore(LOpcode op, LIns* value, LIns* base, int32_t d, AccSet accSet) {
1571 return out->insStore(op, value, base, d, accSet);
1573 // args[] is in reverse order, ie. args[0] holds the rightmost arg.
1574 virtual LIns* insCall(const CallInfo *call, LIns* args[]) {
1575 return out->insCall(call, args);
1577 virtual LIns* insAlloc(int32_t size) {
1578 NanoAssert(size != 0);
1579 return out->insAlloc(size);
1581 virtual LIns* insJtbl(LIns* index, uint32_t size) {
1582 return out->insJtbl(index, size);
1584 virtual LIns* insComment(const char* str) {
1585 return out->insComment(str);
1588 // convenience functions
 1590         // Inserts a conditional that yields 'iftrue' if 'cond' is true and 'iffalse'
 1591         // otherwise (as a cmov when 'use_cmov' is set, otherwise via branches).
1592 LIns* insChoose(LIns* cond, LIns* iftrue, LIns* iffalse, bool use_cmov);
1594 // Inserts an integer comparison to 0
1595 LIns* insEqI_0(LIns* oprnd1) {
1596 return ins2ImmI(LIR_eqi, oprnd1, 0);
1599 // Inserts a pointer comparison to 0
1600 LIns* insEqP_0(LIns* oprnd1) {
1601 return ins2(LIR_eqp, oprnd1, insImmWord(0));
1604 // Inserts a binary operation where the second operand is an
1605 // integer immediate.
1606 LIns* ins2ImmI(LOpcode v, LIns* oprnd1, int32_t imm) {
1607 return ins2(v, oprnd1, insImmI(imm));
1610 LIns* insImmP(const void *ptr) {
1611 #ifdef NANOJIT_64BIT
1612 return insImmQ((uint64_t)ptr);
1613 #else
1614 return insImmI((int32_t)ptr);
1615 #endif
1618 LIns* insImmWord(intptr_t value) {
1619 #ifdef NANOJIT_64BIT
1620 return insImmQ(value);
1621 #else
1622 return insImmI(value);
1623 #endif
1626 // Sign-extend integers to native integers. On 32-bit this is a no-op.
1627 LIns* insI2P(LIns* intIns) {
1628 #ifdef NANOJIT_64BIT
1629 return ins1(LIR_i2q, intIns);
1630 #else
1631 return intIns;
1632 #endif
1635 // Zero-extend integers to native integers. On 32-bit this is a no-op.
1636 LIns* insUI2P(LIns* uintIns) {
1637 #ifdef NANOJIT_64BIT
1638 return ins1(LIR_ui2uq, uintIns);
1639 #else
1640 return uintIns;
1641 #endif
1644 // Do a load with LoadQual==LOAD_NORMAL.
1645 LIns* insLoad(LOpcode op, LIns* base, int32_t d, AccSet accSet) {
1646 return insLoad(op, base, d, accSet, LOAD_NORMAL);
1649 // Chooses LIR_sti, LIR_stq or LIR_std according to the type of 'value'.
1650 LIns* insStore(LIns* value, LIns* base, int32_t d, AccSet accSet);
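    // Typical use (an illustrative sketch, not part of the original header;
    // 'lir' is assumed to be the head of a writer pipeline and 'base' an
    // existing pointer-typed LIns*, e.g. a parameter):
    //
    //     LIns* x   = lir->insLoad(LIR_ldi, base, 8, ACCSET_LOAD_ANY);  // LOAD_NORMAL
    //     LIns* sum = lir->ins2ImmI(LIR_addi, x, 1);                    // x + 1
    //     lir->insStore(LIR_sti, sum, base, 8, ACCSET_STORE_ANY);
    //
    // The base-class methods simply delegate to 'out', so filters such as
    // ExprFilter or CseFilter can be stacked in front of whichever writer
    // actually records the instructions.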
1654 #ifdef NJ_VERBOSE
1655 extern const char* lirNames[];
1657 // Maps address ranges to meaningful names.
1658 class AddrNameMap
1660 Allocator& allocator;
1661 class Entry
1663 public:
1664 Entry(int) : name(0), size(0), align(0) {}
1665 Entry(char *n, size_t s, size_t a) : name(n), size(s), align(a) {}
1666 char* name;
1667 size_t size:29, align:3;
1669 TreeMap<const void*, Entry*> names; // maps code regions to names
1670 public:
1671 AddrNameMap(Allocator& allocator);
1672 void addAddrRange(const void *p, size_t size, size_t align, const char *name);
1673 void lookupAddr(void *p, char*& name, int32_t& offset);
1676 // Maps LIR instructions to meaningful names.
1677 class LirNameMap
1679 private:
1680 Allocator& alloc;
1682 // A small string-wrapper class, required because we need '==' to
1683 // compare string contents, not string pointers, when strings are used
1684 // as keys in CountMap.
1685 struct Str {
1686 Allocator& alloc;
1687 char* s;
1689 Str(Allocator& alloc_, const char* s_) : alloc(alloc_) {
1690 s = new (alloc) char[1+strlen(s_)];
1691 strcpy(s, s_);
1694 bool operator==(const Str& str) const {
1695 return (0 == strcmp(this->s, str.s));
1699 // Similar to 'struct Str' -- we need to hash the string's contents,
1700 // not its pointer.
1701 template<class K> struct StrHash {
1702 static size_t hash(const Str &k) {
1703 // (const void*) cast is required by ARM RVCT 2.2
1704 return murmurhash((const void*)k.s, strlen(k.s));
1708 template <class Key, class H=DefaultHash<Key> >
1709 class CountMap: public HashMap<Key, int, H> {
1710 public:
1711 CountMap(Allocator& alloc) : HashMap<Key, int, H>(alloc, 128) {}
1712 int add(Key k) {
1713 int c = 1;
1714 if (this->containsKey(k)) {
1715 c = 1+this->get(k);
1717 this->put(k,c);
1718 return c;
1722 CountMap<int> lircounts;
1723 CountMap<const CallInfo *> funccounts;
1724 CountMap<Str, StrHash<Str> > namecounts;
1726 void addNameWithSuffix(LIns* i, const char *s, int suffix, bool ignoreOneSuffix);
1728 class Entry
1730 public:
1731 Entry(int) : name(0) {}
1732 Entry(char* n) : name(n) {}
1733 char* name;
1736 HashMap<LIns*, Entry*> names;
1738 public:
1739 LirNameMap(Allocator& alloc)
1740 : alloc(alloc),
1741 lircounts(alloc),
1742 funccounts(alloc),
1743 namecounts(alloc),
1744 names(alloc)
1747 void addName(LIns* ins, const char *s); // gives 'ins' a special name
1748 const char* createName(LIns* ins); // gives 'ins' a generic name
1749 const char* lookupName(LIns* ins);
1752 // We use big buffers for cases where we need to fit a whole instruction,
1753 // and smaller buffers for all the others. These should easily be long
1754 // enough, but for safety the formatXyz() functions check and won't exceed
1755 // those limits.
1756 class InsBuf {
1757 public:
1758 static const size_t len = 1000;
1759 char buf[len];
1761 class RefBuf {
1762 public:
1763 static const size_t len = 200;
1764 char buf[len];
1767 class LInsPrinter
1769 private:
1770 Allocator& alloc;
1771 const int EMB_NUM_USED_ACCS;
1773 char *formatImmI(RefBuf* buf, int32_t c);
1774 #ifdef NANOJIT_64BIT
1775 char *formatImmQ(RefBuf* buf, uint64_t c);
1776 #endif
1777 char *formatImmD(RefBuf* buf, double c);
1778 void formatGuard(InsBuf* buf, LIns* ins); // defined by the embedder
1779 void formatGuardXov(InsBuf* buf, LIns* ins); // defined by the embedder
1780 static const char* accNames[]; // defined by the embedder
1782 public:
1784 LInsPrinter(Allocator& alloc, int embNumUsedAccs)
1785 : alloc(alloc), EMB_NUM_USED_ACCS(embNumUsedAccs)
1787 addrNameMap = new (alloc) AddrNameMap(alloc);
1788 lirNameMap = new (alloc) LirNameMap(alloc);
1791 char *formatAddr(RefBuf* buf, void* p);
1792 char *formatRef(RefBuf* buf, LIns* ref, bool showImmValue = true);
1793 char *formatIns(InsBuf* buf, LIns* ins);
1794 char *formatAccSet(RefBuf* buf, AccSet accSet);
1796 AddrNameMap* addrNameMap;
1797 LirNameMap* lirNameMap;
1801 class VerboseWriter : public LirWriter
1803 InsList code;
1804 LInsPrinter* printer;
1805 LogControl* logc;
1806 const char* const prefix;
1807 bool const always_flush;
1808 public:
1809 VerboseWriter(Allocator& alloc, LirWriter *out, LInsPrinter* printer, LogControl* logc,
1810 const char* prefix = "", bool always_flush = false)
1811 : LirWriter(out), code(alloc), printer(printer), logc(logc), prefix(prefix), always_flush(always_flush)
1814 LIns* add(LIns* i) {
1815 if (i) {
1816 code.add(i);
1817 if (always_flush)
1818 flush();
1820 return i;
1823 LIns* add_flush(LIns* i) {
1824 if ((i = add(i)) != 0)
1825 flush();
1826 return i;
1829 void flush()
1831 if (!code.isEmpty()) {
1832 InsBuf b;
1833 for (Seq<LIns*>* p = code.get(); p != NULL; p = p->tail)
1834 logc->printf("%s %s\n", prefix, printer->formatIns(&b, p->head));
1835 code.clear();
1839 LIns* insGuard(LOpcode op, LIns* cond, GuardRecord *gr) {
1840 return add_flush(out->insGuard(op,cond,gr));
1843 LIns* insGuardXov(LOpcode op, LIns* a, LIns* b, GuardRecord *gr) {
1844 return add(out->insGuardXov(op,a,b,gr));
1847 LIns* insBranch(LOpcode v, LIns* condition, LIns* to) {
1848 return add_flush(out->insBranch(v, condition, to));
1851 LIns* insBranchJov(LOpcode v, LIns* a, LIns* b, LIns* to) {
1852 return add(out->insBranchJov(v, a, b, to));
1855 LIns* insJtbl(LIns* index, uint32_t size) {
1856 return add_flush(out->insJtbl(index, size));
1859 LIns* ins0(LOpcode v) {
1860 if (v == LIR_label || v == LIR_start) {
1861 flush();
1863 return add(out->ins0(v));
1866 LIns* ins1(LOpcode v, LIns* a) {
1867 return isRetOpcode(v) ? add_flush(out->ins1(v, a)) : add(out->ins1(v, a));
1869 LIns* ins2(LOpcode v, LIns* a, LIns* b) {
1870 return add(out->ins2(v, a, b));
1872 LIns* ins3(LOpcode v, LIns* a, LIns* b, LIns* c) {
1873 return add(out->ins3(v, a, b, c));
1875 LIns* insCall(const CallInfo *call, LIns* args[]) {
1876 return add_flush(out->insCall(call, args));
1878 LIns* insParam(int32_t i, int32_t kind) {
1879 return add(out->insParam(i, kind));
1881 LIns* insLoad(LOpcode v, LIns* base, int32_t disp, AccSet accSet, LoadQual loadQual) {
1882 return add(out->insLoad(v, base, disp, accSet, loadQual));
1884 LIns* insStore(LOpcode op, LIns* v, LIns* b, int32_t d, AccSet accSet) {
1885 return add_flush(out->insStore(op, v, b, d, accSet));
1887 LIns* insAlloc(int32_t size) {
1888 return add(out->insAlloc(size));
1890 LIns* insImmI(int32_t imm) {
1891 return add(out->insImmI(imm));
1893 #ifdef NANOJIT_64BIT
1894 LIns* insImmQ(uint64_t imm) {
1895 return add(out->insImmQ(imm));
1897 #endif
1898 LIns* insImmD(double d) {
1899 return add(out->insImmD(d));
1902 LIns* insComment(const char* str) {
1903 return add_flush(out->insComment(str));
1907 #endif
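// Pipeline sketch (illustrative; 'alloc', 'lirbuf', 'logc' and the inner
// writer 'w' are assumed to come from the embedding; the prefix string is
// arbitrary):
//
//     #ifdef NJ_VERBOSE
//     w = new (alloc) VerboseWriter(alloc, w, lirbuf->printer, logc, "FE: ");
//     #endif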
1909 class ExprFilter: public LirWriter
1911 public:
1912 ExprFilter(LirWriter *out) : LirWriter(out) {}
1913 LIns* ins1(LOpcode v, LIns* a);
1914 LIns* ins2(LOpcode v, LIns* a, LIns* b);
1915 LIns* ins3(LOpcode v, LIns* a, LIns* b, LIns* c);
1916 LIns* insGuard(LOpcode, LIns* cond, GuardRecord *);
1917 LIns* insGuardXov(LOpcode, LIns* a, LIns* b, GuardRecord *);
1918 LIns* insBranch(LOpcode, LIns* cond, LIns* target);
1919 LIns* insBranchJov(LOpcode, LIns* a, LIns* b, LIns* target);
1920 LIns* insLoad(LOpcode op, LIns* base, int32_t off, AccSet accSet, LoadQual loadQual);
1921 private:
1922 LIns* simplifyOverflowArith(LOpcode op, LIns** opnd1, LIns** opnd2);
1925 class CseFilter: public LirWriter
1927 enum NLKind {
1928 // We divide instruction kinds into groups. LIns0 isn't present
1929 // because we don't need to record any 0-ary instructions. Loads
1930 // aren't here; they're handled separately.
1931 NLImmISmall = 0,
1932 NLImmILarge = 1,
1933 NLImmQ = 2, // only occurs on 64-bit platforms
1934 NLImmD = 3,
1935 NL1 = 4,
1936 NL2 = 5,
1937 NL3 = 6,
1938 NLCall = 7,
1940 NLFirst = 0,
1941 NLLast = 7,
1942 // Need a value after "last" to outsmart compilers that insist last+1 is impossible.
1943 NLInvalid = 8
1945 #define nextNLKind(kind) NLKind(kind+1)
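// The tables below are typically walked with this idiom (a sketch; the loop
// variable name is arbitrary):
//
//     for (NLKind kind = NLFirst; kind <= NLLast; kind = nextNLKind(kind))
//         clearNL(kind);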
1947 // There is one table for each NLKind. This lets us size the lists
1948 // appropriately (some instruction kinds are more common than others).
1949 // It also lets us have NLKind-specific find/add/grow functions, which
1950 // are faster than generic versions.
1952 // Nb: the m_listNL and m_capNL sizes must be powers of 2.
1953 // Don't start m_capNL too small, or we'll waste time growing and rehashing.
1954 // Don't start m_capNL too large, or we'll waste memory.
1956 LIns** m_listNL[NLLast + 1];
1957 uint32_t m_capNL[ NLLast + 1];
1958 uint32_t m_usedNL[NLLast + 1];
1959 typedef uint32_t (CseFilter::*find_t)(LIns*);
1960 find_t m_findNL[NLLast + 1];
1962 // Similarly, for loads, there is one table for each CseAcc. A CseAcc
1963 // is like a normal access region, but there are two extra possible
1964 // values: CSE_ACC_CONST, which is where we put all CONST-qualified
1965 // loads, and CSE_ACC_MULTIPLE, where we put all multi-region loads.
1966 // All remaining loads are single-region and go in the table entry for
1967 // their region.
1969 // This arrangement makes the removal of invalidated loads fast -- we
1970 // can invalidate all loads from a single region by clearing that
1971 // region's table.
1973 typedef uint8_t CseAcc; // same type as MiniAccSet
1975 static const uint8_t CSE_NUM_ACCS = NUM_ACCS + 2;
1977 // These values would be 'static const' except they are defined in
1978 // terms of EMB_NUM_USED_ACCS which is itself not 'static const'
1979 // because it's passed in by the embedding.
1980 const uint8_t EMB_NUM_USED_ACCS; // number of access regions used by the embedding
1981 const uint8_t CSE_NUM_USED_ACCS; // EMB_NUM_USED_ACCS + 2
1982 const CseAcc CSE_ACC_CONST; // EMB_NUM_USED_ACCS + 0
1983 const CseAcc CSE_ACC_MULTIPLE; // EMB_NUM_USED_ACCS + 1
1985 // We will only use CSE_NUM_USED_ACCS of these entries, ie. the
1986 // number of lists allocated depends on the number of access regions
1987 // in use by the embedding.
1988 LIns** m_listL[CSE_NUM_ACCS];
1989 uint32_t m_capL[ CSE_NUM_ACCS];
1990 uint32_t m_usedL[CSE_NUM_ACCS];
1992 AccSet storesSinceLastLoad; // regions stored to since the last load
1994 Allocator& alloc;
1996 // After a conditional guard such as "xf cmp", we know that 'cmp' must
1997 // be true, else we would have side-exited. So if we see 'cmp' again
1998 // we can treat it like a constant. This table records such
1999 // comparisons.
2000 HashMap <LIns*, bool> knownCmpValues;
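// Sketch of the idea (illustrative, not the exact implementation):
//
//     insGuard(LIR_xf, cmp, gr);    // if execution continues, 'cmp' was true
//     ...
//     insGuard(LIR_xf, cmp, gr2);   // 'cmp' is now known true, so this guard
//                                   // can never exit and may be dropped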
2002 // If true, we will not add new instructions to the CSE tables, but we
2003 // will continue to CSE instructions that match existing table
2004 // entries. Load instructions will still be removed if aliasing
2005 // stores are encountered.
2006 bool suspended;
2008 CseAcc miniAccSetToCseAcc(MiniAccSet miniAccSet, LoadQual loadQual) {
2009 NanoAssert(miniAccSet.val < NUM_ACCS || miniAccSet.val == MINI_ACCSET_MULTIPLE.val);
2010 return (loadQual == LOAD_CONST) ? CSE_ACC_CONST :
2011 (miniAccSet.val == MINI_ACCSET_MULTIPLE.val) ? CSE_ACC_MULTIPLE :
2012 miniAccSet.val;
2015 static uint32_t hash8(uint32_t hash, const uint8_t data);
2016 static uint32_t hash32(uint32_t hash, const uint32_t data);
2017 static uint32_t hashptr(uint32_t hash, const void* data);
2018 static uint32_t hashfinish(uint32_t hash);
2020 static uint32_t hashImmI(int32_t);
2021 static uint32_t hashImmQorD(uint64_t); // not NANOJIT_64BIT-only -- used by findImmD()
2022 static uint32_t hash1(LOpcode op, LIns*);
2023 static uint32_t hash2(LOpcode op, LIns*, LIns*);
2024 static uint32_t hash3(LOpcode op, LIns*, LIns*, LIns*);
2025 static uint32_t hashLoad(LOpcode op, LIns*, int32_t);
2026 static uint32_t hashCall(const CallInfo *call, uint32_t argc, LIns* args[]);
2028 // These versions are used before an LIns has been created.
2029 LIns* findImmISmall(int32_t a, uint32_t &k);
2030 LIns* findImmILarge(int32_t a, uint32_t &k);
2031 #ifdef NANOJIT_64BIT
2032 LIns* findImmQ(uint64_t a, uint32_t &k);
2033 #endif
2034 LIns* findImmD(uint64_t d, uint32_t &k);
2035 LIns* find1(LOpcode v, LIns* a, uint32_t &k);
2036 LIns* find2(LOpcode v, LIns* a, LIns* b, uint32_t &k);
2037 LIns* find3(LOpcode v, LIns* a, LIns* b, LIns* c, uint32_t &k);
2038 LIns* findLoad(LOpcode v, LIns* a, int32_t b, MiniAccSet miniAccSet, LoadQual loadQual,
2039 uint32_t &k);
2040 LIns* findCall(const CallInfo *call, uint32_t argc, LIns* args[], uint32_t &k);
2042 // These versions are used after an LIns has been created; they are
2043 // used for rehashing after growing. They simply forward to the
2044 // multi-arg versions above.
2045 uint32_t findImmISmall(LIns* ins);
2046 uint32_t findImmILarge(LIns* ins);
2047 #ifdef NANOJIT_64BIT
2048 uint32_t findImmQ(LIns* ins);
2049 #endif
2050 uint32_t findImmD(LIns* ins);
2051 uint32_t find1(LIns* ins);
2052 uint32_t find2(LIns* ins);
2053 uint32_t find3(LIns* ins);
2054 uint32_t findCall(LIns* ins);
2055 uint32_t findLoad(LIns* ins);
2057 void growNL(NLKind kind);
2058 void growL(CseAcc cseAcc);
2060 // 'k' is the index found by findXYZ().
2061 void addNLImmISmall(LIns* ins, uint32_t k);
2062 void addNL(NLKind kind, LIns* ins, uint32_t k);
2063 void addL(LIns* ins, uint32_t k);
2065 void clearAll(); // clears all tables
2066 void clearNL(NLKind); // clears one non-load table
2067 void clearL(CseAcc); // clears one load table
2069 public:
2070 CseFilter(LirWriter *out, uint8_t embNumUsedAccs, Allocator&);
2072 LIns* insImmI(int32_t imm);
2073 #ifdef NANOJIT_64BIT
2074 LIns* insImmQ(uint64_t q);
2075 #endif
2076 LIns* insImmD(double d);
2077 LIns* ins0(LOpcode v);
2078 LIns* ins1(LOpcode v, LIns*);
2079 LIns* ins2(LOpcode v, LIns*, LIns*);
2080 LIns* ins3(LOpcode v, LIns*, LIns*, LIns*);
2081 LIns* insLoad(LOpcode op, LIns* base, int32_t d, AccSet accSet, LoadQual loadQual);
2082 LIns* insStore(LOpcode op, LIns* value, LIns* base, int32_t d, AccSet accSet);
2083 LIns* insCall(const CallInfo *call, LIns* args[]);
2084 LIns* insGuard(LOpcode op, LIns* cond, GuardRecord *gr);
2085 LIns* insGuardXov(LOpcode op, LIns* a, LIns* b, GuardRecord *gr);
2087 // These functions provide control over CSE in the face of control
2088 // flow. A suspend()/resume() pair may be put around a synthetic
2089 // control flow diamond, preventing the inserted label from resetting
2090 // the CSE state. A suspend() call must be dominated by a resume()
2091 // call, else incorrect code could result.
2092 void suspend() { suspended = true; }
2093 void resume() { suspended = false; }
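// Sketch of the intended pattern (illustrative; the branch/label emission in
// the middle is whatever the embedder generates for its synthetic diamond):
//
//     cse->suspend();
//     ... emit the branch, both arms, and the joining LIR_label ...
//     cse->resume();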
2096 class LirBuffer
2098 public:
2099 LirBuffer(Allocator& alloc);
2100 void clear();
2101 uintptr_t makeRoom(size_t szB); // make room for an instruction
2103 debug_only (void validate() const;)
2104 verbose_only(LInsPrinter* printer;)
2106 int32_t insCount();
2108 // stats
2109 struct
2111 uint32_t lir; // # instructions
2113 _stats;
2115 AbiKind abi;
2116 LIns *state, *param1, *sp, *rp;
2117 LIns* savedRegs[NumSavedRegs+1]; // Allocate an extra element in case NumSavedRegs == 0
2119 protected:
2120 friend class LirBufWriter;
2122 /** Each chunk is just a raw area of LIns instances, with no header
2123 and no more than 8-byte alignment. The chunk size is somewhat arbitrary. */
2124 static const size_t CHUNK_SZB = 8000;
2126 /** Get CHUNK_SZB more memory for LIR instructions. */
2127 void chunkAlloc();
2128 void moveToNewChunk(uintptr_t addrOfLastLInsOnCurrentChunk);
2130 Allocator& _allocator;
2131 uintptr_t _unused; // next unused instruction slot in the current LIR chunk
2132 uintptr_t _limit; // one past the last usable byte of the current LIR chunk
2135 class LirBufWriter : public LirWriter
2137 LirBuffer* _buf; // underlying buffer housing the instructions
2138 const Config& _config;
2140 public:
2141 LirBufWriter(LirBuffer* buf, const Config& config)
2142 : LirWriter(0), _buf(buf), _config(config) {
2145 // LirWriter interface
2146 LIns* insLoad(LOpcode op, LIns* base, int32_t disp, AccSet accSet, LoadQual loadQual);
2147 LIns* insStore(LOpcode op, LIns* o1, LIns* o2, int32_t disp, AccSet accSet);
2148 LIns* ins0(LOpcode op);
2149 LIns* ins1(LOpcode op, LIns* o1);
2150 LIns* ins2(LOpcode op, LIns* o1, LIns* o2);
2151 LIns* ins3(LOpcode op, LIns* o1, LIns* o2, LIns* o3);
2152 LIns* insParam(int32_t i, int32_t kind);
2153 LIns* insImmI(int32_t imm);
2154 #ifdef NANOJIT_64BIT
2155 LIns* insImmQ(uint64_t imm);
2156 #endif
2157 LIns* insImmD(double d);
2158 LIns* insCall(const CallInfo *call, LIns* args[]);
2159 LIns* insGuard(LOpcode op, LIns* cond, GuardRecord *gr);
2160 LIns* insGuardXov(LOpcode op, LIns* a, LIns* b, GuardRecord *gr);
2161 LIns* insBranch(LOpcode v, LIns* condition, LIns* to);
2162 LIns* insBranchJov(LOpcode v, LIns* a, LIns* b, LIns* to);
2163 LIns* insAlloc(int32_t size);
2164 LIns* insJtbl(LIns* index, uint32_t size);
2165 LIns* insComment(const char* str);
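// Typical writer-pipeline sketch (illustrative; 'alloc', 'lirbuf', 'config'
// and 'nUsedAccs' are assumed to be supplied by the embedding, and the exact
// ordering of filters is the embedder's choice):
//
//     LirWriter* w = new (alloc) LirBufWriter(lirbuf, config);
//     w = new (alloc) CseFilter(w, nUsedAccs, alloc);
//     w = new (alloc) ExprFilter(w);
//     w->ins0(LIR_start);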
2168 class LirFilter
2170 public:
2171 LirFilter *in;
2172 LirFilter(LirFilter *in) : in(in) {}
2173 virtual ~LirFilter(){}
2175 // It's crucial that once this reaches the LIR_start at the beginning
2176 // of the buffer, it just keeps returning that LIR_start LIns on any
2177 // subsequent calls.
2178 virtual LIns* read() {
2179 return in->read();
2181 virtual LIns* finalIns() {
2182 return in->finalIns();
2186 // concrete
2187 class LirReader : public LirFilter
2189 LIns* _ins; // next instruction to be read; invariant: is never a skip
2190 LIns* _finalIns; // final instruction in the stream; ie. the first one to be read
2192 public:
2193 LirReader(LIns* ins) : LirFilter(0), _ins(ins), _finalIns(ins)
2195 // The last instruction for a fragment shouldn't be a skip.
2196 // (Actually, if the last *inserted* instruction exactly fills up
2197 // a chunk, a new chunk will be created, and thus the last *written*
2198 // instruction will be a skip -- the one needed for the
2199 // cross-chunk link. But the last *inserted* instruction is what
2200 // is recorded and used to initialise each LirReader, and that is
2201 // what is seen here, and therefore this assertion holds.)
2202 NanoAssert(ins && !ins->isop(LIR_skip));
2204 virtual ~LirReader() {}
2206 // Returns next instruction and advances to the prior instruction.
2207 // Invariant: never returns a skip.
2208 LIns* read();
2210 LIns* finalIns() {
2211 return _finalIns;
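// Read-loop sketch (illustrative; 'lastIns' is assumed to be the final
// instruction of a completed fragment): instructions come out in reverse,
// ending with the LIR_start at the beginning of the buffer.
//
//     LirReader reader(lastIns);
//     for (LIns* ins = reader.read(); !ins->isop(LIR_start); ins = reader.read()) {
//         ... process 'ins' ...
//     }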
2215 verbose_only(void live(LirFilter* in, Allocator& alloc, Fragment* frag, LogControl*);)
2217 // WARNING: StackFilter assumes that all stack entries are eight bytes.
2218 // Some of its optimisations aren't valid if that isn't true. See
2219 // StackFilter::read() for more details.
2220 class StackFilter: public LirFilter
2222 LIns* sp;
2223 BitSet stk;
2224 int top;
2225 int getTop(LIns* br);
2227 public:
2228 StackFilter(LirFilter *in, Allocator& alloc, LIns* sp);
2229 LIns* read();
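// Sketch (illustrative; 'r' is an assumed upstream LirFilter and 'lirbuf->sp'
// is the stack-pointer instruction recorded in the LirBuffer):
//
//     r = new (alloc) StackFilter(r, alloc, lirbuf->sp);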
2232 // This type is used to perform a simple interval analysis of 32-bit
2233 // add/sub/mul. It lets us avoid overflow checks in some cases.
2234 struct Interval
2236 // The bounds are 64-bit integers so that any overflow from a 32-bit
2237 // operation can be safely detected.
2239 // If 'hasOverflowed' is false, 'lo' and 'hi' must be in the range
2240 // I32_MIN..I32_MAX. If 'hasOverflowed' is true, 'lo' and 'hi' should
2241 // not be trusted (and in debug builds we set them both to a special
2242 // value UNTRUSTWORTHY that is outside the I32_MIN..I32_MAX range to
2243 // facilitate sanity checking).
2245 int64_t lo;
2246 int64_t hi;
2247 bool hasOverflowed;
2249 static const int64_t I32_MIN = int64_t(int32_t(0x80000000));
2250 static const int64_t I32_MAX = int64_t(int32_t(0x7fffffff));
2252 #ifdef DEBUG
2253 static const int64_t UNTRUSTWORTHY = int64_t(0xdeafdeadbeeffeedLL);
2255 bool isSane() {
2256 return (hasOverflowed && lo == UNTRUSTWORTHY && hi == UNTRUSTWORTHY) ||
2257 (!hasOverflowed && lo <= hi && I32_MIN <= lo && hi <= I32_MAX);
2259 #endif
2261 Interval(int64_t lo_, int64_t hi_) {
2262 if (lo_ < I32_MIN || I32_MAX < hi_) {
2263 hasOverflowed = true;
2264 #ifdef DEBUG
2265 lo = UNTRUSTWORTHY;
2266 hi = UNTRUSTWORTHY;
2267 #endif
2268 } else {
2269 hasOverflowed = false;
2270 lo = lo_;
2271 hi = hi_;
2273 NanoAssert(isSane());
2276 static Interval OverflowInterval() {
2277 Interval interval(0, 0);
2278 #ifdef DEBUG
2279 interval.lo = UNTRUSTWORTHY;
2280 interval.hi = UNTRUSTWORTHY;
2281 #endif
2282 interval.hasOverflowed = true;
2283 return interval;
2286 static Interval of(LIns* ins, int32_t lim);
2288 static Interval add(Interval x, Interval y);
2289 static Interval sub(Interval x, Interval y);
2290 static Interval mul(Interval x, Interval y);
2292 bool canBeZero() {
2293 NanoAssert(isSane());
2294 return hasOverflowed || (lo <= 0 && 0 <= hi);
2297 bool canBeNegative() {
2298 NanoAssert(isSane());
2299 return hasOverflowed || (lo < 0);
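// Usage sketch (illustrative; the second argument to of() is assumed to be a
// depth/effort limit for the analysis):
//
//     Interval x = Interval::of(a, 32);
//     Interval y = Interval::of(b, 32);
//     if (!Interval::add(x, y).hasOverflowed)
//         ... the overflow check on this 32-bit add can be omitted ...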
2303 #if NJ_SOFTFLOAT_SUPPORTED
2304 struct SoftFloatOps
2306 const CallInfo* opmap[LIR_sentinel];
2307 SoftFloatOps();
2310 extern const SoftFloatOps softFloatOps;
2312 // Replaces fpu ops with function calls, for platforms lacking float
2313 // hardware (eg. some ARM machines).
2314 class SoftFloatFilter: public LirWriter
2316 public:
2317 static const CallInfo* opmap[LIR_sentinel];
2319 SoftFloatFilter(LirWriter *out);
2320 LIns *split(LIns *a);
2321 LIns *split(const CallInfo *call, LIns* args[]);
2322 LIns *callD1(const CallInfo *call, LIns *a);
2323 LIns *callD2(const CallInfo *call, LIns *a, LIns *b);
2324 LIns *callI1(const CallInfo *call, LIns *a);
2325 LIns *cmpD(const CallInfo *call, LIns *a, LIns *b);
2326 LIns *ins1(LOpcode op, LIns *a);
2327 LIns *ins2(LOpcode op, LIns *a, LIns *b);
2328 LIns *insCall(const CallInfo *ci, LIns* args[]);
2330 #endif
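// Sketch (illustrative; 'needsSoftFloat' is a hypothetical embedder-side
// capability check):
//
//     #if NJ_SOFTFLOAT_SUPPORTED
//     if (needsSoftFloat)
//         w = new (alloc) SoftFloatFilter(w);
//     #endif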
2332 #ifdef DEBUG
2333 // This class does thorough checking of LIR. It checks *implicit* LIR
2334 // instructions, ie. LIR instructions specified via arguments -- to
2335 // methods like insLoad() -- that have not yet been converted into
2336 // *explicit* LIns objects in a LirBuffer. The reason for this is that if
2337 // we wait until the LIR instructions are explicit, they will have gone
2338 // through the entire writer pipeline and been optimised. By checking
2339 // implicit LIR instructions we can check the LIR code at the start of the
2340 // writer pipeline, exactly as it is generated by the compiler front-end.
2342 // A general note about the errors produced by this class: for
2343 // TraceMonkey, they won't include special names for instructions that
2344 // have them unless TMFLAGS is specified.
2345 class ValidateWriter : public LirWriter
2347 private:
2348 LInsPrinter* printer;
2349 const char* whereInPipeline;
2351 const char* type2string(LTy type);
2352 void typeCheckArgs(LOpcode op, int nArgs, LTy formals[], LIns* args[]);
2353 void errorStructureShouldBe(LOpcode op, const char* argDesc, int argN, LIns* arg,
2354 const char* shouldBeDesc);
2355 void errorAccSet(const char* what, AccSet accSet, const char* shouldDesc);
2356 void errorLoadQual(const char* what, LoadQual loadQual);
2357 void checkLInsHasOpcode(LOpcode op, int argN, LIns* ins, LOpcode op2);
2358 void checkLInsIsACondOrConst(LOpcode op, int argN, LIns* ins);
2359 void checkLInsIsNull(LOpcode op, int argN, LIns* ins);
2360 void checkAccSet(LOpcode op, LIns* base, int32_t disp, AccSet accSet); // defined by the embedder
2362 // These can be set by the embedder and used in checkAccSet().
2363 void** checkAccSetExtras;
2365 public:
2366 ValidateWriter(LirWriter* out, LInsPrinter* printer, const char* where);
2367 void setCheckAccSetExtras(void** extras) { checkAccSetExtras = extras; }
2369 LIns* insLoad(LOpcode op, LIns* base, int32_t d, AccSet accSet, LoadQual loadQual);
2370 LIns* insStore(LOpcode op, LIns* value, LIns* base, int32_t d, AccSet accSet);
2371 LIns* ins0(LOpcode v);
2372 LIns* ins1(LOpcode v, LIns* a);
2373 LIns* ins2(LOpcode v, LIns* a, LIns* b);
2374 LIns* ins3(LOpcode v, LIns* a, LIns* b, LIns* c);
2375 LIns* insParam(int32_t arg, int32_t kind);
2376 LIns* insImmI(int32_t imm);
2377 #ifdef NANOJIT_64BIT
2378 LIns* insImmQ(uint64_t imm);
2379 #endif
2380 LIns* insImmD(double d);
2381 LIns* insCall(const CallInfo *call, LIns* args[]);
2382 LIns* insGuard(LOpcode v, LIns *c, GuardRecord *gr);
2383 LIns* insGuardXov(LOpcode v, LIns* a, LIns* b, GuardRecord* gr);
2384 LIns* insBranch(LOpcode v, LIns* condition, LIns* to);
2385 LIns* insBranchJov(LOpcode v, LIns* a, LIns* b, LIns* to);
2386 LIns* insAlloc(int32_t size);
2387 LIns* insJtbl(LIns* index, uint32_t size);
2390 // This just checks things that aren't possible to check in
2391 // ValidateWriter, eg. whether all branch targets are set and are labels.
2392 class ValidateReader: public LirFilter {
2393 public:
2394 ValidateReader(LirFilter* in);
2395 LIns* read();
2397 #endif
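// Sketch (illustrative; 'printer' is assumed to come from a verbose build,
// and the 'where' string just labels the pipeline stage in error messages):
//
//     #ifdef DEBUG
//     w = new (alloc) ValidateWriter(w, printer, "after ExprFilter");
//     #endif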
2399 #ifdef NJ_VERBOSE
2400 /* A listing filter for LIR that reads backwards. It merely
2401 passes its input through to its output, but also records it. When
2402 finish() is called, it prints out everything that went through. It is
2403 intended for printing arbitrary intermediate transformation stages of
2404 LIR. */
2405 class ReverseLister : public LirFilter
2407 Allocator& _alloc;
2408 LInsPrinter* _printer;
2409 const char* _title;
2410 StringList _strs;
2411 LogControl* _logc;
2412 LIns* _prevIns;
2413 public:
2414 ReverseLister(LirFilter* in, Allocator& alloc,
2415 LInsPrinter* printer, LogControl* logc, const char* title)
2416 : LirFilter(in)
2417 , _alloc(alloc)
2418 , _printer(printer)
2419 , _title(title)
2420 , _strs(alloc)
2421 , _logc(logc)
2422 , _prevIns(NULL)
2425 void finish();
2426 LIns* read();
2428 #endif
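// Sketch (illustrative; 'r', 'printer' and 'logc' are assumed to come from
// the surrounding reader pipeline): splice in a ReverseLister to capture one
// transformation stage, then dump it with finish().
//
//     ReverseLister* rl = new (alloc) ReverseLister(r, alloc, printer, logc,
//                                                   "Before StackFilter");
//     ... run the downstream passes reading from 'rl' ...
//     rl->finish();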
2431 #endif // __nanojit_LIR__