tree-ssa-threadupdate.c: Include tree-cfg.h and tree-pass.h
[official-gcc.git] / gcc / config / arm / neon.ml
blobca9a4c06aa645591445c84446ea7f688234ddf13
1 (* Common code for ARM NEON header file, documentation and test case
2 generators.
4 Copyright (C) 2006-2013 Free Software Foundation, Inc.
5 Contributed by CodeSourcery.
7 This file is part of GCC.
9 GCC is free software; you can redistribute it and/or modify it under
10 the terms of the GNU General Public License as published by the Free
11 Software Foundation; either version 3, or (at your option) any later
12 version.
14 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
15 WARRANTY; without even the implied warranty of MERCHANTABILITY or
16 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 for more details.
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. *)
(* Shorthand types for vector elements.  Each constructor names an element
   interpretation: signed (S*), unsigned (U*), polynomial (P*) or float (F*)
   at a given bit width, plus width-only integer (I*) and "just bits" (B*)
   forms.  [Conv]/[Cast] pair a destination and a source element type for
   conversion/reinterpret operations; [NoElts] marks "no element type".  *)
type elts = S8 | S16 | S32 | S64 | F16 | F32 | U8 | U16 | U32 | U64 | P8 | P16
          | I8 | I16 | I32 | I64 | B8 | B16 | B32 | B64 | Conv of elts * elts
          | Cast of elts * elts | NoElts

(* The class of an element type, with widths abstracted away.
   [ConvClass] mirrors Conv/Cast; [NoType] mirrors NoElts.  *)
type eltclass = Signed | Unsigned | Float | Poly | Int | Bits
              | ConvClass of eltclass * eltclass | NoType
(* These vector types correspond directly to C types.  The *xN forms are the
   NEON vector types (element type x lane count); the scalar forms are the
   corresponding C scalar types.  [T_immediate (lo, hi)] is a constant
   argument constrained to the range lo..hi; [T_arrayof] is a structure of
   N vectors (vld2/vst3-style); [T_ptrto]/[T_const] build pointer and
   const-qualified types; the T_int*/T_float* "mode" names (QI/HI/SI/DI/
   HF/SF) are the raw machine-mode spellings used internally by GCC.  *)
type vectype = T_int8x8 | T_int8x16
             | T_int16x4 | T_int16x8
             | T_int32x2 | T_int32x4
             | T_int64x1 | T_int64x2
             | T_uint8x8 | T_uint8x16
             | T_uint16x4 | T_uint16x8
             | T_uint32x2 | T_uint32x4
             | T_uint64x1 | T_uint64x2
             | T_float16x4
             | T_float32x2 | T_float32x4
             | T_poly8x8 | T_poly8x16
             | T_poly16x4 | T_poly16x8
             | T_immediate of int * int
             | T_int8 | T_int16
             | T_int32 | T_int64
             | T_uint8 | T_uint16
             | T_uint32 | T_uint64
             | T_poly8 | T_poly16
             | T_float16 | T_float32
             | T_arrayof of int * vectype
             | T_ptrto of vectype | T_const of vectype
             | T_void | T_intQI
             | T_intHI | T_intSI
             | T_intDI | T_floatHF
             | T_floatSF
(* Opaque integer modes used for multi-register (structure) values.
   The meanings of the following are:
     TImode : "Tetra", two registers (four words).
     EImode : "hExa", three registers (six words).
     OImode : "Octa", four registers (eight words).
     CImode : "dodeCa", six registers (twelve words).
     XImode : "heXadeca", eight registers (sixteen words).  *)
type inttype = B_TImode | B_EImode | B_OImode | B_CImode | B_XImode
(* One operand slot in an instruction shape: a 64-bit D register, a 128-bit
   Q register, an ARM core register, an immediate, an array of registers
   (structure loads/stores), or a (const) pointer to one of these.  *)
type shape_elt = Dreg | Qreg | Corereg | Immed | VecArray of int * shape_elt
               | PtrTo of shape_elt | CstPtrTo of shape_elt
               (* These next ones are used only in the test generator.  *)
               | Element_of_dreg        (* Used for "lane" variants.  *)
               | Element_of_qreg        (* Likewise.  *)
               | All_elements_of_dreg   (* Used for "dup" variants.  *)
               | Alternatives of shape_elt list (* Used for multiple valid operands *)

(* The overall shape of an instruction: either a generic pattern
   (Long/Wide/Narrow widen or narrow between D and Q operands, the _imm
   forms take an immediate, the _noreg forms fix the register kind) or an
   explicit per-operand list via [Use_operands].  Operand 0 is always the
   destination.  *)
type shape_form = All of int * shape_elt
                | Long
                | Long_noreg of shape_elt
                | Wide
                | Wide_noreg of shape_elt
                | Narrow
                | Long_imm
                | Narrow_imm
                | Binary_imm of shape_elt
                | Use_operands of shape_elt array
                | By_scalar of shape_elt
                | Unary_scalar of shape_elt
                | Wide_lane
                | Wide_scalar
                | Pair_result of shape_elt
(* C prototype of an intrinsic: the return type followed by 0..4 argument
   types.  *)
type arity = Arity0 of vectype
           | Arity1 of vectype * vectype
           | Arity2 of vectype * vectype * vectype
           | Arity3 of vectype * vectype * vectype * vectype
           | Arity4 of vectype * vectype * vectype * vectype * vectype

(* GCC machine modes a NEON operand can have: 64-bit vectors, 128-bit
   vectors, and the scalar modes.  *)
type vecmode = V8QI | V4HI | V4HF | V2SI | V2SF | DI
             | V16QI | V8HI | V4SI | V4SF | V2DI
             | QI | HI | SI | SF
(* Every NEON operation the generators know about.  The [of int] forms carry
   the structure count (e.g. Vldx 3 is VLD3).  *)
type opcode =
  (* Binary ops.  *)
    Vadd | Vmul | Vmla | Vmls | Vfma | Vfms | Vsub
  | Vceq | Vcge | Vcgt | Vcle | Vclt
  | Vcage | Vcagt | Vcale | Vcalt
  | Vtst | Vabd | Vaba | Vmax | Vmin
  | Vpadd | Vpada | Vpmax | Vpmin
  | Vrecps | Vrsqrts
  | Vshl | Vshr_n | Vshl_n | Vsra_n | Vsri | Vsli
  (* Logic binops.  *)
  | Vand | Vorr | Veor | Vbic | Vorn | Vbsl
  (* Ops with scalar.  *)
  | Vmul_lane | Vmla_lane | Vmls_lane
  | Vmul_n | Vmla_n | Vmls_n
  | Vmull_n | Vmull_lane
  | Vqdmull_n | Vqdmull_lane
  | Vqdmulh_n | Vqdmulh_lane
  (* Unary ops.  *)
  | Vrintn | Vrinta | Vrintp | Vrintm | Vrintz
  | Vabs | Vneg | Vcls | Vclz | Vcnt
  | Vrecpe | Vrsqrte | Vmvn
  (* Vector extract.  *)
  | Vext
  (* Reverse elements.  *)
  | Vrev64 | Vrev32 | Vrev16
  (* Transposition ops.  *)
  | Vtrn | Vzip | Vuzp
  (* Loads and stores (VLD1/VST1/VLD2...), elements and structures.  *)
  | Vldx of int
  | Vstx of int
  | Vldx_lane of int
  | Vldx_dup of int
  | Vstx_lane of int
  (* Set/extract lanes from a vector.  *)
  | Vget_lane | Vset_lane
  (* Initialize vector from bit pattern.  *)
  | Vcreate
  (* Set all lanes to same value.  *)
  | Vdup_n
  | Vmov_n (* Is this the same?  *)
  (* Duplicate scalar to all lanes of vector.  *)
  | Vdup_lane
  (* Combine vectors.  *)
  | Vcombine
  (* Get quadword high/low parts.  *)
  | Vget_high | Vget_low
  (* Convert vectors.  *)
  | Vcvt | Vcvt_n
  (* Narrow/lengthen vectors.  *)
  | Vmovn | Vmovl
  (* Table lookup.  *)
  | Vtbl of int
  | Vtbx of int
  (* Reinterpret casts.  *)
  | Vreinterp
(* Element-index permutation for the VREV family: reversing [revsize]-bit
   chunks of a vector whose elements are [elsize] bits wide is equivalent to
   XORing each element index with (revsize / elsize - 1).  Returns the
   permuted index list for [nelts] elements; the final argument (the
   high/low part selector used by the other mask generators) is ignored
   because VREV treats both halves identically.  *)
let rev_elems revsize elsize nelts _ =
  let mask = (revsize / elsize) - 1 in
  Array.to_list (Array.init nelts (fun idx -> idx lxor mask))
(* Build a shuffle-index list of 2*[nelts] entries: each step emits the pair
   (base, base + stride), then advances base by [increment].  The zip/uzip/
   trn mask generators below differ only in the increment they pass.  *)
let permute_range i stride nelts increment =
  let rec build base = function
      0 -> []
    | count -> base :: (base + stride) :: build (base + increment) (pred count)
  in
  build i nelts
(* Generate a list of integers suitable for vzip.  *)
let zip_range i stride nelts = permute_range i stride nelts 1

(* Generate a list of integers suitable for vunzip.  *)
let uzip_range i stride nelts = permute_range i stride nelts 4

(* Generate a list of integers suitable for trn.  *)
let trn_range i stride nelts = permute_range i stride nelts 2
(* __builtin_shuffle mask generators for zip/unzip/transpose.  The first
   argument (element size in bits) is unused by these three; [part] selects
   whether the mask describes the low or high half of the full permutation.  *)
let zip_elems _ nelts part =
  match part with
    `lo -> zip_range 0 nelts (nelts / 2)
  | `hi -> zip_range (nelts / 2) nelts (nelts / 2)

let uzip_elems _ nelts part =
  match part with
    `lo -> uzip_range 0 2 (nelts / 2)
  | `hi -> uzip_range 1 2 (nelts / 2)

let trn_elems _ nelts part =
  match part with
    `lo -> trn_range 0 nelts (nelts / 2)
  | `hi -> trn_range 1 nelts (nelts / 2)
(* Features used for documentation, to distinguish between some instruction
   variants, and to signal special requirements (e.g. swapping arguments).  *)
type features =
    Halving
  | Rounding
  | Saturating
  | Dst_unsign
  | High_half
  | Doubling
  | Flipped of string (* Builtin name to use with flipped arguments.  *)
  | InfoWord (* Pass an extra word for signage/rounding etc. (always passed
                for All _, Long, Wide, Narrow shape_forms).  *)
  (* Implement builtin as shuffle.  The parameter is a function which returns
     masks suitable for __builtin_shuffle: arguments are (element size,
     number of elements, high/low part selector).  *)
  | Use_shuffle of (int -> int -> [`lo|`hi] -> int list)
  (* A specification as to the shape of instruction expected upon
     disassembly, used if it differs from the shape used to build the
     intrinsic prototype.  Multiple entries in the constructor's argument
     indicate that the intrinsic expands to more than one assembly
     instruction, each with a corresponding shape specified here.  *)
  | Disassembles_as of shape_form list
  | Builtin_name of string (* Override the name of the builtin.  *)
  (* Override the name of the instruction.  If more than one name
     is specified, it means that the instruction can have any of those
     names.  *)
  | Instruction_name of string list
  (* Mark that the intrinsic yields no instructions, or expands to yield
     behavior that the test generator cannot test.  *)
  | No_op
  (* Mark that the intrinsic has constant arguments that cannot be set
     to the defaults (zero for pointers and one otherwise) in the test
     cases.  The function supplied must return the integer to be written
     into the testcase for the argument number (0-based) supplied to it.  *)
  | Const_valuator of (int -> int)
  | Fixed_vector_reg
  | Fixed_core_reg
  (* Mark that the intrinsic requires __ARM_FEATURE_string to be defined.  *)
  | Requires_feature of string
  (* Mark that the intrinsic requires a particular architecture version.  *)
  | Requires_arch of int
  (* Mark that the intrinsic requires a particular bit in __ARM_FP to
     be set.  *)
  | Requires_FP_bit of int
(* Raised when the two sides of a Conv/Cast disagree where they must not
   (e.g. on element width).  *)
exception MixedMode of elts * elts

(* Width in bits of an element type.  A conversion has a single width only
   when both sides agree; a cast never does; NoElts has no width at all.  *)
let rec elt_width = function
    S8 | U8 | P8 | I8 | B8 -> 8
  | S16 | U16 | P16 | I16 | B16 | F16 -> 16
  | S32 | F32 | U32 | I32 | B32 -> 32
  | S64 | U64 | I64 | B64 -> 64
  | Conv (a, b) ->
      let wa = elt_width a and wb = elt_width b in
      if wa = wb then wa else raise (MixedMode (a, b))
  | Cast (a, b) -> raise (MixedMode (a, b))
  | NoElts -> failwith "No elts"
(* Class (type family) of an element type, discarding widths.  *)
let rec elt_class = function
    S8 | S16 | S32 | S64 -> Signed
  | U8 | U16 | U32 | U64 -> Unsigned
  | P8 | P16 -> Poly
  | F16 | F32 -> Float
  | I8 | I16 | I32 | I64 -> Int
  | B8 | B16 | B32 | B64 -> Bits
  | Conv (a, b) | Cast (a, b) -> ConvClass (elt_class a, elt_class b)
  | NoElts -> NoType

(* Rebuild an element type from a class and a width — the inverse of
   [elt_class]/[elt_width] for the simple classes.  Fails for combinations
   with no representation (e.g. Float at 8 or 64 bits, or any ConvClass).  *)
let elt_of_class_width c w =
  match c, w with
    Signed, 8 -> S8
  | Signed, 16 -> S16
  | Signed, 32 -> S32
  | Signed, 64 -> S64
  | Float, 16 -> F16
  | Float, 32 -> F32
  | Unsigned, 8 -> U8
  | Unsigned, 16 -> U16
  | Unsigned, 32 -> U32
  | Unsigned, 64 -> U64
  | Poly, 8 -> P8
  | Poly, 16 -> P16
  | Int, 8 -> I8
  | Int, 16 -> I16
  | Int, 32 -> I32
  | Int, 64 -> I64
  | Bits, 8 -> B8
  | Bits, 16 -> B16
  | Bits, 32 -> B32
  | Bits, 64 -> B64
  | _ -> failwith "Bad element type"
(* Return unsigned integer element the same width as argument.  *)
let unsigned_of_elt elt =
  elt_of_class_width Unsigned (elt_width elt)

(* Likewise, signed.  *)
let signed_of_elt elt =
  elt_of_class_width Signed (elt_width elt)

(* Return untyped bits element the same width as argument.  *)
let bits_of_elt elt =
  elt_of_class_width Bits (elt_width elt)

(* Collapse signed and unsigned to the width-only I* form; anything else
   (poly, float, bits, ...) is returned unchanged.  *)
let non_signed_variant = function
    S8 -> I8
  | S16 -> I16
  | S32 -> I32
  | S64 -> I64
  | U8 -> I8
  | U16 -> I16
  | U32 -> I32
  | U64 -> I64
  | x -> x

(* Map polynomial elements to the unsigned element of the same width,
   leaving every other class alone.  *)
let poly_unsigned_variant v =
  let elclass = match elt_class v with
    Poly -> Unsigned
  | x -> x in
  elt_of_class_width elclass (elt_width v)

(* Same class, double the width (e.g. S8 -> S16).  *)
let widen_elt elt =
  let w = elt_width elt
  and c = elt_class elt in
  elt_of_class_width c (w * 2)

(* Same class, half the width (e.g. S16 -> S8).  *)
let narrow_elt elt =
  let w = elt_width elt
  and c = elt_class elt in
  elt_of_class_width c (w / 2)
(* If we're trying to find a mode from a "Use_operands" instruction, use the
   last vector operand as the dominant mode used to invoke the correct builtin.
   We must stick to this rule in neon.md.  *)
let find_key_operand operands =
  (* Walk backwards from the last operand until a D/Q register (or an array
     of them) is found.  NOTE(review): recurses past index 0 (raising
     Invalid_argument) if no operand is a vector — callers are expected to
     guarantee at least one vector operand.  *)
  let rec scan opno =
    match operands.(opno) with
      Qreg -> Qreg
    | Dreg -> Dreg
    | VecArray (_, Qreg) -> Qreg
    | VecArray (_, Dreg) -> Dreg
    | _ -> scan (opno - 1)
  in
  scan (Array.length operands - 1)
(* Find a vecmode from a shape_elt ELT for an instruction with shape_form
   SHAPE.  For a Use_operands shape, if ARGPOS is passed then return the mode
   for the given argument position, else determine which argument to return a
   mode for automatically.  *)
let rec mode_of_elt ?argpos elt shape =
  (* Float (or conversion-to-float) elements select the FP column of the
     mode tables below.  *)
  let flt = match elt_class elt with
    Float | ConvClass (_, Float) -> true | _ -> false in
  (* Index into the per-width mode tables: 8/16/32/64 bits -> 0..3.  *)
  let idx =
    match elt_width elt with
      8 -> 0 | 16 -> 1 | 32 -> 2 | 64 -> 3
    | _ -> failwith "Bad element width"
  in match shape with
    All (_, Dreg) | By_scalar Dreg | Pair_result Dreg | Unary_scalar Dreg
  | Binary_imm Dreg | Long_noreg Dreg | Wide_noreg Dreg ->
      if flt then
        [| V8QI; V4HF; V2SF; DI |].(idx)
      else
        [| V8QI; V4HI; V2SI; DI |].(idx)
  | All (_, Qreg) | By_scalar Qreg | Pair_result Qreg | Unary_scalar Qreg
  | Binary_imm Qreg | Long_noreg Qreg | Wide_noreg Qreg ->
      [| V16QI; V8HI; (if flt then V4SF else V4SI); V2DI |].(idx)
  | All (_, (Corereg | PtrTo _ | CstPtrTo _)) ->
      [| QI; HI; (if flt then SF else SI); DI |].(idx)
  | Long | Wide | Wide_lane | Wide_scalar
  | Long_imm ->
      [| V8QI; V4HI; V2SI; DI |].(idx)
  | Narrow | Narrow_imm -> [| V16QI; V8HI; V4SI; V2DI |].(idx)
  | Use_operands ops ->
      begin match argpos with
        None -> mode_of_elt ?argpos elt (All (0, find_key_operand ops))
      | Some pos -> mode_of_elt ?argpos elt (All (0, ops.(pos)))
      end
  | _ -> failwith "invalid shape"
(* Modify an element type dependent on the shape of the instruction and the
   operand number.  Returns an elts -> elts function: operand 0 is the
   result, so Long/Wide shapes widen the destination (and, for Wide, the
   first source) while Narrow shapes narrow the destination.  *)
let shapemap shape no =
  let ident = fun x -> x in
  match shape with
    All _ | Use_operands _ | By_scalar _ | Pair_result _ | Unary_scalar _
  | Binary_imm _ -> ident
  | Long | Long_noreg _ | Wide_scalar | Long_imm ->
      [| widen_elt; ident; ident |].(no)
  | Wide | Wide_noreg _ -> [| widen_elt; widen_elt; ident |].(no)
  | Wide_lane -> [| widen_elt; ident; ident; ident |].(no)
  | Narrow | Narrow_imm -> [| narrow_elt; ident; ident |].(no)
(* Register type (D/Q) of an operand, based on shape and operand number.
   Operand 0 is the destination throughout.  *)
let regmap shape no =
  match shape with
    All (_, reg) | Long_noreg reg | Wide_noreg reg -> reg
  | Long -> [| Qreg; Dreg; Dreg |].(no)
  | Wide -> [| Qreg; Qreg; Dreg |].(no)
  | Narrow -> [| Dreg; Qreg; Qreg |].(no)
  | Wide_lane -> [| Qreg; Dreg; Dreg; Immed |].(no)
  | Wide_scalar -> [| Qreg; Dreg; Corereg |].(no)
  | By_scalar reg -> [| reg; reg; Dreg; Immed |].(no)
  | Unary_scalar reg -> [| reg; Dreg; Immed |].(no)
  | Pair_result reg -> [| VecArray (2, reg); reg; reg |].(no)
  | Binary_imm reg -> [| reg; reg; Immed |].(no)
  | Long_imm -> [| Qreg; Dreg; Immed |].(no)
  | Narrow_imm -> [| Dreg; Qreg; Immed |].(no)
  | Use_operands these -> these.(no)
(* C type of operand NO of an instruction with the given SHAPE and element
   type ELT: first adjusts the element per [shapemap] (widen/narrow), picks
   the register kind per [regmap], then maps (register, element) to the
   concrete vectype.  Fails on element/register combinations that have no C
   type (e.g. F16 in a Q register in this version).  *)
let type_for_elt shape elt no =
  let elt = (shapemap shape no) elt in
  let reg = regmap shape no in
  let rec type_for_reg_elt reg elt =
    match reg with
      Dreg ->
        begin match elt with
          S8 -> T_int8x8
        | S16 -> T_int16x4
        | S32 -> T_int32x2
        | S64 -> T_int64x1
        | U8 -> T_uint8x8
        | U16 -> T_uint16x4
        | U32 -> T_uint32x2
        | U64 -> T_uint64x1
        | F16 -> T_float16x4
        | F32 -> T_float32x2
        | P8 -> T_poly8x8
        | P16 -> T_poly16x4
        | _ -> failwith "Bad elt type for Dreg"
        end
    | Qreg ->
        begin match elt with
          S8 -> T_int8x16
        | S16 -> T_int16x8
        | S32 -> T_int32x4
        | S64 -> T_int64x2
        | U8 -> T_uint8x16
        | U16 -> T_uint16x8
        | U32 -> T_uint32x4
        | U64 -> T_uint64x2
        | F32 -> T_float32x4
        | P8 -> T_poly8x16
        | P16 -> T_poly16x8
        | _ -> failwith "Bad elt type for Qreg"
        end
    | Corereg ->
        begin match elt with
          S8 -> T_int8
        | S16 -> T_int16
        | S32 -> T_int32
        | S64 -> T_int64
        | U8 -> T_uint8
        | U16 -> T_uint16
        | U32 -> T_uint32
        | U64 -> T_uint64
        | P8 -> T_poly8
        | P16 -> T_poly16
        | F32 -> T_float32
        | _ -> failwith "Bad elt type for Corereg"
        end
    | Immed ->
        T_immediate (0, 0)
    | VecArray (num, sub) ->
        T_arrayof (num, type_for_reg_elt sub elt)
    | PtrTo x ->
        T_ptrto (type_for_reg_elt x elt)
    | CstPtrTo x ->
        T_ptrto (T_const (type_for_reg_elt x elt))
    (* Anything else is solely for the use of the test generator.  *)
    | _ -> assert false
  in
  type_for_reg_elt reg elt
(* Return size of a vector type, in bits.  Raises Not_found for scalar,
   immediate, pointer and array types, which have no fixed vector size
   here.  *)
let vectype_size = function
    T_int8x8 | T_int16x4 | T_int32x2 | T_int64x1
  | T_uint8x8 | T_uint16x4 | T_uint32x2 | T_uint64x1
  | T_float32x2 | T_poly8x8 | T_poly16x4 | T_float16x4 -> 64
  | T_int8x16 | T_int16x8 | T_int32x4 | T_int64x2
  | T_uint8x16 | T_uint16x8 | T_uint32x4 | T_uint64x2
  | T_float32x4 | T_poly8x16 | T_poly16x8 -> 128
  | _ -> raise Not_found

(* Opaque integer mode backing an array of NUM vectors of type ELTTYPE,
   selected by total size in 32-bit words (2 regs -> TImode, ... 8 regs ->
   XImode).  *)
let inttype_for_array num elttype =
  let eltsize = vectype_size elttype in
  let numwords = (num * eltsize) / 32 in
  match numwords with
    4 -> B_TImode
  | 6 -> B_EImode
  | 8 -> B_OImode
  | 12 -> B_CImode
  | 16 -> B_XImode
  | _ -> failwith ("no int type for size " ^ string_of_int numwords)
(* These functions return pairs of (internal, external) types, where "internal"
   types are those seen by GCC, and "external" are those seen by the assembler.
   These types aren't necessarily the same, since the intrinsics can munge more
   than one C type into each assembler opcode.  *)

(* Wrap a type-generating function so the element type it reports is
   collapsed to its width-only (sign-invariant) form.  *)
let make_sign_invariant func shape elt =
  let arity, elt' = func shape elt in
  arity, non_signed_variant elt'

(* Don't restrict any types.  [make_arity] receives a partial application of
   type_for_elt (operand number -> vectype) and builds the prototype.  *)
let elts_same make_arity shape elt =
  let vtype = type_for_elt shape elt in
  make_arity vtype, elt
(* As sign_invar_*, but when sign matters.  The _io forms thread the
   destination type in as an extra input (read-modify-write accumulators);
   the _lane forms append the lane immediate.  *)
let elts_same_io_lane =
  elts_same (fun vtype -> Arity4 (vtype 0, vtype 0, vtype 1, vtype 2, vtype 3))

let elts_same_io =
  elts_same (fun vtype -> Arity3 (vtype 0, vtype 0, vtype 1, vtype 2))

let elts_same_2_lane =
  elts_same (fun vtype -> Arity3 (vtype 0, vtype 1, vtype 2, vtype 3))

let elts_same_3 = elts_same_2_lane

let elts_same_2 =
  elts_same (fun vtype -> Arity2 (vtype 0, vtype 1, vtype 2))

let elts_same_1 =
  elts_same (fun vtype -> Arity1 (vtype 0, vtype 1))

(* Use for signed/unsigned invariant operations (i.e. where the operation
   doesn't depend on the sign of the data).  *)
let sign_invar_io_lane = make_sign_invariant elts_same_io_lane
let sign_invar_io = make_sign_invariant elts_same_io
let sign_invar_2_lane = make_sign_invariant elts_same_2_lane
let sign_invar_2 = make_sign_invariant elts_same_2
let sign_invar_1 = make_sign_invariant elts_same_1
(* Sign-sensitive comparison: the result is the unsigned (mask) type of the
   same width, and the element type is kept so signed and unsigned variants
   pick distinct builtins.  *)
let cmp_sign_matters shape elt =
  let vtype = type_for_elt shape elt
  and rtype = type_for_elt shape (unsigned_of_elt elt) 0 in
  Arity2 (rtype, vtype 1, vtype 2), elt

(* Signed/unsigned invariant comparison.  Polynomial compares share the
   8-bit integer builtin.  *)
let cmp_sign_invar shape elt =
  let shape', elt' = cmp_sign_matters shape elt in
  let elt'' =
    match non_signed_variant elt' with
      P8 -> I8
    | x -> x
  in
  shape', elt''

(* Comparison (VTST) where only the element width matters.  *)
let cmp_bits shape elt =
  let vtype = type_for_elt shape elt
  and rtype = type_for_elt shape (unsigned_of_elt elt) 0
  and bits_only = bits_of_elt elt in
  Arity2 (rtype, vtype 1, vtype 2), bits_only
(* Register-controlled shift (VSHL): the shift-count operand is always the
   signed element type of the same width, whatever the data's sign.  *)
let reg_shift shape elt =
  let vtype = type_for_elt shape elt
  and op2type = type_for_elt shape (signed_of_elt elt) 2 in
  Arity2 (vtype 0, vtype 1, op2type), elt

(* Genericised constant-shift type-generating function.  [mkimm] builds the
   immediate operand type from the element width; ~arity overrides the
   default two-operand prototype (used by accumulating shifts); ~result
   transforms the element type used for the destination.  *)
let const_shift mkimm ?arity ?result shape elt =
  let op2type = (shapemap shape 2) elt in
  let op2width = elt_width op2type in
  let op2 = mkimm op2width
  and op1 = type_for_elt shape elt 1
  and r_elt =
    match result with
      None -> elt
    | Some restriction -> restriction elt in
  let rtype = type_for_elt shape r_elt 0 in
  match arity with
    None -> Arity2 (rtype, op1, op2), elt
  | Some mkarity -> mkarity rtype op1 op2, elt
(* Use for immediate right-shifts.  Right-shift immediates range 1..width;
   left-shift immediates range 0..width-1 — hence the differing T_immediate
   bounds below.  *)
let shift_right shape elt =
  const_shift (fun imm -> T_immediate (1, imm)) shape elt

(* Right-shift and accumulate (VSRA): destination is also an input.  *)
let shift_right_acc shape elt =
  const_shift (fun imm -> T_immediate (1, imm))
    ~arity:(fun dst op1 op2 -> Arity3 (dst, dst, op1, op2)) shape elt

(* Use for immediate right-shifts when the operation doesn't care about
   signedness.  *)
let shift_right_sign_invar =
  make_sign_invariant shift_right

(* Immediate right-shift; result is unsigned even when operand is signed.  *)
let shift_right_to_uns shape elt =
  const_shift (fun imm -> T_immediate (1, imm)) ~result:unsigned_of_elt
    shape elt

(* Immediate left-shift.  *)
let shift_left shape elt =
  const_shift (fun imm -> T_immediate (0, imm - 1)) shape elt

(* Immediate left-shift, unsigned result.  *)
let shift_left_to_uns shape elt =
  const_shift (fun imm -> T_immediate (0, imm - 1)) ~result:unsigned_of_elt
    shape elt

(* Immediate left-shift, don't care about signs.  *)
let shift_left_sign_invar =
  make_sign_invariant shift_left

(* Shift left/right and insert (VSLI/VSRI): only element size matters.  *)
let shift_insert shape elt =
  let arity, elt =
    const_shift (fun imm -> T_immediate (1, imm))
      ~arity:(fun dst op1 op2 -> Arity3 (dst, dst, op1, op2)) shape elt in
  arity, bits_of_elt elt
(* Get/set lane.  Polynomial extracts share the unsigned builtins of the
   same width; 32-bit extracts share a single width-only builtin.  *)
let get_lane shape elt =
  let vtype = type_for_elt shape elt in
  Arity2 (vtype 0, vtype 1, vtype 2),
  (match elt with P8 -> U8 | P16 -> U16 | S32 | U32 | F32 -> B32 | x -> x)

let set_lane shape elt =
  let vtype = type_for_elt shape elt in
  Arity3 (vtype 0, vtype 1, vtype 2, vtype 3), bits_of_elt elt

let set_lane_notype shape elt =
  let vtype = type_for_elt shape elt in
  Arity3 (vtype 0, vtype 1, vtype 2, vtype 3), NoElts

(* Build a vector from a 64-bit bit pattern (vcreate): the argument is
   always a uint64, whatever the destination element type.  *)
let create_vector shape elt =
  let vtype = type_for_elt shape U64 1
  and rtype = type_for_elt shape elt 0 in
  Arity1 (rtype, vtype), elt
(* Conversion type-generator: the element must be a Conv/Cast pair, whose
   source side types the argument(s) and destination side types the
   result.  *)
let conv make_arity shape elt =
  let edest, esrc = match elt with
      Conv (edest, esrc) | Cast (edest, esrc) -> edest, esrc
    | _ -> failwith "Non-conversion element in conversion" in
  let vtype = type_for_elt shape esrc
  and rtype = type_for_elt shape edest 0 in
  make_arity rtype vtype, elt

let conv_1 = conv (fun rtype vtype -> Arity1 (rtype, vtype 1))
let conv_2 = conv (fun rtype vtype -> Arity2 (rtype, vtype 1, vtype 2))

(* Operation has an unsigned result even if operands are signed.  *)
let dst_unsign make_arity shape elt =
  let vtype = type_for_elt shape elt
  and rtype = type_for_elt shape (unsigned_of_elt elt) 0 in
  make_arity rtype vtype, elt

let dst_unsign_1 = dst_unsign (fun rtype vtype -> Arity1 (rtype, vtype 1))
(* Wrap a type-generating function so the reported element type is collapsed
   to its bits-only form.  *)
let make_bits_only func shape elt =
  let arity, elt' = func shape elt in
  arity, bits_of_elt elt'

(* Extend operation (vext): three operands, bits-only element.  *)
let extend shape elt =
  let vtype = type_for_elt shape elt in
  Arity3 (vtype 0, vtype 1, vtype 2, vtype 3), bits_of_elt elt

(* Table look-up operations.  Operand 2 is signed/unsigned for signed/unsigned
   integer ops respectively, or unsigned for polynomial ops.  *)
let table mkarity shape elt =
  let vtype = type_for_elt shape elt in
  let op2 = type_for_elt shape (poly_unsigned_variant elt) 2 in
  mkarity vtype op2, bits_of_elt elt

let table_2 = table (fun vtype op2 -> Arity2 (vtype 0, vtype 1, op2))
let table_io = table (fun vtype op2 -> Arity3 (vtype 0, vtype 0, vtype 1, op2))
(* Operations where only bits matter.  *)
let bits_1 = make_bits_only elts_same_1
let bits_2 = make_bits_only elts_same_2
let bits_3 = make_bits_only elts_same_3

(* Store insns: void result, pointer destination as operand 0.  *)
let store_1 shape elt =
  let vtype = type_for_elt shape elt in
  Arity2 (T_void, vtype 0, vtype 1), bits_of_elt elt

let store_3 shape elt =
  let vtype = type_for_elt shape elt in
  Arity3 (T_void, vtype 0, vtype 1, vtype 2), bits_of_elt elt

(* Wrap a type-generating function, discarding the element type entirely.  *)
let make_notype func shape elt =
  let arity, _ = func shape elt in
  arity, NoElts

let notype_1 = make_notype elts_same_1
let notype_2 = make_notype elts_same_2
let notype_3 = make_notype elts_same_3

(* Bit-select operations (first operand is unsigned int).  *)
let bit_select shape elt =
  let vtype = type_for_elt shape elt
  and itype = type_for_elt shape (unsigned_of_elt elt) in
  Arity3 (vtype 0, itype 1, vtype 2, vtype 3), NoElts
(* Common lists of supported element types, used as the last field of the
   entries in the big ops table below.  List order is significant (it fixes
   the order intrinsic variants are emitted), so the derived lists are built
   by consing onto the base lists rather than re-spelled.  *)
let s_8_32 = [S8; S16; S32]
let u_8_32 = [U8; U16; U32]
let su_8_32 = [S8; S16; S32; U8; U16; U32]
let su_8_64 = S64 :: U64 :: su_8_32
let su_16_64 = [S16; S32; S64; U16; U32; U64]
let pf_su_8_16 = [P8; P16; S8; S16; U8; U16]
let pf_su_8_32 = P8 :: P16 :: F32 :: su_8_32
let pf_su_8_64 = P8 :: P16 :: F32 :: su_8_64
let suf_32 = [S32; U32; F32]
784 let ops =
786 (* Addition. *)
787 Vadd, [], All (3, Dreg), "vadd", sign_invar_2, F32 :: su_8_32;
788 Vadd, [No_op], All (3, Dreg), "vadd", sign_invar_2, [S64; U64];
789 Vadd, [], All (3, Qreg), "vaddQ", sign_invar_2, F32 :: su_8_64;
790 Vadd, [], Long, "vaddl", elts_same_2, su_8_32;
791 Vadd, [], Wide, "vaddw", elts_same_2, su_8_32;
792 Vadd, [Halving], All (3, Dreg), "vhadd", elts_same_2, su_8_32;
793 Vadd, [Halving], All (3, Qreg), "vhaddQ", elts_same_2, su_8_32;
794 Vadd, [Instruction_name ["vrhadd"]; Rounding; Halving],
795 All (3, Dreg), "vRhadd", elts_same_2, su_8_32;
796 Vadd, [Instruction_name ["vrhadd"]; Rounding; Halving],
797 All (3, Qreg), "vRhaddQ", elts_same_2, su_8_32;
798 Vadd, [Saturating], All (3, Dreg), "vqadd", elts_same_2, su_8_64;
799 Vadd, [Saturating], All (3, Qreg), "vqaddQ", elts_same_2, su_8_64;
800 Vadd, [High_half], Narrow, "vaddhn", sign_invar_2, su_16_64;
801 Vadd, [Instruction_name ["vraddhn"]; Rounding; High_half],
802 Narrow, "vRaddhn", sign_invar_2, su_16_64;
804 (* Multiplication. *)
805 Vmul, [], All (3, Dreg), "vmul", sign_invar_2, P8 :: F32 :: su_8_32;
806 Vmul, [], All (3, Qreg), "vmulQ", sign_invar_2, P8 :: F32 :: su_8_32;
807 Vmul, [Saturating; Doubling; High_half], All (3, Dreg), "vqdmulh",
808 elts_same_2, [S16; S32];
809 Vmul, [Saturating; Doubling; High_half], All (3, Qreg), "vqdmulhQ",
810 elts_same_2, [S16; S32];
811 Vmul,
812 [Saturating; Rounding; Doubling; High_half;
813 Instruction_name ["vqrdmulh"]],
814 All (3, Dreg), "vqRdmulh",
815 elts_same_2, [S16; S32];
816 Vmul,
817 [Saturating; Rounding; Doubling; High_half;
818 Instruction_name ["vqrdmulh"]],
819 All (3, Qreg), "vqRdmulhQ",
820 elts_same_2, [S16; S32];
821 Vmul, [], Long, "vmull", elts_same_2, P8 :: su_8_32;
822 Vmul, [Saturating; Doubling], Long, "vqdmull", elts_same_2, [S16; S32];
824 (* Multiply-accumulate. *)
825 Vmla, [], All (3, Dreg), "vmla", sign_invar_io, F32 :: su_8_32;
826 Vmla, [], All (3, Qreg), "vmlaQ", sign_invar_io, F32 :: su_8_32;
827 Vmla, [], Long, "vmlal", elts_same_io, su_8_32;
828 Vmla, [Saturating; Doubling], Long, "vqdmlal", elts_same_io, [S16; S32];
830 (* Multiply-subtract. *)
831 Vmls, [], All (3, Dreg), "vmls", sign_invar_io, F32 :: su_8_32;
832 Vmls, [], All (3, Qreg), "vmlsQ", sign_invar_io, F32 :: su_8_32;
833 Vmls, [], Long, "vmlsl", elts_same_io, su_8_32;
834 Vmls, [Saturating; Doubling], Long, "vqdmlsl", elts_same_io, [S16; S32];
836 (* Fused-multiply-accumulate. *)
837 Vfma, [Requires_feature "FMA"], All (3, Dreg), "vfma", elts_same_io, [F32];
838 Vfma, [Requires_feature "FMA"], All (3, Qreg), "vfmaQ", elts_same_io, [F32];
839 Vfms, [Requires_feature "FMA"], All (3, Dreg), "vfms", elts_same_io, [F32];
840 Vfms, [Requires_feature "FMA"], All (3, Qreg), "vfmsQ", elts_same_io, [F32];
842 (* Round to integral. *)
843 Vrintn, [Builtin_name "vrintn"; Requires_arch 8], Use_operands [| Dreg; Dreg |],
844 "vrndn", elts_same_1, [F32];
845 Vrintn, [Builtin_name "vrintn"; Requires_arch 8], Use_operands [| Qreg; Qreg |],
846 "vrndqn", elts_same_1, [F32];
847 Vrinta, [Builtin_name "vrinta"; Requires_arch 8], Use_operands [| Dreg; Dreg |],
848 "vrnda", elts_same_1, [F32];
849 Vrinta, [Builtin_name "vrinta"; Requires_arch 8], Use_operands [| Qreg; Qreg |],
850 "vrndqa", elts_same_1, [F32];
851 Vrintp, [Builtin_name "vrintp"; Requires_arch 8], Use_operands [| Dreg; Dreg |],
852 "vrndp", elts_same_1, [F32];
853 Vrintp, [Builtin_name "vrintp"; Requires_arch 8], Use_operands [| Qreg; Qreg |],
854 "vrndqp", elts_same_1, [F32];
855 Vrintm, [Builtin_name "vrintm"; Requires_arch 8], Use_operands [| Dreg; Dreg |],
856 "vrndm", elts_same_1, [F32];
857 Vrintm, [Builtin_name "vrintm"; Requires_arch 8], Use_operands [| Qreg; Qreg |],
858 "vrndqm", elts_same_1, [F32];
859 Vrintz, [Builtin_name "vrintz"; Requires_arch 8], Use_operands [| Dreg; Dreg |],
860 "vrnd", elts_same_1, [F32];
861 Vrintz, [Builtin_name "vrintz"; Requires_arch 8], Use_operands [| Qreg; Qreg |],
862 "vrndq", elts_same_1, [F32];
863 (* Subtraction. *)
864 Vsub, [], All (3, Dreg), "vsub", sign_invar_2, F32 :: su_8_32;
865 Vsub, [No_op], All (3, Dreg), "vsub", sign_invar_2, [S64; U64];
866 Vsub, [], All (3, Qreg), "vsubQ", sign_invar_2, F32 :: su_8_64;
867 Vsub, [], Long, "vsubl", elts_same_2, su_8_32;
868 Vsub, [], Wide, "vsubw", elts_same_2, su_8_32;
869 Vsub, [Halving], All (3, Dreg), "vhsub", elts_same_2, su_8_32;
870 Vsub, [Halving], All (3, Qreg), "vhsubQ", elts_same_2, su_8_32;
871 Vsub, [Saturating], All (3, Dreg), "vqsub", elts_same_2, su_8_64;
872 Vsub, [Saturating], All (3, Qreg), "vqsubQ", elts_same_2, su_8_64;
873 Vsub, [High_half], Narrow, "vsubhn", sign_invar_2, su_16_64;
874 Vsub, [Instruction_name ["vrsubhn"]; Rounding; High_half],
875 Narrow, "vRsubhn", sign_invar_2, su_16_64;
877 (* Comparison, equal. *)
878 Vceq, [], All (3, Dreg), "vceq", cmp_sign_invar, P8 :: F32 :: su_8_32;
879 Vceq, [], All (3, Qreg), "vceqQ", cmp_sign_invar, P8 :: F32 :: su_8_32;
881 (* Comparison, greater-than or equal. *)
882 Vcge, [], All (3, Dreg), "vcge", cmp_sign_matters, F32 :: s_8_32;
883 Vcge, [Instruction_name ["vcge"]; Builtin_name "vcgeu"],
884 All (3, Dreg), "vcge", cmp_sign_matters,
885 u_8_32;
886 Vcge, [], All (3, Qreg), "vcgeQ", cmp_sign_matters, F32 :: s_8_32;
887 Vcge, [Instruction_name ["vcge"]; Builtin_name "vcgeu"],
888 All (3, Qreg), "vcgeQ", cmp_sign_matters,
889 u_8_32;
891 (* Comparison, less-than or equal. *)
892 Vcle, [Flipped "vcge"], All (3, Dreg), "vcle", cmp_sign_matters,
893 F32 :: s_8_32;
894 Vcle, [Instruction_name ["vcge"]; Flipped "vcgeu"],
895 All (3, Dreg), "vcle", cmp_sign_matters,
896 u_8_32;
897 Vcle, [Instruction_name ["vcge"]; Flipped "vcgeQ"],
898 All (3, Qreg), "vcleQ", cmp_sign_matters,
899 F32 :: s_8_32;
900 Vcle, [Instruction_name ["vcge"]; Flipped "vcgeuQ"],
901 All (3, Qreg), "vcleQ", cmp_sign_matters,
902 u_8_32;
904 (* Comparison, greater-than. *)
905 Vcgt, [], All (3, Dreg), "vcgt", cmp_sign_matters, F32 :: s_8_32;
906 Vcgt, [Instruction_name ["vcgt"]; Builtin_name "vcgtu"],
907 All (3, Dreg), "vcgt", cmp_sign_matters,
908 u_8_32;
909 Vcgt, [], All (3, Qreg), "vcgtQ", cmp_sign_matters, F32 :: s_8_32;
910 Vcgt, [Instruction_name ["vcgt"]; Builtin_name "vcgtu"],
911 All (3, Qreg), "vcgtQ", cmp_sign_matters,
912 u_8_32;
914 (* Comparison, less-than. *)
915 Vclt, [Flipped "vcgt"], All (3, Dreg), "vclt", cmp_sign_matters,
916 F32 :: s_8_32;
917 Vclt, [Instruction_name ["vcgt"]; Flipped "vcgtu"],
918 All (3, Dreg), "vclt", cmp_sign_matters,
919 u_8_32;
920 Vclt, [Instruction_name ["vcgt"]; Flipped "vcgtQ"],
921 All (3, Qreg), "vcltQ", cmp_sign_matters,
922 F32 :: s_8_32;
923 Vclt, [Instruction_name ["vcgt"]; Flipped "vcgtuQ"],
924 All (3, Qreg), "vcltQ", cmp_sign_matters,
925 u_8_32;
927 (* Compare absolute greater-than or equal. *)
928 Vcage, [Instruction_name ["vacge"]],
929 All (3, Dreg), "vcage", cmp_sign_matters, [F32];
930 Vcage, [Instruction_name ["vacge"]],
931 All (3, Qreg), "vcageQ", cmp_sign_matters, [F32];
933 (* Compare absolute less-than or equal. *)
934 Vcale, [Instruction_name ["vacge"]; Flipped "vcage"],
935 All (3, Dreg), "vcale", cmp_sign_matters, [F32];
936 Vcale, [Instruction_name ["vacge"]; Flipped "vcageQ"],
937 All (3, Qreg), "vcaleQ", cmp_sign_matters, [F32];
939 (* Compare absolute greater-than or equal. *)
940 Vcagt, [Instruction_name ["vacgt"]],
941 All (3, Dreg), "vcagt", cmp_sign_matters, [F32];
942 Vcagt, [Instruction_name ["vacgt"]],
943 All (3, Qreg), "vcagtQ", cmp_sign_matters, [F32];
945 (* Compare absolute less-than or equal. *)
946 Vcalt, [Instruction_name ["vacgt"]; Flipped "vcagt"],
947 All (3, Dreg), "vcalt", cmp_sign_matters, [F32];
948 Vcalt, [Instruction_name ["vacgt"]; Flipped "vcagtQ"],
949 All (3, Qreg), "vcaltQ", cmp_sign_matters, [F32];
951 (* Test bits. *)
952 Vtst, [], All (3, Dreg), "vtst", cmp_bits, P8 :: su_8_32;
953 Vtst, [], All (3, Qreg), "vtstQ", cmp_bits, P8 :: su_8_32;
955 (* Absolute difference. *)
956 Vabd, [], All (3, Dreg), "vabd", elts_same_2, F32 :: su_8_32;
957 Vabd, [], All (3, Qreg), "vabdQ", elts_same_2, F32 :: su_8_32;
958 Vabd, [], Long, "vabdl", elts_same_2, su_8_32;
960 (* Absolute difference and accumulate. *)
961 Vaba, [], All (3, Dreg), "vaba", elts_same_io, su_8_32;
962 Vaba, [], All (3, Qreg), "vabaQ", elts_same_io, su_8_32;
963 Vaba, [], Long, "vabal", elts_same_io, su_8_32;
965 (* Max. *)
966 Vmax, [], All (3, Dreg), "vmax", elts_same_2, F32 :: su_8_32;
967 Vmax, [], All (3, Qreg), "vmaxQ", elts_same_2, F32 :: su_8_32;
969 (* Min. *)
970 Vmin, [], All (3, Dreg), "vmin", elts_same_2, F32 :: su_8_32;
971 Vmin, [], All (3, Qreg), "vminQ", elts_same_2, F32 :: su_8_32;
973 (* Pairwise add. *)
974 Vpadd, [], All (3, Dreg), "vpadd", sign_invar_2, F32 :: su_8_32;
975 Vpadd, [], Long_noreg Dreg, "vpaddl", elts_same_1, su_8_32;
976 Vpadd, [], Long_noreg Qreg, "vpaddlQ", elts_same_1, su_8_32;
978 (* Pairwise add, widen and accumulate. *)
979 Vpada, [], Wide_noreg Dreg, "vpadal", elts_same_2, su_8_32;
980 Vpada, [], Wide_noreg Qreg, "vpadalQ", elts_same_2, su_8_32;
982 (* Folding maximum, minimum. *)
983 Vpmax, [], All (3, Dreg), "vpmax", elts_same_2, F32 :: su_8_32;
984 Vpmin, [], All (3, Dreg), "vpmin", elts_same_2, F32 :: su_8_32;
986 (* Reciprocal step. *)
987 Vrecps, [], All (3, Dreg), "vrecps", elts_same_2, [F32];
988 Vrecps, [], All (3, Qreg), "vrecpsQ", elts_same_2, [F32];
989 Vrsqrts, [], All (3, Dreg), "vrsqrts", elts_same_2, [F32];
990 Vrsqrts, [], All (3, Qreg), "vrsqrtsQ", elts_same_2, [F32];
992 (* Vector shift left. *)
993 Vshl, [], All (3, Dreg), "vshl", reg_shift, su_8_64;
994 Vshl, [], All (3, Qreg), "vshlQ", reg_shift, su_8_64;
995 Vshl, [Instruction_name ["vrshl"]; Rounding],
996 All (3, Dreg), "vRshl", reg_shift, su_8_64;
997 Vshl, [Instruction_name ["vrshl"]; Rounding],
998 All (3, Qreg), "vRshlQ", reg_shift, su_8_64;
999 Vshl, [Saturating], All (3, Dreg), "vqshl", reg_shift, su_8_64;
1000 Vshl, [Saturating], All (3, Qreg), "vqshlQ", reg_shift, su_8_64;
1001 Vshl, [Instruction_name ["vqrshl"]; Saturating; Rounding],
1002 All (3, Dreg), "vqRshl", reg_shift, su_8_64;
1003 Vshl, [Instruction_name ["vqrshl"]; Saturating; Rounding],
1004 All (3, Qreg), "vqRshlQ", reg_shift, su_8_64;
1006 (* Vector shift right by constant. *)
1007 Vshr_n, [], Binary_imm Dreg, "vshr_n", shift_right, su_8_64;
1008 Vshr_n, [], Binary_imm Qreg, "vshrQ_n", shift_right, su_8_64;
1009 Vshr_n, [Instruction_name ["vrshr"]; Rounding], Binary_imm Dreg,
1010 "vRshr_n", shift_right, su_8_64;
1011 Vshr_n, [Instruction_name ["vrshr"]; Rounding], Binary_imm Qreg,
1012 "vRshrQ_n", shift_right, su_8_64;
1013 Vshr_n, [], Narrow_imm, "vshrn_n", shift_right_sign_invar, su_16_64;
1014 Vshr_n, [Instruction_name ["vrshrn"]; Rounding], Narrow_imm, "vRshrn_n",
1015 shift_right_sign_invar, su_16_64;
1016 Vshr_n, [Saturating], Narrow_imm, "vqshrn_n", shift_right, su_16_64;
1017 Vshr_n, [Instruction_name ["vqrshrn"]; Saturating; Rounding], Narrow_imm,
1018 "vqRshrn_n", shift_right, su_16_64;
1019 Vshr_n, [Saturating; Dst_unsign], Narrow_imm, "vqshrun_n",
1020 shift_right_to_uns, [S16; S32; S64];
1021 Vshr_n, [Instruction_name ["vqrshrun"]; Saturating; Dst_unsign; Rounding],
1022 Narrow_imm, "vqRshrun_n", shift_right_to_uns, [S16; S32; S64];
1024 (* Vector shift left by constant. *)
1025 Vshl_n, [], Binary_imm Dreg, "vshl_n", shift_left_sign_invar, su_8_64;
1026 Vshl_n, [], Binary_imm Qreg, "vshlQ_n", shift_left_sign_invar, su_8_64;
1027 Vshl_n, [Saturating], Binary_imm Dreg, "vqshl_n", shift_left, su_8_64;
1028 Vshl_n, [Saturating], Binary_imm Qreg, "vqshlQ_n", shift_left, su_8_64;
1029 Vshl_n, [Saturating; Dst_unsign], Binary_imm Dreg, "vqshlu_n",
1030 shift_left_to_uns, [S8; S16; S32; S64];
1031 Vshl_n, [Saturating; Dst_unsign], Binary_imm Qreg, "vqshluQ_n",
1032 shift_left_to_uns, [S8; S16; S32; S64];
1033 Vshl_n, [], Long_imm, "vshll_n", shift_left, su_8_32;
1035 (* Vector shift right by constant and accumulate. *)
1036 Vsra_n, [], Binary_imm Dreg, "vsra_n", shift_right_acc, su_8_64;
1037 Vsra_n, [], Binary_imm Qreg, "vsraQ_n", shift_right_acc, su_8_64;
1038 Vsra_n, [Instruction_name ["vrsra"]; Rounding], Binary_imm Dreg,
1039 "vRsra_n", shift_right_acc, su_8_64;
1040 Vsra_n, [Instruction_name ["vrsra"]; Rounding], Binary_imm Qreg,
1041 "vRsraQ_n", shift_right_acc, su_8_64;
1043 (* Vector shift right and insert. *)
1044 Vsri, [], Use_operands [| Dreg; Dreg; Immed |], "vsri_n", shift_insert,
1045 P8 :: P16 :: su_8_64;
1046 Vsri, [], Use_operands [| Qreg; Qreg; Immed |], "vsriQ_n", shift_insert,
1047 P8 :: P16 :: su_8_64;
1049 (* Vector shift left and insert. *)
1050 Vsli, [], Use_operands [| Dreg; Dreg; Immed |], "vsli_n", shift_insert,
1051 P8 :: P16 :: su_8_64;
1052 Vsli, [], Use_operands [| Qreg; Qreg; Immed |], "vsliQ_n", shift_insert,
1053 P8 :: P16 :: su_8_64;
1055 (* Absolute value. *)
1056 Vabs, [], All (2, Dreg), "vabs", elts_same_1, [S8; S16; S32; F32];
1057 Vabs, [], All (2, Qreg), "vabsQ", elts_same_1, [S8; S16; S32; F32];
1058 Vabs, [Saturating], All (2, Dreg), "vqabs", elts_same_1, [S8; S16; S32];
1059 Vabs, [Saturating], All (2, Qreg), "vqabsQ", elts_same_1, [S8; S16; S32];
1061 (* Negate. *)
1062 Vneg, [], All (2, Dreg), "vneg", elts_same_1, [S8; S16; S32; F32];
1063 Vneg, [], All (2, Qreg), "vnegQ", elts_same_1, [S8; S16; S32; F32];
1064 Vneg, [Saturating], All (2, Dreg), "vqneg", elts_same_1, [S8; S16; S32];
1065 Vneg, [Saturating], All (2, Qreg), "vqnegQ", elts_same_1, [S8; S16; S32];
1067 (* Bitwise not. *)
1068 Vmvn, [], All (2, Dreg), "vmvn", notype_1, P8 :: su_8_32;
1069 Vmvn, [], All (2, Qreg), "vmvnQ", notype_1, P8 :: su_8_32;
1071 (* Count leading sign bits. *)
1072 Vcls, [], All (2, Dreg), "vcls", elts_same_1, [S8; S16; S32];
1073 Vcls, [], All (2, Qreg), "vclsQ", elts_same_1, [S8; S16; S32];
1075 (* Count leading zeros. *)
1076 Vclz, [], All (2, Dreg), "vclz", sign_invar_1, su_8_32;
1077 Vclz, [], All (2, Qreg), "vclzQ", sign_invar_1, su_8_32;
1079 (* Count number of set bits. *)
1080 Vcnt, [], All (2, Dreg), "vcnt", bits_1, [P8; S8; U8];
1081 Vcnt, [], All (2, Qreg), "vcntQ", bits_1, [P8; S8; U8];
1083 (* Reciprocal estimate. *)
1084 Vrecpe, [], All (2, Dreg), "vrecpe", elts_same_1, [U32; F32];
1085 Vrecpe, [], All (2, Qreg), "vrecpeQ", elts_same_1, [U32; F32];
1087 (* Reciprocal square-root estimate. *)
1088 Vrsqrte, [], All (2, Dreg), "vrsqrte", elts_same_1, [U32; F32];
1089 Vrsqrte, [], All (2, Qreg), "vrsqrteQ", elts_same_1, [U32; F32];
1091 (* Get lanes from a vector. *)
1092 Vget_lane,
1093 [InfoWord; Disassembles_as [Use_operands [| Corereg; Element_of_dreg |]];
1094 Instruction_name ["vmov"]],
1095 Use_operands [| Corereg; Dreg; Immed |],
1096 "vget_lane", get_lane, pf_su_8_32;
1097 Vget_lane,
1098 [No_op;
1099 InfoWord;
1100 Disassembles_as [Use_operands [| Corereg; Corereg; Dreg |]];
1101 Instruction_name ["vmov"]; Const_valuator (fun _ -> 0)],
1102 Use_operands [| Corereg; Dreg; Immed |],
1103 "vget_lane", notype_2, [S64; U64];
1104 Vget_lane,
1105 [InfoWord; Disassembles_as [Use_operands [| Corereg; Element_of_dreg |]];
1106 Instruction_name ["vmov"]],
1107 Use_operands [| Corereg; Qreg; Immed |],
1108 "vgetQ_lane", get_lane, pf_su_8_32;
1109 Vget_lane,
1110 [InfoWord;
1111 Disassembles_as [Use_operands [| Corereg; Corereg; Dreg |]];
1112 Instruction_name ["vmov"; "fmrrd"]; Const_valuator (fun _ -> 0);
1113 Fixed_core_reg],
1114 Use_operands [| Corereg; Qreg; Immed |],
1115 "vgetQ_lane", notype_2, [S64; U64];
1117 (* Set lanes in a vector. *)
1118 Vset_lane, [Disassembles_as [Use_operands [| Element_of_dreg; Corereg |]];
1119 Instruction_name ["vmov"]],
1120 Use_operands [| Dreg; Corereg; Dreg; Immed |], "vset_lane",
1121 set_lane, pf_su_8_32;
1122 Vset_lane, [No_op;
1123 Disassembles_as [Use_operands [| Dreg; Corereg; Corereg |]];
1124 Instruction_name ["vmov"]; Const_valuator (fun _ -> 0)],
1125 Use_operands [| Dreg; Corereg; Dreg; Immed |], "vset_lane",
1126 set_lane_notype, [S64; U64];
1127 Vset_lane, [Disassembles_as [Use_operands [| Element_of_dreg; Corereg |]];
1128 Instruction_name ["vmov"]],
1129 Use_operands [| Qreg; Corereg; Qreg; Immed |], "vsetQ_lane",
1130 set_lane, pf_su_8_32;
1131 Vset_lane, [Disassembles_as [Use_operands [| Dreg; Corereg; Corereg |]];
1132 Instruction_name ["vmov"]; Const_valuator (fun _ -> 0)],
1133 Use_operands [| Qreg; Corereg; Qreg; Immed |], "vsetQ_lane",
1134 set_lane_notype, [S64; U64];
1136 (* Create vector from literal bit pattern. *)
1137 Vcreate,
1138 [No_op], (* Not really, but it can yield various things that are too
1139 hard for the test generator at this time. *)
1140 Use_operands [| Dreg; Corereg |], "vcreate", create_vector,
1141 pf_su_8_64;
1143 (* Set all lanes to the same value. *)
1144 Vdup_n,
1145 [Disassembles_as [Use_operands [| Dreg;
1146 Alternatives [ Corereg;
1147 Element_of_dreg ] |]]],
1148 Use_operands [| Dreg; Corereg |], "vdup_n", bits_1,
1149 pf_su_8_32;
1150 Vdup_n,
1151 [No_op;
1152 Instruction_name ["vmov"];
1153 Disassembles_as [Use_operands [| Dreg; Corereg; Corereg |]]],
1154 Use_operands [| Dreg; Corereg |], "vdup_n", notype_1,
1155 [S64; U64];
1156 Vdup_n,
1157 [Disassembles_as [Use_operands [| Qreg;
1158 Alternatives [ Corereg;
1159 Element_of_dreg ] |]]],
1160 Use_operands [| Qreg; Corereg |], "vdupQ_n", bits_1,
1161 pf_su_8_32;
1162 Vdup_n,
1163 [No_op;
1164 Instruction_name ["vmov"];
1165 Disassembles_as [Use_operands [| Dreg; Corereg; Corereg |];
1166 Use_operands [| Dreg; Corereg; Corereg |]]],
1167 Use_operands [| Qreg; Corereg |], "vdupQ_n", notype_1,
1168 [S64; U64];
1170 (* These are just aliases for the above. *)
1171 Vmov_n,
1172 [Builtin_name "vdup_n";
1173 Disassembles_as [Use_operands [| Dreg;
1174 Alternatives [ Corereg;
1175 Element_of_dreg ] |]]],
1176 Use_operands [| Dreg; Corereg |],
1177 "vmov_n", bits_1, pf_su_8_32;
1178 Vmov_n,
1179 [No_op;
1180 Builtin_name "vdup_n";
1181 Instruction_name ["vmov"];
1182 Disassembles_as [Use_operands [| Dreg; Corereg; Corereg |]]],
1183 Use_operands [| Dreg; Corereg |],
1184 "vmov_n", notype_1, [S64; U64];
1185 Vmov_n,
1186 [Builtin_name "vdupQ_n";
1187 Disassembles_as [Use_operands [| Qreg;
1188 Alternatives [ Corereg;
1189 Element_of_dreg ] |]]],
1190 Use_operands [| Qreg; Corereg |],
1191 "vmovQ_n", bits_1, pf_su_8_32;
1192 Vmov_n,
1193 [No_op;
1194 Builtin_name "vdupQ_n";
1195 Instruction_name ["vmov"];
1196 Disassembles_as [Use_operands [| Dreg; Corereg; Corereg |];
1197 Use_operands [| Dreg; Corereg; Corereg |]]],
1198 Use_operands [| Qreg; Corereg |],
1199 "vmovQ_n", notype_1, [S64; U64];
1201 (* Duplicate, lane version. We can't use Use_operands here because the
1202 rightmost register (always Dreg) would be picked up by find_key_operand,
1203 when we want the leftmost register to be used in this case (otherwise
1204 the modes are indistinguishable in neon.md, etc.). *)
1205 Vdup_lane,
1206 [Disassembles_as [Use_operands [| Dreg; Element_of_dreg |]]],
1207 Unary_scalar Dreg, "vdup_lane", bits_2, pf_su_8_32;
1208 Vdup_lane,
1209 [No_op; Const_valuator (fun _ -> 0)],
1210 Unary_scalar Dreg, "vdup_lane", bits_2, [S64; U64];
1211 Vdup_lane,
1212 [Disassembles_as [Use_operands [| Qreg; Element_of_dreg |]]],
1213 Unary_scalar Qreg, "vdupQ_lane", bits_2, pf_su_8_32;
1214 Vdup_lane,
1215 [No_op; Const_valuator (fun _ -> 0)],
1216 Unary_scalar Qreg, "vdupQ_lane", bits_2, [S64; U64];
1218 (* Combining vectors. *)
1219 Vcombine, [No_op],
1220 Use_operands [| Qreg; Dreg; Dreg |], "vcombine", notype_2,
1221 pf_su_8_64;
1223 (* Splitting vectors. *)
1224 Vget_high, [No_op],
1225 Use_operands [| Dreg; Qreg |], "vget_high",
1226 notype_1, pf_su_8_64;
1227 Vget_low, [Instruction_name ["vmov"];
1228 Disassembles_as [Use_operands [| Dreg; Dreg |]];
1229 Fixed_vector_reg],
1230 Use_operands [| Dreg; Qreg |], "vget_low",
1231 notype_1, pf_su_8_32;
1232 Vget_low, [No_op],
1233 Use_operands [| Dreg; Qreg |], "vget_low",
1234 notype_1, [S64; U64];
1236 (* Conversions. *)
1237 Vcvt, [InfoWord], All (2, Dreg), "vcvt", conv_1,
1238 [Conv (S32, F32); Conv (U32, F32); Conv (F32, S32); Conv (F32, U32)];
1239 Vcvt, [InfoWord], All (2, Qreg), "vcvtQ", conv_1,
1240 [Conv (S32, F32); Conv (U32, F32); Conv (F32, S32); Conv (F32, U32)];
1241 Vcvt, [Builtin_name "vcvt" ; Requires_FP_bit 1],
1242 Use_operands [| Dreg; Qreg; |], "vcvt", conv_1, [Conv (F16, F32)];
1243 Vcvt, [Builtin_name "vcvt" ; Requires_FP_bit 1],
1244 Use_operands [| Qreg; Dreg; |], "vcvt", conv_1, [Conv (F32, F16)];
1245 Vcvt_n, [InfoWord], Use_operands [| Dreg; Dreg; Immed |], "vcvt_n", conv_2,
1246 [Conv (S32, F32); Conv (U32, F32); Conv (F32, S32); Conv (F32, U32)];
1247 Vcvt_n, [InfoWord], Use_operands [| Qreg; Qreg; Immed |], "vcvtQ_n", conv_2,
1248 [Conv (S32, F32); Conv (U32, F32); Conv (F32, S32); Conv (F32, U32)];
1250 (* Move, narrowing. *)
1251 Vmovn, [Disassembles_as [Use_operands [| Dreg; Qreg |]]],
1252 Narrow, "vmovn", sign_invar_1, su_16_64;
1253 Vmovn, [Disassembles_as [Use_operands [| Dreg; Qreg |]]; Saturating],
1254 Narrow, "vqmovn", elts_same_1, su_16_64;
1255 Vmovn,
1256 [Disassembles_as [Use_operands [| Dreg; Qreg |]]; Saturating; Dst_unsign],
1257 Narrow, "vqmovun", dst_unsign_1,
1258 [S16; S32; S64];
1260 (* Move, long. *)
1261 Vmovl, [Disassembles_as [Use_operands [| Qreg; Dreg |]]],
1262 Long, "vmovl", elts_same_1, su_8_32;
1264 (* Table lookup. *)
1265 Vtbl 1,
1266 [Instruction_name ["vtbl"];
1267 Disassembles_as [Use_operands [| Dreg; VecArray (1, Dreg); Dreg |]]],
1268 Use_operands [| Dreg; Dreg; Dreg |], "vtbl1", table_2, [U8; S8; P8];
1269 Vtbl 2, [Instruction_name ["vtbl"]],
1270 Use_operands [| Dreg; VecArray (2, Dreg); Dreg |], "vtbl2", table_2,
1271 [U8; S8; P8];
1272 Vtbl 3, [Instruction_name ["vtbl"]],
1273 Use_operands [| Dreg; VecArray (3, Dreg); Dreg |], "vtbl3", table_2,
1274 [U8; S8; P8];
1275 Vtbl 4, [Instruction_name ["vtbl"]],
1276 Use_operands [| Dreg; VecArray (4, Dreg); Dreg |], "vtbl4", table_2,
1277 [U8; S8; P8];
1279 (* Extended table lookup. *)
1280 Vtbx 1,
1281 [Instruction_name ["vtbx"];
1282 Disassembles_as [Use_operands [| Dreg; VecArray (1, Dreg); Dreg |]]],
1283 Use_operands [| Dreg; Dreg; Dreg |], "vtbx1", table_io, [U8; S8; P8];
1284 Vtbx 2, [Instruction_name ["vtbx"]],
1285 Use_operands [| Dreg; VecArray (2, Dreg); Dreg |], "vtbx2", table_io,
1286 [U8; S8; P8];
1287 Vtbx 3, [Instruction_name ["vtbx"]],
1288 Use_operands [| Dreg; VecArray (3, Dreg); Dreg |], "vtbx3", table_io,
1289 [U8; S8; P8];
1290 Vtbx 4, [Instruction_name ["vtbx"]],
1291 Use_operands [| Dreg; VecArray (4, Dreg); Dreg |], "vtbx4", table_io,
1292 [U8; S8; P8];
1294 (* Multiply, lane. (note: these were undocumented at the time of
1295 writing). *)
1296 Vmul_lane, [], By_scalar Dreg, "vmul_lane", sign_invar_2_lane,
1297 [S16; S32; U16; U32; F32];
1298 Vmul_lane, [], By_scalar Qreg, "vmulQ_lane", sign_invar_2_lane,
1299 [S16; S32; U16; U32; F32];
1301 (* Multiply-accumulate, lane. *)
1302 Vmla_lane, [], By_scalar Dreg, "vmla_lane", sign_invar_io_lane,
1303 [S16; S32; U16; U32; F32];
1304 Vmla_lane, [], By_scalar Qreg, "vmlaQ_lane", sign_invar_io_lane,
1305 [S16; S32; U16; U32; F32];
1306 Vmla_lane, [], Wide_lane, "vmlal_lane", elts_same_io_lane,
1307 [S16; S32; U16; U32];
1308 Vmla_lane, [Saturating; Doubling], Wide_lane, "vqdmlal_lane",
1309 elts_same_io_lane, [S16; S32];
1311 (* Multiply-subtract, lane. *)
1312 Vmls_lane, [], By_scalar Dreg, "vmls_lane", sign_invar_io_lane,
1313 [S16; S32; U16; U32; F32];
1314 Vmls_lane, [], By_scalar Qreg, "vmlsQ_lane", sign_invar_io_lane,
1315 [S16; S32; U16; U32; F32];
1316 Vmls_lane, [], Wide_lane, "vmlsl_lane", elts_same_io_lane,
1317 [S16; S32; U16; U32];
1318 Vmls_lane, [Saturating; Doubling], Wide_lane, "vqdmlsl_lane",
1319 elts_same_io_lane, [S16; S32];
1321 (* Long multiply, lane. *)
1322 Vmull_lane, [],
1323 Wide_lane, "vmull_lane", elts_same_2_lane, [S16; S32; U16; U32];
1325 (* Saturating doubling long multiply, lane. *)
1326 Vqdmull_lane, [Saturating; Doubling],
1327 Wide_lane, "vqdmull_lane", elts_same_2_lane, [S16; S32];
1329 (* Saturating doubling multiply high, lane. *)
1330 Vqdmulh_lane, [Saturating; Halving],
1331 By_scalar Qreg, "vqdmulhQ_lane", elts_same_2_lane, [S16; S32];
1332 Vqdmulh_lane, [Saturating; Halving],
1333 By_scalar Dreg, "vqdmulh_lane", elts_same_2_lane, [S16; S32];
1334 Vqdmulh_lane, [Saturating; Halving; Rounding;
1335 Instruction_name ["vqrdmulh"]],
1336 By_scalar Qreg, "vqRdmulhQ_lane", elts_same_2_lane, [S16; S32];
1337 Vqdmulh_lane, [Saturating; Halving; Rounding;
1338 Instruction_name ["vqrdmulh"]],
1339 By_scalar Dreg, "vqRdmulh_lane", elts_same_2_lane, [S16; S32];
1341 (* Vector multiply by scalar. *)
1342 Vmul_n, [InfoWord;
1343 Disassembles_as [Use_operands [| Dreg; Dreg; Element_of_dreg |]]],
1344 Use_operands [| Dreg; Dreg; Corereg |], "vmul_n",
1345 sign_invar_2, [S16; S32; U16; U32; F32];
1346 Vmul_n, [InfoWord;
1347 Disassembles_as [Use_operands [| Qreg; Qreg; Element_of_dreg |]]],
1348 Use_operands [| Qreg; Qreg; Corereg |], "vmulQ_n",
1349 sign_invar_2, [S16; S32; U16; U32; F32];
1351 (* Vector long multiply by scalar. *)
1352 Vmull_n, [Instruction_name ["vmull"];
1353 Disassembles_as [Use_operands [| Qreg; Dreg; Element_of_dreg |]]],
1354 Wide_scalar, "vmull_n",
1355 elts_same_2, [S16; S32; U16; U32];
1357 (* Vector saturating doubling long multiply by scalar. *)
1358 Vqdmull_n, [Saturating; Doubling;
1359 Disassembles_as [Use_operands [| Qreg; Dreg;
1360 Element_of_dreg |]]],
1361 Wide_scalar, "vqdmull_n",
1362 elts_same_2, [S16; S32];
1364 (* Vector saturating doubling multiply high by scalar. *)
1365 Vqdmulh_n,
1366 [Saturating; Halving; InfoWord;
1367 Disassembles_as [Use_operands [| Qreg; Qreg; Element_of_dreg |]]],
1368 Use_operands [| Qreg; Qreg; Corereg |],
1369 "vqdmulhQ_n", elts_same_2, [S16; S32];
1370 Vqdmulh_n,
1371 [Saturating; Halving; InfoWord;
1372 Disassembles_as [Use_operands [| Dreg; Dreg; Element_of_dreg |]]],
1373 Use_operands [| Dreg; Dreg; Corereg |],
1374 "vqdmulh_n", elts_same_2, [S16; S32];
1375 Vqdmulh_n,
1376 [Saturating; Halving; Rounding; InfoWord;
1377 Instruction_name ["vqrdmulh"];
1378 Disassembles_as [Use_operands [| Qreg; Qreg; Element_of_dreg |]]],
1379 Use_operands [| Qreg; Qreg; Corereg |],
1380 "vqRdmulhQ_n", elts_same_2, [S16; S32];
1381 Vqdmulh_n,
1382 [Saturating; Halving; Rounding; InfoWord;
1383 Instruction_name ["vqrdmulh"];
1384 Disassembles_as [Use_operands [| Dreg; Dreg; Element_of_dreg |]]],
1385 Use_operands [| Dreg; Dreg; Corereg |],
1386 "vqRdmulh_n", elts_same_2, [S16; S32];
1388 (* Vector multiply-accumulate by scalar. *)
1389 Vmla_n, [InfoWord;
1390 Disassembles_as [Use_operands [| Dreg; Dreg; Element_of_dreg |]]],
1391 Use_operands [| Dreg; Dreg; Corereg |], "vmla_n",
1392 sign_invar_io, [S16; S32; U16; U32; F32];
1393 Vmla_n, [InfoWord;
1394 Disassembles_as [Use_operands [| Qreg; Qreg; Element_of_dreg |]]],
1395 Use_operands [| Qreg; Qreg; Corereg |], "vmlaQ_n",
1396 sign_invar_io, [S16; S32; U16; U32; F32];
1397 Vmla_n, [], Wide_scalar, "vmlal_n", elts_same_io, [S16; S32; U16; U32];
1398 Vmla_n, [Saturating; Doubling], Wide_scalar, "vqdmlal_n", elts_same_io,
1399 [S16; S32];
1401 (* Vector multiply subtract by scalar. *)
1402 Vmls_n, [InfoWord;
1403 Disassembles_as [Use_operands [| Dreg; Dreg; Element_of_dreg |]]],
1404 Use_operands [| Dreg; Dreg; Corereg |], "vmls_n",
1405 sign_invar_io, [S16; S32; U16; U32; F32];
1406 Vmls_n, [InfoWord;
1407 Disassembles_as [Use_operands [| Qreg; Qreg; Element_of_dreg |]]],
1408 Use_operands [| Qreg; Qreg; Corereg |], "vmlsQ_n",
1409 sign_invar_io, [S16; S32; U16; U32; F32];
1410 Vmls_n, [], Wide_scalar, "vmlsl_n", elts_same_io, [S16; S32; U16; U32];
1411 Vmls_n, [Saturating; Doubling], Wide_scalar, "vqdmlsl_n", elts_same_io,
1412 [S16; S32];
1414 (* Vector extract. *)
1415 Vext, [Const_valuator (fun _ -> 0)],
1416 Use_operands [| Dreg; Dreg; Dreg; Immed |], "vext", extend,
1417 pf_su_8_64;
1418 Vext, [Const_valuator (fun _ -> 0)],
1419 Use_operands [| Qreg; Qreg; Qreg; Immed |], "vextQ", extend,
1420 pf_su_8_64;
1422 (* Reverse elements. *)
1423 Vrev64, [Use_shuffle (rev_elems 64)], All (2, Dreg), "vrev64", bits_1,
1424 P8 :: P16 :: F32 :: su_8_32;
1425 Vrev64, [Use_shuffle (rev_elems 64)], All (2, Qreg), "vrev64Q", bits_1,
1426 P8 :: P16 :: F32 :: su_8_32;
1427 Vrev32, [Use_shuffle (rev_elems 32)], All (2, Dreg), "vrev32", bits_1,
1428 [P8; P16; S8; U8; S16; U16];
1429 Vrev32, [Use_shuffle (rev_elems 32)], All (2, Qreg), "vrev32Q", bits_1,
1430 [P8; P16; S8; U8; S16; U16];
1431 Vrev16, [Use_shuffle (rev_elems 16)], All (2, Dreg), "vrev16", bits_1,
1432 [P8; S8; U8];
1433 Vrev16, [Use_shuffle (rev_elems 16)], All (2, Qreg), "vrev16Q", bits_1,
1434 [P8; S8; U8];
1436 (* Bit selection. *)
1437 Vbsl,
1438 [Instruction_name ["vbsl"; "vbit"; "vbif"];
1439 Disassembles_as [Use_operands [| Dreg; Dreg; Dreg |]]],
1440 Use_operands [| Dreg; Dreg; Dreg; Dreg |], "vbsl", bit_select,
1441 pf_su_8_64;
1442 Vbsl,
1443 [Instruction_name ["vbsl"; "vbit"; "vbif"];
1444 Disassembles_as [Use_operands [| Qreg; Qreg; Qreg |]]],
1445 Use_operands [| Qreg; Qreg; Qreg; Qreg |], "vbslQ", bit_select,
1446 pf_su_8_64;
1448 Vtrn, [Use_shuffle trn_elems], Pair_result Dreg, "vtrn", bits_2, pf_su_8_16;
1449 Vtrn, [Use_shuffle trn_elems; Instruction_name ["vuzp"]], Pair_result Dreg, "vtrn", bits_2, suf_32;
1450 Vtrn, [Use_shuffle trn_elems], Pair_result Qreg, "vtrnQ", bits_2, pf_su_8_32;
1451 (* Zip elements. *)
1452 Vzip, [Use_shuffle zip_elems], Pair_result Dreg, "vzip", bits_2, pf_su_8_16;
1453 Vzip, [Use_shuffle zip_elems; Instruction_name ["vuzp"]], Pair_result Dreg, "vzip", bits_2, suf_32;
1454 Vzip, [Use_shuffle zip_elems], Pair_result Qreg, "vzipQ", bits_2, pf_su_8_32;
1456 (* Unzip elements. *)
1457 Vuzp, [Use_shuffle uzip_elems], Pair_result Dreg, "vuzp", bits_2,
1458 pf_su_8_32;
1459 Vuzp, [Use_shuffle uzip_elems], Pair_result Qreg, "vuzpQ", bits_2,
1460 pf_su_8_32;
1462 (* Element/structure loads. VLD1 variants. *)
1463 Vldx 1,
1464 [Disassembles_as [Use_operands [| VecArray (1, Dreg);
1465 CstPtrTo Corereg |]]],
1466 Use_operands [| Dreg; CstPtrTo Corereg |], "vld1", bits_1,
1467 pf_su_8_64;
1468 Vldx 1, [Disassembles_as [Use_operands [| VecArray (2, Dreg);
1469 CstPtrTo Corereg |]]],
1470 Use_operands [| Qreg; CstPtrTo Corereg |], "vld1Q", bits_1,
1471 pf_su_8_64;
1473 Vldx_lane 1,
1474 [Disassembles_as [Use_operands [| VecArray (1, Element_of_dreg);
1475 CstPtrTo Corereg |]]],
1476 Use_operands [| Dreg; CstPtrTo Corereg; Dreg; Immed |],
1477 "vld1_lane", bits_3, pf_su_8_32;
1478 Vldx_lane 1,
1479 [Disassembles_as [Use_operands [| VecArray (1, Dreg);
1480 CstPtrTo Corereg |]];
1481 Const_valuator (fun _ -> 0)],
1482 Use_operands [| Dreg; CstPtrTo Corereg; Dreg; Immed |],
1483 "vld1_lane", bits_3, [S64; U64];
1484 Vldx_lane 1,
1485 [Disassembles_as [Use_operands [| VecArray (1, Element_of_dreg);
1486 CstPtrTo Corereg |]]],
1487 Use_operands [| Qreg; CstPtrTo Corereg; Qreg; Immed |],
1488 "vld1Q_lane", bits_3, pf_su_8_32;
1489 Vldx_lane 1,
1490 [Disassembles_as [Use_operands [| VecArray (1, Dreg);
1491 CstPtrTo Corereg |]]],
1492 Use_operands [| Qreg; CstPtrTo Corereg; Qreg; Immed |],
1493 "vld1Q_lane", bits_3, [S64; U64];
1495 Vldx_dup 1,
1496 [Disassembles_as [Use_operands [| VecArray (1, All_elements_of_dreg);
1497 CstPtrTo Corereg |]]],
1498 Use_operands [| Dreg; CstPtrTo Corereg |], "vld1_dup",
1499 bits_1, pf_su_8_32;
1500 Vldx_dup 1,
1501 [Disassembles_as [Use_operands [| VecArray (1, Dreg);
1502 CstPtrTo Corereg |]]],
1503 Use_operands [| Dreg; CstPtrTo Corereg |], "vld1_dup",
1504 bits_1, [S64; U64];
1505 Vldx_dup 1,
1506 [Disassembles_as [Use_operands [| VecArray (2, All_elements_of_dreg);
1507 CstPtrTo Corereg |]]],
1508 Use_operands [| Qreg; CstPtrTo Corereg |], "vld1Q_dup",
1509 bits_1, pf_su_8_32;
1510 (* Treated identically to vld1_dup above as we now
1511 do a single load followed by a duplicate. *)
1512 Vldx_dup 1,
1513 [Disassembles_as [Use_operands [| VecArray (1, Dreg);
1514 CstPtrTo Corereg |]]],
1515 Use_operands [| Qreg; CstPtrTo Corereg |], "vld1Q_dup",
1516 bits_1, [S64; U64];
1518 (* VST1 variants. *)
1519 Vstx 1, [Disassembles_as [Use_operands [| VecArray (1, Dreg);
1520 PtrTo Corereg |]]],
1521 Use_operands [| PtrTo Corereg; Dreg |], "vst1",
1522 store_1, pf_su_8_64;
1523 Vstx 1, [Disassembles_as [Use_operands [| VecArray (2, Dreg);
1524 PtrTo Corereg |]]],
1525 Use_operands [| PtrTo Corereg; Qreg |], "vst1Q",
1526 store_1, pf_su_8_64;
1528 Vstx_lane 1,
1529 [Disassembles_as [Use_operands [| VecArray (1, Element_of_dreg);
1530 CstPtrTo Corereg |]]],
1531 Use_operands [| PtrTo Corereg; Dreg; Immed |],
1532 "vst1_lane", store_3, pf_su_8_32;
1533 Vstx_lane 1,
1534 [Disassembles_as [Use_operands [| VecArray (1, Dreg);
1535 CstPtrTo Corereg |]];
1536 Const_valuator (fun _ -> 0)],
1537 Use_operands [| PtrTo Corereg; Dreg; Immed |],
1538 "vst1_lane", store_3, [U64; S64];
1539 Vstx_lane 1,
1540 [Disassembles_as [Use_operands [| VecArray (1, Element_of_dreg);
1541 CstPtrTo Corereg |]]],
1542 Use_operands [| PtrTo Corereg; Qreg; Immed |],
1543 "vst1Q_lane", store_3, pf_su_8_32;
1544 Vstx_lane 1,
1545 [Disassembles_as [Use_operands [| VecArray (1, Dreg);
1546 CstPtrTo Corereg |]]],
1547 Use_operands [| PtrTo Corereg; Qreg; Immed |],
1548 "vst1Q_lane", store_3, [U64; S64];
1550 (* VLD2 variants. *)
1551 Vldx 2, [], Use_operands [| VecArray (2, Dreg); CstPtrTo Corereg |],
1552 "vld2", bits_1, pf_su_8_32;
1553 Vldx 2, [Instruction_name ["vld1"]],
1554 Use_operands [| VecArray (2, Dreg); CstPtrTo Corereg |],
1555 "vld2", bits_1, [S64; U64];
1556 Vldx 2, [Disassembles_as [Use_operands [| VecArray (2, Dreg);
1557 CstPtrTo Corereg |];
1558 Use_operands [| VecArray (2, Dreg);
1559 CstPtrTo Corereg |]]],
1560 Use_operands [| VecArray (2, Qreg); CstPtrTo Corereg |],
1561 "vld2Q", bits_1, pf_su_8_32;
1563 Vldx_lane 2,
1564 [Disassembles_as [Use_operands
1565 [| VecArray (2, Element_of_dreg);
1566 CstPtrTo Corereg |]]],
1567 Use_operands [| VecArray (2, Dreg); CstPtrTo Corereg;
1568 VecArray (2, Dreg); Immed |],
1569 "vld2_lane", bits_3, P8 :: P16 :: F32 :: su_8_32;
1570 Vldx_lane 2,
1571 [Disassembles_as [Use_operands
1572 [| VecArray (2, Element_of_dreg);
1573 CstPtrTo Corereg |]]],
1574 Use_operands [| VecArray (2, Qreg); CstPtrTo Corereg;
1575 VecArray (2, Qreg); Immed |],
1576 "vld2Q_lane", bits_3, [P16; F32; U16; U32; S16; S32];
1578 Vldx_dup 2,
1579 [Disassembles_as [Use_operands
1580 [| VecArray (2, All_elements_of_dreg); CstPtrTo Corereg |]]],
1581 Use_operands [| VecArray (2, Dreg); CstPtrTo Corereg |],
1582 "vld2_dup", bits_1, pf_su_8_32;
1583 Vldx_dup 2,
1584 [Instruction_name ["vld1"]; Disassembles_as [Use_operands
1585 [| VecArray (2, Dreg); CstPtrTo Corereg |]]],
1586 Use_operands [| VecArray (2, Dreg); CstPtrTo Corereg |],
1587 "vld2_dup", bits_1, [S64; U64];
1589 (* VST2 variants. *)
1590 Vstx 2, [Disassembles_as [Use_operands [| VecArray (2, Dreg);
1591 PtrTo Corereg |]]],
1592 Use_operands [| PtrTo Corereg; VecArray (2, Dreg) |], "vst2",
1593 store_1, pf_su_8_32;
1594 Vstx 2, [Disassembles_as [Use_operands [| VecArray (2, Dreg);
1595 PtrTo Corereg |]];
1596 Instruction_name ["vst1"]],
1597 Use_operands [| PtrTo Corereg; VecArray (2, Dreg) |], "vst2",
1598 store_1, [S64; U64];
1599 Vstx 2, [Disassembles_as [Use_operands [| VecArray (2, Dreg);
1600 PtrTo Corereg |];
1601 Use_operands [| VecArray (2, Dreg);
1602 PtrTo Corereg |]]],
1603 Use_operands [| PtrTo Corereg; VecArray (2, Qreg) |], "vst2Q",
1604 store_1, pf_su_8_32;
1606 Vstx_lane 2,
1607 [Disassembles_as [Use_operands
1608 [| VecArray (2, Element_of_dreg);
1609 CstPtrTo Corereg |]]],
1610 Use_operands [| PtrTo Corereg; VecArray (2, Dreg); Immed |], "vst2_lane",
1611 store_3, P8 :: P16 :: F32 :: su_8_32;
1612 Vstx_lane 2,
1613 [Disassembles_as [Use_operands
1614 [| VecArray (2, Element_of_dreg);
1615 CstPtrTo Corereg |]]],
1616 Use_operands [| PtrTo Corereg; VecArray (2, Qreg); Immed |], "vst2Q_lane",
1617 store_3, [P16; F32; U16; U32; S16; S32];
1619 (* VLD3 variants. *)
1620 Vldx 3, [], Use_operands [| VecArray (3, Dreg); CstPtrTo Corereg |],
1621 "vld3", bits_1, pf_su_8_32;
1622 Vldx 3, [Instruction_name ["vld1"]],
1623 Use_operands [| VecArray (3, Dreg); CstPtrTo Corereg |],
1624 "vld3", bits_1, [S64; U64];
1625 Vldx 3, [Disassembles_as [Use_operands [| VecArray (3, Dreg);
1626 CstPtrTo Corereg |];
1627 Use_operands [| VecArray (3, Dreg);
1628 CstPtrTo Corereg |]]],
1629 Use_operands [| VecArray (3, Qreg); CstPtrTo Corereg |],
1630 "vld3Q", bits_1, P8 :: P16 :: F32 :: su_8_32;
1632 Vldx_lane 3,
1633 [Disassembles_as [Use_operands
1634 [| VecArray (3, Element_of_dreg);
1635 CstPtrTo Corereg |]]],
1636 Use_operands [| VecArray (3, Dreg); CstPtrTo Corereg;
1637 VecArray (3, Dreg); Immed |],
1638 "vld3_lane", bits_3, P8 :: P16 :: F32 :: su_8_32;
1639 Vldx_lane 3,
1640 [Disassembles_as [Use_operands
1641 [| VecArray (3, Element_of_dreg);
1642 CstPtrTo Corereg |]]],
1643 Use_operands [| VecArray (3, Qreg); CstPtrTo Corereg;
1644 VecArray (3, Qreg); Immed |],
1645 "vld3Q_lane", bits_3, [P16; F32; U16; U32; S16; S32];
1647 Vldx_dup 3,
1648 [Disassembles_as [Use_operands
1649 [| VecArray (3, All_elements_of_dreg); CstPtrTo Corereg |]]],
1650 Use_operands [| VecArray (3, Dreg); CstPtrTo Corereg |],
1651 "vld3_dup", bits_1, pf_su_8_32;
1652 Vldx_dup 3,
1653 [Instruction_name ["vld1"]; Disassembles_as [Use_operands
1654 [| VecArray (3, Dreg); CstPtrTo Corereg |]]],
1655 Use_operands [| VecArray (3, Dreg); CstPtrTo Corereg |],
1656 "vld3_dup", bits_1, [S64; U64];
1658 (* VST3 variants. *)
1659 Vstx 3, [Disassembles_as [Use_operands [| VecArray (4, Dreg);
1660 PtrTo Corereg |]]],
1661 Use_operands [| PtrTo Corereg; VecArray (3, Dreg) |], "vst3",
1662 store_1, pf_su_8_32;
1663 Vstx 3, [Disassembles_as [Use_operands [| VecArray (4, Dreg);
1664 PtrTo Corereg |]];
1665 Instruction_name ["vst1"]],
1666 Use_operands [| PtrTo Corereg; VecArray (3, Dreg) |], "vst3",
1667 store_1, [S64; U64];
1668 Vstx 3, [Disassembles_as [Use_operands [| VecArray (3, Dreg);
1669 PtrTo Corereg |];
1670 Use_operands [| VecArray (3, Dreg);
1671 PtrTo Corereg |]]],
1672 Use_operands [| PtrTo Corereg; VecArray (3, Qreg) |], "vst3Q",
1673 store_1, pf_su_8_32;
1675 Vstx_lane 3,
1676 [Disassembles_as [Use_operands
1677 [| VecArray (3, Element_of_dreg);
1678 CstPtrTo Corereg |]]],
1679 Use_operands [| PtrTo Corereg; VecArray (3, Dreg); Immed |], "vst3_lane",
1680 store_3, P8 :: P16 :: F32 :: su_8_32;
1681 Vstx_lane 3,
1682 [Disassembles_as [Use_operands
1683 [| VecArray (3, Element_of_dreg);
1684 CstPtrTo Corereg |]]],
1685 Use_operands [| PtrTo Corereg; VecArray (3, Qreg); Immed |], "vst3Q_lane",
1686 store_3, [P16; F32; U16; U32; S16; S32];
1688 (* VLD4/VST4 variants. *)
1689 Vldx 4, [], Use_operands [| VecArray (4, Dreg); CstPtrTo Corereg |],
1690 "vld4", bits_1, pf_su_8_32;
1691 Vldx 4, [Instruction_name ["vld1"]],
1692 Use_operands [| VecArray (4, Dreg); CstPtrTo Corereg |],
1693 "vld4", bits_1, [S64; U64];
1694 Vldx 4, [Disassembles_as [Use_operands [| VecArray (4, Dreg);
1695 CstPtrTo Corereg |];
1696 Use_operands [| VecArray (4, Dreg);
1697 CstPtrTo Corereg |]]],
1698 Use_operands [| VecArray (4, Qreg); CstPtrTo Corereg |],
1699 "vld4Q", bits_1, P8 :: P16 :: F32 :: su_8_32;
1701 Vldx_lane 4,
1702 [Disassembles_as [Use_operands
1703 [| VecArray (4, Element_of_dreg);
1704 CstPtrTo Corereg |]]],
1705 Use_operands [| VecArray (4, Dreg); CstPtrTo Corereg;
1706 VecArray (4, Dreg); Immed |],
1707 "vld4_lane", bits_3, P8 :: P16 :: F32 :: su_8_32;
1708 Vldx_lane 4,
1709 [Disassembles_as [Use_operands
1710 [| VecArray (4, Element_of_dreg);
1711 CstPtrTo Corereg |]]],
1712 Use_operands [| VecArray (4, Qreg); CstPtrTo Corereg;
1713 VecArray (4, Qreg); Immed |],
1714 "vld4Q_lane", bits_3, [P16; F32; U16; U32; S16; S32];
1716 Vldx_dup 4,
1717 [Disassembles_as [Use_operands
1718 [| VecArray (4, All_elements_of_dreg); CstPtrTo Corereg |]]],
1719 Use_operands [| VecArray (4, Dreg); CstPtrTo Corereg |],
1720 "vld4_dup", bits_1, pf_su_8_32;
1721 Vldx_dup 4,
1722 [Instruction_name ["vld1"]; Disassembles_as [Use_operands
1723 [| VecArray (4, Dreg); CstPtrTo Corereg |]]],
1724 Use_operands [| VecArray (4, Dreg); CstPtrTo Corereg |],
1725 "vld4_dup", bits_1, [S64; U64];
1727 Vstx 4, [Disassembles_as [Use_operands [| VecArray (4, Dreg);
1728 PtrTo Corereg |]]],
1729 Use_operands [| PtrTo Corereg; VecArray (4, Dreg) |], "vst4",
1730 store_1, pf_su_8_32;
1731 Vstx 4, [Disassembles_as [Use_operands [| VecArray (4, Dreg);
1732 PtrTo Corereg |]];
1733 Instruction_name ["vst1"]],
1734 Use_operands [| PtrTo Corereg; VecArray (4, Dreg) |], "vst4",
1735 store_1, [S64; U64];
1736 Vstx 4, [Disassembles_as [Use_operands [| VecArray (4, Dreg);
1737 PtrTo Corereg |];
1738 Use_operands [| VecArray (4, Dreg);
1739 PtrTo Corereg |]]],
1740 Use_operands [| PtrTo Corereg; VecArray (4, Qreg) |], "vst4Q",
1741 store_1, pf_su_8_32;
1743 Vstx_lane 4,
1744 [Disassembles_as [Use_operands
1745 [| VecArray (4, Element_of_dreg);
1746 CstPtrTo Corereg |]]],
1747 Use_operands [| PtrTo Corereg; VecArray (4, Dreg); Immed |], "vst4_lane",
1748 store_3, P8 :: P16 :: F32 :: su_8_32;
1749 Vstx_lane 4,
1750 [Disassembles_as [Use_operands
1751 [| VecArray (4, Element_of_dreg);
1752 CstPtrTo Corereg |]]],
1753 Use_operands [| PtrTo Corereg; VecArray (4, Qreg); Immed |], "vst4Q_lane",
1754 store_3, [P16; F32; U16; U32; S16; S32];
1756 (* Logical operations. And. *)
1757 Vand, [], All (3, Dreg), "vand", notype_2, su_8_32;
1758 Vand, [No_op], All (3, Dreg), "vand", notype_2, [S64; U64];
1759 Vand, [], All (3, Qreg), "vandQ", notype_2, su_8_64;
1761 (* Or. *)
1762 Vorr, [], All (3, Dreg), "vorr", notype_2, su_8_32;
1763 Vorr, [No_op], All (3, Dreg), "vorr", notype_2, [S64; U64];
1764 Vorr, [], All (3, Qreg), "vorrQ", notype_2, su_8_64;
1766 (* Eor. *)
1767 Veor, [], All (3, Dreg), "veor", notype_2, su_8_32;
1768 Veor, [No_op], All (3, Dreg), "veor", notype_2, [S64; U64];
1769 Veor, [], All (3, Qreg), "veorQ", notype_2, su_8_64;
1771 (* Bic (And-not). *)
1772 Vbic, [], All (3, Dreg), "vbic", notype_2, su_8_32;
1773 Vbic, [No_op], All (3, Dreg), "vbic", notype_2, [S64; U64];
1774 Vbic, [], All (3, Qreg), "vbicQ", notype_2, su_8_64;
1776 (* Or-not. *)
1777 Vorn, [], All (3, Dreg), "vorn", notype_2, su_8_32;
1778 Vorn, [No_op], All (3, Dreg), "vorn", notype_2, [S64; U64];
1779 Vorn, [], All (3, Qreg), "vornQ", notype_2, su_8_64;
(* Reinterpret casts between every pair of distinct vector element types.
   For each destination type we emit one D-register and one Q-register
   vreinterpret variant whose "types" list covers every other source type. *)
let reinterp =
  let elems = P8 :: P16 :: F32 :: su_8_64 in
  (* All casts whose destination is [convto], one per distinct source. *)
  let casts_to convto =
    let sources = List.filter (fun convfrom -> convfrom <> convto) elems in
    List.map (fun convfrom -> Cast (convto, convfrom)) sources
  in
  List.flatten
    (List.map
       (fun convto ->
          let types = casts_to convto in
          [Vreinterp, [No_op], Use_operands [| Dreg; Dreg |],
           "vreinterpret", conv_1, types;
           Vreinterp, [No_op], Use_operands [| Qreg; Qreg |],
           "vreinterpretQ", conv_1, types])
       elems)
1803 (* Output routines. *)
(* Render an element type as the short suffix used in intrinsic names
   (e.g. "s8", "u32", "f32"; bit-widths alone for the B* variants).
   Conversions and casts print both halves joined by an underscore. *)
let rec string_of_elt = function
  | S8 -> "s8"
  | S16 -> "s16"
  | S32 -> "s32"
  | S64 -> "s64"
  | U8 -> "u8"
  | U16 -> "u16"
  | U32 -> "u32"
  | U64 -> "u64"
  | I8 -> "i8"
  | I16 -> "i16"
  | I32 -> "i32"
  | I64 -> "i64"
  | B8 -> "8"
  | B16 -> "16"
  | B32 -> "32"
  | B64 -> "64"
  | F16 -> "f16"
  | F32 -> "f32"
  | P8 -> "p8"
  | P16 -> "p16"
  | Conv (a, b) | Cast (a, b) -> string_of_elt a ^ "_" ^ string_of_elt b
  | NoElts -> failwith "No elts"
(* Like string_of_elt, but join the two halves of a conversion or cast
   with a dot rather than an underscore. *)
let string_of_elt_dots elt =
  match elt with
  | Conv (a, b) | Cast (a, b) -> string_of_elt a ^ "." ^ string_of_elt b
  | plain -> string_of_elt plain
(* Map a vector type to its C spelling.  Plain vector and scalar types
   receive a "_t" suffix via the top-level decoration function; array
   types fold the element count into the decorated name, while pointer
   and const wrappers recurse on the underlying type. *)
let string_of_vectype vt =
  let rec describe decorate = function
    | T_int8x8 -> decorate "int8x8"
    | T_int8x16 -> decorate "int8x16"
    | T_int16x4 -> decorate "int16x4"
    | T_int16x8 -> decorate "int16x8"
    | T_int32x2 -> decorate "int32x2"
    | T_int32x4 -> decorate "int32x4"
    | T_int64x1 -> decorate "int64x1"
    | T_int64x2 -> decorate "int64x2"
    | T_uint8x8 -> decorate "uint8x8"
    | T_uint8x16 -> decorate "uint8x16"
    | T_uint16x4 -> decorate "uint16x4"
    | T_uint16x8 -> decorate "uint16x8"
    | T_uint32x2 -> decorate "uint32x2"
    | T_uint32x4 -> decorate "uint32x4"
    | T_uint64x1 -> decorate "uint64x1"
    | T_uint64x2 -> decorate "uint64x2"
    | T_float16x4 -> decorate "float16x4"
    | T_float32x2 -> decorate "float32x2"
    | T_float32x4 -> decorate "float32x4"
    | T_poly8x8 -> decorate "poly8x8"
    | T_poly8x16 -> decorate "poly8x16"
    | T_poly16x4 -> decorate "poly16x4"
    | T_poly16x8 -> decorate "poly16x8"
    | T_int8 -> decorate "int8"
    | T_int16 -> decorate "int16"
    | T_int32 -> decorate "int32"
    | T_int64 -> decorate "int64"
    | T_uint8 -> decorate "uint8"
    | T_uint16 -> decorate "uint16"
    | T_uint32 -> decorate "uint32"
    | T_uint64 -> decorate "uint64"
    | T_poly8 -> decorate "poly8"
    | T_poly16 -> decorate "poly16"
    | T_float16 -> decorate "float16"
    | T_float32 -> decorate "float32"
    (* The following have fixed C spellings; no "_t" decoration. *)
    | T_immediate _ -> "const int"
    | T_void -> "void"
    | T_intQI -> "__builtin_neon_qi"
    | T_intHI -> "__builtin_neon_hi"
    | T_intSI -> "__builtin_neon_si"
    | T_intDI -> "__builtin_neon_di"
    | T_floatHF -> "__builtin_neon_hf"
    | T_floatSF -> "__builtin_neon_sf"
    | T_arrayof (num, base) ->
        (* The element name is taken undecorated, then the whole
           "<base>x<num>" name is decorated once. *)
        let basename = describe (fun x -> x) base in
        decorate (Printf.sprintf "%sx%d" basename num)
    | T_ptrto x ->
        Printf.sprintf "%s *" (describe decorate x)
    | T_const x ->
        Printf.sprintf "const %s" (describe decorate x)
  in
  describe (fun x -> x ^ "_t") vt
(* Builtin type name corresponding to an opaque large-integer mode. *)
let string_of_inttype = function
  | B_TImode -> "__builtin_neon_ti"
  | B_EImode -> "__builtin_neon_ei"
  | B_OImode -> "__builtin_neon_oi"
  | B_CImode -> "__builtin_neon_ci"
  | B_XImode -> "__builtin_neon_xi"
(* Lower-case machine-mode name, as used when forming builtin names. *)
let string_of_mode = function
  | V8QI -> "v8qi"
  | V4HI -> "v4hi"
  | V4HF -> "v4hf"
  | V2SI -> "v2si"
  | V2SF -> "v2sf"
  | DI -> "di"
  | V16QI -> "v16qi"
  | V8HI -> "v8hi"
  | V4SI -> "v4si"
  | V4SF -> "v4sf"
  | V2DI -> "v2di"
  | QI -> "qi"
  | HI -> "hi"
  | SI -> "si"
  | SF -> "sf"
(* Use uppercase chars for letters which form part of the intrinsic name, but
   should be omitted from the builtin name (the info is passed in an extra
   argument, instead). *)
let intrinsic_name = String.lowercase
(* Allow the name of the builtin to be overridden by things (e.g. Flipped)
   found in the features list.  After substitution, uppercase characters
   are dropped entirely: they belong to the intrinsic name only, not to
   the builtin name. *)
let builtin_name features name =
  (* The first Flipped or Builtin_name entry in the list wins, since
     fold_right applies later entries first. *)
  let name =
    List.fold_right
      (fun feature acc ->
         match feature with
           Flipped substitute | Builtin_name substitute -> substitute
         | _ -> acc)
      features name
  in
  (* True unless the character changes under lowercasing, i.e. unless it
     is an uppercase letter. *)
  let keep c =
    let str = String.make 1 c in
    String.lowercase str = str
  in
  let buf = Buffer.create (String.length name) in
  String.iter (fun c -> if keep c then Buffer.add_char buf c) name;
  Buffer.contents buf
(* Transform an arity into a list of strings: one C type name per
   parameter/return slot, in declaration order. *)
let strings_of_arity a =
  let vectypes =
    match a with
    | Arity0 vt -> [vt]
    | Arity1 (vt1, vt2) -> [vt1; vt2]
    | Arity2 (vt1, vt2, vt3) -> [vt1; vt2; vt3]
    | Arity3 (vt1, vt2, vt3, vt4) -> [vt1; vt2; vt3; vt4]
    | Arity4 (vt1, vt2, vt3, vt4, vt5) -> [vt1; vt2; vt3; vt4; vt5]
  in
  List.map string_of_vectype vectypes
(* Suffixes on the end of builtin names that are to be stripped in order
   to obtain the name used as an instruction.  They are only stripped if
   preceded immediately by an underscore. *)
let suffixes_to_strip = ["n"; "lane"; "dup"]
(* Get the possible names of an instruction corresponding to a "name" from the
   ops table.  This is done by getting the equivalent builtin name and
   stripping any suffixes from the list at the top of this file, unless
   the features list presents with an Instruction_name entry, in which
   case that is used; or unless the features list presents with a Flipped
   entry, in which case that is used.  If both such entries are present,
   the first in the list will be chosen. *)
let get_insn_names features name =
  (* An explicit Instruction_name or Flipped feature overrides the
     default builtin-derived name; List.find returns the first match. *)
  let names =
    try
      match
        List.find
          (fun feature ->
             match feature with
               Instruction_name _ | Flipped _ -> true
             | _ -> false)
          features
      with
        Instruction_name insn_names -> insn_names
      | Flipped flipped_name -> [flipped_name]
      | _ -> assert false
    with Not_found -> [builtin_name features name]
  in
  (* Strip a recognized suffix following the final underscore, if any;
     names without an underscore (String.rindex raises) pass through. *)
  let strip_suffix name' =
    try
      let underscore = String.rindex name' '_' in
      let our_suffix =
        String.sub name' (underscore + 1)
          (String.length name' - underscore - 1)
      in
      if List.mem our_suffix suffixes_to_strip then
        String.sub name' 0 underscore
      else
        name'
    with Not_found | Invalid_argument _ -> name'
  in
  List.map strip_suffix names
(* Apply [f] to each element of [elts] and append the resulting strings,
   comma-separated, onto the accumulator [acc]. *)
let rec commas f elts acc =
  match elts with
  | [] -> acc
  | [last] -> acc ^ f last
  | first :: rest -> commas f rest (acc ^ f first ^ ", ")
(* Given a list of features and the shape specified in the "ops" table, apply
   a function to each possible shape that the instruction may have.
   By default, this is the "shape" entry in "ops".  If the features list
   contains a Disassembles_as entry, the shapes contained in that entry are
   mapped to corresponding outputs and returned in a list.  If there is more
   than one Disassembles_as entry, only the first is used. *)
let analyze_all_shapes features shape f =
  try
    let disassembly =
      List.find
        (fun feature ->
           match feature with
             Disassembles_as _ -> true
           | _ -> false)
        features
    in
    match disassembly with
      Disassembles_as shapes -> List.map f shapes
    | _ -> assert false
  with Not_found -> [f shape]