1 (* Auto-generate ARM Neon intrinsics header file.
2 Copyright (C) 2006, 2007, 2009 Free Software Foundation, Inc.
3 Contributed by CodeSourcery.
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>.
21 This is an O'Caml program. The O'Caml compiler is available from:
25 Or from your favourite OS's friendly packaging system. Tested with version
26 3.09.2, though other versions will probably work too.
30 ocamlc -o neon-gen neon.cmo neon-gen.ml
33 ./neon-gen > arm_neon.h
(* The format codes used in the following functions are documented at:
     http://caml.inria.fr/pub/docs/manual-ocaml/libref/Format.html\
     #6_printflikefunctionsforprettyprinting
   (The URL is a single line; remove the backslash.)  *)
44 (* Following functions can be used to approximate GNU indentation style. *)
(* Begin a new top-level item of the generated header: open the outermost
   vertical pretty-printing box and return a fresh brace-nesting counter.
   The counter starts at 0, which is the only level end_function accepts,
   and is the value the brace-block helpers inspect through `!nesting'.  *)
let start_function () =
  Format.printf "@[<v 0>";
  ref 0
(* Finish the item begun by start_function: emit two break hints and close
   the outermost box.  NESTING is the brace-depth counter; any value other
   than 0 means a brace block is still open.
   Raises Failure (with the offending depth in the message) when
   !nesting <> 0.  *)
let end_function nesting =
  match !nesting with
    0 -> Format.printf "@;@;@]"
  | depth ->
      failwith ("Bad nesting (ending function at level "
                ^ (string_of_int depth) ^ ")")
(* Emit an opening brace and start an indented vertical box for its
   contents.  At depth 0 the brace goes at the current indentation; at
   deeper levels an additional enclosing box is opened first, which
   close_braceblock's non-zero case closes again with its extra "@]".
   The depth counter NESTING is advanced after printing so that the
   matching close sees the same level once it has decremented.
   NOTE(review): the helpers are meant to approximate GNU indentation
   style; confirm the amount of literal whitespace before "@<0>{" in the
   nested case against the generated header.  *)
let open_braceblock nesting =
  begin match !nesting with
    0 -> Format.printf "@,@<0>{@[<v 2>@,"
  | _ -> Format.printf "@,@[<v 2> @<0>{@[<v 2>@,"
  end;
  incr nesting
(* Emit the closing brace that matches the most recent open_braceblock.
   The depth counter NESTING is decremented first, so the level examined
   here is the level at which the block was opened: at level 0 only the
   contents box is closed, at deeper levels the extra enclosing box is
   closed as well.  *)
let close_braceblock nesting =
  decr nesting;
  match !nesting with
    0 -> Format.printf "@]@,@<0>}"
  | _ -> Format.printf "@]@,@<0>}@]"
68 let print_function arity fnname body
=
69 let ffmt = start_function () in
70 Format.printf
"__extension__ static __inline ";
71 let inl = "__attribute__ ((__always_inline__))" in
72 begin match arity
with
74 Format.printf
"%s %s@,%s (void)" (string_of_vectype ret
) inl fnname
75 | Arity1
(ret
, arg0
) ->
76 Format.printf
"%s %s@,%s (%s __a)" (string_of_vectype ret
) inl fnname
77 (string_of_vectype arg0
)
78 | Arity2
(ret
, arg0
, arg1
) ->
79 Format.printf
"%s %s@,%s (%s __a, %s __b)"
80 (string_of_vectype ret
) inl fnname
(string_of_vectype arg0
)
81 (string_of_vectype arg1
)
82 | Arity3
(ret
, arg0
, arg1
, arg2
) ->
83 Format.printf
"%s %s@,%s (%s __a, %s __b, %s __c)"
84 (string_of_vectype ret
) inl fnname
(string_of_vectype arg0
)
85 (string_of_vectype arg1
) (string_of_vectype arg2
)
86 | Arity4
(ret
, arg0
, arg1
, arg2
, arg3
) ->
87 Format.printf
"%s %s@,%s (%s __a, %s __b, %s __c, %s __d)"
88 (string_of_vectype ret
) inl fnname
(string_of_vectype arg0
)
89 (string_of_vectype arg1
) (string_of_vectype arg2
)
90 (string_of_vectype arg3
)
93 let rec print_lines = function
95 | [line
] -> Format.printf
"%s" line
96 | line
::lines
-> Format.printf
"%s@," line
; print_lines lines
in
98 close_braceblock ffmt;
(* True when the feature list FEATURES contains the ReturnPtr marker.  *)
let return_by_ptr features =
  List.exists (function ReturnPtr -> true | _ -> false) features
(* Build the C declaration of a union named BASE that overlays the array
   type made of NUM vectors of type ELTS (member __i) with the integer type
   chosen by inttype_for_array for the same NUM and ELTS (member __o).  *)
let union_string num elts base =
  let int_name = string_of_inttype (inttype_for_array num elts) in
  let array_name = string_of_vectype (T_arrayof (num, elts)) in
  Printf.sprintf "union { %s __i; %s __o; } %s" array_name int_name base
(* Map a C type to the signed type used when casting builtin arguments.
   Unsigned and polynomial vectors map to the signed vector of the same
   shape; scalars map to the mode-named types; array, pointer and const
   wrappers are rebuilt around the converted element type.  Any type not
   listed is returned unchanged, which is what lets add_cast detect that
   no cast is required.  *)
let rec signed_ctype = function
    T_uint8x8 | T_poly8x8 -> T_int8x8
  | T_uint8x16 | T_poly8x16 -> T_int8x16
  | T_uint16x4 | T_poly16x4 -> T_int16x4
  | T_uint16x8 | T_poly16x8 -> T_int16x8
  | T_uint32x2 -> T_int32x2
  | T_uint32x4 -> T_int32x4
  | T_uint64x1 -> T_int64x1
  | T_uint64x2 -> T_int64x2
  (* Cast to types defined by mode in arm.c, not random types pulled in from
     the <stdint.h> header in use. This fixes incompatible pointer errors when
     compiling with C++.  *)
  | T_uint8 | T_int8 -> T_intQI
  | T_uint16 | T_int16 -> T_intHI
  | T_uint32 | T_int32 -> T_intSI
  | T_uint64 | T_int64 -> T_intDI
  | T_float32 -> T_floatSF
  (* Scalar polynomial types follow the integer scalar of the same width.  *)
  | T_poly8 -> T_intQI
  | T_poly16 -> T_intHI
  | T_arrayof (n, elt) -> T_arrayof (n, signed_ctype elt)
  | T_ptrto elt -> T_ptrto (signed_ctype elt)
  | T_const elt -> T_const (signed_ctype elt)
  (* Already-signed (or otherwise unlisted) types need no conversion.  *)
  | x -> x
(* Return the C expression CVAL, prefixed with a cast to the signed
   counterpart of CTYPE when that counterpart differs from CTYPE itself;
   otherwise return CVAL untouched.  Both branches must yield a string: an
   `if' without `else' would force the result to be unit.  *)
let add_cast ctype cval =
  let stype = signed_ctype ctype in
  if ctype <> stype then
    Printf.sprintf "(%s) %s" (string_of_vectype stype) cval
  else
    cval
(* Text of a C cast to TO_TY: the type's name wrapped in parentheses.  *)
let cast_for_return to_ty =
  Printf.sprintf "(%s)" (string_of_vectype to_ty)
142 (* Return a tuple of a list of declarations to go at the start of the function,
143 and a list of statements needed to return THING. *)
144 let return arity
return_by_ptr thing
=
146 Arity0
(ret
) | Arity1
(ret
, _
) | Arity2
(ret
, _
, _
) | Arity3
(ret
, _
, _
, _
)
147 | Arity4
(ret
, _
, _
, _
, _
) ->
149 T_arrayof
(num
, vec
) ->
150 if return_by_ptr then
151 let sname = string_of_vectype ret
in
152 [Printf.sprintf
"%s __rv;" sname],
153 [thing ^
";"; "return __rv;"]
155 let uname = union_string num vec
"__rv" in
156 [uname ^
";"], ["__rv.__o = " ^ thing ^
";"; "return __rv.__i;"]
157 | T_void
-> [], [thing ^
";"]
159 [], ["return " ^
(cast_for_return ret
) ^ thing ^
";"]
(* Strip every T_arrayof wrapper from CTYPE and return the innermost
   element type.  A type that is not an array is its own element type.  *)
let rec element_type ctype =
  match ctype with
    T_arrayof (_, v) -> element_type v
  | _ -> ctype
166 let params return_by_ptr ps
=
167 let pdecls = ref [] in
170 T_arrayof
(num
, elts
) ->
171 let uname = union_string num elts
(p ^
"u") in
172 let decl = Printf.sprintf
"%s = { %s };" uname p
in
173 pdecls := decl :: !pdecls;
175 | _
-> add_cast t p
in
176 let plist = match ps
with
178 | Arity1
(_
, t1
) -> [ptype t1
"__a"]
179 | Arity2
(_
, t1
, t2
) -> [ptype t1
"__a"; ptype t2
"__b"]
180 | Arity3
(_
, t1
, t2
, t3
) -> [ptype t1
"__a"; ptype t2
"__b"; ptype t3
"__c"]
181 | Arity4
(_
, t1
, t2
, t3
, t4
) ->
182 [ptype t1
"__a"; ptype t2
"__b"; ptype t3
"__c"; ptype t4
"__d"] in
184 Arity0 ret
| Arity1
(ret
, _
) | Arity2
(ret
, _
, _
) | Arity3
(ret
, _
, _
, _
)
185 | Arity4
(ret
, _
, _
, _
, _
) ->
186 if return_by_ptr then
187 !pdecls, add_cast (T_ptrto
(element_type ret
)) "&__rv.val[0]" :: plist
191 let modify_params features
plist =
193 List.exists
(function Flipped _
-> true | _
-> false) features
in
198 failwith
("Don't know how to flip args " ^
(String.concat
", " plist))
(* !!! Decide whether to add an extra information word based on the shape
   and the feature list.  *)
204 let extra_word shape features paramlist bits
=
207 All _
| Long
| Long_noreg _
| Wide
| Wide_noreg _
| Narrow
208 | By_scalar _
| Wide_scalar
| Wide_lane
| Binary_imm _
| Long_imm
210 | _
-> List.mem InfoWord features
213 paramlist
@ [string_of_int bits
]
217 (* Bit 0 represents signed (1) vs unsigned (0), or float (1) vs poly (0).
218 Bit 1 represents floats & polynomials (1), or ordinary integers (0).
219 Bit 2 represents rounding (1) vs none (0). *)
220 let infoword_value elttype features
=
222 match elt_class elttype
with
223 Signed
| ConvClass
(Signed
, _
) | ConvClass
(_
, Signed
) -> 0b001
227 and rounding_bit
= if List.mem Rounding features
then 0b100 else 0b000 in
228 bits01 lor rounding_bit
230 (* "Cast" type operations will throw an exception in mode_of_elt (actually in
231 elt_width, called from there). Deal with that here, and generate a suffix
232 with multiple modes (<to><from>). *)
233 let rec mode_suffix elttype shape
=
235 let mode = mode_of_elt elttype shape
in
237 with MixedMode
(dst
, src
) ->
238 let dstmode = mode_of_elt dst shape
239 and srcmode
= mode_of_elt src shape
in
240 string_of_mode
dstmode ^ string_of_mode srcmode
(* Emit one intrinsic wrapper for a single type variant of an operation.
   The variant is the triple (CTYPE, ASMTYPE, ELTTYPE); only CTYPE (the
   arity) and ELTTYPE are used here.  The wrapper is named
   <intrinsic_name NAME>_<string_of_elt ELTTYPE>, and its body is the
   parameter declarations, then the return declarations, then the
   statements that call __builtin_neon_<builtin name><mode suffix>.  *)
let print_variant opcode features shape name (ctype, asmtype, elttype) =
  let info_bits = infoword_value elttype features in
  let suffix = mode_suffix elttype shape in
  let by_ptr = return_by_ptr features in
  let param_decls, raw_args = params by_ptr ctype in
  let ordered_args = modify_params features raw_args in
  let final_args = extra_word shape features ordered_args info_bits in
  let arg_string = String.concat ", " final_args in
  let call =
    Printf.sprintf "__builtin_neon_%s%s (%s)"
      (builtin_name features name) suffix arg_string in
  let ret_decls, ret_stmts = return ctype by_ptr call in
  let fnname = (intrinsic_name name) ^ "_" ^ (string_of_elt elttype) in
  print_function ctype fnname
    (List.concat [param_decls; ret_decls; ret_stmts])
(* When this function processes the element types in the ops table, it rewrites
   them in a list of tuples (a,b,c):
     a : C type as an "arity", e.g. Arity1 (T_poly8x8, T_poly8x8)
     b : Asm type : a single, processed element type, e.g. P16. This is the
         type which should be attached to the asm opcode.
     c : Variant type : the unprocessed type for this variant (e.g. in add
         instructions which don't care about the sign, b might be i16 while c
         keeps the original, sign-specific element type).  *)
267 let print_op (opcode
, features
, shape
, name
, munge
, types
) =
268 let sorted_types = List.sort compare types
in
269 let munged_types = List.map
270 (fun elt
-> let c, asm
= munge shape elt
in c, asm
, elt
) sorted_types in
272 (fun variant
-> print_variant opcode features shape name variant
)
276 List.iter
print_op ops
(* Output type definitions. Table entries are:
     cbase : "C" name for the type.
     abase : "ARM" base name for the type (i.e. int in int8x8_t).
     esize : element size.
     enum : element count.
*)
287 (* Doubleword vector types. *)
288 "__builtin_neon_qi", "int", 8, 8;
289 "__builtin_neon_hi", "int", 16, 4;
290 "__builtin_neon_si", "int", 32, 2;
291 "__builtin_neon_di", "int", 64, 1;
292 "__builtin_neon_sf", "float", 32, 2;
293 "__builtin_neon_poly8", "poly", 8, 8;
294 "__builtin_neon_poly16", "poly", 16, 4;
295 "__builtin_neon_uqi", "uint", 8, 8;
296 "__builtin_neon_uhi", "uint", 16, 4;
297 "__builtin_neon_usi", "uint", 32, 2;
298 "__builtin_neon_udi", "uint", 64, 1;
300 (* Quadword vector types. *)
301 "__builtin_neon_qi", "int", 8, 16;
302 "__builtin_neon_hi", "int", 16, 8;
303 "__builtin_neon_si", "int", 32, 4;
304 "__builtin_neon_di", "int", 64, 2;
305 "__builtin_neon_sf", "float", 32, 4;
306 "__builtin_neon_poly8", "poly", 8, 16;
307 "__builtin_neon_poly16", "poly", 16, 8;
308 "__builtin_neon_uqi", "uint", 8, 16;
309 "__builtin_neon_uhi", "uint", 16, 8;
310 "__builtin_neon_usi", "uint", 32, 4;
311 "__builtin_neon_udi", "uint", 64, 2
314 (fun (cbase
, abase
, esize
, enum
) ->
318 | _
-> Printf.sprintf
"\t__attribute__ ((__vector_size__ (%d)))"
319 (esize
* enum
/ 8) in
320 Format.printf
"typedef %s %s%dx%d_t%s;@\n" cbase abase esize enum
attr)
322 Format.print_newline
();
323 (* Extra types not in <stdint.h>. *)
324 Format.printf
"typedef float float32_t;\n";
325 Format.printf
"typedef __builtin_neon_poly8 poly8_t;\n";
326 Format.printf
"typedef __builtin_neon_poly16 poly16_t;\n"
328 (* Output structs containing arrays, for load & store instructions etc. *)
333 "int", 32; "int", 64;
334 "uint", 8; "uint", 16;
335 "uint", 32; "uint", 64;
336 "float", 32; "poly", 8;
339 let writestruct elname elsize regsize arrsize
=
340 let elnum = regsize
/ elsize
in
342 Printf.sprintf
"%s%dx%dx%d_t" elname elsize
elnum arrsize
in
343 let sfmt = start_function () in
344 Format.printf
"typedef struct %s" structname;
345 open_braceblock sfmt;
346 Format.printf
"%s%dx%d_t val[%d];" elname elsize
elnum arrsize
;
347 close_braceblock sfmt;
348 Format.printf
" %s;" structname;
353 (fun (elname
, elsize
) ->
354 writestruct elname elsize
64 n
;
355 writestruct elname elsize
128 n
)
(* Print each string of a list on the standard formatter, following every
   one with a forced newline ("@\n").  An empty list prints nothing.  *)
let print_lines =
  let rec emit = function
      [] -> ()
    | line :: rest -> Format.printf "%s@\n" line; emit rest
  in
  emit
365 "/* ARM NEON intrinsics include file. This file is generated automatically";
366 " using neon-gen.ml. Please do not edit manually.";
368 " Copyright (C) 2006, 2007, 2009 Free Software Foundation, Inc.";
369 " Contributed by CodeSourcery.";
371 " This file is part of GCC.";
373 " GCC is free software; you can redistribute it and/or modify it";
374 " under the terms of the GNU General Public License as published";
375 " by the Free Software Foundation; either version 3, or (at your";
376 " option) any later version.";
378 " GCC is distributed in the hope that it will be useful, but WITHOUT";
379 " ANY WARRANTY; without even the implied warranty of MERCHANTABILITY";
380 " or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public";
381 " License for more details.";
383 " Under Section 7 of GPL version 3, you are granted additional";
384 " permissions described in the GCC Runtime Library Exception, version";
385 " 3.1, as published by the Free Software Foundation.";
387 " You should have received a copy of the GNU General Public License and";
388 " a copy of the GCC Runtime Library Exception along with this program;";
389 " see the files COPYING3 and COPYING.RUNTIME respectively. If not, see";
390 " <http://www.gnu.org/licenses/>. */";
392 "#ifndef _GCC_ARM_NEON_H";
393 "#define _GCC_ARM_NEON_H 1";
395 "#ifndef __ARM_NEON__";
396 "#error You must enable NEON instructions (e.g. -mfloat-abi=softfp -mfpu=neon) to use arm_neon.h";
399 "#ifdef __cplusplus";
403 "#include <stdint.h>";
407 Format.print_newline
();
409 Format.print_newline
();
412 "#ifdef __cplusplus";