1 (* Auto-generate ARM Neon intrinsics header file.
2 Copyright (C) 2006, 2007, 2009 Free Software Foundation, Inc.
3 Contributed by CodeSourcery.
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>.
21 This is an O'Caml program. The O'Caml compiler is available from:
25 Or from your favourite OS's friendly packaging system. Tested with version
26 3.09.2, though other versions will probably work too.
30 ocamlc -o neon-gen neon.cmo neon-gen.ml
33 ./neon-gen > arm_neon.h
38 (* The format codes used in the following functions are documented at:
39 http://caml.inria.fr/pub/docs/manual-ocaml/libref/Format.html\
40 #6_printflikefunctionsforprettyprinting
41 (one line, remove the backslash.)
(* The following functions can be used to approximate GNU indentation style. *)
45 let start_function () =
46 Format.printf
"@[<v 0>";
49 let end_function nesting
=
51 0 -> Format.printf
"@;@;@]"
52 | _
-> failwith
("Bad nesting (ending function at level "
53 ^
(string_of_int
!nesting
) ^
")")
55 let open_braceblock nesting
=
56 begin match !nesting
with
57 0 -> Format.printf
"@,@<0>{@[<v 2>@,"
58 | _
-> Format.printf
"@,@[<v 2> @<0>{@[<v 2>@,"
62 let close_braceblock nesting
=
65 0 -> Format.printf
"@]@,@<0>}"
66 | _
-> Format.printf
"@]@,@<0>}@]"
68 let print_function arity fnname body
=
69 let ffmt = start_function () in
70 Format.printf
"__extension__ static __inline ";
71 let inl = "__attribute__ ((__always_inline__))" in
72 begin match arity
with
74 Format.printf
"%s %s@,%s (void)" (string_of_vectype ret
) inl fnname
75 | Arity1
(ret
, arg0
) ->
76 Format.printf
"%s %s@,%s (%s __a)" (string_of_vectype ret
) inl fnname
77 (string_of_vectype arg0
)
78 | Arity2
(ret
, arg0
, arg1
) ->
79 Format.printf
"%s %s@,%s (%s __a, %s __b)"
80 (string_of_vectype ret
) inl fnname
(string_of_vectype arg0
)
81 (string_of_vectype arg1
)
82 | Arity3
(ret
, arg0
, arg1
, arg2
) ->
83 Format.printf
"%s %s@,%s (%s __a, %s __b, %s __c)"
84 (string_of_vectype ret
) inl fnname
(string_of_vectype arg0
)
85 (string_of_vectype arg1
) (string_of_vectype arg2
)
86 | Arity4
(ret
, arg0
, arg1
, arg2
, arg3
) ->
87 Format.printf
"%s %s@,%s (%s __a, %s __b, %s __c, %s __d)"
88 (string_of_vectype ret
) inl fnname
(string_of_vectype arg0
)
89 (string_of_vectype arg1
) (string_of_vectype arg2
)
90 (string_of_vectype arg3
)
93 let rec print_lines = function
95 | [line
] -> Format.printf
"%s" line
96 | line
::lines
-> Format.printf
"%s@," line
; print_lines lines
in
98 close_braceblock ffmt;
(* Tell whether FEATURES contains the ReturnPtr flag.  Callers use the
   answer to choose between returning a result by value and writing it
   through a pointer.  *)
let return_by_ptr features =
  List.exists
    (function
       ReturnPtr -> true
     | _ -> false)
    features
(* Build the text of a C union declaration named BASE.  The union's "__i"
   member has the type named by string_of_vectype for an array of NUM
   elements of ELTS; its "__o" member has the type named by
   string_of_inttype for the integer type that inttype_for_array picks for
   the same NUM and ELTS.  No trailing semicolon is added.  *)
let union_string num elts base =
  (* Look up the integer type first, as the original code did, so any
     failure in that lookup is reported before the array type is named.  *)
  let int_ty = inttype_for_array num elts in
  let int_name = string_of_inttype int_ty in
  let array_name = string_of_vectype (T_arrayof (num, elts)) in
  Printf.sprintf "union { %s __i; %s __o; } %s" array_name int_name base
109 let rec signed_ctype = function
110 T_uint8x8
| T_poly8x8
-> T_int8x8
111 | T_uint8x16
| T_poly8x16
-> T_int8x16
112 | T_uint16x4
| T_poly16x4
-> T_int16x4
113 | T_uint16x8
| T_poly16x8
-> T_int16x8
114 | T_uint32x2
-> T_int32x2
115 | T_uint32x4
-> T_int32x4
116 | T_uint64x1
-> T_int64x1
117 | T_uint64x2
-> T_int64x2
118 (* Cast to types defined by mode in arm.c, not random types pulled in from
119 the <stdint.h> header in use. This fixes incompatible pointer errors when
120 compiling with C++. *)
121 | T_uint8
| T_int8
-> T_intQI
122 | T_uint16
| T_int16
-> T_intHI
123 | T_uint32
| T_int32
-> T_intSI
124 | T_uint64
| T_int64
-> T_intDI
126 | T_poly16
-> T_intHI
127 | T_arrayof
(n
, elt
) -> T_arrayof
(n
, signed_ctype elt
)
128 | T_ptrto elt
-> T_ptrto
(signed_ctype elt
)
129 | T_const elt
-> T_const
(signed_ctype elt
)
132 let add_cast ctype cval
=
133 let stype = signed_ctype ctype
in
134 if ctype
<> stype then
135 Printf.sprintf
"(%s) %s" (string_of_vectype
stype) cval
(* Return the C cast prefix for TO_TY: the type's name, as given by
   string_of_vectype, wrapped in parentheses.  *)
let cast_for_return to_ty =
  String.concat "" ["("; string_of_vectype to_ty; ")"]
141 (* Return a tuple of a list of declarations to go at the start of the function,
142 and a list of statements needed to return THING. *)
143 let return arity
return_by_ptr thing
=
145 Arity0
(ret
) | Arity1
(ret
, _
) | Arity2
(ret
, _
, _
) | Arity3
(ret
, _
, _
, _
)
146 | Arity4
(ret
, _
, _
, _
, _
) ->
148 T_arrayof
(num
, vec
) ->
149 if return_by_ptr then
150 let sname = string_of_vectype ret
in
151 [Printf.sprintf
"%s __rv;" sname],
152 [thing ^
";"; "return __rv;"]
154 let uname = union_string num vec
"__rv" in
155 [uname ^
";"], ["__rv.__o = " ^ thing ^
";"; "return __rv.__i;"]
156 | T_void
-> [], [thing ^
";"]
158 [], ["return " ^
(cast_for_return ret
) ^ thing ^
";"]
160 let rec element_type ctype
=
162 T_arrayof
(_
, v
) -> element_type v
165 let params return_by_ptr ps
=
166 let pdecls = ref [] in
169 T_arrayof
(num
, elts
) ->
170 let uname = union_string num elts
(p ^
"u") in
171 let decl = Printf.sprintf
"%s = { %s };" uname p
in
172 pdecls := decl :: !pdecls;
174 | _
-> add_cast t p
in
175 let plist = match ps
with
177 | Arity1
(_
, t1
) -> [ptype t1
"__a"]
178 | Arity2
(_
, t1
, t2
) -> [ptype t1
"__a"; ptype t2
"__b"]
179 | Arity3
(_
, t1
, t2
, t3
) -> [ptype t1
"__a"; ptype t2
"__b"; ptype t3
"__c"]
180 | Arity4
(_
, t1
, t2
, t3
, t4
) ->
181 [ptype t1
"__a"; ptype t2
"__b"; ptype t3
"__c"; ptype t4
"__d"] in
183 Arity0 ret
| Arity1
(ret
, _
) | Arity2
(ret
, _
, _
) | Arity3
(ret
, _
, _
, _
)
184 | Arity4
(ret
, _
, _
, _
, _
) ->
185 if return_by_ptr then
186 !pdecls, add_cast (T_ptrto
(element_type ret
)) "&__rv.val[0]" :: plist
190 let modify_params features
plist =
192 List.exists
(function Flipped _
-> true | _
-> false) features
in
197 failwith
("Don't know how to flip args " ^
(String.concat
", " plist))
201 (* !!! Decide whether to add an extra information word based on the shape
203 let extra_word shape features paramlist bits
=
206 All _
| Long
| Long_noreg _
| Wide
| Wide_noreg _
| Narrow
207 | By_scalar _
| Wide_scalar
| Wide_lane
| Binary_imm _
| Long_imm
209 | _
-> List.mem InfoWord features
212 paramlist
@ [string_of_int bits
]
216 (* Bit 0 represents signed (1) vs unsigned (0), or float (1) vs poly (0).
217 Bit 1 represents floats & polynomials (1), or ordinary integers (0).
218 Bit 2 represents rounding (1) vs none (0). *)
219 let infoword_value elttype features
=
221 match elt_class elttype
with
222 Signed
| ConvClass
(Signed
, _
) | ConvClass
(_
, Signed
) -> 0b001
226 and rounding_bit
= if List.mem Rounding features
then 0b100 else 0b000 in
227 bits01 lor rounding_bit
229 (* "Cast" type operations will throw an exception in mode_of_elt (actually in
230 elt_width, called from there). Deal with that here, and generate a suffix
231 with multiple modes (<to><from>). *)
232 let rec mode_suffix elttype shape
=
234 let mode = mode_of_elt elttype shape
in
236 with MixedMode
(dst
, src
) ->
237 let dstmode = mode_of_elt dst shape
238 and srcmode
= mode_of_elt src shape
in
239 string_of_mode
dstmode ^ string_of_mode srcmode
(* Print the C definition of one intrinsic variant.

   FEATURES, SHAPE and NAME describe the operation; the tuple gives the C
   type of the variant as an arity (CTYPE) and its element type (ELTTYPE).
   OPCODE and ASMTYPE are accepted but not used in this function.

   The emitted function is named "<intrinsic_name NAME>_<ELTTYPE>" and its
   body consists of the parameter declarations, then the return
   declarations, then the statements that call
   "__builtin_neon_<builtin name><mode suffix> (<arguments>)".  *)
let print_variant opcode features shape name (ctype, asmtype, elttype) =
  let info_bits = infoword_value elttype features in
  let suffix = mode_suffix elttype shape in
  let by_ptr = return_by_ptr features in
  let param_decls, raw_args = params by_ptr ctype in
  (* Reorder the arguments if FEATURES asks for it, then let extra_word
     decide whether the information word is appended.  *)
  let flipped_args = modify_params features raw_args in
  let final_args = extra_word shape features flipped_args info_bits in
  let arg_text = String.concat ", " final_args in
  let builtin_base = builtin_name features name in
  let call =
    Printf.sprintf "__builtin_neon_%s%s (%s)" builtin_base suffix arg_text in
  let ret_decls, ret_stmts = return ctype by_ptr call in
  let fnname = (intrinsic_name name) ^ "_" ^ (string_of_elt elttype) in
  print_function ctype fnname (param_decls @ ret_decls @ ret_stmts)
256 (* When this function processes the element types in the ops table, it rewrites
257 them in a list of tuples (a,b,c):
258 a : C type as an "arity", e.g. Arity1 (T_poly8x8, T_poly8x8)
259 b : Asm type : a single, processed element type, e.g. P16. This is the
260 type which should be attached to the asm opcode.
261 c : Variant type : the unprocessed type for this variant (e.g. in add
262 instructions which don't care about the sign, b might be i16 and c
266 let print_op (opcode
, features
, shape
, name
, munge
, types
) =
267 let sorted_types = List.sort compare types
in
268 let munged_types = List.map
269 (fun elt
-> let c, asm
= munge shape elt
in c, asm
, elt
) sorted_types in
271 (fun variant
-> print_variant opcode features shape name variant
)
275 List.iter
print_op ops
277 (* Output type definitions. Table entries are:
278 cbase : "C" name for the type.
279 abase : "ARM" base name for the type (i.e. int in int8x8_t).
280 esize : element size.
281 enum : element count.
286 (* Doubleword vector types. *)
287 "__builtin_neon_qi", "int", 8, 8;
288 "__builtin_neon_hi", "int", 16, 4;
289 "__builtin_neon_si", "int", 32, 2;
290 "__builtin_neon_di", "int", 64, 1;
291 "__builtin_neon_sf", "float", 32, 2;
292 "__builtin_neon_poly8", "poly", 8, 8;
293 "__builtin_neon_poly16", "poly", 16, 4;
294 "__builtin_neon_uqi", "uint", 8, 8;
295 "__builtin_neon_uhi", "uint", 16, 4;
296 "__builtin_neon_usi", "uint", 32, 2;
297 "__builtin_neon_udi", "uint", 64, 1;
299 (* Quadword vector types. *)
300 "__builtin_neon_qi", "int", 8, 16;
301 "__builtin_neon_hi", "int", 16, 8;
302 "__builtin_neon_si", "int", 32, 4;
303 "__builtin_neon_di", "int", 64, 2;
304 "__builtin_neon_sf", "float", 32, 4;
305 "__builtin_neon_poly8", "poly", 8, 16;
306 "__builtin_neon_poly16", "poly", 16, 8;
307 "__builtin_neon_uqi", "uint", 8, 16;
308 "__builtin_neon_uhi", "uint", 16, 8;
309 "__builtin_neon_usi", "uint", 32, 4;
310 "__builtin_neon_udi", "uint", 64, 2
313 (fun (cbase
, abase
, esize
, enum
) ->
317 | _
-> Printf.sprintf
"\t__attribute__ ((__vector_size__ (%d)))"
318 (esize
* enum
/ 8) in
319 Format.printf
"typedef %s %s%dx%d_t%s;@\n" cbase abase esize enum
attr)
321 Format.print_newline
();
322 (* Extra types not in <stdint.h>. *)
323 Format.printf
"typedef __builtin_neon_sf float32_t;\n";
324 Format.printf
"typedef __builtin_neon_poly8 poly8_t;\n";
325 Format.printf
"typedef __builtin_neon_poly16 poly16_t;\n"
327 (* Output structs containing arrays, for load & store instructions etc. *)
332 "int", 32; "int", 64;
333 "uint", 8; "uint", 16;
334 "uint", 32; "uint", 64;
335 "float", 32; "poly", 8;
338 let writestruct elname elsize regsize arrsize
=
339 let elnum = regsize
/ elsize
in
341 Printf.sprintf
"%s%dx%dx%d_t" elname elsize
elnum arrsize
in
342 let sfmt = start_function () in
343 Format.printf
"typedef struct %s" structname;
344 open_braceblock sfmt;
345 Format.printf
"%s%dx%d_t val[%d];" elname elsize
elnum arrsize
;
346 close_braceblock sfmt;
347 Format.printf
" %s;" structname;
352 (fun (elname
, elsize
) ->
353 writestruct elname elsize
64 n
;
354 writestruct elname elsize
128 n
)
(* Print each string of the list in order on the standard formatter,
   following every one with a forced newline ("@\n").  *)
let rec print_lines = function
    [] -> ()
  | line :: rest ->
      Format.printf "%s@\n" line;
      print_lines rest
364 "/* ARM NEON intrinsics include file. This file is generated automatically";
365 " using neon-gen.ml. Please do not edit manually.";
367 " Copyright (C) 2006, 2007, 2009 Free Software Foundation, Inc.";
368 " Contributed by CodeSourcery.";
370 " This file is part of GCC.";
372 " GCC is free software; you can redistribute it and/or modify it";
373 " under the terms of the GNU General Public License as published";
374 " by the Free Software Foundation; either version 3, or (at your";
375 " option) any later version.";
377 " GCC is distributed in the hope that it will be useful, but WITHOUT";
378 " ANY WARRANTY; without even the implied warranty of MERCHANTABILITY";
379 " or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public";
380 " License for more details.";
382 " Under Section 7 of GPL version 3, you are granted additional";
383 " permissions described in the GCC Runtime Library Exception, version";
384 " 3.1, as published by the Free Software Foundation.";
386 " You should have received a copy of the GNU General Public License and";
387 " a copy of the GCC Runtime Library Exception along with this program;";
388 " see the files COPYING3 and COPYING.RUNTIME respectively. If not, see";
389 " <http://www.gnu.org/licenses/>. */";
391 "#ifndef _GCC_ARM_NEON_H";
392 "#define _GCC_ARM_NEON_H 1";
394 "#ifndef __ARM_NEON__";
395 "#error You must enable NEON instructions (e.g. -mfloat-abi=softfp -mfpu=neon) to use arm_neon.h";
398 "#ifdef __cplusplus";
402 "#include <stdint.h>";
406 Format.print_newline
();
408 Format.print_newline
();
411 "#ifdef __cplusplus";