2014-01-17 Richard Biener <rguenther@suse.de>
[official-gcc.git] / gcc / config / arm / neon-docgen.ml
blob5788a533e19672207ed59d041df667ef832d1f66
(* ARM NEON documentation generator.

   Copyright (C) 2006-2014 Free Software Foundation, Inc.
   Contributed by CodeSourcery.

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify it under
   the terms of the GNU General Public License as published by the Free
   Software Foundation; either version 3, or (at your option) any later
   version.

   GCC is distributed in the hope that it will be useful, but WITHOUT ANY
   WARRANTY; without even the implied warranty of MERCHANTABILITY or
   FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
   for more details.

   You should have received a copy of the GNU General Public License
   along with GCC; see the file COPYING3.  If not see
   <http://www.gnu.org/licenses/>.

   This is an O'Caml program.  The O'Caml compiler is available from:

     http://caml.inria.fr/

   Or from your favourite OS's friendly packaging system. Tested with version
   3.09.2, though other versions will probably work too.

   Compile with:
     ocamlc -c neon.ml
     ocamlc -o neon-docgen neon.cmo neon-docgen.ml

   Run with:
     /path/to/neon-docgen /path/to/gcc/doc/arm-neon-intrinsics.texi
*)
37 open Neon
(* The combined table: every row of "reinterp" followed by every row of
   "ops", so that a single scan covers both.  *)
let ops_reinterp = List.append reinterp ops
(* Helper functions for extracting things from the "ops" table.  *)

(* [single_opcode desired_opcode ()] returns the rows of [ops_reinterp]
   whose opcode equals [desired_opcode], in the reverse of their table
   order.  The trailing unit argument delays the scan until the extractor
   is actually applied.  *)
let single_opcode desired_opcode () =
  let has_desired_opcode (opcode, _, _, _, _, _) = opcode = desired_opcode in
  List.rev (List.filter has_desired_opcode ops_reinterp)
(* [multiple_opcodes desired_opcodes ()] gathers the rows that
   [single_opcode] selects for each listed opcode.  Rows belonging to
   opcodes later in [desired_opcodes] come first in the result.  *)
let multiple_opcodes desired_opcodes () =
  let rows_for opcode = single_opcode opcode () in
  List.concat (List.rev_map rows_for desired_opcodes)
(* [ldx_opcode number ()] returns the rows of [ops_reinterp] whose opcode is
   [Vldx], [Vldx_lane] or [Vldx_dup] carrying the value [number], in the
   reverse of their table order.  *)
let ldx_opcode number () =
  let is_wanted_load (opcode, _, _, _, _, _) =
    match opcode with
    | Vldx n | Vldx_lane n | Vldx_dup n -> n = number
    | _ -> false
  in
  List.rev (List.filter is_wanted_load ops_reinterp)
(* [stx_opcode number ()] returns the rows of [ops_reinterp] whose opcode is
   [Vstx] or [Vstx_lane] carrying the value [number], in the reverse of
   their table order.  *)
let stx_opcode number () =
  let is_wanted_store (opcode, _, _, _, _, _) =
    match opcode with
    | Vstx n | Vstx_lane n -> n = number
    | _ -> false
  in
  List.rev (List.filter is_wanted_store ops_reinterp)
(* [tbl_opcode ()] returns every row of [ops_reinterp] whose opcode is a
   [Vtbl], whatever its argument, in the reverse of their table order.  *)
let tbl_opcode () =
  let is_tbl (opcode, _, _, _, _, _) =
    match opcode with
    | Vtbl _ -> true
    | _ -> false
  in
  List.rev (List.filter is_tbl ops_reinterp)
(* [tbx_opcode ()] returns every row of [ops_reinterp] whose opcode is a
   [Vtbx], whatever its argument, in the reverse of their table order.  *)
let tbx_opcode () =
  let is_tbx (opcode, _, _, _, _, _) =
    match opcode with
    | Vtbx _ -> true
    | _ -> false
  in
  List.rev (List.filter is_tbx ops_reinterp)
(* The groups of intrinsics.

   Each entry pairs a section title with an extractor: a function that,
   when applied to (), yields the table rows belonging to that section.
   The title is written verbatim into the generated Texinfo, so it must
   be plain descriptive text.  *)
let intrinsic_groups =
  [ "Addition", single_opcode Vadd;
    "Multiplication", single_opcode Vmul;
    "Multiply-accumulate", single_opcode Vmla;
    "Multiply-subtract", single_opcode Vmls;
    "Fused-multiply-accumulate", single_opcode Vfma;
    "Fused-multiply-subtract", single_opcode Vfms;
    "Round to integral (to nearest, ties to even)", single_opcode Vrintn;
    "Round to integral (to nearest, ties away from zero)", single_opcode Vrinta;
    "Round to integral (towards +Inf)", single_opcode Vrintp;
    "Round to integral (towards -Inf)", single_opcode Vrintm;
    "Round to integral (towards 0)", single_opcode Vrintz;
    "Subtraction", single_opcode Vsub;
    "Comparison (equal-to)", single_opcode Vceq;
    "Comparison (greater-than-or-equal-to)", single_opcode Vcge;
    "Comparison (less-than-or-equal-to)", single_opcode Vcle;
    "Comparison (greater-than)", single_opcode Vcgt;
    "Comparison (less-than)", single_opcode Vclt;
    "Comparison (absolute greater-than-or-equal-to)", single_opcode Vcage;
    "Comparison (absolute less-than-or-equal-to)", single_opcode Vcale;
    "Comparison (absolute greater-than)", single_opcode Vcagt;
    "Comparison (absolute less-than)", single_opcode Vcalt;
    "Test bits", single_opcode Vtst;
    "Absolute difference", single_opcode Vabd;
    "Absolute difference and accumulate", single_opcode Vaba;
    "Maximum", single_opcode Vmax;
    "Minimum", single_opcode Vmin;
    "Pairwise add", single_opcode Vpadd;
    (* Title previously contained a stray "single_opcode" left behind by a
       search-and-replace.  *)
    "Pairwise add, widen and accumulate", single_opcode Vpada;
    "Folding maximum", single_opcode Vpmax;
    "Folding minimum", single_opcode Vpmin;
    "Reciprocal step", multiple_opcodes [Vrecps; Vrsqrts];
    "Vector shift left", single_opcode Vshl;
    "Vector shift left by constant", single_opcode Vshl_n;
    "Vector shift right by constant", single_opcode Vshr_n;
    "Vector shift right by constant and accumulate", single_opcode Vsra_n;
    "Vector shift right and insert", single_opcode Vsri;
    "Vector shift left and insert", single_opcode Vsli;
    "Absolute value", single_opcode Vabs;
    "Negation", single_opcode Vneg;
    "Bitwise not", single_opcode Vmvn;
    "Count leading sign bits", single_opcode Vcls;
    "Count leading zeros", single_opcode Vclz;
    "Count number of set bits", single_opcode Vcnt;
    "Reciprocal estimate", single_opcode Vrecpe;
    "Reciprocal square-root estimate", single_opcode Vrsqrte;
    "Get lanes from a vector", single_opcode Vget_lane;
    "Set lanes in a vector", single_opcode Vset_lane;
    "Create vector from literal bit pattern", single_opcode Vcreate;
    "Set all lanes to the same value",
      multiple_opcodes [Vdup_n; Vmov_n; Vdup_lane];
    "Combining vectors", single_opcode Vcombine;
    "Splitting vectors", multiple_opcodes [Vget_high; Vget_low];
    "Conversions", multiple_opcodes [Vcvt; Vcvt_n];
    (* The next two titles had the same stray "single_opcode" text.  *)
    "Move, narrowing", single_opcode Vmovn;
    "Move, long", single_opcode Vmovl;
    "Table lookup", tbl_opcode;
    "Extended table lookup", tbx_opcode;
    "Multiply, lane", single_opcode Vmul_lane;
    "Long multiply, lane", single_opcode Vmull_lane;
    "Saturating doubling long multiply, lane", single_opcode Vqdmull_lane;
    "Saturating doubling multiply high, lane", single_opcode Vqdmulh_lane;
    "Multiply-accumulate, lane", single_opcode Vmla_lane;
    "Multiply-subtract, lane", single_opcode Vmls_lane;
    "Vector multiply by scalar", single_opcode Vmul_n;
    "Vector long multiply by scalar", single_opcode Vmull_n;
    "Vector saturating doubling long multiply by scalar",
      single_opcode Vqdmull_n;
    "Vector saturating doubling multiply high by scalar",
      single_opcode Vqdmulh_n;
    "Vector multiply-accumulate by scalar", single_opcode Vmla_n;
    "Vector multiply-subtract by scalar", single_opcode Vmls_n;
    "Vector extract", single_opcode Vext;
    "Reverse elements", multiple_opcodes [Vrev64; Vrev32; Vrev16];
    "Bit selection", single_opcode Vbsl;
    "Transpose elements", single_opcode Vtrn;
    "Zip elements", single_opcode Vzip;
    "Unzip elements", single_opcode Vuzp;
    "Element/structure loads, VLD1 variants", ldx_opcode 1;
    "Element/structure stores, VST1 variants", stx_opcode 1;
    "Element/structure loads, VLD2 variants", ldx_opcode 2;
    "Element/structure stores, VST2 variants", stx_opcode 2;
    "Element/structure loads, VLD3 variants", ldx_opcode 3;
    "Element/structure stores, VST3 variants", stx_opcode 3;
    "Element/structure loads, VLD4 variants", ldx_opcode 4;
    "Element/structure stores, VST4 variants", stx_opcode 4;
    "Logical operations (AND)", single_opcode Vand;
    "Logical operations (OR)", single_opcode Vorr;
    "Logical operations (exclusive OR)", single_opcode Veor;
    "Logical operations (AND-NOT)", single_opcode Vbic;
    "Logical operations (OR-NOT)", single_opcode Vorn;
    "Reinterpret casts", single_opcode Vreinterp ]
(* Given an intrinsic shape, produce a string to document the corresponding
   operand shapes.

   The result is a Texinfo fragment: operands are separated by ", " and
   registers are written as @var{d<N>}, @var{q<N>} or @var{r<N>}.
   Raises Assert_failure for a [By_scalar] shape whose element is neither
   [Dreg] nor [Qreg], and Failure (from List.hd) if an [Alternatives]
   element carries an empty list.  *)
let rec analyze_shape shape =
  (* [n_things n thing] is a list holding [n] copies of [thing].  *)
  let rec n_things n thing =
    match n with
    | 0 -> []
    | n -> thing :: n_things (n - 1) thing
  in
  (* Render one operand; [reg_no] is the number printed inside the
     register name.  *)
  let rec analyze_shape_elt reg_no elt =
    match elt with
    | Dreg -> "@var{d" ^ string_of_int reg_no ^ "}"
    | Qreg -> "@var{q" ^ string_of_int reg_no ^ "}"
    | Corereg -> "@var{r" ^ string_of_int reg_no ^ "}"
    | Immed -> "#@var{0}"
    | VecArray (1, elt) ->
      let elt_regexp = analyze_shape_elt 0 elt in
      "@{" ^ elt_regexp ^ "@}"
    | VecArray (n, elt) ->
      (* [f m] renders elements numbered m-1 down to 0; reversing it gives
         the ascending register list.  *)
      let rec f m =
        match m with
        | 0 -> []
        | m -> analyze_shape_elt (m - 1) elt :: f (m - 1)
      in
      let operands = List.rev (f n) in
      "@{" ^ commas (fun x -> x) operands "" ^ "@}"
    | PtrTo elt | CstPtrTo elt ->
      "[" ^ analyze_shape_elt reg_no elt ^ "]"
    | Element_of_dreg -> analyze_shape_elt reg_no Dreg ^ "[@var{0}]"
    | Element_of_qreg -> analyze_shape_elt reg_no Qreg ^ "[@var{0}]"
    | All_elements_of_dreg -> analyze_shape_elt reg_no Dreg ^ "[]"
    (* Only the first alternative is documented.  *)
    | Alternatives alts -> analyze_shape_elt reg_no (List.hd alts)
  in
  match shape with
  | All (n, elt) -> commas (analyze_shape_elt 0) (n_things n elt) ""
  | Long ->
    analyze_shape_elt 0 Qreg ^ ", " ^ analyze_shape_elt 0 Dreg
    ^ ", " ^ analyze_shape_elt 0 Dreg
  | Long_noreg elt ->
    analyze_shape_elt 0 elt ^ ", " ^ analyze_shape_elt 0 elt
  | Wide ->
    analyze_shape_elt 0 Qreg ^ ", " ^ analyze_shape_elt 0 Qreg
    ^ ", " ^ analyze_shape_elt 0 Dreg
  | Wide_noreg elt -> analyze_shape (Long_noreg elt)
  | Narrow ->
    analyze_shape_elt 0 Dreg ^ ", " ^ analyze_shape_elt 0 Qreg
    ^ ", " ^ analyze_shape_elt 0 Qreg
  | Use_operands elts -> commas (analyze_shape_elt 0) (Array.to_list elts) ""
  | By_scalar Dreg ->
    analyze_shape (Use_operands [| Dreg; Dreg; Element_of_dreg |])
  | By_scalar Qreg ->
    analyze_shape (Use_operands [| Qreg; Qreg; Element_of_dreg |])
  | By_scalar _ -> assert false
  | Wide_lane ->
    analyze_shape (Use_operands [| Qreg; Dreg; Element_of_dreg |])
  | Wide_scalar ->
    analyze_shape (Use_operands [| Qreg; Dreg; Element_of_dreg |])
  | Pair_result elt ->
    let elt_regexp = analyze_shape_elt 0 elt in
    let elt_regexp' = analyze_shape_elt 1 elt in
    elt_regexp ^ ", " ^ elt_regexp'
  | Unary_scalar _ -> "FIXME Unary_scalar"
  | Binary_imm elt -> analyze_shape (Use_operands [| elt; elt; Immed |])
  | Narrow_imm -> analyze_shape (Use_operands [| Dreg; Qreg; Immed |])
  | Long_imm -> analyze_shape (Use_operands [| Qreg; Dreg; Immed |])
(* Document a single intrinsic.

   [first] selects "@item" (true) or "@itemx" (false) for the entry line.
   [chan] is the output channel.  The last argument pairs an element type
   with the table row it came from; only the row's features, shape, name
   and munge function are used.

   The entry is wrapped in its own "@itemize @bullet" ... "@end itemize".
   Unless the row's features include [No_op], a "Form of expected
   instruction(s)" line is written as well.  Raises Failure (from List.hd)
   if the arity yields no C types or no operand form is available.  *)
let describe_intrinsic first chan
                       (elt_ty, (_, features, shape, name, munge, _)) =
  let c_arity, new_elt_ty = munge shape elt_ty in
  let c_types = strings_of_arity c_arity in
  Printf.fprintf chan "@itemize @bullet\n";
  let item_code = if first then "@item" else "@itemx" in
  (* Head of [c_types] is printed before the name, the rest as arguments.  *)
  Printf.fprintf chan "%s %s %s_%s (" item_code (List.hd c_types)
                 (intrinsic_name name) (string_of_elt elt_ty);
  Printf.fprintf chan "%s)\n" (commas (fun ty -> ty) (List.tl c_types) "");
  if not (List.exists (fun feature -> feature = No_op) features) then
    begin
      let print_one_insn name =
        Printf.fprintf chan "@code{";
        let no_suffix = (new_elt_ty = NoElts) in
        let name_with_suffix =
          if no_suffix then name
          else name ^ "." ^ (string_of_elt_dots new_elt_ty)
        in
        let possible_operands =
          analyze_all_shapes features shape analyze_shape
        in
        let print_one_possible_operand op =
          Printf.fprintf chan "%s %s}" name_with_suffix op
        in
        (* If the intrinsic expands to multiple instructions, we assume
           they are all of the same form.  *)
        print_one_possible_operand (List.hd possible_operands)
      in
      (* Print the instruction names separated by " @emph{or} ".  *)
      let rec print_insns names =
        match names with
        | [] -> ()
        | [name] -> print_one_insn name
        | name :: names ->
          print_one_insn name;
          Printf.fprintf chan " @emph{or} ";
          print_insns names
      in
      let insn_names = get_insn_names features name in
      Printf.fprintf chan "@*@emph{Form of expected instruction(s):} ";
      print_insns insn_names;
      Printf.fprintf chan "\n"
    end;
  Printf.fprintf chan "@end itemize\n";
  Printf.fprintf chan "\n\n"
(* Document a group of intrinsics.

   [chan] is the output channel; the second argument is a
   (title, extractor) pair.  Writes an "@subsubsection" heading, then one
   entry per (element type, row) pair, then two newlines.  *)
let document_group chan (group_title, group_extractor) =
  (* Extract the rows in question from the ops table and then turn them
     into a list of intrinsics: one (element type, row) pair for each
     element type the row lists.  Pairs are consed onto the accumulator,
     so the list is in the reverse of visiting order.  *)
  let intrinsics =
    List.fold_left
      (fun got_so_far ((_, _, _, _, _, elt_tys) as row) ->
         List.fold_left
           (fun got_so_far' elt_ty -> (elt_ty, row) :: got_so_far')
           got_so_far elt_tys)
      [] (group_extractor ())
  in
  (* Emit the title for this group.  *)
  Printf.fprintf chan "@subsubsection %s\n\n" group_title;
  (* Emit a description of each intrinsic.  *)
  List.iter (describe_intrinsic true chan) intrinsics;
  (* Close this group.  *)
  Printf.fprintf chan "\n\n"
(* Write the fixed Texinfo comment header to [chan], one line per entry,
   each terminated by a newline.  *)
let gnu_header chan =
  (* NOTE(review): upstream may also emit an empty separator line before
     the "generated automatically" notice -- confirm against the original
     file.  *)
  let header_lines =
    [ "@c Copyright (C) 2006-2014 Free Software Foundation, Inc.";
      "@c This is part of the GCC manual.";
      "@c For copying conditions, see the file gcc.texi.";
      "@c This file is generated automatically using gcc/config/arm/neon-docgen.ml";
      "@c Please do not edit manually." ]
  in
  let emit_line line =
    output_string chan line;
    output_char chan '\n'
  in
  List.iter emit_line header_lines
(* Hand-written Texinfo for intrinsics that are not generated from the
   tables; the program entry point writes this text to the output after
   the generated groups.  The sha1su1.32 form lists two operands, matching
   the two-argument intrinsic.  *)
let crypto_doc =
"
@itemize @bullet
@item poly128_t vldrq_p128(poly128_t const *)
@end itemize

@itemize @bullet
@item void vstrq_p128(poly128_t *, poly128_t)
@end itemize

@itemize @bullet
@item uint64x1_t vceq_p64 (poly64x1_t, poly64x1_t)
@end itemize

@itemize @bullet
@item uint64x1_t vtst_p64 (poly64x1_t, poly64x1_t)
@end itemize

@itemize @bullet
@item uint32_t vsha1h_u32 (uint32_t)
@*@emph{Form of expected instruction(s):} @code{sha1h.32 @var{q0}, @var{q1}}
@end itemize

@itemize @bullet
@item uint32x4_t vsha1cq_u32 (uint32x4_t, uint32_t, uint32x4_t)
@*@emph{Form of expected instruction(s):} @code{sha1c.32 @var{q0}, @var{q1}, @var{q2}}
@end itemize

@itemize @bullet
@item uint32x4_t vsha1pq_u32 (uint32x4_t, uint32_t, uint32x4_t)
@*@emph{Form of expected instruction(s):} @code{sha1p.32 @var{q0}, @var{q1}, @var{q2}}
@end itemize

@itemize @bullet
@item uint32x4_t vsha1mq_u32 (uint32x4_t, uint32_t, uint32x4_t)
@*@emph{Form of expected instruction(s):} @code{sha1m.32 @var{q0}, @var{q1}, @var{q2}}
@end itemize

@itemize @bullet
@item uint32x4_t vsha1su0q_u32 (uint32x4_t, uint32x4_t, uint32x4_t)
@*@emph{Form of expected instruction(s):} @code{sha1su0.32 @var{q0}, @var{q1}, @var{q2}}
@end itemize

@itemize @bullet
@item uint32x4_t vsha1su1q_u32 (uint32x4_t, uint32x4_t)
@*@emph{Form of expected instruction(s):} @code{sha1su1.32 @var{q0}, @var{q1}}
@end itemize

@itemize @bullet
@item uint32x4_t vsha256hq_u32 (uint32x4_t, uint32x4_t, uint32x4_t)
@*@emph{Form of expected instruction(s):} @code{sha256h.32 @var{q0}, @var{q1}, @var{q2}}
@end itemize

@itemize @bullet
@item uint32x4_t vsha256h2q_u32 (uint32x4_t, uint32x4_t, uint32x4_t)
@*@emph{Form of expected instruction(s):} @code{sha256h2.32 @var{q0}, @var{q1}, @var{q2}}
@end itemize

@itemize @bullet
@item uint32x4_t vsha256su0q_u32 (uint32x4_t, uint32x4_t)
@*@emph{Form of expected instruction(s):} @code{sha256su0.32 @var{q0}, @var{q1}}
@end itemize

@itemize @bullet
@item uint32x4_t vsha256su1q_u32 (uint32x4_t, uint32x4_t, uint32x4_t)
@*@emph{Form of expected instruction(s):} @code{sha256su1.32 @var{q0}, @var{q1}, @var{q2}}
@end itemize

@itemize @bullet
@item poly128_t vmull_p64 (poly64_t a, poly64_t b)
@*@emph{Form of expected instruction(s):} @code{vmull.p64 @var{q0}, @var{d1}, @var{d2}}
@end itemize

@itemize @bullet
@item poly128_t vmull_high_p64 (poly64x2_t a, poly64x2_t b)
@*@emph{Form of expected instruction(s):} @code{vmull.p64 @var{q0}, @var{d1}, @var{d2}}
@end itemize
"
(* Program entry point.

   Expects exactly one command-line argument, the output file name.
   Writes the header, every intrinsic group and the crypto section to that
   file.  Any Sys_error raised while opening or writing the file is
   reported as a Failure naming the file.  *)
let () =
  if Array.length Sys.argv <> 2 then
    failwith "Usage: neon-docgen <output filename>"
  else
    let file = Sys.argv.(1) in
    try
      let chan = open_out file in
      gnu_header chan;
      List.iter (document_group chan) intrinsic_groups;
      Printf.fprintf chan "%s\n" crypto_doc;
      close_out chan
    with Sys_error sys ->
      failwith ("Could not create output file " ^ file ^ ": " ^ sys)