gcc/config/arm/neon-docgen.ml

   1 (* ARM NEON documentation generator.
   2
   3    Copyright (C) 2006-2014 Free Software Foundation, Inc.
   4    Contributed by CodeSourcery.
   5
   6    This file is part of GCC.
   7
   8    GCC is free software; you can redistribute it and/or modify it under
   9    the terms of the GNU General Public License as published by the Free
  10    Software Foundation; either version 3, or (at your option) any later
  11    version.
  12
  13    GCC is distributed in the hope that it will be useful, but WITHOUT ANY
  14    WARRANTY; without even the implied warranty of MERCHANTABILITY or
  15    FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  16    for more details.
  17
  18    You should have received a copy of the GNU General Public License
  19    along with GCC; see the file COPYING3.  If not see
  20    <http://www.gnu.org/licenses/>.
  21
  22    This is an O'Caml program.  The O'Caml compiler is available from:
  23
  24      http://caml.inria.fr/
  25
  26    Or from your favourite OS's friendly packaging system. Tested with version
  27    3.09.2, though other versions will probably work too.
  28
  29    Compile with:
  30      ocamlc -c neon.ml
  31      ocamlc -o neon-docgen neon.cmo neon-docgen.ml
  32
  33    Run with:
  34      /path/to/neon-docgen /path/to/gcc/doc/arm-neon-intrinsics.texi
  35 *)
  36
  37 open Neon
  38
  39 (* The combined "ops" and "reinterp" table.  *)
  40 let ops_reinterp = reinterp @ ops
  41
  42 (* Helper functions for extracting things from the "ops" table.  *)
  43 let single_opcode desired_opcode () =
  44   List.fold_left (fun got_so_far ->
  45                   fun row ->
  46                     match row with
  47                       (opcode, _, _, _, _, _) ->
  48                         if opcode = desired_opcode then row :: got_so_far
  49                                                    else got_so_far
  50                  ) [] ops_reinterp
  51
  52 let multiple_opcodes desired_opcodes () =
  53   List.fold_left (fun got_so_far ->
  54                   fun desired_opcode ->
  55                     (single_opcode desired_opcode ()) @ got_so_far)
  56                  [] desired_opcodes
  57
  58 let ldx_opcode number () =
  59   List.fold_left (fun got_so_far ->
  60                   fun row ->
  61                     match row with
  62                       (opcode, _, _, _, _, _) ->
  63                         match opcode with
  64                           Vldx n | Vldx_lane n | Vldx_dup n when n = number ->
  65                             row :: got_so_far
  66                           | _ -> got_so_far
  67                  ) [] ops_reinterp
  68
  69 let stx_opcode number () =
  70   List.fold_left (fun got_so_far ->
  71                   fun row ->
  72                     match row with
  73                       (opcode, _, _, _, _, _) ->
  74                         match opcode with
  75                           Vstx n | Vstx_lane n when n = number ->
  76                             row :: got_so_far
  77                           | _ -> got_so_far
  78                  ) [] ops_reinterp
  79
  80 let tbl_opcode () =
  81   List.fold_left (fun got_so_far ->
  82                   fun row ->
  83                     match row with
  84                       (opcode, _, _, _, _, _) ->
  85                         match opcode with
  86                           Vtbl _ -> row :: got_so_far
  87                           | _ -> got_so_far
  88                  ) [] ops_reinterp
  89
  90 let tbx_opcode () =
  91   List.fold_left (fun got_so_far ->
  92                   fun row ->
  93                     match row with
  94                       (opcode, _, _, _, _, _) ->
  95                         match opcode with
  96                           Vtbx _ -> row :: got_so_far
  97                           | _ -> got_so_far
  98                  ) [] ops_reinterp
  99
 100 (* The groups of intrinsics.  *)
 101 let intrinsic_groups =
 102   [ "Addition", single_opcode Vadd;
 103     "Multiplication", single_opcode Vmul;
 104     "Multiply-accumulate", single_opcode Vmla;
 105     "Multiply-subtract", single_opcode Vmls;
 106     "Fused-multiply-accumulate", single_opcode Vfma;
 107     "Fused-multiply-subtract", single_opcode Vfms;
 108     "Round to integral (to nearest, ties to even)", single_opcode Vrintn;
 109     "Round to integral (to nearest, ties away from zero)", single_opcode Vrinta;
 110     "Round to integral (towards +Inf)", single_opcode Vrintp;
 111     "Round to integral (towards -Inf)", single_opcode Vrintm;
 112     "Round to integral (towards 0)", single_opcode Vrintz;
 113     "Subtraction", single_opcode Vsub;
 114     "Comparison (equal-to)", single_opcode Vceq;
 115     "Comparison (greater-than-or-equal-to)", single_opcode Vcge;
 116     "Comparison (less-than-or-equal-to)", single_opcode Vcle;
 117     "Comparison (greater-than)", single_opcode Vcgt;
 118     "Comparison (less-than)", single_opcode Vclt;
 119     "Comparison (absolute greater-than-or-equal-to)", single_opcode Vcage;
 120     "Comparison (absolute less-than-or-equal-to)", single_opcode Vcale;
 121     "Comparison (absolute greater-than)", single_opcode Vcagt;
 122     "Comparison (absolute less-than)", single_opcode Vcalt;
 123     "Test bits", single_opcode Vtst;
 124     "Absolute difference", single_opcode Vabd;
 125     "Absolute difference and accumulate", single_opcode Vaba;
 126     "Maximum", single_opcode Vmax;
 127     "Minimum", single_opcode Vmin;
 128     "Pairwise add", single_opcode Vpadd;
 129     "Pairwise add, single_opcode widen and accumulate", single_opcode Vpada;
 130     "Folding maximum", single_opcode Vpmax;
 131     "Folding minimum", single_opcode Vpmin;
 132     "Reciprocal step", multiple_opcodes [Vrecps; Vrsqrts];
 133     "Vector shift left", single_opcode Vshl;
 134     "Vector shift left by constant", single_opcode Vshl_n;
 135     "Vector shift right by constant", single_opcode Vshr_n;
 136     "Vector shift right by constant and accumulate", single_opcode Vsra_n;
 137     "Vector shift right and insert", single_opcode Vsri;
 138     "Vector shift left and insert", single_opcode Vsli;
 139     "Absolute value", single_opcode Vabs;
 140     "Negation", single_opcode Vneg;
 141     "Bitwise not", single_opcode Vmvn;
 142     "Count leading sign bits", single_opcode Vcls;
 143     "Count leading zeros", single_opcode Vclz;
 144     "Count number of set bits", single_opcode Vcnt;
 145     "Reciprocal estimate", single_opcode Vrecpe;
 146     "Reciprocal square-root estimate", single_opcode Vrsqrte;
 147     "Get lanes from a vector", single_opcode Vget_lane;
 148     "Set lanes in a vector", single_opcode Vset_lane;
 149     "Create vector from literal bit pattern", single_opcode Vcreate;
 150     "Set all lanes to the same value",
 151       multiple_opcodes [Vdup_n; Vmov_n; Vdup_lane];
 152     "Combining vectors", single_opcode Vcombine;
 153     "Splitting vectors", multiple_opcodes [Vget_high; Vget_low];
 154     "Conversions", multiple_opcodes [Vcvt; Vcvt_n];
 155     "Move, single_opcode narrowing", single_opcode Vmovn;
 156     "Move, single_opcode long", single_opcode Vmovl;
 157     "Table lookup", tbl_opcode;
 158     "Extended table lookup", tbx_opcode;
 159     "Multiply, lane", single_opcode Vmul_lane;
 160     "Long multiply, lane", single_opcode Vmull_lane;
 161     "Saturating doubling long multiply, lane", single_opcode Vqdmull_lane;
 162     "Saturating doubling multiply high, lane", single_opcode Vqdmulh_lane;
 163     "Multiply-accumulate, lane", single_opcode Vmla_lane;
 164     "Multiply-subtract, lane", single_opcode Vmls_lane;
 165     "Vector multiply by scalar", single_opcode Vmul_n;
 166     "Vector long multiply by scalar", single_opcode Vmull_n;
 167     "Vector saturating doubling long multiply by scalar",
 168       single_opcode Vqdmull_n;
 169     "Vector saturating doubling multiply high by scalar",
 170       single_opcode Vqdmulh_n;
 171     "Vector multiply-accumulate by scalar", single_opcode Vmla_n;
 172     "Vector multiply-subtract by scalar", single_opcode Vmls_n;
 173     "Vector extract", single_opcode Vext;
 174     "Reverse elements", multiple_opcodes [Vrev64; Vrev32; Vrev16];
 175     "Bit selection", single_opcode Vbsl;
 176     "Transpose elements", single_opcode Vtrn;
 177     "Zip elements", single_opcode Vzip;
 178     "Unzip elements", single_opcode Vuzp;
 179     "Element/structure loads, VLD1 variants", ldx_opcode 1;
 180     "Element/structure stores, VST1 variants", stx_opcode 1;
 181     "Element/structure loads, VLD2 variants", ldx_opcode 2;
 182     "Element/structure stores, VST2 variants", stx_opcode 2;
 183     "Element/structure loads, VLD3 variants", ldx_opcode 3;
 184     "Element/structure stores, VST3 variants", stx_opcode 3;
 185     "Element/structure loads, VLD4 variants", ldx_opcode 4;
 186     "Element/structure stores, VST4 variants", stx_opcode 4;
 187     "Logical operations (AND)", single_opcode Vand;
 188     "Logical operations (OR)", single_opcode Vorr;
 189     "Logical operations (exclusive OR)", single_opcode Veor;
 190     "Logical operations (AND-NOT)", single_opcode Vbic;
 191     "Logical operations (OR-NOT)", single_opcode Vorn;
 192     "Reinterpret casts", single_opcode Vreinterp ]
 193
 194 (* Given an intrinsic shape, produce a string to document the corresponding
 195    operand shapes.  *)
 196 let rec analyze_shape shape =
 197   let rec n_things n thing =
 198     match n with
 199       0 -> []
 200     | n -> thing :: (n_things (n - 1) thing)
 201   in
 202   let rec analyze_shape_elt reg_no elt =
 203     match elt with
 204       Dreg -> "@var{d" ^ (string_of_int reg_no) ^ "}"
 205     | Qreg -> "@var{q" ^ (string_of_int reg_no) ^ "}"
 206     | Corereg -> "@var{r" ^ (string_of_int reg_no) ^ "}"
 207     | Immed -> "#@var{0}"
 208     | VecArray (1, elt) ->
 209         let elt_regexp = analyze_shape_elt 0 elt in
 210           "@{" ^ elt_regexp ^ "@}"
 211     | VecArray (n, elt) ->
 212       let rec f m =
 213         match m with
 214           0 -> []
 215         | m -> (analyze_shape_elt (m - 1) elt) :: (f (m - 1))
 216       in
 217       let ops = List.rev (f n) in
 218         "@{" ^ (commas (fun x -> x) ops "") ^ "@}"
 219     | (PtrTo elt | CstPtrTo elt) ->
 220       "[" ^ (analyze_shape_elt reg_no elt) ^ "]"
 221     | Element_of_dreg -> (analyze_shape_elt reg_no Dreg) ^ "[@var{0}]"
 222     | Element_of_qreg -> (analyze_shape_elt reg_no Qreg) ^ "[@var{0}]"
 223     | All_elements_of_dreg -> (analyze_shape_elt reg_no Dreg) ^ "[]"
 224     | Alternatives alts -> (analyze_shape_elt reg_no (List.hd alts))
 225   in
 226     match shape with
 227       All (n, elt) -> commas (analyze_shape_elt 0) (n_things n elt) ""
 228     | Long -> (analyze_shape_elt 0 Qreg) ^ ", " ^ (analyze_shape_elt 0 Dreg) ^
 229               ", " ^ (analyze_shape_elt 0 Dreg)
 230     | Long_noreg elt -> (analyze_shape_elt 0 elt) ^ ", " ^
 231               (analyze_shape_elt 0 elt)
 232     | Wide -> (analyze_shape_elt 0 Qreg) ^ ", " ^ (analyze_shape_elt 0 Qreg) ^
 233               ", " ^ (analyze_shape_elt 0 Dreg)
 234     | Wide_noreg elt -> analyze_shape (Long_noreg elt)
 235     | Narrow -> (analyze_shape_elt 0 Dreg) ^ ", " ^ (analyze_shape_elt 0 Qreg) ^
 236                 ", " ^ (analyze_shape_elt 0 Qreg)
 237     | Use_operands elts -> commas (analyze_shape_elt 0) (Array.to_list elts) ""
 238     | By_scalar Dreg ->
 239         analyze_shape (Use_operands [| Dreg; Dreg; Element_of_dreg |])
 240     | By_scalar Qreg ->
 241         analyze_shape (Use_operands [| Qreg; Qreg; Element_of_dreg |])
 242     | By_scalar _ -> assert false
 243     | Wide_lane ->
 244         analyze_shape (Use_operands [| Qreg; Dreg; Element_of_dreg |])
 245     | Wide_scalar ->
 246         analyze_shape (Use_operands [| Qreg; Dreg; Element_of_dreg |])
 247     | Pair_result elt ->
 248       let elt_regexp = analyze_shape_elt 0 elt in
 249       let elt_regexp' = analyze_shape_elt 1 elt in
 250         elt_regexp ^ ", " ^ elt_regexp'
 251     | Unary_scalar _ -> "FIXME Unary_scalar"
 252     | Binary_imm elt -> analyze_shape (Use_operands [| elt; elt; Immed |])
 253     | Narrow_imm -> analyze_shape (Use_operands [| Dreg; Qreg; Immed |])
 254     | Long_imm -> analyze_shape (Use_operands [| Qreg; Dreg; Immed |])
 255
 256 (* Document a single intrinsic.  *)
 257 let describe_intrinsic first chan
 258                        (elt_ty, (_, features, shape, name, munge, _)) =
 259   let c_arity, new_elt_ty = munge shape elt_ty in
 260   let c_types = strings_of_arity c_arity in
 261   Printf.fprintf chan "@itemize @bullet\n";
 262   let item_code = if first then "@item" else "@itemx" in
 263     Printf.fprintf chan "%s %s %s_%s (" item_code (List.hd c_types)
 264                    (intrinsic_name name) (string_of_elt elt_ty);
 265     Printf.fprintf chan "%s)\n" (commas (fun ty -> ty) (List.tl c_types) "");
 266     if not (List.exists (fun feature -> feature = No_op) features) then
 267     begin
 268       let print_one_insn name =
 269         Printf.fprintf chan "@code{";
 270         let no_suffix = (new_elt_ty = NoElts) in
 271         let name_with_suffix =
 272           if no_suffix then name
 273           else name ^ "." ^ (string_of_elt_dots new_elt_ty)
 274         in
 275         let possible_operands = analyze_all_shapes features shape
 276                                                    analyze_shape
 277         in
 278         let rec print_one_possible_operand op =
 279           Printf.fprintf chan "%s %s}" name_with_suffix op
 280         in
 281           (* If the intrinsic expands to multiple instructions, we assume
 282              they are all of the same form.  *)
 283           print_one_possible_operand (List.hd possible_operands)
 284       in
 285       let rec print_insns names =
 286         match names with
 287           [] -> ()
 288         | [name] -> print_one_insn name
 289         | name::names -> (print_one_insn name;
 290                           Printf.fprintf chan " @emph{or} ";
 291                           print_insns names)
 292       in
 293       let insn_names = get_insn_names features name in
 294         Printf.fprintf chan "@*@emph{Form of expected instruction(s):} ";
 295         print_insns insn_names;
 296         Printf.fprintf chan "\n"
 297     end;
 298     Printf.fprintf chan "@end itemize\n";
 299     Printf.fprintf chan "\n\n"
 300
 301 (* Document a group of intrinsics.  *)
 302 let document_group chan (group_title, group_extractor) =
 303   (* Extract the rows in question from the ops table and then turn them
 304      into a list of intrinsics.  *)
 305   let intrinsics =
 306     List.fold_left (fun got_so_far ->
 307                     fun row ->
 308                       match row with
 309                         (_, _, _, _, _, elt_tys) ->
 310                           List.fold_left (fun got_so_far' ->
 311                                           fun elt_ty ->
 312                                             (elt_ty, row) :: got_so_far')
 313                                          got_so_far elt_tys
 314                    ) [] (group_extractor ())
 315   in
 316     (* Emit the title for this group.  *)
 317     Printf.fprintf chan "@subsubsection %s\n\n" group_title;
 318     (* Emit a description of each intrinsic.  *)
 319     List.iter (describe_intrinsic true chan) intrinsics;
 320     (* Close this group.  *)
 321     Printf.fprintf chan "\n\n"
 322
 323 let gnu_header chan =
 324   List.iter (fun s -> Printf.fprintf chan "%s\n" s) [
 325   "@c Copyright (C) 2006-2014 Free Software Foundation, Inc.";
 326   "@c This is part of the GCC manual.";
 327   "@c For copying conditions, see the file gcc.texi.";
 328   "";
 329   "@c This file is generated automatically using gcc/config/arm/neon-docgen.ml";
 330   "@c Please do not edit manually."]
 331
 332 let crypto_doc =
 333 "
 334 @itemize @bullet
 335 @item poly128_t vldrq_p128(poly128_t const *)
 336 @end itemize
 337
 338 @itemize @bullet
 339 @item void vstrq_p128(poly128_t *, poly128_t)
 340 @end itemize
 341
 342 @itemize @bullet
 343 @item uint64x1_t vceq_p64 (poly64x1_t, poly64x1_t)
 344 @end itemize
 345
 346 @itemize @bullet
 347 @item uint64x1_t vtst_p64 (poly64x1_t, poly64x1_t)
 348 @end itemize
 349
 350 @itemize @bullet
 351 @item uint32_t vsha1h_u32 (uint32_t)
 352 @*@emph{Form of expected instruction(s):} @code{sha1h.32 @var{q0}, @var{q1}}
 353 @end itemize
 354
 355 @itemize @bullet
 356 @item uint32x4_t vsha1cq_u32 (uint32x4_t, uint32_t, uint32x4_t)
 357 @*@emph{Form of expected instruction(s):} @code{sha1c.32 @var{q0}, @var{q1}, @var{q2}}
 358 @end itemize
 359
 360 @itemize @bullet
 361 @item uint32x4_t vsha1pq_u32 (uint32x4_t, uint32_t, uint32x4_t)
 362 @*@emph{Form of expected instruction(s):} @code{sha1p.32 @var{q0}, @var{q1}, @var{q2}}
 363 @end itemize
 364
 365 @itemize @bullet
 366 @item uint32x4_t vsha1mq_u32 (uint32x4_t, uint32_t, uint32x4_t)
 367 @*@emph{Form of expected instruction(s):} @code{sha1m.32 @var{q0}, @var{q1}, @var{q2}}
 368 @end itemize
 369
 370 @itemize @bullet
 371 @item uint32x4_t vsha1su0q_u32 (uint32x4_t, uint32x4_t, uint32x4_t)
 372 @*@emph{Form of expected instruction(s):} @code{sha1su0.32 @var{q0}, @var{q1}, @var{q2}}
 373 @end itemize
 374
 375 @itemize @bullet
 376 @item uint32x4_t vsha1su1q_u32 (uint32x4_t, uint32x4_t)
 377 @*@emph{Form of expected instruction(s):} @code{sha1su1.32 @var{q0}, @var{q1}, @var{q2}}
 378 @end itemize
 379
 380 @itemize @bullet
 381 @item uint32x4_t vsha256hq_u32 (uint32x4_t, uint32x4_t, uint32x4_t)
 382 @*@emph{Form of expected instruction(s):} @code{sha256h.32 @var{q0}, @var{q1}, @var{q2}}
 383 @end itemize
 384
 385 @itemize @bullet
 386 @item uint32x4_t vsha256h2q_u32 (uint32x4_t, uint32x4_t, uint32x4_t)
 387 @*@emph{Form of expected instruction(s):} @code{sha256h2.32 @var{q0}, @var{q1}, @var{q2}}
 388 @end itemize
 389
 390 @itemize @bullet
 391 @item uint32x4_t vsha256su0q_u32 (uint32x4_t, uint32x4_t)
 392 @*@emph{Form of expected instruction(s):} @code{sha256su0.32 @var{q0}, @var{q1}}
 393 @end itemize
 394
 395 @itemize @bullet
 396 @item uint32x4_t vsha256su1q_u32 (uint32x4_t, uint32x4_t, uint32x4_t)
 397 @*@emph{Form of expected instruction(s):} @code{sha256su1.32 @var{q0}, @var{q1}, @var{q2}}
 398 @end itemize
 399
 400 @itemize @bullet
 401 @item poly128_t vmull_p64 (poly64_t a, poly64_t b)
 402 @*@emph{Form of expected instruction(s):} @code{vmull.p64 @var{q0}, @var{d1}, @var{d2}}
 403 @end itemize
 404
 405 @itemize @bullet
 406 @item poly128_t vmull_high_p64 (poly64x2_t a, poly64x2_t b)
 407 @*@emph{Form of expected instruction(s):} @code{vmull.p64 @var{q0}, @var{d1}, @var{d2}}
 408 @end itemize
 409 "
 410
 411 (* Program entry point.  *)
 412 let _ =
 413   if Array.length Sys.argv <> 2 then
 414     failwith "Usage: neon-docgen <output filename>"
 415   else
 416   let file = Sys.argv.(1) in
 417     try
 418       let chan = open_out file in
 419         gnu_header chan;
 420         List.iter (document_group chan) intrinsic_groups;
 421         Printf.fprintf chan "%s\n" crypto_doc;
 422         close_out chan
 423     with Sys_error sys ->
 424       failwith ("Could not create output file " ^ file ^ ": " ^ sys)