Dict literals
[hiphop-php.git] / hphp / hack / src / format / format_hack.ml
blob161b8ec5ab3dcdd85f9df75b82560e8095aac9e6
1 (**
2 * Copyright (c) 2015, Facebook, Inc.
3 * All rights reserved.
5 * This source code is licensed under the BSD-style license found in the
6 * LICENSE file in the "hack" directory of this source tree. An additional grant
7 * of patent rights can be found in the PATENTS file in the same directory.
9 *)
10 (*****************************************************************************)
11 (* Imported modules. *)
12 (*****************************************************************************)
13 open Core
14 open Lexer_hack
16 exception Format_error
17 exception PHP
18 exception One_line
20 (*****************************************************************************)
21 (* The precedence of the Tarrow operator (->). *)
22 (*****************************************************************************)
24 let tarrow_prec = snd (Parser_hack.get_priority Tarrow)
25 let tpipe_prec = snd (Parser_hack.get_priority Tpipe)
27 (*****************************************************************************)
28 (* The environment *)
29 (*****************************************************************************)
31 type char_kind =
32 (* The last emitted token was a new line *)
33 | Newline
35 (* The last emitted token was XHP text *)
36 | Text
38 (* The last emitted token was a space *)
39 | Space
41 (* Everything else *)
42 | Other
44 (* Absolute character position in the input file. *)
45 type char_pos = int
47 type source_tag =
48 (* Line number in the input file *)
49 | Line of int
51 (* Beginning of an indivisible formatting block *)
52 | Block
54 (* Meta-data to be able to reconcile the input file and the
55 * formatted output (useful for Format_diff)
57 type source_pos = char_pos * source_tag
59 type env = {
60 (* The number of spaces for the margin *)
61 margin : int ref ;
63 (* The last kind of token emitted *)
64 last : char_kind ref ;
66 (* The last token emitted *)
67 last_token : Lexer_hack.token ref ;
69 (* The string representing last token emitted *)
70 last_str : string ref ;
72 (* The string that must be outputted when printing
73 * the last token (can be different from last_str, cf function token) *)
74 last_out : string ref ;
76 (* The output buffer *)
77 buffer : Buffer.t ;
79 (* The path of the current file *)
80 file : Path.t ;
82 (* The state of the lexer *)
83 lexbuf : Lexing.lexbuf ;
85 (* The line number of the input *)
86 lb_line : int ref ;
88 (** The number of non-whitespace tokens consumed in this input line.
90 * This is needed for pretty, optional linebreaks for the pipe operator.
92 * For example, these linebreaks between the pipes in the input
93 * are preserved in the output, despite never exceeding 80 char width:
94 * $a = f()
95 * |> g()
96 * |> h()
98 * See also pipe operator tests.
100 input_line_non_ws_token_count : int ref ;
102 (* The precedence of the current binary operator (0 otherwise) *)
103 priority : int ;
105 (* The output character position (from the beginning of the line) *)
106 char_pos : int ref ;
108 (* The output absolute character position *)
109 abs_pos : int ref ;
111 (* The char position after which we break (typically 80).
112 * It can be different from char_size, because in some cases we need
113 * room to leave the semi-colon on the same line.
115 char_break : int ;
116 char_size : int ;
118 (* The precedence of the operator we should try to break *)
119 break_on : int ;
121 (* The output line number *)
122 line : int ref ;
124 (* > 0 if the current try failed
125 * = 0 if the current not trying anything
126 * < 0 if we should not bother trying but force to break
127 * You should checkout the module Try if this doesn't make sense.
129 failed : int ref ;
131 (* The depth of nested try_outer we are currently in *)
132 try_depth : int ;
134 (* True if we are trying to emit something that must fit on one line *)
135 one_line : bool ;
137 (* True if we should set env.failed on failure *)
138 report_fit : bool ;
140 (* True if we are in an attribute (<< ... >>) *)
141 in_attr : bool ;
143 (* The amount of spaces that must be emitted on the next token
144 * (this is for the margin).
146 spaces : int ref ;
148 (* True if we should stop after a certain position. *)
149 stop : int ;
151 (* True when we should not emit to the buffer (useful for region mode) *)
152 silent : bool ref ;
154 (* The beginning of the region we are trying to format. *)
155 from : int ;
157 (* The end of the region we are trying to format. *)
158 to_ : int ;
160 (* When the "keep_source_pos" option is turned on,
161 * the formatter outputs extra tags in the field source_pos.
162 * There are 2 kinds of tags 'Line and 'Block'
163 * (these tags are used by Format_diff).
165 * The tags of the form 'Line' should be read as: at this
166 * point in the text, the input line number was LINE_NUMBER
167 * (useful to reconcile input/output line numbers).
169 * The tags of the form 'Block' should be read as: we have reached the
170 * beginning or the end of an indivisible block.
172 * This information is useful to know which pieces can be formatted
173 * separately. For example, let's consider the following diff:
174 * $x = array(
175 * - 1,
176 * + 23,
177 * 2,
178 * );
179 * It doesn't make sense to only format the line that changed.
180 * The formatter is probably going to regroup the entire array on
181 * one line (because it fits).
182 * Thanks to these extra tags, we know that we have to treat the entire
183 * array as an indivisible entity.
185 keep_source_pos : bool ;
186 source_pos_l : source_pos list ref ;
188 (* When no_trailing_commas is false (default), multiline comma separated items
189 include a trailing comma, when it is false, we omit trailing commas. The
190 standard php parser does not support trailing commas, but hhvm does.
192 no_trailing_commas : bool ;
195 (*****************************************************************************)
196 (* The "saved" environment (to allow backtracking). *)
197 (*****************************************************************************)
199 type saved_env = {
200 sv_margin : int ;
201 sv_last : char_kind ;
202 sv_last_token : Lexer_hack.token ;
203 sv_last_str : string ;
204 sv_last_out : string ;
205 sv_buffer : Buffer.t ;
206 sv_lexbuf : Parser_hack.saved_lb ;
207 sv_lb_line : int ;
208 sv_char_pos : int ;
209 sv_abs_pos : int ;
210 sv_line : int ;
211 sv_failed : int ;
212 sv_spaces : int ;
213 sv_silent : bool ;
214 sv_source_pos_l : source_pos list ;
217 let empty file lexbuf from to_ keep_source_pos no_trailing_commas = {
218 margin = ref 0 ;
219 last = ref Newline ;
220 last_token = ref Terror ;
221 last_str = ref "" ;
222 last_out = ref "" ;
223 buffer = Buffer.create 256 ;
224 file = file ;
225 lexbuf = lexbuf ;
226 lb_line = ref 1 ;
227 input_line_non_ws_token_count = ref 0 ;
228 priority = 0 ;
229 char_pos = ref 0 ;
230 abs_pos = ref 0 ;
231 char_size = 80 ;
232 char_break = 80 ;
233 break_on = max_int ;
234 line = ref 0 ;
235 report_fit = false ;
236 failed = ref 0 ;
237 try_depth = 0 ;
238 one_line = false ;
239 in_attr = false ;
240 spaces = ref 0 ;
241 stop = max_int ;
242 silent = ref false ;
243 from ;
244 to_ ;
245 keep_source_pos ;
246 source_pos_l = ref [] ;
247 no_trailing_commas = no_trailing_commas ;
250 (* Saves all the references of the environment *)
251 let save_env env =
252 let { margin; last; last_token; buffer; file; lexbuf; lb_line;
253 input_line_non_ws_token_count;
254 priority; char_pos; abs_pos; char_break;
255 char_size; silent; one_line;
256 last_str; last_out; keep_source_pos; source_pos_l;
257 break_on; line; failed; try_depth; spaces;
258 report_fit; in_attr; stop; from; to_; no_trailing_commas } = env in
259 { sv_margin = !margin;
260 sv_last = !last;
261 sv_buffer = env.buffer;
262 sv_last_token = !last_token;
263 sv_last_str = !last_str;
264 sv_last_out = !last_out;
265 sv_lexbuf = Parser_hack.save_lexbuf_state lexbuf;
266 sv_lb_line = !lb_line;
267 sv_char_pos = !char_pos;
268 sv_abs_pos = !abs_pos;
269 sv_line = !line;
270 sv_failed = !failed;
271 sv_spaces = !spaces;
272 sv_silent = !silent;
273 sv_source_pos_l = !source_pos_l;
276 let restore_env env saved_env =
277 Parser_hack.restore_lexbuf_state env.lexbuf saved_env.sv_lexbuf;
278 env.lb_line := saved_env.sv_lb_line;
279 env.margin := saved_env.sv_margin;
280 env.last := saved_env.sv_last;
281 env.last_token := saved_env.sv_last_token;
282 env.last_str := saved_env.sv_last_str;
283 env.last_out := saved_env.sv_last_out;
284 env.char_pos := saved_env.sv_char_pos;
285 env.abs_pos := saved_env.sv_abs_pos;
286 env.line := saved_env.sv_line;
287 env.failed := saved_env.sv_failed;
288 env.spaces := saved_env.sv_spaces;
289 env.silent := saved_env.sv_silent;
290 env.source_pos_l := saved_env.sv_source_pos_l;
291 { env with buffer = saved_env.sv_buffer }
293 (*****************************************************************************)
294 (* Consumes the next token.
295 * The logic is a bit complex because of the regions.
296 * If hh_format is called with -from -to options, we want to start/stop
297 * emitting text depending on the position.
298 * The problem is that -from could point to the middle of a token.
299 * When that happens we split the token in 2, the relevant part (the one
300 * we want to emit) is kept in env.last_out, the full string is kept in
301 * env.last_str.
302 * Both env.last_out/env.last_str are useful:
303 * -) env.last_out is used to emit the token (that's why we use the function
304 * last_token instead of emitting a string directly)
305 * -) env.last_str is used for the logic of the parser (we can't use a
306 * truncated token for that)
308 (*****************************************************************************)
310 let make_tokenizer next_token env =
311 let pos = env.lexbuf.Lexing.lex_curr_pos in
312 if pos >= env.stop then Teof else
313 let tok = next_token env.lexbuf in
314 let new_pos = env.lexbuf.Lexing.lex_curr_pos in
315 env.silent := (new_pos <= env.from || pos >= env.to_ - 1);
316 env.last_token := tok;
317 let str_value = Lexing.lexeme env.lexbuf in
318 env.last_str := str_value;
319 (* Splitting the token (-from) *)
320 if pos < env.from && new_pos > env.from
321 then begin
322 let sub_size = new_pos - env.from + 1 in
323 let str_size = String.length str_value in
324 let start = str_size - sub_size in
325 env.last_out := String.sub str_value start sub_size;
327 (* Splitting the token (-to) *)
328 else if pos < env.to_ - 1 && new_pos >= env.to_ - 1
329 then begin
330 let sub_size = env.to_ - pos - 1 in
331 env.last_out := String.sub str_value 0 sub_size;
333 else env.last_out := str_value;
334 (match tok with
335 | Tnewline ->
336 env.lb_line := !(env.lb_line) + 1;
337 env.input_line_non_ws_token_count := 0
338 | Tspace -> ()
339 | _ ->
340 env.input_line_non_ws_token_count :=
341 !(env.input_line_non_ws_token_count) + 1
345 (* Normal tokenizer *)
346 let token = make_tokenizer Lexer_hack.format_token
348 (* XHP tokenizer *)
349 let xhp_token = make_tokenizer Lexer_hack.format_xhptoken
351 (* Comment tokenizer *)
352 let comment_token = make_tokenizer Lexer_hack.format_comment
354 (*****************************************************************************)
355 (* Backtracking. *)
356 (*****************************************************************************)
358 let back env =
359 if !(env.last_token) = Tnewline
360 then env.lb_line := !(env.lb_line) - 1;
361 env.last_token := Terror;
362 env.input_line_non_ws_token_count :=
363 max 0 (!(env.input_line_non_ws_token_count) - 1);
364 Lexer_hack.back env.lexbuf
366 (*****************************************************************************)
367 (* Primitives used to look ahead. *)
368 (*****************************************************************************)
370 (* Attempt does not modify the state of the environment *)
371 let attempt env f =
372 let buffer = Buffer.create 256 in
373 let saved_env = save_env env in
374 let f_result = f { env with buffer } in
375 let _ = restore_env env saved_env in
376 f_result
378 let attempt_keep_lines env f =
379 attempt env begin fun env ->
380 let buffer = Buffer.create 256 in
381 let env = { env with buffer } in
382 let line = !(env.line) in
383 let _ = f { env with report_fit = true } in
384 let nbr_lines = !(env.line) - line in
385 buffer, nbr_lines, !(env.failed)
388 (*****************************************************************************)
389 (* Primitives for branching.
391 * The branching logic always tries to break the outer-most expression.
393 * For example:
394 * array(array(array(...)))
395 * should be first rewritten by trying:
396 * array(
397 * array(array(...))
399 * Then:
400 * array(
401 * array(
402 * array(...)
406 * However, the logic is a bit complicated because the algorithm is
407 * exponential and that becomes a problem on very large nested arrays.
408 * The solution consist in breaking multiple layers at once when a
409 * certain depth is reached.
411 * Let's consider: array(array(.. array N times ))
412 * If the array breaks at a depth larger than 6 we directly try:
413 * array(
414 * array(
415 * array( ... N/2 times
417 * We preemptively break the array N/2 times, to avoid the exponential.
419 (*****************************************************************************)
421 module Try: sig
423 val one_line: env -> (env -> unit) -> (env -> unit) -> unit
424 val outer: env -> (env -> unit) -> (env -> unit) -> unit
425 end = struct
427 let try_raw env f1 f2 =
428 let saved_env = save_env env in
429 let buffer = Buffer.create 256 in
430 let env = { env with buffer } in
431 let f1_result = f1 { env with report_fit = true } in
432 if !(env.failed) <= 0
433 then begin
434 Buffer.add_buffer saved_env.sv_buffer buffer;
435 f1_result
437 else begin
438 let env = restore_env env saved_env in
439 f2 env
442 let one_line env f1 f2 =
443 if env.one_line then f1 env else
444 try_raw env
445 begin fun env ->
446 try ignore (f1 { env with one_line = true })
447 with One_line ->
448 env.failed := 1
452 let outer env f1 f2 =
453 if env.try_depth > 0
454 then f1 { env with try_depth = env.try_depth + 1 }
455 else if env.try_depth < 0
456 then f2 { env with try_depth = env.try_depth + 1 }
457 else
458 let depth_failed = ref 0 in
459 let big_buffer = ref false in
460 try_raw env
461 (fun env ->
462 f1 { env with try_depth = 1 };
463 big_buffer := Buffer.length env.buffer > 10_000;
464 depth_failed := !(env.failed);
466 (fun env ->
467 if !depth_failed > 6
468 then f2 { env with try_depth = - (!depth_failed / 2) }
469 else if !big_buffer
470 then f2 { env with try_depth = - 3 }
471 else f2 env)
474 (*****************************************************************************)
475 (* Scoring functions.
476 * There are cases where multiple choices could fit. When that's the case,
477 * we pick the one that "looks" nicer.
478 * The "looks" function is pretty subjective ;-)
480 (*****************************************************************************)
482 let rec aligned last_tok count lexbuf =
483 match Lexer_hack.format_token lexbuf with
484 | Teof -> count
485 | Tspace -> aligned last_tok count lexbuf
486 | _ ->
487 let tok = Lexing.lexeme lexbuf in
488 if last_tok = ")" && tok = "->" then -100 else
489 let count = if last_tok = tok && last_tok = "->"
490 then count + 1 else count in
491 aligned_look_for_newline tok count lexbuf
493 and aligned_look_for_newline last_tok count lexbuf =
494 match Lexer_hack.format_token lexbuf with
495 | Teof -> count
496 | Tnewline -> aligned last_tok count lexbuf
497 | _ -> aligned_look_for_newline last_tok count lexbuf
499 let keep_best env f1 f2 =
500 if env.one_line then f1 env else
501 let env = { env with try_depth = 0 } in
502 let buffer1, nbr_lines1, failed1 = attempt_keep_lines env f1 in
503 let buffer2, nbr_lines2, failed2 = attempt_keep_lines env f2 in
504 if failed1 > 0 then f2 env else
505 if failed2 > 0 then f1 env else
506 let buffer1 = Buffer.contents buffer1 in
507 let buffer2 = Buffer.contents buffer2 in
508 (* The logic to select the best solution *)
509 let aligned_count1 = aligned "" 0 (Lexing.from_string buffer1) in
510 let aligned_count2 = aligned "" 0 (Lexing.from_string buffer2) in
511 if aligned_count2 > aligned_count1 then f2 env else
512 if aligned_count1 < aligned_count2 then f1 env else
513 if nbr_lines1 <= nbr_lines2 then f1 env else f2 env
515 (*****************************************************************************)
516 (* Returns the current position in the buffer. *)
517 (*****************************************************************************)
519 let get_pos env =
520 env.lexbuf.Lexing.lex_curr_pos
522 (*****************************************************************************)
523 (* Pretty printing primitives.
524 * We don't want to maintain the state of pretty-printer all the time.
525 * This module keeps track of what the margin should be (adds spaces when
526 * needed), removes spaces when they are followed by a new line etc ...
528 (*****************************************************************************)
530 module Pp: sig
532 val out: string -> env -> unit
533 val last_token: env -> unit
534 val margin_set: int -> env -> (env -> 'a) -> 'a
535 val right: env -> (env -> 'a) -> 'a
536 val right_fun: (env -> 'a) -> env -> 'a
537 val right_n: int -> env -> (env -> 'a) -> 'a
538 val force_nl: env -> unit
539 val newline: env -> unit
540 val space: env -> unit
541 val keep_space: env -> unit
543 end = struct
545 let buf_add_char env c =
546 if not !(env.silent) then begin
547 Buffer.add_char env.buffer c
550 let buf_add_string env s =
551 if not !(env.silent) then begin
552 Buffer.add_string env.buffer s
555 let add_char_pos env n =
556 env.char_pos := !(env.char_pos) + n;
557 env.abs_pos := !(env.abs_pos) + n;
558 if env.report_fit && !(env.char_pos) >= env.char_break then begin
559 if env.one_line then raise One_line;
560 env.failed := max 1 (max !(env.failed) env.try_depth)
561 end;
564 let add_char env c =
565 buf_add_char env c;
566 add_char_pos env 1
568 let add_string env s =
569 buf_add_string env s;
570 add_char_pos env (String.length s)
572 let force_nl env =
573 env.char_pos := 0;
574 env.last := Newline;
575 env.line := !(env.line) + 1;
576 env.spaces := 0;
577 add_char env '\n';
578 if env.keep_source_pos then begin
579 let source_pos = !(env.abs_pos), Line !(env.lb_line) in
580 env.source_pos_l := source_pos :: !(env.source_pos_l)
583 let newline env =
584 if env.one_line then raise One_line;
585 if !(env.last) <> Newline then force_nl env
587 let space env =
588 if !(env.last) <> Space then begin
589 env.last := Space;
590 env.spaces := !(env.spaces) + 1;
593 let keep_space env =
594 assert (!(env.last_token) = Tspace);
595 let str = !(env.last_out) in
596 env.last := Space;
597 String.iter (fun c -> assert (c = ' ')) str;
598 env.spaces := !(env.spaces) + String.length str
600 let right_n n env f =
601 env.margin := !(env.margin) + n;
602 let result = f env in
603 env.margin := !(env.margin) - n;
604 result
606 let margin_set n env f =
607 let margin_cpy = !(env.margin) in
608 env.margin := n;
609 let result = f env in
610 env.margin := margin_cpy;
611 result
613 let right env f = right_n 2 env f
614 let right_fun f = fun env -> right env f
616 let out s env =
617 if !(env.last) = Newline then env.spaces := !(env.margin);
618 for i = 0 to !(env.spaces) - 1 do
619 add_char env ' '
620 done;
621 env.spaces := 0;
622 add_string env s;
623 env.last := Other;
626 let last_token env =
627 out !(env.last_out) env
631 open Pp
633 (*****************************************************************************)
634 (* Some helpers to regroup sequences of pretty-printing functions. *)
635 (*****************************************************************************)
637 let rec seq env = function
638 | [] -> ()
639 | f :: rl -> f env; seq env rl
641 let seq_fun l = fun env -> seq env l
643 let line env l =
644 seq env l;
645 newline env
647 let out_next env =
648 ignore (token env);
649 last_token env
651 let ignore_ f env = ignore (f env)
653 (*****************************************************************************)
654 (* Precedence of binary operators.
655 * We need to maintain that information to break expressions with the lowest
656 * precedence first.
657 * Example: 1 * 2 * 3 + 4
658 * We must first try:
659 * 1 * 2 * 3 +
661 * Before we try to to break (1 * 2 * 3).
662 * These functions keep track the precedence of the current operator to later
663 * on prioritize in what order we will break an expression (when necessary).
665 (*****************************************************************************)
667 let set_priority env priority =
668 { env with priority }
670 let reset_priority env =
671 { env with priority = 0 }
673 let with_priority env op f =
674 let _, prio = Parser_hack.get_priority op in
675 let env = set_priority env prio in
676 f env
678 (*****************************************************************************)
679 (* Add block tag. Used for --diff mode.
680 * We don't have to worry about Opening or Closing blocks, because the logic
681 * is: whatever is in between 2 blocks is indivisible.
682 * Why is that? Because the place where we add the block tag are the places
683 * where we know it's acceptable to break the indentation.
684 * Think of it this way: block tags tell us where we can break the formatting
685 * given that, whatever is in between two block tags is indivisible.
687 * Note that we only insert Block tags where the existing code has a line
688 * break. See Format_diff.TextBlocks for more details.
690 (*****************************************************************************)
692 let add_block_tag env =
693 assert (!(env.last) = Newline);
694 if env.keep_source_pos && attempt env token = Tnewline then begin
695 let source_pos = !(env.abs_pos), Block in
696 env.source_pos_l := source_pos :: !(env.source_pos_l)
699 (*****************************************************************************)
700 (* Comments *)
701 (*****************************************************************************)
703 let rec skip_spaces env =
704 match token env with
705 | Teof -> ()
706 | Tspace -> skip_spaces env
707 | _ -> back env
709 let rec skip_spaces_and_nl env =
710 match token env with
711 | Teof -> ()
712 | Tspace | Tnewline -> skip_spaces_and_nl env
713 | _ -> back env
715 let rec comment env =
716 right_n 1 env comment_loop
718 and comment_loop env =
719 match comment_token env with
720 | Teof -> ()
721 | Tclose_comment ->
722 last_token env;
723 | Tnewline ->
724 newline env;
725 skip_spaces env;
726 comment_loop env;
727 | Tspace ->
728 keep_space env;
729 comment_loop env
730 | _ ->
731 last_token env;
732 comment_loop env
734 let rec line_comment env =
735 line_comment_loop env
737 and line_comment_loop env =
738 match token env with
739 | Teof -> ()
740 | Tnewline -> back env
741 | Tspace -> keep_space env; line_comment_loop env
742 | _ -> last_token env; line_comment_loop env
744 (*****************************************************************************)
745 (* Generic handling of newlines + spaces + comments.
746 * Default is:
747 * -) Newlines are removed
748 * -) Comments are preserved
749 * -) Spaces are removed
751 * There are some cases where we need to handle comments "by hand", but this
752 * logic is the one we want most of the time.
754 (*****************************************************************************)
756 let rec keep_comment env =
757 match token env with
758 | Teof -> ()
759 | Tspace -> keep_comment env
760 | Topen_comment ->
761 last_token env;
762 comment env;
763 space env
764 | Tline_comment ->
765 if !(env.last) <> Newline then space env;
766 last_token env;
767 line_comment_loop env;
768 newline env;
769 add_block_tag env
770 | _ -> back env
772 let rec generic_nsc env =
773 match !(env.last_token) with
774 | Teof -> ()
775 | Topen_comment ->
776 if !(env.last) <> Newline && !(env.last) <> Space
777 then space env;
778 last_token env;
779 comment env;
780 if attempt env is_closing_list
781 then ()
782 else space env
783 | Tline_comment ->
784 if !(env.last) <> Newline
785 then space env;
786 last_token env;
787 line_comment_loop env;
788 newline env;
789 add_block_tag env
790 | Tspace
791 | Tnewline ->
792 ignore (token env);
793 generic_nsc env
794 | _ ->
795 back env
797 and is_closing_list env =
798 match token env with
799 | Teof -> false
800 | Tspace | Tnewline -> is_closing_list env
801 | Trp | Trb | Tgt | Tcomma | Trcb | Tsc -> true
802 | _ -> false
804 (*****************************************************************************)
805 (* Wrappers for newlines, spaces and comments.
807 * Most of the time (not always), we want to look at the next "real" token, in
808 * other words: we want to skip white spaces and the comments to see what the
809 * next token looks like (and presumably decide what to do based on that).
811 (*****************************************************************************)
813 let rec wrap_non_ws env f =
814 match token env with
815 | Tnewline | Tspace ->
816 wrap_non_ws env f
817 | x -> f x
819 let rec wrap_eof env f =
820 match token env with
821 | Tnewline | Tspace | Tline_comment | Topen_comment ->
822 generic_nsc env;
823 wrap_eof env f
824 | x -> f x
826 let rec wrap_eof_xhp env f =
827 match xhp_token env with
828 | Tnewline | Tspace | Tline_comment | Topen_comment ->
829 generic_nsc env;
830 wrap_eof_xhp env f
831 | x -> f x
833 let rec wrap env f =
834 match token env with
835 | Teof -> ()
836 | Tnewline | Tspace | Tline_comment | Topen_comment ->
837 generic_nsc env;
838 wrap env f
839 | x -> f x
841 let rec wrap_xhp env f =
842 match xhp_token env with
843 | Teof -> ()
844 | Tnewline | Tspace | Tline_comment | Topen_comment ->
845 generic_nsc env;
846 wrap_xhp env f
847 | x -> f x
849 let wrap_word env f = wrap env begin function
850 | Tword -> f !(env.last_str)
851 | _ -> back env
854 let next_real_token_info ~wrap env =
855 attempt env begin fun env ->
856 wrap env begin fun tok ->
857 let tok_str = !(env.last_str) in
858 tok, tok_str
862 let next_token ?(wrap=wrap_eof) env =
863 let tok, _tok_str = next_real_token_info ~wrap env in
866 let next_token_str ?(wrap=wrap_eof) env =
867 let _tok, tok_str = next_real_token_info ~wrap env in
868 tok_str
870 let next_non_ws_token env =
871 attempt env begin fun env ->
872 wrap_non_ws env (fun tok -> tok)
875 (*****************************************************************************)
876 (* Helpers to look ahead. *)
877 (*****************************************************************************)
879 let try_words env wordl f = wrap env begin function
880 | Tword when List.mem wordl !(env.last_str) ->
881 f env
882 | _ -> back env
885 let try_word env word f = try_words env [word] f
887 let try_token env tok f = wrap env begin function
888 | tok' when tok = tok' ->
889 f env
890 | _ ->
891 back env
894 let opt_word word env = wrap env begin function
895 | Tword when !(env.last_str) = word ->
896 last_token env
897 | _ -> back env
900 let opt_tok tok env = wrap env begin function
901 | tok' when tok = tok' ->
902 last_token env
903 | _ -> back env
906 (*****************************************************************************)
907 (* There are cases where the formatter expects a token (e.g. a semi colon).
908 * If the token is not found, the whole process stops, because one of the
909 * assumption of the formatter is that we are dealing with correct Hack code
910 * (at least for now ;-)).
912 * There is a debug mode (default turned to false) that gives a lot of context
913 * on where the error was found. It's handy to leave it here in case someone
914 * else wants to do some work with the formatter.
916 (*****************************************************************************)
918 let debug = false
920 let rec mycat n env =
921 if n < 0 then () else
922 match token env with
923 | Teof -> ()
924 | _ ->
925 let n = n - (String.length !(env.last_str)) in
926 Buffer.add_string env.buffer !(env.last_str);
927 mycat n env
929 (* Used to give some context while debugging *)
930 let print_error tok_str env =
931 Buffer.add_string env.buffer !(env.last_str);
932 Buffer.add_string env.buffer "<----";
933 mycat 200 env;
934 let buffer = Buffer.contents env.buffer in
935 let buffer =
936 if String.length buffer > 400 then
937 String.sub buffer (String.length buffer - 400 -1) 400
938 else buffer
940 let error =
941 (Pos.string (Pos.make (env.file :> string) env.lexbuf))^"\n"^
942 (Printf.sprintf "Expected: %s, found: '%s'\n" tok_str !(env.last_str))^
943 buffer^"\n"
945 output_string stderr error;
946 flush stderr
948 let expect tok_str env = wrap env begin fun _ ->
949 if !(env.last_str) = tok_str
950 then last_token env
951 else begin
952 if debug then print_error tok_str env;
953 raise Format_error
957 let expect_token tok env = wrap env begin fun x ->
958 if x = tok
959 then last_token env
960 else begin
961 raise Format_error
965 let expect_xhp tok_str env = wrap_xhp env begin fun _ ->
966 if !(env.last_str) = tok_str
967 then last_token env
968 else begin
969 if debug then begin
970 print_error tok_str env;
971 flush stderr
972 end;
973 raise Format_error
977 (*****************************************************************************)
978 (* Helper functions to determine if a function has consumed tokens. *)
979 (*****************************************************************************)
981 let consume_value env f =
982 let pos_before = get_pos env in
983 let f_return = f env in
984 let pos_after = get_pos env in
985 let has_consumed = pos_before <> pos_after in
986 has_consumed, f_return
988 let has_consumed env f =
989 let result, _f_value = consume_value env f in
990 result
992 let is_followed_by env f tok_str =
993 attempt env begin fun env ->
994 has_consumed env f &&
995 next_token_str env = tok_str
998 let wrap_would_consume env f =
999 attempt env begin fun env ->
1000 wrap_eof env begin fun _ ->
1001 back env;
1002 has_consumed env f
1006 (*****************************************************************************)
1007 (* Logic preserving newlines. *)
1008 (*****************************************************************************)
1010 let empty_line env =
1011 let tok = ref Tspace in
1012 while !tok = Tspace do tok := token env done;
1013 match !tok with
1014 | Tnewline -> true
1015 | _ -> back env; false
1017 let is_empty_line env =
1018 attempt env empty_line
1020 let rec preserve_nl_space env =
1021 match token env with
1022 | Teof -> ()
1023 | Tspace ->
1024 preserve_nl_space env
1025 | Tnewline ->
1026 while is_empty_line env do
1027 ignore (empty_line env)
1028 done;
1029 back env;
1030 force_nl env;
1031 | _ ->
1032 back env
1034 let rec preserve_nl env f =
1035 match token env with
1036 | Tline_comment ->
1037 generic_nsc env;
1038 assert (token env = Tnewline);
1039 preserve_nl_space env;
1040 preserve_nl env f
1041 | Topen_comment ->
1042 generic_nsc env;
1043 newline env;
1044 add_block_tag env;
1045 preserve_nl env f
1046 | Tspace when is_empty_line env ->
1047 preserve_nl_space env;
1048 preserve_nl env f
1049 | Tspace ->
1050 preserve_nl env f
1051 | Tnewline ->
1052 preserve_nl_space env;
1053 preserve_nl env f
1054 | Teof ->
1056 | _ ->
1057 back env;
1058 f env
1060 (*****************************************************************************)
1061 (* Dealing with lists. *)
1062 (*****************************************************************************)
1064 let rec list env element = preserve_nl env begin fun env ->
1065 if has_consumed env element
1066 then (newline env; add_block_tag env; list env element)
1069 (*****************************************************************************)
1070 (* List comma separated. *)
1071 (*****************************************************************************)
1073 let rec list_comma_loop :
1074 'a. break:(env -> unit) -> ('a -> env -> 'a) -> 'a -> env -> 'a =
1075 fun ~break element acc env ->
1076 while has_consumed env begin fun env ->
1077 wrap env begin function
1078 | Topen_comment | Tline_comment -> generic_nsc env
1079 | _ -> back env
1082 do () done;
1083 let has_consumed, acc = consume_value {
1084 env with char_break = env.char_break - 1
1085 } (element acc) in
1086 if has_consumed
1087 then list_comma_loop_remain ~break element acc env
1088 else acc
1090 and list_comma_loop_remain :
1091 'a. break:(env -> unit) -> ('a -> env -> 'a) -> 'a -> env -> 'a =
1092 fun ~break element acc env ->
1093 wrap_eof env begin function
1094 | Teof -> acc
1095 | Tcomma ->
1096 let continue = wrap_would_consume env (element acc) in
1097 if continue
1098 then begin
1099 seq env [last_token; comment_after_comma ~break; break];
1100 list_comma_loop ~break element acc env
1102 else acc
1103 | _ ->
1104 back env;
1108 and comment_after_comma ~break env =
1109 match token env with
1110 | Teof -> ()
1111 | Topen_comment ->
1112 if attempt env begin fun env ->
1113 comment env;
1114 empty_line env
1116 then begin
1117 space env;
1118 last_token env;
1119 comment env;
1120 break env
1122 else begin
1123 break env;
1124 back env
1126 | Tline_comment ->
1127 space env;
1128 last_token env;
1129 line_comment_loop env;
1130 newline env
1131 | Tspace ->
1132 comment_after_comma ~break env
1133 | Tnewline ->
1134 break env
1135 | _ ->
1136 back env;
1137 break env
1139 let rec list_comma_single_comment env =
1140 let k = list_comma_single_comment in
1141 match token env with
1142 | Teof -> ()
1143 | Tspace | Tnewline -> k env
1144 | Topen_comment ->
1145 last_token env;
1146 comment_loop env;
1147 space env
1148 | _ -> back env
1150 let list_comma_single element env =
1151 list_comma_single_comment env;
1152 let f () env = element env in
1153 let () = list_comma_loop ~break:space f () env in
1156 let list_comma_multi_maybe_trail ~trailing element env =
1157 let trailing = trailing && not env.no_trailing_commas in
1158 let break = newline in
1159 let trailing = list_comma_loop ~break element trailing env in
1160 if trailing && !(env.last) <> Newline
1161 then seq env [out ","; comment_after_comma ~break]
1163 let list_comma_multi ~trailing element env =
1164 let f acc env = ignore (element env); acc in
1165 list_comma_multi_maybe_trail ~trailing f env
1167 let list_comma_multi_nl ~trailing element env =
1168 newline env;
1169 list_comma_multi ~trailing element env;
1170 newline env
1172 let list_comma ?(trailing=true) element env =
1173 Try.one_line env
1174 (list_comma_single element)
1175 (list_comma_multi ~trailing element)
1177 let list_comma_nl ?(trailing=true) element env =
1178 Try.one_line env
1179 (list_comma_single element)
1180 (list_comma_multi_nl ~trailing element)
1182 (*****************************************************************************)
1183 (* Semi colons are special because we want to keep the comments on the same
1184 * line right after them.
1186 (*****************************************************************************)
1188 let semi_colon env =
1189 expect ";" env; space env; keep_comment env
1191 (*****************************************************************************)
1192 (* The entry point *)
1193 (*****************************************************************************)
1195 type 'a return =
1196 | Disabled_mode
1197 | Parsing_error of Errors.error list
1198 | Internal_error
1199 | Success of 'a
1201 let rec entry ~keep_source_metadata ~no_trailing_commas ~modes
1202 (file : Path.t) from to_ content k =
1204 let errorl, () = Errors.do_ begin fun () ->
1205 let rp = Relative_path.(create Dummy (file :> string)) in
1206 let {Parser_hack.file_mode; _} =
1207 Parser_hack.program rp content in
1208 if not (List.mem modes file_mode) then raise PHP;
1209 end in
1210 if errorl <> []
1211 then Parsing_error errorl
1212 else begin
1213 let lb = Lexing.from_string content in
1214 let env = empty file lb from to_ keep_source_metadata no_trailing_commas in
1215 header env;
1216 Success (k env)
1218 with
1219 | PHP -> Disabled_mode
1220 | _ ->
1221 Printexc.print_backtrace stderr;
1222 Internal_error
1224 (*****************************************************************************)
1225 (* Hack header <?hh *)
1226 (*****************************************************************************)
1228 and header env = wrap env begin function
1229 | Thh | Tphp ->
1230 seq env [last_token; mode; newline];
1231 stmt_list ~is_toplevel:true env
1232 | _ -> assert false
1235 and mode env =
1236 match token env with
1237 | Tspace -> mode env
1238 | Tline_comment ->
1239 space env; last_token env;
1240 line_comment env;
1241 newline env
1242 | _ -> back env
1244 (*****************************************************************************)
1245 (* Identifiers *)
1246 (*****************************************************************************)
1248 and name env =
1249 match token env with
1250 | Teof -> ()
1251 | Tnewline | Tspace | Tline_comment | Topen_comment ->
1252 generic_nsc env;
1253 name env
1254 | _ ->
1255 back env;
1256 name_loop env
1258 and name_loop env =
1259 match token env with
1260 (* names can contain colons, but cannot end with them *)
1261 | Tcolon when attempt env begin fun env ->
1262 match token env with
1263 | Tword -> true
1264 | _ -> false
1265 end ->
1266 last_token env;
1267 name_loop env
1268 | Tpercent | Tminus | Tword | Tbslash ->
1269 last_token env;
1270 name_loop env
1271 | _ ->
1272 back env
1274 (*****************************************************************************)
1275 (* Shapes *)
1276 (*****************************************************************************)
1278 and shape_type_elt env =
1279 if has_consumed env expr
1280 then seq env [space; expect "=>"; space; hint]
1282 (*****************************************************************************)
1283 (* Constants *)
1284 (*****************************************************************************)
1286 and const env =
1287 last_token env;
1288 if attempt env begin fun env ->
1289 name env;
1290 next_token env = Teq
1292 then ()
1293 else (space env; hint env);
1294 class_members env;
1295 newline env
1297 and abs_const env =
1298 last_token env;
1299 if attempt env begin fun env ->
1300 name env;
1301 next_token env = Tsc
1303 then ()
1304 else (space env; hint env);
1305 seq env [space ; name ; expect ";"]
1307 (*****************************************************************************)
1308 (* Type Constants *)
1309 (*****************************************************************************)
1311 and is_typeconst env =
1312 attempt env begin fun env ->
1313 wrap_non_ws env begin function
1314 | Tword when !(env.last_str) = "type" ->
1315 (match next_non_ws_token env with
1316 | Teq | Tsc -> false
1317 | _ -> true
1319 | _ -> false
1323 and type_const env =
1324 seq env [last_token; space; expect "type"; space; hint; as_constraint];
1325 if next_non_ws_token env = Teq
1326 then seq env [space; expect "="; space; hint; semi_colon]
1327 else semi_colon env
1329 and abs_type_const env =
1330 seq env [last_token; space; expect "type"; space; hint];
1331 if attempt env begin fun env ->
1332 as_constraint env;
1333 next_token env = Tsc
1335 then seq env [as_constraint; semi_colon]
1336 else semi_colon env
1338 (*****************************************************************************)
1339 (* Type hints. *)
1340 (*****************************************************************************)
1342 and hint_function_params env =
1343 expect "(" env;
1344 list_comma ~trailing:true hint_function_param env;
1345 expect ")" env
1347 and hint_function_param env = wrap env begin function
1348 | Tellipsis -> last_token env
1349 | _ -> back env; hint env
1352 and taccess_loop env = wrap env begin function
1353 | Tcolcol when next_token env = Tword ->
1354 last_token env;
1355 wrap env begin function
1356 | Tword ->
1357 last_token env;
1358 taccess_loop env
1359 | _ -> back env
1361 | _ -> back env
1364 and hint env = wrap env begin function
1365 | Tplus | Tminus | Tqm | Tat | Tbslash | Tpipe ->
1366 last_token env;
1367 hint env
1368 | Tpercent | Tcolon ->
1369 last_token env;
1370 name_loop env;
1371 taccess_loop env;
1372 hint_parameter env
1373 | Tword when !(env.last_str) = "shape" ->
1374 last_token env;
1375 expect "(" env;
1376 if next_token env = Trp || attempt env begin fun env ->
1377 (* does the shape have only one element? *)
1378 shape_type_elt env;
1379 wrap_eof env begin function
1380 | Tcomma -> next_token env = Trp
1381 | Trp -> true
1382 | _ -> false
1384 end then
1385 list_comma_single shape_type_elt env
1386 else
1387 right env (list_comma_multi_nl ~trailing:true shape_type_elt);
1388 expect ")" env
1389 | Tword ->
1390 last_token env;
1391 name_loop env;
1392 taccess_loop env;
1393 typevar_constraint env;
1394 hint_parameter env
1395 | Tlp -> begin
1396 last_token env;
1397 (match token env with
1398 | Tword when !(env.last_str) = "function" ->
1399 last_token env;
1400 hint_function_params env;
1401 return_type env
1402 | _ ->
1403 back env;
1404 hint_list env);
1405 expect ")" env
1407 | _ ->
1408 back env
1411 and typevar_constraint env =
1412 try_words env ["as"; "super"] begin fun env ->
1413 space env;
1414 last_token env;
1415 space env;
1416 hint env
1417 end;
1419 and as_constraint env =
1420 try_word env "as" begin fun env ->
1421 space env;
1422 last_token env;
1423 space env;
1424 hint env
1425 end;
1427 and hint_parameter env = wrap env begin function
1428 | Tlt ->
1429 last_token env;
1430 hint_list ~trailing:false env;
1431 expect ">" env
1432 | _ -> back env
1435 and hint_list ?(trailing=true) env =
1436 list_comma ~trailing:trailing hint env
1438 (*****************************************************************************)
1439 (* Enums *)
1440 (*****************************************************************************)
1442 and enum_ env =
1443 seq env [expect_token Tword; hint_parameter; space];
1444 try_token env Tcolon (seq_fun
1445 [last_token; space; hint; as_constraint; space]);
1446 (* stmt parses any list of statements, including things like $x = 1; which
1447 * are not valid in an enum body, but since we run the parser before
1448 * formatting the text, we can be sure tha we only encounter valid enum body
1449 * statements at this point. *)
1450 stmt ~is_toplevel:false env
1452 (*****************************************************************************)
1453 (* Functions *)
1454 (*****************************************************************************)
1456 and fun_ env =
1457 seq env [opt_tok Tamp; name; hint_parameter];
1458 Try.one_line env fun_signature_single fun_signature_multi;
1459 if next_token env = Tlcb
1460 then space env;
1461 stmt ~is_toplevel:false env
1463 (*****************************************************************************)
1464 (* function foo($arg1, $arg2 ...): return_type (all on one line) *)
1465 (*****************************************************************************)
1467 and fun_signature_single env =
1468 expect "(" env;
1469 right env (list_comma_single (ignore_ fun_param));
1470 seq env [expect ")"; return_type; use]
1472 (*****************************************************************************)
1473 (* Multi line function signature (adds a trailing comma if missing, unless
1474 * the last param is variadic).
1475 * function foo(
1476 * $arg1,
1477 * ...,
1480 * There is a special case with comments, when the only thing present is a
1481 * comment, we don't want to add a trailing comma.
1483 (*****************************************************************************)
1485 and fun_signature_multi env =
1486 seq env [expect "("; newline];
1487 if next_token env = Trp
1488 then right env (fun env -> wrap env (fun _ -> back env))
1489 else right env fun_params_multi;
1490 seq env [newline; expect ")"; return_type; use]
1492 and fun_params_multi env = list_comma_multi_maybe_trail
1493 ~trailing:true
1494 begin fun trailing env ->
1495 let is_variadic = fun_param env in
1496 trailing && not is_variadic
1500 and fun_param env =
1501 let curr_pos = !(env.abs_pos) in
1502 let space_opt env =
1503 if !(env.abs_pos) != curr_pos
1504 then space env
1506 seq env [attribute; space_opt; modifier_list; space_opt; hint; space_opt];
1507 opt_tok Tamp env;
1508 let is_variadic = wrap_eof env begin function
1509 | Tellipsis -> last_token env; true
1510 | _ -> back env; false
1511 end in
1512 opt_tok Tlvar env;
1513 seq env [opt_tok Tamp; opt_tok Tellipsis; opt_tok Tlvar];
1514 try_token env Teq (seq_fun [space; last_token; space; expr]);
1515 is_variadic
1517 and return_type env =
1518 try_token env Tcolon (seq_fun [last_token; space; hint])
1520 (*****************************************************************************)
1521 (* Classes *)
1522 (*****************************************************************************)
1524 and class_ env =
1525 seq env [name; hint_parameter; class_extends; space; class_body]
1527 (*****************************************************************************)
1528 (* Class extends/implements:
1529 * class ... extends A, B, C (on the same line)
1531 (*****************************************************************************)
1533 and class_extends_single env =
1534 seq env [last_token; space; list_comma_single hint]
1536 (*****************************************************************************)
1537 (* Class extends/implements:
1539 * class ...
1540 * extends A, B, C (on a different line)
1542 * OR:
1544 * class ...
1545 * extends
1546 * A, B, C (on a different line)
1548 (*****************************************************************************)
1550 and nl_class_extends_single ~break env =
1551 newline env;
1552 let line = !(env.line) in
1553 right env begin fun env ->
1554 last_token env;
1555 break env;
1556 right env begin fun env ->
1557 list_comma_single hint env
1559 end;
1560 if line <> !(env.line) && env.report_fit
1561 then env.failed := 1
1563 (*****************************************************************************)
1564 (* Class extends/implements:
1566 * class ...
1567 * extends
1568 * A,
1571 (*****************************************************************************)
1573 and class_extends_multi env =
1574 right env begin fun env ->
1575 newline env;
1576 last_token env;
1577 newline env;
1578 right env begin fun env ->
1579 list_comma_multi ~trailing:false hint env
1583 and class_extends env = wrap_word env begin function
1584 | "extends" | "implements" ->
1585 space env;
1586 Try.one_line env
1587 class_extends_single
1588 (fun env ->
1589 Try.outer env
1590 (nl_class_extends_single ~break:space)
1591 (fun env ->
1592 Try.outer env
1593 (nl_class_extends_single ~break:newline)
1594 class_extends_multi
1597 class_extends env
1598 | _ ->
1599 back env
1602 and class_body env =
1603 expect "{" env;
1604 if next_non_ws_token env = Trcb (* Empty class body *)
1605 then expect "}" env
1606 else begin
1607 newline env;
1608 add_block_tag env;
1609 right env begin fun env ->
1610 list env class_element;
1611 end;
1612 expect "}" env;
1613 newline env
1616 and class_element env = wrap env begin function
1617 | Trcb ->
1618 back env
1619 | Tword ->
1620 newline env;
1621 class_element_word env !(env.last_str)
1622 | Tltlt ->
1623 newline env;
1624 last_token env;
1625 expr_list ~trailing:false { env with in_attr = true };
1626 expect ">" env;
1627 expect ">" env;
1628 newline env;
1629 class_element env
1630 | _ ->
1631 back env
1634 and class_element_word env = function
1635 | "function" ->
1636 seq env [space; last_token; space; fun_; newline]
1637 | "public" | "protected" | "private" | "abstract"
1638 | "final"| "static" | "async" ->
1639 back env;
1640 seq env [modifier_list; after_modifier; newline]
1641 | "const" ->
1642 if is_typeconst env
1643 then type_const env
1644 else const env
1645 | "require" ->
1646 seq env [last_token; space; class_extends; semi_colon]
1647 | "use" ->
1648 seq env
1649 [last_token; space; hint_list ~trailing:false; semi_colon; newline]
1650 | "category" ->
1651 seq env [last_token; xhp_category; semi_colon]
1652 | "attribute" ->
1653 last_token env;
1654 xhp_class_attribute_list env
1655 | "children" ->
1656 last_token env;
1657 space env;
1658 xhp_children env;
1659 semi_colon env
1660 | _ ->
1661 back env
1663 and modifier_list env =
1664 let pos_before = get_pos env in
1665 modifier env;
1666 let pos_after = get_pos env in
1667 if pos_before = pos_after then () else begin
1668 space env;
1669 modifier_list env
1672 and modifier env = try_token env Tword begin fun env ->
1673 match !(env.last_str) with
1674 | "public" | "protected" | "private" | "abstract"
1675 | "final"| "static" | "async" ->
1676 last_token env
1677 | _ -> back env
1680 and attribute env = try_token env Tltlt begin fun env ->
1681 last_token env;
1682 expr_list ~trailing:false { env with in_attr = true };
1683 expect ">" env;
1684 expect ">" env;
1687 and use env = try_word env "use" begin fun env ->
1688 seq env [space; last_token; space; expect "("; expr_list; expect ")"]
1691 and after_modifier env = wrap env begin function
1692 | Tword when !(env.last_str) = "const" ->
1693 if is_typeconst env
1694 then abs_type_const env
1695 else abs_const env
1696 | Tword when !(env.last_str) = "function" ->
1697 seq env [last_token; space; fun_]
1698 | _ ->
1699 back env;
1700 hint env;
1701 class_members env
1704 and class_members env = class_members_list class_member env
1706 and class_members_list member_handler env =
1707 Try.one_line env
1708 (class_member_list_single member_handler)
1709 (fun env -> right env (class_member_list_multi member_handler));
1710 semi_colon env
1712 and class_member_list_single member_handler env =
1713 space env;
1714 list_comma_single member_handler env
1716 and class_member_list_multi member_handler env =
1717 newline env;
1718 list_comma_multi ~trailing:false member_handler env
1720 and class_member env = wrap env begin function
1721 | Tword (* In case we are dealing with a constant *)
1722 | Tlvar ->
1723 last_token env;
1724 try_token env Teq
1725 (seq_fun [space; last_token; space; expr])
1726 | _ ->
1727 back env
1730 (*****************************************************************************)
1731 (* XHP formatting *)
1732 (*****************************************************************************)
1734 and xhp_children env = wrap env begin function
1735 | Tlp ->
1736 last_token env;
1737 right env (list_comma_nl ~trailing:false xhp_children);
1738 expect ")" env;
1739 xhp_children_post env;
1740 xhp_children_remain env
1741 | _ ->
1742 back env;
1743 name env;
1744 xhp_children_post env;
1745 xhp_children_remain env
1748 and xhp_children_post env = wrap env begin function
1749 | Tplus | Tqm | Tstar ->
1750 last_token env
1751 | _ -> back env
1754 and xhp_children_remain env = wrap env begin function
1755 | Tbar ->
1756 seq env [space; last_token; space; xhp_children]
1757 | _ -> back env
1760 and xhp_category env =
1761 space env; list_comma ~trailing:false name env
1763 and xhp_class_attribute_list env =
1764 Try.one_line env
1765 (class_member_list_single xhp_class_attribute)
1766 (fun env -> newline env; right env xhp_class_attribute_list_multi);
1767 semi_colon env
1769 and xhp_class_attribute_list_multi env = preserve_nl env begin fun env ->
1770 xhp_class_attribute env;
1771 match token env with
1772 | Tcomma ->
1773 seq env [last_token; keep_comment; newline; add_block_tag];
1774 xhp_class_attribute_list_multi env
1775 | _ -> back env
1778 and xhp_class_attribute env =
1779 Try.one_line env
1780 (xhp_class_attribute_impl ~enum_list_elts:(list_comma_single expr))
1781 (xhp_class_attribute_impl ~enum_list_elts:
1782 (fun env -> right env (list_comma_multi_nl ~trailing:false expr)))
1784 and xhp_class_attribute_impl ~enum_list_elts env =
1785 let curr_pos = !(env.abs_pos) in
1786 wrap env begin function
1787 | Tword when !(env.last_str) = "enum" ->
1788 last_token env;
1789 space env;
1790 expect "{" env;
1791 enum_list_elts env;
1792 expect "}" env
1793 | _ -> back env; hint env
1794 end;
1795 if !(env.abs_pos) != curr_pos then begin
1796 match next_token env with
1797 | Tsc | Tcomma -> ()
1798 | _ -> space env
1799 end;
1800 name env;
1801 wrap env begin function
1802 | Teq -> seq env [space; last_token; space; expr]
1803 | _ -> back env
1804 end;
1805 (match next_token env with
1806 | Tsc | Tcomma -> ()
1807 | _ -> space env);
1808 hint env
1810 (*****************************************************************************)
1811 (* XHP *)
1812 (*****************************************************************************)
1814 and is_xhp env =
1815 attempt env begin fun env ->
1816 match token env with
1817 | Tpercent | Tcolon | Tword ->
1818 name_loop env;
1819 wrap_eof_xhp env begin function
1820 | Tgt | Tword | Tslash -> true
1821 | _ -> false
1823 | _ ->
1824 false
1827 and xhp_tag_kind env =
1828 attempt env begin fun env ->
1829 expect_xhp "<" env;
1830 name env;
1831 xhp_attribute_list ~break:space env;
1832 match next_token ~wrap:wrap_eof_xhp env with
1833 | Tslash -> `Xhp_self_closing
1834 | Tgt -> `Xhp_paired
1835 | _ -> raise Format_error
1838 (* First we try inserting the XHP on the current line, e.g.
1840 * $foo = <aaaaaaaaaa>1</aaaaaaaaaa>;
1842 * If that fails to fit within char_break, then we try inserting the same XHP
1843 * on the next line:
1845 * $foo =
1846 * <aaaaaaaaaa>1</aaaaaaaaaa>;
1848 * If that *still* fails to fit, we split the XHP up into multiple lines:
1850 * $foo =
1851 * <aaaaaaaaaa>
1853 * </aaaaaaaaaa>;
1855 and xhp env =
1856 match xhp_tag_kind env with
1857 | `Xhp_self_closing ->
1858 Try.one_line env
1859 xhp_self_closing_single
1860 (fun env ->
1861 newline env;
1862 Try.one_line env
1863 xhp_self_closing_single
1864 xhp_self_closing_multi)
1865 | `Xhp_paired ->
1866 Try.one_line env
1867 xhp_paired_single
1868 (fun env ->
1869 newline env;
1870 Try.one_line env
1871 xhp_paired_single
1872 xhp_paired_multi)
1874 and xhp_self_closing_single env =
1875 seq env [expect_xhp "<"; name; xhp_attribute_list ~break:space];
1876 seq env [space; expect_xhp "/"; expect_xhp ">"];
1878 and xhp_self_closing_multi env =
1879 seq env [expect_xhp "<"; name];
1880 right env (xhp_attribute_list ~break:newline);
1881 seq env [newline; expect_xhp "/"; expect_xhp ">"];
1882 xhp_multi_post env
1884 and xhp_paired_single env =
1885 seq env [expect_xhp "<"; name; xhp_attribute_list ~break:space];
1886 seq env [expect_xhp ">"; skip_spaces_and_nl; xhp_body];
1887 env.spaces := 0;
1888 xhp_close_tag env;
1890 and xhp_paired_multi env =
1891 expect_xhp "<" env;
1892 let margin_pos = !(env.char_pos) in
1893 name env;
1894 Try.one_line env
1895 begin fun env ->
1896 xhp_attribute_list ~break:space env;
1897 expect_xhp ">" env
1899 begin fun env ->
1900 margin_set margin_pos env
1901 (xhp_attribute_list ~break:newline);
1902 expect_xhp ">" env;
1903 end;
1904 newline env;
1905 skip_spaces_and_nl env;
1906 margin_set margin_pos env begin fun env ->
1907 xhp_body env;
1908 end;
1909 newline env;
1910 xhp_close_tag env;
1911 xhp_multi_post env
1913 and xhp_multi_post env =
1914 match xhp_token env with
1915 | Tnewline | Tspace ->
1916 newline env
1917 | Tlt when is_xhp env ->
1918 back env;
1919 newline env
1920 | _ -> back env
1922 and xhp_close_tag env =
1923 seq env [expect_xhp "<"; expect_xhp "/"; name; expect_xhp ">"]
1925 and xhp_attribute_list ~break env = wrap_xhp env begin function
1926 | Tword ->
1927 break env;
1928 last_token env;
1929 Try.one_line env
1930 xhp_attribute_assign
1931 begin fun env ->
1932 expect "=" env;
1933 newline env;
1934 right env xhp_attribute_value
1935 end;
1936 xhp_attribute_list ~break env
1937 | _ ->
1938 back env
1941 and xhp_attribute_assign env =
1942 expect_xhp "=" env;
1943 xhp_attribute_value env
1945 and xhp_attribute_value env = wrap_xhp env begin function
1946 | Tquote | Tdquote as tok ->
1947 last_token env;
1948 string ~last:tok env
1949 | Tlcb ->
1950 last_token env;
1951 expr env;
1952 expect_xhp "}" env
1953 | _ ->
1954 back env
1957 (* It seems like whitespace is significant in XHP, but only insofar as it acts
1958 * as a separator of non-whitespace characters. That is, consecutive whitespace
1959 * will be rendered as a single space at runtime. Thus the handling of xhp_body
1960 * has to be slightly different from the rest of the syntax, which does not
1961 * treat whitespace as significant. In particular, we output consecutive
1962 * whitespace here as a single space, unless we have just wrapped a line, in
1963 * which case we output the necessary number of spaces required by the
1964 * indentation level. *)
1965 and xhp_body env =
1966 let k = xhp_body in
1967 match xhp_token env with
1968 | Teof -> ()
1969 | Tnewline | Tspace ->
1970 if !(env.last) <> Newline then space env;
1971 k env
1972 | Topen_xhp_comment ->
1973 last_token env;
1974 xhp_comment env;
1975 k env
1976 | Tlt when is_xhp env ->
1977 back env;
1978 xhp env;
1979 xhp_keep_one_nl env;
1980 k env;
1981 | Tlt ->
1982 back env
1983 | Tlcb ->
1984 Try.one_line env
1985 begin fun env ->
1986 last_token env;
1987 expr env;
1988 expect_xhp "}" env;
1990 begin fun env ->
1991 newline env;
1992 last_token env;
1993 expr env;
1994 expect_xhp "}" env;
1995 end;
1996 xhp_keep_one_nl env;
1997 k env
1998 | x ->
1999 let pos = !(env.char_pos) in
2000 let text = xhp_text env (Buffer.create 256) x in
2001 if pos + String.length text >= env.char_size
2002 then newline env;
2003 out text { env with report_fit = false };
2004 env.last := Text;
2005 k env
2007 (* preserves up to one empty line between XHP blocks *)
2008 and xhp_keep_one_nl env =
2009 match xhp_token env with
2010 | Teof -> ()
2011 | Tnewline ->
2012 newline env;
2013 (match xhp_token env with
2014 | Tnewline -> force_nl env
2015 | _ -> back env);
2016 while xhp_token env = Tnewline do () done;
2017 back env;
2018 | _ ->
2019 back env
2021 and xhp_text env buf = function
2022 | Tnewline | Tspace | Tlt | Tlcb | Teof | Tclose_xhp_comment ->
2023 back env;
2024 Buffer.contents buf
2025 | _ ->
2026 Buffer.add_string buf !(env.last_out);
2027 xhp_text env buf (xhp_token env)
2029 and xhp_comment env = Try.one_line env xhp_comment_single xhp_comment_multi
2031 and xhp_comment_single env =
2032 seq env [xhp_comment_body; expect_xhp "-->"]
2034 and xhp_comment_multi env =
2035 newline env;
2036 right env xhp_comment_body;
2037 newline env;
2038 expect_xhp "-->" env
2040 and xhp_comment_body env =
2041 match xhp_token env with
2042 | Teof -> ()
2043 | Tnewline | Tspace ->
2044 if !(env.last) <> Newline then space env;
2045 xhp_comment_body env
2046 | Tclose_xhp_comment ->
2047 back env
2048 | Tlt | Tlcb ->
2049 last_token env;
2050 xhp_comment_body env
2051 | x ->
2052 let pos = !(env.char_pos) in
2053 let text = xhp_text env (Buffer.create 256) x in
2054 if pos + String.length text >= env.char_size
2055 then newline env;
2056 out text env;
2057 env.last := Text;
2058 xhp_comment_body env
2060 (*****************************************************************************)
2061 (* Statements *)
2062 (*****************************************************************************)
2064 and stmt ~is_toplevel env = wrap env begin function
2065 | Tltlt ->
2066 line { env with in_attr = true }
2067 [last_token; expr_list ~trailing:false; expect ">"; expect ">"];
2068 stmt ~is_toplevel env
2069 | Tword ->
2070 let word = !(env.last_str) in
2071 stmt_word ~is_toplevel env word
2072 | Tlcb ->
2073 last_token env;
2074 if next_non_ws_token env <> Trcb then begin
2075 seq env [space; keep_comment; newline];
2076 add_block_tag env;
2077 right env (stmt_list ~is_toplevel);
2078 end;
2079 expect "}" env;
2080 | Tsc ->
2081 seq env [last_token; space; keep_comment; newline]
2082 | _ ->
2083 back env;
2084 if has_consumed env expr
2085 then semi_colon env
2088 and stmt_word ~is_toplevel env word =
2089 match word with
2090 | "type" | "newtype" | "namespace" | "use"
2091 | "abstract" | "final" | "interface" | "const"
2092 | "class" | "trait" | "function" | "async" | "enum" as word ->
2093 if is_toplevel
2094 then stmt_toplevel_word env word
2095 else back env
2096 | "public" | "protected" | "private" | "case" | "default" ->
2097 back env
2098 | "print" | "echo"
2099 | "require" | "require_once" | "include" | "include_once" ->
2100 seq env [last_token; space];
2101 right env (list_comma_nl ~trailing:false expr);
2102 semi_colon env
2103 | "throw" ->
2104 seq env [last_token; space; expr; semi_colon]
2105 | "break" | "continue" | "return" ->
2106 last_token env;
2107 if wrap_would_consume env expr
2108 then rhs_assign env;
2109 semi_colon env;
2110 | "static" when next_token env <> Tcolcol ->
2111 seq env [last_token; space];
2112 Try.one_line env
2113 (seq_fun [space; list_comma_single expr])
2114 (seq_fun [newline; right_fun (list_comma_multi ~trailing:false expr)]);
2115 semi_colon env
2116 | "if" ->
2117 last_token env;
2118 if_ ~is_toplevel env
2119 | "do" ->
2120 seq env [last_token; block];
2121 seq env [space; expect "while"; space; expr_paren; opt_tok Tsc]
2122 | "while" ->
2123 seq env [last_token; space; expr_paren; block; newline]
2124 | "for" ->
2125 last_token env;
2126 for_loop env
2127 | "switch" ->
2128 last_token env;
2129 switch env
2130 | "foreach" ->
2131 last_token env;
2132 foreach env;
2133 | "try" ->
2134 seq env [last_token; space; block; space];
2135 catch_list env
2136 | _ ->
2137 back env;
2138 seq env [expr; semi_colon]
2140 and stmt_toplevel_word env = function
2141 | "abstract" | "final" | "async" ->
2142 seq env [last_token; space; stmt ~is_toplevel:true]
2143 | "interface" | "class" | "trait" ->
2144 seq env [last_token; space; class_]
2145 | "enum" ->
2146 seq env [last_token; space; enum_]
2147 | "function" ->
2148 seq env [last_token; space; fun_]
2149 | "const" ->
2150 const env
2151 | "type" | "newtype" ->
2152 seq env [last_token; space; hint; as_constraint; space;
2153 expect "="; space];
2154 hint env;
2155 semi_colon env;
2156 | "namespace" ->
2157 last_token env;
2158 namespace env
2159 | "use" ->
2160 last_token env;
2161 namespace_use_list env;
2162 | _ ->
2163 back env
2165 and stmt_list ~is_toplevel env =
2166 (* -1 for the trailing semicolon *)
2167 let env = {env with char_break = min env.char_break (env.char_size - 1)} in
2168 list env (stmt ~is_toplevel)
2170 and block ?(is_toplevel=false) env = wrap env begin function
2171 | Tlcb ->
2172 seq env [space; last_token; space; keep_comment; newline];
2173 add_block_tag env;
2174 right env (stmt_list ~is_toplevel);
2175 expect "}" env
2176 | _ ->
2177 back env;
2178 newline env;
2179 right env (stmt ~is_toplevel)
2182 (*****************************************************************************)
2183 (* If statement *)
2184 (*****************************************************************************)
2186 and if_ ~is_toplevel env =
2187 seq env [space; expr_paren; block ~is_toplevel; else_ ~is_toplevel]
2189 and else_ ~is_toplevel env =
2190 match next_token_str env with
2191 | "else" | "elseif" ->
2192 space env;
2193 else_word ~is_toplevel env;
2194 else_ ~is_toplevel env
2195 | _ -> newline env
2197 and else_word ~is_toplevel env = wrap_word env begin function
2198 | "else" ->
2199 seq env [last_token; space];
2200 wrap_word env (function
2201 | "if" -> seq env [last_token; space; expr_paren; space]
2202 | _ -> back env);
2203 block ~is_toplevel env;
2204 | "elseif" ->
2205 seq env [out "else"; space; out "if"; space; expr_paren; space];
2206 block ~is_toplevel env;
2207 | _ -> assert false
2210 (*****************************************************************************)
2211 (* Namespaces *)
2212 (*****************************************************************************)
2214 and namespace env =
2215 seq env [space; name];
2216 wrap env begin function
2217 | Tsc -> back env; semi_colon env;
2218 | Tlcb ->
2219 space env; last_token env; newline env;
2220 right env (stmt_list ~is_toplevel:true);
2221 expect "}" env
2222 | _ ->
2223 expect ";" env
2226 and namespace_use_list env =
2227 seq env [space; opt_word "const"; opt_word "function"; space;];
2228 let is_group_use = attempt env begin fun env ->
2229 name env;
2230 match next_token_str env with
2231 | "{" -> true
2232 | _ -> false
2233 end in
2234 if is_group_use then seq env [name; expect "{"];
2235 right env (list_comma_nl ~trailing:false namespace_use);
2236 let rem =
2237 if is_group_use then [expect "}"; semi_colon;]
2238 else [semi_colon] in
2239 seq env rem
2241 and namespace_use env =
2242 let next = match next_token_str env with
2243 | "const"
2244 | "function" as x -> [opt_word x; space; name;]
2245 | _ -> [name] in
2246 seq env next;
2247 if next_token_str env = "as" then seq env [space; expect "as"; space; name;];
2250 (*****************************************************************************)
2251 (* Foreach loop *)
2252 (*****************************************************************************)
2254 and foreach env =
2255 seq env [space; expect "("];
2256 margin_set (!(env.char_pos) - 1) env foreach_as;
2257 expect ")" env;
2258 block env;
2259 newline env
2261 and foreach_as env =
2262 seq env [expr; space; opt_word "await"; space; expect "as"];
2263 Try.outer env
2264 (fun env -> seq env [space; expr; arrow_opt])
2265 (fun env -> seq env [newline; expr; arrow_opt])
2267 (*****************************************************************************)
2268 (* For loop *)
2269 (*****************************************************************************)
2271 and for_loop env =
2272 seq env [space; expect "("];
2273 (* the expr_list at toplevel adds newlines before and after the list, which
2274 * we don't want *)
2275 let expr_list = list_comma ~trailing:false expr in
2276 let for_exprs ~break = begin fun env ->
2277 seq env [expr_list; semi_colon];
2278 seq env [break; expr_list; semi_colon];
2279 seq env [break; expr_list]
2280 end in
2281 Try.one_line env
2282 (for_exprs ~break:space)
2283 begin fun env ->
2284 newline env;
2285 right env (for_exprs ~break:newline);
2286 newline env;
2287 end;
2288 seq env [expect ")"; block; newline]
2290 (*****************************************************************************)
2291 (* Switch statement *)
2292 (*****************************************************************************)
2294 and switch env =
2295 seq env [space; expr_paren; space];
2296 line env [expect "{"];
2297 add_block_tag env;
2298 case_list env;
2299 line env [expect "}"]
2301 and case_list env =
2302 right env begin fun env ->
2303 list env case
2306 and case env =
2307 wrap env begin function
2308 | Trcb ->
2309 back env
2310 | Tword ->
2311 case_word env !(env.last_str)
2312 | _ -> back env
2315 and case_word env = function
2316 | "case" ->
2317 seq env [last_token; space; expr; expect ":"; space; keep_comment;
2318 newline];
2319 right env (stmt_list ~is_toplevel:false)
2320 | "default" ->
2321 seq env [last_token; expect ":"; keep_comment; newline];
2322 right env (stmt_list ~is_toplevel:false)
2323 | _ ->
2324 back env
2326 and catch_list env = wrap_word env begin function
2327 | "catch" ->
2328 last_token env;
2329 catch_remain env;
2330 (match next_token_str env with
2331 | "catch" | "finally" ->
2332 space env; catch_list env
2333 | _ -> newline env)
2334 | "finally" ->
2335 last_token env;
2336 block env;
2337 newline env
2338 | _ -> back env
2341 and catch_remain env =
2342 seq env [space; expect "("; (ignore_ fun_param); expect ")"; block]
2344 (*****************************************************************************)
2345 (* Expressions *)
2346 (*****************************************************************************)
2348 and rhs_assign env =
2349 wrap env begin function
2350 | Theredoc ->
2351 last_token env;
2352 heredoc env
2353 (* XHP *)
2354 | Tlt ->
2355 back env;
2356 Try.one_line env
2357 (fun env -> space env; expr env)
2358 (fun env -> newline env; right env expr)
2359 | Tword when
2360 !(env.last_str) = "array" || !(env.last_str) = "shape" ||
2361 !(env.last_str) = "tuple" ->
2362 back env;
2363 space env; expr env
2364 | Tword when next_token env = Tlcb ->
2365 back env;
2366 space env; expr env
2367 | _ ->
2368 back env;
2369 keep_best env
2370 begin fun env ->
2371 let line = !(env.line) in
2372 space env;
2373 let lowest_pri = expr_lowest env in
2374 if lowest_pri > 0 &&
2375 (lowest_pri != tarrow_prec
2376 || lowest_pri != tpipe_prec) &&
2377 line <> !(env.line)
2378 then env.failed := 1;
2380 begin fun env ->
2381 newline env;
2382 right env expr;
2386 and expr_paren env =
2387 expect "(" env;
2388 (* an expr_paren is usually followed by `) {`, so take that into account *)
2389 let env = {env with char_break = min env.char_break (env.char_size - 3)} in
2390 margin_set (!(env.char_pos) - 1) env expr;
2391 expect ")" env
2393 and expr_break_tarrow env =
2394 let env = { env with break_on = tarrow_prec } in
2395 expr_atomic env;
2396 right env (fun env -> ignore (expr_remain_loop 0 env))
2398 and expr_break_tpipe env =
2399 let env = { env with break_on = tpipe_prec } in
2400 expr_atomic env;
2401 right env (fun env -> ignore (expr_remain_loop 0 env))
2403 and expr env =
2404 let break_on = ref 0 in
2405 (** We try to shove as much of an expression as possible onto one line. If
2406 * that succeeds, good. If not, we add a linebreak at the lowest-priority
2407 * operator encountered. This ensures that things of higher priority
2408 * are kept on the same line.*)
2409 Try.outer env
2410 begin fun env ->
2411 let line = !(env.line) in
2412 let lowest = expr_lowest env in
2413 break_on := lowest;
2414 if !(env.failed) <= 0 && line <> !(env.line) && lowest > 0
2415 then env.failed := max 1 (max !(env.failed) env.try_depth);
2417 begin fun env ->
2418 (** The linebreak is inserted by setting "break_on" in the env and
2419 * trying the expression output again. When the operator with that
2420 * "break_on" priority is encountered, that's when the break will be
2421 * output. *)
2422 let break_on = !break_on in
2423 if break_on = tarrow_prec (* Operator -> is special *)
2424 then keep_best env ignore_expr_lowest expr_break_tarrow
2425 else if break_on = tpipe_prec (* Operator |> is special *)
2426 then keep_best env ignore_expr_lowest expr_break_tpipe
2427 else ignore_expr_lowest { env with break_on };
2431 and ignore_expr_lowest env =
2432 ignore (expr_lowest env)
2434 and expr_lowest env =
2435 let env = reset_priority env in
2436 expr_atomic env;
2437 expr_remain_loop 0 env
2439 and expr_remain_loop lowest env =
2440 let pos_before = get_pos env in
2441 let lowest = expr_remain lowest env in
2442 let pos_after = get_pos env in
2443 if pos_before = pos_after
2444 then lowest
2445 else expr_remain_loop lowest env
2447 and expr_list ?(trailing=true) env =
2448 list_comma_nl ~trailing expr { env with break_on = 0; priority = 0 }
2450 and expr_binop lowest str_op op env =
2451 with_priority env op begin fun env ->
2452 space env;
2453 out str_op env;
2454 if env.priority = env.break_on
2455 then newline env
2456 else space env;
2457 expr_atomic env;
2458 let lowest =
2459 if lowest = 0 then env.priority else
2460 if env.priority = 0 then lowest
2461 else min env.priority lowest in
2462 expr_remain_loop lowest env
2465 and expr_binop_arrow lowest str_op tok env =
2466 with_priority env tok begin fun env ->
2467 if env.priority = env.break_on
2468 then begin
2469 seq env [newline; out str_op];
2471 else out str_op env;
2472 wrap env begin function
2473 | Tword ->
2474 last_token env
2475 | Tlcb -> (* $xx->{...} *)
2476 last_token env;
2477 expr env;
2478 expect "}" env
2479 | _ ->
2480 back env;
2481 expr_atomic env
2482 end;
2483 let lowest =
2484 if lowest = 0 then env.priority else
2485 min env.priority lowest in
2486 expr_remain_loop lowest env
2489 (** The pipe expression may have linebreaks inserted because that is the
2490 * operator precedence we are currently adding linebreaks on
2491 * (see "env.break_on"), or it may preserve linebreaks from the input that
2492 * are there for "prettiness" (i.e. not to satisfy the 80-char width limit.) *)
2493 and expr_binop_pipe lowest str_op tok env =
2494 with_priority env tok begin fun env ->
2495 let pretty_newline = (!(env.input_line_non_ws_token_count) = 1)
2496 && (env.priority != env.break_on) in
2497 if pretty_newline
2498 then begin
2499 (** The pipe is the first non-ws token consumed from this line of input.
2500 * Allow it to be on a newline. *)
2501 right env begin function env ->
2502 let env = { env with break_on = tpipe_prec } in
2503 seq env [newline; out str_op; space];
2504 expr_atomic env;
2505 expr_remain_loop 0 env
2508 else begin
2509 if (env.priority = env.break_on)
2510 then seq env [newline; out str_op; space]
2511 else seq env [space; out str_op; space];
2512 wrap env begin function
2513 | _ ->
2514 back env;
2515 expr_atomic env
2516 end;
2517 let lowest =
2518 if lowest = 0 then env.priority else
2519 min env.priority lowest in
2520 expr_remain_loop lowest env
2524 and expr_binop_dot lowest str_op env =
2525 with_priority env Tdot begin fun env ->
2526 out str_op env;
2527 if env.priority = env.break_on
2528 then newline env;
2529 (match next_token env with
2530 | Tminus | Tplus | Tint | Tfloat -> space env
2531 | _ -> ());
2532 expr_atomic env;
2533 let lowest =
2534 if lowest = 0 then env.priority else
2535 min env.priority lowest in
2536 expr_remain_loop lowest env
2539 and expr_remain lowest env =
2540 let tok = token env in
2541 let tok_str = !(env.last_out) in
2542 match tok with
2543 | Topen_comment ->
2544 seq env [space; last_token; comment];
2545 expr_remain lowest env
2546 | Tline_comment ->
2547 seq env [space; last_token; line_comment; newline];
2548 expr_remain lowest env
2549 | Tnewline | Tspace ->
2550 expr_remain lowest env
2551 | Tplus | Tminus | Tstar | Tslash | Tstarstar
2552 | Teqeqeq | Tpercent
2553 | Teqeq | Tampamp | Tbarbar
2554 | Tdiff | Tlt | Tdiff2 | Tgte
2555 | Tlte | Tamp | Tbar | Tltlt
2556 | Tgtgt | Txor as op ->
2557 expr_binop lowest tok_str op env
2558 | Tpipe ->
2559 expr_binop_pipe lowest tok_str tok env
2560 | Tdot ->
2561 expr_binop_dot lowest tok_str env
2562 | Tarrow | Tnsarrow ->
2563 expr_binop_arrow lowest tok_str tok env
2564 | Tgt when env.in_attr ->
2565 back env;
2566 lowest
2567 | Tlambda ->
2568 space env;
2569 last_token env;
2570 space env;
2571 if next_token env = Tlcb
2572 then block env
2573 else expr env;
2574 lowest
2575 | Tgt ->
2576 (match token env with
2577 | Tgt ->
2578 expr_binop lowest ">>" Tgtgt env
2579 | _ ->
2580 back env;
2581 expr_binop lowest ">" Tgt env
2583 | Teq | Tbareq | Tpluseq | Tstareq | Tslasheq
2584 | Tdoteq | Tminuseq | Tpercenteq | Txoreq
2585 | Tampeq | Tlshifteq | Trshifteq ->
2586 space env;
2587 last_token env;
2588 space env;
2589 rhs_assign env;
2590 lowest
2591 | Tincr | Tdecr ->
2592 out tok_str env;
2593 lowest
2594 | Tcolcol ->
2595 out tok_str env;
2596 expr_atomic env;
2597 lowest
2598 | Tlp ->
2599 let env = { env with break_on = 0 } in
2600 back env;
2601 arg_list env;
2602 lowest
2603 | Tlb ->
2604 last_token env;
2605 (match token env with
2606 | Trb -> last_token env
2607 | _ -> back env; expr env; expect "]" env
2609 lowest
2610 | Tqm when attempt env begin fun env ->
2611 wrap_eof env begin function
2612 | Tcolon ->
2613 token env <> Tword
2614 | _ -> false
2616 end ->
2617 seq env [space; out "?"; expect ":"; space; expr];
2618 lowest
2619 | Tqm ->
2620 Try.one_line env
2621 ternary_one_line
2622 ternary_multi_line;
2623 (* Horrible Hack. We pretend the ternary operator is a binary operator
2624 * to make sure we get a new line after an assignment.
2625 * Without this hack, we could have results looking like this:
2626 * $x = (my_cond)?
2627 * ...
2630 | Tqmqm ->
2631 expr_binop lowest "??" Tqmqm env
2632 | Tword when !(env.last_str) = "xor" ->
2633 expr_binop lowest "xor" Txor env
2634 | Tword when !(env.last_str) = "instanceof" ->
2635 space env;
2636 last_token env;
2637 space env;
2638 expr_atomic env;
2639 lowest
2640 | _ ->
2641 back env;
2642 lowest
2644 and expr_atomic env =
2645 let last = !(env.last_token) in
2646 let token = token env in
2647 match token with
2648 | Tline_comment ->
2649 seq env [last_token; line_comment; newline; expr_atomic]
2650 | Topen_comment ->
2651 seq env [last_token; comment; space; expr_atomic]
2652 | Tnewline | Tspace ->
2653 expr_atomic env
2654 | Tlvar ->
2655 last_token env;
2656 (match next_token env with
2657 | Tarrow | Tnsarrow as tok ->
2658 (match tok with
2659 | Tarrow -> expect "->" env
2660 | Tnsarrow -> expect "?->" env
2661 | _ -> assert false);
2662 wrap env begin function
2663 | Tword ->
2664 last_token env
2665 | Tlcb ->
2666 last_token env;
2667 expr env;
2668 expect "}" env
2669 | _ ->
2670 back env;
2671 expr_atomic env
2673 | _ -> ()
2675 | Tint | Tfloat ->
2676 last_token env;
2677 if next_token env = Tdot
2678 then space env
2679 | Tquote | Tdquote as tok ->
2680 last_token env;
2681 string ~last:tok env
2682 | Tcolon ->
2683 last_token env;
2684 name_loop env
2685 | Tamp | Tat | Tbslash
2686 | Tem | Tincr | Tdecr | Ttild | Tplus | Tminus ->
2687 last_token env;
2688 expr_atomic env
2689 | Tword ->
2690 let word = !(env.last_str) in
2691 expr_atomic_word env last (String.lowercase word)
2692 | Tdollardollar ->
2693 last_token env;
2694 | Tlb ->
2695 last_token env;
2696 right env array_body;
2697 expect "]" env
2698 | Tlp ->
2699 let env = { env with break_on = 0 } in
2700 (* CAST *)
2701 if is_followed_by env name ")"
2702 then begin
2703 seq env [last_token; out_next; expect ")"; space; expr]
2705 else if next_token_str env = "new"
2706 then begin
2707 seq env [last_token; expr; expect ")"]
2709 (* Short lambda parameters *)
2710 else if attempt env begin fun env ->
2712 list_comma (ignore_ fun_param) env;
2713 seq env [expect ")"; return_type];
2714 wrap_eof env (fun tok -> tok = Tlambda)
2715 with Format_error -> false
2716 end then begin
2717 back env;
2718 Try.one_line env fun_signature_single fun_signature_multi;
2720 (* Expression *)
2721 else begin
2722 last_token env;
2723 margin_set (!(env.char_pos) -1) env begin fun env ->
2724 expr env
2725 end;
2726 expect ")" env;
2728 | Tlt when is_xhp env ->
2729 back env;
2730 xhp env;
2731 | Theredoc ->
2732 last_token env;
2733 heredoc env
2734 | _ ->
2735 back env
2737 and expr_atomic_word env last_tok = function
2738 | "true" | "false" | "null" ->
2739 last_token env
2740 | "array" | "shape" | "tuple" as v ->
2741 out v env;
2742 expect "(" env;
2743 right env array_body;
2744 expect ")" env
2745 | "dict" ->
2746 out "dict" env;
2747 expect (token_to_string Tlb) env;
2748 (** Dict body looks exactly like an array body. *)
2749 right env array_body;
2750 expect (token_to_string Trb) env;
2751 | "empty" | "unset" | "isset" as v ->
2752 out v env;
2753 arg_list ~trailing:false env
2754 | "new" ->
2755 last_token env;
2756 space env;
2757 expr env;
2758 | "async" ->
2759 last_token env;
2760 space env;
2761 begin match next_token env with
2762 | Tlcb -> stmt ~is_toplevel:false env
2763 | _ -> expr_atomic env
2765 | "function" when last_tok <> Tarrow && last_tok <> Tnsarrow ->
2766 last_token env;
2767 if next_non_ws_token env <> Tlp then space env;
2768 fun_ env
2769 | "await" ->
2770 last_token env;
2771 space env;
2772 with_priority env Tawait expr
2773 | "yield" ->
2774 last_token env;
2775 space env;
2776 with_priority env Tyield array_element_single
2777 | "clone" ->
2778 last_token env;
2779 space env;
2780 with_priority env Tclone expr
2781 | _ ->
2782 last_token env;
2783 wrap env begin function
2784 (* Collection *)
2785 | Tlcb ->
2786 space env;
2787 last_token env;
2788 if next_token env <> Trcb
2789 then right env array_body;
2790 expect "}" env
2791 | Tlp ->
2792 back env;
2793 let _ = expr_remain 0 env in
2795 | Tbslash ->
2796 last_token env;
2797 name_loop env
2798 | _ ->
2799 back env
2802 and expr_call_list ?(trailing=true) env =
2803 let env = { env with break_on = 0; priority = 0 } in
2804 list_comma_nl ~trailing expr_call_elt env
2806 and expr_call_elt env = wrap env begin function
2807 | Tellipsis -> seq env [last_token; expr]
2808 | _ -> back env; expr env
2811 (*****************************************************************************)
2812 (* Ternary operator ... ? ... : ... *)
2813 (*****************************************************************************)
2815 and ternary_one_line env =
2816 seq env [space; last_token; space; expr; space; expect ":"; space; expr]
2818 and ternary_multi_line env =
2819 right env begin fun env ->
2820 seq env
2821 [newline; last_token; space; expr; newline; expect ":"; space; expr]
2825 (*****************************************************************************)
2826 (* Strings *)
2827 (*****************************************************************************)
2829 and string ~last env =
2830 match token env with
2831 | Teof -> ()
2832 | tok when tok = last -> last_token env
2833 | tok -> string_char env tok; string ~last env
2835 and string_char env = function
2836 | Teof -> ()
2837 | Tbslash -> last_token env; out_next env
2838 | Tnewline -> force_nl env
2839 | Tspace -> keep_space env
2840 | _ -> last_token env
2842 (*****************************************************************************)
2843 (* Heredocs *)
2844 (*****************************************************************************)
2846 and heredoc env =
2847 let env = { env with margin = ref 0 } in
2848 (match token env with
2849 | Tspace -> heredoc env
2850 (* <<<'MYSTRING' *)
2851 | Tquote ->
2852 last_token env;
2853 let abs_start = env.lexbuf.Lexing.lex_curr_pos in
2854 string ~last:Tquote env;
2855 let len = env.lexbuf.Lexing.lex_curr_pos - abs_start - 1 in
2856 let str_value = String.sub env.lexbuf.Lexing.lex_buffer abs_start len in
2857 heredoc_loop str_value env
2858 (* <<<MYWORD *)
2859 | Tword ->
2860 last_token env;
2861 heredoc_loop !(env.last_str) env
2862 (* <<< *)
2863 | _ ->
2864 last_token env;
2865 heredoc_loop "EOT" env
2867 (match token env with
2868 | Tsc -> back env
2869 | _ -> back env; newline env)
2871 and heredoc_loop close env =
2872 match token env with
2873 | Teof -> ()
2874 | Tnewline ->
2875 force_nl env;
2876 if attempt env begin fun env ->
2877 token env = Tword &&
2878 !(env.last_str) = close &&
2879 match token env with
2880 | Tsc | Tnewline -> true
2881 | _ -> false
2883 then (ignore (token env); last_token env)
2884 else heredoc_loop close env
2885 | Tspace -> keep_space env; heredoc_loop close env
2886 | _ -> last_token env; heredoc_loop close env
2888 (*****************************************************************************)
2889 (* Arrays *)
2890 (*****************************************************************************)
2892 and array_body env =
2893 Try.one_line env
2894 array_one_line
2895 array_multi_line
2897 and array_one_line env =
2898 list_comma_single array_element_single env
2900 and array_multi_line env =
2901 list_comma_multi_nl ~trailing:true array_element_multi env
2903 and array_element_single env =
2904 expr env;
2905 arrow_opt env
2907 and array_element_multi env = wrap env begin fun _ ->
2908 back env;
2909 newline env;
2910 expr env;
2911 arrow_opt env
2914 and arrow_opt env =
2915 match token env with
2916 | Tsarrow ->
2917 space env;
2918 last_token env;
2919 Try.outer env
2920 (fun env -> space env; expr env)
2921 (fun env -> newline env; right env expr)
2922 | _ ->
2923 back env
2925 (*****************************************************************************)
2926 (* Argument lists *)
2927 (*****************************************************************************)
2929 and arg_list ?(trailing=true) env =
2930 expect "(" env;
2931 keep_comment env;
2932 if next_token env <> Trp
2933 then right env (expr_call_list ~trailing);
2934 expect ")" env
2936 (*****************************************************************************)
2937 (* The outside API *)
2938 (*****************************************************************************)
2940 let region modes file ~start ~end_ content =
2941 entry ~keep_source_metadata:false file start end_ content
2942 ~no_trailing_commas:false ~modes
2943 (fun env -> Buffer.contents env.buffer)
2945 let program ?no_trailing_commas:(no_trailing_commas = false) modes file
2946 content =
2947 entry ~keep_source_metadata:false file 0 max_int content
2948 ~no_trailing_commas ~modes
2949 (fun env -> Buffer.contents env.buffer)
2951 let program_with_source_metadata modes file content =
2952 entry ~keep_source_metadata:true file 0 max_int content
2953 ~no_trailing_commas:false ~modes begin
2954 fun env ->
2955 Buffer.contents env.buffer, List.rev !(env.source_pos_l)