/* Web-view residue kept for provenance:
   "maint: prefer C23-style nullptr"
   [coreutils.git] / src / fmt.c
   blob a68164e8daa40e0f5d2d0d744608fdfb007679a8  */
/* GNU fmt -- simple text formatter.
   Copyright (C) 1994-2023 Free Software Foundation, Inc.

   This program is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation, either version 3 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program.  If not, see <https://www.gnu.org/licenses/>.  */

/* Written by Ross Paterson <rap@doc.ic.ac.uk>.  */
#include <config.h>
#include <stdio.h>
#include <sys/types.h>
#include <getopt.h>
#include <assert.h>

/* Redefine.  Otherwise, systems (Unicos for one) with headers that define
   it to be a type get syntax errors for the variable declaration below.  */
#define word unused_word_type

#include "c-ctype.h"
#include "system.h"
#include "error.h"
#include "die.h"
#include "fadvise.h"
#include "xdectoint.h"
/* The official name of this program (e.g., no 'g' prefix).  */
#define PROGRAM_NAME "fmt"

/* Author credit passed to the version-output machinery.  */
#define AUTHORS proper_name ("Ross Paterson")

/* The following parameters represent the program's idea of what is
   "best".  Adjust to taste, subject to the caveats given.  */

/* Default longest permitted line length (max_width).  */
#define WIDTH 75

/* Prefer lines to be LEEWAY % shorter than the maximum width, giving
   room for optimization.  */
#define LEEWAY 7

/* The default secondary indent of tagged paragraph used for unindented
   one-line paragraphs not preceded by any multi-line paragraphs.  */
#define DEF_INDENT 3
/* Costs and bonuses are expressed as the equivalent departure from the
   optimal line length, multiplied by 10.  e.g. assigning something a
   cost of 50 means that it is as bad as a line 5 characters too short
   or too long.  The definition of SHORT_COST(n) should not be changed.
   However, EQUIV(n) may need tuning.  */

/* FIXME: "fmt" misbehaves given large inputs or options.  One
   possible workaround for part of the problem is to change COST to be
   a floating-point type.  There are other problems besides COST,
   though; see MAXWORDS below.  */

/* Integer type in which all costs are accumulated.  */
typedef long int COST;

/* Largest representable cost; used as "no break found yet".  */
#define MAXCOST TYPE_MAXIMUM (COST)

/* Square of N.  N is evaluated twice, so it must be free of side effects.  */
#define SQR(n) ((n) * (n))

/* Convert a departure N (in tenths of a character) to a cost: the
   square of N, computed in type COST.  */
#define EQUIV(n) SQR ((COST) (n))

/* Cost of a filled line n chars longer or shorter than goal_width.  */
#define SHORT_COST(n) EQUIV ((n) * 10)

/* Cost of the difference between adjacent filled lines.  */
#define RAGGED_COST(n) (SHORT_COST (n) / 2)

/* Basic cost per line.  */
#define LINE_COST EQUIV (70)

/* Cost of breaking a line after the first word of a sentence, where
   the length of the word is N.  */
#define WIDOW_COST(n) (EQUIV (200) / ((n) + 2))

/* Cost of breaking a line before the last word of a sentence, where
   the length of the word is N.  */
#define ORPHAN_COST(n) (EQUIV (150) / ((n) + 2))

/* Bonus for breaking a line at the end of a sentence.  */
#define SENTENCE_BONUS EQUIV (50)

/* Cost of breaking a line after a period not marking end of a sentence.
   With the definition of sentence we are using (borrowed from emacs, see
   get_line()) such a break would then look like a sentence break.  Hence
   we assign a very high cost -- it should be avoided unless things are
   really bad.  */
#define NOBREAK_COST EQUIV (600)

/* Bonus for breaking a line before open parenthesis.  */
#define PAREN_BONUS EQUIV (40)

/* Bonus for breaking a line after other punctuation.  */
#define PUNCT_BONUS EQUIV (40)

/* Credit for breaking a long paragraph one line later.  */
#define LINE_CREDIT EQUIV (3)
/* Size of paragraph buffer, in words and characters.  Longer paragraphs
   are handled neatly (cf. flush_paragraph()), so long as these values
   are considerably greater than required by the width.  These values
   cannot be extended indefinitely: doing so would run into size limits
   and/or cause more overflows in cost calculations.  FIXME: Remove these
   arbitrary limits.  */

#define MAXWORDS 1000
#define MAXCHARS 5000

/* Extra ctype(3)-style macros.  Each evaluates C more than once, so C
   must be free of side effects.  The explicit NUL test is needed
   because strchr treats the terminating null byte as part of the
   string, so a NUL input byte would otherwise match every class.  */

#define isopen(c) ((c) != '\0' && strchr ("(['`\"", (c)) != nullptr)
#define isclose(c) ((c) != '\0' && strchr (")]'\"", (c)) != nullptr)
#define isperiod(c) ((c) != '\0' && strchr (".?!", (c)) != nullptr)

/* Size of a tab stop, for expansion on input and re-introduction on
   output.  */
#define TABWIDTH 8
129 /* Word descriptor structure. */
131 typedef struct Word WORD;
133 struct Word
136 /* Static attributes determined during input. */
138 char const *text; /* the text of the word */
139 int length; /* length of this word */
140 int space; /* the size of the following space */
141 unsigned int paren:1; /* starts with open paren */
142 unsigned int period:1; /* ends in [.?!])* */
143 unsigned int punct:1; /* ends in punctuation */
144 unsigned int final:1; /* end of sentence */
146 /* The remaining fields are computed during the optimization. */
148 int line_length; /* length of the best line starting here */
149 COST best_cost; /* cost of best paragraph starting here */
150 WORD *next_break; /* break which achieves best_cost */
153 /* Forward declarations. */
155 static void set_prefix (char *p);
156 static bool fmt (FILE *f, char const *);
157 static bool get_paragraph (FILE *f);
158 static int get_line (FILE *f, int c);
159 static int get_prefix (FILE *f);
160 static int get_space (FILE *f, int c);
161 static int copy_rest (FILE *f, int c);
162 static bool same_para (int c);
163 static void flush_paragraph (void);
164 static void fmt_paragraph (void);
165 static void check_punctuation (WORD *w);
166 static COST base_cost (WORD *this);
167 static COST line_cost (WORD *next, int len);
168 static void put_paragraph (WORD *finish);
169 static void put_line (WORD *w, int indent);
170 static void put_word (WORD *w);
171 static void put_space (int space);
/* Option values.  */

/* If true, first 2 lines may have different indent (default false).  */
static bool crown;

/* If true, first 2 lines _must_ have different indent (default false).  */
static bool tagged;

/* If true, each line is a paragraph on its own (default false).  */
static bool split;

/* If true, don't preserve inter-word spacing (default false).  */
static bool uniform;

/* Prefix minus leading and trailing spaces (default "").  */
static char const *prefix;

/* User-supplied maximum line width (default WIDTH).  The only output
   lines longer than this will each comprise a single word.  */
static int max_width;

/* Values derived from the option values.  */

/* The length of prefix minus leading space.  */
static int prefix_full_length;

/* The length of the leading space trimmed from the prefix.  */
static int prefix_lead_space;

/* The length of prefix minus leading and trailing space.  */
static int prefix_length;

/* The preferred width of text lines, set to LEEWAY % less than max_width.  */
static int goal_width;
208 /* Dynamic variables. */
210 /* Start column of the character most recently read from the input file. */
211 static int in_column;
213 /* Start column of the next character to be written to stdout. */
214 static int out_column;
216 /* Space for the paragraph text -- longer paragraphs are handled neatly
217 (cf. flush_paragraph()). */
218 static char parabuf[MAXCHARS];
220 /* A pointer into parabuf, indicating the first unused character position. */
221 static char *wptr;
223 /* The words of a paragraph -- longer paragraphs are handled neatly
224 (cf. flush_paragraph()). */
225 static WORD word[MAXWORDS];
227 /* A pointer into the above word array, indicating the first position
228 after the last complete word. Sometimes it will point at an incomplete
229 word. */
230 static WORD *word_limit;
232 /* If true, current input file contains tab characters, and so tabs can be
233 used for white space on output. */
234 static bool tabs;
236 /* Space before trimmed prefix on each line of the current paragraph. */
237 static int prefix_indent;
239 /* Indentation of the first line of the current paragraph. */
240 static int first_indent;
242 /* Indentation of other lines of the current paragraph */
243 static int other_indent;
245 /* To detect the end of a paragraph, we need to look ahead to the first
246 non-blank character after the prefix on the next line, or the first
247 character on the following line that failed to match the prefix.
248 We can reconstruct the lookahead from that character (next_char), its
249 position on the line (in_column) and the amount of space before the
250 prefix (next_prefix_indent). See get_paragraph() and copy_rest(). */
252 /* The last character read from the input file. */
253 static int next_char;
255 /* The space before the trimmed prefix (or part of it) on the next line
256 after the current paragraph. */
257 static int next_prefix_indent;
259 /* If nonzero, the length of the last line output in the current
260 paragraph, used to charge for raggedness at the split point for long
261 paragraphs chosen by fmt_paragraph(). */
262 static int last_line_length;
264 void
265 usage (int status)
267 if (status != EXIT_SUCCESS)
268 emit_try_help ();
269 else
271 printf (_("Usage: %s [-WIDTH] [OPTION]... [FILE]...\n"), program_name);
272 fputs (_("\
273 Reformat each paragraph in the FILE(s), writing to standard output.\n\
274 The option -WIDTH is an abbreviated form of --width=DIGITS.\n\
275 "), stdout);
277 emit_stdin_note ();
278 emit_mandatory_arg_note ();
280 fputs (_("\
281 -c, --crown-margin preserve indentation of first two lines\n\
282 -p, --prefix=STRING reformat only lines beginning with STRING,\n\
283 reattaching the prefix to reformatted lines\n\
284 -s, --split-only split long lines, but do not refill\n\
286 stdout);
287 /* Tell xgettext that the "% o" below is not a printf-style
288 format string: xgettext:no-c-format */
289 fputs (_("\
290 -t, --tagged-paragraph indentation of first line different from second\n\
291 -u, --uniform-spacing one space between words, two after sentences\n\
292 -w, --width=WIDTH maximum line width (default of 75 columns)\n\
293 -g, --goal=WIDTH goal width (default of 93% of width)\n\
294 "), stdout);
295 fputs (HELP_OPTION_DESCRIPTION, stdout);
296 fputs (VERSION_OPTION_DESCRIPTION, stdout);
297 emit_ancillary_info (PROGRAM_NAME);
299 exit (status);
302 /* Decode options and launch execution. */
304 static struct option const long_options[] =
306 {"crown-margin", no_argument, nullptr, 'c'},
307 {"prefix", required_argument, nullptr, 'p'},
308 {"split-only", no_argument, nullptr, 's'},
309 {"tagged-paragraph", no_argument, nullptr, 't'},
310 {"uniform-spacing", no_argument, nullptr, 'u'},
311 {"width", required_argument, nullptr, 'w'},
312 {"goal", required_argument, nullptr, 'g'},
313 {GETOPT_HELP_OPTION_DECL},
314 {GETOPT_VERSION_OPTION_DECL},
315 {nullptr, 0, nullptr, 0},
319 main (int argc, char **argv)
321 int optchar;
322 bool ok = true;
323 char const *max_width_option = nullptr;
324 char const *goal_width_option = nullptr;
326 initialize_main (&argc, &argv);
327 set_program_name (argv[0]);
328 setlocale (LC_ALL, "");
329 bindtextdomain (PACKAGE, LOCALEDIR);
330 textdomain (PACKAGE);
332 atexit (close_stdout);
334 crown = tagged = split = uniform = false;
335 max_width = WIDTH;
336 prefix = "";
337 prefix_length = prefix_lead_space = prefix_full_length = 0;
339 if (argc > 1 && argv[1][0] == '-' && ISDIGIT (argv[1][1]))
341 /* Old option syntax; a dash followed by one or more digits. */
342 max_width_option = argv[1] + 1;
344 /* Make the option we just parsed invisible to getopt. */
345 argv[1] = argv[0];
346 argv++;
347 argc--;
350 while ((optchar = getopt_long (argc, argv, "0123456789cstuw:p:g:",
351 long_options, nullptr))
352 != -1)
353 switch (optchar)
355 default:
356 if (ISDIGIT (optchar))
357 error (0, 0, _("invalid option -- %c; -WIDTH is recognized\
358 only when it is the first\noption; use -w N instead"),
359 optchar);
360 usage (EXIT_FAILURE);
362 case 'c':
363 crown = true;
364 break;
366 case 's':
367 split = true;
368 break;
370 case 't':
371 tagged = true;
372 break;
374 case 'u':
375 uniform = true;
376 break;
378 case 'w':
379 max_width_option = optarg;
380 break;
382 case 'g':
383 goal_width_option = optarg;
384 break;
386 case 'p':
387 set_prefix (optarg);
388 break;
390 case_GETOPT_HELP_CHAR;
392 case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS);
396 if (max_width_option)
398 /* Limit max_width to MAXCHARS / 2; otherwise, the resulting
399 output can be quite ugly. */
400 max_width = xdectoumax (max_width_option, 0, MAXCHARS / 2, "",
401 _("invalid width"), 0);
404 if (goal_width_option)
406 /* Limit goal_width to max_width. */
407 goal_width = xdectoumax (goal_width_option, 0, max_width, "",
408 _("invalid width"), 0);
409 if (max_width_option == nullptr)
410 max_width = goal_width + 10;
412 else
414 goal_width = max_width * (2 * (100 - LEEWAY) + 1) / 200;
417 bool have_read_stdin = false;
419 if (optind == argc)
421 have_read_stdin = true;
422 ok = fmt (stdin, "-");
424 else
426 for (; optind < argc; optind++)
428 char *file = argv[optind];
429 if (STREQ (file, "-"))
431 ok &= fmt (stdin, file);
432 have_read_stdin = true;
434 else
436 FILE *in_stream;
437 in_stream = fopen (file, "r");
438 if (in_stream != nullptr)
439 ok &= fmt (in_stream, file);
440 else
442 error (0, errno, _("cannot open %s for reading"),
443 quoteaf (file));
444 ok = false;
450 if (have_read_stdin && fclose (stdin) != 0)
451 die (EXIT_FAILURE, errno, "%s", _("closing standard input"));
453 return ok ? EXIT_SUCCESS : EXIT_FAILURE;
456 /* Trim space from the front and back of the string P, yielding the prefix,
457 and record the lengths of the prefix and the space trimmed. */
459 static void
460 set_prefix (char *p)
462 char *s;
464 prefix_lead_space = 0;
465 while (*p == ' ')
467 prefix_lead_space++;
468 p++;
470 prefix = p;
471 prefix_full_length = strlen (p);
472 s = p + prefix_full_length;
473 while (s > p && s[-1] == ' ')
474 s--;
475 *s = '\0';
476 prefix_length = s - p;
479 /* Read F and send formatted output to stdout.
480 Close F when done, unless F is stdin. Diagnose input errors, using FILE.
481 If !F, assume F resulted from an fopen failure and diagnose that.
482 Return true if successful. */
484 static bool
485 fmt (FILE *f, char const *file)
487 fadvise (f, FADVISE_SEQUENTIAL);
488 tabs = false;
489 other_indent = 0;
490 next_char = get_prefix (f);
491 while (get_paragraph (f))
493 fmt_paragraph ();
494 put_paragraph (word_limit);
497 int err = ferror (f) ? 0 : -1;
498 if (f == stdin)
499 clearerr (f);
500 else if (fclose (f) != 0 && err < 0)
501 err = errno;
502 if (0 <= err)
503 error (0, err, err ? "%s" : _("read error"), quotef (file));
504 return err < 0;
507 /* Set the global variable 'other_indent' according to SAME_PARAGRAPH
508 and other global variables. */
510 static void
511 set_other_indent (bool same_paragraph)
513 if (split)
514 other_indent = first_indent;
515 else if (crown)
517 other_indent = (same_paragraph ? in_column : first_indent);
519 else if (tagged)
521 if (same_paragraph && in_column != first_indent)
523 other_indent = in_column;
526 /* Only one line: use the secondary indent from last time if it
527 splits, or 0 if there have been no multi-line paragraphs in the
528 input so far. But if these rules make the two indents the same,
529 pick a new secondary indent. */
531 else if (other_indent == first_indent)
532 other_indent = first_indent == 0 ? DEF_INDENT : 0;
534 else
536 other_indent = first_indent;
540 /* Read a paragraph from input file F. A paragraph consists of a
541 maximal number of non-blank (excluding any prefix) lines subject to:
542 * In split mode, a paragraph is a single non-blank line.
543 * In crown mode, the second and subsequent lines must have the
544 same indentation, but possibly different from the indent of the
545 first line.
546 * Tagged mode is similar, but the first and second lines must have
547 different indentations.
548 * Otherwise, all lines of a paragraph must have the same indent.
549 If a prefix is in effect, it must be present at the same indent for
550 each line in the paragraph.
552 Return false if end-of-file was encountered before the start of a
553 paragraph, else true. */
555 static bool
556 get_paragraph (FILE *f)
558 int c;
560 last_line_length = 0;
561 c = next_char;
563 /* Scan (and copy) blank lines, and lines not introduced by the prefix. */
565 while (c == '\n' || c == EOF
566 || next_prefix_indent < prefix_lead_space
567 || in_column < next_prefix_indent + prefix_full_length)
569 c = copy_rest (f, c);
570 if (c == EOF)
572 next_char = EOF;
573 return false;
575 putchar ('\n');
576 c = get_prefix (f);
579 /* Got a suitable first line for a paragraph. */
581 prefix_indent = next_prefix_indent;
582 first_indent = in_column;
583 wptr = parabuf;
584 word_limit = word;
585 c = get_line (f, c);
586 set_other_indent (same_para (c));
588 /* Read rest of paragraph (unless split is specified). */
590 if (split)
592 /* empty */
594 else if (crown)
596 if (same_para (c))
599 { /* for each line till the end of the para */
600 c = get_line (f, c);
602 while (same_para (c) && in_column == other_indent);
605 else if (tagged)
607 if (same_para (c) && in_column != first_indent)
610 { /* for each line till the end of the para */
611 c = get_line (f, c);
613 while (same_para (c) && in_column == other_indent);
616 else
618 while (same_para (c) && in_column == other_indent)
619 c = get_line (f, c);
622 /* Tell static analysis tools that using word_limit[-1] is ok.
623 word_limit is guaranteed to have been incremented by get_line. */
624 assert (word < word_limit);
626 (word_limit - 1)->period = (word_limit - 1)->final = true;
627 next_char = c;
628 return true;
631 /* Copy to the output a line that failed to match the prefix, or that
632 was blank after the prefix. In the former case, C is the character
633 that failed to match the prefix. In the latter, C is \n or EOF.
634 Return the character (\n or EOF) ending the line. */
636 static int
637 copy_rest (FILE *f, int c)
639 char const *s;
641 out_column = 0;
642 if (in_column > next_prefix_indent || (c != '\n' && c != EOF))
644 put_space (next_prefix_indent);
645 for (s = prefix; out_column != in_column && *s; out_column++)
646 putchar (*s++);
647 if (c != EOF && c != '\n')
648 put_space (in_column - out_column);
649 if (c == EOF && in_column >= next_prefix_indent + prefix_length)
650 putchar ('\n');
652 while (c != '\n' && c != EOF)
654 putchar (c);
655 c = getc (f);
657 return c;
660 /* Return true if a line whose first non-blank character after the
661 prefix (if any) is C could belong to the current paragraph,
662 otherwise false. */
664 static bool
665 same_para (int c)
667 return (next_prefix_indent == prefix_indent
668 && in_column >= next_prefix_indent + prefix_full_length
669 && c != '\n' && c != EOF);
672 /* Read a line from input file F, given first non-blank character C
673 after the prefix, and the following indent, and break it into words.
674 A word is a maximal non-empty string of non-white characters. A word
675 ending in [.?!][])"']* and followed by end-of-line or at least two
676 spaces ends a sentence, as in emacs.
678 Return the first non-blank character of the next line. */
680 static int
681 get_line (FILE *f, int c)
683 int start;
684 char *end_of_parabuf;
685 WORD *end_of_word;
687 end_of_parabuf = &parabuf[MAXCHARS];
688 end_of_word = &word[MAXWORDS - 2];
691 { /* for each word in a line */
693 /* Scan word. */
695 word_limit->text = wptr;
698 if (wptr == end_of_parabuf)
700 set_other_indent (true);
701 flush_paragraph ();
703 *wptr++ = c;
704 c = getc (f);
706 while (c != EOF && !c_isspace (c));
707 in_column += word_limit->length = wptr - word_limit->text;
708 check_punctuation (word_limit);
710 /* Scan inter-word space. */
712 start = in_column;
713 c = get_space (f, c);
714 word_limit->space = in_column - start;
715 word_limit->final = (c == EOF
716 || (word_limit->period
717 && (c == '\n' || word_limit->space > 1)));
718 if (c == '\n' || c == EOF || uniform)
719 word_limit->space = word_limit->final ? 2 : 1;
720 if (word_limit == end_of_word)
722 set_other_indent (true);
723 flush_paragraph ();
725 word_limit++;
727 while (c != '\n' && c != EOF);
728 return get_prefix (f);
731 /* Read a prefix from input file F. Return either first non-matching
732 character, or first non-blank character after the prefix. */
734 static int
735 get_prefix (FILE *f)
737 int c;
739 in_column = 0;
740 c = get_space (f, getc (f));
741 if (prefix_length == 0)
742 next_prefix_indent = prefix_lead_space < in_column ?
743 prefix_lead_space : in_column;
744 else
746 char const *p;
747 next_prefix_indent = in_column;
748 for (p = prefix; *p != '\0'; p++)
750 unsigned char pc = *p;
751 if (c != pc)
752 return c;
753 in_column++;
754 c = getc (f);
756 c = get_space (f, c);
758 return c;
761 /* Read blank characters from input file F, starting with C, and keeping
762 in_column up-to-date. Return first non-blank character. */
764 static int
765 get_space (FILE *f, int c)
767 while (true)
769 if (c == ' ')
770 in_column++;
771 else if (c == '\t')
773 tabs = true;
774 in_column = (in_column / TABWIDTH + 1) * TABWIDTH;
776 else
777 return c;
778 c = getc (f);
782 /* Set extra fields in word W describing any attached punctuation. */
784 static void
785 check_punctuation (WORD *w)
787 char const *start = w->text;
788 char const *finish = start + (w->length - 1);
789 unsigned char fin = *finish;
791 w->paren = isopen (*start);
792 w->punct = !! ispunct (fin);
793 while (start < finish && isclose (*finish))
794 finish--;
795 w->period = isperiod (*finish);
798 /* Flush part of the paragraph to make room. This function is called on
799 hitting the limit on the number of words or characters. */
801 static void
802 flush_paragraph (void)
804 WORD *split_point;
805 WORD *w;
806 int shift;
807 COST best_break;
809 /* In the special case where it's all one word, just flush it. */
811 if (word_limit == word)
813 fwrite (parabuf, sizeof *parabuf, wptr - parabuf, stdout);
814 wptr = parabuf;
815 return;
818 /* Otherwise:
819 - format what you have so far as a paragraph,
820 - find a low-cost line break near the end,
821 - output to there,
822 - make that the start of the paragraph. */
824 fmt_paragraph ();
826 /* Choose a good split point. */
828 split_point = word_limit;
829 best_break = MAXCOST;
830 for (w = word->next_break; w != word_limit; w = w->next_break)
832 if (w->best_cost - w->next_break->best_cost < best_break)
834 split_point = w;
835 best_break = w->best_cost - w->next_break->best_cost;
837 if (best_break <= MAXCOST - LINE_CREDIT)
838 best_break += LINE_CREDIT;
840 put_paragraph (split_point);
842 /* Copy text of words down to start of parabuf -- we use memmove because
843 the source and target may overlap. */
845 memmove (parabuf, split_point->text, wptr - split_point->text);
846 shift = split_point->text - parabuf;
847 wptr -= shift;
849 /* Adjust text pointers. */
851 for (w = split_point; w <= word_limit; w++)
852 w->text -= shift;
854 /* Copy words from split_point down to word -- we use memmove because
855 the source and target may overlap. */
857 memmove (word, split_point, (word_limit - split_point + 1) * sizeof *word);
858 word_limit -= split_point - word;
861 /* Compute the optimal formatting for the whole paragraph by computing
862 and remembering the optimal formatting for each suffix from the empty
863 one to the whole paragraph. */
865 static void
866 fmt_paragraph (void)
868 WORD *start, *w;
869 int len;
870 COST wcost, best;
871 int saved_length;
873 word_limit->best_cost = 0;
874 saved_length = word_limit->length;
875 word_limit->length = max_width; /* sentinel */
877 for (start = word_limit - 1; start >= word; start--)
879 best = MAXCOST;
880 len = start == word ? first_indent : other_indent;
882 /* At least one word, however long, in the line. */
884 w = start;
885 len += w->length;
888 w++;
890 /* Consider breaking before w. */
892 wcost = line_cost (w, len) + w->best_cost;
893 if (start == word && last_line_length > 0)
894 wcost += RAGGED_COST (len - last_line_length);
895 if (wcost < best)
897 best = wcost;
898 start->next_break = w;
899 start->line_length = len;
902 /* This is a kludge to keep us from computing 'len' as the
903 sum of the sentinel length and some non-zero number.
904 Since the sentinel w->length may be INT_MAX, adding
905 to that would give a negative result. */
906 if (w == word_limit)
907 break;
909 len += (w - 1)->space + w->length; /* w > start >= word */
911 while (len < max_width);
912 start->best_cost = best + base_cost (start);
915 word_limit->length = saved_length;
918 /* Work around <https://gcc.gnu.org/bugzilla/show_bug.cgi?id=109628>. */
919 #if 13 <= __GNUC__
920 # pragma GCC diagnostic ignored "-Wanalyzer-use-of-uninitialized-value"
921 #endif
923 /* Return the constant component of the cost of breaking before the
924 word THIS. */
926 static COST
927 base_cost (WORD *this)
929 COST cost;
931 cost = LINE_COST;
933 if (this > word)
935 if ((this - 1)->period)
937 if ((this - 1)->final)
938 cost -= SENTENCE_BONUS;
939 else
940 cost += NOBREAK_COST;
942 else if ((this - 1)->punct)
943 cost -= PUNCT_BONUS;
944 else if (this > word + 1 && (this - 2)->final)
945 cost += WIDOW_COST ((this - 1)->length);
948 if (this->paren)
949 cost -= PAREN_BONUS;
950 else if (this->final)
951 cost += ORPHAN_COST (this->length);
953 return cost;
956 /* Return the component of the cost of breaking before word NEXT that
957 depends on LEN, the length of the line beginning there. */
959 static COST
960 line_cost (WORD *next, int len)
962 int n;
963 COST cost;
965 if (next == word_limit)
966 return 0;
967 n = goal_width - len;
968 cost = SHORT_COST (n);
969 if (next->next_break != word_limit)
971 n = len - next->line_length;
972 cost += RAGGED_COST (n);
974 return cost;
977 /* Output to stdout a paragraph from word up to (but not including)
978 FINISH, which must be in the next_break chain from word. */
980 static void
981 put_paragraph (WORD *finish)
983 WORD *w;
985 put_line (word, first_indent);
986 for (w = word->next_break; w != finish; w = w->next_break)
987 put_line (w, other_indent);
990 /* Output to stdout the line beginning with word W, beginning in column
991 INDENT, including the prefix (if any). */
993 static void
994 put_line (WORD *w, int indent)
996 WORD *endline;
998 out_column = 0;
999 put_space (prefix_indent);
1000 fputs (prefix, stdout);
1001 out_column += prefix_length;
1002 put_space (indent - out_column);
1004 endline = w->next_break - 1;
1005 for (; w != endline; w++)
1007 put_word (w);
1008 put_space (w->space);
1010 put_word (w);
1011 last_line_length = out_column;
1012 putchar ('\n');
1015 /* Output to stdout the word W. */
1017 static void
1018 put_word (WORD *w)
1020 char const *s;
1021 int n;
1023 s = w->text;
1024 for (n = w->length; n != 0; n--)
1025 putchar (*s++);
1026 out_column += w->length;
1029 /* Output to stdout SPACE spaces, or equivalent tabs. */
1031 static void
1032 put_space (int space)
1034 int space_target, tab_target;
1036 space_target = out_column + space;
1037 if (tabs)
1039 tab_target = space_target / TABWIDTH * TABWIDTH;
1040 if (out_column + 1 < tab_target)
1041 while (out_column < tab_target)
1043 putchar ('\t');
1044 out_column = (out_column / TABWIDTH + 1) * TABWIDTH;
1047 while (out_column < space_target)
1049 putchar (' ');
1050 out_column++;