1 //! Functions for wrapping text.
5 use crate::core::{break_words, display_width, Word};
6 use crate::word_splitters::split_words;
9 /// Wrap a line of text at a given width.
11 /// The result is a vector of lines, each line is of type [`Cow<'_,
12 /// str>`](Cow), which means that the line will borrow from the input
13 /// `&str` if possible. The lines do not have trailing whitespace,
14 /// including a final `'\n'`. Please use [`fill()`](crate::fill()) if
15 /// you need a [`String`] instead.
17 /// The easiest way to use this function is to pass an integer for
18 /// `width_or_options`:
21 /// use textwrap::wrap;
23 /// let lines = wrap("Memory safety without garbage collection.", 15);
24 /// assert_eq!(lines, &[
26 /// "without garbage",
31 /// If you need to customize the wrapping, you can pass an [`Options`]
32 /// instead of a `usize`:
35 /// use textwrap::{wrap, Options};
37 /// let options = Options::new(15)
38 /// .initial_indent("- ")
39 /// .subsequent_indent(" ");
40 /// let lines = wrap("Memory safety without garbage collection.", &options);
41 /// assert_eq!(lines, &[
42 /// "- Memory safety",
49 /// # Optimal-Fit Wrapping
51 /// By default, `wrap` will try to ensure an even right margin by
52 /// finding breaks which avoid short lines. We call this an
53 /// “optimal-fit algorithm” since the line breaks are computed by
54 /// considering all possible line breaks. The alternative is a
55 /// “first-fit algorithm” which simply accumulates words until they no
56 /// longer fit on the line.
58 /// As an example, using the first-fit algorithm to wrap the famous
59 /// Hamlet quote “To be, or not to be: that is the question” in a
60 /// narrow column with room for only 10 characters looks like this:
63 /// # use textwrap::{WrapAlgorithm::FirstFit, Options, wrap};
65 /// # let lines = wrap("To be, or not to be: that is the question",
66 /// # Options::new(10).wrap_algorithm(FirstFit));
67 /// # assert_eq!(lines.join("\n") + "\n", "\
76 /// Notice how the second to last line is quite narrow because
77 /// “question” was too large to fit? The greedy first-fit algorithm
78 /// doesn’t look ahead, so it has no other option than to put
79 /// “question” onto its own line.
81 /// With the optimal-fit wrapping algorithm, the previous lines are
82 /// shortened slightly in order to make the word “is” go into the
86 /// # #[cfg(feature = "smawk")] {
87 /// # use textwrap::{Options, WrapAlgorithm, wrap};
89 /// # let lines = wrap(
90 /// # "To be, or not to be: that is the question",
91 /// # Options::new(10).wrap_algorithm(WrapAlgorithm::new_optimal_fit())
93 /// # assert_eq!(lines.join("\n") + "\n", "\
102 /// Please see [`WrapAlgorithm`](crate::WrapAlgorithm) for details on
107 /// The returned vector holds lines of type `Cow<'_, str>`. If
108 /// possible, the wrapped lines will borrow from the input string. As
109 /// an example, with a hanging indentation the first line can borrow
110 /// from the input, but the subsequent lines become owned strings:
113 /// use std::borrow::Cow::{Borrowed, Owned};
114 /// use textwrap::{wrap, Options};
116 /// let options = Options::new(15).subsequent_indent("....");
117 /// let lines = wrap("Wrapping text all day long.", &options);
118 /// let annotated = lines
120 /// .map(|line| match line {
121 /// Borrowed(text) => format!("[Borrowed] {}", text),
122 /// Owned(text) => format!("[Owned] {}", text),
124 /// .collect::<Vec<_>>();
128 /// "[Borrowed] Wrapping text",
129 /// "[Owned] ....all day",
130 /// "[Owned] ....long.",
135 /// ## Leading and Trailing Whitespace
137 /// As a rule, leading whitespace (indentation) is preserved and
138 /// trailing whitespace is discarded.
140 /// In more detail, when wrapping words into lines, words are found
141 /// by splitting the input text on space characters. One or more
142 /// spaces (shown here as “␣”) are attached to the end of each word:
145 /// "Foo␣␣␣bar␣baz" -> ["Foo␣␣␣", "bar␣", "baz"]
148 /// These words are then put into lines. The interword whitespace is
149 /// preserved, unless the lines are wrapped so that the `"Foo␣␣␣"`
150 /// word falls at the end of a line:
153 /// use textwrap::wrap;
155 /// assert_eq!(wrap("Foo bar baz", 10), vec!["Foo bar", "baz"]);
156 /// assert_eq!(wrap("Foo bar baz", 8), vec!["Foo", "bar baz"]);
159 /// Notice how the trailing whitespace is removed in both cases: in the
160 /// first example, `"bar␣"` becomes `"bar"` and in the second case
161 /// `"Foo␣␣␣"` becomes `"Foo"`.
163 /// Leading whitespace is preserved when the following word fits on
164 /// the first line. To understand this, consider how words are found
165 /// in a text with leading spaces:
168 /// "␣␣foo␣bar" -> ["␣␣", "foo␣", "bar"]
171 /// When put into lines, the indentation is preserved if `"foo"` fits
172 /// on the first line, otherwise you end up with an empty line:
175 /// use textwrap::wrap;
177 /// assert_eq!(wrap(" foo bar", 8), vec![" foo", "bar"]);
178 /// assert_eq!(wrap(" foo bar", 4), vec!["", "foo", "bar"]);
180 pub fn wrap<'a, Opt>(text: &str, width_or_options: Opt) -> Vec<Cow<'_, str>>
182 Opt: Into<Options<'a>>,
184 let options: Options = width_or_options.into();
185 let line_ending_str = options.line_ending.as_str();
187 let mut lines = Vec::new();
188 for line in text.split(line_ending_str) {
189 wrap_single_line(line, &options, &mut lines);
195 pub(crate) fn wrap_single_line<'a>(
197 options: &Options<'_>,
198 lines: &mut Vec<Cow<'a, str>>,
200 let indent = if lines.is_empty() {
201 options.initial_indent
203 options.subsequent_indent
205 if line.len() < options.width && indent.is_empty() {
206 lines.push(Cow::from(line.trim_end_matches(' ')));
208 wrap_single_line_slow_path(line, options, lines)
212 /// Wrap a single line of text.
214 /// This is taken when `line` is not shorter than `options.width` or when the applicable indent is non-empty.
215 pub(crate) fn wrap_single_line_slow_path<'a>(
217 options: &Options<'_>,
218 lines: &mut Vec<Cow<'a, str>>,
220 let initial_width = options
222 .saturating_sub(display_width(options.initial_indent));
223 let subsequent_width = options
225 .saturating_sub(display_width(options.subsequent_indent));
226 let line_widths = [initial_width, subsequent_width];
228 let words = options.word_separator.find_words(line);
229 let split_words = split_words(words, &options.word_splitter);
230 let broken_words = if options.break_words {
231 let mut broken_words = break_words(split_words, line_widths[1]);
232 if !options.initial_indent.is_empty() {
233 // Without this, the first word will always go into the
234 // first line. However, since we break words based on the
235 // _second_ line width, it can be wrong to unconditionally
236 // put the first word onto the first line. Inserting an
237 // empty zero-width word fixes this.
238 broken_words.insert(0, Word::from(""));
242 split_words.collect::<Vec<_>>()
245 let wrapped_words = options.wrap_algorithm.wrap(&broken_words, &line_widths);
248 for words in wrapped_words {
249 let last_word = match words.last() {
251 lines.push(Cow::from(""));
257 // We assume here that all words are contiguous in `line`.
258 // That is, the sum of their lengths should add up to the
262 .map(|word| word.len() + word.whitespace.len())
264 - last_word.whitespace.len();
266 // The result is owned if we have indentation, otherwise we
267 // can simply borrow an empty string.
268 let mut result = if lines.is_empty() && !options.initial_indent.is_empty() {
269 Cow::Owned(options.initial_indent.to_owned())
270 } else if !lines.is_empty() && !options.subsequent_indent.is_empty() {
271 Cow::Owned(options.subsequent_indent.to_owned())
273 // We can use an empty string here since string
274 // concatenation for `Cow` preserves a borrowed value when
275 // either side is empty.
279 result += &line[idx..idx + len];
281 if !last_word.penalty.is_empty() {
282 result.to_mut().push_str(last_word.penalty);
287 // Advance by the length of `result`, plus the length of
288 // `last_word.whitespace` -- even if we had a penalty, we need
289 // to skip over the whitespace.
290 idx += len + last_word.whitespace.len();
297 use crate::{WordSeparator, WordSplitter, WrapAlgorithm};
299 #[cfg(feature = "hyphenation")]
300 use hyphenation::{Language, Load, Standard};
304 assert_eq!(wrap("foo", 10), vec!["foo"]);
309 assert_eq!(wrap("foo bar baz", 5), vec!["foo", "bar", "baz"]);
316 "To be, or not to be, that is the question.",
317 Options::new(10).wrap_algorithm(WrapAlgorithm::FirstFit)
319 vec!["To be, or", "not to be,", "that is", "the", "question."]
324 fn multiple_words_on_first_line() {
325 assert_eq!(wrap("foo bar baz", 10), vec!["foo bar", "baz"]);
330 assert_eq!(wrap("foo", 0), vec!["f", "o", "o"]);
335 assert_eq!(wrap("foo bar", 0), vec!["f", "o", "o", "b", "a", "r"]);
340 assert_eq!(wrap("foo bar", usize::MAX), vec!["foo bar"]);
342 let text = "Hello there! This is some English text. \
343 It should not be wrapped given the extents below.";
344 assert_eq!(wrap(text, usize::MAX), vec![text]);
348 fn leading_whitespace() {
349 assert_eq!(wrap(" foo bar", 6), vec![" foo", "bar"]);
353 fn leading_whitespace_empty_first_line() {
354 // If there is no space for the first word, the first line
355 // will be empty. This is because the string is split into
356 // words like [" ", "foobar ", "baz"], which puts "foobar " on
357 // the second line. We never output trailing whitespace.
358 assert_eq!(wrap(" foobar baz", 6), vec!["", "foobar", "baz"]);
362 fn trailing_whitespace() {
363 // Whitespace is only significant inside a line. After a line
364 // gets too long and is broken, the first word starts in
365 // column zero and is not indented.
366 assert_eq!(wrap("foo bar baz ", 5), vec!["foo", "bar", "baz"]);
371 // We did not reset the in_whitespace flag correctly and did
372 // not handle single-character words after a line break.
374 wrap("aaabbbccc x yyyzzzwww", 9),
375 vec!["aaabbbccc", "x", "yyyzzzwww"]
381 // The dash is an en-dash which takes up three bytes. We used
382 // to panic since we tried to index into the character.
383 let options = Options::new(1).word_separator(WordSeparator::AsciiSpace);
384 assert_eq!(wrap("x – x", options), vec!["x", "–", "x"]);
388 fn wide_character_handling() {
389 assert_eq!(wrap("Hello, World!", 15), vec!["Hello, World!"]);
393 Options::new(15).word_separator(WordSeparator::AsciiSpace)
395 vec!["Hello,", "World!"]
398 // Wide characters are allowed to break if the
399 // unicode-linebreak feature is enabled.
400 #[cfg(feature = "unicode-linebreak")]
404 Options::new(15).word_separator(WordSeparator::UnicodeBreakProperties),
406 vec!["Hello, W", "orld!"]
411 fn indent_empty_line() {
412 // Previously, indentation was not applied to empty lines.
413 // However, this is somewhat inconsistent and undesirable if
414 // the indentation is something like a border ("| ") which you
415 // want to apply to all lines, empty or not.
416 let options = Options::new(10).initial_indent("!!!");
417 assert_eq!(wrap("", &options), vec!["!!!"]);
421 fn indent_single_line() {
422 let options = Options::new(10).initial_indent(">>>"); // No trailing space
423 assert_eq!(wrap("foo", &options), vec![">>>foo"]);
427 fn indent_first_emoji() {
428 let options = Options::new(10).initial_indent("👉👉");
430 wrap("x x x x x x x x x x x x x", &options),
431 vec!["👉👉x x x", "x x x x x", "x x x x x"]
436 fn indent_multiple_lines() {
437 let options = Options::new(6).initial_indent("* ").subsequent_indent(" ");
439 wrap("foo bar baz", &options),
440 vec!["* foo", " bar", " baz"]
445 fn only_initial_indent_multiple_lines() {
446 let options = Options::new(10).initial_indent(" ");
447 assert_eq!(wrap("foo\nbar\nbaz", &options), vec![" foo", "bar", "baz"]);
451 fn only_subsequent_indent_multiple_lines() {
452 let options = Options::new(10).subsequent_indent(" ");
454 wrap("foo\nbar\nbaz", &options),
455 vec!["foo", " bar", " baz"]
460 fn indent_break_words() {
461 let options = Options::new(5).initial_indent("* ").subsequent_indent(" ");
462 assert_eq!(wrap("foobarbaz", &options), vec!["* foo", " bar", " baz"]);
466 fn initial_indent_break_words() {
467 // This is a corner-case showing how the long word is broken
468 // according to the width of the subsequent lines. The first
469 // fragment of the word no longer fits on the first line,
470 // which ends up being pure indentation.
471 let options = Options::new(5).initial_indent("-->");
472 assert_eq!(wrap("foobarbaz", &options), vec!["-->", "fooba", "rbaz"]);
477 assert_eq!(wrap("foo-bar", 5), vec!["foo-", "bar"]);
481 fn trailing_hyphen() {
482 let options = Options::new(5).break_words(false);
483 assert_eq!(wrap("foobar-", &options), vec!["foobar-"]);
487 fn multiple_hyphens() {
488 assert_eq!(wrap("foo-bar-baz", 5), vec!["foo-", "bar-", "baz"]);
493 let options = Options::new(5).break_words(false);
495 wrap("The --foo-bar flag.", &options),
496 vec!["The", "--foo-", "bar", "flag."]
501 fn repeated_hyphens() {
502 let options = Options::new(4).break_words(false);
503 assert_eq!(wrap("foo--bar", &options), vec!["foo--bar"]);
507 fn hyphens_alphanumeric() {
508 assert_eq!(wrap("Na2-CH4", 5), vec!["Na2-", "CH4"]);
512 fn hyphens_non_alphanumeric() {
513 let options = Options::new(5).break_words(false);
514 assert_eq!(wrap("foo(-)bar", &options), vec!["foo(-)bar"]);
518 fn multiple_splits() {
519 assert_eq!(wrap("foo-bar-baz", 9), vec!["foo-bar-", "baz"]);
524 let options = Options::new(5).break_words(false);
525 assert_eq!(wrap("foobar-baz", &options), vec!["foobar-", "baz"]);
529 fn multiple_unbroken_words_issue_193() {
530 let options = Options::new(3).break_words(false);
532 wrap("small large tiny", &options),
533 vec!["small", "large", "tiny"]
536 wrap("small large tiny", &options),
537 vec!["small", "large", "tiny"]
542 fn very_narrow_lines_issue_193() {
543 let options = Options::new(1).break_words(false);
544 assert_eq!(wrap("fooo x y", &options), vec!["fooo", "x", "y"]);
545 assert_eq!(wrap("fooo x y", &options), vec!["fooo", "x", "y"]);
549 fn simple_hyphens() {
550 let options = Options::new(8).word_splitter(WordSplitter::HyphenSplitter);
551 assert_eq!(wrap("foo bar-baz", &options), vec!["foo bar-", "baz"]);
555 fn no_hyphenation() {
556 let options = Options::new(8).word_splitter(WordSplitter::NoHyphenation);
557 assert_eq!(wrap("foo bar-baz", &options), vec!["foo", "bar-baz"]);
561 #[cfg(feature = "hyphenation")]
562 fn auto_hyphenation_double_hyphenation() {
563 let dictionary = Standard::from_embedded(Language::EnglishUS).unwrap();
564 let options = Options::new(10);
566 wrap("Internationalization", &options),
567 vec!["Internatio", "nalization"]
570 let options = Options::new(10).word_splitter(WordSplitter::Hyphenation(dictionary));
572 wrap("Internationalization", &options),
573 vec!["Interna-", "tionaliza-", "tion"]
578 #[cfg(feature = "hyphenation")]
579 fn auto_hyphenation_issue_158() {
580 let dictionary = Standard::from_embedded(Language::EnglishUS).unwrap();
581 let options = Options::new(10);
583 wrap("participation is the key to success", &options),
584 vec!["participat", "ion is", "the key to", "success"]
587 let options = Options::new(10).word_splitter(WordSplitter::Hyphenation(dictionary));
589 wrap("participation is the key to success", &options),
590 vec!["partici-", "pation is", "the key to", "success"]
595 #[cfg(feature = "hyphenation")]
596 fn split_len_hyphenation() {
597 // Test that hyphenation takes the width of the whitespace
599 let dictionary = Standard::from_embedded(Language::EnglishUS).unwrap();
600 let options = Options::new(15).word_splitter(WordSplitter::Hyphenation(dictionary));
602 wrap("garbage collection", &options),
603 vec!["garbage col-", "lection"]
608 #[cfg(feature = "hyphenation")]
609 fn borrowed_lines() {
610 // Lines that end with an extra hyphen are owned, the final
612 use std::borrow::Cow::{Borrowed, Owned};
613 let dictionary = Standard::from_embedded(Language::EnglishUS).unwrap();
614 let options = Options::new(10).word_splitter(WordSplitter::Hyphenation(dictionary));
615 let lines = wrap("Internationalization", &options);
616 assert_eq!(lines, vec!["Interna-", "tionaliza-", "tion"]);
617 if let Borrowed(s) = lines[0] {
618 assert!(false, "should not have been borrowed: {:?}", s);
620 if let Borrowed(s) = lines[1] {
621 assert!(false, "should not have been borrowed: {:?}", s);
623 if let Owned(ref s) = lines[2] {
624 assert!(false, "should not have been owned: {:?}", s);
629 #[cfg(feature = "hyphenation")]
630 fn auto_hyphenation_with_hyphen() {
631 let dictionary = Standard::from_embedded(Language::EnglishUS).unwrap();
632 let options = Options::new(8).break_words(false);
634 wrap("over-caffinated", &options),
635 vec!["over-", "caffinated"]
638 let options = options.word_splitter(WordSplitter::Hyphenation(dictionary));
640 wrap("over-caffinated", &options),
641 vec!["over-", "caffi-", "nated"]
647 assert_eq!(wrap("foobarbaz", 3), vec!["foo", "bar", "baz"]);
651 fn break_words_wide_characters() {
652 // Even the poor man's version of `ch_width` counts these
653 // characters as wide.
654 let options = Options::new(5).word_separator(WordSeparator::AsciiSpace);
655 assert_eq!(wrap("Hello", options), vec!["He", "ll", "o"]);
659 fn break_words_zero_width() {
660 assert_eq!(wrap("foobar", 0), vec!["f", "o", "o", "b", "a", "r"]);
664 fn break_long_first_word() {
665 assert_eq!(wrap("testx y", 4), vec!["test", "x y"]);
669 fn wrap_preserves_line_breaks_trims_whitespace() {
670 assert_eq!(wrap(" ", 80), vec![""]);
671 assert_eq!(wrap(" \n ", 80), vec!["", ""]);
672 assert_eq!(wrap(" \n \n \n ", 80), vec!["", "", "", ""]);
676 fn wrap_colored_text() {
677 // The words are much longer than 6 bytes, but they remain
678 // intact after wrapping the text.
679 let green_hello = "\u{1b}[0m\u{1b}[32mHello\u{1b}[0m";
680 let blue_world = "\u{1b}[0m\u{1b}[34mWorld!\u{1b}[0m";
682 wrap(&format!("{} {}", green_hello, blue_world), 6),
683 vec![green_hello, blue_world],