2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 2011,2012,2013,2014,2015, by the GROMACS development team, led by
5 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
6 * and including many others, as listed in the AUTHORS file in the
7 * top-level source directory and at http://www.gromacs.org.
9 * GROMACS is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public License
11 * as published by the Free Software Foundation; either version 2.1
12 * of the License, or (at your option) any later version.
14 * GROMACS is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with GROMACS; if not, see
21 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
22 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
24 * If you want to redistribute modifications to GROMACS, please
25 * consider that scientific software is very special. Version
26 * control is crucial - bugs must be traceable. We will be happy to
27 * consider code for inclusion in the official distribution, but
28 * derived work must not be called official GROMACS. Details are found
29 * in the README & COPYING files - if they are missing, get the
30 * official version at http://www.gromacs.org.
32 * To help us fund GROMACS development, we humbly ask that you cite
33 * the research papers on the package. Check out http://www.gromacs.org.
37 * Declares common string utility and formatting routines.
39 * \author Teemu Murtola <teemu.murtola@gmail.com>
41 * \ingroup module_utility
43 #ifndef GMX_UTILITY_STRINGUTIL_H
44 #define GMX_UTILITY_STRINGUTIL_H
54 //! \addtogroup module_utility
58 * Tests whether a string is null or empty.
/*! \brief
 * Tests whether a string is null or empty.
 *
 * \param[in] str  C string to test; may be NULL.
 * \returns   true if \p str is NULL or its first character is the
 *     terminating zero.
 */
static inline bool isNullOrEmpty(const char *str)
{
    return str == NULL || str[0] == '\0';
}
68 * Tests whether a string starts with another string.
70 * \param[in] str String to process.
71 * \param[in] prefix Prefix to find.
72 * \returns true if \p str starts with \p prefix.
74 * Returns true if \p prefix is empty.
/*! \brief
 * Tests whether a string starts with another string.
 *
 * \param[in] str    String to process.
 * \param[in] prefix Prefix to find.
 * \returns   true if \p str starts with \p prefix.
 *
 * Returns true if \p prefix is empty.
 */
static inline bool startsWith(const std::string &str, const std::string &prefix)
{
    // compare() clamps the compared range to the length of str, so a prefix
    // longer than str simply compares unequal.
    return str.compare(0, prefix.length(), prefix) == 0;
}
81 //! \copydoc startsWith(const std::string &, const std::string &)
/*! \brief
 * Tests whether a C string starts with another C string.
 *
 * \param[in] str    String to process.
 * \param[in] prefix Prefix to find.
 * \returns   true if \p str starts with \p prefix.
 *
 * Returns true if \p prefix is NULL or empty.
 * Returns false if \p str is NULL and \p prefix is not empty.
 */
static inline bool startsWith(const char *str, const char *prefix)
{
    // An absent or empty prefix matches every string.
    if (prefix == NULL || prefix[0] == '\0')
    {
        return true;
    }
    // Passing NULL to strncmp()/strlen() is undefined, so reject it here.
    if (str == NULL)
    {
        return false;
    }
    return std::strncmp(str, prefix, std::strlen(prefix)) == 0;
}
88 * Tests whether a string ends with another string.
90 * \param[in] str String to process.
91 * \param[in] suffix Suffix to find.
92 * \returns true if \p str ends with \p suffix.
94 * Returns true if \p suffix is NULL or empty.
/*! \brief
 * Tests whether a string ends with another string.
 *
 * \param[in] str    String to process.
 * \param[in] suffix Suffix to find.
 * \returns   true if \p str ends with \p suffix.
 *
 * Returns true if \p suffix is NULL or empty.
 * Declared here only; the definition lives outside this header.
 */
bool endsWith(const char *str, const char *suffix);
98 //! \copydoc endsWith(const char *, const char *)
/*! \brief
 * Tests whether a string object ends with a given C string.
 *
 * \param[in] str    String to process.
 * \param[in] suffix Suffix to find.
 * \returns   Result of endsWith(const char *, const char *) applied to the
 *     character data of \p str.
 */
static inline bool endsWith(const std::string &str, const char *suffix)
{
    // Forward to the C-string overload so that both share one implementation.
    return endsWith(str.c_str(), suffix);
}
105 * Tests whether a string contains another as a substring.
107 * \param[in] str String to process.
108 * \param[in] substr Substring to find.
109 * \returns true if \p str contains \p substr.
/*! \brief
 * Tests whether a string contains another as a substring.
 *
 * \param[in] str    String to process.
 * \param[in] substr Substring to find.
 * \returns   true if \p str contains \p substr.
 */
static inline bool contains(const std::string &str, const char *substr)
{
    // find() reports "no match" with npos; anything else is a hit.
    return str.find(substr) != std::string::npos;
}
118 /*!\brief Returns number of space-separated words in zero-terminated char ptr
120 * \param s Pointer to a zero-terminated character string, which will not be changed.
122 * \returns number of words in string.
124 * \note This routine is mainly meant to support legacy code in GROMACS. For
125 * new source you should try hard to use C++ string objects instead.
// Declaration of the C-string overload of countWords(); no body is given here.
// NOTE(review): the return type is not visible on these lines; the description
// only states that the number of words is returned - confirm the exact type.
128 countWords(const char *s
);
130 /*!\brief Returns the number of space-separated words in a string object
132 * \param str Reference to string object, which will not be changed.
134 * \returns number of words in string.
// Declaration of the std::string overload of countWords(); no body is given here.
// NOTE(review): the return type is not visible on these lines; the description
// only states that the number of words is returned - confirm the exact type.
137 countWords(const std::string
&str
);
140 * Removes a suffix from a string.
142 * \param[in] str String to process.
143 * \param[in] suffix Suffix to remove.
144 * \returns \p str with \p suffix removed, or \p str unmodified if it does
145 * not end with \p suffix.
146 * \throws std::bad_alloc if out of memory.
148 * Returns \p str if \p suffix is NULL or empty.
/*! \brief
 * Removes a suffix from a string.
 *
 * \param[in] str    String to process.
 * \param[in] suffix Suffix to remove.
 * \returns   \p str with \p suffix removed, or \p str unmodified if it does
 *     not end with \p suffix.
 * \throws    std::bad_alloc if out of memory.
 *
 * Returns \p str if \p suffix is NULL or empty.
 * Declared here only; the definition lives outside this header.
 */
std::string stripSuffixIfPresent(const std::string &str, const char *suffix);
152 * Removes leading and trailing whitespace from a string.
154 * \param[in] str String to process.
155 * \returns \p str with leading and trailing whitespaces removed.
156 * \throws std::bad_alloc if out of memory.
/*! \brief
 * Removes leading and trailing whitespace from a string.
 *
 * \param[in] str String to process.
 * \returns   \p str with leading and trailing whitespaces removed.
 * \throws    std::bad_alloc if out of memory.
 *
 * Declared here only; the definition lives outside this header.
 */
std::string stripString(const std::string &str);
161 * Formats a string (snprintf() wrapper).
163 * \throws std::bad_alloc if out of memory.
165 * This function works like sprintf(), except that it returns an std::string
166 * instead of requiring a preallocated buffer. Arbitrary length output is
/*! \brief
 * Formats a string (snprintf() wrapper).
 *
 * \param[in] fmt printf-style format string; the values to format follow
 *     as variadic arguments.
 * \returns   The formatted text as an std::string.
 * \throws    std::bad_alloc if out of memory.
 *
 * This function works like sprintf(), except that it returns an std::string
 * instead of requiring a preallocated buffer.
 * Declared here only; the definition lives outside this header.
 */
std::string formatString(const char *fmt, ...);
/*! \brief Function object that wraps a call to formatString() that
 * expects a single conversion argument, for use with algorithms. */
class StringFormatter
{
    public:
        /*! \brief Constructor
         *
         * \param[in] format The printf-style format string that will
         *     be applied to convert values of type T to
         *     string. Exactly one argument to the conversion
         *     specification(s) in `format` is supported.
         *
         * Only the pointer is stored, so \p format must stay valid for as
         * long as this object is used. */
        explicit StringFormatter(const char *format) : format_(format)
        {
        }

        //! Implements the formatting functionality
        template <typename T>
        std::string operator()(const T &value) const
        {
            return formatString(format_, value);
        }

    private:
        //! Format string to use
        const char *format_;
};
/*! \brief Function object to implement the same interface as
 * `StringFormatter` to use with strings that should not be formatted
 * further. */
class IdentityFormatter
{
    public:
        //! Implements the formatting non-functionality: returns a copy of \p value.
        std::string operator()(const std::string &value) const
        {
            return value;
        }
};
211 /*! \brief Formats all the range as strings, and then joins them with
212 * a separator in between.
214 * \param[in] begin Iterator to the beginning of the range to join.
215 * \param[in] end Iterator to the end of the range to join.
216 * \param[in] separator String to put in between the joined strings.
217 * \param[in] formatter Function object to format the objects in
218 * the range as strings
219 * \returns All objects in the range from `begin` to `end` formatted
220 * as strings and concatenated with `separator` between each pair.
221 * \throws std::bad_alloc if out of memory.
/*! \brief Formats all elements of a range as strings, and then joins them
 * with a separator in between.
 *
 * \param[in] begin      Iterator to the beginning of the range to join.
 * \param[in] end        Iterator to the end of the range to join.
 * \param[in] separator  String to put in between the joined strings.
 * \param[in] formatter  Function object to format the objects in the
 *     range as strings.
 * \returns   All objects in the range from `begin` to `end` formatted
 *     as strings and concatenated with `separator` between each pair.
 *     An empty range produces an empty string.
 * \throws    std::bad_alloc if out of memory.
 */
template <typename InputIterator, typename FormatterType>
std::string formatAndJoin(InputIterator begin, InputIterator end,
                          const char *separator, const FormatterType &formatter)
{
    std::string result;
    // Nothing is emitted before the first element; every later element is
    // preceded by the real separator.
    const char *currentSeparator = "";
    for (InputIterator i = begin; i != end; ++i)
    {
        result.append(currentSeparator);
        result.append(formatter(*i));
        currentSeparator = separator;
    }
    return result;
}
237 /*! \brief Formats all elements of the container as strings, and then
238 * joins them with a separator in between.
240 * \param[in] container Objects to join.
241 * \param[in] separator String to put in between the joined strings.
242 * \param[in] formatter Function object to format the objects in
243 * `container` as strings
244 * \returns All objects from `container` formatted as strings and
245 * concatenated with `separator` between each pair.
246 * \throws std::bad_alloc if out of memory.
/*! \brief Formats all elements of the container as strings, and then
 * joins them with a separator in between.
 *
 * \param[in] container  Objects to join.
 * \param[in] separator  String to put in between the joined strings.
 * \param[in] formatter  Function object to format the objects in
 *     `container` as strings.
 * \returns   All objects from `container` formatted as strings and
 *     concatenated with `separator` between each pair.
 * \throws    std::bad_alloc if out of memory.
 */
template <typename ContainerType, typename FormatterType>
std::string formatAndJoin(const ContainerType &container,
                          const char *separator, const FormatterType &formatter)
{
    // Delegates to the iterator-range overload over the whole container.
    return formatAndJoin(container.begin(), container.end(), separator, formatter);
}
255 * Joins strings from a range with a separator in between.
257 * \param[in] begin Iterator to the beginning of the range to join.
258 * \param[in] end Iterator to the end of the range to join.
259 * \param[in] separator String to put in between the joined strings.
260 * \returns All strings from (`begin`, `end`) concatenated with `separator`
262 * \throws std::bad_alloc if out of memory.
264 template <typename InputIterator
>
265 std::string
joinStrings(InputIterator begin
, InputIterator end
,
266 const char *separator
)
268 return formatAndJoin(begin
, end
, separator
, IdentityFormatter());
272 * Joins strings from a container with a separator in between.
274 * \param[in] container Strings to join.
275 * \param[in] separator String to put in between the joined strings.
276 * \returns All strings from `container` concatenated with `separator`
278 * \throws std::bad_alloc if out of memory.
/*! \brief
 * Joins strings from a container with a separator in between.
 *
 * \param[in] container  Strings to join.
 * \param[in] separator  String to put in between the joined strings.
 * \returns   All strings from `container` concatenated with `separator`
 *     between each pair.
 * \throws    std::bad_alloc if out of memory.
 */
template <typename ContainerType>
std::string joinStrings(const ContainerType &container, const char *separator)
{
    // Delegates to the iterator-range overload over the whole container.
    return joinStrings(container.begin(), container.end(), separator);
}
287 * Joins strings from an array with a separator in between.
289 * \param[in] array Array of strings to join.
290 * \param[in] separator String to put in between the joined strings.
291 * \tparam count Deduced number of elements in \p array.
292 * \returns All strings from `array` concatenated with `separator`
294 * \throws std::bad_alloc if out of memory.
/*! \brief
 * Joins strings from an array with a separator in between.
 *
 * \param[in] array      Array of strings to join.
 * \param[in] separator  String to put in between the joined strings.
 * \tparam    count      Deduced number of elements in \p array.
 * \returns   All strings from `array` concatenated with `separator`
 *     between each pair.
 * \throws    std::bad_alloc if out of memory.
 */
template <size_t count>
std::string joinStrings(const char *const (&array)[count], const char *separator)
{
    // The array decays to a pointer range [array, array + count).
    return joinStrings(array, array + count, separator);
}
303 * Converts a boolean to a "true"/"false" string.
/*! \brief
 * Converts a boolean to a "true"/"false" string.
 *
 * \param[in] value Value to convert.
 * \returns   Pointer to the string literal "true" or "false"; the caller
 *     must not free or modify it.
 */
static inline const char *boolToString(bool value)
{
    return value ? "true" : "false";
}
313 * Splits a string to whitespace separated tokens.
315 * \param[in] str String to process.
316 * \returns \p str split into tokens at each whitespace sequence.
317 * \throws std::bad_alloc if out of memory.
319 * This function works like `split` in Python, i.e., leading and trailing
320 * whitespace is ignored, and consecutive whitespaces are treated as a single
/*! \brief
 * Splits a string to whitespace separated tokens.
 *
 * \param[in] str String to process.
 * \returns   \p str split into tokens at each whitespace sequence.
 * \throws    std::bad_alloc if out of memory.
 *
 * This function works like `split` in Python, i.e., leading and trailing
 * whitespace is ignored, and consecutive whitespaces are treated as a single
 * separator.
 * Declared here only; the definition lives outside this header.
 */
std::vector<std::string> splitString(const std::string &str);
326 * Replace all occurrences of a string with another string.
328 * \param[in] input Input string.
329 * \param[in] from String to find.
330 * \param[in] to String to use to replace \p from.
331 * \returns Copy of \p input with all occurrences of \p from replaced with \p to.
332 * \throws std::bad_alloc if out of memory.
334 * The replacement is greedy and not recursive: starting from the beginning of
335 * \p input, each match of \p from is replaced with \p to, and the search for
336 * the next match begins after the end of the previous match.
338 * Complexity is O(N), where N is length of output.
340 * \see replaceAllWords()
/*! \brief
 * Replace all occurrences of a string with another string.
 *
 * \param[in] input Input string.
 * \param[in] from  String to find.
 * \param[in] to    String to use to replace \p from.
 * \returns   Copy of \p input with all occurrences of \p from replaced
 *     with \p to.
 * \throws    std::bad_alloc if out of memory.
 *
 * The replacement is greedy and not recursive: starting from the beginning of
 * \p input, each match of \p from is replaced with \p to, and the search for
 * the next match begins after the end of the previous match.
 * Declared here only; the definition lives outside this header.
 *
 * \see replaceAllWords()
 */
std::string replaceAll(const std::string &input,
                       const char *from, const char *to);
344 //! \copydoc replaceAll(const std::string &, const char *, const char *)
/*! \brief
 * Replace all occurrences of a string with another string
 * (overload taking the search and replacement text as string objects).
 *
 * \param[in] input Input string.
 * \param[in] from  String to find.
 * \param[in] to    String to use to replace \p from.
 * \returns   Copy of \p input with all occurrences of \p from replaced
 *     with \p to.
 * \throws    std::bad_alloc if out of memory.
 *
 * Declared here only; the definition lives outside this header.
 */
std::string replaceAll(const std::string &input,
                       const std::string &from, const std::string &to);
348 * Replace whole words with others.
350 * \param[in] input Input string.
351 * \param[in] from String to find.
352 * \param[in] to String to use to replace \p from.
353 * \returns Copy of \p input with all \p from words replaced with \p to.
354 * \throws std::bad_alloc if out of memory.
356 * Works as replaceAll(), but a match is only considered if it is delimited by
357 * non-alphanumeric characters.
/*! \brief
 * Replace whole words with others.
 *
 * \param[in] input Input string.
 * \param[in] from  String to find.
 * \param[in] to    String to use to replace \p from.
 * \returns   Copy of \p input with all \p from words replaced with \p to.
 * \throws    std::bad_alloc if out of memory.
 *
 * Works as replaceAll(), but a match is only considered if it is delimited by
 * non-alphanumeric characters.
 * Declared here only; the definition lives outside this header.
 */
std::string replaceAllWords(const std::string &input,
                            const char *from, const char *to);
363 //! \copydoc replaceAllWords(const std::string &, const char *, const char *)
/*! \brief
 * Replace whole words with others
 * (overload taking the search and replacement text as string objects).
 *
 * \param[in] input Input string.
 * \param[in] from  String to find.
 * \param[in] to    String to use to replace \p from.
 * \returns   Copy of \p input with all \p from words replaced with \p to.
 * \throws    std::bad_alloc if out of memory.
 *
 * Declared here only; the definition lives outside this header.
 */
std::string replaceAllWords(const std::string &input,
                            const std::string &from, const std::string &to);
// Forward declaration: TextLineWrapperSettings names this class as a friend
// before the class itself is defined.
class TextLineWrapper;
370 * Stores settings for line wrapping.
372 * Methods in this class do not throw.
374 * \see TextLineWrapper
/*! \brief
 * Stores settings for line wrapping.
 *
 * Methods in this class do not throw.
 *
 * \see TextLineWrapper
 */
class TextLineWrapperSettings
{
    public:
        /*! \brief
         * Initializes default wrapper settings.
         *
         * Default settings are:
         *  - No maximum line width (only explicit line breaks).
         *  - No continuation characters.
         *  - Do not keep final spaces in input strings.
         *
         * Declared here only; the definition lives outside this header.
         */
        TextLineWrapperSettings();

        /*! \brief
         * Sets the maximum length for output lines.
         *
         * \param[in] length  Maximum length for the lines after wrapping.
         *
         * If this method is not called, or is called with zero \p length, the
         * wrapper has no maximum length (only wraps at explicit line breaks).
         */
        void setLineLength(int length) { maxLength_ = length; }
        /*! \brief
         * Sets the indentation for output lines.
         *
         * \param[in] indent  Number of spaces to add for indentation.
         *
         * If this method is not called, the wrapper does not add indentation.
         */
        void setIndent(int indent) { indent_ = indent; }
        /*! \brief
         * Sets the indentation for first output line after a line break.
         *
         * \param[in] indent  Number of spaces to add for indentation.
         *
         * If this method is not called, or called with \p indent equal to -1,
         * the value set with setIndent() is used.
         */
        void setFirstLineIndent(int indent) { firstLineIndent_ = indent; }
        /*! \brief
         * Sets whether final spaces in input should be kept.
         *
         * \param[in] bKeep  Whether to keep spaces at the end of the input.
         *
         * This means that wrapping a string that ends in spaces also keeps
         * those spaces in the output.  This allows using the wrapper for
         * partial lines where the initial part of the line may end in a space.
         * By default, all trailing whitespace is removed.  Note that this
         * option does not affect spaces before an explicit newline.
         */
        void setKeepFinalSpaces(bool bKeep) { bKeepFinalSpaces_ = bKeep; }
        /*! \brief
         * Sets a continuation marker for wrapped lines.
         *
         * \param[in] continuationChar  Character to use to mark continuation
         *     lines.
         *
         * If set to non-zero character code, this character is added at the
         * end of each line where a line break is added by TextLineWrapper
         * (but not after lines produced by explicit line breaks).
         * The default (\c '\0') is to not add continuation markers.
         *
         * Note that currently, the continuation char may cause the output line
         * length to exceed the value set with setLineLength() by at most two
         * characters.
         */
        void setContinuationChar(char continuationChar)
        {
            continuationChar_ = continuationChar;
        }

        //! Returns the maximum length set with setLineLength().
        int lineLength() const { return maxLength_; }
        //! Returns the indentation set with setIndent().
        int indent() const { return indent_; }
        /*! \brief
         * Returns the indentation set with setFirstLineIndent().
         *
         * If setFirstLineIndent() has not been called or has been called with
         * -1, indent() is returned.
         */
        int firstLineIndent() const
        {
            // Any negative stored value means "not set": fall back to indent_.
            return (firstLineIndent_ >= 0 ? firstLineIndent_ : indent_);
        }

    private:
        //! Maximum length of output lines, or <= 0 if no limit.
        int  maxLength_;
        //! Number of spaces to indent each output line with.
        int  indent_;
        /*! \brief
         * Number of spaces to indent the first line after a newline.
         *
         * If -1, \a indent_ is used.
         */
        int  firstLineIndent_;
        //! Whether to keep spaces at end of input.
        bool bKeepFinalSpaces_;
        //! If not \c '\0', mark each wrapping point with this character.
        char continuationChar_;

        //! Needed to access the members.
        friend class TextLineWrapper;
};
487 * Wraps lines to a predefined length.
489 * This utility class wraps lines at word breaks to produce lines that are not
490 * longer than a predefined length. Explicit newlines ('\\n') are preserved.
491 * Only space is considered a word separator. If a single word exceeds the
492 * maximum line length, it is still printed on a single line.
493 * Extra whitespace is stripped from the end of produced lines.
494 * Other options on the wrapping, such as the line length or indentation,
495 * can be changed using a TextLineWrapperSettings object.
497 * Two interfaces to do the wrapping are provided:
498 * -# High-level interface using either wrapToString() (produces a single
499 * string with embedded newlines) or wrapToVector() (produces a vector of
500 * strings with each line as one element).
501 * These methods operate on std::string and wrap the entire input string.
502 * -# Low-level interface using findNextLine() and formatLine().
503 * findNextLine() operates either on a C string or an std::string, and does
504 * not do any memory allocation (so it does not throw). It finds the next
505 * line to be wrapped, considering the wrapping settings.
506 * formatLine() does whitespace operations on the line found by
507 * findNextLine() and returns an std::string.
508 * These methods allow custom wrapping implementation to either avoid
509 * exceptions or to wrap only a part of the input string.
513 gmx::TextLineWrapper wrapper;
514 wrapper.settings().setLineLength(78);
515 printf("%s\n", wrapper.wrapToString(textToWrap).c_str());
520 class TextLineWrapper
524 * Constructs a new line wrapper with default settings.
532 * Constructs a new line wrapper with given settings.
534 * \param[in] settings Wrapping settings.
538 explicit TextLineWrapper(const TextLineWrapperSettings
&settings
)
539 : settings_(settings
)
544 * Provides access to settings of this wrapper.
546 * \returns The settings object for this wrapper.
548 * The returned object can be used to modify settings for the wrapper.
549 * All subsequent calls to wrapToString() and wrapToVector() use the
554 TextLineWrapperSettings
&settings() { return settings_
; }
556 //! Returns true if the wrapper would not modify the input string.
557 bool isTrivial() const;
560 * Finds the next line to be wrapped.
562 * \param[in] input String to wrap.
563 * \param[in] lineStart Index of first character of the line to find.
564 * \returns Index of first character of the next line.
566 * If this is the last line, returns the length of \p input.
567 * In determining the length of the returned line, this function
568 * considers the maximum line length, leaving space for indentation,
569 * and also whitespace stripping behavior.
570 * Thus, the line returned may be longer than the maximum line length
571 * if it has leading and/or trailing space.
572 * When wrapping a line on a space (not on an explicit line break),
573 * the returned index is always on a non-whitespace character after the
576 * To iterate over lines in a string, use the following code:
578 gmx::TextLineWrapper wrapper;
579 // <set desired wrapping settings>
580 size_t lineStart = 0;
581 size_t length = input.length();
582 while (lineStart < length)
584 size_t nextLineStart = wrapper.findNextLine(input, lineStart);
585 std::string line = wrapper.formatLine(input, lineStart, nextLineStart));
586 // <do something with the line>
587 lineStart = nextLineStart;
594 size_t findNextLine(const char *input
, size_t lineStart
) const;
595 //! \copydoc findNextLine(const char *, size_t)const
596 size_t findNextLine(const std::string
&input
, size_t lineStart
) const;
598 * Formats a single line for output according to wrapping settings.
600 * \param[in] input Input string.
601 * \param[in] lineStart Index of first character of the line to format.
602 * \param[in] lineEnd Index of first character of the next line.
603 * \returns The line with leading and/or trailing whitespace removed
604 * and indentation applied.
605 * \throws std::bad_alloc if out of memory.
607 * Intended to be used on the lines found by findNextLine().
608 * When used with the lines returned from findNextLine(), the returned
609 * line conforms to the wrapper settings.
610 * Trailing whitespace is always stripped (including any newlines,
611 * i.e., the return value does not contain a newline).
613 std::string
formatLine(const std::string
&input
,
614 size_t lineStart
, size_t lineEnd
) const;
617 * Formats a string, producing a single string with all the lines.
619 * \param[in] input String to wrap.
620 * \returns \p input with added newlines such that maximum line
621 * length is not exceeded.
622 * \throws std::bad_alloc if out of memory.
624 * Newlines in the input are preserved, including terminal newlines.
625 * Note that if the input does not contain a terminal newline, the
626 * output does not either.
628 std::string
wrapToString(const std::string
&input
) const;
630 * Formats a string, producing a vector with all the lines.
632 * \param[in] input String to wrap.
633 * \returns \p input split into lines such that maximum line length
635 * \throws std::bad_alloc if out of memory.
637 * The strings in the returned vector do not contain newlines at the
639 * Note that a single terminal newline does not affect the output:
640 * "line\\n" and "line" both produce the same output (but "line\\n\\n"
641 * produces two lines, the second of which is empty).
643 std::vector
<std::string
> wrapToVector(const std::string
&input
) const;
646 TextLineWrapperSettings settings_
;