3 * This source code is part of
7 * GROningen MAchine for Chemical Simulations
9 * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
10 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
11 * Copyright (c) 2001-2009, The GROMACS development team,
12 * check out http://www.gromacs.org for more information.
14 * This program is free software; you can redistribute it and/or
15 * modify it under the terms of the GNU General Public License
16 * as published by the Free Software Foundation; either version 2
17 * of the License, or (at your option) any later version.
19 * If you want to redistribute modifications, please consider that
20 * scientific software is very special. Version control is crucial -
21 * bugs must be traceable. We will be happy to consider code for
22 * inclusion in the official distribution, but derived work must not
23 * be called official GROMACS. Details are found in the README & COPYING
24 * files - if they are missing, get the official version at www.gromacs.org.
26 * To help us fund GROMACS development, we humbly ask that you cite
27 * the papers on the package - you can find them in the top README file.
29 * For more info, check our website at http://www.gromacs.org
/*! \file
 * \brief
 * Declares common string utility and formatting routines.
 *
 * \author Teemu Murtola <teemu.murtola@cbr.su.se>
 * \ingroup module_utility
 */
39 #ifndef GMX_UTILITY_STRINGUTIL_H
40 #define GMX_UTILITY_STRINGUTIL_H
/*! \brief
 * Tests whether a string starts with another string.
 *
 * \param[in] str    String to process.
 * \param[in] prefix Prefix to find.
 * \returns   true if \p str starts with \p prefix.
 *
 * Returns true if \p prefix is empty.
 */
inline bool startsWith(const std::string &str, const std::string &prefix)
{
    // compare() clamps the requested length to the size of str, so a prefix
    // longer than str compares unequal instead of reading out of range.
    return str.compare(0, prefix.length(), prefix) == 0;
}
//! \copydoc startsWith(const std::string &, const std::string &)
inline bool startsWith(const char *str, const char *prefix)
{
    // Only the first strlen(prefix) characters are compared; an empty prefix
    // compares zero characters and therefore always matches.
    return std::strncmp(str, prefix, std::strlen(prefix)) == 0;
}
/*! \brief
 * Tests whether a string ends with another string.
 *
 * \param[in] str    String to process.
 * \param[in] suffix Suffix to find.
 * \returns   true if \p str ends with \p suffix.
 *
 * Returns true if \p suffix is NULL or empty.
 */
bool endsWith(const std::string &str, const char *suffix);
/*! \brief
 * Removes a suffix from a string.
 *
 * \param[in] str    String to process.
 * \param[in] suffix Suffix to remove.
 * \returns   \p str with \p suffix removed, or \p str unmodified if it does
 *      not end with \p suffix.
 * \throws    std::bad_alloc if out of memory.
 *
 * Returns \p str if \p suffix is NULL or empty.
 */
std::string stripSuffixIfPresent(const std::string &str, const char *suffix);
/*! \brief
 * Format a string (snprintf() wrapper).
 *
 * \throws  std::bad_alloc if out of memory.
 *
 * This function works like sprintf(), except that it returns an std::string
 * instead of requiring a preallocated buffer.  Arbitrary length output is
 * supported.
 */
std::string formatString(const char *fmt, ...);
/*! \brief
 * Joins strings in an array to a single string.
 *
 * \param[in] sarray  Array of strings to concatenate.
 * \param[in] count   Number of elements in \p sarray to concatenate.
 * \returns   All strings in \p sarray joined, ensuring at least one space
 *      between the strings.
 * \throws    std::bad_alloc if out of memory.
 *
 * The strings in the \p sarray array are concatenated, adding a single space
 * between the strings if there is no whitespace in the end of a string.
 * Terminal whitespace is removed.
 */
std::string concatenateStrings(const char * const *sarray, size_t count);
/*! \brief
 * Convenience overload for joining strings in a C array (static data).
 *
 * \param[in] sarray  Array of strings to concatenate.
 * \tparam    count   Deduced number of elements in \p sarray.
 * \returns   All strings in \p sarray joined, ensuring at least one space
 *      between the strings.
 * \throws    std::bad_alloc if out of memory.
 *
 * \see concatenateStrings(const char * const *, size_t)
 */
template <size_t count>
std::string concatenateStrings(const char * const (&sarray)[count])
{
    // The array decays to a pointer here, so this forwards to the
    // pointer-and-count overload with the deduced element count.
    return concatenateStrings(sarray, count);
}
/*! \brief
 * Replace all occurrences of a string with another string.
 *
 * \param[in] input  Input string.
 * \param[in] from   String to find.
 * \param[in] to     String to use to replace \p from.
 * \returns   \p input with all occurrences of \p from replaced with \p to.
 * \throws    std::bad_alloc if out of memory.
 *
 * The replacement is greedy and not recursive: starting from the beginning of
 * \p input, each match of \p from is replaced with \p to, and the search for
 * the next match begins after the end of the previous match.
 *
 * Complexity is O(N), where N is length of output.
 *
 * \see replaceAllWords()
 */
std::string replaceAll(const std::string &input,
                       const char *from, const char *to);
/*! \brief
 * Replace whole words with others.
 *
 * \param[in] input  Input string.
 * \param[in] from   String to find.
 * \param[in] to     String to use to replace \p from.
 * \returns   \p input with all \p from words replaced with \p to.
 * \throws    std::bad_alloc if out of memory.
 *
 * Works as replaceAll(), but a match is only considered if it is delimited by
 * non-alphanumeric characters.
 */
std::string replaceAllWords(const std::string &input,
                            const char *from, const char *to);
class TextLineWrapper;

/*! \brief
 * Stores settings for line wrapping.
 *
 * Methods in this class do not throw.
 *
 * \see TextLineWrapper
 *
 * \ingroup module_utility
 */
class TextLineWrapperSettings
{
    public:
        /*! \brief
         * Initializes default wrapper settings.
         *
         * Default settings are:
         *  - No maximum line width (only explicit line breaks).
         *  - No indentation.
         *  - No continuation characters.
         *  - Ignore whitespace after an explicit newline.
         */
        TextLineWrapperSettings();

        /*! \brief
         * Sets the maximum length for output lines.
         *
         * \param[in] length  Maximum length for the lines after wrapping.
         *
         * If this method is not called, or is called with zero \p length, the
         * wrapper has no maximum length (only wraps at explicit line breaks).
         */
        void setLineLength(int length) { maxLength_ = length; }
        /*! \brief
         * Sets the indentation for output lines.
         *
         * \param[in] indent  Number of spaces to add for indentation.
         *
         * If this method is not called, the wrapper does not add indentation.
         */
        void setIndent(int indent) { indent_ = indent; }
        /*! \brief
         * Sets the indentation for first output line after a line break.
         *
         * \param[in] indent  Number of spaces to add for indentation.
         *
         * If this method is not called, or called with \p indent equal to -1,
         * the value set with setIndent() is used.
         */
        void setFirstLineIndent(int indent) { firstLineIndent_ = indent; }
        /*! \brief
         * Sets whether to remove spaces after an explicit newline.
         *
         * \param[in] bStrip  If true, spaces after newline are ignored.
         *
         * If not removed, the space is added to the indentation set with
         * setIndent().
         * The default is to strip such whitespace.
         */
        void setStripLeadingWhitespace(bool bStrip)
        {
            bStripLeadingWhitespace_ = bStrip;
        }
        /*! \brief
         * Sets a continuation marker for wrapped lines.
         *
         * \param[in] continuationChar  Character to use to mark continuation
         *      lines.
         *
         * If set to non-zero character code, this character is added at the
         * end of each line where a line break is added by TextLineWrapper
         * (but not after lines produced by explicit line breaks).
         * The default (\c '\0') is to not add continuation markers.
         *
         * Note that currently, the continuation char may cause the output line
         * length to exceed the value set with setLineLength() by at most two
         * characters.
         */
        void setContinuationChar(char continuationChar)
        {
            continuationChar_ = continuationChar;
        }

        //! Returns the maximum length set with setLineLength().
        int lineLength() const { return maxLength_; }
        //! Returns the indentation set with setIndent().
        int indent() const { return indent_; }
        /*! \brief
         * Returns the indentation set with setFirstLineIndent().
         *
         * If setFirstLineIndent() has not been called or has been called with
         * -1, indent() is returned.
         */
        int firstLineIndent() const
        {
            // Any negative value means "not set": fall back to indent_.
            return (firstLineIndent_ >= 0 ? firstLineIndent_ : indent_);
        }

    private:
        //! Maximum length of output lines, or <= 0 if no limit.
        int                     maxLength_;
        //! Number of spaces to indent each output line with.
        int                     indent_;
        /*! \brief
         * Number of spaces to indent the first line after a newline.
         *
         * If -1, \a indent_ is used.
         */
        int                     firstLineIndent_;
        //! Whether to ignore or preserve space after a newline.
        bool                    bStripLeadingWhitespace_;
        //! If not \c '\0', mark each wrapping point with this character.
        char                    continuationChar_;

        //! Needed to access the members.
        friend class TextLineWrapper;
};
310 * Wraps lines to a predefined length.
312 * This utility class wraps lines at word breaks to produce lines that are not
313 * longer than a predefined length. Explicit newlines ('\\n') are preserved.
314 * Only space is considered a word separator. If a single word exceeds the
315 * maximum line length, it is still printed on a single line.
316 * Extra whitespace is stripped from the end of produced lines.
317 * Other options on the wrapping, such as the line length or indentation,
318 * can be changed using a TextLineWrapperSettings object.
320 * Two interfaces to do the wrapping are provided:
321 * -# High-level interface using either wrapToString() (produces a single
322 * string with embedded newlines) or wrapToVector() (produces a vector of
323 * strings with each line as one element).
324 * These methods operate on std::string and wrap the entire input string.
325 * -# Low-level interface using findNextLine() and formatLine().
326 * findNextLine() operates either on a C string or an std::string, and does
327 * not do any memory allocation (so it does not throw). It finds the next
328 * line to be wrapped, considering the wrapping settings.
329 * formatLine() does whitespace operations on the line found by
330 * findNextLine() and returns an std::string.
331 * These methods allow custom wrapping implementation to either avoid
332 * exceptions or to wrap only a part of the input string.
336 gmx::TextLineWrapper wrapper;
337 wrapper.settings().setLineLength(78);
338 printf("%s\n", wrapper.wrapToString(textToWrap).c_str());
342 * \ingroup module_utility
344 class TextLineWrapper
348 * Constructs a new line wrapper with default settings.
356 * Constructs a new line wrapper with given settings.
358 * \param[in] settings Wrapping settings.
362 explicit TextLineWrapper(const TextLineWrapperSettings
&settings
)
363 : settings_(settings
)
368 * Provides access to settings of this wrapper.
370 * \returns The settings object for this wrapper.
372 * The returned object can be used to modify settings for the wrapper.
373 * All subsequent calls to wrapToString() and wrapToVector() use the
378 TextLineWrapperSettings
&settings() { return settings_
; }
381 * Finds the next line to be wrapped.
383 * \param[in] input String to wrap.
384 * \param[in] lineStart Index of first character of the line to find.
385 * \returns Index of first character of the next line.
387 * If this is the last line, returns the length of \p input.
388 * In determining the length of the returned line, this function
389 * considers the maximum line length, leaving space for indentation,
390 * and also whitespace stripping behavior.
391 * Thus, the line returned may be longer than the maximum line length
392 * if it has leading and/or trailing space.
393 * When wrapping a line on a space (not on an explicit line break),
394 * the returned index is always on a non-whitespace character after the
397 * To iterate over lines in a string, use the following code:
399 gmx::TextLineWrapper wrapper;
400 // <set desired wrapping settings>
401 size_t lineStart = 0;
402 size_t length = input.length();
403 while (lineStart < length)
405 size_t nextLineStart = wrapper.findNextLine(input, lineStart);
406 std::string line = wrapper.formatLine(input, lineStart, nextLineStart));
407 // <do something with the line>
408 lineStart = nextLineStart;
415 size_t findNextLine(const char *input
, size_t lineStart
) const;
416 //! \copydoc findNextLine(const char *, size_t) const
417 size_t findNextLine(const std::string
&input
, size_t lineStart
) const;
419 * Formats a single line for output according to wrapping settings.
421 * \param[in] input Input string.
422 * \param[in] lineStart Index of first character of the line to format.
423 * \param[in] lineEnd Index of first character of the next line.
424 * \returns The line with leading and/or trailing whitespace removed
425 * and indentation applied.
426 * \throws std::bad_alloc if out of memory.
428 * Intended to be used on the lines found by findNextLine().
429 * When used with the lines returned from findNextLine(), the returned
430 * line conforms to the wrapper settings.
431 * Trailing whitespace is always stripped (including any newlines,
432 * i.e., the return value does not contain a newline).
434 std::string
formatLine(const std::string
&input
,
435 size_t lineStart
, size_t lineEnd
) const;
438 * Formats a string, producing a single string with all the lines.
440 * \param[in] input String to wrap.
441 * \returns \p input with added newlines such that maximum line
442 * length is not exceeded.
443 * \throws std::bad_alloc if out of memory.
445 * Newlines in the input are preserved, including terminal newlines.
446 * Note that if the input does not contain a terminal newline, the
447 * output does not either.
449 std::string
wrapToString(const std::string
&input
) const;
451 * Formats a string, producing a vector with all the lines.
453 * \param[in] input String to wrap.
454 * \returns \p input split into lines such that maximum line length
456 * \throws std::bad_alloc if out of memory.
458 * The strings in the returned vector do not contain newlines at the
460 * Note that a single terminal newline does not affect the output:
461 * "line\\n" and "line" both produce the same output (but "line\\n\\n"
462 * produces two lines, the second of which is empty).
464 std::vector
<std::string
> wrapToVector(const std::string
&input
) const;
467 TextLineWrapperSettings settings_
;