Build system improvements
[ustl.git] / ustring.h
blob9d4f9a9921f10447f467c4c3dfeac37d326dd8ed
1 // This file is part of the ustl library, an STL implementation.
2 //
3 // Copyright (C) 2005 by Mike Sharov <msharov@users.sourceforge.net>
4 // This file is free software, distributed under the MIT License.
5 //
6 // ustring.h
7 //
9 #ifndef USTRING_H_1249CB7A098A9010763AAC6D37B133CF
10 #define USTRING_H_1249CB7A098A9010763AAC6D37B133CF
12 #include "memblock.h"
13 #include "utf8.h"
14 #include <stdarg.h> // for va_list, va_start, and va_end (in string::format)
16 namespace ustl {
18 /// \class string ustring.h ustl.h
19 /// \ingroup Sequences
20 ///
21 /// \brief STL basic_string<char> equivalent.
22 ///
23 /// An STL container for text string manipulation.
24 /// Differences from C++ standard:
25 /// - string is a class, not a template. Wide characters are assumed to be
26 /// encoded with utf8 at all times except when rendering or editing,
27 /// where you would use a utf8 iterator.
28 /// - format member function - you can, of course, use an \ref ostringstream,
29 /// which also has format functions, but most of the time this way
30 /// is more convenient. Because uSTL does not implement locales,
31 /// format is the only way to create localized strings.
32 /// - const char* cast operator. It is much clearer to use this than having
33 /// to type .c_str() every time.
34 /// - length returns the number of _characters_, not bytes.
35 /// This function is O(N), so use wisely.
36 ///
37 /// An additional note is in order regarding the use of indexes. All indexes
38 /// passed in as arguments or returned by find are byte offsets, not character
39 /// offsets. Likewise, sizes are specified in bytes, not characters. The
40 /// rationale is that there is no way for you to know what is in the string.
41 /// There is no way for you to know how many characters are needed to express
42 /// one thing or another. The only thing you can do to a localized string is
43 /// search for delimiters and modify text between them as opaque blocks. If you
44 /// do anything else, you are hardcoding yourself into a locale! So stop it!
45 ///
46 class string : public memblock {
47 public:
48 typedef char value_type;
49 typedef value_type* pointer;
50 typedef const value_type* const_pointer;
51 typedef wchar_t wvalue_type;
52 typedef wvalue_type* wpointer;
53 typedef const wvalue_type* const_wpointer;
54 typedef pointer iterator;
55 typedef const_pointer const_iterator;
56 typedef value_type& reference;
57 typedef value_type const_reference;
58 typedef ::ustl::reverse_iterator<iterator> reverse_iterator;
59 typedef ::ustl::reverse_iterator<const_iterator> const_reverse_iterator;
60 typedef utf8in_iterator<const_iterator> utf8_iterator;
61 public:
62 static const uoff_t npos = static_cast<uoff_t>(-1); ///< Value that means the end of string.
63 static const value_type c_Terminator = 0; ///< String terminator
64 static const size_type size_Terminator = sizeof(c_Terminator); ///< Most systems terminate strings with '\\0'
65 static const char empty_string [size_Terminator]; ///< An empty string.
66 public:
67 string (void);
68 string (const string& s);
69 inline string (const string& s, uoff_t o, size_type n);
70 inline explicit string (const cmemlink& l);
71 string (const_pointer s);
72 inline string (const_pointer s, size_type len);
73 inline string (const_pointer s1, const_pointer s2);
74 explicit string (size_type n, value_type c = c_Terminator);
75 inline pointer data (void) { return (string::pointer (memblock::data())); }
76 inline const_pointer c_str (void) const { return (string::const_pointer (memblock::cdata())); }
77 inline size_type max_size (void) const { size_type s (memblock::max_size()); return (s - !!s); }
78 inline size_type capacity (void) const { size_type c (memblock::capacity()); return (c - !!c); }
79 void resize (size_type n);
80 inline void clear (void) { resize (0); }
81 inline const_iterator begin (void) const { return (const_iterator (memblock::begin())); }
82 inline iterator begin (void) { return (iterator (memblock::begin())); }
83 inline const_iterator end (void) const { return (const_iterator (memblock::end())); }
84 inline iterator end (void) { return (iterator (memblock::end())); }
85 inline const_reverse_iterator rbegin (void) const { return (const_reverse_iterator (end())); }
86 inline reverse_iterator rbegin (void) { return (reverse_iterator (end())); }
87 inline const_reverse_iterator rend (void) const { return (const_reverse_iterator (begin())); }
88 inline reverse_iterator rend (void) { return (reverse_iterator (begin())); }
89 inline utf8_iterator utf8_begin (void) const { return (utf8_iterator (begin())); }
90 inline utf8_iterator utf8_end (void) const { return (utf8_iterator (end())); }
91 inline const_reference at (uoff_t pos) const { assert (pos <= size() && begin()); return (begin()[pos]); }
92 inline reference at (uoff_t pos) { assert (pos <= size() && begin()); return (begin()[pos]); }
93 inline const_iterator iat (uoff_t pos) const { return (begin() + min (pos, size())); }
94 inline iterator iat (uoff_t pos) { return (begin() + min (pos, size())); }
95 const_iterator wiat (uoff_t i) const;
96 inline iterator wiat (uoff_t i) { return (const_cast<iterator>(const_cast<const string*>(this)->wiat(i))); }
97 inline const_reference back (void) const { return (at(size()-1)); }
98 inline reference back (void) { return (at(size()-1)); }
99 inline size_type length (void) const { return (distance (utf8_begin(), utf8_end())); }
100 inline void append (const_iterator i1, const_iterator i2) { append (i1, distance (i1, i2)); }
101 void append (const_pointer s, size_type len);
102 void append (const_pointer s);
103 void append (size_type n, const_reference c);
104 inline void append (size_type n, wvalue_type c) { insert (size(), c, n); }
105 inline void append (const_wpointer s1, const_wpointer s2) { insert (size(), s1, s2); }
106 inline void append (const_wpointer s) { const_wpointer se (s); for (;se&&*se;++se) ; append (s, se); }
107 inline void append (const string& s) { append (s.begin(), s.end()); }
108 inline void append (const string& s, uoff_t o, size_type n) { append (s.iat(o), s.iat(o+n)); }
109 inline void assign (const_iterator i1, const_iterator i2) { assign (i1, distance (i1, i2)); }
110 void assign (const_pointer s, size_type len);
111 void assign (const_pointer s);
112 inline void assign (const_wpointer s1, const_wpointer s2) { clear(); append (s1, s2); }
113 inline void assign (const_wpointer s1) { clear(); append (s1); }
114 inline void assign (const string& s) { assign (s.begin(), s.end()); }
115 inline void assign (const string& s, uoff_t o, size_type n) { assign (s.iat(o), s.iat(o+n)); }
116 size_type copyto (pointer p, size_type n, const_iterator start = NULL) const;
117 inline int compare (const string& s) const { return (compare (begin(), end(), s.begin(), s.end())); }
118 inline int compare (const_pointer s) const { return (compare (begin(), end(), s, s + strlen(s))); }
119 static int compare (const_iterator first1, const_iterator last1, const_iterator first2, const_iterator last2);
120 inline operator const value_type* (void) const;
121 inline operator value_type* (void);
122 inline const string& operator= (const string& s) { assign (s.begin(), s.end()); return (*this); }
123 inline const string& operator= (const_reference c) { assign (&c, 1); return (*this); }
124 inline const string& operator= (const_pointer s) { assign (s); return (*this); }
125 inline const string& operator= (const_wpointer s) { assign (s); return (*this); }
126 inline const string& operator+= (const string& s) { append (s.begin(), s.size()); return (*this); }
127 inline const string& operator+= (const_reference c) { append (1, c); return (*this); }
128 inline const string& operator+= (const_pointer s) { append (s); return (*this); }
129 inline const string& operator+= (wvalue_type c) { append (1, c); return (*this); }
130 inline const string& operator+= (const_wpointer s) { append (s); return (*this); }
131 inline string operator+ (const string& s) const;
132 inline bool operator== (const string& s) const { return (memblock::operator== (s)); }
133 bool operator== (const_pointer s) const;
134 inline bool operator== (const_reference c) const { return (size() == 1 && c == at(0)); }
135 inline bool operator!= (const string& s) const { return (!operator== (s)); }
136 inline bool operator!= (const_pointer s) const { return (!operator== (s)); }
137 inline bool operator!= (const_reference c) const { return (!operator== (c)); }
138 inline bool operator< (const string& s) const { return (0 > compare (s)); }
139 inline bool operator< (const_pointer s) const { return (0 > compare (s)); }
140 inline bool operator< (const_reference c) const { return (0 > compare (begin(), end(), &c, &c + 1)); }
141 inline bool operator> (const_pointer s) const { return (0 < compare (s)); }
142 void insert (const uoff_t ip, wvalue_type c, size_type n = 1);
143 void insert (const uoff_t ip, const_wpointer first, const_wpointer last, const size_type n = 1);
144 iterator insert (iterator start, const_reference c, size_type n = 1);
145 iterator insert (iterator start, const_pointer s, size_type n = 1);
146 iterator insert (iterator start, const_pointer first, const_iterator last, size_type n = 1);
147 inline void insert (uoff_t ip, const_pointer s, size_type nlen) { insert (iat(ip), s, s + nlen); }
148 inline void insert (uoff_t ip, size_type n, value_type c) { insert (iat(ip), c, n); }
149 inline void insert (uoff_t ip, const string& s, uoff_t sp, size_type slen) { insert (iat(ip), s.iat(sp), s.iat(sp + slen)); }
150 iterator erase (iterator epo, size_type n = 1);
151 void erase (uoff_t epo, size_type n = 1);
152 inline iterator erase (iterator first, const_iterator last) { return (erase (first, size_type(distance(first,last)))); }
153 inline void eraser (uoff_t first, uoff_t last) { erase (iat(first), iat(last)); }
154 inline void push_back (const_reference c) { append (1, c); }
155 inline void push_back (wvalue_type c) { append (1, c); }
156 inline void pop_back (void) { resize (size() - 1); }
157 void replace (iterator first, iterator last, const_pointer s);
158 void replace (iterator first, iterator last, const_pointer i1, const_pointer i2, size_type n = 1);
159 inline void replace (iterator first, iterator last, const string& s) { replace (first, last, s.begin(), s.end()); }
160 inline void replace (iterator first, iterator last, const_pointer s, size_type slen) { replace (first, last, s, s + slen); }
161 inline void replace (iterator first, iterator last, size_type n, value_type c) { replace (first, last, &c, &c + 1, n); }
162 inline void replace (uoff_t rp, size_type n, const string& s) { replace (iat(rp), iat(rp + n), s); }
163 inline void replace (uoff_t rp, size_type n, const string& s, uoff_t sp, size_type slen) { replace (iat(rp), iat(rp + n), s.iat(sp), s.iat(sp + slen)); }
164 inline void replace (uoff_t rp, size_type n, const_pointer s, size_type slen) { replace (iat(rp), iat(rp + n), s, s + slen); }
165 inline void replace (uoff_t rp, size_type n, const_pointer s) { replace (iat(rp), iat(rp + n), string(s)); }
166 inline void replace (uoff_t rp, size_type n, size_type count, value_type c) { replace (iat(rp), iat(rp + n), count, c); }
167 inline string substr (uoff_t o, size_type n) const { return (string (*this, o, n)); }
168 uoff_t find (const_reference c, uoff_t pos = 0) const;
169 uoff_t find (const string& s, uoff_t pos = 0) const;
170 uoff_t rfind (const_reference c, uoff_t pos = npos) const;
171 uoff_t rfind (const string& s, uoff_t pos = npos) const;
172 uoff_t find_first_of (const string& s, uoff_t pos = 0) const;
173 uoff_t find_first_not_of (const string& s, uoff_t pos = 0) const;
174 uoff_t find_last_of (const string& s, uoff_t pos = npos) const;
175 uoff_t find_last_not_of (const string& s, uoff_t pos = npos) const;
176 int vformat (const char* fmt, va_list args);
177 int format (const char* fmt, ...) __attribute__((__format__(__printf__, 2, 3)));
178 void read (istream&);
179 void write (ostream& os) const;
180 size_t stream_size (void) const;
181 static hashvalue_t hash (const char* f1, const char* l1);
182 protected:
183 inline virtual size_type minimumFreeCapacity (void) const { return (size_Terminator); }
186 //----------------------------------------------------------------------
188 /// Assigns itself the value of string \p s
189 inline string::string (const cmemlink& s)
190 : memblock ()
192 assign (const_iterator (s.begin()), s.size());
195 /// Assigns itself a [o,o+n) substring of \p s.
196 inline string::string (const string& s, uoff_t o, size_type n)
197 : memblock()
199 assign (s, o, n);
202 /// Copies the value of \p s of length \p len into itself.
203 inline string::string (const_pointer s, size_type len)
204 : memblock ()
206 assign (s, len);
209 /// Copies into itself the string data between \p s1 and \p s2
210 inline string::string (const_pointer s1, const_pointer s2)
211 : memblock ()
213 assert (s1 <= s2 && "Negative ranges result in memory allocation errors.");
214 assign (s1, s2);
217 /// Returns the pointer to the first character.
218 inline string::operator const string::value_type* (void) const
220 assert ((!end() || *end() == c_Terminator) && "This string is linked to data that is not 0-terminated. This may cause serious security problems. Please assign the data instead of linking.");
221 return (begin());
224 /// Returns the pointer to the first character.
225 inline string::operator string::value_type* (void)
227 assert ((end() && *end() == c_Terminator) && "This string is linked to data that is not 0-terminated. This may cause serious security problems. Please assign the data instead of linking.");
228 return (begin());
231 /// Concatenates itself with \p s
232 inline string string::operator+ (const string& s) const
234 string result (*this);
235 result += s;
236 return (result);
239 //----------------------------------------------------------------------
240 // Operators needed to avoid comparing pointer to pointer
242 #define PTR_STRING_CMP(op, impl) \
243 inline bool op (const char* s1, const string& s2) { return impl; }
244 PTR_STRING_CMP (operator==, (s2 == s1))
245 PTR_STRING_CMP (operator!=, (s2 != s1))
246 PTR_STRING_CMP (operator<, (s2 > s1))
247 PTR_STRING_CMP (operator<=, (s2 >= s1))
248 PTR_STRING_CMP (operator>, (s2 < s1))
249 PTR_STRING_CMP (operator>=, (s2 <= s1))
250 #undef PTR_STRING_CMP
252 //----------------------------------------------------------------------
254 template <typename T>
255 inline hashvalue_t hash_value (const T& v)
256 { return (string::hash (v.begin(), v.end())); }
258 template <>
259 inline hashvalue_t hash_value (const string::const_pointer& v)
260 { return (string::hash (v, v + strlen(v))); }
262 template <>
263 inline hashvalue_t hash_value (const string::pointer& v)
264 { return (string::hash (v, v + strlen(v))); }
266 //----------------------------------------------------------------------
268 } // namespace ustl
270 // Specialization for stream alignment
271 ALIGNOF (ustl::string, alignof (string::value_type()))
273 #endif