Build system improvements
[ustl.git] / ustring.cc
bloba0087d69e9e642f4252ded208a6e4f5f27c05d81
1 // This file is part of the ustl library, an STL implementation.
2 //
3 // Copyright (C) 2005 by Mike Sharov <msharov@users.sourceforge.net>
4 // This file is free software, distributed under the MIT License.
5 //
6 // ustring.cpp
7 //
8 // STL basic_string equivalent functionality.
9 //
11 #include "ustring.h"
12 #include "mistream.h"
13 #include "mostream.h"
14 #include "ualgo.h"
15 #include <stdio.h> // for vsnprintf (in string::format)
17 namespace ustl {
19 //----------------------------------------------------------------------
21 const uoff_t string::npos;
22 const string::size_type string::size_Terminator;
23 const string::value_type string::c_Terminator;
24 const char string::empty_string[string::size_Terminator] = "";
26 //----------------------------------------------------------------------
28 /// Creates an empty string.
29 string::string (void)
30 : memblock ()
32 relink (VectorBlock(empty_string)-1);
35 /// Assigns itself the value of string \p s
36 string::string (const string& s)
37 : memblock ((s.size() + size_Terminator) & (s.is_linked()-1)) // Allocate with terminator if not linked (can't call virtuals from base ctor)
39 if (s.is_linked())
40 relink (s.c_str(), s.size());
41 else {
42 copy_n (s.begin(), size(), begin());
43 relink (begin(), size() - size_Terminator); // --m_Size
47 /// Links to \p s
48 string::string (const_pointer s)
49 : memblock ()
51 if (!s)
52 s = empty_string;
53 relink (s, strlen(s));
56 /// Creates a string of length \p n filled with character \p c.
57 string::string (size_type n, value_type c)
58 : memblock (n + size_Terminator) // because base ctor can't call virtuals of this class
60 relink (begin(), size() - size_Terminator); // --m_Size
61 fill_n (begin(), n, c);
62 at(n) = c_Terminator;
65 /// Resize the string to \p n characters. New space contents is undefined.
66 void string::resize (size_type n)
68 if (!(n | memblock::capacity()))
69 return (relink (VectorBlock(empty_string)-1));
70 memblock::resize (n);
71 at(n) = c_Terminator;
74 /// Assigns itself the value of string \p s
75 void string::assign (const_pointer s)
77 if (!s)
78 s = empty_string;
79 assign (s, strlen (s));
82 /// Assigns itself the value of string \p s of length \p len.
83 void string::assign (const_pointer s, size_type len)
85 while (len && s[len - 1] == c_Terminator)
86 -- len;
87 resize (len);
88 copy (s, len);
91 /// Appends to itself the value of string \p s of length \p len.
92 void string::append (const_pointer s)
94 if (!s)
95 s = empty_string;
96 append (s, strlen (s));
99 /// Appends to itself the value of string \p s of length \p len.
100 void string::append (const_pointer s, size_type len)
102 while (len && s[len - 1] == c_Terminator)
103 -- len;
104 resize (size() + len);
105 copy_n (s, len, end() - len);
108 /// Appends to itself \p n characters of value \p c.
109 void string::append (size_type n, value_type c)
111 resize (size() + n);
112 fill_n (end() - n, n, c);
115 /// Copies into itself at offset \p start, the value of string \p p of length \p n.
116 string::size_type string::copyto (pointer p, size_type n, const_iterator start) const
118 assert (p && n);
119 if (!start)
120 start = begin();
121 const size_type btc = min(n - size_Terminator, size());
122 copy_n (start, btc, p);
123 p[btc] = c_Terminator;
124 return (btc + size_Terminator);
127 /// Returns comparison value regarding string \p s.
128 /// The return value is:
129 /// \li 1 if this string is greater (by value, not length) than string \p s
130 /// \li 0 if this string is equal to string \p s
131 /// \li -1 if this string is less than string \p s
133 /*static*/ int string::compare (const_iterator first1, const_iterator last1, const_iterator first2, const_iterator last2)
135 assert (first1 <= last1 && (first2 <= last2 || !last2) && "Negative ranges result in memory allocation errors.");
136 const size_type len1 = distance (first1, last1), len2 = distance (first2, last2);
137 const int rvbylen = sign (int(len1 - len2));
138 int rv = memcmp (first1, first2, min (len1, len2));
139 return (rv ? rv : rvbylen);
142 /// Returns true if this string is equal to string \p s.
143 bool string::operator== (const_pointer s) const
145 if (!s)
146 s = empty_string;
147 return (size() == strlen(s) && 0 == memcmp (c_str(), s, size()));
150 /// Returns the beginning of character \p i.
151 string::const_iterator string::wiat (uoff_t i) const
153 utf8in_iterator<string::const_iterator> cfinder (begin());
154 cfinder += i;
155 return (cfinder.base());
158 /// Inserts wide character \p c at \p ipo \p n times as a UTF-8 string.
160 /// \p ipo is a byte position, not a character position, and is intended
161 /// to be obtained from one of the find functions. Generally you are not
162 /// able to know the character position in a localized string; different
163 /// languages will have different character counts, so use find instead.
165 void string::insert (const uoff_t ipo, wchar_t c, size_type n)
167 iterator ip (iat(ipo));
168 ip = iterator (memblock::insert (memblock::iterator(ip), n * Utf8Bytes(c)));
169 fill_n (utf8out (ip), n, c);
170 *end() = c_Terminator;
173 /// Inserts sequence of wide characters at \p ipo (byte position from a find call)
174 void string::insert (const uoff_t ipo, const wchar_t* first, const wchar_t* last, const size_type n)
176 iterator ip (iat(ipo));
177 size_type nti = distance (first, last), bti = 0;
178 for (uoff_t i = 0; i < nti; ++ i)
179 bti += Utf8Bytes(first[i]);
180 ip = iterator (memblock::insert (memblock::iterator(ip), n * bti));
181 utf8out_iterator<string::iterator> uout (utf8out (ip));
182 for (uoff_t j = 0; j < n; ++ j)
183 for (uoff_t k = 0; k < nti; ++ k, ++ uout)
184 *uout = first[k];
185 *end() = c_Terminator;
188 /// Inserts character \p c into this string at \p start.
189 string::iterator string::insert (iterator start, const_reference c, size_type n)
191 start = iterator (memblock::insert (memblock::iterator(start), n));
192 fill_n (start, n, c);
193 *end() = c_Terminator;
194 return (start);
197 /// Inserts \p count instances of string \p s at offset \p start.
198 string::iterator string::insert (iterator start, const_pointer s, size_type n)
200 if (!s)
201 s = empty_string;
202 return (insert (start, s, s + strlen(s), n));
205 /// Inserts [first,last] \p n times.
206 string::iterator string::insert (iterator start, const_pointer first, const_pointer last, size_type n)
208 assert (first <= last);
209 assert (begin() <= start && end() >= start);
210 assert ((first < begin() || first >= end() || size() + abs_distance(first,last) < capacity()) && "Insertion of self with autoresize is not supported");
211 start = iterator (memblock::insert (memblock::iterator(start), distance(first, last) * n));
212 fill (memblock::iterator(start), first, distance(first, last), n);
213 *end() = c_Terminator;
214 return (start);
217 /// Erases \p size bytes at \p ep.
218 string::iterator string::erase (iterator ep, size_type n)
220 string::iterator rv = memblock::erase (memblock::iterator(ep), n);
221 *end() = c_Terminator;
222 return (rv);
225 /// Erases \p n bytes at byte offset \p epo.
226 void string::erase (uoff_t epo, size_type n)
228 erase (iat(epo), n);
231 /// Replaces range [\p start, \p start + \p len] with string \p s.
232 void string::replace (iterator first, iterator last, const_pointer s)
234 if (!s)
235 s = empty_string;
236 replace (first, last, s, s + strlen(s));
239 /// Replaces range [\p start, \p start + \p len] with \p count instances of string \p s.
240 void string::replace (iterator first, iterator last, const_pointer i1, const_pointer i2, size_type n)
242 assert (first <= last);
243 assert (n || distance(first, last));
244 assert (first >= begin() && first <= end() && last >= first && last <= end());
245 assert ((i1 < begin() || i1 >= end() || abs_distance(i1,i2) * n + size() < capacity()) && "Replacement by self can not autoresize");
246 const size_type bte = distance(first, last), bti = distance(i1, i2) * n;
247 if (bti < bte)
248 first = iterator (memblock::erase (memblock::iterator(first), bte - bti));
249 else if (bte < bti)
250 first = iterator (memblock::insert (memblock::iterator(first), bti - bte));
251 fill (memblock::iterator(first), i1, distance(i1, i2), n);
252 *end() = c_Terminator;
255 /// Returns the offset of the first occurence of \p c after \p pos.
256 uoff_t string::find (const_reference c, uoff_t pos) const
258 const_iterator found = ::ustl::find (iat(pos), end(), c);
259 return (found < end() ? distance(begin(),found) : npos);
262 /// Returns the offset of the first occurence of substring \p s of length \p n after \p pos.
263 uoff_t string::find (const string& s, uoff_t pos) const
265 if (s.empty() || s.size() > size() - pos)
266 return (npos);
267 const uoff_t endi = s.size() - 1;
268 const_reference endchar = s[endi];
269 uoff_t lastPos = endi;
270 while (lastPos-- && s[lastPos] != endchar) ;
271 const size_type skip = endi - lastPos;
272 const_iterator i = iat(pos) + endi;
273 for (; i < end() && (i = ::ustl::find (i, end(), endchar)) < end(); i += skip)
274 if (memcmp (i - endi, s.c_str(), s.size()) == 0)
275 return (distance (begin(), i) - endi);
276 return (npos);
279 /// Returns the offset of the last occurence of character \p c before \p pos.
280 uoff_t string::rfind (const_reference c, uoff_t pos) const
282 for (int i = min(pos,size()-1); i >= 0; --i)
283 if (at(i) == c)
284 return (i);
285 return (npos);
288 /// Returns the offset of the last occurence of substring \p s of size \p n before \p pos.
289 uoff_t string::rfind (const string& s, uoff_t pos) const
291 const_iterator d = iat(pos) - 1;
292 const_iterator sp = begin() + s.size() - 1;
293 const_iterator m = s.end() - 1;
294 for (long int i = 0; d > sp && size_type(i) < s.size(); -- d)
295 for (i = 0; size_type(i) < s.size(); ++ i)
296 if (m[-i] != d[-i])
297 break;
298 return (d > sp ? distance (begin(), d + 2 - s.size()) : npos);
301 /// Returns the offset of the first occurence of one of characters in \p s of size \p n after \p pos.
302 uoff_t string::find_first_of (const string& s, uoff_t pos) const
304 for (uoff_t i = min(pos,size()); i < size(); ++ i)
305 if (s.find (at(i)) != npos)
306 return (i);
307 return (npos);
310 /// Returns the offset of the first occurence of one of characters not in \p s of size \p n after \p pos.
311 uoff_t string::find_first_not_of (const string& s, uoff_t pos) const
313 for (uoff_t i = min(pos,size()); i < size(); ++ i)
314 if (s.find (at(i)) == npos)
315 return (i);
316 return (npos);
319 /// Returns the offset of the last occurence of one of characters in \p s of size \p n before \p pos.
320 uoff_t string::find_last_of (const string& s, uoff_t pos) const
322 for (int i = min(pos,size()-1); i >= 0; -- i)
323 if (s.find (at(i)) != npos)
324 return (i);
325 return (npos);
328 /// Returns the offset of the last occurence of one of characters not in \p s of size \p n before \p pos.
329 uoff_t string::find_last_not_of (const string& s, uoff_t pos) const
331 for (int i = min(pos,size()-1); i >= 0; -- i)
332 if (s.find (at(i)) == npos)
333 return (i);
334 return (npos);
337 /// Equivalent to a vsprintf on the string.
338 int string::vformat (const char* fmt, va_list args)
340 #if HAVE_VA_COPY
341 va_list args2;
342 #else
343 #define args2 args
344 #undef __va_copy
345 #define __va_copy(x,y)
346 #endif
347 size_t rv = size();
348 do {
349 reserve (rv);
350 __va_copy (args2, args);
351 rv = vsnprintf (data(), memblock::capacity(), fmt, args2);
352 rv = min (rv, memblock::capacity());
353 } while (rv > capacity());
354 resize (min (rv, capacity()));
355 return (rv);
358 /// Equivalent to a sprintf on the string.
359 int string::format (const char* fmt, ...)
361 va_list args;
362 va_start (args, fmt);
363 const int rv = vformat (fmt, args);
364 va_end (args);
365 return (rv);
368 /// Returns the number of bytes required to write this object to a stream.
369 size_t string::stream_size (void) const
371 return (Utf8Bytes(size()) + size());
374 /// Reads the object from stream \p os
375 void string::read (istream& is)
377 char szbuf [8];
378 is >> szbuf[0];
379 size_t szsz (Utf8SequenceBytes (szbuf[0]) - 1), n = 0;
380 is.verify_remaining ("read", "ustl::string", szsz);
381 is.read (szbuf + 1, szsz);
382 n = *utf8in(szbuf);
383 is.verify_remaining ("read", "ustl::string", n);
384 resize (n);
385 is.read (data(), size());
388 /// Writes the object to stream \p os
389 void string::write (ostream& os) const
391 const written_size_type sz (size());
392 assert (sz == size() && "No support for writing strings larger than 4G");
394 char szbuf [8];
395 utf8out_iterator<char*> szout (szbuf);
396 *szout = sz;
397 size_t szsz = distance (szbuf, szout.base());
399 os.verify_remaining ("write", "ustl::string", szsz + sz);
400 os.write (szbuf, szsz);
401 os.write (cdata(), sz);
404 /// Returns a hash value for [first, last)
405 /*static*/ hashvalue_t string::hash (const char* first, const char* last)
407 hashvalue_t h = 0;
408 // This has the bits flowing into each other from both sides of the number
409 for (; first < last; ++ first)
410 h = *first + ((h << 7) | (h >> (BitsInType(hashvalue_t) - 7)));
411 return (h);
414 } // namespace ustl