2 +----------------------------------------------------------------------+
4 +----------------------------------------------------------------------+
5 | Copyright (c) 2010-present Facebook, Inc. (http://www.facebook.com) |
6 | Copyright (c) 1998-2010 Zend Technologies Ltd. (http://www.zend.com) |
7 +----------------------------------------------------------------------+
8 | This source file is subject to version 2.00 of the Zend license, |
9 | that is bundled with this package in the file LICENSE, and is |
10 | available through the world-wide-web at the following url: |
11 | http://www.zend.com/license/2_00.txt. |
12 | If you did not receive a copy of the Zend license and are unable to |
13 | obtain it through the world-wide-web, please send a note to |
14 | license@zend.com so we can mail you a copy immediately. |
15 +----------------------------------------------------------------------+
18 #ifndef incl_HPHP_ZEND_STRING_H_
19 #define incl_HPHP_ZEND_STRING_H_
21 #include "hphp/zend/zend-string.h"
22 #include "hphp/runtime/base/type-string.h"
25 ///////////////////////////////////////////////////////////////////////////////
/**
 * Low-level string functions PHP uses.
 *
 * 1. If a function returns a char *, it has malloc-ed a new string and it is
 *    the caller's responsibility to free it.
 *
 * 2. If a function takes "int &len" right after the 1st string parameter, it
 *    is the input string's length on entry and the returned string's length
 *    on return.
 *
 * 3. All functions work with binary strings and all returned strings are
 *    NUL terminated, regardless of whether it's a binary string.
 */
/**
 * Copy src to string dst of size siz. At most siz-1 characters
 * will be copied. Always NUL terminates (unless siz == 0).
 * Returns strlen(src); if retval >= siz, truncation occurred.
 *
 * @param dst  destination buffer
 * @param src  source string
 * @param siz  size of the destination buffer
 * @return     strlen(src)
 */
int string_copy(char *dst, const char *src, int siz);
/**
 * Compare two binary strings.
 *
 * The first min(len1, len2) bytes are compared with memcmp, so embedded NUL
 * bytes take part in the comparison.
 *
 * @param s1,len1  first string and its length in bytes
 * @param s2,len2  second string and its length in bytes
 * @return -1 or +1 when the common prefix differs; otherwise len1 - len2
 *         (0 when both strings are identical).
 *
 * NOTE(review): the equal-prefix branch was restored from context (the
 * shorter string orders first) — confirm against the canonical source.
 */
inline int string_strcmp(const char *s1, int len1, const char *s2, int len2) {
  const int minlen = len1 < len2 ? len1 : len2;
  const int retval = memcmp(s1, s2, minlen);
  if (retval == 0) {
    // Common prefix is identical: the length difference decides the order.
    return len1 - len2;
  }
  // Collapse memcmp's arbitrary non-zero result to exactly -1 or +1.
  return (retval > 0) - (retval < 0);
}
/**
 * Compare two binary strings of the first n bytes.
 *
 * @param s1,len1  first string and its length in bytes
 * @param s2,len2  second string and its length in bytes
 * @param len      maximum number of bytes to compare; negative values are
 *                 treated as 0
 * @return the memcmp result when the compared bytes differ; otherwise the
 *         difference of the two lengths after each is capped at len
 *         (0 when the strings agree over the first len bytes).
 *
 * NOTE(review): the trailing `int len` parameter and the "bytes differ"
 * return were restored from context — confirm against the canonical source.
 */
inline int string_strncmp(const char *s1, int len1, const char *s2, int len2,
                          int len) {
  int minlen = len1 < len2 ? len1 : len2;
  if (len < minlen) {
    // A negative limit means "compare nothing".
    if (len < 0) len = 0;
    minlen = len;
  }
  const int retval = memcmp(s1, s2, minlen);
  if (retval != 0) {
    return retval;
  }
  // Equal over the compared prefix: order by length, capped at len.
  return (len < len1 ? len : len1) - (len < len2 ? len : len2);
}
/**
 * Compare two binary strings of the first n bytes, ignore case.
 *
 * Each byte is folded with tolower() before comparison.
 *
 * @param s1,len1  first string and its length in bytes
 * @param s2,len2  second string and its length in bytes
 * @param len      maximum number of bytes to compare; negative values are
 *                 treated as 0
 * @return the difference of the first pair of folded bytes that differ;
 *         otherwise the difference of the two lengths after each is capped
 *         at len (0 when the strings agree over the first len bytes).
 *
 * NOTE(review): the loop and the mismatch return were restored from
 * context — confirm against the canonical source.
 */
inline int string_strncasecmp(const char *s1, int len1,
                              const char *s2, int len2, int len) {
  int minlen = len1 < len2 ? len1 : len2;
  if (len < minlen) {
    // A negative limit means "compare nothing".
    if (len < 0) len = 0;
    minlen = len;
  }
  while (minlen--) {
    // Go through unsigned char so bytes >= 0x80 are valid tolower() inputs.
    const int c1 = tolower(static_cast<unsigned char>(*s1++));
    const int c2 = tolower(static_cast<unsigned char>(*s2++));
    if (c1 != c2) {
      return c1 - c2;
    }
  }
  // Equal over the compared prefix: order by length, capped at len.
  return (len < len1 ? len : len1) - (len < len2 ? len : len2);
}
/**
 * Further comparison helpers; both are defined out of line.
 *
 * NOTE(review): semantics are not visible from this header. string_ncmp
 * presumably compares at most len bytes of s1 and s2, and
 * string_natural_cmp presumably implements a "natural order" comparison
 * with fold_case selecting case-insensitivity — confirm against the
 * definitions.
 */
int string_ncmp(const char *s1, const char *s2, int len);
int string_natural_cmp(char const *a, size_t a_len,
                       char const *b, size_t b_len, int fold_case);
109 * Changing string's cases in place. Return's length is always the same
112 void string_to_case(String
& s
, int (*tocase
)(int));
// Use lambdas wrapping the ctype.h functions, rather than passing the
// functions themselves, because of linker weirdness.
// NOTE(review): the original remark names the affected platform, but that
// part is not visible here.
//
// string_to_upper(s): upper-cases s in place via string_to_case(). The
// capture-less lambda converts to the int(*)(int) callback it expects.
#define string_to_upper(s) \
  string_to_case((s), [] (int i) -> int { return toupper(i); })
121 * Pad a string with pad_string to pad_length. "len" is
122 * input string's length, and in return, it's trimmed string's length. pad_type
123 * can be k_STR_PAD_RIGHT, k_STR_PAD_LEFT or k_STR_PAD_BOTH.
125 String
string_pad(const char *input
, int len
, int pad_length
,
126 const char *pad_string
, int pad_str_len
, int pad_type
);
/**
 * Find a character or substring and return its position (or -1 if not found).
 *
 * The overloads taking `char ch` look for a single character; the overloads
 * taking `const char *s, int s_len` look for a substring.
 *
 * NOTE(review): `pos` is presumably the offset the search starts from, and
 * string_memnstr presumably looks for needle in [haystack, end) and returns
 * a pointer to the match — neither is visible from this header; confirm
 * against the definitions.
 */
int string_find(const char *input, int len, char ch, int pos,
                bool case_sensitive);
int string_rfind(const char *input, int len, char ch, int pos,
                 bool case_sensitive);
int string_find(const char *input, int len, const char *s, int s_len,
                int pos, bool case_sensitive);
int string_rfind(const char *input, int len, const char *s, int s_len,
                 int pos, bool case_sensitive);

const char *string_memnstr(const char *haystack, const char *needle,
                           int needle_len, const char *end);
144 * Replace specified substring or search string with specified replacement.
146 String
string_replace(const char *s
, int len
, int start
, int length
,
147 const char *replacement
, int len_repl
);
148 String
string_replace(const char *input
, int len
,
149 const char *search
, int len_search
,
150 const char *replacement
, int len_replace
,
151 int &count
, bool case_sensitive
);
154 * Replace a substr with another and return replaced one. Note, read
155 * http://www.php.net/substr about meanings of negative start or length.
157 * The form that takes a "count" reference will still replace all occurrences
158 * and return total replaced count in the out parameter. It does NOT mean
159 * it will replace at most that many occurrences, so count's input value
162 inline String
string_replace(const String
& str
, int start
, int length
,
163 const String
& repl
) {
164 return string_replace(str
.data(), str
.size(), start
, length
,
165 repl
.data(), repl
.size());
168 inline String
string_replace(const String
& str
, const String
& search
,
169 const String
& replacement
,
170 int &count
, bool caseSensitive
) {
172 if (!search
.empty() && !str
.empty()) {
173 auto ret
= string_replace(str
.data(), str
.size(),
174 search
.data(), search
.size(),
175 replacement
.data(), replacement
.size(),
176 count
, caseSensitive
);
184 inline String
string_replace(const String
& str
, const String
& search
,
185 const String
& replacement
) {
187 return string_replace(str
, search
, replacement
, count
, true);
191 * Reverse, repeat or shuffle a string.
193 String
string_chunk_split(const char *src
, int srclen
, const char *end
,
194 int endlen
, int chunklen
);
197 * Strip HTML and PHP tags.
199 String
string_strip_tags(const char *s
, int len
, const char *allow
,
200 int allow_len
, bool allow_tag_spaces
);
203 * Encoding/decoding strings according to certain formats.
205 String
string_quoted_printable_encode(const char *input
, int len
);
206 String
string_quoted_printable_decode(const char *input
, int len
, bool is_q
);
207 String
string_uuencode(const char *src
, int src_len
);
208 String
string_uudecode(const char *src
, int src_len
);
209 String
string_base64_encode(const char *input
, int len
);
210 String
string_base64_decode(const char *input
, int len
, bool strict
);
211 String
string_escape_shell_arg(const char *str
);
212 String
string_escape_shell_cmd(const char *str
);
/**
 * Base64 helpers that return std::string rather than String.
 *
 * NOTE(review): `strict` presumably controls whether invalid input
 * characters are rejected — confirm against the definition.
 */
std::string base64_encode(const char *input, int len);
std::string base64_decode(const char *input, int len, bool strict);
/**
 * Convert between strings and numbers.
 */

/**
 * Tell whether base is a usable radix.
 *
 * @param base  candidate radix
 * @return true when base lies in the inclusive range [2, 36]
 */
inline bool string_validate_base(int base) {
  return base >= 2 && base <= 36;
}
223 Variant
string_base_to_numeric(const char *s
, int len
, int base
);
224 String
string_long_to_base(unsigned long value
, int base
);
225 String
string_numeric_to_base(const Variant
& value
, int base
);
/**
 * Translates characters in str_from into characters in str_to one by one,
 * assuming str_from and str_to have the same length of "trlen".
 *
 * @param str,len   string to translate and its length; str is non-const,
 *                  so the translation is presumably done in place — confirm
 * @param str_from  characters to be replaced
 * @param str_to    replacement characters, matched to str_from by position
 * @param trlen     length of both str_from and str_to
 */
void string_translate(char *str, int len, const char *str_from,
                      const char *str_to, int trlen);
237 String
string_money_format(const char *format
, double value
);
239 String
string_number_format(double d
, int dec
,
240 const String
& dec_point
,
241 const String
& thousand_sep
);
/**
 * Similarity and other properties of strings.
 *
 * NOTE(review): presumably string_levenshtein returns the edit distance
 * weighted by the insert/replace/delete costs, and string_similar_text
 * returns the number of matching characters while writing a percentage to
 * *percent — confirm against the definitions.
 */
int string_levenshtein(const char *s1, int l1, const char *s2, int l2,
                       int cost_ins, int cost_rep, int cost_del);
int string_similar_text(const char *t1, int len1,
                        const char *t2, int len2, float *percent);
250 String
string_soundex(const String
& str
);
// NOTE(review): this declaration is incomplete as it stands — the parameter
// list stops on a trailing comma after max_phonemes and there is no closing
// ");". The remaining parameter(s) are not visible here; restore them from
// the definition rather than guessing.
252 String
string_metaphone(const char *input
, int word_len
, long max_phonemes
,
258 String
string_convert_cyrillic_string(const String
& input
, char from
, char to
);
259 String
string_convert_hebrew_string(const String
& str
, int max_chars_per_line
,
260 int convert_newlines
);
262 ///////////////////////////////////////////////////////////////////////////////
/**
 * Fills a 256-byte bytemask with input. You can specify a range like 'a..z',
 * it needs to be incrementing. This function determines how "charlist"
 * parameters are interpreted in various functions that take a list of
 * characters.
 *
 * @param input,len  character list and its length
 * @param mask       256-byte buffer that receives the mask
 */
void string_charmask(const char *input, int len, char *mask);
273 ///////////////////////////////////////////////////////////////////////////////
276 #endif // incl_HPHP_ZEND_STRING_H_