1 // Copyright (c) 2009-2010 Satoshi Nakamoto
2 // Copyright (c) 2009-2016 The Bitcoin Core developers
3 // Distributed under the MIT software license, see the accompanying
4 // file COPYING or http://www.opensource.org/licenses/mit-license.php.
6 #include "utilstrencodings.h"
8 #include "tinyformat.h"
// ASCII letters and digits; every SAFE_CHARS entry below starts with this set.
15 static const std::string CHARS_ALPHA_NUM
= "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789";
// Character whitelists, one per sanitizing rule. SanitizeString() indexes this
// array with its `rule` argument; the trailing comment on each entry names the
// rule that entry belongs to.
17 static const std::string SAFE_CHARS
[] =
19 CHARS_ALPHA_NUM
+ " .,;-_/:?@()", // SAFE_CHARS_DEFAULT
20 CHARS_ALPHA_NUM
+ " .,;-_?@", // SAFE_CHARS_UA_COMMENT
21 CHARS_ALPHA_NUM
+ ".-_", // SAFE_CHARS_FILENAME
24 std::string
SanitizeString(const std::string
& str
, int rule
)
26 std::string strResult
;
27 for (std::string::size_type i
= 0; i
< str
.size(); i
++)
29 if (SAFE_CHARS
[rule
].find(str
[i
]) != std::string::npos
)
30 strResult
.push_back(str
[i
]);
// Lookup table from every possible unsigned char value to the value of the
// hexadecimal digit it represents, or -1 when it is not a hex digit.
// Laid out as 16 rows of 16 entries: the only non-negative entries are
// '0'-'9' (row 3), 'A'-'F' (row 4) and 'a'-'f' (row 6). HexDigit() reads it.
35 const signed char p_util_hexdigit
[256] =
36 { -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
37 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
38 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
39 0,1,2,3,4,5,6,7,8,9,-1,-1,-1,-1,-1,-1,
40 -1,0xa,0xb,0xc,0xd,0xe,0xf,-1,-1,-1,-1,-1,-1,-1,-1,-1,
41 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
42 -1,0xa,0xb,0xc,0xd,0xe,0xf,-1,-1,-1,-1,-1,-1,-1,-1,-1,
43 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
44 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
45 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
46 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
47 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
48 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
49 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
50 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
51 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, };
53 signed char HexDigit(char c
)
55 return p_util_hexdigit
[(unsigned char)c
];
58 bool IsHex(const std::string
& str
)
60 for(std::string::const_iterator
it(str
.begin()); it
!= str
.end(); ++it
)
62 if (HexDigit(*it
) < 0)
65 return (str
.size() > 0) && (str
.size()%2 == 0);
68 bool IsHexNumber(const std::string
& str
)
70 size_t starting_location
= 0;
71 if (str
.size() > 2 && *str
.begin() == '0' && *(str
.begin()+1) == 'x') {
72 starting_location
= 2;
74 for (auto c
: str
.substr(starting_location
)) {
75 if (HexDigit(c
) < 0) return false;
77 // Return false for empty string or "0x".
78 return (str
.size() > starting_location
);
81 std::vector
<unsigned char> ParseHex(const char* psz
)
83 // convert hex dump to vector
84 std::vector
<unsigned char> vch
;
89 signed char c
= HexDigit(*psz
++);
90 if (c
== (signed char)-1)
92 unsigned char n
= (c
<< 4);
94 if (c
== (signed char)-1)
/**
 * Convenience overload: parse the hex dump held in a std::string.
 *
 * The text is handed over as a C string, so parsing ends at the first
 * embedded NUL if there is one.
 *
 * @param str  Hex text to parse.
 * @return Whatever ParseHex(const char*) returns for `str.c_str()`.
 */
std::vector<unsigned char> ParseHex(const std::string& str)
{
    const char* const hex_text = str.c_str();
    return ParseHex(hex_text);
}
107 void SplitHostPort(std::string in
, int &portOut
, std::string
&hostOut
) {
108 size_t colon
= in
.find_last_of(':');
109 // if a : is found, and it either follows a [...], or no other : is in the string, treat it as port separator
110 bool fHaveColon
= colon
!= in
.npos
;
111 bool fBracketed
= fHaveColon
&& (in
[0]=='[' && in
[colon
-1]==']'); // if there is a colon, and in[0]=='[', colon is not 0, so in[colon-1] is safe
112 bool fMultiColon
= fHaveColon
&& (in
.find_last_of(':',colon
-1) != in
.npos
);
113 if (fHaveColon
&& (colon
==0 || fBracketed
|| !fMultiColon
)) {
115 if (ParseInt32(in
.substr(colon
+ 1), &n
) && n
> 0 && n
< 0x10000) {
116 in
= in
.substr(0, colon
);
120 if (in
.size()>0 && in
[0] == '[' && in
[in
.size()-1] == ']')
121 hostOut
= in
.substr(1, in
.size()-2);
/**
 * Encode a byte buffer as standard base64 with '=' padding.
 *
 * @param pch  First byte of the input; not dereferenced when `len` is 0.
 * @param len  Number of input bytes.
 * @return The base64 text, (len + 2) / 3 * 4 characters long.
 */
std::string EncodeBase64(const unsigned char* pch, size_t len)
{
    static const char* const kAlphabet = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";

    std::string encoded;
    encoded.reserve((len + 2) / 3 * 4);

    // Whole 3-byte groups map to exactly four output characters.
    size_t i = 0;
    for (; len - i >= 3; i += 3) {
        const int b0 = pch[i];
        const int b1 = pch[i + 1];
        const int b2 = pch[i + 2];
        encoded += kAlphabet[b0 >> 2];
        encoded += kAlphabet[((b0 & 3) << 4) | (b1 >> 4)];
        encoded += kAlphabet[((b1 & 15) << 2) | (b2 >> 6)];
        encoded += kAlphabet[b2 & 63];
    }

    // One or two leftover bytes: emit the partial group, zero-fill the unused
    // low bits, and pad the group to four characters.
    const size_t tail = len - i;
    if (tail == 1) {
        const int b0 = pch[i];
        encoded += kAlphabet[b0 >> 2];
        encoded += kAlphabet[(b0 & 3) << 4];
        encoded += '=';
        encoded += '=';
    } else if (tail == 2) {
        const int b0 = pch[i];
        const int b1 = pch[i + 1];
        encoded += kAlphabet[b0 >> 2];
        encoded += kAlphabet[((b0 & 3) << 4) | (b1 >> 4)];
        encoded += kAlphabet[(b1 & 15) << 2];
        encoded += '=';
    }

    return encoded;
}
172 std::string
EncodeBase64(const std::string
& str
)
174 return EncodeBase64((const unsigned char*)str
.c_str(), str
.size());
/**
 * Decode standard base64 text.
 *
 * Decoding stops at the first character that is not in the base64 alphabet
 * (which includes '=' and the terminating NUL).
 *
 * @param p          NUL-terminated base64 text.
 * @param pfInvalid  Optional. Set to false on entry and to true when the
 *                   input stopped at an impossible length, has non-zero
 *                   leftover bits, lacks the exact '=' padding, or has an
 *                   alphabet character right after the padding.
 * @return The bytes decoded before parsing stopped.
 */
std::vector<unsigned char> DecodeBase64(const char* p, bool* pfInvalid)
{
    static const int decode64_table[256] =
    {
        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
        -1, -1, -1, 62, -1, -1, -1, 63, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, -1, -1,
        -1, -1, -1, -1, -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
        15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1, -1, -1, -1, -1, -1, 26, 27, 28,
        29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48,
        49, 50, 51, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1
    };

    if (pfInvalid)
        *pfInvalid = false;

    std::vector<unsigned char> decoded;
    decoded.reserve(strlen(p) * 3 / 4);

    int phase = 0; // characters consumed so far in the current group of four
    int carry = 0; // bits read but not yet written out

    for (;;) {
        const int sextet = decode64_table[static_cast<unsigned char>(*p)];
        if (sextet == -1) break;
        ++p;
        if (phase == 0) {
            // no bits held; keep all 6
            carry = sextet;
            phase = 1;
        } else if (phase == 1) {
            // 6 bits held + 6 new: emit 8, keep 4
            decoded.push_back((carry << 2) | (sextet >> 4));
            carry = sextet & 15;
            phase = 2;
        } else if (phase == 2) {
            // 4 bits held + 6 new: emit 8, keep 2
            decoded.push_back((carry << 4) | (sextet >> 2));
            carry = sextet & 3;
            phase = 3;
        } else {
            // 2 bits held + 6 new: emit 8, keep none
            decoded.push_back((carry << 6) | sextet);
            phase = 0;
        }
    }

    if (pfInvalid && phase != 0) {
        // 4n+1 characters can never be valid; 4n+2 needs "==", 4n+3 needs "=".
        const int pad = 4 - phase;
        bool ok = (phase != 1) && (carry == 0);
        // Stop at the first mismatch so nothing past the terminator is read.
        for (int k = 0; ok && k < pad; ++k)
            ok = (p[k] == '=');
        // The character following the padding must not be a base64 character.
        if (ok)
            ok = (decode64_table[static_cast<unsigned char>(p[pad])] == -1);
        if (!ok)
            *pfInvalid = true;
    }

    return decoded;
}
260 std::string
DecodeBase64(const std::string
& str
)
262 std::vector
<unsigned char> vchRet
= DecodeBase64(str
.c_str());
263 return std::string((const char*)vchRet
.data(), vchRet
.size());
/**
 * Encode a byte buffer as lowercase base32 with '=' padding.
 *
 * @param pch  First byte of the input; not dereferenced when `len` is 0.
 * @param len  Number of input bytes.
 * @return The base32 text, (len + 4) / 5 * 8 characters long.
 */
std::string EncodeBase32(const unsigned char* pch, size_t len)
{
    static const char* const kAlphabet = "abcdefghijklmnopqrstuvwxyz234567";
    // '=' characters needed after a final group of 0..4 input bytes.
    static const int kPadChars[5] = {0, 6, 4, 3, 1};

    std::string encoded;
    encoded.reserve((len + 4) / 5 * 8);

    int carry = 0; // bits of the previous byte not yet emitted, pre-shifted into place
    for (size_t i = 0; i < len; ++i) {
        const int octet = pch[i];
        switch (i % 5) {
        case 0: // nothing carried
            encoded += kAlphabet[octet >> 3];
            carry = (octet & 7) << 2;
            break;
        case 1: // three bits carried
            encoded += kAlphabet[carry | (octet >> 6)];
            encoded += kAlphabet[(octet >> 1) & 31];
            carry = (octet & 1) << 4;
            break;
        case 2: // one bit carried
            encoded += kAlphabet[carry | (octet >> 4)];
            carry = (octet & 15) << 1;
            break;
        case 3: // four bits carried
            encoded += kAlphabet[carry | (octet >> 7)];
            encoded += kAlphabet[(octet >> 2) & 31];
            carry = (octet & 3) << 3;
            break;
        default: // two bits carried; this byte completes the 5-byte group
            encoded += kAlphabet[carry | (octet >> 5)];
            encoded += kAlphabet[octet & 31];
            break;
        }
    }

    // Flush the carried bits of an incomplete group and pad it to 8 characters.
    const size_t tail = len % 5;
    if (tail != 0) {
        encoded += kAlphabet[carry];
        encoded.append(static_cast<size_t>(kPadChars[tail]), '=');
    }

    return encoded;
}
325 std::string
EncodeBase32(const std::string
& str
)
327 return EncodeBase32((const unsigned char*)str
.c_str(), str
.size());
/**
 * Decode base32 text; letters are accepted in either case.
 *
 * Decoding stops at the first character that is not in the base32 alphabet
 * (which includes '=' and the terminating NUL).
 *
 * @param p          NUL-terminated base32 text.
 * @param pfInvalid  Optional. Set to false on entry and to true when the
 *                   input stopped at an impossible length, has non-zero
 *                   leftover bits, lacks the exact '=' padding, or has an
 *                   alphabet character right after the padding.
 * @return The bytes decoded before parsing stopped.
 */
std::vector<unsigned char> DecodeBase32(const char* p, bool* pfInvalid)
{
    static const int decode32_table[256] =
    {
        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 26, 27, 28, 29, 30, 31, -1, -1, -1, -1,
        -1, -1, -1, -1, -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
        15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1, -1, -1, -1, -1, -1, 0, 1, 2,
        3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22,
        23, 24, 25, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1
    };
    // '=' characters required after stopping at each position inside a group
    // of eight; -1 marks positions at which a valid input can never stop.
    static const int kRequiredPadding[8] = {0, -1, 6, -1, 4, 3, -1, 1};

    if (pfInvalid)
        *pfInvalid = false;

    std::vector<unsigned char> decoded;
    decoded.reserve((strlen(p)) * 5 / 8);

    int phase = 0; // characters consumed so far in the current group of eight
    int carry = 0; // bits read but not yet written out

    for (;;) {
        const int quintet = decode32_table[static_cast<unsigned char>(*p)];
        if (quintet == -1) break;
        ++p;
        switch (phase) {
        case 0: // no bits held; keep 5
            carry = quintet;
            break;
        case 1: // 5 held: emit 8, keep 2
            decoded.push_back((carry << 3) | (quintet >> 2));
            carry = quintet & 3;
            break;
        case 2: // 2 held: keep 7
            carry = carry << 5 | quintet;
            break;
        case 3: // 7 held: emit 8, keep 4
            decoded.push_back((carry << 1) | (quintet >> 4));
            carry = quintet & 15;
            break;
        case 4: // 4 held: emit 8, keep 1
            decoded.push_back((carry << 4) | (quintet >> 1));
            carry = quintet & 1;
            break;
        case 5: // 1 held: keep 6
            carry = carry << 5 | quintet;
            break;
        case 6: // 6 held: emit 8, keep 3
            decoded.push_back((carry << 2) | (quintet >> 3));
            carry = quintet & 7;
            break;
        default: // 3 held: emit 8, keep none
            decoded.push_back((carry << 5) | quintet);
            break;
        }
        phase = (phase + 1) & 7;
    }

    if (pfInvalid && phase != 0) {
        const int pad = kRequiredPadding[phase];
        bool ok = (pad > 0) && (carry == 0);
        // Stop at the first mismatch so nothing past the terminator is read.
        for (int k = 0; ok && k < pad; ++k)
            ok = (p[k] == '=');
        // The character following the padding must not be a base32 character.
        if (ok)
            ok = (decode32_table[static_cast<unsigned char>(p[pad])] == -1);
        if (!ok)
            *pfInvalid = true;
    }

    return decoded;
}
447 std::string
DecodeBase32(const std::string
& str
)
449 std::vector
<unsigned char> vchRet
= DecodeBase32(str
.c_str());
450 return std::string((const char*)vchRet
.data(), vchRet
.size());
/**
 * Checks shared by the numeric parsers before the text reaches a C library
 * conversion routine.
 *
 * @param str  Candidate numeric text.
 * @return false when `str` is empty, begins or ends with whitespace, or
 *         contains an embedded NUL character; true otherwise.
 */
static bool ParsePrechecks(const std::string& str)
{
    if (str.empty()) // No empty string allowed
        return false;
    // isspace() has undefined behaviour for negative arguments other than EOF,
    // so bytes >= 0x80 must be widened through unsigned char first.
    const unsigned char first = static_cast<unsigned char>(str.front());
    const unsigned char last = static_cast<unsigned char>(str.back());
    if (isspace(first) || isspace(last)) // No padding allowed
        return false;
    if (str.size() != strlen(str.c_str())) // No embedded NUL characters allowed
        return false;
    return true;
}
464 bool ParseInt32(const std::string
& str
, int32_t *out
)
466 if (!ParsePrechecks(str
))
468 char *endp
= nullptr;
469 errno
= 0; // strtol will not set errno if valid
470 long int n
= strtol(str
.c_str(), &endp
, 10);
471 if(out
) *out
= (int32_t)n
;
472 // Note that strtol returns a *long int*, so even if strtol doesn't report an over/underflow
473 // we still have to check that the returned value is within the range of an *int32_t*. On 64-bit
474 // platforms the size of these types may be different.
475 return endp
&& *endp
== 0 && !errno
&&
476 n
>= std::numeric_limits
<int32_t>::min() &&
477 n
<= std::numeric_limits
<int32_t>::max();
480 bool ParseInt64(const std::string
& str
, int64_t *out
)
482 if (!ParsePrechecks(str
))
484 char *endp
= nullptr;
485 errno
= 0; // strtoll will not set errno if valid
486 long long int n
= strtoll(str
.c_str(), &endp
, 10);
487 if(out
) *out
= (int64_t)n
;
488 // Note that strtoll returns a *long long int*, so even if strtol doesn't report an over/underflow
489 // we still have to check that the returned value is within the range of an *int64_t*.
490 return endp
&& *endp
== 0 && !errno
&&
491 n
>= std::numeric_limits
<int64_t>::min() &&
492 n
<= std::numeric_limits
<int64_t>::max();
495 bool ParseUInt32(const std::string
& str
, uint32_t *out
)
497 if (!ParsePrechecks(str
))
499 if (str
.size() >= 1 && str
[0] == '-') // Reject negative values, unfortunately strtoul accepts these by default if they fit in the range
501 char *endp
= nullptr;
502 errno
= 0; // strtoul will not set errno if valid
503 unsigned long int n
= strtoul(str
.c_str(), &endp
, 10);
504 if(out
) *out
= (uint32_t)n
;
505 // Note that strtoul returns a *unsigned long int*, so even if it doesn't report an over/underflow
506 // we still have to check that the returned value is within the range of an *uint32_t*. On 64-bit
507 // platforms the size of these types may be different.
508 return endp
&& *endp
== 0 && !errno
&&
509 n
<= std::numeric_limits
<uint32_t>::max();
512 bool ParseUInt64(const std::string
& str
, uint64_t *out
)
514 if (!ParsePrechecks(str
))
516 if (str
.size() >= 1 && str
[0] == '-') // Reject negative values, unfortunately strtoull accepts these by default if they fit in the range
518 char *endp
= nullptr;
519 errno
= 0; // strtoull will not set errno if valid
520 unsigned long long int n
= strtoull(str
.c_str(), &endp
, 10);
521 if(out
) *out
= (uint64_t)n
;
522 // Note that strtoull returns a *unsigned long long int*, so even if it doesn't report an over/underflow
523 // we still have to check that the returned value is within the range of an *uint64_t*.
524 return endp
&& *endp
== 0 && !errno
&&
525 n
<= std::numeric_limits
<uint64_t>::max();
529 bool ParseDouble(const std::string
& str
, double *out
)
531 if (!ParsePrechecks(str
))
533 if (str
.size() >= 2 && str
[0] == '0' && str
[1] == 'x') // No hexadecimal floats allowed
535 std::istringstream
text(str
);
536 text
.imbue(std::locale::classic());
539 if(out
) *out
= result
;
540 return text
.eof() && !text
.fail();
/**
 * Word-wrap text to a maximum line width.
 *
 * Existing newlines are kept. A line that is too long is broken at the last
 * space or newline that fits; when nothing fits, the whole next word is
 * emitted on its own line. Lines produced by breaking at a space are prefixed
 * with `indent` spaces, and that prefix counts against `width`.
 *
 * @param in      Text to format.
 * @param width   Maximum line width in characters.
 * @param indent  Number of spaces put in front of continuation lines.
 * @return The wrapped text.
 */
std::string FormatParagraph(const std::string& in, size_t width, size_t indent)
{
    std::string wrapped;
    size_t pos = 0;          // first character not yet emitted
    size_t used_columns = 0; // columns of the current output line taken by indentation
    while (pos < in.size()) {
        size_t line_end = in.find('\n', pos);
        if (line_end == std::string::npos)
            line_end = in.size();
        const size_t line_len = line_end - pos;
        const size_t room = width - used_columns;

        if (line_len <= room) {
            // The rest of this input line fits: copy it with its newline.
            wrapped += in.substr(pos, line_len + 1);
            pos = line_end + 1;
            used_columns = 0;
            continue;
        }

        size_t break_at = in.find_last_of(" \n", pos + room);
        if (break_at == std::string::npos || break_at < pos) {
            // No place to break; just include the entire word and move on
            break_at = in.find_first_of("\n ", pos);
            if (break_at == std::string::npos) {
                // End of the string, just add it and break
                wrapped += in.substr(pos);
                break;
            }
        }
        wrapped += in.substr(pos, break_at - pos);
        wrapped += "\n";
        if (in[break_at] == '\n') {
            // Broke at a real newline: the next line starts unindented.
            used_columns = 0;
        } else if (indent) {
            wrapped += std::string(indent, ' ');
            used_columns = indent;
        }
        pos = break_at + 1;
    }
    return wrapped;
}
584 std::string
i64tostr(int64_t n
)
586 return strprintf("%d", n
);
589 std::string
itostr(int n
)
591 return strprintf("%d", n
);
/**
 * Convert the leading decimal number of a C string to a 64-bit integer.
 *
 * No error is reported: text without a leading number converts to 0.
 *
 * @param psz  NUL-terminated text.
 * @return The converted value.
 */
int64_t atoi64(const char* psz)
{
#ifdef _MSC_VER
    const int64_t value = _atoi64(psz);
#else
    const int64_t value = strtoll(psz, nullptr, 10);
#endif
    return value;
}
/**
 * Convert the leading decimal number of a std::string to a 64-bit integer.
 *
 * No error is reported: text without a leading number converts to 0.
 *
 * @param str  Text to convert; read as a C string.
 * @return The converted value.
 */
int64_t atoi64(const std::string& str)
{
    const char* const text = str.c_str();
#ifdef _MSC_VER
    return _atoi64(text);
#else
    return strtoll(text, nullptr, 10);
#endif
}
/**
 * std::string overload of the C library atoi().
 *
 * @param str  Text to convert; read as a C string.
 * @return Whatever atoi(const char*) returns for `str.c_str()`.
 */
int atoi(const std::string& str)
{
    const char* const text = str.c_str();
    return atoi(text);
}
617 /** Upper bound for mantissa.
618 * 10^18-1 is the largest arbitrary decimal that will fit in a signed 64-bit integer.
619 * Larger integers cannot consist of arbitrary combinations of 0-9:
621 * 999999999999999999 10^18-1
622 * 9223372036854775807 (1<<63)-1 (max int64_t)
623 * 9999999999999999999 10^19-1 (would overflow)
// Equals 999999999999999999 (10^18 - 1). The mantissa and exponent overflow
// checks in the fixed-point parsing code below compare against it.
625 static const int64_t UPPER_BOUND
= 1000000000000000000LL - 1LL;
627 /** Helper function for ParseFixedPoint */
628 static inline bool ProcessMantissaDigit(char ch
, int64_t &mantissa
, int &mantissa_tzeros
)
633 for (int i
=0; i
<=mantissa_tzeros
; ++i
) {
634 if (mantissa
> (UPPER_BOUND
/ 10LL))
635 return false; /* overflow */
638 mantissa
+= ch
- '0';
644 bool ParseFixedPoint(const std::string
&val
, int decimals
, int64_t *amount_out
)
646 int64_t mantissa
= 0;
647 int64_t exponent
= 0;
648 int mantissa_tzeros
= 0;
649 bool mantissa_sign
= false;
650 bool exponent_sign
= false;
652 int end
= val
.size();
655 if (ptr
< end
&& val
[ptr
] == '-') {
656 mantissa_sign
= true;
661 if (val
[ptr
] == '0') {
664 } else if (val
[ptr
] >= '1' && val
[ptr
] <= '9') {
665 while (ptr
< end
&& val
[ptr
] >= '0' && val
[ptr
] <= '9') {
666 if (!ProcessMantissaDigit(val
[ptr
], mantissa
, mantissa_tzeros
))
667 return false; /* overflow */
670 } else return false; /* missing expected digit */
671 } else return false; /* empty string or loose '-' */
672 if (ptr
< end
&& val
[ptr
] == '.')
675 if (ptr
< end
&& val
[ptr
] >= '0' && val
[ptr
] <= '9')
677 while (ptr
< end
&& val
[ptr
] >= '0' && val
[ptr
] <= '9') {
678 if (!ProcessMantissaDigit(val
[ptr
], mantissa
, mantissa_tzeros
))
679 return false; /* overflow */
683 } else return false; /* missing expected digit */
685 if (ptr
< end
&& (val
[ptr
] == 'e' || val
[ptr
] == 'E'))
688 if (ptr
< end
&& val
[ptr
] == '+')
690 else if (ptr
< end
&& val
[ptr
] == '-') {
691 exponent_sign
= true;
694 if (ptr
< end
&& val
[ptr
] >= '0' && val
[ptr
] <= '9') {
695 while (ptr
< end
&& val
[ptr
] >= '0' && val
[ptr
] <= '9') {
696 if (exponent
> (UPPER_BOUND
/ 10LL))
697 return false; /* overflow */
698 exponent
= exponent
* 10 + val
[ptr
] - '0';
701 } else return false; /* missing expected digit */
704 return false; /* trailing garbage */
706 /* finalize exponent */
708 exponent
= -exponent
;
709 exponent
= exponent
- point_ofs
+ mantissa_tzeros
;
711 /* finalize mantissa */
713 mantissa
= -mantissa
;
715 /* convert to one 64-bit fixed-point value */
716 exponent
+= decimals
;
718 return false; /* cannot represent values smaller than 10^-decimals */
720 return false; /* cannot represent values larger than or equal to 10^(18-decimals) */
722 for (int i
=0; i
< exponent
; ++i
) {
723 if (mantissa
> (UPPER_BOUND
/ 10LL) || mantissa
< -(UPPER_BOUND
/ 10LL))
724 return false; /* overflow */
727 if (mantissa
> UPPER_BOUND
|| mantissa
< -UPPER_BOUND
)
728 return false; /* overflow */
731 *amount_out
= mantissa
;