Bitcoin Core 28.0.0
P2P Digital Currency
Loading...
Searching...
No Matches
strencodings.cpp
Go to the documentation of this file.
1// Copyright (c) 2009-2010 Satoshi Nakamoto
2// Copyright (c) 2009-2022 The Bitcoin Core developers
3// Distributed under the MIT software license, see the accompanying
4// file COPYING or http://www.opensource.org/licenses/mit-license.php.
5
6#include <util/strencodings.h>
7
8#include <crypto/hex_base.h>
9#include <span.h>
10
11#include <array>
12#include <cassert>
13#include <cstring>
14#include <limits>
15#include <optional>
16#include <ostream>
17#include <string>
18#include <vector>
19
/** Alphanumeric characters; the common base of every safe-character set below. */
static const std::string CHARS_ALPHA_NUM = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789";

/** Allowed characters per sanitizing rule; SanitizeString() uses its `rule` argument as the index. */
static const std::string SAFE_CHARS[] =
{
    CHARS_ALPHA_NUM + " .,;-_/:?@()", // SAFE_CHARS_DEFAULT
    CHARS_ALPHA_NUM + " .,;-_?@", // SAFE_CHARS_UA_COMMENT
    CHARS_ALPHA_NUM + ".-_", // SAFE_CHARS_FILENAME
    CHARS_ALPHA_NUM + "!*'();:@&=+$,/?#[]-_.~%", // SAFE_CHARS_URI
};

/**
 * Return a copy of str that keeps only the characters allowed by the given rule.
 *
 * @param[in] str   Text to filter.
 * @param[in] rule  Index into SAFE_CHARS.
 * @return The allowed characters of str in their original order, or an empty
 *         string if rule does not name an entry of SAFE_CHARS.
 */
std::string SanitizeString(std::string_view str, int rule)
{
    std::string result;
    // Fail closed: an unknown rule has no safe characters, rather than
    // indexing past the end of SAFE_CHARS.
    if (rule < 0 || static_cast<size_t>(rule) >= std::size(SAFE_CHARS)) return result;

    const std::string& allowed = SAFE_CHARS[rule];
    result.reserve(str.size()); // the output can never be longer than the input
    for (const char c : str) {
        if (allowed.find(c) != std::string::npos) {
            result.push_back(c);
        }
    }
    return result;
}
40
41bool IsHex(std::string_view str)
42{
43 for (char c : str) {
44 if (HexDigit(c) < 0) return false;
45 }
46 return (str.size() > 0) && (str.size()%2 == 0);
47}
48
49bool IsHexNumber(std::string_view str)
50{
51 if (str.substr(0, 2) == "0x") str.remove_prefix(2);
52 for (char c : str) {
53 if (HexDigit(c) < 0) return false;
54 }
55 // Return false for empty string or "0x".
56 return str.size() > 0;
57}
58
59template <typename Byte>
60std::optional<std::vector<Byte>> TryParseHex(std::string_view str)
61{
62 std::vector<Byte> vch;
63 vch.reserve(str.size() / 2); // two hex characters form a single byte
64
65 auto it = str.begin();
66 while (it != str.end()) {
67 if (IsSpace(*it)) {
68 ++it;
69 continue;
70 }
71 auto c1 = HexDigit(*(it++));
72 if (it == str.end()) return std::nullopt;
73 auto c2 = HexDigit(*(it++));
74 if (c1 < 0 || c2 < 0) return std::nullopt;
75 vch.push_back(Byte(c1 << 4) | Byte(c2));
76 }
77 return vch;
78}
79template std::optional<std::vector<std::byte>> TryParseHex(std::string_view);
80template std::optional<std::vector<uint8_t>> TryParseHex(std::string_view);
81
82bool SplitHostPort(std::string_view in, uint16_t& portOut, std::string& hostOut)
83{
84 bool valid = false;
85 size_t colon = in.find_last_of(':');
86 // if a : is found, and it either follows a [...], or no other : is in the string, treat it as port separator
87 bool fHaveColon = colon != in.npos;
88 bool fBracketed = fHaveColon && (in[0] == '[' && in[colon - 1] == ']'); // if there is a colon, and in[0]=='[', colon is not 0, so in[colon-1] is safe
89 bool fMultiColon{fHaveColon && colon != 0 && (in.find_last_of(':', colon - 1) != in.npos)};
90 if (fHaveColon && (colon == 0 || fBracketed || !fMultiColon)) {
91 uint16_t n;
92 if (ParseUInt16(in.substr(colon + 1), &n)) {
93 in = in.substr(0, colon);
94 portOut = n;
95 valid = (portOut != 0);
96 }
97 } else {
98 valid = true;
99 }
100 if (in.size() > 0 && in[0] == '[' && in[in.size() - 1] == ']') {
101 hostOut = in.substr(1, in.size() - 2);
102 } else {
103 hostOut = in;
104 }
105
106 return valid;
107}
108
110{
111 static const char *pbase64 = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
112
113 std::string str;
114 str.reserve(((input.size() + 2) / 3) * 4);
115 ConvertBits<8, 6, true>([&](int v) { str += pbase64[v]; }, input.begin(), input.end());
116 while (str.size() % 4) str += '=';
117 return str;
118}
119
120std::optional<std::vector<unsigned char>> DecodeBase64(std::string_view str)
121{
122 static const int8_t decode64_table[256]{
123 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
124 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
125 -1, -1, -1, 62, -1, -1, -1, 63, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, -1, -1,
126 -1, -1, -1, -1, -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
127 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1, -1, -1, -1, -1, -1, 26, 27, 28,
128 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48,
129 49, 50, 51, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
130 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
131 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
132 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
133 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
134 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
135 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1
136 };
137
138 if (str.size() % 4 != 0) return {};
139 /* One or two = characters at the end are permitted. */
140 if (str.size() >= 1 && str.back() == '=') str.remove_suffix(1);
141 if (str.size() >= 1 && str.back() == '=') str.remove_suffix(1);
142
143 std::vector<unsigned char> ret;
144 ret.reserve((str.size() * 3) / 4);
145 bool valid = ConvertBits<6, 8, false>(
146 [&](unsigned char c) { ret.push_back(c); },
147 str.begin(), str.end(),
148 [](char c) { return decode64_table[uint8_t(c)]; }
149 );
150 if (!valid) return {};
151
152 return ret;
153}
154
155std::string EncodeBase32(Span<const unsigned char> input, bool pad)
156{
157 static const char *pbase32 = "abcdefghijklmnopqrstuvwxyz234567";
158
159 std::string str;
160 str.reserve(((input.size() + 4) / 5) * 8);
161 ConvertBits<8, 5, true>([&](int v) { str += pbase32[v]; }, input.begin(), input.end());
162 if (pad) {
163 while (str.size() % 8) {
164 str += '=';
165 }
166 }
167 return str;
168}
169
170std::string EncodeBase32(std::string_view str, bool pad)
171{
172 return EncodeBase32(MakeUCharSpan(str), pad);
173}
174
175std::optional<std::vector<unsigned char>> DecodeBase32(std::string_view str)
176{
177 static const int8_t decode32_table[256]{
178 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
179 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
180 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 26, 27, 28, 29, 30, 31, -1, -1, -1, -1,
181 -1, -1, -1, -1, -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
182 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1, -1, -1, -1, -1, -1, 0, 1, 2,
183 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22,
184 23, 24, 25, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
185 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
186 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
187 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
188 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
189 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
190 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1
191 };
192
193 if (str.size() % 8 != 0) return {};
194 /* 1, 3, 4, or 6 padding '=' suffix characters are permitted. */
195 if (str.size() >= 1 && str.back() == '=') str.remove_suffix(1);
196 if (str.size() >= 2 && str.substr(str.size() - 2) == "==") str.remove_suffix(2);
197 if (str.size() >= 1 && str.back() == '=') str.remove_suffix(1);
198 if (str.size() >= 2 && str.substr(str.size() - 2) == "==") str.remove_suffix(2);
199
200 std::vector<unsigned char> ret;
201 ret.reserve((str.size() * 5) / 8);
202 bool valid = ConvertBits<5, 8, false>(
203 [&](unsigned char c) { ret.push_back(c); },
204 str.begin(), str.end(),
205 [](char c) { return decode32_table[uint8_t(c)]; }
206 );
207
208 if (!valid) return {};
209
210 return ret;
211}
212
213namespace {
214template <typename T>
215bool ParseIntegral(std::string_view str, T* out)
216{
217 static_assert(std::is_integral<T>::value);
218 // Replicate the exact behavior of strtol/strtoll/strtoul/strtoull when
219 // handling leading +/- for backwards compatibility.
220 if (str.length() >= 2 && str[0] == '+' && str[1] == '-') {
221 return false;
222 }
223 const std::optional<T> opt_int = ToIntegral<T>((!str.empty() && str[0] == '+') ? str.substr(1) : str);
224 if (!opt_int) {
225 return false;
226 }
227 if (out != nullptr) {
228 *out = *opt_int;
229 }
230 return true;
231}
232}; // namespace
233
234bool ParseInt32(std::string_view str, int32_t* out)
235{
236 return ParseIntegral<int32_t>(str, out);
237}
238
239bool ParseInt64(std::string_view str, int64_t* out)
240{
241 return ParseIntegral<int64_t>(str, out);
242}
243
244bool ParseUInt8(std::string_view str, uint8_t* out)
245{
246 return ParseIntegral<uint8_t>(str, out);
247}
248
249bool ParseUInt16(std::string_view str, uint16_t* out)
250{
251 return ParseIntegral<uint16_t>(str, out);
252}
253
254bool ParseUInt32(std::string_view str, uint32_t* out)
255{
256 return ParseIntegral<uint32_t>(str, out);
257}
258
259bool ParseUInt64(std::string_view str, uint64_t* out)
260{
261 return ParseIntegral<uint64_t>(str, out);
262}
263
/**
 * Format a paragraph of text to a fixed width, adding spaces for indentation
 * to any added line.
 *
 * Existing newlines are kept. A line that exceeds the remaining width is
 * broken at the last space or newline that still fits; a word that is too
 * long to fit is emitted whole.
 *
 * @param[in] in      Text to wrap.
 * @param[in] width   Maximum line width; must be >= indent (asserted).
 * @param[in] indent  Number of spaces placed after every line break this
 *                    function inserts at a space.
 * @return The wrapped text.
 */
std::string FormatParagraph(std::string_view in, size_t width, size_t indent)
{
    assert(width >= indent);
    // Build the result in a std::string directly; no stream (and no <sstream>) is needed.
    std::string out;
    out.reserve(in.size());
    size_t ptr = 0;      // start of the text not yet emitted
    size_t indented = 0; // columns already taken by indentation on the current output line
    while (ptr < in.size()) {
        size_t lineend = in.find('\n', ptr);
        if (lineend == std::string_view::npos) {
            lineend = in.size();
        }
        const size_t linelen = lineend - ptr;
        const size_t rem_width = width - indented;
        if (linelen <= rem_width) {
            // Fits as is; "+ 1" also copies the newline when there is one.
            out.append(in.substr(ptr, linelen + 1));
            ptr = lineend + 1;
            indented = 0;
        } else {
            size_t finalspace = in.find_last_of(" \n", ptr + rem_width);
            if (finalspace == std::string_view::npos || finalspace < ptr) {
                // No place to break; just include the entire word and move on
                finalspace = in.find_first_of("\n ", ptr);
                if (finalspace == std::string_view::npos) {
                    // End of the string, just add it and break
                    out.append(in.substr(ptr));
                    break;
                }
            }
            out.append(in.substr(ptr, finalspace - ptr));
            out.push_back('\n');
            if (in[finalspace] == '\n') {
                // The break coincides with a newline of the input: no indentation.
                indented = 0;
            } else if (indent) {
                out.append(indent, ' ');
                indented = indent;
            }
            ptr = finalspace + 1;
        }
    }
    return out;
}
305
/** Upper bound for mantissa: 10^18 - 1. */
static const int64_t UPPER_BOUND = 999999999999999999LL;

/**
 * Helper function for ParseFixedPoint: feed one decimal digit into the mantissa.
 *
 * A '0' is only counted in mantissa_tzeros. A non-zero digit first scales the
 * mantissa by ten once for each counted zero plus once for itself, then adds
 * the digit and resets the zero count.
 *
 * @return false if a scaling step would push the mantissa past UPPER_BOUND
 *         (the mantissa may already have been scaled partially), true otherwise.
 */
static inline bool ProcessMantissaDigit(char ch, int64_t &mantissa, int &mantissa_tzeros)
{
    if (ch == '0') {
        ++mantissa_tzeros;
        return true;
    }
    const int64_t scale_limit = UPPER_BOUND / 10LL;
    for (int step = 0; step <= mantissa_tzeros; ++step) {
        if (mantissa > scale_limit) {
            return false; /* overflow */
        }
        mantissa *= 10;
    }
    mantissa += ch - '0';
    mantissa_tzeros = 0;
    return true;
}
332
333bool ParseFixedPoint(std::string_view val, int decimals, int64_t *amount_out)
334{
335 int64_t mantissa = 0;
336 int64_t exponent = 0;
337 int mantissa_tzeros = 0;
338 bool mantissa_sign = false;
339 bool exponent_sign = false;
340 int ptr = 0;
341 int end = val.size();
342 int point_ofs = 0;
343
344 if (ptr < end && val[ptr] == '-') {
345 mantissa_sign = true;
346 ++ptr;
347 }
348 if (ptr < end)
349 {
350 if (val[ptr] == '0') {
351 /* pass single 0 */
352 ++ptr;
353 } else if (val[ptr] >= '1' && val[ptr] <= '9') {
354 while (ptr < end && IsDigit(val[ptr])) {
355 if (!ProcessMantissaDigit(val[ptr], mantissa, mantissa_tzeros))
356 return false; /* overflow */
357 ++ptr;
358 }
359 } else return false; /* missing expected digit */
360 } else return false; /* empty string or loose '-' */
361 if (ptr < end && val[ptr] == '.')
362 {
363 ++ptr;
364 if (ptr < end && IsDigit(val[ptr]))
365 {
366 while (ptr < end && IsDigit(val[ptr])) {
367 if (!ProcessMantissaDigit(val[ptr], mantissa, mantissa_tzeros))
368 return false; /* overflow */
369 ++ptr;
370 ++point_ofs;
371 }
372 } else return false; /* missing expected digit */
373 }
374 if (ptr < end && (val[ptr] == 'e' || val[ptr] == 'E'))
375 {
376 ++ptr;
377 if (ptr < end && val[ptr] == '+')
378 ++ptr;
379 else if (ptr < end && val[ptr] == '-') {
380 exponent_sign = true;
381 ++ptr;
382 }
383 if (ptr < end && IsDigit(val[ptr])) {
384 while (ptr < end && IsDigit(val[ptr])) {
385 if (exponent > (UPPER_BOUND / 10LL))
386 return false; /* overflow */
387 exponent = exponent * 10 + val[ptr] - '0';
388 ++ptr;
389 }
390 } else return false; /* missing expected digit */
391 }
392 if (ptr != end)
393 return false; /* trailing garbage */
394
395 /* finalize exponent */
396 if (exponent_sign)
397 exponent = -exponent;
398 exponent = exponent - point_ofs + mantissa_tzeros;
399
400 /* finalize mantissa */
401 if (mantissa_sign)
402 mantissa = -mantissa;
403
404 /* convert to one 64-bit fixed-point value */
405 exponent += decimals;
406 if (exponent < 0)
407 return false; /* cannot represent values smaller than 10^-decimals */
408 if (exponent >= 18)
409 return false; /* cannot represent values larger than or equal to 10^(18-decimals) */
410
411 for (int i=0; i < exponent; ++i) {
412 if (mantissa > (UPPER_BOUND / 10LL) || mantissa < -(UPPER_BOUND / 10LL))
413 return false; /* overflow */
414 mantissa *= 10;
415 }
416 if (mantissa > UPPER_BOUND || mantissa < -UPPER_BOUND)
417 return false; /* overflow */
418
419 if (amount_out)
420 *amount_out = mantissa;
421
422 return true;
423}
424
425std::string ToLower(std::string_view str)
426{
427 std::string r;
428 r.reserve(str.size());
429 for (auto ch : str) r += ToLower(ch);
430 return r;
431}
432
433std::string ToUpper(std::string_view str)
434{
435 std::string r;
436 r.reserve(str.size());
437 for (auto ch : str) r += ToUpper(ch);
438 return r;
439}
440
441std::string Capitalize(std::string str)
442{
443 if (str.empty()) return str;
444 str[0] = ToUpper(str.front());
445 return str;
446}
447
448std::optional<uint64_t> ParseByteUnits(std::string_view str, ByteUnit default_multiplier)
449{
450 if (str.empty()) {
451 return std::nullopt;
452 }
453 auto multiplier = default_multiplier;
454 char unit = str.back();
455 switch (unit) {
456 case 'k':
457 multiplier = ByteUnit::k;
458 break;
459 case 'K':
460 multiplier = ByteUnit::K;
461 break;
462 case 'm':
463 multiplier = ByteUnit::m;
464 break;
465 case 'M':
466 multiplier = ByteUnit::M;
467 break;
468 case 'g':
469 multiplier = ByteUnit::g;
470 break;
471 case 'G':
472 multiplier = ByteUnit::G;
473 break;
474 case 't':
475 multiplier = ByteUnit::t;
476 break;
477 case 'T':
478 multiplier = ByteUnit::T;
479 break;
480 default:
481 unit = 0;
482 break;
483 }
484
485 uint64_t unit_amount = static_cast<uint64_t>(multiplier);
486 auto parsed_num = ToIntegral<uint64_t>(unit ? str.substr(0, str.size() - 1) : str);
487 if (!parsed_num || parsed_num > std::numeric_limits<uint64_t>::max() / unit_amount) { // check overflow
488 return std::nullopt;
489 }
490 return *parsed_num * unit_amount;
491}
int ret
A Span is an object that can refer to a contiguous sequence of objects.
Definition span.h:98
constexpr std::size_t size() const noexcept
Definition span.h:187
constexpr C * begin() const noexcept
Definition span.h:175
constexpr C * end() const noexcept
Definition span.h:176
#define T(expected, seed, data)
signed char HexDigit(char c)
Definition hex_base.cpp:63
constexpr auto MakeUCharSpan(V &&v) -> decltype(UCharSpanCast(Span{std::forward< V >(v)}))
Like the Span constructor, but for (const) unsigned char member types only.
Definition span.h:304
constexpr bool IsDigit(char c)
Tests if the given character is a decimal digit.
ByteUnit
Used by ParseByteUnits() Lowercase base 1000 Uppercase base 1024.
constexpr bool IsSpace(char c) noexcept
Tests if the given character is a whitespace character.
std::optional< T > ToIntegral(std::string_view str)
Convert string to integral type T.
bool ConvertBits(O outfn, It it, It end, I infn={})
Convert from one power-of-2 number base to another.
std::string Capitalize(std::string str)
Capitalizes the first character of the given string.
static const std::string SAFE_CHARS[]
bool IsHexNumber(std::string_view str)
Return true if the string is a hex number, optionally prefixed with "0x".
bool ParseInt32(std::string_view str, int32_t *out)
Convert string to signed 32-bit integer with strict parse error feedback.
bool ParseUInt16(std::string_view str, uint16_t *out)
Convert decimal string to unsigned 16-bit integer with strict parse error feedback.
std::string ToUpper(std::string_view str)
Returns the uppercase equivalent of the given string.
std::string EncodeBase64(Span< const unsigned char > input)
bool ParseFixedPoint(std::string_view val, int decimals, int64_t *amount_out)
Parse number as fixed point according to JSON number syntax.
std::optional< std::vector< unsigned char > > DecodeBase32(std::string_view str)
bool ParseInt64(std::string_view str, int64_t *out)
Convert string to signed 64-bit integer with strict parse error feedback.
std::string EncodeBase32(Span< const unsigned char > input, bool pad)
Base32 encode.
bool ParseUInt8(std::string_view str, uint8_t *out)
Convert decimal string to unsigned 8-bit integer with strict parse error feedback.
bool ParseUInt64(std::string_view str, uint64_t *out)
Convert decimal string to unsigned 64-bit integer with strict parse error feedback.
static bool ProcessMantissaDigit(char ch, int64_t &mantissa, int &mantissa_tzeros)
Helper function for ParseFixedPoint.
bool IsHex(std::string_view str)
std::string FormatParagraph(std::string_view in, size_t width, size_t indent)
Format a paragraph of text to a fixed width, adding spaces for indentation to any added line.
static const int64_t UPPER_BOUND
Upper bound for mantissa.
std::optional< std::vector< unsigned char > > DecodeBase64(std::string_view str)
bool SplitHostPort(std::string_view in, uint16_t &portOut, std::string &hostOut)
Splits socket address string into host string and port value.
bool ParseUInt32(std::string_view str, uint32_t *out)
Convert decimal string to unsigned 32-bit integer with strict parse error feedback.
std::optional< std::vector< Byte > > TryParseHex(std::string_view str)
Parse the hex string into bytes (uint8_t or std::byte).
std::optional< uint64_t > ParseByteUnits(std::string_view str, ByteUnit default_multiplier)
Parse a string with suffix unit [k|K|m|M|g|G|t|T].
std::string ToLower(std::string_view str)
Returns the lowercase equivalent of the given string.
std::string SanitizeString(std::string_view str, int rule)
Remove unsafe chars.
static const std::string CHARS_ALPHA_NUM
assert(!tx.IsCoinBase())