Monero
Toggle main menu visibility
Loading...
Searching...
No Matches
src
common
utf8.h
Go to the documentation of this file.
1
// Copyright (c) 2019-2022, The Monero Project
2
3
//
4
// All rights reserved.
5
//
6
// Redistribution and use in source and binary forms, with or without modification, are
7
// permitted provided that the following conditions are met:
8
//
9
// 1. Redistributions of source code must retain the above copyright notice, this list of
10
// conditions and the following disclaimer.
11
//
12
// 2. Redistributions in binary form must reproduce the above copyright notice, this list
13
// of conditions and the following disclaimer in the documentation and/or other
14
// materials provided with the distribution.
15
//
16
// 3. Neither the name of the copyright holder nor the names of its contributors may be
17
// used to endorse or promote products derived from this software without specific
18
// prior written permission.
19
//
20
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY
21
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
22
// MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
23
// THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
25
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26
// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
27
// STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF
28
// THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29
30
#pragma once
31
32
#include <cctype>
33
#include <cwchar>
34
#include <stdexcept>
35
36
namespace tools
{
  // Decodes the UTF-8 byte sequence held in `s`, passes every decoded code
  // point through `t`, and re-encodes the result using the shortest UTF-8 form
  // for each code point. Overlong input encodings are therefore normalized.
  //
  // T must be constructible from "" and provide size(), data() and
  // append(const char *, size_t).
  //
  // `t` is invoked once per decoded code point and its result is what gets
  // encoded. When omitted, code points are passed through unchanged. Transform
  // defaults to a plain function pointer so that the default argument can
  // actually be used: a template parameter cannot be deduced from a default
  // function argument.
  //
  // Throws std::runtime_error("Invalid UTF-8") when a lead byte is not a valid
  // UTF-8 lead byte, when the input ends in the middle of a sequence, or when a
  // continuation byte does not have the form 10xxxxxx.
  // Throws std::runtime_error("Invalid code point UTF-8 transformation") when
  // the value to encode is larger than 0x10ffff.
  template<typename T, typename Transform = wint_t (*)(wint_t)>
  inline T utf8canonical(const T &s, Transform t = [](wint_t c)->wint_t { return c; })
  {
    T sc = "";
    size_t avail = s.size();
    const char *ptr = s.data();
    char wbuf[4];

    // Reads one continuation byte and returns its six payload bits. Bytes that
    // are not of the form 10xxxxxx are rejected instead of being silently
    // folded into the code point.
    const auto continuation = [&ptr]() -> wint_t {
      const unsigned char b = static_cast<unsigned char>(*ptr++);
      if ((b & 0xc0) != 0x80)
        throw std::runtime_error("Invalid UTF-8");
      return b & 0x3f;
    };

    while (avail > 0)
    {
      // The lead byte determines the payload bits it carries and how many
      // continuation bytes follow.
      const unsigned char lead = static_cast<unsigned char>(*ptr++);
      --avail;

      wint_t cp = 0;
      size_t trailing = 0;
      if ((lead & 0x80) == 0)
      {
        cp = lead;
        trailing = 0;
      }
      else if ((lead & 0xe0) == 0xc0)
      {
        cp = lead & 0x1f;
        trailing = 1;
      }
      else if ((lead & 0xf0) == 0xe0)
      {
        cp = lead & 0x0f;
        trailing = 2;
      }
      else if ((lead & 0xf8) == 0xf0)
      {
        cp = lead & 0x07;
        trailing = 3;
      }
      else
        throw std::runtime_error("Invalid UTF-8");

      // Check the remaining length before touching any continuation byte.
      if (avail < trailing)
        throw std::runtime_error("Invalid UTF-8");
      for (size_t i = 0; i < trailing; ++i)
        cp = (cp << 6) | continuation();
      avail -= trailing;

      cp = t(cp);

      // The output length depends only on the transformed code point, not on
      // how many bytes the input used.
      size_t bytes = 0;
      if (cp <= 0x7f)
      {
        wbuf[0] = static_cast<char>(cp);
        bytes = 1;
      }
      else if (cp <= 0x7ff)
      {
        wbuf[0] = static_cast<char>(0xc0 | (cp >> 6));
        wbuf[1] = static_cast<char>(0x80 | (cp & 0x3f));
        bytes = 2;
      }
      else if (cp <= 0xffff)
      {
        wbuf[0] = static_cast<char>(0xe0 | (cp >> 12));
        wbuf[1] = static_cast<char>(0x80 | ((cp >> 6) & 0x3f));
        wbuf[2] = static_cast<char>(0x80 | (cp & 0x3f));
        bytes = 3;
      }
      else if (cp <= 0x10ffff)
      {
        wbuf[0] = static_cast<char>(0xf0 | (cp >> 18));
        wbuf[1] = static_cast<char>(0x80 | ((cp >> 12) & 0x3f));
        wbuf[2] = static_cast<char>(0x80 | ((cp >> 6) & 0x3f));
        wbuf[3] = static_cast<char>(0x80 | (cp & 0x3f));
        bytes = 4;
      }
      else
        throw std::runtime_error("Invalid code point UTF-8 transformation");

      sc.append(wbuf, bytes);
    }
    return sc;
  }
}
s
#define s(x, c)
Definition
aesb.c:47
Transform
static void Transform(hashState *ctx, const uint8_t *input, int msglen)
Definition
groestl.c:171
console.t
t
Definition
console.py:33
tools
Various Tools.
Definition
apply_permutation.h:40
tools::utf8canonical
T utf8canonical(const T &s, Transform t=[](wint_t c) ->wint_t { return c;})
Definition
utf8.h:39
T
#define T(x)
Generated on
for Monero by
1.17.0