UFO: Alien Invasion
Toggle main menu visibility
Loading...
Searching...
No Matches
utf8.cpp
Go to the documentation of this file.
1
4
5
/*
All original material Copyright (C) 2002-2025 UFO: Alien Invasion.

Copyright (C) 1997-2001 Id Software, Inc.

This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.

See the GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
*/
25
26
#include "
utf8.h
"
27
#include <string.h>
28
35
int
UTF8_delete_char_at
(
char
* s,
int
pos)
36
{
37
/* Convert the UTF-8 char offset to byte offset */
38
pos =
UTF8_char_offset_to_byte_offset
(s, pos);
39
40
int
start = pos;
41
int
next = pos;
42
43
while
(start > 0 &&
UTF8_CONTINUATION_BYTE
(s[start]))
44
start--;
45
if
(s[next] != 0)
46
next++;
47
while
(s[next] != 0 &&
UTF8_CONTINUATION_BYTE
(s[next]))
48
next++;
49
/* memmove is the only standard copying function that is guaranteed
50
* to work if the source and destination overlap. */
51
memmove(&s[start], &s[next], strlen(&s[next]) + 1);
52
return
(next - start);
53
}
54
63
int
UTF8_insert_char_at
(
char
* s,
int
n,
int
pos,
int
c)
64
{
65
/* Convert the UTF-8 char offset to byte offset */
66
pos =
UTF8_char_offset_to_byte_offset
(s, pos);
67
68
const
int
utf8len =
UTF8_encoded_len
(c);
69
const
int
tail = strlen(&s[pos]) + 1;
70
71
if
(utf8len == 0)
72
return
0;
73
74
if
(pos + tail + utf8len > n)
75
return
0;
76
77
/* Insertion: move up rest of string. Also moves string terminator. */
78
memmove(&s[pos + utf8len], &s[pos], tail);
79
80
if
(c <= 0x7f) {
81
s[pos] = c;
82
}
else
if
(c <= 0x7ff) {
/* c has 11 bits */
83
s[pos] = 0xc0 | (c >> 6);
/* high 5 bits */
84
s[pos + 1] = 0x80 | (c & 0x3f);
/* low 6 bits */
85
}
else
if
(c <= 0xffff) {
/* c has 16 bits */
86
s[pos] = 0xe0 | (c >> 12);
/* high 4 bits */
87
s[pos + 1] = 0x80 | ((c >> 6) & 0x3f);
/* mid 6 bits */
88
s[pos + 2] = 0x80 | (c & 0x3f);
/* low 6 bits */
89
}
else
if
(c <= 0x10ffff) {
/* c has 21 bits */
90
s[pos] = 0xf0 | (c >> 18);
/* high 3 bits */
91
s[pos + 1] = 0x80 | ((c >> 12) & 0x3f);
/* mid 6 bits */
92
s[pos + 2] = 0x80 | ((c >> 6) & 0x3f);
/* mid 6 bits */
93
s[pos + 3] = 0x80 | (c & 0x3f);
/* low 6 bits */
94
}
95
96
return
utf8len;
97
}
98
109
/**
 * @brief Length of the UTF-8 character starting with this byte.
 * @param[in] c First byte of the character.
 * @return Number of bytes (1 to 4) of the sequence introduced by @c c, or 0
 * if @c c cannot start a sequence (continuation byte, or a lead byte of the
 * obsolete 5 and 6 byte forms).
 */
int UTF8_char_len (unsigned char c)
{
	/* 0xxxxxxx: single byte (ASCII) */
	if ((c & 0x80) == 0x00)
		return 1;
	/* 110xxxxx: lead byte of a two byte sequence */
	if ((c & 0xe0) == 0xc0)
		return 2;
	/* 1110xxxx: lead byte of a three byte sequence */
	if ((c & 0xf0) == 0xe0)
		return 3;
	/* 11110xxx: lead byte of a four byte sequence */
	if ((c & 0xf8) == 0xf0)
		return 4;
	/* What is left is 10xxxxxx (a continuation byte) and 11111xxx (the
	 * 5 and 6 byte sequences UTF-8 once defined but no longer allows). */
	return 0;
}
125
132
int
UTF8_next
(
const
char
** str)
133
{
134
size_t
len
,
i
;
135
int
cp, min;
136
const
char
* s = *str;
137
138
if
(s[0] ==
'\0'
)
139
return
-1;
140
141
const
unsigned
char
*
buf
= (
const
unsigned
char
*)(s);
142
143
if
(
buf
[0] < 0x80) {
144
len
= 1;
145
min = 0;
146
cp =
buf
[0];
147
}
else
if
(
buf
[0] < 0xC0) {
148
return
-1;
149
}
else
if
(
buf
[0] < 0xE0) {
150
len
= 2;
151
min = 1 << 7;
152
cp =
buf
[0] & 0x1F;
153
}
else
if
(
buf
[0] < 0xF0) {
154
len
= 3;
155
min = 1 << (5 + 6);
156
cp =
buf
[0] & 0x0F;
157
}
else
if
(
buf
[0] < 0xF8) {
158
len
= 4;
159
min = 1 << (4 + 6 + 6);
160
cp =
buf
[0] & 0x07;
161
}
else
{
162
return
-1;
163
}
164
165
for
(
i
= 1;
i
<
len
;
i
++) {
166
if
(!
UTF8_CONTINUATION_BYTE
(
buf
[
i
]))
167
return
-1;
168
cp = (cp << 6) | (
buf
[
i
] & 0x3F);
169
}
170
171
if
(cp < min)
172
return
-1;
173
174
if
(0xD800 <= cp && cp <= 0xDFFF)
175
return
-1;
176
177
if
(0x110000 <= cp)
178
return
-1;
179
180
*str +=
len
;
181
return
cp;
182
}
183
188
/**
 * @brief Number of bytes needed to encode a Unicode code point in UTF-8.
 * @param[in] c The code point.
 * @return 1 to 4 for code points from 0 up to 0x10FFFF, 0 if @c c is negative
 * or above 0x10FFFF and therefore cannot be encoded.
 * @note The surrogate range 0xD800-0xDFFF is not filtered out here.
 */
int UTF8_encoded_len (int c)
{
	/* Negative values (e.g. a -1 error code) are no code points. Without
	 * this test they would fall into the one byte case below. */
	if (c < 0)
		return 0;
	if (c <= 0x7F)
		return 1;
	if (c <= 0x07FF)
		return 2;
	if (c <= 0xFFFF)
		return 3;
	if (c <= 0x10FFFF)  /* highest defined Unicode code */
		return 4;
	return 0;
}
200
207
size_t
UTF8_strlen
(
const
char
* str)
208
{
209
size_t
result = 0;
210
211
while
(str[0] !=
'\0'
) {
212
const
int
n =
UTF8_char_len
((
unsigned
char
)*str);
213
str += n;
214
result++;
215
}
216
return
result;
217
}
218
227
int
UTF8_char_offset_to_byte_offset
(
char
* str,
int
pos)
228
{
229
int
result = 0;
230
231
while
(pos > 0 && str[0] !=
'\0'
) {
232
const
int
n =
UTF8_char_len
((
unsigned
char
)*str);
233
str += n;
234
result += n;
235
pos--;
236
}
237
return
result;
238
}
239
247
char
*
UTF8_strncpyz
(
char
*
dest
,
const
char
* src,
size_t
limit)
248
{
249
size_t
length
;
250
251
length
= strlen(src);
252
if
(
length
> limit - 1) {
253
length
= limit - 1;
254
if
(
length
> 0 && (
unsigned
char
) src[
length
- 1] >= 0x80) {
255
size_t
i
=
length
- 1;
256
while
((
i
> 0) &&
UTF8_CONTINUATION_BYTE
((
unsigned
char
) src[
i
]))
257
i
--;
258
if
(
UTF8_char_len
(src[
i
]) +
i
>
length
)
259
length
=
i
;
260
}
261
}
262
263
memcpy(
dest
, src,
length
);
264
dest
[
length
] =
'\0'
;
265
266
return
dest
;
267
}
buf
voidpf void * buf
Definition
ioapi.h:42
len
QGL_EXTERN GLuint GLchar GLuint * len
Definition
r_gl.h:99
dest
QGL_EXTERN GLenum GLuint * dest
Definition
r_gl.h:101
length
QGL_EXTERN GLuint GLsizei GLsizei * length
Definition
r_gl.h:110
i
QGL_EXTERN GLint i
Definition
r_gl.h:113
UTF8_char_len
int UTF8_char_len(unsigned char c)
length of UTF-8 character starting with this byte.
Definition
utf8.cpp:109
UTF8_delete_char_at
int UTF8_delete_char_at(char *s, int pos)
Delete a whole (possibly multibyte) character from a string.
Definition
utf8.cpp:35
UTF8_insert_char_at
int UTF8_insert_char_at(char *s, int n, int pos, int c)
Insert a (possibly multibyte) UTF-8 character into a string.
Definition
utf8.cpp:63
UTF8_strlen
size_t UTF8_strlen(const char *str)
Count the number of characters (not the number of bytes) of a zero-terminated string.
Definition
utf8.cpp:207
UTF8_next
int UTF8_next(const char **str)
Get the next utf-8 character from the given string.
Definition
utf8.cpp:132
UTF8_strncpyz
char * UTF8_strncpyz(char *dest, const char *src, size_t limit)
UTF8 capable string copy function.
Definition
utf8.cpp:247
UTF8_char_offset_to_byte_offset
int UTF8_char_offset_to_byte_offset(char *str, int pos)
Convert UTF-8 character offset to a byte offset in the given string.
Definition
utf8.cpp:227
UTF8_encoded_len
int UTF8_encoded_len(int c)
Definition
utf8.cpp:188
utf8.h
UTF8_CONTINUATION_BYTE
#define UTF8_CONTINUATION_BYTE(c)
Definition
utf8.h:35
src
shared
utf8.cpp
Generated on __DATE__ __TIME__ for UFO: Alien Invasion by
1.17.0