LibreOffice
LibreOffice 7.3 SDK C/C++ API Reference
Loading...
Searching...
No Matches
character.hxx
Go to the documentation of this file.
1/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2/*
3 * This file is part of the LibreOffice project.
4 *
5 * This Source Code Form is subject to the terms of the Mozilla Public
6 * License, v. 2.0. If a copy of the MPL was not distributed with this
7 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8 *
9 * This file incorporates work covered by the following license notice:
10 *
11 * Licensed to the Apache Software Foundation (ASF) under one or more
12 * contributor license agreements. See the NOTICE file distributed
13 * with this work for additional information regarding copyright
14 * ownership. The ASF licenses this file to you under the Apache
15 * License, Version 2.0 (the "License"); you may not use this file
16 * except in compliance with the License. You may obtain a copy of
17 * the License at http://www.apache.org/licenses/LICENSE-2.0 .
18 */
19
20/*
21 * This file is part of LibreOffice published API.
22 */
23
24#ifndef INCLUDED_RTL_CHARACTER_HXX
25#define INCLUDED_RTL_CHARACTER_HXX
26
27#include "sal/config.h"
28
29#include <cassert>
30#include <cstddef>
31
32#include "sal/types.h"
33
34namespace rtl
35{
44inline SAL_CONSTEXPR bool isUnicodeCodePoint(sal_uInt32 code) { return code <= 0x10FFFF; }
45
54inline SAL_CONSTEXPR bool isAscii(sal_uInt32 code)
55{
56 assert(isUnicodeCodePoint(code));
57 return code <= 0x7F;
58}
59
60#if defined LIBO_INTERNAL_ONLY
61bool isAscii(char) = delete;
62bool isAscii(signed char) = delete;
63template <typename T> inline constexpr bool isAscii(T code) { return isAscii(sal_uInt32(code)); }
64#endif
65
75inline SAL_CONSTEXPR bool isAsciiLowerCase(sal_uInt32 code)
76{
77 assert(isUnicodeCodePoint(code));
78 return code >= 'a' && code <= 'z';
79}
80
81#if defined LIBO_INTERNAL_ONLY
82bool isAsciiLowerCase(char) = delete;
83bool isAsciiLowerCase(signed char) = delete;
84template <typename T> inline constexpr bool isAsciiLowerCase(T code)
85{
86 return isAsciiLowerCase(sal_uInt32(code));
87}
88#endif
89
99inline SAL_CONSTEXPR bool isAsciiUpperCase(sal_uInt32 code)
100{
101 assert(isUnicodeCodePoint(code));
102 return code >= 'A' && code <= 'Z';
103}
104
105#if defined LIBO_INTERNAL_ONLY
106bool isAsciiUpperCase(char) = delete;
107bool isAsciiUpperCase(signed char) = delete;
108template <typename T> inline constexpr bool isAsciiUpperCase(T code)
109{
110 return isAsciiUpperCase(sal_uInt32(code));
111}
112#endif
113
123inline SAL_CONSTEXPR bool isAsciiAlpha(sal_uInt32 code)
124{
125 assert(isUnicodeCodePoint(code));
126 return isAsciiLowerCase(code) || isAsciiUpperCase(code);
127}
128
129#if defined LIBO_INTERNAL_ONLY
130bool isAsciiAlpha(char) = delete;
131bool isAsciiAlpha(signed char) = delete;
132template <typename T> inline constexpr bool isAsciiAlpha(T code)
133{
134 return isAsciiAlpha(sal_uInt32(code));
135}
136#endif
137
147inline SAL_CONSTEXPR bool isAsciiDigit(sal_uInt32 code)
148{
149 assert(isUnicodeCodePoint(code));
150 return code >= '0' && code <= '9';
151}
152
153#if defined LIBO_INTERNAL_ONLY
154bool isAsciiDigit(char) = delete;
155bool isAsciiDigit(signed char) = delete;
156template <typename T> inline constexpr bool isAsciiDigit(T code)
157{
158 return isAsciiDigit(sal_uInt32(code));
159}
160#endif
161
171inline SAL_CONSTEXPR bool isAsciiAlphanumeric(sal_uInt32 code)
172{
173 assert(isUnicodeCodePoint(code));
174 return isAsciiDigit(code) || isAsciiAlpha(code);
175}
176
177#if defined LIBO_INTERNAL_ONLY
178bool isAsciiAlphanumeric(char) = delete;
179bool isAsciiAlphanumeric(signed char) = delete;
180template <typename T> inline constexpr bool isAsciiAlphanumeric(T code)
181{
182 return isAsciiAlphanumeric(sal_uInt32(code));
183}
184#endif
185
195inline SAL_CONSTEXPR bool isAsciiCanonicHexDigit(sal_uInt32 code)
196{
197 assert(isUnicodeCodePoint(code));
198 return isAsciiDigit(code) || (code >= 'A' && code <= 'F');
199}
200
201#if defined LIBO_INTERNAL_ONLY
202bool isAsciiCanonicHexDigit(char) = delete;
203bool isAsciiCanonicHexDigit(signed char) = delete;
204template <typename T> inline constexpr bool isAsciiCanonicHexDigit(T code)
205{
206 return isAsciiCanonicHexDigit(sal_uInt32(code));
207}
208#endif
209
219inline SAL_CONSTEXPR bool isAsciiHexDigit(sal_uInt32 code)
220{
221 assert(isUnicodeCodePoint(code));
222 return isAsciiCanonicHexDigit(code) || (code >= 'a' && code <= 'f');
223}
224
225#if defined LIBO_INTERNAL_ONLY
226bool isAsciiHexDigit(char) = delete;
227bool isAsciiHexDigit(signed char) = delete;
228template <typename T> inline constexpr bool isAsciiHexDigit(T code)
229{
230 return isAsciiHexDigit(sal_uInt32(code));
231}
232#endif
233
242inline SAL_CONSTEXPR bool isAsciiOctalDigit(sal_uInt32 code)
243{
244 assert(isUnicodeCodePoint(code));
245 return code >= '0' && code <= '7';
246}
247
248#if defined LIBO_INTERNAL_ONLY
249bool isAsciiOctalDigit(char) = delete;
250bool isAsciiOctalDigit(signed char) = delete;
251template <typename T> inline constexpr bool isAsciiOctalDigit(T code)
252{
253 return isAsciiOctalDigit(sal_uInt32(code));
254}
255#endif
256
266inline SAL_CONSTEXPR bool isAsciiWhiteSpace(sal_uInt32 code)
267{
268 assert(isUnicodeCodePoint(code));
269 return code == ' ' || code == '\f' || code == '\n' || code == '\r' || code == '\t'
270 || code == '\v';
271}
272
273#if defined LIBO_INTERNAL_ONLY
274bool isAsciiWhiteSpace(char) = delete;
275bool isAsciiWhiteSpace(signed char) = delete;
276template <typename T> inline constexpr bool isAsciiWhiteSpace(T code)
277{
278 return isAsciiWhiteSpace(sal_uInt32(code));
279}
280#endif
281
290inline SAL_CONSTEXPR sal_uInt32 toAsciiUpperCase(sal_uInt32 code)
291{
292 assert(isUnicodeCodePoint(code));
293 return isAsciiLowerCase(code) ? code - 32 : code;
294}
295
296#if defined LIBO_INTERNAL_ONLY
297sal_uInt32 toAsciiUpperCase(char) = delete;
298sal_uInt32 toAsciiUpperCase(signed char) = delete;
299template <typename T> inline constexpr sal_uInt32 toAsciiUpperCase(T code)
300{
301 return toAsciiUpperCase(sal_uInt32(code));
302}
303#endif
304
313inline SAL_CONSTEXPR sal_uInt32 toAsciiLowerCase(sal_uInt32 code)
314{
315 assert(isUnicodeCodePoint(code));
316 return isAsciiUpperCase(code) ? code + 32 : code;
317}
318
319#if defined LIBO_INTERNAL_ONLY
320sal_uInt32 toAsciiLowerCase(char) = delete;
321sal_uInt32 toAsciiLowerCase(signed char) = delete;
322template <typename T> inline constexpr sal_uInt32 toAsciiLowerCase(T code)
323{
324 return toAsciiLowerCase(sal_uInt32(code));
325}
326#endif
327
340inline SAL_CONSTEXPR sal_Int32 compareIgnoreAsciiCase(sal_uInt32 code1, sal_uInt32 code2)
341{
342 assert(isUnicodeCodePoint(code1));
343 assert(isUnicodeCodePoint(code2));
344 return static_cast<sal_Int32>(toAsciiLowerCase(code1))
345 - static_cast<sal_Int32>(toAsciiLowerCase(code2));
346}
347
349namespace detail
350{
351sal_uInt32 const surrogatesHighFirst = 0xD800;
352sal_uInt32 const surrogatesHighLast = 0xDBFF;
353sal_uInt32 const surrogatesLowFirst = 0xDC00;
354sal_uInt32 const surrogatesLowLast = 0xDFFF;
355}
357
366inline SAL_CONSTEXPR bool isSurrogate(sal_uInt32 code)
367{
368 assert(isUnicodeCodePoint(code));
369 return code >= detail::surrogatesHighFirst && code <= detail::surrogatesLowLast;
370}
371
380inline SAL_CONSTEXPR bool isHighSurrogate(sal_uInt32 code)
381{
382 assert(isUnicodeCodePoint(code));
383 return code >= detail::surrogatesHighFirst && code <= detail::surrogatesHighLast;
384}
385
394inline SAL_CONSTEXPR bool isLowSurrogate(sal_uInt32 code)
395{
396 assert(isUnicodeCodePoint(code));
397 return code >= detail::surrogatesLowFirst && code <= detail::surrogatesLowLast;
398}
399
409{
410 assert(isUnicodeCodePoint(code));
411 assert(code >= 0x10000);
412 return static_cast<sal_Unicode>(((code - 0x10000) >> 10) | detail::surrogatesHighFirst);
413}
414
424{
425 assert(isUnicodeCodePoint(code));
426 assert(code >= 0x10000);
427 return static_cast<sal_Unicode>(((code - 0x10000) & 0x3FF) | detail::surrogatesLowFirst);
428}
429
440inline SAL_CONSTEXPR sal_uInt32 combineSurrogates(sal_uInt32 high, sal_uInt32 low)
441{
442 assert(isHighSurrogate(high));
443 assert(isLowSurrogate(low));
444 return ((high - detail::surrogatesHighFirst) << 10) + (low - detail::surrogatesLowFirst)
445 + 0x10000;
446}
447
460inline SAL_CONSTEXPR std::size_t splitSurrogates(sal_uInt32 code, sal_Unicode* output)
461{
462 assert(isUnicodeCodePoint(code));
463 assert(output != NULL);
464 if (code < 0x10000)
465 {
466 output[0] = code;
467 return 1;
468 }
469 else
470 {
471 output[0] = getHighSurrogate(code);
472 output[1] = getLowSurrogate(code);
473 return 2;
474 }
475}
476
485inline SAL_CONSTEXPR bool isUnicodeScalarValue(sal_uInt32 code)
486{
487 return isUnicodeCodePoint(code) && !isSurrogate(code);
488}
489}
490
491#endif
492
493/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
#define SAL_CONSTEXPR
C++11 "constexpr" feature.
Definition: types.h:404
sal_uInt16 sal_Unicode
Definition: types.h:123
rtl
Definition: bootstrap.hxx:34
SAL_CONSTEXPR bool isUnicodeScalarValue(sal_uInt32 code)
Check for Unicode scalar value.
Definition: character.hxx:485
SAL_CONSTEXPR bool isAsciiHexDigit(sal_uInt32 code)
Check for ASCII hexadecimal digit character.
Definition: character.hxx:219
SAL_CONSTEXPR sal_Unicode getLowSurrogate(sal_uInt32 code)
Get low surrogate half of a non-BMP Unicode code point.
Definition: character.hxx:423
SAL_CONSTEXPR bool isLowSurrogate(sal_uInt32 code)
Check for low surrogate.
Definition: character.hxx:394
SAL_CONSTEXPR bool isUnicodeCodePoint(sal_uInt32 code)
Check for Unicode code point.
Definition: character.hxx:44
SAL_CONSTEXPR bool isAsciiAlpha(sal_uInt32 code)
Check for ASCII alphabetic character.
Definition: character.hxx:123
SAL_CONSTEXPR bool isAsciiOctalDigit(sal_uInt32 code)
Check for ASCII octal digit character.
Definition: character.hxx:242
SAL_CONSTEXPR bool isAsciiLowerCase(sal_uInt32 code)
Check for ASCII lower case character.
Definition: character.hxx:75
SAL_CONSTEXPR bool isAsciiWhiteSpace(sal_uInt32 code)
Check for ASCII white space character.
Definition: character.hxx:266
SAL_CONSTEXPR bool isAsciiCanonicHexDigit(sal_uInt32 code)
Check for ASCII canonic hexadecimal digit character.
Definition: character.hxx:195
SAL_CONSTEXPR std::size_t splitSurrogates(sal_uInt32 code, sal_Unicode *output)
Split a Unicode code point into UTF-16 code units.
Definition: character.hxx:460
SAL_CONSTEXPR sal_Int32 compareIgnoreAsciiCase(sal_uInt32 code1, sal_uInt32 code2)
Compare two characters ignoring ASCII case.
Definition: character.hxx:340
SAL_CONSTEXPR bool isAscii(sal_uInt32 code)
Check for ASCII character.
Definition: character.hxx:54
SAL_CONSTEXPR sal_uInt32 combineSurrogates(sal_uInt32 high, sal_uInt32 low)
Combine surrogates to form a code point.
Definition: character.hxx:440
SAL_CONSTEXPR sal_uInt32 toAsciiUpperCase(sal_uInt32 code)
Convert a character, if ASCII, to upper case.
Definition: character.hxx:290
SAL_CONSTEXPR bool isAsciiAlphanumeric(sal_uInt32 code)
Check for ASCII alphanumeric character.
Definition: character.hxx:171
SAL_CONSTEXPR bool isAsciiUpperCase(sal_uInt32 code)
Check for ASCII upper case character.
Definition: character.hxx:99
SAL_CONSTEXPR bool isAsciiDigit(sal_uInt32 code)
Check for ASCII digit character.
Definition: character.hxx:147
SAL_CONSTEXPR bool isSurrogate(sal_uInt32 code)
Check for surrogate.
Definition: character.hxx:366
SAL_CONSTEXPR bool isHighSurrogate(sal_uInt32 code)
Check for high surrogate.
Definition: character.hxx:380
SAL_CONSTEXPR sal_uInt32 toAsciiLowerCase(sal_uInt32 code)
Convert a character, if ASCII, to lower case.
Definition: character.hxx:313
SAL_CONSTEXPR sal_Unicode getHighSurrogate(sal_uInt32 code)
Get high surrogate half of a non-BMP Unicode code point.
Definition: character.hxx:408