1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 /*************************************************************************
4 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
6 * Copyright 2000, 2010 Oracle and/or its affiliates.
8 * OpenOffice.org - a multi-platform office productivity suite
10 * This file is part of OpenOffice.org.
12 * OpenOffice.org is free software: you can redistribute it and/or modify
13 * it under the terms of the GNU Lesser General Public License version 3
14 * only, as published by the Free Software Foundation.
16 * OpenOffice.org is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 * GNU Lesser General Public License version 3 for more details
20 * (a copy is included in the LICENSE file that accompanied this code).
22 * You should have received a copy of the GNU Lesser General Public License
23 * version 3 along with OpenOffice.org. If not, see
24 * <http://www.openoffice.org/license.html>
25 * for a copy of the LGPLv3 License.
27 ************************************************************************/
30 #include "osl/diagnose.h"
31 #include "sal/types.h"
33 int ImplIsNoncharacter(sal_uInt32 nUtf32
)
35 /* All code points that are noncharacters, as of Unicode 3.1.1. */
36 return (nUtf32
>= 0xFDD0 && nUtf32
<= 0xFDEF)
37 || (nUtf32
& 0xFFFF) >= 0xFFFE
41 int ImplIsControlOrFormat(sal_uInt32 nUtf32
)
43 /* All code points of <http://www.unicode.org/Public/UNIDATA/
44 UnicodeData.txt>, Version 3.1.1, that have a General Category of Cc
45 (Other, Control) or Cf (Other, Format).
47 return nUtf32
<= 0x001F
48 || (nUtf32
>= 0x007F && nUtf32
<= 0x009F)
49 || nUtf32
== 0x070F /* SYRIAC ABBREVIATION MARK */
50 || nUtf32
== 0x180B /* MONGOLIAN FREE VARIATION SELECTOR ONE */
51 || nUtf32
== 0x180C /* MONGOLIAN FREE VARIATION SELECTOR TWO */
52 || nUtf32
== 0x180D /* MONGOLIAN FREE VARIATION SELECTOR THREE */
53 || nUtf32
== 0x180E /* MONGOLIAN VOWEL SEPARATOR */
54 || nUtf32
== 0x200C /* ZERO WIDTH NON-JOINER */
55 || nUtf32
== 0x200D /* ZERO WIDTH JOINER */
56 || nUtf32
== 0x200E /* LEFT-TO-RIGHT MARK */
57 || nUtf32
== 0x200F /* RIGHT-TO-LEFT MARK */
58 || nUtf32
== 0x202A /* LEFT-TO-RIGHT EMBEDDING */
59 || nUtf32
== 0x202B /* RIGHT-TO-LEFT EMBEDDING */
60 || nUtf32
== 0x202C /* POP DIRECTIONAL FORMATTING */
61 || nUtf32
== 0x202D /* LEFT-TO-RIGHT OVERRIDE */
62 || nUtf32
== 0x202E /* RIGHT-TO-LEFT OVERRIDE */
63 || nUtf32
== 0x206A /* INHIBIT SYMMETRIC SWAPPING */
64 || nUtf32
== 0x206B /* ACTIVATE SYMMETRIC SWAPPING */
65 || nUtf32
== 0x206C /* INHIBIT ARABIC FORM SHAPING */
66 || nUtf32
== 0x206D /* ACTIVATE ARABIC FORM SHAPING */
67 || nUtf32
== 0x206E /* NATIONAL DIGIT SHAPES */
68 || nUtf32
== 0x206F /* NOMINAL DIGIT SHAPES */
69 || nUtf32
== 0xFEFF /* ZERO WIDTH NO-BREAK SPACE */
70 || nUtf32
== 0xFFF9 /* INTERLINEAR ANNOTATION ANCHOR */
71 || nUtf32
== 0xFFFA /* INTERLINEAR ANNOTATION SEPARATOR */
72 || nUtf32
== 0xFFFB /* INTERLINEAR ANNOTATION TERMINATOR */
73 || nUtf32
== 0x1D173 /* MUSICAL SYMBOL BEGIN BEAM */
74 || nUtf32
== 0x1D174 /* MUSICAL SYMBOL END BEAM */
75 || nUtf32
== 0x1D175 /* MUSICAL SYMBOL BEGIN TIE */
76 || nUtf32
== 0x1D176 /* MUSICAL SYMBOL END TIE */
77 || nUtf32
== 0x1D177 /* MUSICAL SYMBOL BEGIN SLUR */
78 || nUtf32
== 0x1D178 /* MUSICAL SYMBOL END SLUR */
79 || nUtf32
== 0x1D179 /* MUSICAL SYMBOL BEGIN PHRASE */
80 || nUtf32
== 0x1D17A /* MUSICAL SYMBOL END PHRASE */
81 || nUtf32
== 0xE0001 /* LANGUAGE TAG */
82 || (nUtf32
>= 0xE0020 && nUtf32
<= 0xE007F);
85 int ImplIsHighSurrogate(sal_uInt32 nUtf32
)
87 /* All code points that are high-surrogates, as of Unicode 3.1.1. */
88 return nUtf32
>= 0xD800 && nUtf32
<= 0xDBFF;
91 int ImplIsLowSurrogate(sal_uInt32 nUtf32
)
93 /* All code points that are low-surrogates, as of Unicode 3.1.1. */
94 return nUtf32
>= 0xDC00 && nUtf32
<= 0xDFFF;
97 int ImplIsPrivateUse(sal_uInt32 nUtf32
)
99 /* All code points of <http://www.unicode.org/Public/UNIDATA/
100 UnicodeData.txt>, Version 3.1.1, that have a General Category of Co
101 (Other, Private Use).
103 return (nUtf32
>= 0xE000 && nUtf32
<= 0xF8FF)
104 || (nUtf32
>= 0xF0000 && nUtf32
<= 0xFFFFD)
105 || (nUtf32
>= 0x100000 && nUtf32
<= 0x10FFFD);
108 int ImplIsZeroWidth(sal_uInt32 nUtf32
)
110 /* All code points of <http://www.unicode.org/Public/UNIDATA/
111 UnicodeData.txt>, Version 3.1.1, that have "ZERO WIDTH" in their
114 return nUtf32
== 0x200B /* ZERO WIDTH SPACE */
115 || nUtf32
== 0x200C /* ZERO WIDTH NON-JOINER */
116 || nUtf32
== 0x200D /* ZERO WIDTH JOINER */
117 || nUtf32
== 0xFEFF; /* ZEOR WIDTH NO-BREAK SPACE */
120 sal_uInt32
ImplGetHighSurrogate(sal_uInt32 nUtf32
)
122 OSL_ENSURE(nUtf32
>= 0x10000, "specification violation");
123 return ((nUtf32
- 0x10000) >> 10) | 0xD800;
126 sal_uInt32
ImplGetLowSurrogate(sal_uInt32 nUtf32
)
128 OSL_ENSURE(nUtf32
>= 0x10000, "specification violation");
129 return ((nUtf32
- 0x10000) & 0x3FF) | 0xDC00;
132 sal_uInt32
ImplCombineSurrogates(sal_uInt32 nHigh
, sal_uInt32 nLow
)
134 OSL_ENSURE(ImplIsHighSurrogate(nHigh
) && ImplIsLowSurrogate(nLow
),
135 "specification violation");
136 return (((nHigh
& 0x3FF) << 10) | (nLow
& 0x3FF)) + 0x10000;
139 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */