share/man/man9f/u8_textprep_str.9f

   1 '\" te
   2 .\" Copyright (c) 2007, Sun Microsystems Inc. All Rights Reserved.
   3 .\" The contents of this file are subject to the terms of the Common Development and Distribution License (the "License").  You may not use this file except in compliance with the License.
   4 .\" You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE or http://www.opensolaris.org/os/licensing.  See the License for the specific language governing permissions and limitations under the License.
   5 .\" When distributing Covered Code, include this CDDL HEADER in each file and include the License file at usr/src/OPENSOLARIS.LICENSE.  If applicable, add the following below this CDDL HEADER, with the fields enclosed by brackets "[]" replaced with your own identifying information: Portions Copyright [yyyy] [name of copyright owner]
   6 .TH U8_TEXTPREP_STR 9F "Sep 18, 2007"
   7 .SH NAME
   8 u8_textprep_str \- string-based UTF-8 text preparation function
   9 .SH SYNOPSIS
  10 .LP
  11 .nf
  12 #include <sys/types.h>
  13 #include <sys/errno.h>
  14 #include <sys/sunddi.h>
  15
  16 \fBsize_t\fR \fBu8_textprep_str\fR(\fBchar *\fR\fIinarray\fR, \fBsize_t *\fR\fIinlen\fR,
  17      \fBchar *\fR\fIoutarray\fR, \fBsize_t *\fR\fIoutlen\fR, \fBint\fR \fIflag\fR,
  18      \fBsize_t\fR \fIunicode_version\fR, \fBint *\fR\fIerrno\fR);
  19 .fi
  20
  21 .SH INTERFACE LEVEL
  22 .sp
  23 .LP
  24 Solaris DDI specific (Solaris DDI)
  25 .SH PARAMETERS
  26 .sp
  27 .ne 2
  28 .na
  29 \fB\fIinarray\fR\fR
  30 .ad
  31 .RS 20n
  32 A pointer to a byte array containing a sequence of UTF-8 character bytes to be
  33 prepared.
  34 .RE
  35
  36 .sp
  37 .ne 2
  38 .na
  39 \fB\fIinlen\fR\fR
  40 .ad
  41 .RS 20n
  42 As input argument, the number of bytes to be prepared in \fIinarray\fR. As
  43 output argument, the number of bytes in \fIinarray\fR still not consumed.
  44 .RE
  45
  46 .sp
  47 .ne 2
  48 .na
  49 \fB\fIoutarray\fR\fR
  50 .ad
  51 .RS 20n
  52 A pointer to a byte array where prepared UTF-8 character bytes can be saved.
  53 .RE
  54
  55 .sp
  56 .ne 2
  57 .na
  58 \fB\fIoutlen\fR\fR
  59 .ad
  60 .RS 20n
  61 As input argument, the number of available bytes at \fIoutarray\fR where
  62 prepared character bytes can be saved.  As output argument, after the
  63 conversion, the number of bytes still available at \fIoutarray\fR.
  64 .RE
  65
  66 .sp
  67 .ne 2
  68 .na
  69 \fB\fIflag\fR\fR
  70 .ad
  71 .RS 20n
  72 The possible preparation options constructed by a bitwise-inclusive-OR of the
  73 following values:
  74 .sp
  75 .ne 2
  76 .na
  77 \fB\fBU8_TEXTPREP_IGNORE_NULL\fR\fR
  78 .ad
  79 .sp .6
  80 .RS 4n
  81 Normally \fBu8_textprep_str()\fR stops the preparation if it encounters a null
  82 byte, even if the current \fIinlen\fR points to a value greater than zero.
  83 .sp
  84 With this option, a null byte does not stop the preparation, and the preparation
  85 continues until the number of \fIinarray\fR bytes specified by \fIinlen\fR are
  86 all consumed or an error occurs.
  87 .RE
  88
  89 .sp
  90 .ne 2
  91 .na
  92 \fB\fBU8_TEXTPREP_IGNORE_INVALID\fR\fR
  93 .ad
  94 .sp .6
  95 .RS 4n
  96 Normally \fBu8_textprep_str()\fR stops the preparation if it encounters illegal
  97 or incomplete characters with corresponding \fIerrno\fR values.
  98 .sp
  99 When this option is set, \fBu8_textprep_str()\fR does not stop the preparation
 100 and instead treats such characters as not needing any preparation.
 101 .RE
 102
 103 .sp
 104 .ne 2
 105 .na
 106 \fB\fBU8_TEXTPREP_TOUPPER\fR\fR
 107 .ad
 108 .sp .6
 109 .RS 4n
 110 Map lowercase characters to uppercase characters if applicable.
 111 .RE
 112
 113 .sp
 114 .ne 2
 115 .na
 116 \fB\fBU8_TEXTPREP_TOLOWER\fR\fR
 117 .ad
 118 .sp .6
 119 .RS 4n
 120 Map uppercase characters to lowercase characters if applicable.
 121 .RE
 122
 123 .sp
 124 .ne 2
 125 .na
 126 \fB\fBU8_TEXTPREP_NFD\fR\fR
 127 .ad
 128 .sp .6
 129 .RS 4n
 130 Apply Unicode Normalization Form D.
 131 .RE
 132
 133 .sp
 134 .ne 2
 135 .na
 136 \fB\fBU8_TEXTPREP_NFC\fR\fR
 137 .ad
 138 .sp .6
 139 .RS 4n
 140 Apply Unicode Normalization Form C.
 141 .RE
 142
 143 .sp
 144 .ne 2
 145 .na
 146 \fB\fBU8_TEXTPREP_NFKD\fR\fR
 147 .ad
 148 .sp .6
 149 .RS 4n
 150 Apply Unicode Normalization Form KD.
 151 .RE
 152
 153 .sp
 154 .ne 2
 155 .na
 156 \fB\fBU8_TEXTPREP_NFKC\fR\fR
 157 .ad
 158 .sp .6
 159 .RS 4n
 160 Apply Unicode Normalization Form KC.
 161 .RE
 162
 163 Only one case folding option is allowed. Only one Unicode Normalization option
 164 is allowed.
 165 .sp
 166 When a case folding option and a Unicode Normalization option are specified
 167 together, UTF-8 text preparation is done by doing case folding first and then
 168 Unicode Normalization.
 169 .sp
 170 If no option is specified, no processing occurs except the simple copying of
 171 bytes from input to output.
 172 .RE
 173
 174 .sp
 175 .ne 2
 176 .na
 177 \fB\fIunicode_version\fR\fR
 178 .ad
 179 .RS 20n
 180 The version of Unicode data that should be used during UTF-8 text preparation.
 181 The following values are supported:
 182 .sp
 183 .ne 2
 184 .na
 185 \fB\fBU8_UNICODE_320\fR\fR
 186 .ad
 187 .sp .6
 188 .RS 4n
 189 Use Unicode 3.2.0 data during text preparation.
 190 .RE
 191
 192 .sp
 193 .ne 2
 194 .na
 195 \fB\fBU8_UNICODE_500\fR\fR
 196 .ad
 197 .sp .6
 198 .RS 4n
 199 Use Unicode 5.0.0 data during text preparation.
 200 .RE
 201
 202 .sp
 203 .ne 2
 204 .na
 205 \fB\fBU8_UNICODE_LATEST\fR\fR
 206 .ad
 207 .sp .6
 208 .RS 4n
 209 Use the latest Unicode version data available, which is currently Unicode 5.0.0.
 210 .RE
 211
 212 .RE
 213
 214 .sp
 215 .ne 2
 216 .na
 217 \fB\fIerrno\fR\fR
 218 .ad
 219 .RS 20n
 220 The error value when preparation is not completed or fails. The following
 221 values are supported:
 222 .sp
 223 .ne 2
 224 .na
 225 \fB\fBE2BIG\fR\fR
 226 .ad
 227 .RS 10n
 228 Text preparation stopped due to lack of space in the output array.
 229 .RE
 230
 231 .sp
 232 .ne 2
 233 .na
 234 \fB\fBEBADF\fR\fR
 235 .ad
 236 .RS 10n
 237 Specified option values are conflicting and cannot be supported.
 238 .RE
 239
 240 .sp
 241 .ne 2
 242 .na
 243 \fB\fBEILSEQ\fR\fR
 244 .ad
 245 .RS 10n
 246 Text preparation stopped due to an input byte that does not belong to UTF-8.
 247 .RE
 248
 249 .sp
 250 .ne 2
 251 .na
 252 \fB\fBEINVAL\fR\fR
 253 .ad
 254 .RS 10n
 255 Text preparation stopped due to an incomplete UTF-8 character at the end of the
 256 input array.
 257 .RE
 258
 259 .sp
 260 .ne 2
 261 .na
 262 \fB\fBERANGE\fR\fR
 263 .ad
 264 .RS 10n
 265 The specified Unicode version value is not a supported version.
 266 .RE
 267
 268 .RE
 269
 270 .SH DESCRIPTION
 271 .sp
 272 .LP
 273 The \fBu8_textprep_str()\fR function prepares the sequence of UTF-8 characters
 274 in the array specified by \fIinarray\fR into a sequence of corresponding UTF-8
 275 characters prepared in the array specified by \fIoutarray\fR. The \fIinarray\fR
 276 argument points to the first character in the input byte
 277 array and \fIinlen\fR indicates the number of bytes to the end of the array to
 278 be prepared. The \fIoutarray\fR argument points to
 279 the first available byte in the output byte array and \fIoutlen\fR indicates the
 280 number of the available bytes to the end of the array. Unless \fIflag\fR includes
 281 \fBU8_TEXTPREP_IGNORE_NULL\fR, \fBu8_textprep_str()\fR stops when it
 282 encounters a null byte in the input array, regardless of the current
 283 \fIinlen\fR value.
 284 .sp
 285 .LP
 286 If \fIflag\fR does not include \fBU8_TEXTPREP_IGNORE_INVALID\fR and a sequence
 287 of input bytes does not form a valid UTF-8 character, preparation stops after
 288 the previous successfully prepared character. If \fIflag\fR does not include
 289 \fBU8_TEXTPREP_IGNORE_INVALID\fR and the input array ends with an incomplete
 290 UTF-8 character, preparation stops after the previous successfully prepared
 291 bytes. If the output array is not large enough to hold the entire prepared
 292 text, preparation stops just prior to the input bytes that would cause the
 293 output array to overflow. The value pointed to by \fIinlen\fR is decremented to
 294 reflect the number of bytes still not prepared in the input array. The value
 295 pointed to by \fIoutlen\fR is decremented to reflect the number of bytes still
 296 available in the output array.
 297 .SH RETURN VALUES
 298 .sp
 299 .LP
 300 The \fBu8_textprep_str()\fR function updates the values pointed to by
 301 \fIinlen\fR and \fIoutlen\fR arguments to reflect the extent of the
 302 preparation. When \fBU8_TEXTPREP_IGNORE_INVALID\fR is specified,
 303 \fBu8_textprep_str()\fR returns the number of illegal or incomplete characters
 304 found during the text preparation. When \fBU8_TEXTPREP_IGNORE_INVALID\fR is not
 305 specified and the text preparation is successful, the function returns 0. If
 306 the entire string in the input array is prepared, the value pointed to by
 307 \fIinlen\fR will be 0. If the text preparation is stopped due to any conditions
 308 mentioned above, the value pointed to by \fIinlen\fR will be non-zero and
 309 \fIerrno\fR is set to indicate the error. If such an error or any other error
 310 occurs, \fBu8_textprep_str()\fR returns (\fBsize_t\fR)-1 and sets \fIerrno\fR to
 311 indicate the error.
 312 .SH CONTEXT
 313 .sp
 314 .LP
 315 The \fBu8_textprep_str()\fR function can be called from user or interrupt
 316 context.
 317 .SH EXAMPLES
 318 .LP
 319 \fBExample 1 \fRSimple UTF-8 text preparation
 320 .sp
 321 .in +2
 322 .nf
 323 #include <sys/types.h>
 324 #include <sys/errno.h>
 325 #include <sys/sunddi.h>
 326 \&.
 327 \&.
 328 \&.
 329 size_t ret;
 330 char ib[MAXPATHLEN];
 331 char ob[MAXPATHLEN];
 332 size_t il, ol;
 333 int err;
 334 \&.
 335 \&.
 336 \&.
 337 /*
 338  * We got a UTF-8 pathname from somewhere.
 339  *
 340  * Calculate the length of input string including the terminating
 341  * NULL byte and prepare other arguments.
 342  */
 343 (void) strlcpy(ib, pathname, MAXPATHLEN);
 344 il = strlen(ib) + 1;
 345 ol = MAXPATHLEN;
 346
 347 /*
 348  * Do toupper case folding, apply Unicode Normalization Form D,
 349  * ignore NULL byte, and ignore any illegal/incomplete characters.
 350  */
 351 ret = u8_textprep_str(ib, &il, ob, &ol,
 352     (U8_TEXTPREP_IGNORE_NULL|U8_TEXTPREP_IGNORE_INVALID|
 353     U8_TEXTPREP_TOUPPER|U8_TEXTPREP_NFD), U8_UNICODE_LATEST, &err);
 354 if (ret == (size_t)-1) {
 355     if (err == E2BIG)
 356         return (-1);
 357     if (err == EBADF)
 358         return (-2);
 359     if (err == ERANGE)
 360         return (-3);
 361     return (-4);
 362 }
 363 .fi
 364 .in -2
 365
 366 .SH ATTRIBUTES
 367 .sp
 368 .LP
 369 See \fBattributes\fR(5) for descriptions of the following attributes:
 370 .sp
 371
 372 .sp
 373 .TS
 374 box;
 375 c | c
 376 l | l .
 377 ATTRIBUTE TYPE	ATTRIBUTE VALUE
 378 _
 379 Interface Stability	Committed
 380 .TE
 381
 382 .SH SEE ALSO
 383 .sp
 384 .LP
 385 \fBu8_strcmp\fR(3C), \fBu8_textprep_str\fR(3C), \fBu8_validate\fR(3C),
 386 \fBattributes\fR(5), \fBu8_strcmp\fR(9F), \fBu8_validate\fR(9F),
 387 \fBuconv_u16tou32\fR(9F)
 388 .sp
 389 .LP
 390 The Unicode Standard (http://www.unicode.org)