intl/unicharutil/nsUnicodeNormalizer.cpp

   1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
   2
   3 /* This file is modified from JPNIC's mDNKit, it is under both MPL and
   4  * JPNIC's license.
   5  */
   6
   7 /* This Source Code Form is subject to the terms of the Mozilla Public
   8  * License, v. 2.0. If a copy of the MPL was not distributed with this
   9  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
  10
  11 /*
  12  * Copyright (c) 2000,2002 Japan Network Information Center.
  13  * All rights reserved.
  14  *
  15  * By using this file, you agree to the terms and conditions set forth bellow.
  16  *
  17  *                      LICENSE TERMS AND CONDITIONS
  18  *
  19  * The following License Terms and Conditions apply, unless a different
  20  * license is obtained from Japan Network Information Center ("JPNIC"),
  21  * a Japanese association, Kokusai-Kougyou-Kanda Bldg 6F, 2-3-4 Uchi-Kanda,
  22  * Chiyoda-ku, Tokyo 101-0047, Japan.
  23  *
  24  * 1. Use, Modification and Redistribution (including distribution of any
  25  *    modified or derived work) in source and/or binary forms is permitted
  26  *    under this License Terms and Conditions.
  27  *
  28  * 2. Redistribution of source code must retain the copyright notices as they
  29  *    appear in each source code file, this License Terms and Conditions.
  30  *
  31  * 3. Redistribution in binary form must reproduce the Copyright Notice,
  32  *    this License Terms and Conditions, in the documentation and/or other
  33  *    materials provided with the distribution.  For the purposes of binary
  34  *    distribution the "Copyright Notice" refers to the following language:
  35  *    "Copyright (c) 2000-2002 Japan Network Information Center.  All rights reserved."
  36  *
  37  * 4. The name of JPNIC may not be used to endorse or promote products
  38  *    derived from this Software without specific prior written approval of
  39  *    JPNIC.
  40  *
  41  * 5. Disclaimer/Limitation of Liability: THIS SOFTWARE IS PROVIDED BY JPNIC
  42  *    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  43  *    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
  44  *    PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL JPNIC BE LIABLE
  45  *    FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  46  *    CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  47  *    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
  48  *    BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
  49  *    WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
  50  *    OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
  51  *    ADVISED OF THE POSSIBILITY OF SUCH DAMAGES.
  52  */
  53
  54 #include <string.h>
  55
  56 #include "nsMemory.h"
  57 #include "nsUnicodeNormalizer.h"
  58 #include "nsString.h"
  59
  60 NS_IMPL_ISUPPORTS(nsUnicodeNormalizer, nsIUnicodeNormalizer)
  61
  62
  63 nsUnicodeNormalizer::nsUnicodeNormalizer()
  64 {
  65 }
  66
  67 nsUnicodeNormalizer::~nsUnicodeNormalizer()
  68 {
  69 }
  70
  71
  72
  73 #define END_BIT         0x80000000
  74
  75
  76 /*
  77  * Some constants for Hangul decomposition/composition.
  78  * These things were taken from unicode book.
  79  */
  80 #define SBase           0xac00
  81 #define LBase           0x1100
  82 #define VBase           0x1161
  83 #define TBase           0x11a7
  84 #define LCount          19
  85 #define VCount          21
  86 #define TCount          28
  87 #define SLast           (SBase + LCount * VCount * TCount)
  88
  89 struct composition {
  90         uint32_t c2;    /* 2nd character */
  91         uint32_t comp;  /* composed character */
  92 };
  93
  94
  95 #include "normalization_data.h"
  96
  97 /*
  98  * Macro for multi-level index table.
  99  */
 100 #define LOOKUPTBL(vprefix, mprefix, v) \
 101         DMAP(vprefix)[\
 102                 IMAP(vprefix)[\
 103                         IMAP(vprefix)[IDX0(mprefix, v)] + IDX1(mprefix, v)\
 104                 ]\
 105         ].tbl[IDX2(mprefix, v)]
 106
 107 #define IDX0(mprefix, v) IDX_0(v, BITS1(mprefix), BITS2(mprefix))
 108 #define IDX1(mprefix, v) IDX_1(v, BITS1(mprefix), BITS2(mprefix))
 109 #define IDX2(mprefix, v) IDX_2(v, BITS1(mprefix), BITS2(mprefix))
 110
 111 #define IDX_0(v, bits1, bits2)  ((v) >> ((bits1) + (bits2)))
 112 #define IDX_1(v, bits1, bits2)  (((v) >> (bits2)) & ((1 << (bits1)) - 1))
 113 #define IDX_2(v, bits1, bits2)  ((v) & ((1 << (bits2)) - 1))
 114
 115 #define BITS1(mprefix)  mprefix ## _BITS_1
 116 #define BITS2(mprefix)  mprefix ## _BITS_2
 117
 118 #define IMAP(vprefix)   vprefix ## _imap
 119 #define DMAP(vprefix)   vprefix ## _table
 120 #define SEQ(vprefix)    vprefix ## _seq
 121
 122 static int32_t
 123 canonclass(uint32_t c) {
 124         /* Look up canonicalclass table. */
 125         return (LOOKUPTBL(canon_class, CANON_CLASS, c));
 126 }
 127
 128 static int32_t
 129 decompose_char(uint32_t c, const uint32_t **seqp)
 130 {
 131         /* Look up decomposition table. */
 132         int32_t seqidx = LOOKUPTBL(decompose, DECOMP, c);
 133         *seqp = SEQ(decompose) + (seqidx & ~DECOMP_COMPAT);
 134         return (seqidx);
 135 }
 136
 137 static int32_t
 138 compose_char(uint32_t c,
 139                                 const struct composition **compp)
 140 {
 141         /* Look up composition table. */
 142         int32_t seqidx = LOOKUPTBL(compose, CANON_COMPOSE, c);
 143         *compp = SEQ(compose) + (seqidx & 0xffff);
 144         return (seqidx >> 16);
 145 }
 146
 147 static nsresult
 148 mdn__unicode_decompose(int32_t compat, uint32_t *v, size_t vlen,
 149                        uint32_t c, int32_t *decomp_lenp)
 150 {
 151         uint32_t *vorg = v;
 152         int32_t seqidx;
 153         const uint32_t *seq;
 154
 155         //assert(v != nullptr && vlen >= 0 && decomp_lenp != nullptr);
 156
 157         /*
 158          * First, check for Hangul.
 159          */
 160         if (SBase <= c && c < SLast) {
 161                 int32_t idx, t_offset, v_offset, l_offset;
 162
 163                 idx = c - SBase;
 164                 t_offset = idx % TCount;
 165                 idx /= TCount;
 166                 v_offset = idx % VCount;
 167                 l_offset = idx / VCount;
 168                 if ((t_offset == 0 && vlen < 2) || (t_offset > 0 && vlen < 3))
 169                         return (NS_ERROR_UNORM_MOREOUTPUT);
 170                 *v++ = LBase + l_offset;
 171                 *v++ = VBase + v_offset;
 172                 if (t_offset > 0)
 173                         *v++ = TBase + t_offset;
 174                 *decomp_lenp = v - vorg;
 175                 return (NS_OK);
 176         }
 177
 178         /*
 179          * Look up decomposition table.  If no decomposition is defined
 180          * or if it is a compatibility decomosition when canonical
 181          * decomposition requested, return 'NS_SUCCESS_UNORM_NOTFOUND'.
 182          */
 183         seqidx = decompose_char(c, &seq);
 184         if (seqidx == 0 || (compat == 0 && (seqidx & DECOMP_COMPAT) != 0))
 185                 return (NS_SUCCESS_UNORM_NOTFOUND);
 186
 187         /*
 188          * Copy the decomposed sequence.  The end of the sequence are
 189          * marked with END_BIT.
 190          */
 191         do {
 192                 uint32_t c;
 193                 int32_t dlen;
 194                 nsresult r;
 195
 196                 c = *seq & ~END_BIT;
 197
 198                 /* Decompose recursively. */
 199                 r = mdn__unicode_decompose(compat, v, vlen, c, &dlen);
 200                 if (r == NS_OK) {
 201                         v += dlen;
 202                         vlen -= dlen;
 203                 } else if (r == NS_SUCCESS_UNORM_NOTFOUND) {
 204                         if (vlen < 1)
 205                                 return (NS_ERROR_UNORM_MOREOUTPUT);
 206                         *v++ = c;
 207                         vlen--;
 208                 } else {
 209                         return (r);
 210                 }
 211
 212         } while ((*seq++ & END_BIT) == 0);
 213
 214         *decomp_lenp = v - vorg;
 215
 216         return (NS_OK);
 217 }
 218
 219 static int32_t
 220 mdn__unicode_iscompositecandidate(uint32_t c)
 221 {
 222         const struct composition *dummy;
 223
 224         /* Check for Hangul */
 225         if ((LBase <= c && c < LBase + LCount) || (SBase <= c && c < SLast))
 226                 return (1);
 227
 228         /*
 229          * Look up composition table.  If there are no composition
 230          * that begins with the given character, it is not a
 231          * composition candidate.
 232          */
 233         if (compose_char(c, &dummy) == 0)
 234                 return (0);
 235         else
 236                 return (1);
 237 }
 238
 239 static nsresult
 240 mdn__unicode_compose(uint32_t c1, uint32_t c2, uint32_t *compp)
 241 {
 242         int32_t n;
 243         int32_t lo, hi;
 244         const struct composition *cseq;
 245
 246         //assert(compp != nullptr);
 247
 248         /*
 249          * Check for Hangul.
 250          */
 251         if (LBase <= c1 && c1 < LBase + LCount &&
 252             VBase <= c2 && c2 < VBase + VCount) {
 253                 /*
 254                  * Hangul L and V.
 255                  */
 256                 *compp = SBase +
 257                         ((c1 - LBase) * VCount + (c2 - VBase)) * TCount;
 258                 return (NS_OK);
 259         } else if (SBase <= c1 && c1 < SLast &&
 260                    TBase <= c2 && c2 < TBase + TCount &&
 261                    (c1 - SBase) % TCount == 0) {
 262                 /*
 263                  * Hangul LV and T.
 264                  */
 265                 *compp = c1 + (c2 - TBase);
 266                 return (NS_OK);
 267         }
 268
 269         /*
 270          * Look up composition table.  If the result is 0, no composition
 271          * is defined.  Otherwise, upper 16bits of the result contains
 272          * the number of composition that begins with 'c1', and the lower
 273          * 16bits is the offset in 'compose_seq'.
 274          */
 275         if ((n = compose_char(c1, &cseq)) == 0)
 276                 return (NS_SUCCESS_UNORM_NOTFOUND);
 277
 278         /*
 279          * The composite sequences are sorted by the 2nd character 'c2'.
 280          * So we can use binary search.
 281          */
 282         lo = 0;
 283         hi = n - 1;
 284         while (lo <= hi) {
 285                 int32_t mid = (lo + hi) / 2;
 286
 287                 if (cseq[mid].c2 < c2) {
 288                         lo = mid + 1;
 289                 } else if (cseq[mid].c2 > c2) {
 290                         hi = mid - 1;
 291                 } else {
 292                         *compp = cseq[mid].comp;
 293                         return (NS_OK);
 294                 }
 295         }
 296         return (NS_SUCCESS_UNORM_NOTFOUND);
 297 }
 298
 299
 300 #define WORKBUF_SIZE            128
 301 #define WORKBUF_SIZE_MAX        10000
 302
 303 typedef struct {
 304         int32_t cur;            /* pointing now processing character */
 305         int32_t last;           /* pointing just after the last character */
 306         int32_t size;           /* size of UCS and CLASS array */
 307         uint32_t *ucs;  /* UCS-4 characters */
 308         int32_t *cclass;                /* and their canonical classes */
 309         uint32_t ucs_buf[WORKBUF_SIZE]; /* local buffer */
 310         int32_t class_buf[WORKBUF_SIZE];                /* ditto */
 311 } workbuf_t;
 312
 313 static nsresult decompose(workbuf_t *wb, uint32_t c, int32_t compat);
 314 static void             get_class(workbuf_t *wb);
 315 static void             reorder(workbuf_t *wb);
 316 static void             compose(workbuf_t *wb);
 317 static nsresult flush_before_cur(workbuf_t *wb, nsAString& aToStr);
 318 static void             workbuf_init(workbuf_t *wb);
 319 static void             workbuf_free(workbuf_t *wb);
 320 static nsresult workbuf_extend(workbuf_t *wb);
 321 static nsresult workbuf_append(workbuf_t *wb, uint32_t c);
 322 static void             workbuf_shift(workbuf_t *wb, int32_t shift);
 323 static void             workbuf_removevoid(workbuf_t *wb);
 324
 325
 326 static nsresult
 327 mdn_normalize(bool do_composition, bool compat,
 328           const nsAString& aSrcStr, nsAString& aToStr)
 329 {
 330         workbuf_t wb;
 331         nsresult r = NS_OK;
 332         /*
 333          * Initialize working buffer.
 334          */
 335         workbuf_init(&wb);
 336
 337         nsAString::const_iterator start, end;
 338         aSrcStr.BeginReading(start);
 339         aSrcStr.EndReading(end);
 340
 341         while (start != end) {
 342                 uint32_t c;
 343                 char16_t curChar;
 344
 345                 //assert(wb.cur == wb.last);
 346
 347                 /*
 348                  * Get one character from 'from'.
 349                  */
 350                 curChar= *start++;
 351
 352                 if (NS_IS_HIGH_SURROGATE(curChar) && start != end && NS_IS_LOW_SURROGATE(*(start)) ) {
 353                         c = SURROGATE_TO_UCS4(curChar, *start);
 354                         ++start;
 355                 } else {
 356                         c = curChar;
 357                 }
 358
 359                 /*
 360                  * Decompose it.
 361                  */
 362                 if ((r = decompose(&wb, c, compat)) != NS_OK)
 363                         break;
 364
 365                 /*
 366                  * Get canonical class.
 367                  */
 368                 get_class(&wb);
 369
 370                 /*
 371                  * Reorder & compose.
 372                  */
 373                 for (; wb.cur < wb.last; wb.cur++) {
 374                         if (wb.cur == 0) {
 375                                 continue;
 376                         } else if (wb.cclass[wb.cur] > 0) {
 377                                 /*
 378                                  * This is not a starter. Try reordering.
 379                                  * Note that characters up to it are
 380                                  * already in canonical order.
 381                                  */
 382                                 reorder(&wb);
 383                                 continue;
 384                         }
 385
 386                         /*
 387                          * This is a starter character, and there are
 388                          * some characters before it.  Those characters
 389                          * have been reordered properly, and
 390                          * ready for composition.
 391                          */
 392                         if (do_composition && wb.cclass[0] == 0)
 393                                 compose(&wb);
 394
 395                         /*
 396                          * If CUR points to a starter character,
 397                          * then process of characters before CUR are
 398                          * already finished, because any further
 399                          * reordering/composition for them are blocked
 400                          * by the starter CUR points.
 401                          */
 402                         if (wb.cur > 0 && wb.cclass[wb.cur] == 0) {
 403                                 /* Flush everything before CUR. */
 404                                 r = flush_before_cur(&wb, aToStr);
 405                                 if (r != NS_OK)
 406                                         break;
 407                         }
 408                 }
 409         }
 410
 411         if (r == NS_OK) {
 412                 if (do_composition && wb.cur > 0 && wb.cclass[0] == 0) {
 413                         /*
 414                          * There is some characters left in WB.
 415                          * They are ordered, but not composed yet.
 416                          * Now CUR points just after the last character in WB,
 417                          * and since compose() tries to compose characters
 418                          * between top and CUR inclusive, we must make CUR
 419                          * one character back during compose().
 420                          */
 421                         wb.cur--;
 422                         compose(&wb);
 423                         wb.cur++;
 424                 }
 425                 /*
 426                  * Call this even when WB.CUR == 0, to make TO
 427                  * NUL-terminated.
 428                  */
 429                 r = flush_before_cur(&wb, aToStr);
 430         }
 431
 432         workbuf_free(&wb);
 433
 434         return (r);
 435 }
 436
 437 static nsresult
 438 decompose(workbuf_t *wb, uint32_t c, int32_t compat) {
 439         nsresult r;
 440         int32_t dec_len;
 441
 442 again:
 443         r = mdn__unicode_decompose(compat, wb->ucs + wb->last,
 444                                    wb->size - wb->last, c, &dec_len);
 445         switch (r) {
 446         case NS_OK:
 447                 wb->last += dec_len;
 448                 return (NS_OK);
 449         case NS_SUCCESS_UNORM_NOTFOUND:
 450                 return (workbuf_append(wb, c));
 451         case NS_ERROR_UNORM_MOREOUTPUT:
 452                 if ((r = workbuf_extend(wb)) != NS_OK)
 453                         return (r);
 454                 if (wb->size > WORKBUF_SIZE_MAX) {
 455                         // "mdn__unormalize_form*: " "working buffer too large\n"
 456                         return (NS_ERROR_FAILURE);
 457                 }
 458                 goto again;
 459         default:
 460                 return (r);
 461         }
 462         /* NOTREACHED */
 463 }
 464
 465 static void
 466 get_class(workbuf_t *wb) {
 467         int32_t i;
 468
 469         for (i = wb->cur; i < wb->last; i++)
 470                 wb->cclass[i] = canonclass(wb->ucs[i]);
 471 }
 472
 473 static void
 474 reorder(workbuf_t *wb) {
 475         uint32_t c;
 476         int32_t i;
 477         int32_t cclass;
 478
 479         //assert(wb != nullptr);
 480
 481         i = wb->cur;
 482         c = wb->ucs[i];
 483         cclass = wb->cclass[i];
 484
 485         while (i > 0 && wb->cclass[i - 1] > cclass) {
 486                 wb->ucs[i] = wb->ucs[i - 1];
 487                 wb->cclass[i] =wb->cclass[i - 1];
 488                 i--;
 489                 wb->ucs[i] = c;
 490                 wb->cclass[i] = cclass;
 491         }
 492 }
 493
 494 static void
 495 compose(workbuf_t *wb) {
 496         int32_t cur;
 497         uint32_t *ucs;
 498         int32_t *cclass;
 499         int32_t last_class;
 500         int32_t nvoids;
 501         int32_t i;
 502
 503         //assert(wb != nullptr && wb->cclass[0] == 0);
 504
 505         cur = wb->cur;
 506         ucs = wb->ucs;
 507         cclass = wb->cclass;
 508
 509         /*
 510          * If there are no decomposition sequence that begins with
 511          * the top character, composition is impossible.
 512          */
 513         if (!mdn__unicode_iscompositecandidate(ucs[0]))
 514                 return;
 515
 516         last_class = 0;
 517         nvoids = 0;
 518         for (i = 1; i <= cur; i++) {
 519                 uint32_t c;
 520                 int32_t cl = cclass[i];
 521
 522                 if ((last_class < cl || cl == 0) &&
 523                     mdn__unicode_compose(ucs[0], ucs[i],
 524                                          &c) == NS_OK) {
 525                         /*
 526                          * Replace the top character with the composed one.
 527                          */
 528                         ucs[0] = c;
 529                         cclass[0] = canonclass(c);
 530
 531                         cclass[i] = -1; /* void this character */
 532                         nvoids++;
 533                 } else {
 534                         last_class = cl;
 535                 }
 536         }
 537
 538         /* Purge void characters, if any. */
 539         if (nvoids > 0)
 540                 workbuf_removevoid(wb);
 541 }
 542
 543 static nsresult
 544 flush_before_cur(workbuf_t *wb, nsAString& aToStr)
 545 {
 546         int32_t i;
 547
 548         for (i = 0; i < wb->cur; i++) {
 549                 if (!IS_IN_BMP(wb->ucs[i])) {
 550                         aToStr.Append((char16_t)H_SURROGATE(wb->ucs[i]));
 551                         aToStr.Append((char16_t)L_SURROGATE(wb->ucs[i]));
 552                 } else {
 553                         aToStr.Append((char16_t)(wb->ucs[i]));
 554                 }
 555         }
 556
 557         workbuf_shift(wb, wb->cur);
 558
 559         return (NS_OK);
 560 }
 561
 562 static void
 563 workbuf_init(workbuf_t *wb) {
 564         wb->cur = 0;
 565         wb->last = 0;
 566         wb->size = WORKBUF_SIZE;
 567         wb->ucs = wb->ucs_buf;
 568         wb->cclass = wb->class_buf;
 569 }
 570
 571 static void
 572 workbuf_free(workbuf_t *wb) {
 573         if (wb->ucs != wb->ucs_buf) {
 574                 nsMemory::Free(wb->ucs);
 575                 nsMemory::Free(wb->cclass);
 576         }
 577 }
 578
 579 static nsresult
 580 workbuf_extend(workbuf_t *wb) {
 581         int32_t newsize = wb->size * 3;
 582
 583         if (wb->ucs == wb->ucs_buf) {
 584                 wb->ucs = (uint32_t*)nsMemory::Alloc(sizeof(wb->ucs[0]) * newsize);
 585                 if (!wb->ucs)
 586                         return NS_ERROR_OUT_OF_MEMORY;
 587                 wb->cclass = (int32_t*)nsMemory::Alloc(sizeof(wb->cclass[0]) * newsize);
 588                 if (!wb->cclass) {
 589                         nsMemory::Free(wb->ucs);
 590                         wb->ucs = nullptr;
 591                         return NS_ERROR_OUT_OF_MEMORY;
 592                 }
 593         } else {
 594                 void* buf = nsMemory::Realloc(wb->ucs, sizeof(wb->ucs[0]) * newsize);
 595                 if (!buf)
 596                         return NS_ERROR_OUT_OF_MEMORY;
 597                 wb->ucs = (uint32_t*)buf;
 598                 buf = nsMemory::Realloc(wb->cclass, sizeof(wb->cclass[0]) * newsize);
 599                 if (!buf)
 600                         return NS_ERROR_OUT_OF_MEMORY;
 601                 wb->cclass = (int32_t*)buf;
 602         }
 603         return (NS_OK);
 604 }
 605
 606 static nsresult
 607 workbuf_append(workbuf_t *wb, uint32_t c) {
 608         nsresult r;
 609
 610         if (wb->last >= wb->size && (r = workbuf_extend(wb)) != NS_OK)
 611                 return (r);
 612         wb->ucs[wb->last++] = c;
 613         return (NS_OK);
 614 }
 615
 616 static void
 617 workbuf_shift(workbuf_t *wb, int32_t shift) {
 618         int32_t nmove;
 619
 620         //assert(wb != nullptr && wb->cur >= shift);
 621
 622         nmove = wb->last - shift;
 623         memmove(&wb->ucs[0], &wb->ucs[shift],
 624                       nmove * sizeof(wb->ucs[0]));
 625         memmove(&wb->cclass[0], &wb->cclass[shift],
 626                       nmove * sizeof(wb->cclass[0]));
 627         wb->cur -= shift;
 628         wb->last -= shift;
 629 }
 630
 631 static void
 632 workbuf_removevoid(workbuf_t *wb) {
 633         int32_t i, j;
 634         int32_t last = wb->last;
 635
 636         for (i = j = 0; i < last; i++) {
 637                 if (wb->cclass[i] >= 0) {
 638                         if (j < i) {
 639                                 wb->ucs[j] = wb->ucs[i];
 640                                 wb->cclass[j] = wb->cclass[i];
 641                         }
 642                         j++;
 643                 }
 644         }
 645         wb->cur -= last - j;
 646         wb->last = j;
 647 }
 648
 649 nsresult
 650 nsUnicodeNormalizer::NormalizeUnicodeNFD( const nsAString& aSrc, nsAString& aDest)
 651 {
 652   return mdn_normalize(false, false, aSrc, aDest);
 653 }
 654
 655 nsresult
 656 nsUnicodeNormalizer::NormalizeUnicodeNFC( const nsAString& aSrc, nsAString& aDest)
 657 {
 658   return mdn_normalize(true, false, aSrc, aDest);
 659 }
 660
 661 nsresult
 662 nsUnicodeNormalizer::NormalizeUnicodeNFKD( const nsAString& aSrc, nsAString& aDest)
 663 {
 664   return mdn_normalize(false, true, aSrc, aDest);
 665 }
 666
 667 nsresult
 668 nsUnicodeNormalizer::NormalizeUnicodeNFKC( const nsAString& aSrc, nsAString& aDest)
 669 {
 670   return mdn_normalize(true, true, aSrc, aDest);
 671 }
 672
 673 bool
 674 nsUnicodeNormalizer::Compose(uint32_t a, uint32_t b, uint32_t *ab)
 675 {
 676   return mdn__unicode_compose(a, b, ab) == NS_OK;
 677 }
 678
 679 bool
 680 nsUnicodeNormalizer::DecomposeNonRecursively(uint32_t c, uint32_t *c1, uint32_t *c2)
 681 {
 682   // We can't use mdn__unicode_decompose here, because that does a recursive
 683   // decomposition that may yield more than two characters, but the harfbuzz
 684   // callback wants just a single-step decomp that is guaranteed to produce
 685   // no more than two characters. So we do a low-level lookup in the table
 686   // of decomp sequences.
 687   const uint32_t *seq;
 688   uint32_t seqidx = decompose_char(c, &seq);
 689   if (seqidx == 0 || ((seqidx & DECOMP_COMPAT) != 0)) {
 690     return false;
 691   }
 692   *c1 = *seq & ~END_BIT;
 693   if (*seq & END_BIT) {
 694     *c2 = 0;
 695   } else {
 696     *c2 = *++seq & ~END_BIT;
 697   }
 698   return true;
 699 }