Bug 1854550 - pt 10. Allow LOG() with zero extra arguments r=glandium
[gecko.git] / accessible / atk / DOMtoATK.h
blob322358bc6e7b13cfbf24c3f3c912bd4d2162695d
1 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 /* vim: set ts=2 et sw=2 tw=80: */
3 /* This Source Code Form is subject to the terms of the Mozilla Public
4 * License, v. 2.0. If a copy of the MPL was not distributed with this
5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
7 #include <glib.h>
8 #include <cstdint>
9 #include "mozilla/a11y/HyperTextAccessibleBase.h"
10 #include "nsCharTraits.h"
11 #include "nsString.h"
/**
 * ATK offsets are counted in unicode codepoints, while DOM offsets are counted
 * in UTF-16 code units.  That makes a difference for non-BMP characters,
 * which need two UTF-16 code units to be represented (a pair of surrogates),
 * while they are just one unicode character.
 *
 * To keep synchronization between ATK offsets (unicode codepoints) and DOM
 * offsets (UTF-16 code units), after translation from UTF-16 to UTF-8 we add a
 * BOM after each non-BMP character (which would otherwise use 2 UTF-16
 * code units for only 1 unicode codepoint).
 *
 * BOMs (Byte Order Marks, U+FEFF, also known as ZERO WIDTH NO-BREAK SPACE, but
 * that usage is deprecated) normally only appear at the beginning of unicode
 * files, but their occurrence within text (notably after cut&paste) is not
 * uncommon, and they are thus considered as non-text.
 *
 * Since the selection requested through ATK may not contain both surrogates
 * at the ends of the selection, we need to fetch one more UTF-16 code unit
 * on both sides, and get rid of it before returning the string to ATK. The
 * ATKStringConverterHelper class maintains this, NewATKString should be used
 * to call it properly.
 *
 * In the end,
 * - if the start is between the high and low surrogates, the UTF-8 result
 *   includes a BOM from it but not the character
 * - if the end is between the high and low surrogates, the UTF-8 result
 *   includes the character but *not* the BOM
 * - all non-BMP characters that are fully in the string are in the UTF-8 result
 *   as character followed by BOM
 */
43 namespace mozilla {
44 namespace a11y {
46 namespace DOMtoATK {
48 /**
49 * Converts a string of accessible text into ATK gchar* string (by adding
50 * BOMs). This can be used when offsets do not need to be adjusted because
51 * ends of the string can not fall between surrogates.
53 gchar* Convert(const nsAString& aStr);
55 /**
56 * Add a BOM after each non-BMP character.
58 void AddBOMs(nsACString& aDest, const nsACString& aSource);
60 class ATKStringConverterHelper {
61 public:
62 ATKStringConverterHelper(void)
64 #ifdef DEBUG
65 mAdjusted(false),
66 #endif
67 mStartShifted(false),
68 mEndShifted(false) {
71 /**
72 * In order to properly get non-BMP values, offsets need to be changed
73 * to get one character more on each end, so that ConvertUTF16toUTF8 can
74 * convert surrogates even if the originally requested offsets fall between
75 * them.
77 void AdjustOffsets(gint* aStartOffset, gint* aEndOffset, gint count);
79 /**
80 * Converts a string of accessible text with adjusted offsets into ATK
81 * gchar* string (by adding BOMs). Note, AdjustOffsets has to be called
82 * before getting the text passed to this.
84 gchar* ConvertAdjusted(const nsAString& aStr);
86 private:
87 /**
88 * Remove the additional characters requested by PrepareUTF16toUTF8.
90 gchar* FinishUTF16toUTF8(nsCString& aStr);
92 #ifdef DEBUG
93 bool mAdjusted;
94 #endif
95 bool mStartShifted;
96 bool mEndShifted;
99 /**
100 * Get text from aAccessible, using ATKStringConverterHelper to properly
101 * introduce appropriate BOMs.
103 inline gchar* NewATKString(HyperTextAccessibleBase* aAccessible,
104 gint aStartOffset, gint aEndOffset) {
105 gint startOffset = aStartOffset, endOffset = aEndOffset;
106 ATKStringConverterHelper converter;
107 converter.AdjustOffsets(&startOffset, &endOffset,
108 gint(aAccessible->CharacterCount()));
109 nsAutoString str;
110 aAccessible->TextSubstring(startOffset, endOffset, str);
112 if (str.Length() == 0) {
113 // Bogus offsets, or empty string, either way we do not need conversion.
114 return g_strdup("");
117 return converter.ConvertAdjusted(str);
121 * Get a character from aAccessible, fetching more data as appropriate to
122 * properly get non-BMP characters or a BOM as appropriate.
124 inline gunichar ATKCharacter(HyperTextAccessibleBase* aAccessible,
125 gint aOffset) {
126 // char16_t is unsigned short in Mozilla, gnuichar is guint32 in glib.
127 gunichar character = static_cast<gunichar>(aAccessible->CharAt(aOffset));
129 if (NS_IS_LOW_SURROGATE(character)) {
130 // Trailing surrogate, return BOM instead.
131 return 0xFEFF;
134 if (NS_IS_HIGH_SURROGATE(character)) {
135 // Heading surrogate, get the trailing surrogate and combine them.
136 gunichar characterLow =
137 static_cast<gunichar>(aAccessible->CharAt(aOffset + 1));
139 if (!NS_IS_LOW_SURROGATE(characterLow)) {
140 // It should have been a trailing surrogate... Flag the error.
141 return 0xFFFD;
143 return SURROGATE_TO_UCS4(character, characterLow);
146 return character;
149 } // namespace DOMtoATK
151 } // namespace a11y
152 } // namespace mozilla