1 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 /* This Source Code Form is subject to the terms of the Mozilla Public
3 * License, v. 2.0. If a copy of the MPL was not distributed with this
4 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
6 #include "nsSemanticUnitScanner.h"
// XPCOM nsISupports boilerplate for nsSemanticUnitScanner: the class is
// declared as inheriting from nsSampleWordBreaker and additionally exposing
// the nsISemanticUnitScanner interface.
8 NS_IMPL_ISUPPORTS_INHERITED(nsSemanticUnitScanner
, nsSampleWordBreaker
, nsISemanticUnitScanner
)
// Default constructor: explicitly invokes the nsSampleWordBreaker base-class
// constructor.
10 nsSemanticUnitScanner::nsSemanticUnitScanner() : nsSampleWordBreaker()
12 /* member initializers and constructor code */
// Destructor for nsSemanticUnitScanner.
15 nsSemanticUnitScanner::~nsSemanticUnitScanner()
21 /* void start (in string characterSet); */
// Entry point that receives the name of a character set before scanning.
// Per the comment in the body, this currently does nothing, so characterSet
// is not used here.
22 NS_IMETHODIMP
nsSemanticUnitScanner::Start(const char *characterSet
)
24 // do nothing for now.
28 /* boolean next (in wstring text, in long length, in long pos, in boolean isLastBuffer, out long begin, out long end); */
// Scans text for a semantic unit starting at index pos.
//
// Parameters:
//   text         - buffer of char16_t code units; text[pos] is read to
//                  classify the starting character.
//   length       - passed to NextWord as the text length (presumably the
//                  number of char16_t units in text -- confirm with callers).
//   pos          - index into text at which scanning starts.
//   isLastBuffer - decides the result when NextWord reports that it needs
//                  more text (see below).
//   begin, end   - out parameters for the unit's range.
//   _retval      - out parameter (the IDL return value).
//
// Visible behaviour:
//   * The character at text[pos] is classified with
//     nsSampleWordBreaker::GetClass; Han letters take a dedicated branch.
//   * Otherwise NextWord(text, length, pos) supplies the next break. If it
//     returns NS_WORDBREAKER_NEED_MORE_TEXT, *end becomes length when
//     isLastBuffer is true and pos otherwise, and *_retval is set to
//     isLastBuffer.
//   * If the starting character is a space or punctuation, the method calls
//     itself again with pos replaced by the break NextWord returned.
//
// NOTE: begin, end and _retval are dereferenced without a null check (see the
// XXX comment below); callers must pass valid pointers.
29 NS_IMETHODIMP
nsSemanticUnitScanner::Next(const char16_t
*text
, int32_t length
, int32_t pos
, bool isLastBuffer
, int32_t *begin
, int32_t *end
, bool *_retval
)
31 // XXX: this needs bulletproofing -- the input pointers should be checked
32 // make sure begin, end and _retval are not nullptr here
34 // if we reach the end, just return
// classify the character at the starting position
42 uint8_t char_class
= nsSampleWordBreaker::GetClass(text
[pos
]);
44 // if we are in Chinese mode, return one Han letter at a time
45 // we should not do this if we are in Japanese or Korean mode
46 if (kWbClassHanLetter
== char_class
) {
54 // find the next "word"
55 next
= NextWord(text
, (uint32_t) length
, (uint32_t) pos
);
57 // if we don't have enough text to make a decision, return
58 if (next
== NS_WORDBREAKER_NEED_MORE_TEXT
) {
// on the last buffer the range ends at length; otherwise end stays at pos
// (presumably so the caller can retry once more text is available)
60 *end
= isLastBuffer
? length
: pos
;
61 *_retval
= isLastBuffer
;
65 // if what we got is space or punct, look at the next break
66 if ((char_class
== kWbClassSpace
) || (char_class
== kWbClassPunct
)) {
67 // if the next "word" is not letters,
68 // call itself recursively with the new pos
69 return Next(text
, length
, next
, isLastBuffer
, begin
, end
, _retval
);
72 // for the rest, return