1 // Copyright 2011 Google Inc. All Rights Reserved.
3 package com
.google
.appengine
.api
.search
.query
;
5 import java
.util
.NoSuchElementException
;
8 * A helper class that holds various stateless utility
9 * functions used by the query parser.
12 public class ParserUtils
{
15 * Keeps the number of days per month for {@link #isDate(CharSequence)} method.
17 private static int[] MONTH_LENGTH
= {
18 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31
22 * An iterator over characters of a quote delimited phrase. If the
23 * phrase contains escaped sequences, such as "\\\"", "\\\'", "\\t", etc.
24 * this iterator converts them to regular characters.
26 private static class PhraseCharIterator
{
// The character sequence this iterator reads from.
// NOTE(review): the assignment from the constructor argument is not visible in this view.
28 private final CharSequence text
;
// A pending character; hasNext() reports true while this is non-zero.
31 private char leftOver
;
// Sets up the iteration bounds over the given text.
// NOTE(review): the declarations of i and n, the initial value of i, and the
// bodies of both loops below are not visible in this view — confirm against
// the full file before relying on these notes.
33 public PhraseCharIterator(CharSequence text
) {
// Upper bound of the iteration; presumably length() - 2 skips the closing
// quote of the quote-delimited phrase — TODO confirm.
36 n
= text
.length() - 2;
// Runs while the character at i is white space and i is still below n;
// presumably advances i past leading white space.
37 while (i
< n
&& Character
.isWhitespace(text
.charAt(i
))) {
// Runs while the character at n is white space and n is still above i;
// presumably moves n back over trailing white space.
40 while (n
> i
&& Character
.isWhitespace(text
.charAt(n
))) {
46 private static boolean isOctal(char c
) {
47 return '0' < c
&& c
< '8';
50 public boolean hasNext() {
51 return leftOver
!= 0 || i
<= n
;
// NOTE(review): the enclosing method header and many intermediate lines are
// not visible in this view. Presumably this is the body of the iterator's
// next() method, which decodes escape sequences — confirm against the full file.
// Signals that no element is available (the guarding condition is not visible here).
56 throw new NoSuchElementException();
// Parses the four characters at [i, i + 4) as a hexadecimal number and
// converts the value to a char via toChar; presumably this handles a
// four-digit unicode escape — TODO confirm.
77 c
= toChar(Integer
.parseInt(text
.subSequence(i
, i
+ 4).toString(), 16));
// Reached when those four characters are not valid hexadecimal; the
// recovery code is not visible in this view.
79 } catch (NumberFormatException e
) {
// Starts accumulating a numeric code from the digit value of c; presumably
// c is the first digit of an octal escape.
93 int codeSoFar
= (c
- '0');
// Keeps reading while input remains and countSoFar is below 3.
// countSoFar is declared and updated outside the lines visible here.
95 while (i
<= n
&& countSoFar
< 3) {
96 char nextChar
= text
.charAt(i
++);
// A non-octal character is handled specially (branch body not visible here).
97 if (!isOctal(nextChar
)) {
// Shifts the accumulated value by one octal place and adds the new digit.
101 codeSoFar
= codeSoFar
* 8 + (nextChar
- '0');
// Converts the accumulated code into the resulting character.
104 c
= toChar(codeSoFar
);
116 private static char toChar(int code
) {
117 char[] decoded
= Character
.toChars(code
);
118 if (decoded
.length
> 1) {
119 throw new RuntimeException(
120 "Decoded " + code
+ " does not return a single character");
/**
 * Private so that no instances of parser utils can be created from outside
 * this class.
 */
private ParserUtils() {
  // Intentionally empty.
}
129 /** Removes the last character from the given text */
130 public static String
trimLast(String text
) {
131 return text
.substring(0, text
.length() - 1);
135 * Normalizes the phrase text. It strips external quote characters. Replaces
136 * white space with a single space character. Converts escape sequences to
139 * @param phrase the phrase to be normalized
140 * @return the phrase with characters and white space normalized
142 public static String
normalizePhrase(CharSequence phrase
) {
143 PhraseCharIterator iter
= new PhraseCharIterator(phrase
);
144 StringBuilder builder
= new StringBuilder(phrase
.length());
145 while (iter
.hasNext()) {
146 while (iter
.hasNext()) {
147 char c
= iter
.next();
148 if (Character
.isWhitespace(c
)) {
153 while (iter
.hasNext()) {
154 char c
= iter
.next();
155 if (!Character
.isWhitespace(c
)) {
156 builder
.append(' ').append(c
);
161 return builder
.toString();
165 * Returns whether or not the given text looks like a number.
166 * The number is defined as
167 * '-'? digit* ('.' digit* ('E' ('+' | '-')? digit+)?)?
169 * @param text the text tested if it looks like a number
170 * @return whether or not the text represents a floating point number
172 public static boolean isNumber(CharSequence text
) {
173 if (text
== null || text
.length() == 0) {
177 if (text
.charAt(0) == '-') {
178 if (text
.length() == 1) {
183 i
= consumeDigits(i
, text
);
184 if (i
>= text
.length()) {
187 if (text
.charAt(i
) == '.') {
188 i
= consumeDigits(i
+ 1, text
);
190 if (i
>= text
.length()) {
193 if (text
.charAt(i
) != 'E' && text
.charAt(i
) != 'e') {
196 if (++i
>= text
.length()) {
199 if (text
.charAt(i
) == '+' || text
.charAt(i
) == '-') {
200 if (++i
>= text
.length()) {
204 return consumeDigits(i
, text
) >= text
.length();
207 private static int consumeDigits(int i
, CharSequence text
) {
208 while (i
< text
.length() && Character
.isDigit(text
.charAt(i
))) {
215 * Returns whether the given text looks like a date. Only ISO 8601 dates,
216 * which have the dddd-dd-dd format, are accepted.
218 * @param text text checked if it looks like a date
219 * @return whether this could be an ISO 8601 date
// Checks whether the text has the shape of a year-month-day date.
// NOTE(review): the declarations of i, c, year, month and day, every return
// statement but the last, and the checks between the three digit loops are
// not visible in this view — confirm details against the full file.
221 public static boolean isDate(CharSequence text
) {
// Null or empty text is handled up front (branch body not visible here).
222 if (text
== null || text
.length() == 0) {
// A '-' at the current position is handled specially; presumably a
// leading sign on the year — TODO confirm.
228 if (text
.charAt(i
) == '-') {
// Reads characters, accumulating decimal digits into year.
231 while (i
< text
.length()) {
232 c
= text
.charAt(i
++);
// A non-digit is handled here (branch body not visible); presumably it
// ends the year field.
233 if (!Character
.isDigit(c
)) {
236 year
= year
* 10 + (c
- '0');
// The text ran out after the year field (branch body not visible here).
241 if (i
>= text
.length()) {
// Reads characters, accumulating decimal digits into month.
248 while (i
< text
.length()) {
249 c
= text
.charAt(i
++);
250 if (!Character
.isDigit(c
)) {
253 month
= month
* 10 + (c
- '0');
// The text ran out after the month field (branch body not visible here).
261 if (i
>= text
.length()) {
// Reads characters, accumulating decimal digits into day.
268 while (i
< text
.length()) {
269 c
= text
.charAt(i
++);
270 if (!Character
.isDigit(c
)) {
273 day
= day
* 10 + (c
- '0');
// Gregorian leap-year test: divisible by 400, or by 4 but not by 100.
// The action taken for a leap year is not visible in this view.
279 if ((year
% 400 == 0) || (year
% 100 != 0 && year
% 4 == 0)) {
// The day must not exceed the table length of the (one-based) month.
283 return day
<= MONTH_LENGTH
[month
- 1];