Revision created by MOE tool push_codebase.
[gae.git] / java / src / main / com / google / appengine / api / search / query / ParserUtils.java
blob7a8fbcdb48b3cce4b9de7e86e166a1c5d8ccab2e
1 // Copyright 2011 Google Inc. All Rights Reserved.
3 package com.google.appengine.api.search.query;
5 import java.util.NoSuchElementException;
/**
 * A helper class that holds various stateless utility functions used by the
 * query parser.
 */
public final class ParserUtils {

  /**
   * Number of days in each month of a non-leap year, indexed by
   * {@code month - 1}. Used by {@link #isDate(String)}.
   */
  private static final int[] MONTH_LENGTH = {
    31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31
  };

  /**
   * An iterator over the characters of a quote delimited phrase. The first and
   * the last character of the text are assumed to be the delimiters and are
   * skipped, together with any raw white space directly inside them.
   *
   * <p>The following escape sequences are decoded into a single character:
   * <ul>
   * <li>a backslash followed by a single or a double quote,
   * <li>a backslash followed by {@code u} and exactly four hexadecimal digits,
   * <li>a backslash followed by one to three octal digits.
   * </ul>
   * Any other character following a backslash is not interpreted: the
   * backslash and that character are both returned unchanged.
   */
  private static final class PhraseCharIterator {

    /** Marker stored in {@link #pending} when no character is queued. */
    private static final int NONE = -1;

    private final String text;

    /** Index of the next unread character of {@link #text}. */
    private int i;

    /** Index of the last character that belongs to the phrase (inclusive). */
    private int n;

    /**
     * A character that has to be returned before reading on, or {@link #NONE}.
     * Kept as an int so that every char value, including NUL, can be queued.
     */
    private int pending = NONE;

    PhraseCharIterator(String text) {
      this.text = text;
      i = 1;
      n = text.length() - 2;
      while (i < n && Character.isWhitespace(text.charAt(i))) {
        ++i;
      }
      while (n > i && Character.isWhitespace(text.charAt(n))) {
        --n;
      }
    }

    /** Returns whether {@code c} is one of the octal digits 0 to 7. */
    private static boolean isOctal(char c) {
      return '0' <= c && c <= '7';
    }

    boolean hasNext() {
      return pending != NONE || i <= n;
    }

    /**
     * Returns the next, possibly decoded, character of the phrase.
     *
     * @throws NoSuchElementException if the phrase is exhausted
     */
    char next() {
      if (!hasNext()) {
        throw new NoSuchElementException();
      }
      if (pending != NONE) {
        char queued = (char) pending;
        pending = NONE;
        return queued;
      }
      char c = text.charAt(i++);
      if (c != '\\' || i > n) {
        // A regular character, or a lone backslash that ends the phrase.
        return c;
      }
      char escaped = text.charAt(i++);
      switch (escaped) {
        case '\'':
        case '"':
          return escaped;
        case 'u': {
          int code = decodeHex4();
          if (code >= 0) {
            i += 4;
            return toChar(code);
          }
          // Not a well formed escape; emit the backslash and the 'u' as they are.
          pending = 'u';
          return '\\';
        }
        default:
          if (!isOctal(escaped)) {
            // Unknown escape; emit the backslash and the character as they are.
            pending = escaped;
            return '\\';
          }
          return decodeOctal(escaped);
      }
    }

    /**
     * Decodes the four characters starting at the current position as a
     * hexadecimal number, without consuming them. Unlike
     * {@link Integer#parseInt(String, int)} this rejects a leading sign.
     *
     * @return the decoded value, or -1 if fewer than four characters are left
     *     in the phrase or one of them is not a hexadecimal digit
     */
    private int decodeHex4() {
      if (i + 3 > n) {
        return -1;
      }
      int code = 0;
      for (int k = i; k < i + 4; ++k) {
        int digit = Character.digit(text.charAt(k), 16);
        if (digit < 0) {
          return -1;
        }
        code = (code << 4) | digit;
      }
      return code;
    }

    /**
     * Decodes an octal escape whose first digit has already been consumed. At
     * most two further octal digits are consumed; the character that ends the
     * sequence is only peeked at, so that it is processed normally (including
     * as the start of another escape) by the following call to next().
     */
    private char decodeOctal(char firstDigit) {
      int code = firstDigit - '0';
      for (int digits = 1; digits < 3 && i <= n && isOctal(text.charAt(i)); ++digits) {
        code = code * 8 + (text.charAt(i++) - '0');
      }
      return toChar(code);
    }

    /** Converts a code point to a char, failing if it needs a surrogate pair. */
    private static char toChar(int code) {
      char[] decoded = Character.toChars(code);
      if (decoded.length > 1) {
        throw new RuntimeException(
            "Decoded " + code + " does not return a single character");
      }
      return decoded[0];
    }
  }

  /** No instances of parser utils. */
  private ParserUtils() {}

  /**
   * Removes the last character from the given text.
   *
   * @param text the text to shorten; must not be null
   * @return the text without its last character; an empty text is returned
   *     unchanged
   */
  public static String trimLast(String text) {
    if (text.isEmpty()) {
      return text;
    }
    return text.substring(0, text.length() - 1);
  }

  /**
   * Normalizes the phrase text. It strips the external quote characters,
   * converts escape sequences to Java characters, replaces every run of white
   * space between two words with a single space character, and drops white
   * space at the beginning and at the end of the phrase.
   *
   * @param phrase the phrase to be normalized, including its delimiters
   * @return the phrase with characters and white space normalized
   */
  public static String normalizePhrase(String phrase) {
    PhraseCharIterator iter = new PhraseCharIterator(phrase);
    StringBuilder builder = new StringBuilder(phrase.length());
    boolean spacePending = false;
    while (iter.hasNext()) {
      char c = iter.next();
      if (Character.isWhitespace(c)) {
        // Only remember a separator once something has been written, so that
        // leading white space (raw or escaped) never produces a space.
        spacePending = builder.length() > 0;
        continue;
      }
      if (spacePending) {
        builder.append(' ');
        spacePending = false;
      }
      builder.append(c);
    }
    return builder.toString();
  }

  /**
   * Returns whether or not the given text looks like a number.
   * The number is defined as
   * <pre>
   *   '-'? mantissa (('E' | 'e') ('+' | '-')? digit+)?
   * </pre>
   * where the mantissa is {@code digit* ('.' digit*)?} containing at least one
   * digit. A bare sign, a bare dot or a bare exponent is not a number.
   *
   * @param text the text tested if it looks like a number
   * @return whether or not the text represents a floating point number
   */
  public static boolean isNumber(String text) {
    if (text == null || text.isEmpty()) {
      return false;
    }
    int length = text.length();
    int i = (text.charAt(0) == '-') ? 1 : 0;

    int integerEnd = consumeDigits(i, text);
    int mantissaDigits = integerEnd - i;
    i = integerEnd;
    if (i < length && text.charAt(i) == '.') {
      int fractionEnd = consumeDigits(i + 1, text);
      mantissaDigits += fractionEnd - (i + 1);
      i = fractionEnd;
    }
    if (mantissaDigits == 0) {
      return false;
    }
    if (i >= length) {
      return true;
    }

    char exponentMark = text.charAt(i++);
    if (exponentMark != 'E' && exponentMark != 'e') {
      return false;
    }
    if (i < length && (text.charAt(i) == '+' || text.charAt(i) == '-')) {
      ++i;
    }
    // The exponent needs at least one digit and must run to the end of text.
    return i < length && consumeDigits(i, text) == length;
  }

  /**
   * Skips the digits of the text starting at the given index.
   *
   * @return the index of the first character at or after {@code i} that is not
   *     a digit, or the length of the text if there is none
   */
  private static int consumeDigits(int i, String text) {
    while (i < text.length() && Character.isDigit(text.charAt(i))) {
      ++i;
    }
    return i;
  }

  /**
   * Returns if the given string looks like a date to us. We only accept
   * ISO 8601 style dates of the year-month-day form, such as 2011-02-28. An
   * optional leading '-' is skipped. The month must be in the range 1 to 12
   * and the day must exist in that month, taking leap years into account.
   *
   * @param text text checked if it looks like a date
   * @return whether this could be an ISO 8601 date
   */
  public static boolean isDate(String text) {
    if (text == null || text.isEmpty()) {
      return false;
    }
    int length = text.length();
    int i = (text.charAt(0) == '-') ? 1 : 0;
    char c = '\0';

    // Leap years repeat every 400 years, so only the year modulo 400 is kept.
    // This keeps the leap year test exact for years of any length.
    int year = 0;
    while (i < length) {
      c = text.charAt(i++);
      if (!Character.isDigit(c)) {
        break;
      }
      year = (year * 10 + (c - '0')) % 400;
    }
    if (i >= length || c != '-') {
      return false;
    }

    int month = 0;
    while (i < length) {
      c = text.charAt(i++);
      if (!Character.isDigit(c)) {
        break;
      }
      month = month * 10 + (c - '0');
      if (month > 12) {
        return false;
      }
    }
    if (month <= 0 || i >= length || c != '-') {
      return false;
    }

    int day = 0;
    while (i < length) {
      c = text.charAt(i++);
      if (!Character.isDigit(c)) {
        return false;
      }
      day = day * 10 + (c - '0');
      if (day > 31) {
        // Stop early; an unbounded value could wrap around to a valid day.
        return false;
      }
    }
    if (day <= 0) {
      return false;
    }

    boolean leapYear = (year % 400 == 0) || (year % 100 != 0 && year % 4 == 0);
    if (month == 2 && leapYear) {
      return day <= 29;
    }
    return day <= MONTH_LENGTH[month - 1];
  }
}