Correctly use a long for the offsets within a generated pack
[egit/charleso.git] / org.spearce.jgit / src / org / spearce / jgit / util / QuotedString.java
blobc6e6c0f8bd9f8a32152fb1deef4b451214ce2ec9
1 /*
2 * Copyright (C) 2008, Google Inc.
4 * All rights reserved.
6 * Redistribution and use in source and binary forms, with or
7 * without modification, are permitted provided that the following
8 * conditions are met:
10 * - Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
13 * - Redistributions in binary form must reproduce the above
14 * copyright notice, this list of conditions and the following
15 * disclaimer in the documentation and/or other materials provided
16 * with the distribution.
18 * - Neither the name of the Git Development Community nor the
19 * names of its contributors may be used to endorse or promote
20 * products derived from this software without specific prior
21 * written permission.
23 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
24 * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
25 * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
26 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
28 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
29 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
30 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
31 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
32 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
33 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
34 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
35 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
38 package org.spearce.jgit.util;
40 import java.util.Arrays;
42 import org.spearce.jgit.lib.Constants;
44 /** Utility functions related to quoted string handling. */
45 public abstract class QuotedString {
46 /** Quoting style that obeys the rules Git applies to file names */
47 public static final GitPathStyle GIT_PATH = new GitPathStyle();
49 /**
50 * Quoting style used by the Bourne shell.
51 * <p>
52 * Quotes are unconditionally inserted during {@link #quote(String)}. This
53 * protects shell meta-characters like <code>$</code> or <code>~</code> from
54 * being recognized as special.
56 public static final BourneStyle BOURNE = new BourneStyle();
58 /** Bourne style, but permits <code>~user</code> at the start of the string. */
59 public static final BourneUserPathStyle BOURNE_USER_PATH = new BourneUserPathStyle();
61 /**
62 * Quote an input string by the quoting rules.
63 * <p>
64 * If the input string does not require any quoting, the same String
65 * reference is returned to the caller.
66 * <p>
67 * Otherwise a quoted string is returned, including the opening and closing
68 * quotation marks at the start and end of the string. If the style does not
69 * permit raw Unicode characters then the string will first be encoded in
70 * UTF-8, with unprintable sequences possibly escaped by the rules.
72 * @param in
73 * any non-null Unicode string.
74 * @return a quoted string. See above for details.
76 public abstract String quote(String in);
78 /**
79 * Clean a previously quoted input, decoding the result via UTF-8.
80 * <p>
81 * This method must match quote such that:
83 * <pre>
84 * a.equals(dequote(quote(a)));
85 * </pre>
87 * is true for any <code>a</code>.
89 * @param in
90 * a Unicode string to remove quoting from.
91 * @return the cleaned string.
92 * @see #dequote(byte[], int, int)
94 public String dequote(final String in) {
95 final byte[] b = Constants.encode(in);
96 return dequote(b, 0, b.length);
99 /**
100 * Decode a previously quoted input, scanning a UTF-8 encoded buffer.
101 * <p>
102 * This method must match quote such that:
104 * <pre>
105 * a.equals(dequote(Constants.encode(quote(a))));
106 * </pre>
108 * is true for any <code>a</code>.
109 * <p>
110 * This method removes any opening/closing quotation marks added by
111 * {@link #quote(String)}.
113 * @param in
114 * the input buffer to parse.
115 * @param offset
116 * first position within <code>in</code> to scan.
117 * @param end
118 * one position past in <code>in</code> to scan.
119 * @return the cleaned string.
121 public abstract String dequote(byte[] in, int offset, int end);
124 * Quoting style used by the Bourne shell.
125 * <p>
126 * Quotes are unconditionally inserted during {@link #quote(String)}. This
127 * protects shell meta-characters like <code>$</code> or <code>~</code> from
128 * being recognized as special.
130 public static class BourneStyle extends QuotedString {
131 @Override
132 public String quote(final String in) {
133 final StringBuilder r = new StringBuilder();
134 r.append('\'');
135 int start = 0, i = 0;
136 for (; i < in.length(); i++) {
137 switch (in.charAt(i)) {
138 case '\'':
139 case '!':
140 r.append(in, start, i);
141 r.append('\'');
142 r.append('\\');
143 r.append(in.charAt(i));
144 r.append('\'');
145 start = i + 1;
146 break;
149 r.append(in, start, i);
150 r.append('\'');
151 return r.toString();
154 @Override
155 public String dequote(final byte[] in, int ip, final int ie) {
156 boolean inquote = false;
157 final byte[] r = new byte[ie - ip];
158 int rPtr = 0;
159 while (ip < ie) {
160 final byte b = in[ip++];
161 switch (b) {
162 case '\'':
163 inquote = !inquote;
164 continue;
165 case '\\':
166 if (inquote || ip == ie)
167 r[rPtr++] = b; // literal within a quote
168 else
169 r[rPtr++] = in[ip++];
170 continue;
171 default:
172 r[rPtr++] = b;
173 continue;
176 return RawParseUtils.decode(Constants.CHARSET, r, 0, rPtr);
180 /** Bourne style, but permits <code>~user</code> at the start of the string. */
181 public static class BourneUserPathStyle extends BourneStyle {
182 @Override
183 public String quote(final String in) {
184 if (in.matches("^~[A-Za-z0-9_-]+$")) {
185 // If the string is just "~user" we can assume they
186 // mean "~user/".
188 return in + "/";
191 if (in.matches("^~[A-Za-z0-9_-]*/.*$")) {
192 // If the string is of "~/path" or "~user/path"
193 // we must not escape ~/ or ~user/ from the shell.
195 final int i = in.indexOf('/') + 1;
196 if (i == in.length())
197 return in;
198 return in.substring(0, i) + super.quote(in.substring(i));
201 return super.quote(in);
205 /** Quoting style that obeys the rules Git applies to file names */
206 public static final class GitPathStyle extends QuotedString {
/**
 * Per-byte quoting table for the 7-bit range: <code>0</code> means the
 * byte is copied as-is, a positive value is the letter to emit after a
 * backslash, and <code>-1</code> means no short form is defined.
 */
private static final byte[] quote;
static {
	final byte[] table = new byte[128];
	Arrays.fill(table, (byte) -1);

	// Bytes which may appear unescaped.
	for (char c = 'A'; c <= 'Z'; c++)
		table[c] = 0;
	for (char c = 'a'; c <= 'z'; c++)
		table[c] = 0;
	for (char c = '0'; c <= '9'; c++)
		table[c] = 0;
	for (final char c : " +,-./=_^".toCharArray())
		table[c] = 0;

	// Bytes which have a single letter backslash escape.
	table[0x07] = 'a';
	table['\b'] = 'b';
	table['\t'] = 't';
	table['\n'] = 'n';
	table[0x0B] = 'v';
	table['\f'] = 'f';
	table['\r'] = 'r';
	table['"'] = '"';
	table['\\'] = '\\';

	quote = table;
}
239 @Override
240 public String quote(final String instr) {
241 if (instr.length() == 0)
242 return "\"\"";
243 boolean reuse = true;
244 final byte[] in = Constants.encode(instr);
245 final StringBuilder r = new StringBuilder(2 + in.length);
246 r.append('"');
247 for (int i = 0; i < in.length; i++) {
248 final int c = in[i] & 0xff;
249 if (c < quote.length) {
250 final byte style = quote[c];
251 if (style == 0) {
252 r.append((char) c);
253 continue;
255 if (style > 0) {
256 reuse = false;
257 r.append('\\');
258 r.append((char) style);
259 continue;
263 reuse = false;
264 r.append('\\');
265 r.append((char) (((c >> 6) & 03) + '0'));
266 r.append((char) (((c >> 3) & 07) + '0'));
267 r.append((char) (((c >> 0) & 07) + '0'));
269 if (reuse)
270 return instr;
271 r.append('"');
272 return r.toString();
275 @Override
276 public String dequote(final byte[] in, final int inPtr, final int inEnd) {
277 if (2 <= inEnd - inPtr && in[inPtr] == '"' && in[inEnd - 1] == '"')
278 return dq(in, inPtr + 1, inEnd - 1);
279 return RawParseUtils.decode(Constants.CHARSET, in, inPtr, inEnd);
282 private static String dq(final byte[] in, int inPtr, final int inEnd) {
283 final byte[] r = new byte[inEnd - inPtr];
284 int rPtr = 0;
285 while (inPtr < inEnd) {
286 final byte b = in[inPtr++];
287 if (b != '\\') {
288 r[rPtr++] = b;
289 continue;
292 if (inPtr == inEnd) {
293 // Lone trailing backslash. Treat it as a literal.
295 r[rPtr++] = '\\';
296 break;
299 switch (in[inPtr++]) {
300 case 'a':
301 r[rPtr++] = 0x07 /* \a = BEL */;
302 continue;
303 case 'b':
304 r[rPtr++] = '\b';
305 continue;
306 case 'f':
307 r[rPtr++] = '\f';
308 continue;
309 case 'n':
310 r[rPtr++] = '\n';
311 continue;
312 case 'r':
313 r[rPtr++] = '\r';
314 continue;
315 case 't':
316 r[rPtr++] = '\t';
317 continue;
318 case 'v':
319 r[rPtr++] = 0x0B/* \v = VT */;
320 continue;
322 case '\\':
323 case '"':
324 r[rPtr++] = in[inPtr - 1];
325 continue;
327 case '0':
328 case '1':
329 case '2':
330 case '3': {
331 int cp = in[inPtr - 1] - '0';
332 while (inPtr < inEnd) {
333 final byte c = in[inPtr];
334 if ('0' <= c && c <= '7') {
335 cp <<= 3;
336 cp |= c - '0';
337 inPtr++;
338 } else {
339 break;
342 r[rPtr++] = (byte) cp;
343 continue;
346 default:
347 // Any other code is taken literally.
349 r[rPtr++] = '\\';
350 r[rPtr++] = in[inPtr - 1];
351 continue;
355 return RawParseUtils.decode(Constants.CHARSET, r, 0, rPtr);
358 private GitPathStyle() {
359 // Singleton