Add Bourne style quoting for TransportGitSsh
[egit/charleso.git] / org.spearce.jgit / src / org / spearce / jgit / util / QuotedString.java
blobf91057ee42f6292ace3612980a22ffbc2ebb92ca
1 /*
2 * Copyright (C) 2008, Google Inc.
4 * All rights reserved.
6 * Redistribution and use in source and binary forms, with or
7 * without modification, are permitted provided that the following
8 * conditions are met:
10 * - Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
13 * - Redistributions in binary form must reproduce the above
14 * copyright notice, this list of conditions and the following
15 * disclaimer in the documentation and/or other materials provided
16 * with the distribution.
18 * - Neither the name of the Git Development Community nor the
19 * names of its contributors may be used to endorse or promote
20 * products derived from this software without specific prior
21 * written permission.
23 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
24 * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
25 * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
26 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
28 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
29 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
30 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
31 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
32 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
33 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
34 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
35 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
38 package org.spearce.jgit.util;
40 import java.util.Arrays;
42 import org.spearce.jgit.lib.Constants;
44 /** Utility functions related to quoted string handling. */
45 public abstract class QuotedString {
46 /** Quoting style that obeys the rules Git applies to file names */
47 public static final GitPathStyle GIT_PATH = new GitPathStyle();
49 /**
50 * Quoting style used by the Bourne shell.
51 * <p>
52 * Quotes are unconditionally inserted during {@link #quote(String)}. This
53 * protects shell meta-characters like <code>$</code> or <code>~</code> from
54 * being recognized as special.
56 public static final BourneStyle BOURNE = new BourneStyle();
58 /**
59 * Quote an input string by the quoting rules.
60 * <p>
61 * If the input string does not require any quoting, the same String
62 * reference is returned to the caller.
63 * <p>
64 * Otherwise a quoted string is returned, including the opening and closing
65 * quotation marks at the start and end of the string. If the style does not
66 * permit raw Unicode characters then the string will first be encoded in
67 * UTF-8, with unprintable sequences possibly escaped by the rules.
69 * @param in
70 * any non-null Unicode string.
71 * @return a quoted string. See above for details.
73 public abstract String quote(String in);
75 /**
76 * Clean a previously quoted input, decoding the result via UTF-8.
77 * <p>
78 * This method must match quote such that:
80 * <pre>
81 * a.equals(dequote(quote(a)));
82 * </pre>
84 * is true for any <code>a</code>.
86 * @param in
87 * a Unicode string to remove quoting from.
88 * @return the cleaned string.
89 * @see #dequote(byte[], int, int)
91 public String dequote(final String in) {
92 final byte[] b = Constants.encode(in);
93 return dequote(b, 0, b.length);
96 /**
97 * Decode a previously quoted input, scanning a UTF-8 encoded buffer.
98 * <p>
99 * This method must match quote such that:
101 * <pre>
102 * a.equals(dequote(Constants.encode(quote(a))));
103 * </pre>
105 * is true for any <code>a</code>.
106 * <p>
107 * This method removes any opening/closing quotation marks added by
108 * {@link #quote(String)}.
110 * @param in
111 * the input buffer to parse.
112 * @param offset
113 * first position within <code>in</code> to scan.
114 * @param end
115 * one position past the last byte within <code>in</code> to scan.
116 * @return the cleaned string.
118 public abstract String dequote(byte[] in, int offset, int end);
121 * Quoting style used by the Bourne shell.
122 * <p>
123 * Quotes are unconditionally inserted during {@link #quote(String)}. This
124 * protects shell meta-characters like <code>$</code> or <code>~</code> from
125 * being recognized as special.
127 public static class BourneStyle extends QuotedString {
128 @Override
129 public String quote(final String in) {
130 final StringBuilder r = new StringBuilder();
131 r.append('\'');
132 int start = 0, i = 0;
133 for (; i < in.length(); i++) {
134 switch (in.charAt(i)) {
135 case '\'':
136 case '!':
137 r.append(in, start, i);
138 r.append('\'');
139 r.append('\\');
140 r.append(in.charAt(i));
141 r.append('\'');
142 start = i + 1;
143 break;
146 r.append(in, start, i);
147 r.append('\'');
148 return r.toString();
151 @Override
152 public String dequote(final byte[] in, int ip, final int ie) {
153 boolean inquote = false;
154 final byte[] r = new byte[ie - ip];
155 int rPtr = 0;
156 while (ip < ie) {
157 final byte b = in[ip++];
158 switch (b) {
159 case '\'':
160 inquote = !inquote;
161 continue;
162 case '\\':
163 if (inquote || ip == ie)
164 r[rPtr++] = b; // literal within a quote
165 else
166 r[rPtr++] = in[ip++];
167 continue;
168 default:
169 r[rPtr++] = b;
170 continue;
173 return RawParseUtils.decode(Constants.CHARSET, r, 0, rPtr);
177 /** Quoting style that obeys the rules Git applies to file names */
178 public static final class GitPathStyle extends QuotedString {
179 private static final byte[] quote;
180 static {
181 quote = new byte[128];
182 Arrays.fill(quote, (byte) -1);
184 for (int i = '0'; i <= '9'; i++)
185 quote[i] = 0;
186 for (int i = 'a'; i <= 'z'; i++)
187 quote[i] = 0;
188 for (int i = 'A'; i <= 'Z'; i++)
189 quote[i] = 0;
190 quote[' '] = 0;
191 quote['+'] = 0;
192 quote[','] = 0;
193 quote['-'] = 0;
194 quote['.'] = 0;
195 quote['/'] = 0;
196 quote['='] = 0;
197 quote['_'] = 0;
198 quote['^'] = 0;
200 quote['\u0007'] = 'a';
201 quote['\b'] = 'b';
202 quote['\f'] = 'f';
203 quote['\n'] = 'n';
204 quote['\r'] = 'r';
205 quote['\t'] = 't';
206 quote['\u000B'] = 'v';
207 quote['\\'] = '\\';
208 quote['"'] = '"';
211 @Override
212 public String quote(final String instr) {
213 if (instr.length() == 0)
214 return "\"\"";
215 boolean reuse = true;
216 final byte[] in = Constants.encode(instr);
217 final StringBuilder r = new StringBuilder(2 + in.length);
218 r.append('"');
219 for (int i = 0; i < in.length; i++) {
220 final int c = in[i] & 0xff;
221 if (c < quote.length) {
222 final byte style = quote[c];
223 if (style == 0) {
224 r.append((char) c);
225 continue;
227 if (style > 0) {
228 reuse = false;
229 r.append('\\');
230 r.append((char) style);
231 continue;
235 reuse = false;
236 r.append('\\');
237 r.append((char) (((c >> 6) & 03) + '0'));
238 r.append((char) (((c >> 3) & 07) + '0'));
239 r.append((char) (((c >> 0) & 07) + '0'));
241 if (reuse)
242 return instr;
243 r.append('"');
244 return r.toString();
247 @Override
248 public String dequote(final byte[] in, final int inPtr, final int inEnd) {
249 if (2 <= inEnd - inPtr && in[inPtr] == '"' && in[inEnd - 1] == '"')
250 return dq(in, inPtr + 1, inEnd - 1);
251 return RawParseUtils.decode(Constants.CHARSET, in, inPtr, inEnd);
254 private static String dq(final byte[] in, int inPtr, final int inEnd) {
255 final byte[] r = new byte[inEnd - inPtr];
256 int rPtr = 0;
257 while (inPtr < inEnd) {
258 final byte b = in[inPtr++];
259 if (b != '\\') {
260 r[rPtr++] = b;
261 continue;
264 if (inPtr == inEnd) {
265 // Lone trailing backslash. Treat it as a literal.
267 r[rPtr++] = '\\';
268 break;
271 switch (in[inPtr++]) {
272 case 'a':
273 r[rPtr++] = 0x07 /* \a = BEL */;
274 continue;
275 case 'b':
276 r[rPtr++] = '\b';
277 continue;
278 case 'f':
279 r[rPtr++] = '\f';
280 continue;
281 case 'n':
282 r[rPtr++] = '\n';
283 continue;
284 case 'r':
285 r[rPtr++] = '\r';
286 continue;
287 case 't':
288 r[rPtr++] = '\t';
289 continue;
290 case 'v':
291 r[rPtr++] = 0x0B/* \v = VT */;
292 continue;
294 case '\\':
295 case '"':
296 r[rPtr++] = in[inPtr - 1];
297 continue;
299 case '0':
300 case '1':
301 case '2':
302 case '3': {
303 int cp = in[inPtr - 1] - '0';
304 while (inPtr < inEnd) {
305 final byte c = in[inPtr];
306 if ('0' <= c && c <= '7') {
307 cp <<= 3;
308 cp |= c - '0';
309 inPtr++;
310 } else {
311 break;
314 r[rPtr++] = (byte) cp;
315 continue;
318 default:
319 // Any other code is taken literally.
321 r[rPtr++] = '\\';
322 r[rPtr++] = in[inPtr - 1];
323 continue;
327 return RawParseUtils.decode(Constants.CHARSET, r, 0, rPtr);
330 private GitPathStyle() {
331 // Singleton