2 * Copyright (C) 2008, Google Inc.
6 * Redistribution and use in source and binary forms, with or
7 * without modification, are permitted provided that the following
10 * - Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
13 * - Redistributions in binary form must reproduce the above
14 * copyright notice, this list of conditions and the following
15 * disclaimer in the documentation and/or other materials provided
16 * with the distribution.
18 * - Neither the name of the Git Development Community nor the
19 * names of its contributors may be used to endorse or promote
20 * products derived from this software without specific prior
23 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
24 * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
25 * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
26 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
28 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
29 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
30 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
31 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
32 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
33 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
34 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
35 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
38 package org
.spearce
.jgit
.util
;
40 import java
.util
.Arrays
;
42 import org
.spearce
.jgit
.lib
.Constants
;
44 /** Utility functions related to quoted string handling. */
45 public abstract class QuotedString
{
46 /** Quoting style that obeys the rules Git applies to file names */
47 public static final GitPathStyle GIT_PATH
= new GitPathStyle();
50 * Quoting style used by the Bourne shell.
52 * Quotes are unconditionally inserted during {@link #quote(String)}. This
53 * protects shell meta-characters like <code>$</code> or <code>~</code> from
54 * being recognized as special.
56 public static final BourneStyle BOURNE
= new BourneStyle();
58 /** Bourne style, but permits <code>~user</code> at the start of the string. */
59 public static final BourneUserPathStyle BOURNE_USER_PATH
= new BourneUserPathStyle();
62 * Quote an input string by the quoting rules.
64 * If the input string does not require any quoting, the same String
65 * reference is returned to the caller.
67 * Otherwise a quoted string is returned, including the opening and closing
68 * quotation marks at the start and end of the string. If the style does not
69 * permit raw Unicode characters then the string will first be encoded in
70 * UTF-8, with unprintable sequences possibly escaped by the rules.
73 * any non-null Unicode string.
74 * @return a quoted string. See above for details.
76 public abstract String
quote(String in
);
79 * Clean a previously quoted input, decoding the result via UTF-8.
81 * This method must match quote such that:
84 * a.equals(dequote(quote(a)));
87 * is true for any <code>a</code>.
90 * a Unicode string to remove quoting from.
91 * @return the cleaned string.
92 * @see #dequote(byte[], int, int)
94 public String
dequote(final String in
) {
95 final byte[] b
= Constants
.encode(in
);
96 return dequote(b
, 0, b
.length
);
100 * Decode a previously quoted input, scanning a UTF-8 encoded buffer.
102 * This method must match quote such that:
105 * a.equals(dequote(Constants.encode(quote(a))));
108 * is true for any <code>a</code>.
110 * This method removes any opening/closing quotation marks added by
111 * {@link #quote(String)}.
114 * the input buffer to parse.
116 * first position within <code>in</code> to scan.
118 * one position past in <code>in</code> to scan.
119 * @return the cleaned string.
121 public abstract String
dequote(byte[] in
, int offset
, int end
);
/**
 * Quoting style used by the Bourne shell.
 * <p>
 * Quotes are unconditionally inserted during {@link #quote(String)}. This
 * protects shell meta-characters like <code>$</code> or <code>~</code> from
 * being recognized as special.
 */
130 public static class BourneStyle
extends QuotedString
{
/**
 * Quote <code>in</code> under this Bourne shell style, building the result
 * in a StringBuilder.
 * <p>
 * NOTE(review): the switch's case labels, several statements between the
 * appends below, and the end of this method are not visible in this view;
 * the comments here cover only the statements that are.
 */
132 public String
quote(final String in
) {
133 final StringBuilder r
= new StringBuilder();
// start is the lower bound of the append(in, start, i) copies below;
// i is the scan position advanced by the loop.
135 int start
= 0, i
= 0;
// Visit every character of the input.
136 for (; i
< in
.length(); i
++) {
// Dispatch on the current character (case labels not visible here).
137 switch (in
.charAt(i
)) {
// Copy the input characters in [start, i) to the output.
140 r
.append(in
, start
, i
);
// Emit the character at the current scan position.
143 r
.append(in
.charAt(i
));
// Copy the input characters in [start, i) to the output.
// NOTE(review): presumably this runs after the loop to flush the tail;
// the loop's closing brace is not visible - confirm.
149 r
.append(in
, start
, i
);
/**
 * Remove Bourne quoting from the bytes of <code>in</code> between
 * <code>ip</code> (inclusive) and <code>ie</code> (exclusive).
 * <p>
 * Output bytes are collected in <code>r</code> and finally turned into a
 * String by <code>RawParseUtils.decode</code> using
 * <code>Constants.CHARSET</code>.
 * <p>
 * NOTE(review): the loop header, the declaration of <code>rPtr</code>, the
 * switch and its case labels are not visible in this view.
 */
155 public String
dequote(final byte[] in
, int ip
, final int ie
) {
// Starts out as "not inside a quote"; where it is toggled is not visible here.
156 boolean inquote
= false;
// Output buffer sized to the length of the input range.
157 final byte[] r
= new byte[ie
- ip
];
// Consume the next input byte.
160 final byte b
= in
[ip
++];
// Inside a quote, or when b was the last byte of the range, b is stored
// unchanged.
166 if (inquote
|| ip
== ie
)
167 r
[rPtr
++] = b
; // literal within a quote
// Store the following input byte and consume it.
// NOTE(review): presumably the else branch of the test above; the line
// carrying the keyword is not visible - confirm.
169 r
[rPtr
++] = in
[ip
++];
// Decode the first rPtr collected bytes.
176 return RawParseUtils
.decode(Constants
.CHARSET
, r
, 0, rPtr
);
180 /** Bourne style, but permits <code>~user</code> at the start of the string. */
181 public static class BourneUserPathStyle
extends BourneStyle
{
/**
 * Quote <code>in</code> like BourneStyle, but leave a leading
 * <code>~/</code> or <code>~user/</code> prefix unquoted.
 * <p>
 * NOTE(review): the statements executed for the bare "~user" case and for
 * the "prefix is the whole string" case are not visible in this view.
 */
183 public String
quote(final String in
) {
// Whole string is a tilde followed by one or more of [A-Za-z0-9_-].
184 if (in
.matches("^~[A-Za-z0-9_-]+$")) {
185 // If the string is just "~user" we can assume they
// Whole string is a tilde, an optional user name, a slash, then anything.
191 if (in
.matches("^~[A-Za-z0-9_-]*/.*$")) {
192 // If the string is of "~/path" or "~user/path"
193 // we must not escape ~/ or ~user/ from the shell.
// i is the index just past the first '/'.
195 final int i
= in
.indexOf('/') + 1;
// True when that first '/' is the last character of the input.
196 if (i
== in
.length())
// Keep everything up to and including the first '/' verbatim and quote
// only the remainder with the superclass rules.
198 return in
.substring(0, i
) + super.quote(in
.substring(i
));
// Hand the whole string to the superclass quote.
201 return super.quote(in
);
205 /** Quoting style that obeys the rules Git applies to file names */
206 public static final class GitPathStyle
extends QuotedString
{
/**
 * Lookup table indexed by byte value; quote(String) reads an entry for
 * every input byte whose unsigned value is below the table length.
 * <p>
 * NOTE(review): the initializer's opening line, the bodies of the three
 * loops and most of the individual table assignments are not visible in
 * this view, so the meaning of each entry value cannot be confirmed here.
 */
207 private static final byte[] quote
;
// One entry for each value 0..127.
209 quote
= new byte[128];
// Every entry starts out as -1.
210 Arrays
.fill(quote
, (byte) -1);
// Ranges '0'..'9', 'a'..'z' and 'A'..'Z' (loop bodies not visible here).
212 for (int i
= '0'; i
<= '9'; i
++)
214 for (int i
= 'a'; i
<= 'z'; i
++)
216 for (int i
= 'A'; i
<= 'Z'; i
++)
// Entry for code 0x07 is the letter 'a'.
228 quote
['\u0007'] = 'a';
// Entry for code 0x0B is the letter 'v'.
234 quote
['\u000B'] = 'v';
/**
 * Quote <code>instr</code> under the Git path style, working on the bytes
 * produced by <code>Constants.encode</code>.
 * <p>
 * NOTE(review): the bodies of the branches, the places where
 * <code>reuse</code> is updated or read, and the end of this method are
 * not visible in this view.
 */
240 public String
quote(final String instr
) {
// Empty input is special-cased (the statement for this branch is not
// visible here).
241 if (instr
.length() == 0)
243 boolean reuse
= true;
244 final byte[] in
= Constants
.encode(instr
);
// Initial capacity: the encoded length plus 2.
245 final StringBuilder r
= new StringBuilder(2 + in
.length
);
// Visit every encoded byte.
247 for (int i
= 0; i
< in
.length
; i
++) {
// Read the byte as an unsigned value, 0..255.
248 final int c
= in
[i
] & 0xff;
// Only values below the table length have a table entry.
249 if (c
< quote
.length
) {
250 final byte style
= quote
[c
];
// Emit the table entry itself as a character.
258 r
.append((char) style
);
// Emit c as three octal digits: bits 7-6, then bits 5-3, then bits 2-0.
265 r
.append((char) (((c
>> 6) & 03) + '0'));
266 r
.append((char) (((c
>> 3) & 07) + '0'));
267 r
.append((char) (((c
>> 0) & 07) + '0'));
276 public String
dequote(final byte[] in
, final int inPtr
, final int inEnd
) {
277 if (2 <= inEnd
- inPtr
&& in
[inPtr
] == '"' && in
[inEnd
- 1] == '"')
278 return dq(in
, inPtr
+ 1, inEnd
- 1);
279 return RawParseUtils
.decode(Constants
.CHARSET
, in
, inPtr
, inEnd
);
/**
 * Decode the bytes of <code>in</code> between <code>inPtr</code>
 * (inclusive) and <code>inEnd</code> (exclusive), resolving escape
 * sequences, and return the result as a String.
 * <p>
 * dequote calls this with the range that lies between the surrounding
 * double quotes. Output bytes are collected in <code>r</code> and decoded
 * by <code>RawParseUtils.decode</code> using <code>Constants.CHARSET</code>.
 * <p>
 * NOTE(review): the declaration of <code>rPtr</code>, the switch's case
 * labels and many statements in between are not visible in this view.
 */
282 private static String
dq(final byte[] in
, int inPtr
, final int inEnd
) {
// Output buffer sized to the length of the input range.
283 final byte[] r
= new byte[inEnd
- inPtr
];
// Scan until the range is exhausted.
285 while (inPtr
< inEnd
) {
// Consume the next input byte.
286 final byte b
= in
[inPtr
++];
// Nothing follows the byte just consumed.
292 if (inPtr
== inEnd
) {
293 // Lone trailing backslash. Treat it as a literal.
// Dispatch on the next byte, consuming it (presumably the byte after a
// backslash - confirm).
299 switch (in
[inPtr
++]) {
301 r
[rPtr
++] = 0x07 /* \a = BEL */;
319 r
[rPtr
++] = 0x0B/* \v = VT */;
// Store the byte just consumed unchanged.
324 r
[rPtr
++] = in
[inPtr
- 1];
// Numeric value of the digit just consumed.
331 int cp
= in
[inPtr
- 1] - '0';
332 while (inPtr
< inEnd
) {
// Look at the next byte without consuming it.
333 final byte c
= in
[inPtr
];
// Octal digit test.
334 if ('0' <= c
&& c
<= '7') {
// Store the accumulated value as a single byte.
342 r
[rPtr
++] = (byte) cp
;
347 // Any other code is taken literally.
350 r
[rPtr
++] = in
[inPtr
- 1];
// Decode the first rPtr collected bytes.
355 return RawParseUtils
.decode(Constants
.CHARSET
, r
, 0, rPtr
);
358 private GitPathStyle() {