2 * Copyright (C) 2008, Google Inc.
6 * Redistribution and use in source and binary forms, with or
7 * without modification, are permitted provided that the following
10 * - Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
13 * - Redistributions in binary form must reproduce the above
14 * copyright notice, this list of conditions and the following
15 * disclaimer in the documentation and/or other materials provided
16 * with the distribution.
18 * - Neither the name of the Git Development Community nor the
19 * names of its contributors may be used to endorse or promote
20 * products derived from this software without specific prior
23 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
24 * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
25 * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
26 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
28 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
29 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
30 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
31 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
32 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
33 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
34 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
35 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
38 package org
.spearce
.jgit
.util
;
40 import java
.util
.Arrays
;
42 import org
.spearce
.jgit
.lib
.Constants
;
44 /** Utility functions related to quoted string handling. */
45 public abstract class QuotedString
{
46 /** Quoting style that obeys the rules Git applies to file names */
47 public static final GitPathStyle GIT_PATH
= new GitPathStyle();
50 * Quoting style used by the Bourne shell.
52 * Quotes are unconditionally inserted during {@link #quote(String)}. This
53 * protects shell meta-characters like <code>$</code> or <code>~</code> from
54 * being recognized as special.
56 public static final BourneStyle BOURNE
= new BourneStyle();
59 * Quote an input string by the quoting rules.
61 * If the input string does not require any quoting, the same String
62 * reference is returned to the caller.
64 * Otherwise a quoted string is returned, including the opening and closing
65 * quotation marks at the start and end of the string. If the style does not
66 * permit raw Unicode characters then the string will first be encoded in
67 * UTF-8, with unprintable sequences possibly escaped by the rules.
70 * any non-null Unicode string.
71 * @return a quoted string. See above for details.
73 public abstract String
quote(String in
);
76 * Clean a previously quoted input, decoding the result via UTF-8.
78 * This method must match quote such that:
81 * a.equals(dequote(quote(a)));
84 * is true for any <code>a</code>.
87 * a Unicode string to remove quoting from.
88 * @return the cleaned string.
89 * @see #dequote(byte[], int, int)
91 public String
dequote(final String in
) {
92 final byte[] b
= Constants
.encode(in
);
93 return dequote(b
, 0, b
.length
);
97 * Decode a previously quoted input, scanning a UTF-8 encoded buffer.
99 * This method must match quote such that:
102 * a.equals(dequote(Constants.encode(quote(a))));
105 * is true for any <code>a</code>.
107 * This method removes any opening/closing quotation marks added by
108 * {@link #quote(String)}.
111 * the input buffer to parse.
113 * first position within <code>in</code> to scan.
115 * one position past in <code>in</code> to scan.
116 * @return the cleaned string.
118 public abstract String
dequote(byte[] in
, int offset
, int end
);
121 * Quoting style used by the Bourne shell.
123 * Quotes are unconditionally inserted during {@link #quote(String)}. This
124 * protects shell meta-characters like <code>$</code> or <code>~</code> from
125 * being recognized as special.
127 public static class BourneStyle
extends QuotedString
{
129 public String
quote(final String in
) {
130 final StringBuilder r
= new StringBuilder();
132 int start
= 0, i
= 0;
133 for (; i
< in
.length(); i
++) {
134 switch (in
.charAt(i
)) {
137 r
.append(in
, start
, i
);
140 r
.append(in
.charAt(i
));
146 r
.append(in
, start
, i
);
152 public String
dequote(final byte[] in
, int ip
, final int ie
) {
153 boolean inquote
= false;
154 final byte[] r
= new byte[ie
- ip
];
157 final byte b
= in
[ip
++];
163 if (inquote
|| ip
== ie
)
164 r
[rPtr
++] = b
; // literal within a quote
166 r
[rPtr
++] = in
[ip
++];
173 return RawParseUtils
.decode(Constants
.CHARSET
, r
, 0, rPtr
);
177 /** Quoting style that obeys the rules Git applies to file names */
178 public static final class GitPathStyle
extends QuotedString
{
179 private static final byte[] quote
;
181 quote
= new byte[128];
182 Arrays
.fill(quote
, (byte) -1);
184 for (int i
= '0'; i
<= '9'; i
++)
186 for (int i
= 'a'; i
<= 'z'; i
++)
188 for (int i
= 'A'; i
<= 'Z'; i
++)
200 quote
['\u0007'] = 'a';
206 quote
['\u000B'] = 'v';
212 public String
quote(final String instr
) {
213 if (instr
.length() == 0)
215 boolean reuse
= true;
216 final byte[] in
= Constants
.encode(instr
);
217 final StringBuilder r
= new StringBuilder(2 + in
.length
);
219 for (int i
= 0; i
< in
.length
; i
++) {
220 final int c
= in
[i
] & 0xff;
221 if (c
< quote
.length
) {
222 final byte style
= quote
[c
];
230 r
.append((char) style
);
237 r
.append((char) (((c
>> 6) & 03) + '0'));
238 r
.append((char) (((c
>> 3) & 07) + '0'));
239 r
.append((char) (((c
>> 0) & 07) + '0'));
248 public String
dequote(final byte[] in
, final int inPtr
, final int inEnd
) {
249 if (2 <= inEnd
- inPtr
&& in
[inPtr
] == '"' && in
[inEnd
- 1] == '"')
250 return dq(in
, inPtr
+ 1, inEnd
- 1);
251 return RawParseUtils
.decode(Constants
.CHARSET
, in
, inPtr
, inEnd
);
254 private static String
dq(final byte[] in
, int inPtr
, final int inEnd
) {
255 final byte[] r
= new byte[inEnd
- inPtr
];
257 while (inPtr
< inEnd
) {
258 final byte b
= in
[inPtr
++];
264 if (inPtr
== inEnd
) {
265 // Lone trailing backslash. Treat it as a literal.
271 switch (in
[inPtr
++]) {
273 r
[rPtr
++] = 0x07 /* \a = BEL */;
291 r
[rPtr
++] = 0x0B/* \v = VT */;
296 r
[rPtr
++] = in
[inPtr
- 1];
303 int cp
= in
[inPtr
- 1] - '0';
304 while (inPtr
< inEnd
) {
305 final byte c
= in
[inPtr
];
306 if ('0' <= c
&& c
<= '7') {
314 r
[rPtr
++] = (byte) cp
;
319 // Any other code is taken literally.
322 r
[rPtr
++] = in
[inPtr
- 1];
327 return RawParseUtils
.decode(Constants
.CHARSET
, r
, 0, rPtr
);
330 private GitPathStyle() {