Remove unistd.h
[helenos.git] / uspace / lib / c / generic / vfs / canonify.c
blobebc41b3b9601a835dd3f61dacff20b592fc6f83f
1 /*
2 * Copyright (c) 2008 Jakub Jermar
3 * All rights reserved.
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
9 * - Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 * - Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
14 * - The name of the author may not be used to endorse or promote products
15 * derived from this software without specific prior written permission.
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 /** @addtogroup libc
30 * @{
31 */
33 /**
34 * @file
35 * @brief
38 #include <stdlib.h>
39 #include <stddef.h>
40 #include <vfs/canonify.h>
42 /** Token types used for tokenization of path. */
43 typedef enum {
44 TK_INVALID,
45 TK_SLASH,
46 TK_DOT,
47 TK_DOTDOT,
48 TK_COMP,
49 TK_NUL
50 } tokval_t;
52 typedef struct {
53 tokval_t kind;
54 char *start;
55 char *stop;
56 } token_t;
58 /** Fake up the TK_SLASH token. */
59 static token_t slash_token(char *start)
61 token_t ret;
62 ret.kind = TK_SLASH;
63 ret.start = start;
64 ret.stop = start;
65 return ret;
68 /** Given a token, return the next token. */
69 static token_t next_token(token_t *cur)
71 token_t ret;
73 if (cur->stop[1] == '\0') {
74 ret.kind = TK_NUL;
75 ret.start = cur->stop + 1;
76 ret.stop = ret.start;
77 return ret;
79 if (cur->stop[1] == '/') {
80 ret.kind = TK_SLASH;
81 ret.start = cur->stop + 1;
82 ret.stop = ret.start;
83 return ret;
85 if (cur->stop[1] == '.' && (!cur->stop[2] || cur->stop[2] == '/')) {
86 ret.kind = TK_DOT;
87 ret.start = cur->stop + 1;
88 ret.stop = ret.start;
89 return ret;
91 if (cur->stop[1] == '.' && cur->stop[2] == '.' &&
92 (!cur->stop[3] || cur->stop[3] == '/')) {
93 ret.kind = TK_DOTDOT;
94 ret.start = cur->stop + 1;
95 ret.stop = cur->stop + 2;
96 return ret;
98 unsigned i;
99 for (i = 1; cur->stop[i] && cur->stop[i] != '/'; i++)
101 ret.kind = TK_COMP;
102 ret.start = &cur->stop[1];
103 ret.stop = &cur->stop[i - 1];
104 return ret;
107 /** States used by canonify(). */
108 typedef enum {
109 S_INI,
110 S_A,
111 S_B,
112 S_C,
113 S_ACCEPT,
114 S_RESTART,
115 S_REJECT
116 } state_t;
118 typedef struct {
119 state_t s;
120 void (* f)(token_t *, token_t *, token_t *);
121 } change_state_t;
124 * Actions that can be performed when transitioning from one
125 * state of canonify() to another.
127 static void set_first_slash(token_t *t, token_t *tfsl, token_t *tlcomp)
129 *tfsl = *t;
130 *tlcomp = *t;
132 static void save_component(token_t *t, token_t *tfsl, token_t *tlcomp)
134 *tlcomp = *t;
136 static void terminate_slash(token_t *t, token_t *tfsl, token_t *tlcomp)
138 if (tfsl->stop[1]) /* avoid writing to a well-formatted path */
139 tfsl->stop[1] = '\0';
141 static void remove_trailing_slash(token_t *t, token_t *tfsl, token_t *tlcomp)
143 t->start[-1] = '\0';
145 /** Eat the extra '/'.
147 * @param t The current TK_SLASH token.
149 static void shift_slash(token_t *t, token_t *tfsl, token_t *tlcomp)
151 char *p = t->start;
152 char *q = t->stop + 1;
153 while ((*p++ = *q++))
156 /** Eat the extra '.'.
158 * @param t The current TK_DOT token.
160 static void shift_dot(token_t *t, token_t *tfsl, token_t *tlcomp)
162 char *p = t->start;
163 char *q = t->stop + 1;
164 while ((*p++ = *q++))
167 /** Collapse the TK_COMP TK_SLASH TK_DOTDOT pattern.
169 * @param t The current TK_DOTDOT token.
170 * @param tlcomp The last TK_COMP token.
172 static void shift_dotdot(token_t *t, token_t *tfsl, token_t *tlcomp)
174 char *p = tlcomp->start;
175 char *q = t->stop + 1;
176 while ((*p++ = *q++))
180 /** Transition function for canonify(). */
181 static change_state_t trans[4][6] = {
182 [S_INI] = {
183 [TK_SLASH] = {
184 .s = S_A,
185 .f = set_first_slash,
187 [TK_DOT] = {
188 .s = S_REJECT,
189 .f = NULL,
191 [TK_DOTDOT] = {
192 .s = S_REJECT,
193 .f = NULL,
195 [TK_COMP] = {
196 .s = S_REJECT,
197 .f = NULL,
199 [TK_NUL] = {
200 .s = S_REJECT,
201 .f = NULL,
203 [TK_INVALID] = {
204 .s = S_REJECT,
205 .f = NULL,
208 [S_A] = {
209 [TK_SLASH] = {
210 .s = S_A,
211 .f = set_first_slash,
213 [TK_DOT] = {
214 .s = S_A,
215 .f = NULL,
217 [TK_DOTDOT] = {
218 .s = S_A,
219 .f = NULL,
221 [TK_COMP] = {
222 .s = S_B,
223 .f = save_component,
225 [TK_NUL] = {
226 .s = S_ACCEPT,
227 .f = terminate_slash,
229 [TK_INVALID] = {
230 .s = S_REJECT,
231 .f = NULL,
234 [S_B] = {
235 [TK_SLASH] = {
236 .s = S_C,
237 .f = NULL,
239 [TK_DOT] = {
240 .s = S_REJECT,
241 .f = NULL,
243 [TK_DOTDOT] = {
244 .s = S_REJECT,
245 .f = NULL,
247 [TK_COMP] = {
248 .s = S_REJECT,
249 .f = NULL,
251 [TK_NUL] = {
252 .s = S_ACCEPT,
253 .f = NULL,
255 [TK_INVALID] = {
256 .s = S_REJECT,
257 .f = NULL,
260 [S_C] = {
261 [TK_SLASH] = {
262 .s = S_RESTART,
263 .f = shift_slash,
265 [TK_DOT] = {
266 .s = S_RESTART,
267 .f = shift_dot,
269 [TK_DOTDOT] = {
270 .s = S_RESTART,
271 .f = shift_dotdot,
273 [TK_COMP] = {
274 .s = S_B,
275 .f = save_component,
277 [TK_NUL] = {
278 .s = S_ACCEPT,
279 .f = remove_trailing_slash,
281 [TK_INVALID] = {
282 .s = S_REJECT,
283 .f = NULL,
288 /** Canonify a file system path.
290 * A file system path is canonical, if the following holds:
292 * 1) the path is absolute
293 * (i.e. a/b/c is not canonical)
294 * 2) there is no trailing slash in the path if it has components
295 * (i.e. /a/b/c/ is not canonical)
296 * 3) there is no extra slash in the path
297 * (i.e. /a//b/c is not canonical)
298 * 4) there is no '.' component in the path
299 * (i.e. /a/./b/c is not canonical)
300 * 5) there is no '..' component in the path
301 * (i.e. /a/b/../c is not canonical)
303 * This function makes a potentially non-canonical file system path canonical.
304 * It works in-place and requires a NULL-terminated input string.
306 * @param path Path to be canonified.
307 * @param lenp Pointer where the length of the final path will be
308 * stored. Can be NULL.
310 * @return Canonified path or NULL on failure.
312 char *canonify(char *path, size_t *lenp)
314 state_t state;
315 token_t t;
316 token_t tfsl; /* first slash */
317 token_t tlcomp; /* last component */
318 if (*path != '/')
319 return NULL;
320 tfsl = slash_token(path);
321 restart:
322 state = S_INI;
323 t = tfsl;
324 tlcomp = tfsl;
325 while (state != S_ACCEPT && state != S_RESTART && state != S_REJECT) {
326 if (trans[state][t.kind].f)
327 trans[state][t.kind].f(&t, &tfsl, &tlcomp);
328 state = trans[state][t.kind].s;
329 t = next_token(&t);
332 switch (state) {
333 case S_RESTART:
334 goto restart;
335 case S_REJECT:
336 return NULL;
337 case S_ACCEPT:
338 if (lenp)
339 *lenp = (size_t)((tlcomp.stop - tfsl.start) + 1);
340 return tfsl.start;
341 default:
342 abort();
347 * @}