Success build TortoiseMerge.
[TortoiseGit.git] / src / TortoiseMerge / libsvn_diff / dirent_uri.c
blob4b07a89d91e32ff2f62857ffe25f084f0e63d51e
1 /*
2 * dirent_uri.c: a library to manipulate URIs and directory entries.
4 * ====================================================================
5 * Copyright (c) 2008-2009 CollabNet. All rights reserved.
7 * This software is licensed as described in the file COPYING, which
8 * you should have received as part of this distribution. The terms
9 * are also available at http://subversion.tigris.org/license-1.html.
10 * If newer versions of this license are posted there, you may use a
11 * newer version instead, at your option.
13 * This software consists of voluntary contributions made by many
14 * individuals. For exact contribution history, see the revision
15 * history and logs, available at http://subversion.tigris.org/.
16 * ====================================================================
21 #include <string.h>
22 #include <assert.h>
23 #include <ctype.h>
25 #include <apr_uri.h>
27 //#include "svn_private_config.h"
28 #include "svn_string.h"
29 #include "svn_dirent_uri.h"
30 #include "svn_path.h"
32 //#include "private_uri.h"
33 #define SVN_PATH_LOCAL_SEPARATOR '\\'
34 const char *
35 svn_uri_canonicalize(const char *uri, apr_pool_t *pool);
/* The canonical empty path.  Can this be changed?  Well, change the empty
   test below and the path library will work, not so sure about the fs/wc
   libraries. */
#define SVN_EMPTY_PATH ""

/* TRUE if S is the canonical empty path (its first byte is the
   terminator), FALSE otherwise. */
#define SVN_PATH_IS_EMPTY(s) (*(s) == '\0')

/* TRUE if the N-byte string S is the platform's empty path ("."),
   FALSE otherwise.  Can this be changed?  Well, the path library will
   work, not so sure about the OS! */
#define SVN_PATH_IS_PLATFORM_EMPTY(s,n) ((n) == 1 && *(s) == '.')
/* Selects which flavour of path the internal helpers operate on.
   Used only by the static functions in this file. */
typedef enum path_type_t {
  type_uri = 0,     /* the path is a URI (possibly a full URL) */
  type_dirent = 1   /* the path is a regular directory entry */
} path_type_t;
57 /**** Internal implementation functions *****/
59 /* Return an internal-style new path based on PATH, allocated in POOL.
60 * Pass type_uri for TYPE if PATH is a uri and type_dirent if PATH
61 * is a regular path.
63 * "Internal-style" means that separators are all '/', and the new
64 * path is canonicalized.
66 static const char *
67 internal_style(path_type_t type, const char *path, apr_pool_t *pool)
69 #if '/' != SVN_PATH_LOCAL_SEPARATOR
71 char *p = apr_pstrdup(pool, path);
72 path = p;
74 /* Convert all local-style separators to the canonical ones. */
75 for (; *p != '\0'; ++p)
76 if (*p == SVN_PATH_LOCAL_SEPARATOR)
77 *p = '/';
79 #endif
81 return type == type_uri ? svn_uri_canonicalize(path, pool)
82 : svn_dirent_canonicalize(path, pool);
83 /* FIXME: Should also remove trailing /.'s, if the style says so. */
86 /* Return a local-style new path based on PATH, allocated in POOL.
87 * Pass type_uri for TYPE if PATH is a uri and type_dirent if PATH
88 * is a regular path.
90 * "Local-style" means a path that looks like what users are
91 * accustomed to seeing, including native separators. The new path
92 * will still be canonicalized.
94 static const char *
95 local_style(path_type_t type, const char *path, apr_pool_t *pool)
97 path = type == type_uri ? svn_uri_canonicalize(path, pool)
98 : svn_dirent_canonicalize(path, pool);
99 /* FIXME: Should also remove trailing /.'s, if the style says so. */
101 /* Internally, Subversion represents the current directory with the
102 empty string. But users like to see "." . */
103 if (SVN_PATH_IS_EMPTY(path))
104 return ".";
106 /* If PATH is a URL, the "local style" is the same as the input. */
107 if (type == type_uri && svn_path_is_url(path))
108 return apr_pstrdup(pool, path);
110 #if '/' != SVN_PATH_LOCAL_SEPARATOR
112 char *p = apr_pstrdup(pool, path);
113 path = p;
115 /* Convert all canonical separators to the local-style ones. */
116 for (; *p != '\0'; ++p)
117 if (*p == '/')
118 *p = SVN_PATH_LOCAL_SEPARATOR;
120 #endif
122 return path;
/* Locale-insensitive tolower(), used for the parts of dirents and URLs
   that are case-folded during canonicalization.  Only the characters
   'A'..'Z' are changed; every other value is returned as is. */
static char
canonicalize_to_lower(char c)
{
  return (c >= 'A' && c <= 'Z') ? (char)(c - 'A' + 'a') : c;
}
#if defined(WIN32) || defined(__CYGWIN__)
/* Locale-insensitive toupper(), used for the parts of dirents and URLs
   that are case-folded during canonicalization.  Only the characters
   'a'..'z' are changed; every other value is returned as is. */
static char
canonicalize_to_upper(char c)
{
  return (c >= 'a' && c <= 'z') ? (char)(c - 'a' + 'A') : c;
}
#endif
148 /* Return the length of substring necessary to encompass the entire
149 * previous dirent segment in DIRENT, which should be a LEN byte string.
151 * A trailing slash will not be included in the returned length except
152 * in the case in which DIRENT is absolute and there are no more
153 * previous segments.
155 static apr_size_t
156 dirent_previous_segment(const char *dirent,
157 apr_size_t len)
159 if (len == 0)
160 return 0;
162 --len;
163 while (len > 0 && dirent[len] != '/'
164 #if defined(WIN32) || defined(__CYGWIN__)
165 && dirent[len] != ':'
166 #endif /* WIN32 or Cygwin */
168 --len;
170 /* check if the remaining segment including trailing '/' is a root dirent */
171 if (svn_dirent_is_root(dirent, len + 1))
172 return len + 1;
173 else
174 return len;
177 /* Return the length of substring necessary to encompass the entire
178 * previous uri segment in URI, which should be a LEN byte string.
180 * A trailing slash will not be included in the returned length except
181 * in the case in which URI is absolute and there are no more
182 * previous segments.
184 static apr_size_t
185 uri_previous_segment(const char *uri,
186 apr_size_t len)
188 /* ### Still the old path segment code, should start checking scheme specific format */
189 if (len == 0)
190 return 0;
192 --len;
193 while (len > 0 && uri[len] != '/')
194 --len;
196 /* check if the remaining segment including trailing '/' is a root dirent */
197 if (svn_uri_is_root(uri, len + 1))
198 return len + 1;
199 else
200 return len;
203 /* Return the canonicalized version of PATH, allocated in POOL.
204 * Pass type_uri for TYPE if PATH is a uri and type_dirent if PATH
205 * is a regular path.
207 static const char *
208 canonicalize(path_type_t type, const char *path, apr_pool_t *pool)
210 char *canon, *dst;
211 const char *src;
212 apr_size_t seglen;
213 apr_size_t schemelen = 0;
214 apr_size_t canon_segments = 0;
215 svn_boolean_t url = FALSE;
217 /* "" is already canonical, so just return it; note that later code
218 depends on path not being zero-length. */
219 if (SVN_PATH_IS_EMPTY(path))
220 return path;
222 dst = canon = apr_pcalloc(pool, strlen(path) + 1);
224 /* Try to parse the path as an URI. */
225 url = FALSE;
226 src = path;
228 if (type == type_uri && *src != '/')
230 while (*src && (*src != '/') && (*src != ':'))
231 src++;
233 if (*src == ':' && *(src+1) == '/' && *(src+2) == '/')
235 const char *seg;
237 url = TRUE;
239 /* Found a scheme, convert to lowercase and copy to dst. */
240 src = path;
241 while (*src != ':')
243 *(dst++) = canonicalize_to_lower((*src++));
244 schemelen++;
246 *(dst++) = ':';
247 *(dst++) = '/';
248 *(dst++) = '/';
249 src += 3;
250 schemelen += 3;
252 /* This might be the hostname */
253 seg = src;
254 while (*src && (*src != '/') && (*src != '@'))
255 src++;
257 if (*src == '@')
259 /* Copy the username & password. */
260 seglen = src - seg + 1;
261 memcpy(dst, seg, seglen);
262 dst += seglen;
263 src++;
265 else
266 src = seg;
268 /* Found a hostname, convert to lowercase and copy to dst. */
269 while (*src && (*src != '/'))
270 *(dst++) = canonicalize_to_lower((*src++));
272 /* Copy trailing slash, or null-terminator. */
273 *(dst) = *(src);
275 /* Move src and dst forward only if we are not
276 * at null-terminator yet. */
277 if (*src)
279 src++;
280 dst++;
283 canon_segments = 1;
287 if (! url)
289 src = path;
290 /* If this is an absolute path, then just copy over the initial
291 separator character. */
292 if (*src == '/')
294 *(dst++) = *(src++);
296 #if defined(WIN32) || defined(__CYGWIN__)
297 /* On Windows permit two leading separator characters which means an
298 * UNC path. */
299 if ((type == type_dirent) && *src == '/')
300 *(dst++) = *(src++);
301 #endif /* WIN32 or Cygwin */
305 while (*src)
307 /* Parse each segment, find the closing '/' */
308 const char *next = src;
309 while (*next && (*next != '/'))
310 ++next;
312 seglen = next - src;
314 if (seglen == 0 || (seglen == 1 && src[0] == '.'))
316 /* Noop segment, so do nothing. */
318 #if defined(WIN32) || defined(__CYGWIN__)
319 /* If this is the first path segment of a file:// URI and it contains a
320 windows drive letter, convert the drive letter to upper case. */
321 else if (url && canon_segments == 1 && seglen == 2 &&
322 (strncmp(canon, "file:", 5) == 0) &&
323 src[0] >= 'a' && src[0] <= 'z' && src[1] == ':')
325 *(dst++) = canonicalize_to_upper(src[0]);
326 *(dst++) = ':';
327 if (*next)
328 *(dst++) = *next;
329 canon_segments++;
331 #endif /* WIN32 or Cygwin */
332 else
334 /* An actual segment, append it to the destination path */
335 if (*next)
336 seglen++;
337 memcpy(dst, src, seglen);
338 dst += seglen;
339 canon_segments++;
342 /* Skip over trailing slash to the next segment. */
343 src = next;
344 if (*src)
345 src++;
348 /* Remove the trailing slash if there was at least one
349 * canonical segment and the last segment ends with a slash.
351 * But keep in mind that, for URLs, the scheme counts as a
352 * canonical segment -- so if path is ONLY a scheme (such
353 * as "https://") we should NOT remove the trailing slash. */
354 if ((canon_segments > 0 && *(dst - 1) == '/')
355 && ! (url && path[schemelen] == '\0'))
357 dst --;
360 *dst = '\0';
362 #if defined(WIN32) || defined(__CYGWIN__)
363 /* Skip leading double slashes when there are less than 2
364 * canon segments. UNC paths *MUST* have two segments. */
365 if ((type == type_dirent) && canon[0] == '/' && canon[1] == '/')
367 if (canon_segments < 2)
368 return canon + 1;
369 else
371 /* Now we're sure this is a valid UNC path, convert the server name
372 (the first path segment) to lowercase as Windows treats it as case
373 insensitive.
374 Note: normally the share name is treated as case insensitive too,
375 but it seems to be possible to configure Samba to treat those as
376 case sensitive, so better leave that alone. */
377 dst = canon + 2;
378 while (*dst && *dst != '/')
379 *(dst++) = canonicalize_to_lower(*dst);
382 #endif /* WIN32 or Cygwin */
384 return canon;
387 /* Return the string length of the longest common ancestor of PATH1 and PATH2.
388 * Pass type_uri for TYPE if PATH1 and PATH2 are URIs, and type_dirent if
389 * PATH1 and PATH2 are regular paths.
391 * If the two paths do not share a common ancestor, return 0.
393 * New strings are allocated in POOL.
395 static apr_size_t
396 get_longest_ancestor_length(path_type_t types,
397 const char *path1,
398 const char *path2,
399 apr_pool_t *pool)
401 apr_size_t path1_len, path2_len;
402 apr_size_t i = 0;
403 apr_size_t last_dirsep = 0;
404 #if defined(WIN32) || defined(__CYGWIN__)
405 svn_boolean_t unc = FALSE;
406 #endif
408 path1_len = strlen(path1);
409 path2_len = strlen(path2);
411 if (SVN_PATH_IS_EMPTY(path1) || SVN_PATH_IS_EMPTY(path2))
412 return 0;
414 while (path1[i] == path2[i])
416 /* Keep track of the last directory separator we hit. */
417 if (path1[i] == '/')
418 last_dirsep = i;
420 i++;
422 /* If we get to the end of either path, break out. */
423 if ((i == path1_len) || (i == path2_len))
424 break;
427 /* two special cases:
428 1. '/' is the longest common ancestor of '/' and '/foo' */
429 if (i == 1 && path1[0] == '/' && path2[0] == '/')
430 return 1;
431 /* 2. '' is the longest common ancestor of any non-matching
432 * strings 'foo' and 'bar' */
433 if (types == type_dirent && i == 0)
434 return 0;
436 /* Handle some windows specific cases */
437 #if defined(WIN32) || defined(__CYGWIN__)
438 if (types == type_dirent)
440 /* don't count the '//' from UNC paths */
441 if (last_dirsep == 1 && path1[0] == '/' && path1[1] == '/')
443 last_dirsep = 0;
444 unc = TRUE;
447 /* X:/ and X:/foo */
448 if (i == 3 && path1[2] == '/' && path1[1] == ':')
449 return i;
451 /* Cannot use SVN_ERR_ASSERT here, so we'll have to crash, sorry.
452 * Note that this assertion triggers only if the code above has
453 * been broken. The code below relies on this assertion, because
454 * it uses [i - 1] as index. */
455 assert(i > 0);
457 /* X: and X:/ */
458 if ((path1[i - 1] == ':' && path2[i] == '/') ||
459 (path2[i - 1] == ':' && path1[i] == '/'))
460 return 0;
461 /* X: and X:foo */
462 if (path1[i - 1] == ':' || path2[i - 1] == ':')
463 return i;
465 #endif /* WIN32 or Cygwin */
467 /* last_dirsep is now the offset of the last directory separator we
468 crossed before reaching a non-matching byte. i is the offset of
469 that non-matching byte, and is guaranteed to be <= the length of
470 whichever path is shorter.
471 If one of the paths is the common part return that. */
472 if (((i == path1_len) && (path2[i] == '/'))
473 || ((i == path2_len) && (path1[i] == '/'))
474 || ((i == path1_len) && (i == path2_len)))
475 return i;
476 else
478 /* Nothing in common but the root folder '/' or 'X:/' for Windows
479 dirents. */
480 #if defined(WIN32) || defined(__CYGWIN__)
481 if (! unc)
483 /* X:/foo and X:/bar returns X:/ */
484 if ((types == type_dirent) &&
485 last_dirsep == 2 && path1[1] == ':' && path1[2] == '/'
486 && path2[1] == ':' && path2[2] == '/')
487 return 3;
488 #endif
489 if (last_dirsep == 0 && path1[0] == '/' && path2[0] == '/')
490 return 1;
491 #if defined(WIN32) || defined(__CYGWIN__)
493 #endif
496 return last_dirsep;
499 /* Determine whether PATH2 is a child of PATH1.
501 * PATH2 is a child of PATH1 if
502 * 1) PATH1 is empty, and PATH2 is not empty and not an absolute path.
503 * or
504 * 2) PATH2 is has n components, PATH1 has x < n components,
505 * and PATH1 matches PATH2 in all its x components.
506 * Components are separated by a slash, '/'.
508 * Pass type_uri for TYPE if PATH1 and PATH2 are URIs, and type_dirent if
509 * PATH1 and PATH2 are regular paths.
511 * If PATH2 is not a child of PATH1, return NULL.
513 * If PATH2 is a child of PATH1, and POOL is not NULL, allocate a copy
514 * of the child part of PATH2 in POOL and return a pointer to the
515 * newly allocated child part.
517 * If PATH2 is a child of PATH1, and POOL is NULL, return a pointer
518 * pointing to the child part of PATH2.
519 * */
520 static const char *
521 is_child(path_type_t type, const char *path1, const char *path2,
522 apr_pool_t *pool)
524 apr_size_t i;
526 /* Allow "" and "foo" or "H:foo" to be parent/child */
527 if (SVN_PATH_IS_EMPTY(path1)) /* "" is the parent */
529 if (SVN_PATH_IS_EMPTY(path2)) /* "" not a child */
530 return NULL;
532 /* check if this is an absolute path */
533 if ((type == type_uri && svn_uri_is_absolute(path2)) ||
534 (type == type_dirent && svn_dirent_is_absolute(path2)))
535 return NULL;
536 else
537 /* everything else is child */
538 return pool ? apr_pstrdup(pool, path2) : path2;
541 /* Reach the end of at least one of the paths. How should we handle
542 things like path1:"foo///bar" and path2:"foo/bar/baz"? It doesn't
543 appear to arise in the current Subversion code, it's not clear to me
544 if they should be parent/child or not. */
545 /* Hmmm... aren't paths assumed to be canonical in this function?
546 * How can "foo///bar" even happen if the paths are canonical? */
547 for (i = 0; path1[i] && path2[i]; i++)
548 if (path1[i] != path2[i])
549 return NULL;
551 /* FIXME: This comment does not really match
552 * the checks made in the code it refers to: */
553 /* There are two cases that are parent/child
554 ... path1[i] == '\0'
555 .../foo path2[i] == '/'
557 / path1[i] == '\0'
558 /foo path2[i] != '/'
560 Other root paths (like X:/) fall under the former case:
561 X:/ path1[i] == '\0'
562 X:/foo path2[i] != '/'
564 Check for '//' to avoid matching '/' and '//srv'.
566 if (path1[i] == '\0' && path2[i])
568 if (path1[i - 1] == '/'
569 #if defined(WIN32) || defined(__CYGWIN__)
570 || ((type == type_dirent) && path1[i - 1] == ':')
571 #endif /* WIN32 or Cygwin */
574 if (path2[i] == '/')
575 /* .../
576 * ..../
577 * i */
578 return NULL;
579 else
580 /* .../
581 * .../foo
582 * i */
583 return pool ? apr_pstrdup(pool, path2 + i) : path2 + i;
585 else if (path2[i] == '/')
587 if (path2[i + 1])
588 /* ...
589 * .../foo
590 * i */
591 return pool ? apr_pstrdup(pool, path2 + i + 1) : path2 + i + 1;
592 else
593 /* ...
594 * .../
595 * i */
596 return NULL;
600 /* Otherwise, path2 isn't a child. */
601 return NULL;
604 /* FIXME: no doc string */
605 static svn_boolean_t
606 is_ancestor(path_type_t type, const char *path1, const char *path2)
608 apr_size_t path1_len;
610 /* If path1 is empty and path2 is not absolute, then path1 is an ancestor. */
611 if (SVN_PATH_IS_EMPTY(path1))
613 return type == type_uri ? ! svn_uri_is_absolute(path2)
614 : ! svn_dirent_is_absolute(path2);
617 /* If path1 is a prefix of path2, then:
618 - If path1 ends in a path separator,
619 - If the paths are of the same length
621 - path2 starts a new path component after the common prefix,
622 then path1 is an ancestor. */
623 path1_len = strlen(path1);
624 if (strncmp(path1, path2, path1_len) == 0)
625 return path1[path1_len - 1] == '/'
626 #if defined(WIN32) || defined(__CYGWIN__)
627 || ((type == type_dirent) && path1[path1_len - 1] == ':')
628 #endif /* WIN32 or Cygwin */
629 || (path2[path1_len] == '/' || path2[path1_len] == '\0');
631 return FALSE;
635 /**** Public API functions ****/
637 const char *
638 svn_dirent_internal_style(const char *dirent, apr_pool_t *pool)
640 return internal_style(type_dirent, dirent, pool);
643 const char *
644 svn_dirent_local_style(const char *dirent, apr_pool_t *pool)
646 return local_style(type_dirent, dirent, pool);
649 const char *
650 svn_uri_internal_style(const char *uri, apr_pool_t *pool)
652 return internal_style(type_uri, uri, pool);
655 const char *
656 svn_uri_local_style(const char *uri, apr_pool_t *pool)
658 return local_style(type_uri, uri, pool);
661 /* We decided against using apr_filepath_root here because of the negative
662 performance impact (creating a pool and converting strings ). */
663 svn_boolean_t
664 svn_dirent_is_root(const char *dirent, apr_size_t len)
666 /* directory is root if it's equal to '/' */
667 if (len == 1 && dirent[0] == '/')
668 return TRUE;
670 #if defined(WIN32) || defined(__CYGWIN__)
671 /* On Windows and Cygwin, 'H:' or 'H:/' (where 'H' is any letter)
672 are also root directories */
673 if ((len == 2 || len == 3) &&
674 (dirent[1] == ':') &&
675 ((dirent[0] >= 'A' && dirent[0] <= 'Z') ||
676 (dirent[0] >= 'a' && dirent[0] <= 'z')) &&
677 (len == 2 || (dirent[2] == '/' && len == 3)))
678 return TRUE;
680 /* On Windows and Cygwin, both //drive and //server/share are root
681 directories */
682 if (len >= 2 && dirent[0] == '/' && dirent[1] == '/'
683 && dirent[len - 1] != '/')
685 int segments = 0;
686 int i;
687 for (i = len; i >= 2; i--)
689 if (dirent[i] == '/')
691 segments ++;
692 if (segments > 1)
693 return FALSE;
696 return (segments <= 1);
698 #endif /* WIN32 or Cygwin */
700 return FALSE;
703 svn_boolean_t
704 svn_uri_is_root(const char *uri, apr_size_t len)
706 /* directory is root if it's equal to '/' */
707 if (len == 1 && uri[0] == '/')
708 return TRUE;
710 return FALSE;
713 char *svn_dirent_join(const char *base,
714 const char *component,
715 apr_pool_t *pool)
717 apr_size_t blen = strlen(base);
718 apr_size_t clen = strlen(component);
719 char *dirent;
720 int add_separator;
722 assert(svn_dirent_is_canonical(base, pool));
723 assert(svn_dirent_is_canonical(component, pool));
725 /* If the component is absolute, then return it. */
726 if (svn_dirent_is_absolute(component))
727 return apr_pmemdup(pool, component, clen + 1);
729 /* If either is empty return the other */
730 if (SVN_PATH_IS_EMPTY(base))
731 return apr_pmemdup(pool, component, clen + 1);
732 if (SVN_PATH_IS_EMPTY(component))
733 return apr_pmemdup(pool, base, blen + 1);
735 /* if last character of base is already a separator, don't add a '/' */
736 add_separator = 1;
737 if (base[blen - 1] == '/'
738 #if defined(WIN32) || defined(__CYGWIN__)
739 || base[blen - 1] == ':'
740 #endif /* WIN32 or Cygwin */
742 add_separator = 0;
744 /* Construct the new, combined dirent. */
745 dirent = apr_palloc(pool, blen + add_separator + clen + 1);
746 memcpy(dirent, base, blen);
747 if (add_separator)
748 dirent[blen] = '/';
749 memcpy(dirent + blen + add_separator, component, clen + 1);
751 return dirent;
754 char *svn_dirent_join_many(apr_pool_t *pool, const char *base, ...)
756 #define MAX_SAVED_LENGTHS 10
757 apr_size_t saved_lengths[MAX_SAVED_LENGTHS];
758 apr_size_t total_len;
759 int nargs;
760 va_list va;
761 const char *s;
762 apr_size_t len;
763 char *dirent;
764 char *p;
765 int add_separator;
766 int base_arg = 0;
768 total_len = strlen(base);
770 assert(svn_dirent_is_canonical(base, pool));
772 /* if last character of base is already a separator, don't add a '/' */
773 add_separator = 1;
774 if (total_len == 0
775 || base[total_len - 1] == '/'
776 #if defined(WIN32) || defined(__CYGWIN__)
777 || base[total_len - 1] == ':'
778 #endif /* WIN32 or Cygwin */
780 add_separator = 0;
782 saved_lengths[0] = total_len;
784 /* Compute the length of the resulting string. */
786 nargs = 0;
787 va_start(va, base);
788 while ((s = va_arg(va, const char *)) != NULL)
790 len = strlen(s);
792 assert(svn_dirent_is_canonical(s, pool));
794 if (SVN_PATH_IS_EMPTY(s))
795 continue;
797 if (nargs++ < MAX_SAVED_LENGTHS)
798 saved_lengths[nargs] = len;
800 if (svn_dirent_is_absolute(s))
802 /* an absolute dirent. skip all components to this point and reset
803 the total length. */
804 total_len = len;
805 base_arg = nargs;
806 add_separator = 1;
807 if (s[len - 1] == '/'
808 #if defined(WIN32) || defined(__CYGWIN__)
809 || s[len - 1] == ':'
810 #endif /* WIN32 or Cygwin */
812 add_separator = 0;
814 else if (nargs == base_arg + 1)
816 total_len += add_separator + len;
818 else
820 total_len += 1 + len;
823 va_end(va);
825 /* base == "/" and no further components. just return that. */
826 if (add_separator == 0 && total_len == 1)
827 return apr_pmemdup(pool, "/", 2);
829 /* we got the total size. allocate it, with room for a NULL character. */
830 dirent = p = apr_palloc(pool, total_len + 1);
832 /* if we aren't supposed to skip forward to an absolute component, and if
833 this is not an empty base that we are skipping, then copy the base
834 into the output. */
835 if (base_arg == 0 && ! (SVN_PATH_IS_EMPTY(base)))
837 if (SVN_PATH_IS_EMPTY(base))
838 memcpy(p, SVN_EMPTY_PATH, len = saved_lengths[0]);
839 else
840 memcpy(p, base, len = saved_lengths[0]);
841 p += len;
844 nargs = 0;
845 va_start(va, base);
846 while ((s = va_arg(va, const char *)) != NULL)
848 if (SVN_PATH_IS_EMPTY(s))
849 continue;
851 if (++nargs < base_arg)
852 continue;
854 if (nargs < MAX_SAVED_LENGTHS)
855 len = saved_lengths[nargs];
856 else
857 len = strlen(s);
859 /* insert a separator if we aren't copying in the first component
860 (which can happen when base_arg is set). also, don't put in a slash
861 if the prior character is a slash (occurs when prior component
862 is "/"). */
863 if (p != dirent &&
864 ( ! (nargs - 1 == base_arg) || add_separator))
865 *p++ = '/';
867 /* copy the new component and advance the pointer */
868 memcpy(p, s, len);
869 p += len;
871 va_end(va);
873 *p = '\0';
874 assert((apr_size_t)(p - dirent) == total_len);
876 return dirent;
879 char *
880 svn_dirent_dirname(const char *dirent, apr_pool_t *pool)
882 apr_size_t len = strlen(dirent);
884 assert(svn_dirent_is_canonical(dirent, pool));
886 if (svn_dirent_is_root(dirent, len))
887 return apr_pstrmemdup(pool, dirent, len);
888 else
889 return apr_pstrmemdup(pool, dirent, dirent_previous_segment(dirent, len));
892 char *
893 svn_uri_dirname(const char *uri, apr_pool_t *pool)
895 apr_size_t len = strlen(uri);
897 assert(svn_uri_is_canonical(uri, pool));
899 if (svn_uri_is_root(uri, len))
900 return apr_pstrmemdup(pool, uri, len);
901 else
902 return apr_pstrmemdup(pool, uri, uri_previous_segment(uri, len));
905 char *
906 svn_dirent_get_longest_ancestor(const char *dirent1,
907 const char *dirent2,
908 apr_pool_t *pool)
910 return apr_pstrndup(pool, dirent1,
911 get_longest_ancestor_length(type_dirent, dirent1,
912 dirent2, pool));
915 char *
916 svn_uri_get_longest_ancestor(const char *uri1,
917 const char *uri2,
918 apr_pool_t *pool)
920 svn_boolean_t uri1_is_url, uri2_is_url;
921 uri1_is_url = svn_path_is_url(uri1);
922 uri2_is_url = svn_path_is_url(uri2);
924 if (uri1_is_url && uri2_is_url)
926 apr_size_t uri_ancestor_len;
927 apr_size_t i = 0;
929 /* Find ':' */
930 while (1)
932 /* No shared protocol => no common prefix */
933 if (uri1[i] != uri2[i])
934 return apr_pmemdup(pool, SVN_EMPTY_PATH,
935 sizeof(SVN_EMPTY_PATH));
937 if (uri1[i] == ':')
938 break;
940 /* They're both URLs, so EOS can't come before ':' */
941 assert((uri1[i] != '\0') && (uri2[i] != '\0'));
943 i++;
946 i += 3; /* Advance past '://' */
948 uri_ancestor_len = get_longest_ancestor_length(type_uri, uri1 + i,
949 uri2 + i, pool);
951 if (uri_ancestor_len == 0 ||
952 (uri_ancestor_len == 1 && (uri1 + i)[0] == '/'))
953 return apr_pmemdup(pool, SVN_EMPTY_PATH, sizeof(SVN_EMPTY_PATH));
954 else
955 return apr_pstrndup(pool, uri1, uri_ancestor_len + i);
958 else if ((! uri1_is_url) && (! uri2_is_url))
960 return apr_pstrndup(pool, uri1,
961 get_longest_ancestor_length(type_uri, uri1, uri2,
962 pool));
965 else
967 /* A URL and a non-URL => no common prefix */
968 return apr_pmemdup(pool, SVN_EMPTY_PATH, sizeof(SVN_EMPTY_PATH));
972 const char *
973 svn_dirent_is_child(const char *dirent1,
974 const char *dirent2,
975 apr_pool_t *pool)
977 return is_child(type_dirent, dirent1, dirent2, pool);
980 const char *
981 svn_uri_is_child(const char *uri1,
982 const char *uri2,
983 apr_pool_t *pool)
985 return is_child(type_uri, uri1, uri2, pool);
988 svn_boolean_t
989 svn_dirent_is_ancestor(const char *dirent1, const char *dirent2)
991 return is_ancestor(type_dirent, dirent1, dirent2);
994 svn_boolean_t
995 svn_uri_is_ancestor(const char *uri1, const char *uri2)
997 return is_ancestor(type_uri, uri1, uri2);
1000 svn_boolean_t
1001 svn_dirent_is_absolute(const char *dirent)
1003 if (! dirent)
1004 return FALSE;
1006 /* dirent is absolute if it starts with '/' */
1007 if (dirent[0] == '/')
1008 return TRUE;
1010 /* On Windows, dirent is also absolute when it starts with 'H:' or 'H:/'
1011 where 'H' is any letter. */
1012 #if defined(WIN32) || defined(__CYGWIN__)
1013 if (((dirent[0] >= 'A' && dirent[0] <= 'Z') ||
1014 (dirent[0] >= 'a' && dirent[0] <= 'z')) &&
1015 (dirent[1] == ':'))
1016 return TRUE;
1017 #endif /* WIN32 or Cygwin */
1019 return FALSE;
1022 svn_boolean_t
1023 svn_uri_is_absolute(const char *uri)
1025 /* uri is absolute if it starts with '/' */
1026 if (uri && uri[0] == '/')
1027 return TRUE;
1029 /* URLs are absolute. */
1030 return svn_path_is_url(uri);
1033 svn_error_t *
1034 svn_dirent_get_absolute(const char **pabsolute,
1035 const char *relative,
1036 apr_pool_t *pool)
1038 char *buffer;
1039 apr_status_t apr_err;
1040 const char *path_apr;
1042 /* Merge the current working directory with the relative dirent. */
1043 SVN_ERR(svn_path_cstring_from_utf8(&path_apr, relative, pool));
1045 apr_err = apr_filepath_merge(&buffer, NULL,
1046 path_apr,
1047 APR_FILEPATH_NOTRELATIVE,
1048 pool);
1049 if (apr_err)
1050 return svn_error_createf(SVN_ERR_BAD_FILENAME, NULL,
1051 _("Couldn't determine absolute path of '%s'"),
1052 svn_path_local_style(relative, pool));
1054 SVN_ERR(svn_path_cstring_to_utf8(pabsolute, buffer, pool));
1055 *pabsolute = svn_dirent_canonicalize(*pabsolute, pool);
1056 return SVN_NO_ERROR;
1059 const char *
1060 svn_uri_canonicalize(const char *uri, apr_pool_t *pool)
1062 return canonicalize(type_uri, uri, pool);;
1065 const char *
1066 svn_dirent_canonicalize(const char *dirent, apr_pool_t *pool)
1068 const char *dst = canonicalize(type_dirent, dirent, pool);;
1070 #if defined(WIN32) || defined(__CYGWIN__)
1071 /* Handle a specific case on Windows where path == "X:/". Here we have to
1072 append the final '/', as svn_path_canonicalize will chop this of. */
1073 if (((dirent[0] >= 'A' && dirent[0] <= 'Z') ||
1074 (dirent[0] >= 'a' && dirent[0] <= 'z')) &&
1075 dirent[1] == ':' && dirent[2] == '/' &&
1076 dst[3] == '\0')
1078 char *dst_slash = apr_pcalloc(pool, 4);
1079 dst_slash[0] = dirent[0];
1080 dst_slash[1] = ':';
1081 dst_slash[2] = '/';
1082 dst_slash[3] = '\0';
1084 return dst_slash;
1086 #endif /* WIN32 or Cygwin */
1088 return dst;
1091 svn_boolean_t
1092 svn_dirent_is_canonical(const char *dirent, apr_pool_t *pool)
1094 return (strcmp(dirent, svn_dirent_canonicalize(dirent, pool)) == 0);
1097 svn_boolean_t
1098 svn_uri_is_canonical(const char *uri, apr_pool_t *pool)
1100 const char *ptr = uri, *seg = uri;
1102 /* URI is canonical if it has:
1103 * - no '.' segments
1104 * - no closing '/', unless for the root path '/' itself
1105 * - no '//'
1106 * - lowercase URL scheme
1107 * - lowercase URL hostname
1110 if (*uri == '\0')
1111 return TRUE;
1113 /* Maybe parse hostname and scheme. */
1114 if (*ptr != '/')
1116 while (*ptr && (*ptr != '/') && (*ptr != ':'))
1117 ptr++;
1119 if (*ptr == ':' && *(ptr+1) == '/' && *(ptr+2) == '/')
1121 /* Found a scheme, check that it's all lowercase. */
1122 ptr = uri;
1123 while (*ptr != ':')
1125 if (*ptr >= 'A' && *ptr <= 'Z')
1126 return FALSE;
1127 ptr++;
1129 /* Skip :// */
1130 ptr += 3;
1132 /* This might be the hostname */
1133 seg = ptr;
1134 while (*ptr && (*ptr != '/') && (*ptr != '@'))
1135 ptr++;
1137 if (! *ptr)
1138 return TRUE;
1140 if (*ptr == '@')
1141 seg = ptr + 1;
1143 /* Found a hostname, check that it's all lowercase. */
1144 ptr = seg;
1145 while (*ptr && *ptr != '/')
1147 if (*ptr >= 'A' && *ptr <= 'Z')
1148 return FALSE;
1149 ptr++;
1152 else
1154 /* Didn't find a scheme; finish the segment. */
1155 while (*ptr && *ptr != '/')
1156 ptr++;
1160 #if defined(WIN32) || defined(__CYGWIN__)
1161 if (*ptr == '/')
1163 /* If this is a file url, ptr now points to the third '/' in
1164 file:///C:/path. Check that if we have such a URL the drive
1165 letter is in uppercase. */
1166 if (strncmp(uri, "file:", 5) == 0 &&
1167 ! (*(ptr+1) >= 'A' && *(ptr+1) <= 'Z') &&
1168 *(ptr+2) == ':')
1169 return FALSE;
1171 #endif /* WIN32 or Cygwin */
1173 /* Now validate the rest of the URI. */
1174 while(1)
1176 int seglen = ptr - seg;
1178 if (seglen == 1 && *seg == '.')
1179 return FALSE; /* /./ */
1181 if (*ptr == '/' && *(ptr+1) == '/')
1182 return FALSE; /* // */
1184 if (! *ptr && *(ptr - 1) == '/' && ptr - 1 != uri)
1185 return FALSE; /* foo/ */
1187 if (! *ptr)
1188 break;
1190 if (*ptr == '/')
1191 ptr++;
1192 seg = ptr;
1194 while (*ptr && (*ptr != '/'))
1195 ptr++;
1198 return TRUE;