2 * sort.c - comparison and sorting of strings
4 * This file is part of zsh, the Z shell.
6 * Copyright (c) 1992-2007 Paul Falstad
9 * Permission is hereby granted, without written agreement and without
10 * license or royalty fees, to use, copy, modify, and distribute this
11 * software and to distribute modified versions of this software for any
12 * purpose, provided that the above copyright notice and the following
13 * two paragraphs appear in all copies of this software.
15 * In no event shall Paul Falstad or the Zsh Development Group be liable
16 * to any party for direct, indirect, special, incidental, or consequential
17 * damages arising out of the use of this software and its documentation,
18 * even if Paul Falstad and the Zsh Development Group have been advised of
19 * the possibility of such damage.
21 * Paul Falstad and the Zsh Development Group specifically disclaim any
22 * warranties, including, but not limited to, the implied warranties of
23 * merchantability and fitness for a particular purpose. The software
24 * provided hereunder is on an "as is" basis, and Paul Falstad and the
25 * Zsh Development Group have no obligation to provide maintenance,
26 * support, updates, enhancements, or modifications.
33 /* Flag for direction of sort: 1 forwards, -1 reverse */
36 /* Flag that sort is numeric */
37 static int sortnumeric
;
41 eltpcmp(const void *a
, const void *b
)
43 const SortElt ae
= *(const SortElt
*)a
;
44 const SortElt be
= *(const SortElt
*)b
;
45 const char *as
= ae
->cmp
, *bs
= be
->cmp
;
49 if (ae
->len
!= -1 || be
->len
!= -1) {
51 * Length recorded. We only do that if there are embedded
52 * nulls we need to treat as regular characters.
54 * Since we don't know where multibyte characters start,
55 * but do know that a null character can't occur inside
56 * one (we are relying on multibyte characters being extensions
57 * of ASCII), we can compare starting from after the last
58 * null character that occurred in both strings.
60 const char *cmpa
, *cmpb
;
61 const char *laststarta
= as
;
65 if (be
->len
!= -1 && len
> be
->len
)
71 for (cmpa
= as
, cmpb
= bs
; *cmpa
== *cmpb
&& len
--; cmpa
++, cmpb
++) {
74 * If either string doesn't have a length, we've reached
75 * the end. This is covered in the test below.
77 if (ae
->len
== -1 || be
->len
== -1)
79 laststarta
= cmpa
+ 1;
82 if (*cmpa
== *cmpb
&& ae
->len
!= be
->len
) {
84 * Special case: one of the strings has finished, but
85 * another one continues after the NULL. The string
86 * that's finished sorts below the other. We need
87 * to handle this here since strcoll() or strcmp()
88 * will just compare the strings as equal.
93 * if a is shorter it's earlier, so return -1 and
96 return (ae
->len
- be
->len
) * sortdir
;
99 * a has a length and so continues, hence
106 * b must continue because it has a recorded length,
113 bs
+= (laststarta
- as
);
114 as
+= (laststarta
- as
);
117 cmp
= strcoll(as
, bs
);
120 for (; *as
== *bs
&& *as
; as
++, bs
++);
122 cmp
= (int)STOUC(*as
) - (int)STOUC(*bs
);
124 if (idigit(*as
) || idigit(*bs
)) {
125 for (; as
> ao
&& idigit(as
[-1]); as
--, bs
--);
126 if (idigit(*as
) && idigit(*bs
)) {
131 for (; idigit(*as
) && *as
== *bs
; as
++, bs
++);
132 if (idigit(*as
) || idigit(*bs
)) {
133 cmp
= (int)STOUC(*as
) - (int)STOUC(*bs
);
134 while (idigit(*as
) && idigit(*bs
))
136 if (idigit(*as
) && !idigit(*bs
))
138 if (idigit(*bs
) && !idigit(*as
))
146 cmp
= strcmp(as
, bs
);
149 return sortdir
* cmp
;
154 * Front-end to eltpcmp() to compare strings.
155 * TODO: it would be better to eliminate this altogether by
156 * making the calling function call into the sort code
162 zstrcmp(const char *as
, const char *bs
, int sortflags
)
164 struct sortelt ae
, be
, *aeptr
, *beptr
;
165 int oldsortdir
= sortdir
, oldsortnumeric
= sortnumeric
, ret
;
176 sortnumeric
= (sortflags
& SORTIT_NUMERICALLY
) ? 1 : 0;
178 ret
= eltpcmp(&aeptr
, &beptr
);
180 /* Paranoia: I don't think we ever need to restore these. */
181 sortnumeric
= oldsortnumeric
;
182 sortdir
= oldsortdir
;
189 * Sort an array of metafied strings. Use an "or" of bit flags
190 * to decide how to sort. See the SORTIT_* flags in zsh.h.
192 * If unmetalenp is not NULL, the strings in array are already
193 * unmetafied and unmetalenp is an array containing the corresponding
199 strmetasort(char **array
, int sortwhat
, int *unmetalenp
)
203 * Array of elements containing stuff to sort. Note sortptrarr
204 * is an array of pointers, since that's more efficient
205 * for qsort() to manipulate. sortarr is the array of
208 SortElt
*sortptrarr
, *sortptrarrptr
;
209 SortElt sortarr
, sortarrptr
;
210 int oldsortdir
, oldsortnumeric
, nsort
;
212 nsort
= arrlen(array
);
218 sortptrarr
= (SortElt
*) zhalloc(nsort
* sizeof(SortElt
));
219 sortarr
= (SortElt
) zhalloc(nsort
* sizeof(struct sortelt
));
220 for (arrptr
= array
, sortptrarrptr
= sortptrarr
, sortarrptr
= sortarr
;
221 *arrptr
; arrptr
++, sortptrarrptr
++, sortarrptr
++) {
223 int needlen
, needalloc
;
224 *sortptrarrptr
= sortarrptr
;
225 sortarrptr
->orig
= *arrptr
;
229 * Already unmetafied. We just need to check for
232 int count
= unmetalenp
[arrptr
-array
];
233 /* Remember this length for sorted array */
234 sortarrptr
->origlen
= count
;
235 for (metaptr
= *arrptr
; *metaptr
!= '\0' && count
--; metaptr
++)
237 /* *metaptr must now be \0, even if we reached the end */
238 needlen
= (count
!= 0);
241 * Not yet unmetafied. See if it needs unmetafying.
242 * If it doesn't, there can't be any embedded nulls,
243 * since these are metafied.
246 for (metaptr
= *arrptr
; *metaptr
&& *metaptr
!= Meta
;
250 * See if we need to do some special checking.
251 * Either we're going to need to copy it to transform it,
252 * or we need to unmetafy it.
254 if ((needalloc
= (sortwhat
&
255 (SORTIT_IGNORING_CASE
|SORTIT_IGNORING_BACKSLASHES
)))
256 || *metaptr
== Meta
) {
257 char *s
, *t
, *src
= *arrptr
, *dst
;
259 sortarrptr
->cmp
= dst
=
260 (char *)zhalloc(((sortwhat
& SORTIT_IGNORING_CASE
)?2:1)*strlen(src
)+1);
263 /* Already unmetafied and we have the length. */
264 len
= unmetalenp
[arrptr
-array
];
265 } else if (*metaptr
!= '\0') {
267 * Needs unmetafying. We need to check for
268 * embedded nulls while we do this.
270 char *t
= dst
+ (metaptr
- src
);
273 memcpy(dst
, src
, metaptr
- src
);
274 while ((*t
= *metaptr
++)) {
276 if ((t
[-1] = *metaptr
++ ^ 32) == '\0')
284 * Doesn't need unmetafying.
285 * This means metaptr is the NULL at the
286 * end of the string, so we have the length, and
287 * there are no embedded nulls, so we don't
288 * need the length later.
289 * We're only copying the string to transform it
294 if (sortwhat
& SORTIT_IGNORING_CASE
) {
295 char *send
= src
+ len
;
296 #ifdef MULTIBYTE_SUPPORT
297 if (isset(MULTIBYTE
)) {
299 * Lower the case the hard way. Convert to a wide
300 * character, process that, and convert back. We
301 * don't assume the characters have the same
302 * multibyte length. We can't use casemodify()
303 * because we have unmetafied data, which may have
304 * been passed down to use.
306 mbstate_t mbsin
, mbsout
;
309 memset(&mbsin
, 0, sizeof(mbstate_t));
310 memset(&mbsout
, 0, sizeof(mbstate_t));
312 for (s
= src
, t
= dst
; s
< send
; ) {
313 clen
= mbrtowc(&wc
, s
, send
-s
, &mbsin
);
315 /* invalid or unfinished: treat as single bytes */
317 *t
++ = tulower(*s
++);
328 clen
= wcrtomb(t
, wc
, &mbsout
);
330 DPUTS(clen
< 0, "Bad conversion when lowering case");
336 for (s
= src
, t
= dst
; s
< send
; )
337 *t
++ = tulower(*s
++);
340 if (sortwhat
& SORTIT_IGNORING_BACKSLASHES
) {
341 char *end
= src
+ len
+ 1;
342 /* copy null byte, so increment length */
343 for (s
= src
, t
= dst
; s
< end
; ) {
351 /* Do we need to store the length (embedded null)? */
352 sortarrptr
->len
= needlen
? len
: -1;
355 * We can use the string as is, although it's possible
356 * we still need to take account of an embedded null.
358 sortarrptr
->cmp
= *arrptr
;
359 sortarrptr
->len
= needlen
? unmetalenp
[arrptr
-array
] : -1;
363 * We probably don't need to restore the following, but it's pretty cheap.
365 oldsortdir
= sortdir
;
366 oldsortnumeric
= sortnumeric
;
368 sortdir
= (sortwhat
& SORTIT_BACKWARDS
) ? -1 : 1;
369 sortnumeric
= (sortwhat
& SORTIT_NUMERICALLY
) ? 1 : 0;
371 qsort(sortptrarr
, nsort
, sizeof(SortElt
*), eltpcmp
);
373 sortnumeric
= oldsortnumeric
;
374 sortdir
= oldsortdir
;
375 for (arrptr
= array
, sortptrarrptr
= sortptrarr
; nsort
--; ) {
377 unmetalenp
[arrptr
-array
] = (*sortptrarrptr
)->origlen
;
378 *arrptr
++ = (*sortptrarrptr
++)->orig
;