sed(1): Revert r303047 "cleanup" and therefore r303572.
[freebsd-src.git] / usr.bin / sort / vsort.c
blob5d15f9af36960369019c9ff234dc15746417a6b7
1 /*-
2 * Copyright (C) 2012 Oleg Moskalenko <mom040267@gmail.com>
3 * Copyright (C) 2012 Gabor Kovesdan <gabor@FreeBSD.org>
4 * All rights reserved.
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25 * SUCH DAMAGE.
28 #include <sys/cdefs.h>
29 __FBSDID("$FreeBSD$");
31 #include <sys/types.h>
33 #include <ctype.h>
34 #include <stdlib.h>
35 #include <string.h>
37 #include "sort.h"
38 #include "vsort.h"
40 static inline bool
41 isdigit_clocale(wchar_t c)
44 return (c >= L'0' && c <= L'9');
47 static inline bool
48 isalpha_clocale(wchar_t c)
51 return ((c >= L'a' && c <= L'z') || (c >= L'A' && c <= L'Z'));
54 static inline bool
55 isalnum_clocale(wchar_t c)
58 return ((c >= L'a' && c <= L'z') || (c >= L'A' && c <= L'Z') ||
59 (c >= L'0' && c <= L'9'));
63 * Find string suffix of format: (\.[A-Za-z~][A-Za-z0-9~]*)*$
64 * Set length of string before suffix.
66 static void
67 find_suffix(bwstring_iterator si, bwstring_iterator se, size_t *len)
69 wchar_t c;
70 size_t clen;
71 bool expect_alpha, sfx;
73 sfx = false;
74 expect_alpha = false;
75 *len = 0;
76 clen = 0;
78 while ((si < se) && (c = bws_get_iter_value(si))) {
79 if (expect_alpha) {
80 expect_alpha = false;
81 if (!isalpha_clocale(c) && (c != L'~'))
82 sfx = false;
83 } else if (c == L'.') {
84 expect_alpha = true;
85 if (!sfx) {
86 sfx = true;
87 *len = clen;
89 } else if (!isalnum_clocale(c) && (c != L'~'))
90 sfx = false;
92 si = bws_iterator_inc(si, 1);
93 ++clen;
96 /* This code must be here to make the implementation compatible
97 * with WORDING of GNU sort documentation.
98 * But the GNU sort implementation is not following its own
99 * documentation. GNU sort allows empty file extensions
100 * (just dot with nothing after); but the regular expression in
101 * their documentation does not allow empty file extensions.
102 * We chose to make our implementation compatible with GNU sort
103 * implementation. If they will ever fix their bug, this code
104 * must be uncommented. Or they may choose to fix the info page,
105 * then the code stays commented.
107 if (expect_alpha)
108 sfx = false;
111 if (!sfx)
112 *len = clen;
115 static inline int
116 cmp_chars(wchar_t c1, wchar_t c2)
119 if (c1 == c2)
120 return (0);
122 if (c1 == L'~')
123 return (-1);
124 if (c2 == L'~')
125 return (+1);
127 if (isdigit_clocale(c1) || !c1)
128 return ((isdigit_clocale(c2) || !c2) ? 0 : -1);
130 if (isdigit_clocale(c2) || !c2)
131 return (+1);
133 if (isalpha_clocale(c1))
134 return ((isalpha_clocale(c2)) ? ((int) c1 - (int) c2) : -1);
136 if (isalpha_clocale(c2))
137 return (+1);
139 return ((int) c1 - (int) c2);
142 static int
143 cmpversions(bwstring_iterator si1, bwstring_iterator se1,
144 bwstring_iterator si2, bwstring_iterator se2)
146 int cmp, diff;
148 while ((si1 < se1) || (si2 < se2)) {
149 diff = 0;
151 while (((si1 < se1) &&
152 !isdigit_clocale(bws_get_iter_value(si1))) ||
153 ((si2 < se2) && !isdigit_clocale(bws_get_iter_value(si2)))) {
154 wchar_t c1, c2;
156 c1 = (si1 < se1) ? bws_get_iter_value(si1) : 0;
157 c2 = (si2 < se2) ? bws_get_iter_value(si2) : 0;
159 cmp = cmp_chars(c1, c2);
160 if (cmp)
161 return (cmp);
163 if (si1 < se1)
164 si1 = bws_iterator_inc(si1, 1);
165 if (si2 < se2)
166 si2 = bws_iterator_inc(si2, 1);
169 while (bws_get_iter_value(si1) == L'0')
170 si1 = bws_iterator_inc(si1, 1);
172 while (bws_get_iter_value(si2) == L'0')
173 si2 = bws_iterator_inc(si2, 1);
175 while (isdigit_clocale(bws_get_iter_value(si1)) &&
176 isdigit_clocale(bws_get_iter_value(si2))) {
177 if (!diff)
178 diff = ((int)bws_get_iter_value(si1) -
179 (int)bws_get_iter_value(si2));
180 si1 = bws_iterator_inc(si1, 1);
181 si2 = bws_iterator_inc(si2, 1);
184 if (isdigit_clocale(bws_get_iter_value(si1)))
185 return (1);
187 if (isdigit_clocale(bws_get_iter_value(si2)))
188 return (-1);
190 if (diff)
191 return (diff);
194 return (0);
198 * Compare two version strings
201 vcmp(struct bwstring *s1, struct bwstring *s2)
203 bwstring_iterator si1, si2;
204 wchar_t c1, c2;
205 size_t len1, len2, slen1, slen2;
206 int cmp_bytes, cmp_res;
208 if (s1 == s2)
209 return (0);
211 cmp_bytes = bwscmp(s1, s2, 0);
212 if (cmp_bytes == 0)
213 return (0);
215 len1 = slen1 = BWSLEN(s1);
216 len2 = slen2 = BWSLEN(s2);
218 if (slen1 < 1)
219 return (-1);
220 if (slen2 < 1)
221 return (+1);
223 si1 = bws_begin(s1);
224 si2 = bws_begin(s2);
226 c1 = bws_get_iter_value(si1);
227 c2 = bws_get_iter_value(si2);
229 if (c1 == L'.' && (slen1 == 1))
230 return (-1);
232 if (c2 == L'.' && (slen2 == 1))
233 return (+1);
235 if (slen1 == 2 && c1 == L'.' &&
236 bws_get_iter_value(bws_iterator_inc(si1, 1)) == L'.')
237 return (-1);
238 if (slen2 == 2 && c2 == L'.' &&
239 bws_get_iter_value(bws_iterator_inc(si2, 1)) == L'.')
240 return (+1);
242 if (c1 == L'.' && c2 != L'.')
243 return (-1);
244 if (c1 != L'.' && c2 == L'.')
245 return (+1);
247 if (c1 == L'.' && c2 == L'.') {
248 si1 = bws_iterator_inc(si1, 1);
249 si2 = bws_iterator_inc(si2, 1);
252 find_suffix(si1, bws_end(s1), &len1);
253 find_suffix(si2, bws_end(s2), &len2);
255 if ((len1 == len2) && (bws_iterator_cmp(si1, si2, len1) == 0))
256 return (cmp_bytes);
258 cmp_res = cmpversions(si1, bws_iterator_inc(si1, len1), si2,
259 bws_iterator_inc(si2, len2));
261 if (cmp_res == 0)
262 cmp_res = cmp_bytes;
264 return (cmp_res);