Optimize out-of-line string CONCATENATE, part 2.
[sbcl.git] / src / code / win32-pathname.lisp
blobbfd9447b316ff9ae9ba4ba8881ac32c7ddc81ad2
1 ;;;; pathname parsing for Win32 filesystems
3 ;;;; This software is part of the SBCL system. See the README file for
4 ;;;; more information.
5 ;;;;
6 ;;;; This software is derived from the CMU CL system, which was
7 ;;;; written at Carnegie Mellon University and released into the
8 ;;;; public domain. The software is in the public domain and is
9 ;;;; provided with absolutely no warranty. See the COPYING and CREDITS
10 ;;;; files for more information.
12 (in-package "SB!IMPL")
14 (defstruct (win32-host
15 (:include host
16 (parse #'parse-win32-namestring)
17 (parse-native #'parse-native-win32-namestring)
18 (unparse #'unparse-win32-namestring)
19 (unparse-native #'unparse-native-win32-namestring)
20 (unparse-host #'unparse-win32-host)
21 (unparse-directory #'unparse-win32-directory)
22 (unparse-file #'unparse-win32-file)
23 (unparse-enough #'unparse-win32-enough)
24 (unparse-directory-separator "\\")
25 (simplify-namestring #'simplify-win32-namestring)
26 (customary-case :lower))))
28 (defvar *physical-host* (make-win32-host))
30 ;;;
31 (define-symbol-macro +long-file-name-prefix+ (quote "\\\\?\\"))
32 (define-symbol-macro +unc-file-name-prefix+ (quote "\\\\?\\UNC"))
34 (defun extract-device (namestr start end)
35 (declare (type simple-string namestr)
36 (type index start end))
37 (if (>= end (+ start 2))
38 (let ((c0 (char namestr start))
39 (c1 (char namestr (1+ start))))
40 (cond ((and (eql c1 #\:) (alpha-char-p c0))
41 ;; "X:" style, saved as X
42 (values (string (char namestr start)) (+ start 2)))
43 ((and (member c0 '(#\/ #\\)) (eql c0 c1) (>= end (+ start 3)))
44 ;; "//UNC" style, saved as :UNC device, with host and share
45 ;; becoming directory components.
46 (values :unc (+ start 1)))
48 (values nil start))))
49 (values nil start)))
51 (defun split-at-slashes-and-backslashes (namestr start end)
52 (declare (type simple-string namestr)
53 (type index start end))
54 ;; FIXME: There is a fundamental brokenness in using the same
55 ;; character as escape character and directory separator in
56 ;; non-native pathnames. (PATHNAME-DIRECTORY #P"\\*/") should
57 ;; probably be (:RELATIVE "*") everywhere, but on Windows it's
58 ;; (:ABSOLUTE :WILD)! See lp#673625.
59 (let ((absolute (and (/= start end)
60 (or (char= (schar namestr start) #\/)
61 (char= (schar namestr start) #\\)))))
62 (when absolute
63 (incf start))
64 ;; Next, split the remainder into slash-separated chunks.
65 (collect ((pieces))
66 (loop
67 (let ((slash (position-if (lambda (c)
68 (or (char= c #\/)
69 (char= c #\\)))
70 namestr :start start :end end)))
71 (pieces (cons start (or slash end)))
72 (unless slash
73 (return))
74 (setf start (1+ slash))))
75 (values absolute (pieces)))))
77 (defun parse-win32-namestring (namestring start end)
78 (declare (type simple-string namestring)
79 (type index start end))
80 (setf namestring (coerce namestring 'simple-string))
81 (multiple-value-bind (device new-start)
82 (extract-device namestring start end)
83 (multiple-value-bind (absolute pieces)
84 (split-at-slashes-and-backslashes namestring new-start end)
85 (multiple-value-bind (name type version)
86 (let* ((tail (car (last pieces)))
87 (tail-start (car tail))
88 (tail-end (cdr tail)))
89 (unless (= tail-start tail-end)
90 (setf pieces (butlast pieces))
91 (extract-name-type-and-version namestring tail-start tail-end #\^)))
93 (when (stringp name)
94 (let ((position (position-if (lambda (char)
95 (or (char= char (code-char 0))
96 (char= char #\/)))
97 name)))
98 (when position
99 (error 'namestring-parse-error
100 :complaint "can't embed #\\Nul or #\\/ in Windows namestring"
101 :namestring namestring
102 :offset position))))
104 (let (home)
105 ;; Deal with ~ and ~user.
106 (when (car pieces)
107 (destructuring-bind (start . end) (car pieces)
108 (when (and (not absolute)
109 (not (eql start end))
110 (string= namestring "~"
111 :start1 start
112 :end1 (1+ start)))
113 (setf absolute t)
114 (if (> end (1+ start))
115 (setf home (list :home (subseq namestring (1+ start) end)))
116 (setf home :home))
117 (pop pieces))))
119 ;; Now we have everything we want. So return it.
120 (values nil ; no host for Win32 namestrings
121 device
122 (collect ((dirs))
123 (dolist (piece pieces)
124 (let ((piece-start (car piece))
125 (piece-end (cdr piece)))
126 (unless (= piece-start piece-end)
127 (cond ((string= namestring ".."
128 :start1 piece-start
129 :end1 piece-end)
130 (dirs :up))
131 ((string= namestring "**"
132 :start1 piece-start
133 :end1 piece-end)
134 (dirs :wild-inferiors))
136 (dirs (maybe-make-pattern namestring
137 piece-start
138 piece-end
139 #\^)))))))
140 (cond (absolute
141 (if home
142 (list* :absolute home (dirs))
143 (cons :absolute (dirs))))
144 ((dirs)
145 (cons :relative (dirs)))
147 nil)))
148 name
149 type
150 version))))))
152 (defun parse-native-win32-namestring (namestring start end as-directory)
153 (declare (type simple-string namestring)
154 (type index start end))
155 (setf namestring (coerce namestring 'simple-string))
156 (multiple-value-bind (device new-start)
157 (cond ((= (length +unc-file-name-prefix+)
158 (mismatch +unc-file-name-prefix+ namestring
159 :start2 start))
160 (values :unc (+ start (length +unc-file-name-prefix+))))
161 ((= (length +long-file-name-prefix+)
162 (mismatch +long-file-name-prefix+ namestring
163 :start2 start))
164 (extract-device namestring
165 (+ start (length +long-file-name-prefix+))
166 end))
167 (t (extract-device namestring start end)))
168 (multiple-value-bind (absolute ranges)
169 (split-at-slashes-and-backslashes namestring new-start end)
170 (let* ((components (loop for ((start . end) . rest) on ranges
171 for piece = (subseq namestring start end)
172 collect (if (and (string= piece "..") rest)
174 piece)))
175 (directory (if (and as-directory
176 (string/= "" (car (last components))))
177 components
178 (butlast components)))
179 (name-and-type
180 (unless as-directory
181 (let* ((end (first (last components)))
182 (dot (position #\. end :from-end t)))
183 ;; FIXME: can we get this dot-interpretation knowledge
184 ;; from existing code? EXTRACT-NAME-TYPE-AND-VERSION
185 ;; does slightly more work than that.
186 (cond
187 ((string= end "")
188 (list nil nil))
189 ((and dot (> dot 0))
190 (list (subseq end 0 dot) (subseq end (1+ dot))))
192 (list end nil)))))))
193 (values nil
194 device
195 (cons (if absolute :absolute :relative) directory)
196 (first name-and-type)
197 (second name-and-type)
198 nil)))))
202 (defun unparse-win32-host (pathname)
203 (declare (type pathname pathname)
204 (ignore pathname))
205 ;; FIXME: same as UNPARSE-UNIX-HOST. That's probably not good.
208 (defun unparse-win32-device (pathname &optional native)
209 (declare (type pathname pathname))
210 (let ((device (pathname-device pathname))
211 (directory (pathname-directory pathname)))
212 (cond ((or (null device) (eq device :unspecific))
214 ((eq device :unc)
215 (if native "\\" "/"))
216 ((and (= 1 (length device)) (alpha-char-p (char device 0)))
217 (concatenate 'simple-string device ":"))
218 ((and (consp directory) (eq :relative (car directory)))
219 (error "No printed representation for a relative UNC pathname."))
221 (if native
222 (concatenate 'simple-string "\\\\" device)
223 (concatenate 'simple-string "//" device))))))
225 (defun unparse-win32-directory (pathname)
226 (unparse-physical-directory pathname #\^))
228 (defun unparse-win32-file (pathname)
229 (declare (type pathname pathname))
230 (collect ((strings))
231 (let* ((name (%pathname-name pathname))
232 (type (%pathname-type pathname))
233 (type-supplied (not (or (null type) (eq type :unspecific)))))
234 ;; Note: by ANSI 19.3.1.1.5, we ignore the version slot when
235 ;; translating logical pathnames to a filesystem without
236 ;; versions (like Win32).
237 (when name
238 (when (and (null type)
239 (typep name 'string)
240 (> (length name) 0)
241 (position #\. name :start 1))
242 (error "too many dots in the name: ~S" pathname))
243 (when (and (typep name 'string)
244 (string= name ""))
245 (error "name is of length 0: ~S" pathname))
246 (strings (unparse-physical-piece name #\^)))
247 (when type-supplied
248 (unless name
249 (error "cannot specify the type without a file: ~S" pathname))
250 (when (typep type 'simple-string)
251 (when (position #\. type)
252 (error "type component can't have a #\. inside: ~S" pathname)))
253 (strings ".")
254 (strings (unparse-physical-piece type #\^))))
255 (apply #'concatenate 'simple-string (strings))))
257 (defun unparse-win32-namestring (pathname)
258 (declare (type pathname pathname))
259 (concatenate 'simple-string
260 (unparse-win32-device pathname)
261 (unparse-physical-directory pathname #\^)
262 (unparse-win32-file pathname)))
264 (defun unparse-native-win32-namestring (pathname as-file)
265 (declare (type pathname pathname))
266 (let* ((device (pathname-device pathname))
267 (directory (pathname-directory pathname))
268 (name (pathname-name pathname))
269 (name-present-p (typep name '(not (member nil :unspecific))))
270 (name-string (if name-present-p name ""))
271 (type (pathname-type pathname))
272 (type-present-p (typep type '(not (member nil :unspecific))))
273 (type-string (if type-present-p type ""))
274 (absolutep (and device (eql :absolute (car directory)))))
275 (when name-present-p
276 (setf as-file nil))
277 (when (and absolutep (member :up directory))
278 ;; employ merge-pathnames to parse :BACKs into which we turn :UPs
279 (setf directory
280 (pathname-directory
281 (merge-pathnames
282 (make-pathname :defaults pathname :directory '(:relative))
283 (make-pathname :defaults pathname
284 :directory (substitute :back :up directory))))))
285 (coerce
286 (with-simple-output-to-string (s)
287 (when absolutep
288 (write-string (case device
289 (:unc +unc-file-name-prefix+)
290 (otherwise +long-file-name-prefix+)) s))
291 (when (or (not absolutep) (not (member device '(:unc nil))))
292 (write-string (unparse-win32-device pathname t) s))
293 (when directory
294 (ecase (pop directory)
295 (:absolute
296 (let ((next (pop directory)))
297 ;; Don't use USER-HOMEDIR-NAMESTRING, since
298 ;; it can be specified as C:/User/user
299 ;; and (native-namestring (user-homedir-pathname))
300 ;; will be not equal to it, because it's parsed first.
301 (cond ((eq :home next)
302 (write-string (native-namestring (user-homedir-pathname))
304 ((and (consp next) (eq :home (car next)))
305 (let ((where (user-homedir-pathname (second next))))
306 (if where
307 (write-string (native-namestring where) s)
308 (error "User homedir unknown for: ~S."
309 (second next)))))
310 ;; namestring of user-homedir-pathname already has
311 ;; // at the end
312 (next
313 (write-char #\\ s)
314 (push next directory))
316 (write-char #\\ s)))))
317 (:relative)))
318 (loop for (piece . subdirs) on directory
319 do (typecase piece
320 ((member :up :back) (write-string ".." s))
321 (string (write-string piece s))
322 (t (error "Bad directory segment in NATIVE-NAMESTRING: ~S."
323 piece)))
324 if (or subdirs (stringp name))
325 do (write-char #\\ s)
326 else
327 do (unless as-file
328 (write-char #\\ s)))
329 (if name-present-p
330 (progn
331 (unless (stringp name-string) ;some kind of wild field
332 (error "Bad name component in NATIVE-NAMESTRING: ~S." name))
333 (write-string name-string s)
334 (when type-present-p
335 (unless (stringp type-string) ;some kind of wild field
336 (error "Bad type component in NATIVE-NAMESTRING: ~S." type))
337 (write-char #\. s)
338 (write-string type-string s)))
339 (when type-present-p
340 (error
341 "Type component without a name component in NATIVE-NAMESTRING: ~S."
342 type)))
343 (when absolutep
344 (let ((string (get-output-stream-string s)))
345 (return-from unparse-native-win32-namestring
346 (cond ((< (- 260 12) (length string))
347 ;; KLUDGE: account for additional length of 8.3 name to make
348 ;; directories always accessible
349 (coerce string 'simple-string))
350 ((eq :unc device)
351 (replace
352 (subseq string (1- (length +unc-file-name-prefix+)))
353 "\\"))
354 (t (subseq string (length +long-file-name-prefix+))))))))
355 'simple-string)))
357 ;;; FIXME.
358 (defun unparse-win32-enough (pathname defaults)
359 (declare (type pathname pathname defaults))
360 (flet ((lose ()
361 (error "~S cannot be represented relative to ~S."
362 pathname defaults)))
363 (collect ((strings))
364 (let* ((pathname-directory (%pathname-directory pathname))
365 (defaults-directory (%pathname-directory defaults))
366 (prefix-len (length defaults-directory))
367 (result-directory
368 (cond ((null pathname-directory) '(:relative))
369 ((eq (car pathname-directory) :relative)
370 pathname-directory)
371 ((and (> prefix-len 0)
372 (>= (length pathname-directory) prefix-len)
373 (compare-component (subseq pathname-directory
374 0 prefix-len)
375 defaults-directory))
376 ;; Pathname starts with a prefix of default. So
377 ;; just use a relative directory from then on out.
378 (cons :relative (nthcdr prefix-len pathname-directory)))
379 ((eq (car pathname-directory) :absolute)
380 ;; We are an absolute pathname, so we can just use it.
381 pathname-directory)
383 (bug "Bad fallthrough in ~S" 'unparse-unix-enough)))))
384 (strings (unparse-physical-directory-list result-directory #\^)))
385 (let* ((pathname-type (%pathname-type pathname))
386 (type-needed (and pathname-type
387 (not (eq pathname-type :unspecific))))
388 (pathname-name (%pathname-name pathname))
389 (name-needed (or type-needed
390 (and pathname-name
391 (not (compare-component pathname-name
392 (%pathname-name
393 defaults)))))))
394 (when name-needed
395 (unless pathname-name (lose))
396 (when (and (null pathname-type)
397 (typep pathname-name 'simple-string)
398 (position #\. pathname-name :start 1))
399 (error "too many dots in the name: ~S" pathname))
400 (strings (unparse-physical-piece pathname-name #\^)))
401 (when type-needed
402 (when (or (null pathname-type) (eq pathname-type :unspecific))
403 (lose))
404 (when (typep pathname-type 'simple-string)
405 (when (position #\. pathname-type)
406 (error "type component can't have a #\. inside: ~S" pathname)))
407 (strings ".")
408 (strings (unparse-physical-piece pathname-type #\^))))
409 (apply #'concatenate 'simple-string (strings)))))
411 ;; FIXME: This has been converted rather blindly from the Unix
412 ;; version, with no reference to any Windows docs what so ever.
413 (defun simplify-win32-namestring (src)
414 (declare (type simple-string src))
415 (let* ((src-len (length src))
416 (dst (make-string src-len :element-type 'character))
417 (dst-len 0)
418 (dots 0)
419 (last-slash nil))
420 (flet ((deposit (char)
421 (setf (schar dst dst-len) char)
422 (incf dst-len))
423 (slashp (char)
424 (find char "\\/")))
425 (dotimes (src-index src-len)
426 (let ((char (schar src src-index)))
427 (cond ((char= char #\.)
428 (when dots
429 (incf dots))
430 (deposit char))
431 ((slashp char)
432 (case dots
434 ;; either ``/...' or ``...//...'
435 (unless last-slash
436 (setf last-slash dst-len)
437 (deposit char)))
439 ;; either ``./...'' or ``..././...''
440 (decf dst-len))
442 ;; We've found ..
443 (cond
444 ((and last-slash (not (zerop last-slash)))
445 ;; There is something before this ..
446 (let ((prev-prev-slash
447 (position-if #'slashp dst :end last-slash :from-end t)))
448 (cond ((and (= (+ (or prev-prev-slash 0) 2)
449 last-slash)
450 (char= (schar dst (- last-slash 2)) #\.)
451 (char= (schar dst (1- last-slash)) #\.))
452 ;; The something before this .. is another ..
453 (deposit char)
454 (setf last-slash dst-len))
456 ;; The something is some directory or other.
457 (setf dst-len
458 (if prev-prev-slash
459 (1+ prev-prev-slash)
461 (setf last-slash prev-prev-slash)))))
463 ;; There is nothing before this .., so we need to keep it
464 (setf last-slash dst-len)
465 (deposit char))))
467 ;; something other than a dot between slashes
468 (setf last-slash dst-len)
469 (deposit char)))
470 (setf dots 0))
472 (setf dots nil)
473 (setf (schar dst dst-len) char)
474 (incf dst-len)))))
475 ;; ...finish off
476 (when (and last-slash (not (zerop last-slash)))
477 (case dots
479 ;; We've got ``foobar/.''
480 (decf dst-len))
482 ;; We've got ``foobar/..''
483 (unless (and (>= last-slash 2)
484 (char= (schar dst (1- last-slash)) #\.)
485 (char= (schar dst (- last-slash 2)) #\.)
486 (or (= last-slash 2)
487 (slashp (schar dst (- last-slash 3)))))
488 (let ((prev-prev-slash
489 (position-if #'slashp dst :end last-slash :from-end t)))
490 (if prev-prev-slash
491 (setf dst-len (1+ prev-prev-slash))
492 (return-from simplify-win32-namestring
493 (coerce ".\\" 'simple-string)))))))))
494 (cond ((zerop dst-len)
495 ".\\")
496 ((= dst-len src-len)
497 dst)
499 (subseq dst 0 dst-len)))))