3 #include "network_write.h"
9 #include "sys-socket.h"
/*
 * Compile-time selection of the write backends.
 *
 * At most one platform sendfile variant may be selected; when one is,
 * NETWORK_WRITE_USE_SENDFILE is defined to the variant's name and a second
 * match is reported as a build error.  writev() and mmap() support are
 * selected independently of the sendfile variant.
 */

/* on linux 2.4.x you get either sendfile or LFS */
#if defined HAVE_SYS_SENDFILE_H && defined HAVE_SENDFILE \
 && (!defined _LARGEFILE_SOURCE || defined HAVE_SENDFILE64) \
 && defined(__linux__) && !defined HAVE_SENDFILE_BROKEN
# ifdef NETWORK_WRITE_USE_SENDFILE
#  error "can't have more than one sendfile implementation"
# endif
# define NETWORK_WRITE_USE_SENDFILE "linux-sendfile"
# define NETWORK_WRITE_USE_LINUX_SENDFILE
#endif

#if defined HAVE_SENDFILE && (defined(__FreeBSD__) || defined(__DragonFly__))
# ifdef NETWORK_WRITE_USE_SENDFILE
#  error "can't have more than one sendfile implementation"
# endif
# define NETWORK_WRITE_USE_SENDFILE "freebsd-sendfile"
# define NETWORK_WRITE_USE_FREEBSD_SENDFILE
#endif

#if defined HAVE_SENDFILE && defined(__APPLE__)
# ifdef NETWORK_WRITE_USE_SENDFILE
#  error "can't have more than one sendfile implementation"
# endif
# define NETWORK_WRITE_USE_SENDFILE "darwin-sendfile"
# define NETWORK_WRITE_USE_DARWIN_SENDFILE
#endif

#if defined HAVE_SYS_SENDFILE_H && defined HAVE_SENDFILEV && defined(__sun)
# ifdef NETWORK_WRITE_USE_SENDFILE
#  error "can't have more than one sendfile implementation"
# endif
# define NETWORK_WRITE_USE_SENDFILE "solaris-sendfilev"
# define NETWORK_WRITE_USE_SOLARIS_SENDFILEV
#endif

/* not supported so far
#if defined HAVE_SEND_FILE && defined(__aix)
# ifdef NETWORK_WRITE_USE_SENDFILE
#  error "can't have more than one sendfile implementation"
# endif
# define NETWORK_WRITE_USE_SENDFILE "aix-sendfile"
# define NETWORK_WRITE_USE_AIX_SENDFILE
#endif
*/

#if defined HAVE_SYS_UIO_H && defined HAVE_WRITEV
# define NETWORK_WRITE_USE_WRITEV
#endif

#if defined HAVE_SYS_MMAN_H && defined HAVE_MMAP && defined ENABLE_MMAP
# define NETWORK_WRITE_USE_MMAP
#endif
70 static int network_write_error(server
*srv
, int fd
) {
72 int lastError
= WSAGetLastError();
82 log_error_write(srv
, __FILE__
, __LINE__
, "sdd",
83 "send failed: ", lastError
, fd
);
95 log_error_write(srv
, __FILE__
, __LINE__
, "ssd",
96 "write failed:", strerror(errno
), fd
);
/**
 * Write up to len bytes from data to fd with a single system call.
 *
 * Uses send() on Windows builds and write() everywhere else.
 *
 * @return number of bytes written, or -1 on error
 */
static ssize_t network_write_data_len(int fd, const char *data, off_t len) {
  #if defined(__WIN32)
    return send(fd, data, (int)len, 0);
  #else /* __WIN32 */
    return write(fd, data, (size_t)len);
  #endif /* __WIN32 */
}
114 /* write next chunk(s); finished chunks are removed afterwards after successful writes.
115 * return values: similar as backends (0 succes, -1 error, -2 remote close, -3 try again later (EINTR/EAGAIN)) */
116 /* next chunk must be MEM_CHUNK. use write()/send() */
117 static int network_write_mem_chunk(server
*srv
, int fd
, chunkqueue
*cq
, off_t
*p_max_bytes
) {
118 chunk
* const c
= cq
->first
;
120 off_t c_len
= (off_t
)buffer_string_length(c
->mem
);
121 force_assert(c
->offset
>= 0 && c
->offset
<= c_len
);
123 if (c_len
> *p_max_bytes
) c_len
= *p_max_bytes
;
126 chunkqueue_remove_finished_chunks(cq
);
130 wr
= network_write_data_len(fd
, c
->mem
->ptr
+ c
->offset
, c_len
);
133 chunkqueue_mark_written(cq
, wr
);
134 return (wr
> 0 && wr
== c_len
) ? 0 : -3;
136 return network_write_error(srv
, fd
);
143 #if !defined(NETWORK_WRITE_USE_MMAP)
145 static int network_write_file_chunk_no_mmap(server
*srv
, int fd
, chunkqueue
*cq
, off_t
*p_max_bytes
) {
146 chunk
* const c
= cq
->first
;
147 off_t offset
, toSend
;
150 force_assert(c
->offset
>= 0 && c
->offset
<= c
->file
.length
);
152 offset
= c
->file
.start
+ c
->offset
;
153 toSend
= c
->file
.length
- c
->offset
;
154 if (toSend
> *p_max_bytes
) toSend
= *p_max_bytes
;
157 chunkqueue_remove_finished_chunks(cq
);
161 if (0 != chunkqueue_open_file_chunk(srv
, cq
)) return -1;
163 if (toSend
> 64*1024) toSend
= 64*1024; /* max read 64kb in one step */
164 buffer_string_prepare_copy(srv
->tmp_buf
, toSend
);
166 if (-1 == lseek(c
->file
.fd
, offset
, SEEK_SET
)) {
167 log_error_write(srv
, __FILE__
, __LINE__
, "ss","lseek:",strerror(errno
));
170 if (-1 == (toSend
= read(c
->file
.fd
, srv
->tmp_buf
->ptr
, toSend
))) {
171 log_error_write(srv
, __FILE__
, __LINE__
, "ss","read:",strerror(errno
));
175 wr
= network_write_data_len(fd
, srv
->tmp_buf
->ptr
, toSend
);
178 chunkqueue_mark_written(cq
, wr
);
179 return (wr
> 0 && wr
== toSend
) ? 0 : -3;
181 return network_write_error(srv
, fd
);
190 #if defined(NETWORK_WRITE_USE_MMAP)
192 #include "sys-mmap.h"
197 #define MMAP_CHUNK_SIZE (512*1024)
199 static off_t
mmap_align_offset(off_t start
) {
200 static long pagesize
= 0;
202 pagesize
= sysconf(_SC_PAGESIZE
);
203 force_assert(pagesize
< MMAP_CHUNK_SIZE
);
205 force_assert(start
>= (start
% pagesize
));
206 return start
- (start
% pagesize
);
209 static volatile int sigbus_jmp_valid
;
210 static sigjmp_buf sigbus_jmp
;
212 static void sigbus_handler(int sig
) {
214 if (sigbus_jmp_valid
) siglongjmp(sigbus_jmp
, 1);
215 log_failed_assert(__FILE__
, __LINE__
, "SIGBUS");
218 /* next chunk must be FILE_CHUNK. send mmap()ed file with write() */
219 static int network_write_file_chunk_mmap(server
*srv
, int fd
, chunkqueue
*cq
, off_t
*p_max_bytes
) {
220 chunk
* const c
= cq
->first
;
221 off_t offset
, toSend
, file_end
;
223 size_t mmap_offset
, mmap_avail
;
226 force_assert(c
->offset
>= 0 && c
->offset
<= c
->file
.length
);
228 offset
= c
->file
.start
+ c
->offset
;
229 toSend
= c
->file
.length
- c
->offset
;
230 if (toSend
> *p_max_bytes
) toSend
= *p_max_bytes
;
231 file_end
= c
->file
.start
+ c
->file
.length
; /*file end offset in this chunk*/
234 chunkqueue_remove_finished_chunks(cq
);
238 if (0 != chunkqueue_open_file_chunk(srv
, cq
)) return -1;
240 /* mmap buffer if offset is outside old mmap area or not mapped at all */
241 if (MAP_FAILED
== c
->file
.mmap
.start
242 || offset
< c
->file
.mmap
.offset
243 || offset
>= (off_t
)(c
->file
.mmap
.offset
+ c
->file
.mmap
.length
)) {
245 if (MAP_FAILED
!= c
->file
.mmap
.start
) {
246 munmap(c
->file
.mmap
.start
, c
->file
.mmap
.length
);
247 c
->file
.mmap
.start
= MAP_FAILED
;
250 /* Optimizations for the future:
252 * adaptive mem-mapping
254 * we mmap() the whole file. If someone has alot large files and
255 * 32-bit machine the virtual address area will be unrun and we
256 * will have a failing mmap() call.
258 * only mmap 16M in one chunk and move the window as soon as we have
259 * finished the first 8M
261 * read-ahead buffering
263 * sending out several large files in parallel trashes read-ahead
264 * of the kernel leading to long wait-for-seek times.
265 * solutions: (increasing complexity)
267 * 2. use a internal read-ahead buffer in the chunk-structure
268 * 3. use non-blocking IO for file-transfers
271 c
->file
.mmap
.offset
= mmap_align_offset(offset
);
273 /* all mmap()ed areas are MMAP_CHUNK_SIZE
274 * except the last which might be smaller */
275 c
->file
.mmap
.length
= MMAP_CHUNK_SIZE
;
276 if (c
->file
.mmap
.offset
> file_end
- (off_t
)c
->file
.mmap
.length
) {
277 c
->file
.mmap
.length
= file_end
- c
->file
.mmap
.offset
;
280 c
->file
.mmap
.start
= mmap(NULL
, c
->file
.mmap
.length
, PROT_READ
,
281 MAP_SHARED
, c
->file
.fd
, c
->file
.mmap
.offset
);
282 if (MAP_FAILED
== c
->file
.mmap
.start
) {
283 log_error_write(srv
, __FILE__
, __LINE__
, "ssbdoo", "mmap failed:",
284 strerror(errno
), c
->mem
, c
->file
.fd
,
285 c
->file
.mmap
.offset
, (off_t
) c
->file
.mmap
.length
);
289 #if defined(HAVE_MADVISE)
290 /* don't advise files < 64Kb */
291 if (c
->file
.mmap
.length
> (64*1024)) {
292 /* darwin 7 is returning EINVAL all the time and I don't know how to
293 * detect this at runtime.
295 * ignore the return value for now */
296 madvise(c
->file
.mmap
.start
, c
->file
.mmap
.length
, MADV_WILLNEED
);
301 force_assert(offset
>= c
->file
.mmap
.offset
);
302 mmap_offset
= offset
- c
->file
.mmap
.offset
;
303 force_assert(c
->file
.mmap
.length
> mmap_offset
);
304 mmap_avail
= c
->file
.mmap
.length
- mmap_offset
;
305 if (toSend
> (off_t
) mmap_avail
) toSend
= mmap_avail
;
307 data
= c
->file
.mmap
.start
+ mmap_offset
;
309 /* setup SIGBUS handler, but don't activate sigbus_jmp_valid yet */
310 if (0 == sigsetjmp(sigbus_jmp
, 1)) {
311 signal(SIGBUS
, sigbus_handler
);
313 sigbus_jmp_valid
= 1;
314 r
= network_write_data_len(fd
, data
, toSend
);
315 sigbus_jmp_valid
= 0;
317 sigbus_jmp_valid
= 0;
319 log_error_write(srv
, __FILE__
, __LINE__
, "sbd", "SIGBUS in mmap:",
322 munmap(c
->file
.mmap
.start
, c
->file
.mmap
.length
);
323 c
->file
.mmap
.start
= MAP_FAILED
;
329 chunkqueue_mark_written(cq
, r
);
330 return (r
> 0 && r
== toSend
) ? 0 : -3;
332 return network_write_error(srv
, fd
);
336 #endif /* NETWORK_WRITE_USE_MMAP */
341 #if defined(NETWORK_WRITE_USE_WRITEV)
343 #if defined(HAVE_SYS_UIO_H)
344 # include <sys/uio.h>
347 #if defined(UIO_MAXIOV)
348 # define SYS_MAX_CHUNKS UIO_MAXIOV
349 #elif defined(IOV_MAX)
350 /* new name for UIO_MAXIOV since IEEE Std 1003.1-2001 */
351 # define SYS_MAX_CHUNKS IOV_MAX
352 #elif defined(_XOPEN_IOV_MAX)
353 /* minimum value for sysconf(_SC_IOV_MAX); posix requires this to be at least 16, which is good enough - no need to call sysconf() */
354 # define SYS_MAX_CHUNKS _XOPEN_IOV_MAX
356 # error neither UIO_MAXIOV nor IOV_MAX nor _XOPEN_IOV_MAX are defined
359 /* allocate iovec[MAX_CHUNKS] on stack, so pick a sane limit:
360 * - each entry will use 1 pointer + 1 size_t
361 * - 32 chunks -> 256 / 512 bytes (32-bit/64-bit pointers)
363 #define STACK_MAX_ALLOC_CHUNKS 32
364 #if SYS_MAX_CHUNKS > STACK_MAX_ALLOC_CHUNKS
365 # define MAX_CHUNKS STACK_MAX_ALLOC_CHUNKS
367 # define MAX_CHUNKS SYS_MAX_CHUNKS
370 /* next chunk must be MEM_CHUNK. send multiple mem chunks using writev() */
371 static int network_writev_mem_chunks(server
*srv
, int fd
, chunkqueue
*cq
, off_t
*p_max_bytes
) {
372 struct iovec chunks
[MAX_CHUNKS
];
373 size_t num_chunks
= 0;
374 off_t max_bytes
= *p_max_bytes
;
378 for (const chunk
*c
= cq
->first
;
379 NULL
!= c
&& MEM_CHUNK
== c
->type
380 && num_chunks
< MAX_CHUNKS
&& toSend
< max_bytes
;
382 size_t c_len
= buffer_string_length(c
->mem
);
383 force_assert(c
->offset
>= 0 && c
->offset
<= (off_t
)c_len
);
388 chunks
[num_chunks
].iov_base
= c
->mem
->ptr
+ c
->offset
;
389 chunks
[num_chunks
].iov_len
= c_len
;
395 if (0 == num_chunks
) {
396 chunkqueue_remove_finished_chunks(cq
);
400 r
= writev(fd
, chunks
, num_chunks
);
402 if (r
< 0) switch (errno
) {
410 log_error_write(srv
, __FILE__
, __LINE__
, "ssd",
411 "writev failed:", strerror(errno
), fd
);
417 chunkqueue_mark_written(cq
, r
);
420 return (r
> 0 && r
== toSend
) ? 0 : -3;
423 #endif /* NETWORK_WRITE_USE_WRITEV */
#if defined(NETWORK_WRITE_USE_SENDFILE)

#if defined(NETWORK_WRITE_USE_LINUX_SENDFILE) \
 || defined(NETWORK_WRITE_USE_SOLARIS_SENDFILEV)
#include <sys/sendfile.h>
#endif

#if defined(NETWORK_WRITE_USE_FREEBSD_SENDFILE) \
 || defined(NETWORK_WRITE_USE_DARWIN_SENDFILE)
#include <sys/uio.h>
#endif

/**
 * Send part of the first chunk, which must be a FILE_CHUNK, with the
 * platform's sendfile variant.
 *
 * At most *p_max_bytes bytes are attempted; *p_max_bytes is reduced by the
 * number of bytes sent, including bytes sent before an EAGAIN/EINTR.
 * When sendfile reports that it is not usable for this descriptor pair
 * (EINVAL, ENOSYS and the "not supported" family), the chunk is sent with
 * the mmap or read/write implementation instead.
 *
 * @return 0 if everything attempted was sent (or nothing was left),
 *         -3 on a partial send or EAGAIN/EINTR,
 *         -2 on EPIPE/ECONNRESET/ENOTCONN, -1 on any other error,
 *         or the result of the fallback implementation
 */
static int network_write_file_chunk_sendfile(server *srv, int fd, chunkqueue *cq, off_t *p_max_bytes) {
    chunk * const c = cq->first;
    ssize_t r;
    off_t offset;
    off_t toSend;
    off_t written = 0;

    force_assert(c->offset >= 0 && c->offset <= c->file.length);

    offset = c->file.start + c->offset;
    toSend = c->file.length - c->offset;
    if (toSend > *p_max_bytes) toSend = *p_max_bytes;

    if (0 == toSend) {
        chunkqueue_remove_finished_chunks(cq);
        return 0;
    }

    if (0 != chunkqueue_open_file_chunk(srv, cq)) return -1;

    /* Darwin, FreeBSD, and Solaris variants support iovecs and could
     * be optimized to send more than just file in single syscall */

  #if defined(NETWORK_WRITE_USE_LINUX_SENDFILE)

    r = sendfile(fd, c->file.fd, &offset, toSend);
    if (r > 0) written = (off_t)r;

  #elif defined(NETWORK_WRITE_USE_DARWIN_SENDFILE)

    written = toSend; /* in: bytes to send, out: bytes sent */
    r = sendfile(c->file.fd, fd, offset, &written, NULL, 0);
    /* (for EAGAIN/EINTR written still contains the sent bytes) */

  #elif defined(NETWORK_WRITE_USE_FREEBSD_SENDFILE)

    r = sendfile(c->file.fd, fd, offset, toSend, NULL, &written, 0);
    /* (for EAGAIN/EINTR written still contains the sent bytes) */

  #elif defined(NETWORK_WRITE_USE_SOLARIS_SENDFILEV)
    {
        sendfilevec_t fvec;
        /* sendfilev() reports through a size_t; do not alias the off_t,
         * which may have a different width */
        size_t xferred = 0;

        fvec.sfv_fd = c->file.fd;
        fvec.sfv_flag = 0;
        fvec.sfv_off = offset;
        fvec.sfv_len = toSend;

        /* Solaris sendfilev() */
        r = sendfilev(fd, &fvec, 1, &xferred);
        /* (for EAGAIN/EINTR xferred still contains the sent bytes) */
        written = (off_t)xferred;
    }
  #else

    r = -1;
    errno = ENOSYS;

  #endif

    if (-1 == r) {
        switch(errno) {
        case EAGAIN:
        case EINTR:
            break; /* try again later */
        case EPIPE:
        case ECONNRESET:
        case ENOTCONN:
            return -2;
        case EINVAL:
        case ENOSYS:
      #if defined(ENOTSUP) && (!defined(EOPNOTSUPP) || EOPNOTSUPP != ENOTSUP)
        case ENOTSUP:
      #endif
      #ifdef EOPNOTSUPP
        case EOPNOTSUPP:
      #endif
      #ifdef ESOCKTNOSUPPORT
        case ESOCKTNOSUPPORT:
      #endif
      #ifdef EAFNOSUPPORT
        case EAFNOSUPPORT:
      #endif
            /* sendfile is not usable here: fall back to another method */
          #ifdef NETWORK_WRITE_USE_MMAP
            return network_write_file_chunk_mmap(srv, fd, cq, p_max_bytes);
          #else
            return network_write_file_chunk_no_mmap(srv, fd, cq, p_max_bytes);
          #endif
        default:
            log_error_write(srv, __FILE__, __LINE__, "ssdSd",
                            "sendfile():", strerror(errno), errno, "fd:", fd);
            return -1;
        }
    }

    if (written >= 0) { /*(always true)*/
        chunkqueue_mark_written(cq, written);
        *p_max_bytes -= written;
    }

    return (r >= 0 && written == toSend) ? 0 : -3;
}

#endif /* NETWORK_WRITE_USE_SENDFILE */
548 * -1 : error (on our side)
552 static int network_write_chunkqueue_write(server
*srv
, int fd
, chunkqueue
*cq
, off_t max_bytes
) {
553 while (max_bytes
> 0 && NULL
!= cq
->first
) {
556 switch (cq
->first
->type
) {
558 r
= network_write_mem_chunk(srv
, fd
, cq
, &max_bytes
);
561 #ifdef NETWORK_WRITE_USE_MMAP
562 r
= network_write_file_chunk_mmap(srv
, fd
, cq
, &max_bytes
);
564 r
= network_write_file_chunk_no_mmap(srv
, fd
, cq
, &max_bytes
);
569 if (-3 == r
) return 0;
570 if (0 != r
) return r
;
#if defined(NETWORK_WRITE_USE_WRITEV)
/**
 * Backend using writev() for memory chunks: drain the queue until
 * max_bytes is used up, the queue is empty, or a write is incomplete.
 *
 * @return 0 on success or "try again later", otherwise the negative code
 *         reported by the chunk writer
 */
static int network_write_chunkqueue_writev(server *srv, int fd, chunkqueue *cq, off_t max_bytes) {
    while (max_bytes > 0 && NULL != cq->first) {
        int r = -1; /* stays -1 for a chunk type not handled below */

        switch (cq->first->type) {
        case MEM_CHUNK:
            /* this function only exists when writev is available */
            r = network_writev_mem_chunks(srv, fd, cq, &max_bytes);
            break;
        case FILE_CHUNK:
          #ifdef NETWORK_WRITE_USE_MMAP
            r = network_write_file_chunk_mmap(srv, fd, cq, &max_bytes);
          #else
            r = network_write_file_chunk_no_mmap(srv, fd, cq, &max_bytes);
          #endif
            break;
        }

        if (-3 == r) return 0; /* try again later: not an error */
        if (0 != r) return r;
    }

    return 0;
}
#endif /* NETWORK_WRITE_USE_WRITEV */
#if defined(NETWORK_WRITE_USE_SENDFILE)
/**
 * Backend using sendfile for file chunks (and writev() for memory chunks
 * when available): drain the queue until max_bytes is used up, the queue
 * is empty, or a write is incomplete.
 *
 * @return 0 on success or "try again later", otherwise the negative code
 *         reported by the chunk writer
 */
static int network_write_chunkqueue_sendfile(server *srv, int fd, chunkqueue *cq, off_t max_bytes) {
    while (max_bytes > 0 && NULL != cq->first) {
        int r = -1; /* stays -1 for a chunk type not handled below */

        switch (cq->first->type) {
        case MEM_CHUNK:
          #if defined(NETWORK_WRITE_USE_WRITEV)
            r = network_writev_mem_chunks(srv, fd, cq, &max_bytes);
          #else
            r = network_write_mem_chunk(srv, fd, cq, &max_bytes);
          #endif
            break;
        case FILE_CHUNK:
            /* this function only exists when sendfile is available */
            r = network_write_file_chunk_sendfile(srv, fd, cq, &max_bytes);
            break;
        }

        if (-3 == r) return 0; /* try again later: not an error */
        if (0 != r) return r;
    }

    return 0;
}
#endif /* NETWORK_WRITE_USE_SENDFILE */
638 int network_write_init(server
*srv
) {
640 NETWORK_BACKEND_UNSET
,
641 NETWORK_BACKEND_WRITE
,
642 NETWORK_BACKEND_WRITEV
,
643 NETWORK_BACKEND_SENDFILE
,
646 network_backend_t backend
;
649 network_backend_t nb
;
651 } network_backends
[] = {
653 { NETWORK_BACKEND_SENDFILE
, "sendfile" },
654 { NETWORK_BACKEND_SENDFILE
, "linux-sendfile" },
655 { NETWORK_BACKEND_SENDFILE
, "freebsd-sendfile" },
656 { NETWORK_BACKEND_SENDFILE
, "solaris-sendfilev" },
657 { NETWORK_BACKEND_WRITEV
, "writev" },
658 { NETWORK_BACKEND_WRITE
, "write" },
659 { NETWORK_BACKEND_UNSET
, NULL
}
662 /* get a useful default */
663 backend
= network_backends
[0].nb
;
665 /* match name against known types */
666 if (!buffer_string_is_empty(srv
->srvconf
.network_backend
)) {
668 for (size_t i
= 0; NULL
!= (name
= network_backends
[i
].name
); ++i
) {
669 if (0 == strcmp(srv
->srvconf
.network_backend
->ptr
, name
)) {
670 backend
= network_backends
[i
].nb
;
675 log_error_write(srv
, __FILE__
, __LINE__
, "sb",
676 "server.network-backend has an unknown value:",
677 srv
->srvconf
.network_backend
);
683 case NETWORK_BACKEND_SENDFILE
:
684 #if defined(NETWORK_WRITE_USE_SENDFILE)
685 srv
->network_backend_write
= network_write_chunkqueue_sendfile
;
688 case NETWORK_BACKEND_WRITEV
:
689 #if defined(NETWORK_WRITE_USE_WRITEV)
690 srv
->network_backend_write
= network_write_chunkqueue_writev
;
693 case NETWORK_BACKEND_WRITE
:
694 srv
->network_backend_write
= network_write_chunkqueue_write
;
/**
 * Return a static, human-readable list of the network handlers, each
 * marked "+" when compiled into this build and "-" when not.
 *
 * The returned string is a literal; the caller must not modify or free it.
 */
const char * network_write_show_handlers(void) {
    return
      "\nNetwork handler:\n\n"
     #if defined NETWORK_WRITE_USE_LINUX_SENDFILE
      "\t+ linux-sendfile\n"
     #else
      "\t- linux-sendfile\n"
     #endif
     #if defined NETWORK_WRITE_USE_FREEBSD_SENDFILE
      "\t+ freebsd-sendfile\n"
     #else
      "\t- freebsd-sendfile\n"
     #endif
     #if defined NETWORK_WRITE_USE_DARWIN_SENDFILE
      "\t+ darwin-sendfile\n"
     #else
      "\t- darwin-sendfile\n"
     #endif
     #if defined NETWORK_WRITE_USE_SOLARIS_SENDFILEV
      "\t+ solaris-sendfilev\n"
     #else
      "\t- solaris-sendfilev\n"
     #endif
     #if defined NETWORK_WRITE_USE_WRITEV
      "\t+ writev\n"
     #else
      "\t- writev\n"
     #endif
      /* plain write() is always available */
      "\t+ write\n"
     #ifdef NETWORK_WRITE_USE_MMAP
      "\t+ mmap support\n"
     #else
      "\t- mmap support\n"
     #endif
      ;
}