3 #include "network_write.h"
9 #include "sys-socket.h"
16 /* on linux 2.4.x you get either sendfile or LFS */
17 #if defined HAVE_SYS_SENDFILE_H && defined HAVE_SENDFILE \
18 && (!defined _LARGEFILE_SOURCE || defined HAVE_SENDFILE64) \
19 && defined(__linux__) && !defined HAVE_SENDFILE_BROKEN
20 # ifdef NETWORK_WRITE_USE_SENDFILE
21 # error "can't have more than one sendfile implementation"
23 # define NETWORK_WRITE_USE_SENDFILE "linux-sendfile"
24 # define NETWORK_WRITE_USE_LINUX_SENDFILE
27 #if defined HAVE_SENDFILE && (defined(__FreeBSD__) || defined(__DragonFly__))
28 # ifdef NETWORK_WRITE_USE_SENDFILE
29 # error "can't have more than one sendfile implementation"
31 # define NETWORK_WRITE_USE_SENDFILE "freebsd-sendfile"
32 # define NETWORK_WRITE_USE_FREEBSD_SENDFILE
35 #if defined HAVE_SENDFILE && defined(__APPLE__)
36 # ifdef NETWORK_WRITE_USE_SENDFILE
37 # error "can't have more than one sendfile implementation"
39 # define NETWORK_WRITE_USE_SENDFILE "darwin-sendfile"
40 # define NETWORK_WRITE_USE_DARWIN_SENDFILE
43 #if defined HAVE_SYS_SENDFILE_H && defined HAVE_SENDFILEV && defined(__sun)
44 # ifdef NETWORK_WRITE_USE_SENDFILE
45 # error "can't have more than one sendfile implementation"
47 # define NETWORK_WRITE_USE_SENDFILE "solaris-sendfilev"
48 # define NETWORK_WRITE_USE_SOLARIS_SENDFILEV
51 /* not supported so far
52 #if defined HAVE_SEND_FILE && defined(__aix)
53 # ifdef NETWORK_WRITE_USE_SENDFILE
54 # error "can't have more than one sendfile implementation"
56 # define NETWORK_WRITE_USE_SENDFILE "aix-sendfile"
57 # define NETWORK_WRITE_USE_AIX_SENDFILE
61 #if defined HAVE_SYS_UIO_H && defined HAVE_WRITEV
62 # define NETWORK_WRITE_USE_WRITEV
65 #if defined HAVE_SYS_MMAN_H && defined HAVE_MMAP && defined ENABLE_MMAP
66 # define NETWORK_WRITE_USE_MMAP
/* Report a failed socket write on fd through the server error log.
 * Two logging paths are visible: one reads WSAGetLastError() and logs
 * "send failed: " with that numeric code and fd; the other logs
 * "write failed:" with strerror(errno) and fd.
 * NOTE(review): the conditionals that select between the two paths and the
 * return statements are not visible in this excerpt -- presumably the result
 * follows the 0 / -1 / -2 / -3 convention used by the write helpers; confirm. */
70 static int network_write_error(server
*srv
, int fd
) {
/* fetch the socket error code once so the logged value is the one inspected */
72 int lastError
= WSAGetLastError();
/* "sdd": one string followed by two integers (lastError, fd) */
82 log_error_write(srv
, __FILE__
, __LINE__
, "sdd",
83 "send failed: ", lastError
, fd
);
/* "ssd": two strings (message, strerror text) followed by the integer fd */
95 log_error_write(srv
, __FILE__
, __LINE__
, "ssd",
96 "write failed:", strerror(errno
), fd
);
/* Hand len bytes starting at data to fd in one call and return that call's
 * result unchanged.
 * Both a send(fd, data, len, 0) form and a write(fd, data, len) form appear;
 * NOTE(review): the preprocessor condition choosing between them is not
 * visible in this excerpt. */
103 static ssize_t
network_write_data_len(int fd
, const char *data
, off_t len
) {
/* socket API variant */
105 return send(fd
, data
, len
, 0);
/* plain file-descriptor variant */
107 return write(fd
, data
, len
);
114 /* write next chunk(s); finished chunks are removed afterwards after successful writes.
115 * return values: similar as backends (0 success, -1 error, -2 remote close, -3 try again later (EINTR/EAGAIN)) */
116 /* next chunk must be MEM_CHUNK. use write()/send() */
/* Send bytes of the first chunk in cq (expected to be a memory chunk) to fd.
 * c_len starts as the string length of c->mem and is capped at *p_max_bytes.
 * The byte count returned by network_write_data_len() is handed to
 * chunkqueue_mark_written(); the visible success return is 0 when exactly
 * c_len bytes went out and -3 otherwise. The failure path returns whatever
 * network_write_error() reports.
 * NOTE(review): the declaration of wr and the branch conditions around the
 * returns are not visible in this excerpt. */
117 static int network_write_mem_chunk(server
*srv
, int fd
, chunkqueue
*cq
, off_t
*p_max_bytes
) {
118 chunk
* const c
= cq
->first
;
120 off_t c_len
= (off_t
)buffer_string_length(c
->mem
);
/* the chunk offset must lie inside [0, c_len] */
121 force_assert(c
->offset
>= 0 && c
->offset
<= c_len
);
/* never send more than the caller's remaining byte budget */
123 if (c_len
> *p_max_bytes
) c_len
= *p_max_bytes
;
126 chunkqueue_remove_finished_chunks(cq
);
/* single write attempt starting at the chunk's current offset */
130 wr
= network_write_data_len(fd
, c
->mem
->ptr
+ c
->offset
, c_len
);
133 chunkqueue_mark_written(cq
, wr
);
/* a short (or zero-length) write is reported as -3 */
134 return (wr
> 0 && wr
== c_len
) ? 0 : -3;
136 return network_write_error(srv
, fd
);
143 #if !defined(NETWORK_WRITE_USE_MMAP)
/* Send part of the first chunk in cq (expected to be a file chunk) to fd
 * without mmap(): seek to file.start + c->offset, read at most 64 KiB into
 * srv->tmp_buf, then push the bytes read out with network_write_data_len().
 * Visible returns: -1 when chunkqueue_open_file_chunk() fails, 0 when exactly
 * toSend bytes were written, -3 otherwise, or network_write_error()'s result.
 * lseek() and read() failures are logged with strerror(errno).
 * NOTE(review): the declaration of wr and the statements following the two
 * error logs are not visible in this excerpt. */
145 static int network_write_file_chunk_no_mmap(server
*srv
, int fd
, chunkqueue
*cq
, off_t
*p_max_bytes
) {
146 chunk
* const c
= cq
->first
;
147 off_t offset
, toSend
;
/* the chunk offset must lie inside [0, file.length] */
150 force_assert(c
->offset
>= 0 && c
->offset
<= c
->file
.length
);
/* absolute position in the file of the next byte to send */
152 offset
= c
->file
.start
+ c
->offset
;
/* bytes of this chunk that are still unsent */
153 toSend
= c
->file
.length
- c
->offset
;
154 if (toSend
> *p_max_bytes
) toSend
= *p_max_bytes
;
157 chunkqueue_remove_finished_chunks(cq
);
161 if (0 != chunkqueue_open_file_chunk(srv
, cq
)) return -1;
163 if (toSend
> 64*1024) toSend
= 64*1024; /* max read 64kb in one step */
/* size the shared scratch buffer for this read */
164 buffer_string_prepare_copy(srv
->tmp_buf
, toSend
);
166 if (-1 == lseek(c
->file
.fd
, offset
, SEEK_SET
)) {
167 log_error_write(srv
, __FILE__
, __LINE__
, "ss","lseek:",strerror(errno
));
/* toSend is overwritten with the number of bytes actually read.
 * NOTE(review): only -1 is checked here; a read() result of 0 appears to
 * reach the write below with toSend == 0, which then yields -3 -- confirm
 * whether a truncated file is handled elsewhere. */
170 if (-1 == (toSend
= read(c
->file
.fd
, srv
->tmp_buf
->ptr
, toSend
))) {
171 log_error_write(srv
, __FILE__
, __LINE__
, "ss","read:",strerror(errno
));
175 wr
= network_write_data_len(fd
, srv
->tmp_buf
->ptr
, toSend
);
178 chunkqueue_mark_written(cq
, wr
);
179 return (wr
> 0 && wr
== toSend
) ? 0 : -3;
181 return network_write_error(srv
, fd
);
190 #if defined(NETWORK_WRITE_USE_MMAP)
192 #include "sys-mmap.h"
197 #define MMAP_CHUNK_SIZE (512*1024)
/* Round start down to a multiple of the system page size and return it.
 * pagesize is a function-local static filled from sysconf(_SC_PAGESIZE) and
 * asserted to be smaller than MMAP_CHUNK_SIZE.
 * NOTE(review): any guard that limits the sysconf() call to the first
 * invocation is not visible in this excerpt. */
199 static off_t
mmap_align_offset(off_t start
) {
200 static long pagesize
= 0;
202 pagesize
= sysconf(_SC_PAGESIZE
);
203 force_assert(pagesize
< MMAP_CHUNK_SIZE
);
/* guards the subtraction below against going negative */
205 force_assert(start
>= (start
% pagesize
));
206 return start
- (start
% pagesize
);
/* State shared between sigbus_handler() and the mmap write path:
 * sigbus_jmp_valid is set to 1 only around the network_write_data_len() call
 * in network_write_file_chunk_mmap(), and sigbus_jmp is the sigsetjmp()
 * target armed there. */
209 static volatile int sigbus_jmp_valid
;
210 static sigjmp_buf sigbus_jmp
;
/* SIGBUS handler: when a jump target is armed, siglongjmp() back to it with
 * value 1; otherwise report the signal through log_failed_assert(). */
212 static void sigbus_handler(int sig
) {
214 if (sigbus_jmp_valid
) siglongjmp(sigbus_jmp
, 1);
215 log_failed_assert(__FILE__
, __LINE__
, "SIGBUS");
/* Send part of the first chunk in cq to fd from an mmap()ed window of the
 * chunk's file.
 * - offset is file.start + c->offset; toSend is the unsent remainder of the
 *   chunk, capped at *p_max_bytes and at the bytes left in the mapped window.
 * - the window is (re)mapped when nothing is mapped yet or offset lies
 *   outside [mmap.offset, mmap.offset + mmap.length).
 * - the write is bracketed by sigbus_jmp_valid = 1 / 0 so that
 *   sigbus_handler() can siglongjmp() back here; that path logs
 *   "SIGBUS in mmap:" and drops the mapping.
 * Visible returns: -1 when chunkqueue_open_file_chunk() fails, 0 when exactly
 * toSend bytes were written, -3 otherwise, or network_write_error()'s result.
 * NOTE(review): the declarations of data and r and several branch/return
 * lines are not visible in this excerpt. */
218 /* next chunk must be FILE_CHUNK. send mmap()ed file with write() */
219 static int network_write_file_chunk_mmap(server
*srv
, int fd
, chunkqueue
*cq
, off_t
*p_max_bytes
) {
220 chunk
* const c
= cq
->first
;
221 off_t offset
, toSend
, file_end
;
223 size_t mmap_offset
, mmap_avail
;
/* the chunk offset must lie inside [0, file.length] */
226 force_assert(c
->offset
>= 0 && c
->offset
<= c
->file
.length
);
/* absolute position in the file of the next byte to send */
228 offset
= c
->file
.start
+ c
->offset
;
/* bytes of this chunk that are still unsent */
229 toSend
= c
->file
.length
- c
->offset
;
230 if (toSend
> *p_max_bytes
) toSend
= *p_max_bytes
;
231 file_end
= c
->file
.start
+ c
->file
.length
; /*file end offset in this chunk*/
234 chunkqueue_remove_finished_chunks(cq
);
238 if (0 != chunkqueue_open_file_chunk(srv
, cq
)) return -1;
240 /* mmap buffer if offset is outside old mmap area or not mapped at all */
241 if (MAP_FAILED
== c
->file
.mmap
.start
242 || offset
< c
->file
.mmap
.offset
243 || offset
>= (off_t
)(c
->file
.mmap
.offset
+ c
->file
.mmap
.length
)) {
/* release the previous window before mapping a new one */
245 if (MAP_FAILED
!= c
->file
.mmap
.start
) {
246 munmap(c
->file
.mmap
.start
, c
->file
.mmap
.length
);
247 c
->file
.mmap
.start
= MAP_FAILED
;
250 /* Optimizations for the future:
252 * adaptive mem-mapping
254 * we mmap() the whole file. If someone has a lot of large files and
255 * 32-bit machine the virtual address area will be exhausted and we
256 * will have a failing mmap() call.
258 * only mmap 16M in one chunk and move the window as soon as we have
259 * finished the first 8M
261 * read-ahead buffering
263 * sending out several large files in parallel trashes read-ahead
264 * of the kernel leading to long wait-for-seek times.
265 * solutions: (increasing complexity)
267 * 2. use a internal read-ahead buffer in the chunk-structure
268 * 3. use non-blocking IO for file-transfers
271 c
->file
.mmap
.offset
= mmap_align_offset(offset
);
273 /* all mmap()ed areas are MMAP_CHUNK_SIZE
274 * except the last which might be smaller */
275 c
->file
.mmap
.length
= MMAP_CHUNK_SIZE
;
/* shrink the final window so it does not extend past file_end */
276 if (c
->file
.mmap
.offset
> file_end
- (off_t
)c
->file
.mmap
.length
) {
277 c
->file
.mmap
.length
= file_end
- c
->file
.mmap
.offset
;
/* read-only shared mapping of the window starting at the aligned offset */
280 c
->file
.mmap
.start
= mmap(NULL
, c
->file
.mmap
.length
, PROT_READ
,
281 MAP_SHARED
, c
->file
.fd
, c
->file
.mmap
.offset
);
282 if (MAP_FAILED
== c
->file
.mmap
.start
) {
283 log_error_write(srv
, __FILE__
, __LINE__
, "ssbdoo", "mmap failed:",
284 strerror(errno
), c
->file
.name
, c
->file
.fd
, c
->file
.mmap
.offset
, (off_t
) c
->file
.mmap
.length
);
288 #if defined(HAVE_MADVISE)
289 /* don't advise files < 64Kb */
290 if (c
->file
.mmap
.length
> (64*1024)) {
291 /* darwin 7 is returning EINVAL all the time and I don't know how to
292 * detect this at runtime.
294 * ignore the return value for now */
295 madvise(c
->file
.mmap
.start
, c
->file
.mmap
.length
, MADV_WILLNEED
);
/* translate the file offset into an offset inside the mapped window */
300 force_assert(offset
>= c
->file
.mmap
.offset
);
301 mmap_offset
= offset
- c
->file
.mmap
.offset
;
/* at least one byte must be available in the window */
302 force_assert(c
->file
.mmap
.length
> mmap_offset
);
303 mmap_avail
= c
->file
.mmap
.length
- mmap_offset
;
304 if (toSend
> (off_t
) mmap_avail
) toSend
= mmap_avail
;
306 data
= c
->file
.mmap
.start
+ mmap_offset
;
308 /* setup SIGBUS handler, but don't activate sigbus_jmp_valid yet */
309 if (0 == sigsetjmp(sigbus_jmp
, 1)) {
310 signal(SIGBUS
, sigbus_handler
);
/* arm the jump target only for the duration of the write */
312 sigbus_jmp_valid
= 1;
313 r
= network_write_data_len(fd
, data
, toSend
);
314 sigbus_jmp_valid
= 0;
/* reached via siglongjmp() from sigbus_handler(): disarm, log, drop the mapping */
316 sigbus_jmp_valid
= 0;
318 log_error_write(srv
, __FILE__
, __LINE__
, "sbd", "SIGBUS in mmap:",
319 c
->file
.name
, c
->file
.fd
);
321 munmap(c
->file
.mmap
.start
, c
->file
.mmap
.length
);
322 c
->file
.mmap
.start
= MAP_FAILED
;
328 chunkqueue_mark_written(cq
, r
);
329 return (r
> 0 && r
== toSend
) ? 0 : -3;
331 return network_write_error(srv
, fd
);
335 #endif /* NETWORK_WRITE_USE_MMAP */
340 #if defined(NETWORK_WRITE_USE_WRITEV)
342 #if defined(HAVE_SYS_UIO_H)
343 # include <sys/uio.h>
346 #if defined(UIO_MAXIOV)
347 # define SYS_MAX_CHUNKS UIO_MAXIOV
348 #elif defined(IOV_MAX)
349 /* new name for UIO_MAXIOV since IEEE Std 1003.1-2001 */
350 # define SYS_MAX_CHUNKS IOV_MAX
351 #elif defined(_XOPEN_IOV_MAX)
352 /* minimum value for sysconf(_SC_IOV_MAX); posix requires this to be at least 16, which is good enough - no need to call sysconf() */
353 # define SYS_MAX_CHUNKS _XOPEN_IOV_MAX
355 # error neither UIO_MAXIOV nor IOV_MAX nor _XOPEN_IOV_MAX are defined
358 /* allocate iovec[MAX_CHUNKS] on stack, so pick a sane limit:
359 * - each entry will use 1 pointer + 1 size_t
360 * - 32 chunks -> 256 / 512 bytes (32-bit/64-bit pointers)
362 #define STACK_MAX_ALLOC_CHUNKS 32
363 #if SYS_MAX_CHUNKS > STACK_MAX_ALLOC_CHUNKS
364 # define MAX_CHUNKS STACK_MAX_ALLOC_CHUNKS
366 # define MAX_CHUNKS SYS_MAX_CHUNKS
369 /* next chunk must be MEM_CHUNK. send multiple mem chunks using writev() */
/* Gather consecutive memory chunks from the head of cq into a stack-allocated
 * iovec array and send them with a single writev() call.
 * Collection stops at the first chunk that is not a MEM_CHUNK, after
 * MAX_CHUNKS entries, or once toSend reaches max_bytes (a copy of
 * *p_max_bytes taken on entry).
 * A negative writev() result is dispatched on errno; the visible error path
 * logs "writev failed:" with strerror(errno) and fd. Otherwise the written
 * count is handed to chunkqueue_mark_written() and the function returns 0
 * when exactly toSend bytes were written, -3 otherwise.
 * NOTE(review): the declarations of r and toSend, the loop increment, the
 * per-chunk length adjustments and the errno case labels are not visible in
 * this excerpt. */
370 static int network_writev_mem_chunks(server
*srv
, int fd
, chunkqueue
*cq
, off_t
*p_max_bytes
) {
371 struct iovec chunks
[MAX_CHUNKS
];
372 size_t num_chunks
= 0;
373 off_t max_bytes
= *p_max_bytes
;
/* walk the queue from its head while the limits described above hold */
377 for (const chunk
*c
= cq
->first
;
378 NULL
!= c
&& MEM_CHUNK
== c
->type
379 && num_chunks
< MAX_CHUNKS
&& toSend
< max_bytes
;
381 size_t c_len
= buffer_string_length(c
->mem
);
/* the chunk offset must lie inside [0, c_len] */
382 force_assert(c
->offset
>= 0 && c
->offset
<= (off_t
)c_len
);
/* each iovec entry starts at the chunk's current offset */
387 chunks
[num_chunks
].iov_base
= c
->mem
->ptr
+ c
->offset
;
388 chunks
[num_chunks
].iov_len
= c_len
;
/* nothing was collected */
394 if (0 == num_chunks
) {
395 chunkqueue_remove_finished_chunks(cq
);
399 r
= writev(fd
, chunks
, num_chunks
);
401 if (r
< 0) switch (errno
) {
409 log_error_write(srv
, __FILE__
, __LINE__
, "ssd",
410 "writev failed:", strerror(errno
), fd
);
416 chunkqueue_mark_written(cq
, r
);
/* a short (or zero-length) write is reported as -3 */
419 return (r
> 0 && r
== toSend
) ? 0 : -3;
422 #endif /* NETWORK_WRITE_USE_WRITEV */
427 #if defined(NETWORK_WRITE_USE_SENDFILE)
429 #if defined(NETWORK_WRITE_USE_LINUX_SENDFILE) \
430 || defined(NETWORK_WRITE_USE_SOLARIS_SENDFILEV)
431 #include <sys/sendfile.h>
434 #if defined(NETWORK_WRITE_USE_FREEBSD_SENDFILE) \
435 || defined(NETWORK_WRITE_USE_DARWIN_SENDFILE)
/* Send part of the first chunk in cq to fd with the platform's sendfile
 * primitive; the preprocessor selects one of the Linux sendfile(), Darwin
 * sendfile(), FreeBSD sendfile() or Solaris sendfilev() call forms.
 * offset is file.start + c->offset; toSend is the unsent remainder of the
 * chunk capped at *p_max_bytes.
 * For some errno values (ESOCKTNOSUPPORT is the one visible here) the call
 * is redone through network_write_file_chunk_mmap() or
 * network_write_file_chunk_no_mmap(); another error path logs "sendfile():"
 * with strerror(errno), errno and fd.
 * The written count is handed to chunkqueue_mark_written() and subtracted
 * from *p_max_bytes.
 * Visible returns: -1 when chunkqueue_open_file_chunk() fails, 0 when r >= 0
 * and written == toSend, -3 otherwise.
 * NOTE(review): the declarations of offset, toSend, r, written and fvec and
 * most errno case labels are not visible in this excerpt. */
439 static int network_write_file_chunk_sendfile(server
*srv
, int fd
, chunkqueue
*cq
, off_t
*p_max_bytes
) {
440 chunk
* const c
= cq
->first
;
/* the chunk offset must lie inside [0, file.length] */
446 force_assert(c
->offset
>= 0 && c
->offset
<= c
->file
.length
);
448 offset
= c
->file
.start
+ c
->offset
;
449 toSend
= c
->file
.length
- c
->offset
;
450 if (toSend
> *p_max_bytes
) toSend
= *p_max_bytes
;
453 chunkqueue_remove_finished_chunks(cq
);
457 if (0 != chunkqueue_open_file_chunk(srv
, cq
)) return -1;
459 /* Darwin, FreeBSD, and Solaris variants support iovecs and could
460 * be optimized to send more than just file in single syscall */
462 #if defined(NETWORK_WRITE_USE_LINUX_SENDFILE)
/* Linux form: the return value is the byte count, copied into written when positive */
464 r
= sendfile(fd
, c
->file
.fd
, &offset
, toSend
);
465 if (r
> 0) written
= (off_t
)r
;
467 #elif defined(NETWORK_WRITE_USE_DARWIN_SENDFILE)
/* Darwin form: the byte count is passed and returned through &written */
470 r
= sendfile(c
->file
.fd
, fd
, offset
, &written
, NULL
, 0);
471 /* (for EAGAIN/EINTR written still contains the sent bytes) */
473 #elif defined(NETWORK_WRITE_USE_FREEBSD_SENDFILE)
/* FreeBSD form: toSend is the request size, &written receives the sent count */
475 r
= sendfile(c
->file
.fd
, fd
, offset
, toSend
, NULL
, &written
, 0);
476 /* (for EAGAIN/EINTR written still contains the sent bytes) */
478 #elif defined(NETWORK_WRITE_USE_SOLARIS_SENDFILEV)
/* Solaris form: describe the file range in a one-element vector */
481 fvec
.sfv_fd
= c
->file
.fd
;
483 fvec
.sfv_off
= offset
;
484 fvec
.sfv_len
= toSend
;
486 /* Solaris sendfilev() */
487 r
= sendfilev(fd
, &fvec
, 1, (size_t *)&written
);
488 /* (for EAGAIN/EINTR written still contains the sent bytes) */
501 break; /* try again later */
508 #if defined(ENOTSUP) && (!defined(EOPNOTSUPP) || EOPNOTSUPP != ENOTSUP)
514 #ifdef ESOCKTNOSUPPORT
515 case ESOCKTNOSUPPORT
:
/* fall back to a read/write based sender for this chunk */
520 #ifdef NETWORK_WRITE_USE_MMAP
521 return network_write_file_chunk_mmap(srv
, fd
, cq
, p_max_bytes
);
523 return network_write_file_chunk_no_mmap(srv
, fd
, cq
, p_max_bytes
);
526 log_error_write(srv
, __FILE__
, __LINE__
, "ssdSd",
527 "sendfile():", strerror(errno
), errno
, "fd:", fd
);
/* account for whatever was sent, including partial sends on error */
532 if (written
>= 0) { /*(always true)*/
533 chunkqueue_mark_written(cq
, written
);
534 *p_max_bytes
-= written
;
537 return (r
>= 0 && written
== toSend
) ? 0 : -3;
547 * -1 : error (on our side)
/* Backend built on plain write()/send(): while max_bytes > 0 and cq is not
 * empty, dispatch on the type of the first chunk to
 * network_write_mem_chunk() or to the mmap / no-mmap file writer (chosen by
 * NETWORK_WRITE_USE_MMAP). A helper result of -3 ends the call with 0; any
 * other non-zero result is returned unchanged.
 * NOTE(review): the declaration of r, the case labels and the final return
 * are not visible in this excerpt. */
551 static int network_write_chunkqueue_write(server
*srv
, int fd
, chunkqueue
*cq
, off_t max_bytes
) {
552 while (max_bytes
> 0 && NULL
!= cq
->first
) {
555 switch (cq
->first
->type
) {
557 r
= network_write_mem_chunk(srv
, fd
, cq
, &max_bytes
);
560 #ifdef NETWORK_WRITE_USE_MMAP
561 r
= network_write_file_chunk_mmap(srv
, fd
, cq
, &max_bytes
);
563 r
= network_write_file_chunk_no_mmap(srv
, fd
, cq
, &max_bytes
);
/* -3 ("try again later") is not an error for the caller */
568 if (-3 == r
) return 0;
569 if (0 != r
) return r
;
575 #if defined(NETWORK_WRITE_USE_WRITEV)
/* Backend that prefers writev() for memory chunks: while max_bytes > 0 and
 * cq is not empty, dispatch on the type of the first chunk to
 * network_writev_mem_chunks() (or network_write_mem_chunk() when writev is
 * not compiled in) or to the mmap / no-mmap file writer. A helper result of
 * -3 ends the call with 0; any other non-zero result is returned unchanged.
 * NOTE(review): the declaration of r, the case labels and the final return
 * are not visible in this excerpt. */
576 static int network_write_chunkqueue_writev(server
*srv
, int fd
, chunkqueue
*cq
, off_t max_bytes
) {
577 while (max_bytes
> 0 && NULL
!= cq
->first
) {
580 switch (cq
->first
->type
) {
582 #if defined(NETWORK_WRITE_USE_WRITEV)
583 r
= network_writev_mem_chunks(srv
, fd
, cq
, &max_bytes
);
585 r
= network_write_mem_chunk(srv
, fd
, cq
, &max_bytes
);
589 #ifdef NETWORK_WRITE_USE_MMAP
590 r
= network_write_file_chunk_mmap(srv
, fd
, cq
, &max_bytes
);
592 r
= network_write_file_chunk_no_mmap(srv
, fd
, cq
, &max_bytes
);
/* -3 ("try again later") is not an error for the caller */
597 if (-3 == r
) return 0;
598 if (0 != r
) return r
;
605 #if defined(NETWORK_WRITE_USE_SENDFILE)
/* Backend that prefers sendfile for file chunks: while max_bytes > 0 and cq
 * is not empty, dispatch on the type of the first chunk. Memory chunks go to
 * network_writev_mem_chunks() or network_write_mem_chunk(); file chunks go
 * to network_write_file_chunk_sendfile(), with the mmap and no-mmap writers
 * as compile-time alternatives. A helper result of -3 ends the call with 0;
 * any other non-zero result is returned unchanged.
 * NOTE(review): the declaration of r, the case labels and the final return
 * are not visible in this excerpt. */
606 static int network_write_chunkqueue_sendfile(server
*srv
, int fd
, chunkqueue
*cq
, off_t max_bytes
) {
607 while (max_bytes
> 0 && NULL
!= cq
->first
) {
610 switch (cq
->first
->type
) {
612 #if defined(NETWORK_WRITE_USE_WRITEV)
613 r
= network_writev_mem_chunks(srv
, fd
, cq
, &max_bytes
);
615 r
= network_write_mem_chunk(srv
, fd
, cq
, &max_bytes
);
619 #if defined(NETWORK_WRITE_USE_SENDFILE)
620 r
= network_write_file_chunk_sendfile(srv
, fd
, cq
, &max_bytes
);
621 #elif defined(NETWORK_WRITE_USE_MMAP)
622 r
= network_write_file_chunk_mmap(srv
, fd
, cq
, &max_bytes
);
624 r
= network_write_file_chunk_no_mmap(srv
, fd
, cq
, &max_bytes
);
/* -3 ("try again later") is not an error for the caller */
629 if (-3 == r
) return 0;
630 if (0 != r
) return r
;
637 int network_write_init(server
*srv
) {
639 NETWORK_BACKEND_UNSET
,
640 NETWORK_BACKEND_WRITE
,
641 NETWORK_BACKEND_WRITEV
,
642 NETWORK_BACKEND_SENDFILE
,
645 network_backend_t backend
;
648 network_backend_t nb
;
650 } network_backends
[] = {
652 { NETWORK_BACKEND_SENDFILE
, "sendfile" },
653 { NETWORK_BACKEND_SENDFILE
, "linux-sendfile" },
654 { NETWORK_BACKEND_SENDFILE
, "freebsd-sendfile" },
655 { NETWORK_BACKEND_SENDFILE
, "solaris-sendfilev" },
656 { NETWORK_BACKEND_WRITEV
, "writev" },
657 { NETWORK_BACKEND_WRITE
, "write" },
658 { NETWORK_BACKEND_UNSET
, NULL
}
661 /* get a useful default */
662 backend
= network_backends
[0].nb
;
664 /* match name against known types */
665 if (!buffer_string_is_empty(srv
->srvconf
.network_backend
)) {
667 for (size_t i
= 0; NULL
!= (name
= network_backends
[i
].name
); ++i
) {
668 if (0 == strcmp(srv
->srvconf
.network_backend
->ptr
, name
)) {
669 backend
= network_backends
[i
].nb
;
674 log_error_write(srv
, __FILE__
, __LINE__
, "sb",
675 "server.network-backend has an unknown value:",
676 srv
->srvconf
.network_backend
);
682 case NETWORK_BACKEND_SENDFILE
:
683 #if defined(NETWORK_WRITE_USE_SENDFILE)
684 srv
->network_backend_write
= network_write_chunkqueue_sendfile
;
687 case NETWORK_BACKEND_WRITEV
:
688 #if defined(NETWORK_WRITE_USE_WRITEV)
689 srv
->network_backend_write
= network_write_chunkqueue_writev
;
692 case NETWORK_BACKEND_WRITE
:
693 srv
->network_backend_write
= network_write_chunkqueue_write
;
702 const char * network_write_show_handlers(void) {
704 "\nNetwork handler:\n\n"
705 #if defined NETWORK_WRITE_USE_LINUX_SENDFILE
706 "\t+ linux-sendfile\n"
708 "\t- linux-sendfile\n"
710 #if defined NETWORK_WRITE_USE_FREEBSD_SENDFILE
711 "\t+ freebsd-sendfile\n"
713 "\t- freebsd-sendfile\n"
715 #if defined NETWORK_WRITE_USE_DARWIN_SENDFILE
716 "\t+ darwin-sendfile\n"
718 "\t- darwin-sendfile\n"
720 #if defined NETWORK_WRITE_USE_SOLARIS_SENDFILEV
721 "\t+ solaris-sendfilev\n"
723 "\t- solaris-sendfilev\n"
725 #if defined NETWORK_WRITE_USE_WRITEV
731 #ifdef NETWORK_WRITE_USE_MMAP