/*
 * Copyright (C) 2012-2020 all contributors <cmogstored-public@yhbt.net>
 * License: GPL-3.0+ <https://www.gnu.org/licenses/gpl-3.0.txt>
 */
5 #include "cmogstored.h"
6 /* epoll-specific parts see queue_common.c and activeq.c for the rest */
/*
 * a poll/select/libev/libevent-based implementation would have a hard time
 * migrating clients between threads
 */
11 #if defined(HAVE_EPOLL_WAIT) && ! MOG_LIBKQUEUE
12 #include "compat_epoll_pwait.h"
13 #include <sys/utsname.h>
/*
 * Detect old kernels with buggy EPOLL_CTL_MOD on SMP.
 * This issue is fixed by Linux commit 128dd1759d96ad36c379240f8b9463e8acfd37a1
 * Remove this workaround around 2020 - 2023
 */
20 static bool epoll_ctl_mod_buggy
;
22 __attribute__((constructor
)) static void epoll_ctl_mod_buggy_detect(void)
25 unsigned version
, patchlevel
, sublevel
, extra
;
29 * Online/current processors for this process is not enough,
30 * we need all processors since events may be triggered
31 * by interrupt handlers on any CPU in the system
33 unsigned long nproc
= num_processors(NPROC_ALL
);
35 /* Eric Wong's personal machines are ancient and weak: */
39 CHECK(int, 0, uname(&buf
));
41 /* who knows, maybe there'll be an epoll on other OSes one day */
42 if (strcmp(buf
.sysname
, "Linux"))
45 rc
= sscanf(buf
.release
, "%u.%u.%u", &version
, &patchlevel
, &sublevel
);
47 warn("sscanf failed to parse kernel version: %s (rc=%d), "
48 "assuming EPOLL_CTL_MOD is buggy on SMP",
50 epoll_ctl_mod_buggy
= true;
54 /* TODO: whitelist vendor kernels as fixes are backported */
56 epoll_ctl_mod_buggy
= true;
58 /* 2.6.32.61+ and 2.6.34.15+ are OK */
59 if (version
== 2 && patchlevel
== 6 &&
60 (sublevel
== 32 || sublevel
== 34)) {
61 rc
= sscanf(buf
.release
, "%u.%u.%u.%u",
62 &version
, &patchlevel
, &sublevel
, &extra
);
67 epoll_ctl_mod_buggy
= extra
< 61; /* 2.6.32.61+ */
68 else if (sublevel
== 34)
69 epoll_ctl_mod_buggy
= extra
< 15; /* 2.6.34.15+ */
71 assert("buggy version check for 2.6.32.61/2.6.34.15");
77 /* v3.8-rc2+ has this fix (don't care about v3.8-rc1) */
82 case 0: /* v3.0.59+ are good */
83 epoll_ctl_mod_buggy
= sublevel
< 59;
85 case 2: /* v3.2.37+ are good */
86 epoll_ctl_mod_buggy
= sublevel
< 37;
88 case 4: /* v3.4.26+ are good */
89 epoll_ctl_mod_buggy
= sublevel
< 26;
91 case 5: /* v3.5.7.3+ are good */
92 /* (extended stable) git://kernel.ubuntu.com/ubuntu/linux.git */
94 rc
= sscanf(buf
.release
, "%u.%u.%u.%u",
95 &version
, &patchlevel
, &sublevel
, &extra
);
96 epoll_ctl_mod_buggy
= (rc
== 4) && (extra
< 3);
98 epoll_ctl_mod_buggy
= true;
100 /* v3.5.8 probably will not happen ... */
102 case 7: /* v3.7.3+ are good */
103 epoll_ctl_mod_buggy
= sublevel
< 3;
105 case 1: /* v3.1 seems abandoned */
106 case 3: /* v3.3 seems abandoned */
107 case 6: /* v3.6 seems abandoned */
108 epoll_ctl_mod_buggy
= true;
/*
 * Allocates and returns a new queue backed by an epoll descriptor.
 * Dies (via die_errno) if the kernel refuses to create one.
 */
struct mog_queue * mog_queue_new(void)
{
	int hint = 666; /* size hint, ignored by modern kernels */
	int fd = epoll_create(hint);

	if (fd < 0)
		die_errno("epoll_create() failed");

	return mog_queue_init(fd);
}
121 static struct mog_fd
*
122 epoll_event_check(int rc
, struct epoll_event
*event
)
128 mfd
= event
->data
.ptr
;
129 mog_fd_check_out(mfd
);
136 /* rc could be > 1 if the kernel is broken :P */
137 die_errno("epoll_wait() failed with (%d)", rc
);
143 * grabs one active event off the event queue
144 * epoll_wait() has "wake-one" behavior (like accept())
145 * to avoid thundering herd since 2007
147 struct mog_fd
* mog_idleq_wait(struct mog_queue
*q
, int timeout
)
150 struct epoll_event event
;
151 bool cancellable
= timeout
!= 0;
156 /* epoll_wait is a cancellation point since glibc 2.4 */
157 rc
= epoll_wait(q
->queue_fd
, &event
, 1, timeout
);
159 return epoll_event_check(rc
, &event
);
162 struct mog_fd
* mog_idleq_wait_intr(struct mog_queue
*q
, int timeout
)
165 struct epoll_event event
;
167 rc
= epoll_pwait(q
->queue_fd
, &event
, 1, timeout
, &mog_emptyset
);
168 return epoll_event_check(rc
, &event
);
171 MOG_NOINLINE
static void
172 epoll_ctl_error(struct mog_queue
*q
, struct mog_fd
*mfd
)
177 syslog(LOG_ERR
, "epoll_ctl: %m, dropping file descriptor");
181 syslog(LOG_ERR
, "unhandled epoll_ctl() error: %m");
182 assert(0 && "BUG in our usage of epoll");
187 * Pushes in one mog_fd for epoll to watch.
189 * Only call this from the mog_accept_loop *or*
190 * if EAGAIN/EWOULDBLOCK is encountered in mog_queue_loop.
193 idleq_mod(struct mog_queue
*q
, struct mog_fd
*mfd
, enum mog_qev ev
, int op
)
195 struct epoll_event event
;
197 event
.data
.ptr
= mfd
;
198 event
.events
= (uint32_t)ev
;
200 mog_fd_check_in(mfd
);
201 if (epoll_ctl(q
->queue_fd
, op
, mfd
->fd
, &event
) != 0) {
202 mog_fd_check_out(mfd
);
203 epoll_ctl_error(q
, mfd
);
207 void mog_idleq_add(struct mog_queue
*q
, struct mog_fd
*mfd
, enum mog_qev ev
)
209 idleq_mod(q
, mfd
, ev
, EPOLL_CTL_ADD
);
213 * Workaround buggy EPOLL_CTL_MOD race by combining EPOLL_CTL_DEL
214 * and EPOLL_CTL_ADD for the same effect (with more syscall overhead)
217 fake_epoll_ctl_mod(struct mog_queue
*q
, struct mog_fd
*mfd
, enum mog_qev ev
)
219 struct epoll_event event
;
221 if (epoll_ctl(q
->queue_fd
, EPOLL_CTL_DEL
, mfd
->fd
, &event
) == 0)
222 idleq_mod(q
, mfd
, ev
, EPOLL_CTL_ADD
);
224 epoll_ctl_error(q
, mfd
);
227 void mog_idleq_push(struct mog_queue
*q
, struct mog_fd
*mfd
, enum mog_qev ev
)
229 if (epoll_ctl_mod_buggy
)
230 fake_epoll_ctl_mod(q
, mfd
, ev
);
232 idleq_mod(q
, mfd
, ev
, EPOLL_CTL_MOD
);
236 mog_queue_xchg(struct mog_queue
*q
, struct mog_fd
*mfd
, enum mog_qev ev
)
238 /* epoll need two (or three) syscalls to implement this */
239 mog_idleq_push(q
, mfd
, ev
);
240 return mog_idleq_wait(q
, -1);
242 #else /* ! HAVE_EPOLL_WAIT */
typedef int avoid_empty_file;
244 #endif /* ! HAVE_EPOLL_WAIT */