/*
 * cmogstored 1.8.1 — queue_epoll.c
 * (extracted from cmogstored.git, blob 45830090028247c20e8d68edeea3a5ca597b42fb)
 */
1 /*
2 * Copyright (C) 2012-2020 all contributors <cmogstored-public@yhbt.net>
3 * License: GPL-3.0+ <https://www.gnu.org/licenses/gpl-3.0.txt>
4 */
5 #include "cmogstored.h"
6 /* epoll-specific parts see queue_common.c and activeq.c for the rest */
/*
 * a poll/select/libev/libevent-based implementation would have a hard time
 * migrating clients between threads
 */
#if defined(HAVE_EPOLL_WAIT) && ! MOG_LIBKQUEUE
12 #include "compat_epoll_pwait.h"
13 #include <sys/utsname.h>
/*
 * Detect old kernels with buggy EPOLL_CTL_MOD on SMP
 * This issue is fixed by Linux commit 128dd1759d96ad36c379240f8b9463e8acfd37a1
 * Remove this workaround around 2020 - 2023
 */
20 static bool epoll_ctl_mod_buggy;
22 __attribute__((constructor)) static void epoll_ctl_mod_buggy_detect(void)
24 struct utsname buf;
25 unsigned version, patchlevel, sublevel, extra;
26 int rc;
29 * Online/current processors for this process is not enough,
30 * we need all processors since events may be triggered
31 * by interrupt handlers on any CPU in the system
33 unsigned long nproc = num_processors(NPROC_ALL);
35 /* Eric Wong's personal machines are ancient and weak: */
36 if (nproc == 1)
37 return;
39 CHECK(int, 0, uname(&buf));
41 /* who knows, maybe there'll be an epoll on other OSes one day */
42 if (strcmp(buf.sysname, "Linux"))
43 return;
45 rc = sscanf(buf.release, "%u.%u.%u", &version, &patchlevel, &sublevel);
46 if (rc != 3) {
47 warn("sscanf failed to parse kernel version: %s (rc=%d), "
48 "assuming EPOLL_CTL_MOD is buggy on SMP",
49 buf.release, rc);
50 epoll_ctl_mod_buggy = true;
51 return;
54 /* TODO: whitelist vendor kernels as fixes are backported */
55 if (version <= 2)
56 epoll_ctl_mod_buggy = true;
58 /* 2.6.32.61+ and 2.6.34.15+ are OK */
59 if (version == 2 && patchlevel == 6 &&
60 (sublevel == 32 || sublevel == 34)) {
61 rc = sscanf(buf.release, "%u.%u.%u.%u",
62 &version, &patchlevel, &sublevel, &extra);
63 if (rc != 4)
64 return;
66 if (sublevel == 32)
67 epoll_ctl_mod_buggy = extra < 61; /* 2.6.32.61+ */
68 else if (sublevel == 34)
69 epoll_ctl_mod_buggy = extra < 15; /* 2.6.34.15+ */
70 else
71 assert("buggy version check for 2.6.32.61/2.6.34.15");
74 if (version != 3)
75 return;
77 /* v3.8-rc2+ has this fix (don't care about v3.8-rc1) */
78 if (patchlevel >= 8)
79 return;
81 switch (patchlevel) {
82 case 0: /* v3.0.59+ are good */
83 epoll_ctl_mod_buggy = sublevel < 59;
84 return;
85 case 2: /* v3.2.37+ are good */
86 epoll_ctl_mod_buggy = sublevel < 37;
87 return;
88 case 4: /* v3.4.26+ are good */
89 epoll_ctl_mod_buggy = sublevel < 26;
90 return;
91 case 5: /* v3.5.7.3+ are good */
92 /* (extended stable) git://kernel.ubuntu.com/ubuntu/linux.git */
93 if (sublevel == 7) {
94 rc = sscanf(buf.release, "%u.%u.%u.%u",
95 &version, &patchlevel, &sublevel, &extra);
96 epoll_ctl_mod_buggy = (rc == 4) && (extra < 3);
97 } else {
98 epoll_ctl_mod_buggy = true;
100 /* v3.5.8 probably will not happen ... */
101 return;
102 case 7: /* v3.7.3+ are good */
103 epoll_ctl_mod_buggy = sublevel < 3;
104 return;
105 case 1: /* v3.1 seems abandoned */
106 case 3: /* v3.3 seems abandoned */
107 case 6: /* v3.6 seems abandoned */
108 epoll_ctl_mod_buggy = true;
/*
 * Creates a new event queue backed by a fresh epoll descriptor.
 * Terminates the process (die_errno) if the kernel cannot create one.
 */
struct mog_queue * mog_queue_new(void)
{
	int size_hint = 666; /* hint, ignored in new kernels (since 2.6.8) */
	int epoll_fd = epoll_create(size_hint);

	if (epoll_fd < 0)
		die_errno("epoll_create() failed");

	return mog_queue_init(epoll_fd);
}
/*
 * Translates an epoll_wait()/epoll_pwait() return value into a mog_fd.
 * Returns the ready mog_fd (rc == 1), NULL on timeout (rc == 0) or
 * signal interruption (EINTR); dies on any other error.
 */
static struct mog_fd *
epoll_event_check(int rc, struct epoll_event *event)
{
	struct mog_fd *mfd;

	switch (rc) {
	case 1:
		mfd = event->data.ptr;
		mog_fd_check_out(mfd);
		return mfd;
	case 0:
		return NULL;
	}

	if (errno != EINTR)
		/* rc could be > 1 if the kernel is broken :P */
		die_errno("epoll_wait() failed with (%d)", rc);

	return NULL;
}
143 * grabs one active event off the event queue
144 * epoll_wait() has "wake-one" behavior (like accept())
145 * to avoid thundering herd since 2007
147 struct mog_fd * mog_idleq_wait(struct mog_queue *q, int timeout)
149 int rc;
150 struct epoll_event event;
151 bool cancellable = timeout != 0;
153 if (cancellable)
154 mog_thr_test_quit();
156 /* epoll_wait is a cancellation point since glibc 2.4 */
157 rc = epoll_wait(q->queue_fd, &event, 1, timeout);
159 return epoll_event_check(rc, &event);
162 struct mog_fd * mog_idleq_wait_intr(struct mog_queue *q, int timeout)
164 int rc;
165 struct epoll_event event;
167 rc = epoll_pwait(q->queue_fd, &event, 1, timeout, &mog_emptyset);
168 return epoll_event_check(rc, &event);
171 MOG_NOINLINE static void
172 epoll_ctl_error(struct mog_queue *q, struct mog_fd *mfd)
174 switch (errno) {
175 case ENOMEM:
176 case ENOSPC:
177 syslog(LOG_ERR, "epoll_ctl: %m, dropping file descriptor");
178 mog_queue_drop(mfd);
179 return;
180 default:
181 syslog(LOG_ERR, "unhandled epoll_ctl() error: %m");
182 assert(0 && "BUG in our usage of epoll");
187 * Pushes in one mog_fd for epoll to watch.
189 * Only call this from the mog_accept_loop *or*
190 * if EAGAIN/EWOULDBLOCK is encountered in mog_queue_loop.
192 static void
193 idleq_mod(struct mog_queue *q, struct mog_fd *mfd, enum mog_qev ev, int op)
195 struct epoll_event event;
197 event.data.ptr = mfd;
198 event.events = (uint32_t)ev;
200 mog_fd_check_in(mfd);
201 if (epoll_ctl(q->queue_fd, op, mfd->fd, &event) != 0) {
202 mog_fd_check_out(mfd);
203 epoll_ctl_error(q, mfd);
207 void mog_idleq_add(struct mog_queue *q, struct mog_fd *mfd, enum mog_qev ev)
209 idleq_mod(q, mfd, ev, EPOLL_CTL_ADD);
213 * Workaround buggy EPOLL_CTL_MOD race by combining EPOLL_CTL_DEL
214 * and EPOLL_CTL_ADD for the same effect (with more syscall overhead)
216 static void
217 fake_epoll_ctl_mod(struct mog_queue *q, struct mog_fd *mfd, enum mog_qev ev)
219 struct epoll_event event;
221 if (epoll_ctl(q->queue_fd, EPOLL_CTL_DEL, mfd->fd, &event) == 0)
222 idleq_mod(q, mfd, ev, EPOLL_CTL_ADD);
223 else
224 epoll_ctl_error(q, mfd);
227 void mog_idleq_push(struct mog_queue *q, struct mog_fd *mfd, enum mog_qev ev)
229 if (epoll_ctl_mod_buggy)
230 fake_epoll_ctl_mod(q, mfd, ev);
231 else
232 idleq_mod(q, mfd, ev, EPOLL_CTL_MOD);
235 struct mog_fd *
236 mog_queue_xchg(struct mog_queue *q, struct mog_fd *mfd, enum mog_qev ev)
238 /* epoll need two (or three) syscalls to implement this */
239 mog_idleq_push(q, mfd, ev);
240 return mog_idleq_wait(q, -1);
242 #else /* ! HAVE_EPOLL_WAIT */
243 typedef int avoid_empty_file;
244 #endif /* ! HAVE_EPOLL_WAIT */