include/boost/corosio/native/detail/select/select_scheduler.hpp

86.4% Lines (140/162) 100.0% List of functions (10/10)
f(x) Functions (10)
Line TLA Hits Source Code
1 //
2 // Copyright (c) 2026 Steve Gerbino
3 //
4 // Distributed under the Boost Software License, Version 1.0. (See accompanying
5 // file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
6 //
7 // Official repository: https://github.com/cppalliance/corosio
8 //
9
10 #ifndef BOOST_COROSIO_NATIVE_DETAIL_SELECT_SELECT_SCHEDULER_HPP
11 #define BOOST_COROSIO_NATIVE_DETAIL_SELECT_SELECT_SCHEDULER_HPP
12
13 #include <boost/corosio/detail/platform.hpp>
14
15 #if BOOST_COROSIO_HAS_SELECT
16
17 #include <boost/corosio/detail/config.hpp>
18 #include <boost/capy/ex/execution_context.hpp>
19
20 #include <boost/corosio/native/detail/reactor/reactor_scheduler.hpp>
21
22 #include <boost/corosio/native/detail/select/select_op.hpp>
23 #include <boost/corosio/detail/timer_service.hpp>
24 #include <boost/corosio/native/detail/make_err.hpp>
25 #include <boost/corosio/native/detail/posix/posix_resolver_service.hpp>
26 #include <boost/corosio/native/detail/posix/posix_signal_service.hpp>
27
28 #include <boost/corosio/detail/except.hpp>
29
30 #include <sys/select.h>
31 #include <unistd.h>
32 #include <errno.h>
33 #include <fcntl.h>
34
35 #include <atomic>
36 #include <chrono>
37 #include <cstdint>
38 #include <limits>
39 #include <mutex>
40 #include <unordered_map>
41
42 namespace boost::corosio::detail {
43
44 struct select_op;
45 struct select_descriptor_state;
46
47 /** POSIX scheduler using select() for I/O multiplexing.
48
49 This scheduler implements the scheduler interface using the POSIX select()
50 call for I/O event notification. It inherits the shared reactor threading
51 model from reactor_scheduler_base: signal state machine, inline completion
52 budget, work counting, and the do_one event loop.
53
54 The design mirrors epoll_scheduler for behavioral consistency:
55 - Same single-reactor thread coordination model
56 - Same deferred I/O pattern (reactor marks ready; workers do I/O)
57 - Same timer integration pattern
58
59 Known Limitations:
60 - FD_SETSIZE (~1024) limits maximum concurrent connections
61 - O(n) scanning: rebuilds fd_sets each iteration
62 - Level-triggered only (no edge-triggered mode)
63
64 @par Thread Safety
65 All public member functions are thread-safe.
66 */
67 class BOOST_COROSIO_DECL select_scheduler final : public reactor_scheduler_base
68 {
69 public:
70 /** Construct the scheduler.
71
72 Creates a self-pipe for reactor interruption.
73
74 @param ctx Reference to the owning execution_context.
75 @param concurrency_hint Hint for expected thread count (unused).
76 */
77 select_scheduler(capy::execution_context& ctx, int concurrency_hint = -1);
78
79 /// Destroy the scheduler.
80 ~select_scheduler() override;
81
82 select_scheduler(select_scheduler const&) = delete;
83 select_scheduler& operator=(select_scheduler const&) = delete;
84
85 /// Shut down the scheduler, draining pending operations.
86 void shutdown() override;
87
88 /** Return the maximum file descriptor value supported.
89
90 Returns FD_SETSIZE - 1, the maximum fd value that can be
91 monitored by select(). Operations with fd >= FD_SETSIZE
92 will fail with EINVAL.
93
94 @return The maximum supported file descriptor value.
95 */
96 static constexpr int max_fd() noexcept
97 {
98 return FD_SETSIZE - 1;
99 }
100
101 /** Register a descriptor for persistent monitoring.
102
103 The fd is added to the registered_descs_ map and will be
104 included in subsequent select() calls. The reactor is
105 interrupted so a blocked select() rebuilds its fd_sets.
106
107 @param fd The file descriptor to register.
108 @param desc Pointer to descriptor state for this fd.
109 */
110 void register_descriptor(int fd, select_descriptor_state* desc) const;
111
112 /** Deregister a persistently registered descriptor.
113
114 @param fd The file descriptor to deregister.
115 */
116 void deregister_descriptor(int fd) const;
117
118 /** Interrupt the reactor so it rebuilds its fd_sets.
119
120 Called when a write or connect op is registered after
121 the reactor's snapshot was taken. Without this, select()
122 may block not watching for writability on the fd.
123 */
124 void notify_reactor() const;
125
126 private:
127 void
128 run_task(std::unique_lock<std::mutex>& lock, context_type* ctx) override;
129 void interrupt_reactor() const override;
130 long calculate_timeout(long requested_timeout_us) const;
131
132 // Self-pipe for interrupting select()
133 int pipe_fds_[2]; // [0]=read, [1]=write
134
135 // Per-fd tracking for fd_set building
136 mutable std::unordered_map<int, select_descriptor_state*> registered_descs_;
137 mutable int max_fd_ = -1;
138 };
139
140 195x inline select_scheduler::select_scheduler(capy::execution_context& ctx, int)
141 195x : pipe_fds_{-1, -1}
142 195x , max_fd_(-1)
143 {
144 195x if (::pipe(pipe_fds_) < 0)
145 detail::throw_system_error(make_err(errno), "pipe");
146
147 585x for (int i = 0; i < 2; ++i)
148 {
149 390x int flags = ::fcntl(pipe_fds_[i], F_GETFL, 0);
150 390x if (flags == -1)
151 {
152 int errn = errno;
153 ::close(pipe_fds_[0]);
154 ::close(pipe_fds_[1]);
155 detail::throw_system_error(make_err(errn), "fcntl F_GETFL");
156 }
157 390x if (::fcntl(pipe_fds_[i], F_SETFL, flags | O_NONBLOCK) == -1)
158 {
159 int errn = errno;
160 ::close(pipe_fds_[0]);
161 ::close(pipe_fds_[1]);
162 detail::throw_system_error(make_err(errn), "fcntl F_SETFL");
163 }
164 390x if (::fcntl(pipe_fds_[i], F_SETFD, FD_CLOEXEC) == -1)
165 {
166 int errn = errno;
167 ::close(pipe_fds_[0]);
168 ::close(pipe_fds_[1]);
169 detail::throw_system_error(make_err(errn), "fcntl F_SETFD");
170 }
171 }
172
173 195x timer_svc_ = &get_timer_service(ctx, *this);
174 195x timer_svc_->set_on_earliest_changed(
175 4151x timer_service::callback(this, [](void* p) {
176 3956x static_cast<select_scheduler*>(p)->interrupt_reactor();
177 3956x }));
178
179 195x get_resolver_service(ctx, *this);
180 195x get_signal_service(ctx, *this);
181
182 195x completed_ops_.push(&task_op_);
183 195x }
184
185 390x inline select_scheduler::~select_scheduler()
186 {
187 195x if (pipe_fds_[0] >= 0)
188 195x ::close(pipe_fds_[0]);
189 195x if (pipe_fds_[1] >= 0)
190 195x ::close(pipe_fds_[1]);
191 390x }
192
193 inline void
194 195x select_scheduler::shutdown()
195 {
196 195x shutdown_drain();
197
198 195x if (pipe_fds_[1] >= 0)
199 195x interrupt_reactor();
200 195x }
201
202 inline void
203 7561x select_scheduler::register_descriptor(
204 int fd, select_descriptor_state* desc) const
205 {
206 7561x if (fd < 0 || fd >= FD_SETSIZE)
207 detail::throw_system_error(make_err(EINVAL), "select: fd out of range");
208
209 7561x desc->registered_events = reactor_event_read | reactor_event_write;
210 7561x desc->fd = fd;
211 7561x desc->scheduler_ = this;
212 7561x desc->ready_events_.store(0, std::memory_order_relaxed);
213
214 {
215 7561x std::lock_guard lock(desc->mutex);
216 7561x desc->impl_ref_.reset();
217 7561x desc->read_ready = false;
218 7561x desc->write_ready = false;
219 7561x }
220
221 {
222 7561x std::lock_guard lock(mutex_);
223 7561x registered_descs_[fd] = desc;
224 7561x if (fd > max_fd_)
225 7557x max_fd_ = fd;
226 7561x }
227
228 7561x interrupt_reactor();
229 7561x }
230
231 inline void
232 7561x select_scheduler::deregister_descriptor(int fd) const
233 {
234 7561x std::lock_guard lock(mutex_);
235
236 7561x auto it = registered_descs_.find(fd);
237 7561x if (it == registered_descs_.end())
238 return;
239
240 7561x registered_descs_.erase(it);
241
242 7561x if (fd == max_fd_)
243 {
244 7504x max_fd_ = pipe_fds_[0];
245 14923x for (auto& [registered_fd, state] : registered_descs_)
246 {
247 7419x if (registered_fd > max_fd_)
248 7410x max_fd_ = registered_fd;
249 }
250 }
251 7561x }
252
253 inline void
254 21448x select_scheduler::notify_reactor() const
255 {
256 21448x interrupt_reactor();
257 21448x }
258
259 inline void
260 33303x select_scheduler::interrupt_reactor() const
261 {
262 33303x char byte = 1;
263 33303x [[maybe_unused]] auto r = ::write(pipe_fds_[1], &byte, 1);
264 33303x }
265
266 inline long
267 209194x select_scheduler::calculate_timeout(long requested_timeout_us) const
268 {
269 209194x if (requested_timeout_us == 0)
270 return 0;
271
272 209194x auto nearest = timer_svc_->nearest_expiry();
273 209194x if (nearest == timer_service::time_point::max())
274 46x return requested_timeout_us;
275
276 209148x auto now = std::chrono::steady_clock::now();
277 209148x if (nearest <= now)
278 317x return 0;
279
280 auto timer_timeout_us =
281 208831x std::chrono::duration_cast<std::chrono::microseconds>(nearest - now)
282 208831x .count();
283
284 208831x constexpr auto long_max =
285 static_cast<long long>((std::numeric_limits<long>::max)());
286 auto capped_timer_us =
287 208831x (std::min)((std::max)(static_cast<long long>(timer_timeout_us),
288 208831x static_cast<long long>(0)),
289 208831x long_max);
290
291 208831x if (requested_timeout_us < 0)
292 208831x return static_cast<long>(capped_timer_us);
293
294 return static_cast<long>(
295 (std::min)(static_cast<long long>(requested_timeout_us),
296 capped_timer_us));
297 }
298
299 inline void
300 229813x select_scheduler::run_task(
301 std::unique_lock<std::mutex>& lock, context_type* ctx)
302 {
303 229813x long effective_timeout_us = task_interrupted_ ? 0 : calculate_timeout(-1);
304
305 // Snapshot registered descriptors while holding lock.
306 // Record which fds need write monitoring to avoid a hot loop:
307 // select is level-triggered so writable sockets (nearly always
308 // writable) would cause select() to return immediately every
309 // iteration if unconditionally added to write_fds.
310 struct fd_entry
311 {
312 int fd;
313 select_descriptor_state* desc;
314 bool needs_write;
315 };
316 fd_entry snapshot[FD_SETSIZE];
317 229813x int snapshot_count = 0;
318
319 670003x for (auto& [fd, desc] : registered_descs_)
320 {
321 440190x if (snapshot_count < FD_SETSIZE)
322 {
323 440190x std::lock_guard desc_lock(desc->mutex);
324 440190x snapshot[snapshot_count].fd = fd;
325 440190x snapshot[snapshot_count].desc = desc;
326 440190x snapshot[snapshot_count].needs_write =
327 440190x (desc->write_op || desc->connect_op);
328 440190x ++snapshot_count;
329 440190x }
330 }
331
332 229813x if (lock.owns_lock())
333 209194x lock.unlock();
334
335 229813x task_cleanup on_exit{this, &lock, ctx};
336
337 fd_set read_fds, write_fds, except_fds;
338 3906821x FD_ZERO(&read_fds);
339 3906821x FD_ZERO(&write_fds);
340 3906821x FD_ZERO(&except_fds);
341
342 229813x FD_SET(pipe_fds_[0], &read_fds);
343 229813x int nfds = pipe_fds_[0];
344
345 670003x for (int i = 0; i < snapshot_count; ++i)
346 {
347 440190x int fd = snapshot[i].fd;
348 440190x FD_SET(fd, &read_fds);
349 440190x if (snapshot[i].needs_write)
350 3727x FD_SET(fd, &write_fds);
351 440190x FD_SET(fd, &except_fds);
352 440190x if (fd > nfds)
353 229549x nfds = fd;
354 }
355
356 struct timeval tv;
357 229813x struct timeval* tv_ptr = nullptr;
358 229813x if (effective_timeout_us >= 0)
359 {
360 229767x tv.tv_sec = effective_timeout_us / 1000000;
361 229767x tv.tv_usec = effective_timeout_us % 1000000;
362 229767x tv_ptr = &tv;
363 }
364
365 229813x int ready = ::select(nfds + 1, &read_fds, &write_fds, &except_fds, tv_ptr);
366
367 // EINTR: signal interrupted select(), just retry.
368 // EBADF: an fd was closed between snapshot and select(); retry
369 // with a fresh snapshot from registered_descs_.
370 229813x if (ready < 0)
371 {
372 if (errno == EINTR || errno == EBADF)
373 return;
374 detail::throw_system_error(make_err(errno), "select");
375 }
376
377 // Process timers outside the lock
378 229813x timer_svc_->process_expired();
379
380 229813x op_queue local_ops;
381
382 229813x if (ready > 0)
383 {
384 215449x if (FD_ISSET(pipe_fds_[0], &read_fds))
385 {
386 char buf[256];
387 34496x while (::read(pipe_fds_[0], buf, sizeof(buf)) > 0)
388 {
389 }
390 }
391
392 634453x for (int i = 0; i < snapshot_count; ++i)
393 {
394 419004x int fd = snapshot[i].fd;
395 419004x select_descriptor_state* desc = snapshot[i].desc;
396
397 419004x std::uint32_t flags = 0;
398 419004x if (FD_ISSET(fd, &read_fds))
399 208852x flags |= reactor_event_read;
400 419004x if (FD_ISSET(fd, &write_fds))
401 3727x flags |= reactor_event_write;
402 419004x if (FD_ISSET(fd, &except_fds))
403 flags |= reactor_event_error;
404
405 419004x if (flags == 0)
406 206427x continue;
407
408 212577x desc->add_ready_events(flags);
409
410 212577x bool expected = false;
411 212577x if (desc->is_enqueued_.compare_exchange_strong(
412 expected, true, std::memory_order_release,
413 std::memory_order_relaxed))
414 {
415 212577x local_ops.push(desc);
416 }
417 }
418 }
419
420 229813x lock.lock();
421
422 229813x if (!local_ops.empty())
423 208852x completed_ops_.splice(local_ops);
424 229813x }
425
426 } // namespace boost::corosio::detail
427
428 #endif // BOOST_COROSIO_HAS_SELECT
429
430 #endif // BOOST_COROSIO_NATIVE_DETAIL_SELECT_SELECT_SCHEDULER_HPP
431