4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
23 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
24 * Use is subject to license terms.
28 #include <sys/types.h>
29 #include <sys/systm.h>
31 #include <sys/errno.h>
33 #include <sys/sysmacros.h>
34 #include <sys/debug.h>
35 #include <sys/poll_impl.h>
36 #include <sys/port_impl.h>
38 #define PORTHASH_START 256 /* start cache space for events */
39 #define PORTHASH_MULT 2 /* growth threshold and factor */
42 static int port_fd_callback(void *, int *, pid_t
, int, void *);
43 static int port_bind_pollhead(pollhead_t
**, polldat_t
*, short *);
44 static void port_close_sourcefd(void *, int, pid_t
, int);
45 static void port_cache_insert_fd(port_fdcache_t
*, polldat_t
*);
49 * The event port framework uses callback functions to notify associated
50 * event sources about actions on source specific objects.
51 * The source itself defines the "arg" required to identify the object with
52 * events. In the port_fd_callback() case the "arg" is a pointer to portfd_t
53 * structure. The portfd_t structure is specific for PORT_SOURCE_FD source.
54 * The port_fd_callback() function is notified in three cases:
55 * - PORT_CALLBACK_DEFAULT
56 * The object (fd) will be delivered to the application.
57 * - PORT_CALLBACK_DISSOCIATE
58 * The object (fd) will be dissociated from the port.
59 * - PORT_CALLBACK_CLOSE
60 * The object (fd) will be dissociated from the port because the port
62 * A fd is shareable between processes only when
63 * - processes have the same fd id and
64 * - processes have the same fp.
65 * A fd becomes shareable:
66 * - on fork() across parent and child process and
67 * - when I_SENDFD is used to pass file descriptors between parent and child
68 * immediately after fork() (the sender and receiver must get the same
69 * file descriptor id).
70 * If a fd is shared between processes, all involved processes will get
71 * the same rights related to re-association of the fd with the port and
72 * retrieval of events from that fd.
73 * The process which associated the fd with a port for the first time
74 * becomes also the owner of the association. Only the owner of the
75 * association is allowed to dissociate the fd from the port.
79 port_fd_callback(void *arg
, int *events
, pid_t pid
, int flag
, void *evp
)
81 portfd_t
*pfd
= (portfd_t
*)arg
;
82 polldat_t
*pdp
= PFTOD(pfd
);
87 ASSERT((pdp
!= NULL
) && (events
!= NULL
));
89 case PORT_CALLBACK_DEFAULT
:
90 if (curproc
->p_pid
!= pid
) {
92 * Check if current process is allowed to retrieve
93 * events from this fd.
95 fp
= getf(pdp
->pd_fd
);
97 error
= EACCES
; /* deny delivery of events */
100 releasef(pdp
->pd_fd
);
101 if (fp
!= pdp
->pd_fp
) {
102 error
= EACCES
; /* deny delivery of events */
106 *events
= pdp
->pd_portev
->portkev_events
; /* update events */
109 case PORT_CALLBACK_DISSOCIATE
:
112 case PORT_CALLBACK_CLOSE
:
113 /* remove polldat/portfd struct */
114 pdp
->pd_portev
= NULL
;
115 pcp
= (port_fdcache_t
*)pdp
->pd_pcache
;
116 mutex_enter(&pcp
->pc_lock
);
119 if (pdp
->pd_php
!= NULL
) {
120 pollhead_delete(pdp
->pd_php
, pdp
);
123 port_pcache_remove_fd(pcp
, pfd
);
124 mutex_exit(&pcp
->pc_lock
);
135 * This routine returns a pointer to a cached poll fd entry, or NULL if it
136 * does not find it in the hash table.
137 * The fd is used as index.
138 * The fd and the fp are used to detect a valid entry.
139 * This function returns a pointer to a valid portfd_t structure only when
140 * the fd and the fp in the args match the entries in polldat_t.
143 port_cache_lookup_fp(port_fdcache_t
*pcp
, int fd
, file_t
*fp
)
148 ASSERT(MUTEX_HELD(&pcp
->pc_lock
));
149 bucket
= PORT_FD_BUCKET(pcp
, fd
);
150 pdp
= PFTOD(*bucket
);
151 while (pdp
!= NULL
) {
152 if (pdp
->pd_fd
== fd
&& pdp
->pd_fp
== fp
)
154 pdp
= pdp
->pd_hashnext
;
160 * port_associate_fd()
161 * This function associates new file descriptors with a port or
162 * reactivates already associated file descriptors.
163 * The reactivation also updates the event types to be checked and the
164 * attached user pointer.
165 * Per port a cache is used to store associated file descriptors.
166 * Internally the fop_poll interface is used to poll for existing events.
167 * The fop_poll interface can also deliver a pointer to a pollhead_t structure
168 * which is used to enqueue polldat_t structures with pending events.
169 * If fop_poll immediately returns valid events (revents) then those events
170 * will be submitted to the event port with port_send_event().
171 * Otherwise fop_poll does not return events but it delivers a pointer to a
172 * pollhead_t structure. In such a case the corresponding file system behind
173 * fop_poll will use the pollwakeup() function to notify about existing
177 port_associate_fd(port_t
*pp
, int source
, uintptr_t object
, int events
,
182 struct pollhead
*php
= NULL
;
186 port_kevent_t
*pkevp
;
191 pcp
= pp
->port_queue
.portq_pcp
;
192 if (object
> (uintptr_t)INT_MAX
)
197 if ((fp
= getf(fd
)) == NULL
)
200 mutex_enter(&pcp
->pc_lock
);
202 if (pcp
->pc_hash
== NULL
) {
204 * This is the first time that a fd is being associated with
206 * - create PORT_SOURCE_FD cache
207 * - associate PORT_SOURCE_FD source with the port
209 error
= port_associate_ksource(pp
->port_fd
, PORT_SOURCE_FD
,
210 NULL
, port_close_sourcefd
, pp
, NULL
);
212 mutex_exit(&pcp
->pc_lock
);
217 /* create polldat cache */
218 pcp
->pc_hashsize
= PORTHASH_START
;
219 pcp
->pc_hash
= kmem_zalloc(pcp
->pc_hashsize
*
220 sizeof (portfd_t
*), KM_SLEEP
);
223 /* Check if the fd/fp is already associated with the port */
224 pfd
= port_cache_lookup_fp(pcp
, fd
, fp
);
230 * Allocate a polldat_t structure per fd
231 * The use of the polldat_t structure to cache file descriptors
232 * is required to be able to share the pollwakeup() function
233 * with poll(2) and devpoll(7d).
235 pfd
= kmem_zalloc(sizeof (portfd_t
), KM_SLEEP
);
239 pdp
->pd_pcache
= (void *)pcp
;
241 /* Allocate a port event structure per fd */
242 error
= port_alloc_event_local(pp
, source
, PORT_ALLOC_CACHED
,
245 kmem_free(pfd
, sizeof (portfd_t
));
247 mutex_exit(&pcp
->pc_lock
);
250 pkevp
= pdp
->pd_portev
;
251 pkevp
->portkev_callback
= port_fd_callback
;
252 pkevp
->portkev_arg
= pfd
;
254 /* add portfd_t entry to the cache */
255 port_cache_insert_fd(pcp
, pdp
);
256 pkevp
->portkev_object
= fd
;
257 pkevp
->portkev_user
= user
;
260 * Add current port to the file descriptor interested list
261 * The members of the list are notified when the file descriptor
267 * The file descriptor is already associated with the port
270 pkevp
= pdp
->pd_portev
;
273 * Check if the re-association happens before the last
274 * submitted event of the file descriptor was retrieved.
275 * Clear the PORT_KEV_VALID flag if set. No new events
276 * should get submitted after this flag is cleared.
278 mutex_enter(&pkevp
->portkev_lock
);
279 if (pkevp
->portkev_flags
& PORT_KEV_VALID
) {
280 pkevp
->portkev_flags
&= ~PORT_KEV_VALID
;
282 if (pkevp
->portkev_flags
& PORT_KEV_DONEQ
) {
283 mutex_exit(&pkevp
->portkev_lock
);
285 * Remove any events that were already fired
286 * for this fd and are still in the port queue.
288 (void) port_remove_done_event(pkevp
);
290 mutex_exit(&pkevp
->portkev_lock
);
292 pkevp
->portkev_user
= user
;
295 pfd
->pfd_thread
= curthread
;
296 mutex_enter(&pkevp
->portkev_lock
);
297 pkevp
->portkev_events
= 0; /* no fired events */
298 pdp
->pd_events
= events
; /* events associated */
302 pkevp
->portkev_flags
|= PORT_KEV_VALID
;
303 mutex_exit(&pkevp
->portkev_lock
);
306 * do fop_poll and cache this poll fd.
308 * XXX - pollrelock() logic needs to know
309 * which pollcache lock to grab. It'd be a
310 * cleaner solution if we could pass pcp as
311 * an argument in fop_poll interface instead
312 * of implicitly passing it using thread_t
313 * struct. On the other hand, changing fop_poll
314 * interface will require all driver/file system
315 * poll routines to change.
317 curthread
->t_pollcache
= (pollcache_t
*)pcp
;
318 error
= fop_poll(fp
->f_vnode
, events
, 0, &revents
, &php
, NULL
);
319 curthread
->t_pollcache
= NULL
;
322 * The pc_lock can get dropped and reacquired in fop_poll.
323 * In the window pc_lock is dropped another thread in
324 * port_dissociate can remove the pfd from the port cache
326 * It is also possible for another thread to sneak in and do a
327 * port_associate on the same fd during the same window.
328 * For both these cases return the current value of error.
329 * The application should take care to ensure that the threads
330 * do not race with each other for association and disassociation
333 if (((pfd
= port_cache_lookup_fp(pcp
, fd
, fp
)) == NULL
) ||
334 (pfd
->pfd_thread
!= curthread
)) {
336 mutex_exit(&pcp
->pc_lock
);
341 * To keep synchronization between fop_poll above and
342 * pollhead_insert below, it is necessary to
343 * call fop_poll() again (see port_bind_pollhead()).
349 if (php
!= NULL
&& (pdp
->pd_php
!= php
)) {
351 * No events delivered yet.
352 * Bind pollhead pointer with current polldat_t structure.
353 * Sub-system will call pollwakeup() later with php as
356 error
= port_bind_pollhead(&php
, pdp
, &revents
);
358 * The pc_lock can get dropped and reacquired in fop_poll.
359 * In the window pc_lock is dropped another thread in
360 * port_dissociate can remove the pfd from the port cache
362 * It is also possible for another thread to sneak in and do a
363 * port_associate on the same fd during the same window.
364 * For both these cases return the current value of error.
365 * The application should take care to ensure that the threads
366 * do not race with each other for association
367 * and disassociation of the same fd.
369 if (((pfd
= port_cache_lookup_fp(pcp
, fd
, fp
)) == NULL
) ||
370 (pfd
->pfd_thread
!= curthread
)) {
372 mutex_exit(&pcp
->pc_lock
);
382 * Check if new events were detected and no events have been
383 * delivered. The revents was already set after the fop_poll
384 * above or it was updated in port_bind_pollhead().
386 mutex_enter(&pkevp
->portkev_lock
);
387 if (revents
&& (pkevp
->portkev_flags
& PORT_KEV_VALID
)) {
388 ASSERT((pkevp
->portkev_flags
& PORT_KEV_DONEQ
) == 0);
389 pkevp
->portkev_flags
&= ~PORT_KEV_VALID
;
390 revents
= revents
& (pdp
->pd_events
| POLLHUP
| POLLERR
);
391 /* send events to the event port */
392 pkevp
->portkev_events
= revents
;
394 * port_send_event will release the portkev_lock mutex.
396 port_send_event(pkevp
);
398 mutex_exit(&pkevp
->portkev_lock
);
402 mutex_exit(&pcp
->pc_lock
);
408 * If the portkev is not valid, then an event was
411 * If an event was delivered and got picked up, then
412 * we return error = 0 treating this as a successful
413 * port associate call. The thread which received
414 * the event gets control of the object.
417 mutex_enter(&pkevp
->portkev_lock
);
418 if (pkevp
->portkev_flags
& PORT_KEV_VALID
) {
419 pkevp
->portkev_flags
&= ~PORT_KEV_VALID
;
422 mutex_exit(&pkevp
->portkev_lock
);
424 if (!port_remove_fd_object(pfd
, pp
, pcp
) && !active
) {
428 mutex_exit(&pcp
->pc_lock
);
433 * The port_dissociate_fd() function dissociates the delivered file
434 * descriptor from the event port and removes already fired events.
435 * If a fd is shared between processes, all involved processes will get
436 * the same rights related to re-association of the fd with the port and
437 * retrieval of events from that fd.
438 * The process which associated the fd with a port for the first time
439 * becomes also the owner of the association. Only the owner of the
440 * association is allowed to dissociate the fd from the port.
443 port_dissociate_fd(port_t
*pp
, uintptr_t object
)
450 port_kevent_t
*pkevp
;
452 if (object
> (uintptr_t)INT_MAX
)
456 pcp
= pp
->port_queue
.portq_pcp
;
458 mutex_enter(&pcp
->pc_lock
);
459 if (pcp
->pc_hash
== NULL
) {
460 /* no file descriptor cache available */
461 mutex_exit(&pcp
->pc_lock
);
464 if ((fp
= getf(fd
)) == NULL
) {
465 mutex_exit(&pcp
->pc_lock
);
468 pfd
= port_cache_lookup_fp(pcp
, fd
, fp
);
471 mutex_exit(&pcp
->pc_lock
);
474 /* only association owner is allowed to remove the association */
475 if (curproc
->p_pid
!= PFTOD(pfd
)->pd_portev
->portkev_pid
) {
477 mutex_exit(&pcp
->pc_lock
);
481 /* remove port from the file descriptor interested list */
485 * Deactivate the association. No events get posted after
488 pkevp
= PFTOD(pfd
)->pd_portev
;
489 mutex_enter(&pkevp
->portkev_lock
);
490 if (pkevp
->portkev_flags
& PORT_KEV_VALID
) {
491 pkevp
->portkev_flags
&= ~PORT_KEV_VALID
;
496 mutex_exit(&pkevp
->portkev_lock
);
498 /* remove polldat & port event structure */
499 if (port_remove_fd_object(pfd
, pp
, pcp
)) {
501 * An event was found and removed from the
502 * port done queue. This means the event has not yet
503 * been retrieved. In this case we treat this as an active
510 mutex_exit(&pcp
->pc_lock
);
513 * Return ENOENT if there was no active association.
515 return ((active
? 0 : ENOENT
));
519 * Associate event port polldat_t structure with sub-system pointer to
520 * a pollhead_t structure.
523 port_bind_pollhead(pollhead_t
**php
, polldat_t
*pdp
, short *revents
)
528 /* polldat_t associated with another pollhead_t pointer */
529 if (pdp
->pd_php
!= NULL
)
530 pollhead_delete(pdp
->pd_php
, pdp
);
533 * Before pollhead_insert() pollwakeup() will not detect a polldat
534 * entry in the ph_list and the event notification will disappear.
535 * This happens because polldat_t is still not associated with
536 * the pointer to the pollhead_t structure.
538 pollhead_insert(*php
, pdp
);
541 * From now on event notification can be detected in pollwakeup(),
542 * Use fop_poll() again to check the current status of the event.
546 curthread
->t_pollcache
= (pollcache_t
*)pdp
->pd_pcache
;
547 error
= fop_poll(fp
->f_vnode
, pdp
->pd_events
, 0, revents
, php
, NULL
);
548 curthread
->t_pollcache
= NULL
;
553 * Grow the hash table. Rehash all the elements on the hash table.
556 port_cache_grow_hashtbl(port_fdcache_t
*pcp
)
565 ASSERT(MUTEX_HELD(&pcp
->pc_lock
));
566 oldsize
= pcp
->pc_hashsize
;
567 oldtbl
= pcp
->pc_hash
;
568 pcp
->pc_hashsize
*= PORTHASH_MULT
;
569 pcp
->pc_hash
= kmem_zalloc(pcp
->pc_hashsize
* sizeof (portfd_t
*),
572 * rehash existing elements
575 for (i
= 0; i
< oldsize
; i
++) {
578 while (pdp
!= NULL
) {
579 pdp1
= pdp
->pd_hashnext
;
580 port_cache_insert_fd(pcp
, pdp
);
584 kmem_free(oldtbl
, oldsize
* sizeof (portfd_t
*));
587 * This routine inserts a polldat into the portcache's hash table. It
588 * may be necessary to grow the size of the hash table.
591 port_cache_insert_fd(port_fdcache_t
*pcp
, polldat_t
*pdp
)
595 ASSERT(MUTEX_HELD(&pcp
->pc_lock
));
596 if (pcp
->pc_fdcount
> (pcp
->pc_hashsize
* PORTHASH_MULT
))
597 port_cache_grow_hashtbl(pcp
);
598 bucket
= PORT_FD_BUCKET(pcp
, pdp
->pd_fd
);
599 pdp
->pd_hashnext
= PFTOD(*bucket
);
600 *bucket
= PDTOF(pdp
);
606 * The port_remove_portfd() function dissociates the port from the fd
610 port_remove_portfd(polldat_t
*pdp
, port_fdcache_t
*pcp
)
616 ASSERT(MUTEX_HELD(&pcp
->pc_lock
));
617 pp
= pdp
->pd_portev
->portkev_port
;
618 fp
= getf(fd
= pdp
->pd_fd
);
620 * If we did not get the fp for pd_fd but its portfd_t
621 * still exist in the cache, it means the pd_fd is being
622 * closed by some other thread which will also free the portfd_t.
625 delfd_port(pdp
->pd_fd
, PDTOF(pdp
));
626 (void) port_remove_fd_object(PDTOF(pdp
), pp
, pcp
);
632 * This function is used by port_close_sourcefd() to destroy the cache
636 port_pcache_destroy(port_fdcache_t
*pcp
)
638 ASSERT(pcp
->pc_fdcount
== 0);
639 kmem_free(pcp
->pc_hash
, sizeof (polldat_t
*) * pcp
->pc_hashsize
);
640 mutex_destroy(&pcp
->pc_lock
);
641 kmem_free(pcp
, sizeof (port_fdcache_t
));
645 * port_close() calls this function to request the PORT_SOURCE_FD source
646 * to remove/free all resources allocated and associated with the port.
650 port_close_sourcefd(void *arg
, int port
, pid_t pid
, int lastclose
)
659 pcp
= pp
->port_queue
.portq_pcp
;
661 /* no cache available -> nothing to do */
664 mutex_enter(&pcp
->pc_lock
);
666 * Scan the cache and free all allocated portfd_t and port_kevent_t
669 hashtbl
= pcp
->pc_hash
;
670 for (index
= 0; index
< pcp
->pc_hashsize
; index
++) {
671 for (pdp
= PFTOD(hashtbl
[index
]); pdp
!= NULL
; pdp
= pdpnext
) {
672 pdpnext
= pdp
->pd_hashnext
;
673 if (pid
== pdp
->pd_portev
->portkev_pid
) {
675 * remove polldat + port_event_t from cache
676 * only when current process did the
679 port_remove_portfd(pdp
, pcp
);
685 * Wait for all the portfd's to be freed.
686 * The remaining portfd_t's are the ones we did not
687 * free in port_remove_portfd since some other thread
688 * is closing the fd. These threads will free the portfd_t's
689 * once we drop the pc_lock mutex.
691 while (pcp
->pc_fdcount
) {
692 (void) cv_wait_sig(&pcp
->pc_lclosecv
, &pcp
->pc_lock
);
694 /* event port vnode will be destroyed -> remove everything */
695 pp
->port_queue
.portq_pcp
= NULL
;
697 mutex_exit(&pcp
->pc_lock
);
700 * pollwakeup() can not further interact with this cache
701 * (all polldat structs are removed from pollhead entries).
704 port_pcache_destroy(pcp
);