Merge commit '2cedd8f0ecbd2b29bf0aac72bb8b7413b0326938' into merges
[unleashed.git] / kernel / fs / portfs / port_fd.c
blob6351e4c5abfbd0ffc34d1ee8db00a619ac593d98
1 /*
2 * CDDL HEADER START
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
19 * CDDL HEADER END
23 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
24 * Use is subject to license terms.
28 #include <sys/types.h>
29 #include <sys/systm.h>
30 #include <sys/stat.h>
31 #include <sys/errno.h>
32 #include <sys/kmem.h>
33 #include <sys/sysmacros.h>
34 #include <sys/debug.h>
35 #include <sys/poll_impl.h>
36 #include <sys/port_impl.h>
38 #define PORTHASH_START 256 /* start cache space for events */
39 #define PORTHASH_MULT 2 /* growth threshold and factor */
41 /* local functions */
42 static int port_fd_callback(void *, int *, pid_t, int, void *);
43 static int port_bind_pollhead(pollhead_t **, polldat_t *, short *);
44 static void port_close_sourcefd(void *, int, pid_t, int);
45 static void port_cache_insert_fd(port_fdcache_t *, polldat_t *);
48 * port_fd_callback()
49 * The event port framework uses callback functions to notify associated
50 * event sources about actions on source specific objects.
51 * The source itself defines the "arg" required to identify the object with
52 * events. In the port_fd_callback() case the "arg" is a pointer to portfd_t
53 * structure. The portfd_t structure is specific for PORT_SOURCE_FD source.
54 * The port_fd_callback() function is notified in three cases:
55 * - PORT_CALLBACK_DEFAULT
56 * The object (fd) will be delivered to the application.
57 * - PORT_CALLBACK_DISSOCIATE
58 * The object (fd) will be dissociated from the port.
59 * - PORT_CALLBACK_CLOSE
60 * The object (fd) will be dissociated from the port because the port
61 * is being closed.
62 * A fd is shareable between processes only when
63 * - processes have the same fd id and
64 * - processes have the same fp.
65 * A fd becomes shareable:
66 * - on fork() across parent and child process and
67 * - when I_SENDFD is used to pass file descriptors between parent and child
68 * immediately after fork() (the sender and receiver must get the same
69 * file descriptor id).
70 * If a fd is shared between processes, all involved processes will get
71 * the same rights related to re-association of the fd with the port and
72 * retrieve of events from that fd.
73 * The process which associated the fd with a port for the first time
74 * becomes also the owner of the association. Only the owner of the
75 * association is allowed to dissociate the fd from the port.
77 /* ARGSUSED */
78 static int
79 port_fd_callback(void *arg, int *events, pid_t pid, int flag, void *evp)
81 portfd_t *pfd = (portfd_t *)arg;
82 polldat_t *pdp = PFTOD(pfd);
83 port_fdcache_t *pcp;
84 file_t *fp;
85 int error;
87 ASSERT((pdp != NULL) && (events != NULL));
88 switch (flag) {
89 case PORT_CALLBACK_DEFAULT:
90 if (curproc->p_pid != pid) {
92 * Check if current process is allowed to retrieve
93 * events from this fd.
95 fp = getf(pdp->pd_fd);
96 if (fp == NULL) {
97 error = EACCES; /* deny delivery of events */
98 break;
100 releasef(pdp->pd_fd);
101 if (fp != pdp->pd_fp) {
102 error = EACCES; /* deny delivery of events */
103 break;
106 *events = pdp->pd_portev->portkev_events; /* update events */
107 error = 0;
108 break;
109 case PORT_CALLBACK_DISSOCIATE:
110 error = 0;
111 break;
112 case PORT_CALLBACK_CLOSE:
113 /* remove polldat/portfd struct */
114 pdp->pd_portev = NULL;
115 pcp = (port_fdcache_t *)pdp->pd_pcache;
116 mutex_enter(&pcp->pc_lock);
117 pdp->pd_fp = NULL;
118 pdp->pd_events = 0;
119 if (pdp->pd_php != NULL) {
120 pollhead_delete(pdp->pd_php, pdp);
121 pdp->pd_php = NULL;
123 port_pcache_remove_fd(pcp, pfd);
124 mutex_exit(&pcp->pc_lock);
125 error = 0;
126 break;
127 default:
128 error = EINVAL;
129 break;
131 return (error);
135 * This routine returns a pointer to a cached poll fd entry, or NULL if it
136 * does not find it in the hash table.
137 * The fd is used as index.
138 * The fd and the fp are used to detect a valid entry.
139 * This function returns a pointer to a valid portfd_t structure only when
140 * the fd and the fp in the args match the entries in polldat_t.
142 portfd_t *
143 port_cache_lookup_fp(port_fdcache_t *pcp, int fd, file_t *fp)
145 polldat_t *pdp;
146 portfd_t **bucket;
148 ASSERT(MUTEX_HELD(&pcp->pc_lock));
149 bucket = PORT_FD_BUCKET(pcp, fd);
150 pdp = PFTOD(*bucket);
151 while (pdp != NULL) {
152 if (pdp->pd_fd == fd && pdp->pd_fp == fp)
153 break;
154 pdp = pdp->pd_hashnext;
156 return (PDTOF(pdp));
160 * port_associate_fd()
161 * This function associates new file descriptors with a port or
162 * reactivate already associated file descriptors.
163 * The reactivation also updates the events types to be checked and the
164 * attached user pointer.
165 * Per port a cache is used to store associated file descriptors.
166 * Internally the fop_poll interface is used to poll for existing events.
167 * The fop_poll interface can also deliver a pointer to a pollhead_t structure
168 * which is used to enqueue polldat_t structures with pending events.
169 * If fop_poll immediately returns valid events (revents) then those events
170 * will be submitted to the event port with port_send_event().
171 * Otherwise fop_poll does not return events but it delivers a pointer to a
172 * pollhead_t structure. In such a case the corresponding file system behind
173 * fop_poll will use the pollwakeup() function to notify about existing
174 * events.
177 port_associate_fd(port_t *pp, int source, uintptr_t object, int events,
178 void *user)
180 port_fdcache_t *pcp;
181 int fd;
182 struct pollhead *php = NULL;
183 portfd_t *pfd;
184 polldat_t *pdp;
185 file_t *fp;
186 port_kevent_t *pkevp;
187 short revents;
188 int error = 0;
189 int active;
191 pcp = pp->port_queue.portq_pcp;
192 if (object > (uintptr_t)INT_MAX)
193 return (EBADFD);
195 fd = object;
197 if ((fp = getf(fd)) == NULL)
198 return (EBADFD);
200 mutex_enter(&pcp->pc_lock);
202 if (pcp->pc_hash == NULL) {
204 * This is the first time that a fd is being associated with
205 * the current port:
206 * - create PORT_SOURCE_FD cache
207 * - associate PORT_SOURCE_FD source with the port
209 error = port_associate_ksource(pp->port_fd, PORT_SOURCE_FD,
210 NULL, port_close_sourcefd, pp, NULL);
211 if (error) {
212 mutex_exit(&pcp->pc_lock);
213 releasef(fd);
214 return (error);
217 /* create polldat cache */
218 pcp->pc_hashsize = PORTHASH_START;
219 pcp->pc_hash = kmem_zalloc(pcp->pc_hashsize *
220 sizeof (portfd_t *), KM_SLEEP);
221 pfd = NULL;
222 } else {
223 /* Check if the fd/fp is already associated with the port */
224 pfd = port_cache_lookup_fp(pcp, fd, fp);
227 if (pfd == NULL) {
229 * new entry
230 * Allocate a polldat_t structure per fd
231 * The use of the polldat_t structure to cache file descriptors
232 * is required to be able to share the pollwakeup() function
233 * with poll(2) and devpoll(7d).
235 pfd = kmem_zalloc(sizeof (portfd_t), KM_SLEEP);
236 pdp = PFTOD(pfd);
237 pdp->pd_fd = fd;
238 pdp->pd_fp = fp;
239 pdp->pd_pcache = (void *)pcp;
241 /* Allocate a port event structure per fd */
242 error = port_alloc_event_local(pp, source, PORT_ALLOC_CACHED,
243 &pdp->pd_portev);
244 if (error) {
245 kmem_free(pfd, sizeof (portfd_t));
246 releasef(fd);
247 mutex_exit(&pcp->pc_lock);
248 return (error);
250 pkevp = pdp->pd_portev;
251 pkevp->portkev_callback = port_fd_callback;
252 pkevp->portkev_arg = pfd;
254 /* add portfd_t entry to the cache */
255 port_cache_insert_fd(pcp, pdp);
256 pkevp->portkev_object = fd;
257 pkevp->portkev_user = user;
260 * Add current port to the file descriptor interested list
261 * The members of the list are notified when the file descriptor
262 * is closed.
264 addfd_port(fd, pfd);
265 } else {
267 * The file descriptor is already associated with the port
269 pdp = PFTOD(pfd);
270 pkevp = pdp->pd_portev;
273 * Check if the re-association happens before the last
274 * submitted event of the file descriptor was retrieved.
275 * Clear the PORT_KEV_VALID flag if set. No new events
276 * should get submitted after this flag is cleared.
278 mutex_enter(&pkevp->portkev_lock);
279 if (pkevp->portkev_flags & PORT_KEV_VALID) {
280 pkevp->portkev_flags &= ~PORT_KEV_VALID;
282 if (pkevp->portkev_flags & PORT_KEV_DONEQ) {
283 mutex_exit(&pkevp->portkev_lock);
285 * Remove any events that where already fired
286 * for this fd and are still in the port queue.
288 (void) port_remove_done_event(pkevp);
289 } else {
290 mutex_exit(&pkevp->portkev_lock);
292 pkevp->portkev_user = user;
295 pfd->pfd_thread = curthread;
296 mutex_enter(&pkevp->portkev_lock);
297 pkevp->portkev_events = 0; /* no fired events */
298 pdp->pd_events = events; /* events associated */
300 * allow new events.
302 pkevp->portkev_flags |= PORT_KEV_VALID;
303 mutex_exit(&pkevp->portkev_lock);
306 * do fop_poll and cache this poll fd.
308 * XXX - pollrelock() logic needs to know
309 * which pollcache lock to grab. It'd be a
310 * cleaner solution if we could pass pcp as
311 * an arguement in fop_poll interface instead
312 * of implicitly passing it using thread_t
313 * struct. On the other hand, changing fop_poll
314 * interface will require all driver/file system
315 * poll routine to change.
317 curthread->t_pollcache = (pollcache_t *)pcp;
318 error = fop_poll(fp->f_vnode, events, 0, &revents, &php, NULL);
319 curthread->t_pollcache = NULL;
322 * The pc_lock can get dropped and reaquired in fop_poll.
323 * In the window pc_lock is dropped another thread in
324 * port_dissociate can remove the pfd from the port cache
325 * and free the pfd.
326 * It is also possible for another thread to sneak in and do a
327 * port_associate on the same fd during the same window.
328 * For both these cases return the current value of error.
329 * The application should take care to ensure that the threads
330 * do not race with each other for association and disassociation
331 * of the same fd.
333 if (((pfd = port_cache_lookup_fp(pcp, fd, fp)) == NULL) ||
334 (pfd->pfd_thread != curthread)) {
335 releasef(fd);
336 mutex_exit(&pcp->pc_lock);
337 return (error);
341 * To keep synchronization between fop_poll above and
342 * pollhead_insert below, it is necessary to
343 * call fop_poll() again (see port_bind_pollhead()).
345 if (error) {
346 goto errout;
349 if (php != NULL && (pdp->pd_php != php)) {
351 * No events delivered yet.
352 * Bind pollhead pointer with current polldat_t structure.
353 * Sub-system will call pollwakeup() later with php as
354 * argument.
356 error = port_bind_pollhead(&php, pdp, &revents);
358 * The pc_lock can get dropped and reaquired in fop_poll.
359 * In the window pc_lock is dropped another thread in
360 * port_dissociate can remove the pfd from the port cache
361 * and free the pfd.
362 * It is also possible for another thread to sneak in and do a
363 * port_associate on the same fd during the same window.
364 * For both these cases return the current value of error.
365 * The application should take care to ensure that the threads
366 * do not race with each other for association
367 * and disassociation of the same fd.
369 if (((pfd = port_cache_lookup_fp(pcp, fd, fp)) == NULL) ||
370 (pfd->pfd_thread != curthread)) {
371 releasef(fd);
372 mutex_exit(&pcp->pc_lock);
373 return (error);
376 if (error) {
377 goto errout;
382 * Check if new events where detected and no events have been
383 * delivered. The revents was already set after the fop_poll
384 * above or it was updated in port_bind_pollhead().
386 mutex_enter(&pkevp->portkev_lock);
387 if (revents && (pkevp->portkev_flags & PORT_KEV_VALID)) {
388 ASSERT((pkevp->portkev_flags & PORT_KEV_DONEQ) == 0);
389 pkevp->portkev_flags &= ~PORT_KEV_VALID;
390 revents = revents & (pdp->pd_events | POLLHUP | POLLERR);
391 /* send events to the event port */
392 pkevp->portkev_events = revents;
394 * port_send_event will release the portkev_lock mutex.
396 port_send_event(pkevp);
397 } else {
398 mutex_exit(&pkevp->portkev_lock);
401 releasef(fd);
402 mutex_exit(&pcp->pc_lock);
403 return (error);
405 errout:
406 delfd_port(fd, pfd);
408 * If the portkev is not valid, then an event was
409 * delivered.
411 * If an event was delivered and got picked up, then
412 * we return error = 0 treating this as a successful
413 * port associate call. The thread which received
414 * the event gets control of the object.
416 active = 0;
417 mutex_enter(&pkevp->portkev_lock);
418 if (pkevp->portkev_flags & PORT_KEV_VALID) {
419 pkevp->portkev_flags &= ~PORT_KEV_VALID;
420 active = 1;
422 mutex_exit(&pkevp->portkev_lock);
424 if (!port_remove_fd_object(pfd, pp, pcp) && !active) {
425 error = 0;
427 releasef(fd);
428 mutex_exit(&pcp->pc_lock);
429 return (error);
433 * The port_dissociate_fd() function dissociates the delivered file
434 * descriptor from the event port and removes already fired events.
435 * If a fd is shared between processes, all involved processes will get
436 * the same rights related to re-association of the fd with the port and
437 * retrieve of events from that fd.
438 * The process which associated the fd with a port for the first time
439 * becomes also the owner of the association. Only the owner of the
440 * association is allowed to dissociate the fd from the port.
443 port_dissociate_fd(port_t *pp, uintptr_t object)
445 int fd;
446 port_fdcache_t *pcp;
447 portfd_t *pfd;
448 file_t *fp;
449 int active;
450 port_kevent_t *pkevp;
452 if (object > (uintptr_t)INT_MAX)
453 return (EBADFD);
455 fd = object;
456 pcp = pp->port_queue.portq_pcp;
458 mutex_enter(&pcp->pc_lock);
459 if (pcp->pc_hash == NULL) {
460 /* no file descriptor cache available */
461 mutex_exit(&pcp->pc_lock);
462 return (ENOENT);
464 if ((fp = getf(fd)) == NULL) {
465 mutex_exit(&pcp->pc_lock);
466 return (EBADFD);
468 pfd = port_cache_lookup_fp(pcp, fd, fp);
469 if (pfd == NULL) {
470 releasef(fd);
471 mutex_exit(&pcp->pc_lock);
472 return (ENOENT);
474 /* only association owner is allowed to remove the association */
475 if (curproc->p_pid != PFTOD(pfd)->pd_portev->portkev_pid) {
476 releasef(fd);
477 mutex_exit(&pcp->pc_lock);
478 return (EACCES);
481 /* remove port from the file descriptor interested list */
482 delfd_port(fd, pfd);
485 * Deactivate the association. No events get posted after
486 * this.
488 pkevp = PFTOD(pfd)->pd_portev;
489 mutex_enter(&pkevp->portkev_lock);
490 if (pkevp->portkev_flags & PORT_KEV_VALID) {
491 pkevp->portkev_flags &= ~PORT_KEV_VALID;
492 active = 1;
493 } else {
494 active = 0;
496 mutex_exit(&pkevp->portkev_lock);
498 /* remove polldat & port event structure */
499 if (port_remove_fd_object(pfd, pp, pcp)) {
501 * An event was found and removed from the
502 * port done queue. This means the event has not yet
503 * been retrived. In this case we treat this as an active
504 * association.
506 ASSERT(active == 0);
507 active = 1;
509 releasef(fd);
510 mutex_exit(&pcp->pc_lock);
513 * Return ENOENT if there was no active association.
515 return ((active ? 0 : ENOENT));
519 * Associate event port polldat_t structure with sub-system pointer to
520 * a polhead_t structure.
522 static int
523 port_bind_pollhead(pollhead_t **php, polldat_t *pdp, short *revents)
525 int error;
526 file_t *fp;
528 /* polldat_t associated with another pollhead_t pointer */
529 if (pdp->pd_php != NULL)
530 pollhead_delete(pdp->pd_php, pdp);
533 * Before pollhead_insert() pollwakeup() will not detect a polldat
534 * entry in the ph_list and the event notification will disappear.
535 * This happens because polldat_t is still not associated with
536 * the pointer to the pollhead_t structure.
538 pollhead_insert(*php, pdp);
541 * From now on event notification can be detected in pollwakeup(),
542 * Use fop_poll() again to check the current status of the event.
544 pdp->pd_php = *php;
545 fp = pdp->pd_fp;
546 curthread->t_pollcache = (pollcache_t *)pdp->pd_pcache;
547 error = fop_poll(fp->f_vnode, pdp->pd_events, 0, revents, php, NULL);
548 curthread->t_pollcache = NULL;
549 return (error);
553 * Grow the hash table. Rehash all the elements on the hash table.
555 static void
556 port_cache_grow_hashtbl(port_fdcache_t *pcp)
558 portfd_t **oldtbl;
559 polldat_t *pdp;
560 portfd_t *pfd;
561 polldat_t *pdp1;
562 int oldsize;
563 int i;
565 ASSERT(MUTEX_HELD(&pcp->pc_lock));
566 oldsize = pcp->pc_hashsize;
567 oldtbl = pcp->pc_hash;
568 pcp->pc_hashsize *= PORTHASH_MULT;
569 pcp->pc_hash = kmem_zalloc(pcp->pc_hashsize * sizeof (portfd_t *),
570 KM_SLEEP);
572 * rehash existing elements
574 pcp->pc_fdcount = 0;
575 for (i = 0; i < oldsize; i++) {
576 pfd = oldtbl[i];
577 pdp = PFTOD(pfd);
578 while (pdp != NULL) {
579 pdp1 = pdp->pd_hashnext;
580 port_cache_insert_fd(pcp, pdp);
581 pdp = pdp1;
584 kmem_free(oldtbl, oldsize * sizeof (portfd_t *));
587 * This routine inserts a polldat into the portcache's hash table. It
588 * may be necessary to grow the size of the hash table.
590 static void
591 port_cache_insert_fd(port_fdcache_t *pcp, polldat_t *pdp)
593 portfd_t **bucket;
595 ASSERT(MUTEX_HELD(&pcp->pc_lock));
596 if (pcp->pc_fdcount > (pcp->pc_hashsize * PORTHASH_MULT))
597 port_cache_grow_hashtbl(pcp);
598 bucket = PORT_FD_BUCKET(pcp, pdp->pd_fd);
599 pdp->pd_hashnext = PFTOD(*bucket);
600 *bucket = PDTOF(pdp);
601 pcp->pc_fdcount++;
606 * The port_remove_portfd() function dissociates the port from the fd
607 * and vive versa.
609 static void
610 port_remove_portfd(polldat_t *pdp, port_fdcache_t *pcp)
612 port_t *pp;
613 file_t *fp;
614 int fd;
616 ASSERT(MUTEX_HELD(&pcp->pc_lock));
617 pp = pdp->pd_portev->portkev_port;
618 fp = getf(fd = pdp->pd_fd);
620 * If we did not get the fp for pd_fd but its portfd_t
621 * still exist in the cache, it means the pd_fd is being
622 * closed by some other thread which will also free the portfd_t.
624 if (fp != NULL) {
625 delfd_port(pdp->pd_fd, PDTOF(pdp));
626 (void) port_remove_fd_object(PDTOF(pdp), pp, pcp);
627 releasef(fd);
632 * This function is used by port_close_sourcefd() to destroy the cache
633 * on last close.
635 static void
636 port_pcache_destroy(port_fdcache_t *pcp)
638 ASSERT(pcp->pc_fdcount == 0);
639 kmem_free(pcp->pc_hash, sizeof (polldat_t *) * pcp->pc_hashsize);
640 mutex_destroy(&pcp->pc_lock);
641 kmem_free(pcp, sizeof (port_fdcache_t));
645 * port_close() calls this function to request the PORT_SOURCE_FD source
646 * to remove/free all resources allocated and associated with the port.
648 /* ARGSUSED */
649 static void
650 port_close_sourcefd(void *arg, int port, pid_t pid, int lastclose)
652 port_t *pp = arg;
653 port_fdcache_t *pcp;
654 portfd_t **hashtbl;
655 polldat_t *pdp;
656 polldat_t *pdpnext;
657 int index;
659 pcp = pp->port_queue.portq_pcp;
660 if (pcp == NULL)
661 /* no cache available -> nothing to do */
662 return;
664 mutex_enter(&pcp->pc_lock);
666 * Scan the cache and free all allocated portfd_t and port_kevent_t
667 * structures.
669 hashtbl = pcp->pc_hash;
670 for (index = 0; index < pcp->pc_hashsize; index++) {
671 for (pdp = PFTOD(hashtbl[index]); pdp != NULL; pdp = pdpnext) {
672 pdpnext = pdp->pd_hashnext;
673 if (pid == pdp->pd_portev->portkev_pid) {
675 * remove polldat + port_event_t from cache
676 * only when current process did the
677 * association.
679 port_remove_portfd(pdp, pcp);
683 if (lastclose) {
685 * Wait for all the portfd's to be freed.
686 * The remaining portfd_t's are the once we did not
687 * free in port_remove_portfd since some other thread
688 * is closing the fd. These threads will free the portfd_t's
689 * once we drop the pc_lock mutex.
691 while (pcp->pc_fdcount) {
692 (void) cv_wait_sig(&pcp->pc_lclosecv, &pcp->pc_lock);
694 /* event port vnode will be destroyed -> remove everything */
695 pp->port_queue.portq_pcp = NULL;
697 mutex_exit(&pcp->pc_lock);
699 * last close:
700 * pollwakeup() can not further interact with this cache
701 * (all polldat structs are removed from pollhead entries).
703 if (lastclose)
704 port_pcache_destroy(pcp);