Index: sys/kern/kern_event.c
===================================================================
--- sys/kern/kern_event.c	(revision 197652)
+++ sys/kern/kern_event.c	(working copy)
@@ -392,30 +392,82 @@ filt_proc(struct knote *kn, long hint)
 		return (1);
 	}
 
-	/*
-	 * process forked, and user wants to track the new process,
-	 * so attach a new knote to it, and immediately report an
-	 * event with the parent's pid.
-	 */
-	if ((event == NOTE_FORK) && (kn->kn_sfflags & NOTE_TRACK)) {
-		struct kevent kev;
-		int error;
+	return (kn->kn_fflags != 0);
+}
+
+/*
+ * Called when a process forks.  It mostly does the same as knote(),
+ * activating all knotes registered to be activated when the process
+ * forks.  Additionally, for each knote attached to the parent, check
+ * whether the user wants to track the new process.  If so, attach a
+ * new knote to it, and immediately report an event with the child's
+ * pid.
+ */
+void
+knote_fork(struct knlist *list, int pid)
+{
+	struct kqueue *kq;
+	struct knote *kn;
+	struct kevent kev;
+	int error;
+
+	if (list == NULL)
+		return;
+	list->kl_lock(list->kl_lockarg);
+
+	SLIST_FOREACH(kn, &list->kl_list, kn_selnext) {
+		if ((kn->kn_status & KN_INFLUX) == KN_INFLUX)
+			continue;
+		kq = kn->kn_kq;
+		KQ_LOCK(kq);
+		if ((kn->kn_status & KN_INFLUX) == KN_INFLUX) {
+			KQ_UNLOCK(kq);
+			continue;
+		}
 
 		/*
-		 * register knote with new process.
+		 * As in knote(), activate the event.
 		 */
-		kev.ident = hint & NOTE_PDATAMASK;	/* pid */
+		if ((kn->kn_sfflags & NOTE_TRACK) == 0) {
+			kn->kn_status |= KN_HASKQLOCK;
+			if (kn->kn_fop->f_event(kn, NOTE_FORK | pid))
+				KNOTE_ACTIVATE(kn, 1);
+			kn->kn_status &= ~KN_HASKQLOCK;
+			KQ_UNLOCK(kq);
+			continue;
+		}
+
+		/*
+		 * The NOTE_TRACK case.  In addition to activating the
+		 * event, we need to register a new event to track the
+		 * child.  Drop the locks in preparation for the call
+		 * to kqueue_register().
+		 */
+		kn->kn_status |= KN_INFLUX;
+		KQ_UNLOCK(kq);
+		list->kl_unlock(list->kl_lockarg);
+
+		/*
+		 * Activate the existing knote and register a knote
+		 * with the new process.
+		 */
+		kev.ident = pid;
 		kev.filter = kn->kn_filter;
 		kev.flags = kn->kn_flags | EV_ADD | EV_ENABLE | EV_FLAG1;
 		kev.fflags = kn->kn_sfflags;
-		kev.data = kn->kn_id;			/* parent */
-		kev.udata = kn->kn_kevent.udata;	/* preserve udata */
-		error = kqueue_register(kn->kn_kq, &kev, NULL, 0);
+		kev.data = kn->kn_id;		/* parent */
+		kev.udata = kn->kn_kevent.udata;/* preserve udata */
+		error = kqueue_register(kq, &kev, NULL, 0);
+		if (kn->kn_fop->f_event(kn, NOTE_FORK | pid))
+			KNOTE_ACTIVATE(kn, 0);
 		if (error)
 			kn->kn_fflags |= NOTE_TRACKERR;
+		KQ_LOCK(kq);
+		kn->kn_status &= ~KN_INFLUX;
+		KQ_UNLOCK_FLUX(kq);
+		list->kl_lock(list->kl_lockarg);
 	}
-
-	return (kn->kn_fflags != 0);
+	list->kl_unlock(list->kl_lockarg);
 }
 
 static int
@@ -1123,7 +1175,7 @@ kqueue_scan(struct kqueue *kq, int maxevents, stru
 	struct kevent *kevp;
 	struct timeval atv, rtv, ttv;
 	struct knote *kn, *marker;
-	int count, timeout, nkev, error;
+	int count, timeout, nkev, error, influx;
 	int haskqglobal;
 
 	count = maxevents;
@@ -1193,12 +1245,17 @@ start:
 	}
 
 	TAILQ_INSERT_TAIL(&kq->kq_head, marker, kn_tqe);
+	influx = 0;
 	while (count) {
 		KQ_OWNED(kq);
 		kn = TAILQ_FIRST(&kq->kq_head);
 
 		if ((kn->kn_status == KN_MARKER && kn != marker) ||
 		    (kn->kn_status & KN_INFLUX) == KN_INFLUX) {
+			if (influx) {
+				influx = 0;
+				KQ_FLUX_WAKEUP(kq);
+			}
 			kq->kq_state |= KQ_FLUXWAIT;
 			error = msleep(kq, &kq->kq_lock, PSOCK,
 			    "kqflxwt", 0);
@@ -1248,6 +1305,7 @@ start:
 			    ~(KN_QUEUED | KN_ACTIVE | KN_INFLUX);
 			kq->kq_count--;
 			KN_LIST_UNLOCK(kn);
+			influx = 1;
 			continue;
 		}
 		*kevp = kn->kn_kevent;
@@ -1263,6 +1321,7 @@ start:
 
 			kn->kn_status &= ~(KN_INFLUX);
 			KN_LIST_UNLOCK(kn);
+			influx = 1;
 		}
 
 		/* we are returning a copy to the user */
@@ -1271,6 +1330,7 @@ start:
 		count--;
 
 		if (nkev == KQ_NEVENTS) {
+			influx = 0;
 			KQ_UNLOCK_FLUX(kq);
 			error = k_ops->k_copyout(k_ops->arg, keva, nkev);
 			nkev = 0;
@@ -1434,8 +1494,11 @@ kqueue_close(struct file *fp, struct thread *td)
 
 	for (i = 0; i < kq->kq_knlistsize; i++) {
 		while ((kn = SLIST_FIRST(&kq->kq_knlist[i])) != NULL) {
-			KASSERT((kn->kn_status & KN_INFLUX) == 0,
-			    ("KN_INFLUX set when not suppose to be"));
+			if ((kn->kn_status & KN_INFLUX) == KN_INFLUX) {
+				kq->kq_state |= KQ_FLUXWAIT;
+				msleep(kq, &kq->kq_lock, PSOCK, "kqclo1", 0);
+				continue;
+			}
 			kn->kn_status |= KN_INFLUX;
 			KQ_UNLOCK(kq);
 			if (!(kn->kn_status & KN_DETACHED))
@@ -1447,8 +1510,12 @@ kqueue_close(struct file *fp, struct thread *td)
 	if (kq->kq_knhashmask != 0) {
 		for (i = 0; i <= kq->kq_knhashmask; i++) {
 			while ((kn = SLIST_FIRST(&kq->kq_knhash[i])) != NULL) {
-				KASSERT((kn->kn_status & KN_INFLUX) == 0,
-				    ("KN_INFLUX set when not suppose to be"));
+				if ((kn->kn_status & KN_INFLUX) == KN_INFLUX) {
+					kq->kq_state |= KQ_FLUXWAIT;
+					msleep(kq, &kq->kq_lock, PSOCK,
+					    "kqclo2", 0);
+					continue;
+				}
 				kn->kn_status |= KN_INFLUX;
 				KQ_UNLOCK(kq);
 				if (!(kn->kn_status & KN_DETACHED))
Index: sys/kern/kern_fork.c
===================================================================
--- sys/kern/kern_fork.c	(revision 197652)
+++ sys/kern/kern_fork.c	(working copy)
@@ -699,14 +699,12 @@ again:
 	 */
 	PROC_LOCK(p1);
 	_PRELE(p1);
+	PROC_UNLOCK(p1);
 
 	/*
 	 * Tell any interested parties about the new process.
 	 */
-	KNOTE_LOCKED(&p1->p_klist, NOTE_FORK | p2->p_pid);
-
-	PROC_UNLOCK(p1);
-
+	knote_fork(&p1->p_klist, p2->p_pid);
 	/*
 	 * Preserve synchronization semantics of vfork.  If waiting for
 	 * child to exec or exit, set P_PPWAIT on child, and sleep on our
Index: sys/kern/sys_pipe.c
===================================================================
--- sys/kern/sys_pipe.c	(revision 197652)
+++ sys/kern/sys_pipe.c	(working copy)
@@ -268,8 +268,8 @@ pipe_zone_ctor(void *mem, int size, void *arg, int
 	 * one at a time.  When both are free'd, then the whole pair
 	 * is released.
 	 */
-	rpipe->pipe_present = 1;
-	wpipe->pipe_present = 1;
+	rpipe->pipe_present = PIPE_ACTIVE;
+	wpipe->pipe_present = PIPE_ACTIVE;
 
 	/*
 	 * Eventually, the MAC Framework may initialize the label
@@ -1003,7 +1003,8 @@ pipe_write(fp, uio, active_cred, flags, td)
 		/*
 		 * detect loss of pipe read side, issue SIGPIPE if lost.
 		 */
-		if ((!wpipe->pipe_present) || (wpipe->pipe_state & PIPE_EOF)) {
+		if (wpipe->pipe_present != PIPE_ACTIVE ||
+		    (wpipe->pipe_state & PIPE_EOF)) {
 			pipeunlock(wpipe);
 			PIPE_UNLOCK(rpipe);
 			return (EPIPE);
@@ -1361,13 +1362,14 @@ pipe_poll(fp, events, active_cred, td)
 			revents |= events & (POLLIN | POLLRDNORM);
 
 	if (events & (POLLOUT | POLLWRNORM))
-		if (!wpipe->pipe_present || (wpipe->pipe_state & PIPE_EOF) ||
+		if (wpipe->pipe_present != PIPE_ACTIVE ||
+		    (wpipe->pipe_state & PIPE_EOF) ||
 		    (((wpipe->pipe_state & PIPE_DIRECTW) == 0) &&
 		     (wpipe->pipe_buffer.size - wpipe->pipe_buffer.cnt) >= PIPE_BUF))
 			revents |= events & (POLLOUT | POLLWRNORM);
 
 	if ((rpipe->pipe_state & PIPE_EOF) ||
-	    (!wpipe->pipe_present) ||
+	    wpipe->pipe_present != PIPE_ACTIVE ||
 	    (wpipe->pipe_state & PIPE_EOF))
 		revents |= POLLHUP;
 
@@ -1506,7 +1508,7 @@ pipeclose(cpipe)
 	 * Disconnect from peer, if any.
 	 */
 	ppipe = cpipe->pipe_peer;
-	if (ppipe->pipe_present != 0) {
+	if (ppipe->pipe_present == PIPE_ACTIVE) {
 		pipeselwakeup(ppipe);
 
 		ppipe->pipe_state |= PIPE_EOF;
@@ -1523,16 +1525,23 @@ pipeclose(cpipe)
 	PIPE_UNLOCK(cpipe);
 	pipe_free_kmem(cpipe);
 	PIPE_LOCK(cpipe);
-	cpipe->pipe_present = 0;
+	cpipe->pipe_present = PIPE_CLOSING;
 	pipeunlock(cpipe);
+
+	/*
+	 * knlist_clear() may sleep, dropping the PIPE_MTX.  Set
+	 * PIPE_FINALIZED, which allows the other end to free the
+	 * pipe pair, only after the knotes are completely dismantled.
+	 */
 	knlist_clear(&cpipe->pipe_sel.si_note, 1);
+	cpipe->pipe_present = PIPE_FINALIZED;
 	knlist_destroy(&cpipe->pipe_sel.si_note);
 
 	/*
 	 * If both endpoints are now closed, release the memory for the
 	 * pipe pair.  If not, unlock.
 	 */
-	if (ppipe->pipe_present == 0) {
+	if (ppipe->pipe_present == PIPE_FINALIZED) {
 		PIPE_UNLOCK(cpipe);
 #ifdef MAC
 		mac_destroy_pipe(pp);
@@ -1556,7 +1565,7 @@ pipe_kqfilter(struct file *fp, struct knote *kn)
 		break;
 	case EVFILT_WRITE:
 		kn->kn_fop = &pipe_wfiltops;
-		if (!cpipe->pipe_peer->pipe_present) {
+		if (cpipe->pipe_peer->pipe_present != PIPE_ACTIVE) {
 			/* other end of pipe has been closed */
 			PIPE_UNLOCK(cpipe);
 			return (EPIPE);
@@ -1579,13 +1588,8 @@ filt_pipedetach(struct knote *kn)
 	struct pipe *cpipe = (struct pipe *)kn->kn_fp->f_data;
 
 	PIPE_LOCK(cpipe);
-	if (kn->kn_filter == EVFILT_WRITE) {
-		if (!cpipe->pipe_peer->pipe_present) {
-			PIPE_UNLOCK(cpipe);
-			return;
-		}
+	if (kn->kn_filter == EVFILT_WRITE)
 		cpipe = cpipe->pipe_peer;
-	}
 	knlist_remove(&cpipe->pipe_sel.si_note, kn, 1);
 	PIPE_UNLOCK(cpipe);
 }
@@ -1604,7 +1608,8 @@ filt_piperead(struct knote *kn, long hint)
 		kn->kn_data = rpipe->pipe_map.cnt;
 
 	if ((rpipe->pipe_state & PIPE_EOF) ||
-	    (!wpipe->pipe_present) || (wpipe->pipe_state & PIPE_EOF)) {
+	    wpipe->pipe_present != PIPE_ACTIVE ||
+	    (wpipe->pipe_state & PIPE_EOF)) {
 		kn->kn_flags |= EV_EOF;
 		PIPE_UNLOCK(rpipe);
 		return (1);
@@ -1622,7 +1627,8 @@ filt_pipewrite(struct knote *kn, long hint)
 	struct pipe *wpipe = rpipe->pipe_peer;
 
 	PIPE_LOCK(rpipe);
-	if ((!wpipe->pipe_present) || (wpipe->pipe_state & PIPE_EOF)) {
+	if (wpipe->pipe_present != PIPE_ACTIVE ||
+	    (wpipe->pipe_state & PIPE_EOF)) {
 		kn->kn_data = 0;
 		kn->kn_flags |= EV_EOF;
 		PIPE_UNLOCK(rpipe);
Index: sys/sys/event.h
===================================================================
--- sys/sys/event.h	(revision 197652)
+++ sys/sys/event.h	(working copy)
@@ -208,6 +208,7 @@ struct proc;
 struct knlist;
 
 extern void	knote(struct knlist *list, long hint, int islocked);
+extern void	knote_fork(struct knlist *list, int pid);
 extern void	knlist_add(struct knlist *knl, struct knote *kn, int islocked);
 extern void	knlist_remove(struct knlist *knl, struct knote *kn, int islocked);
 extern void	knlist_remove_inevent(struct knlist *knl, struct knote *kn);
Index: sys/sys/pipe.h
===================================================================
--- sys/sys/pipe.h	(revision 197652)
+++ sys/sys/pipe.h	(working copy)
@@ -115,6 +115,13 @@ struct pipe {
 };
 
 /*
+ * Values for pipe_present.
+ */
+#define PIPE_ACTIVE	1
+#define PIPE_CLOSING	2
+#define PIPE_FINALIZED	3
+
+/*
  * Container structure to hold the two pipe endpoints, mutex, and label
  * pointer.
 */
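
For reference, and not part of the patch itself: a minimal userland sketch of the EVFILT_PROC path that knote_fork() now services. Everything below is illustrative (the program layout, variable names, and output format are assumptions, not code from the diff); it only uses the documented kqueue(2)/kevent(2) interfaces. The program registers NOTE_FORK | NOTE_TRACK on the calling process and forks once. With NOTE_TRACK, the kernel attaches a knote to the child and immediately reports NOTE_CHILD on it, with kev.ident set to the child's pid and kev.data carrying the parent's pid; NOTE_TRACKERR in fflags signals that the child's knote could not be registered.

/*
 * Illustrative only -- exercises the EVFILT_PROC / NOTE_TRACK path.
 */
#include <sys/types.h>
#include <sys/event.h>
#include <sys/time.h>
#include <sys/wait.h>

#include <err.h>
#include <stdint.h>
#include <stdio.h>
#include <unistd.h>

int
main(void)
{
	struct kevent change, ev;
	pid_t child;
	int i, kq;

	if ((kq = kqueue()) == -1)
		err(1, "kqueue");

	/* Watch ourselves for forks and ask the kernel to track children. */
	EV_SET(&change, getpid(), EVFILT_PROC, EV_ADD | EV_ENABLE,
	    NOTE_FORK | NOTE_TRACK, 0, NULL);
	if (kevent(kq, &change, 1, NULL, 0, NULL) == -1)
		err(1, "kevent register");

	if ((child = fork()) == -1)
		err(1, "fork");
	if (child == 0)
		_exit(0);	/* Child does nothing of interest. */

	/*
	 * Drain the first two events: expect NOTE_FORK on the parent's
	 * knote and NOTE_CHILD on the knote attached to the child, whose
	 * ident is the child's pid and whose data is the parent's pid.
	 */
	for (i = 0; i < 2; i++) {
		if (kevent(kq, NULL, 0, &ev, 1, NULL) == -1)
			err(1, "kevent wait");
		if (ev.fflags & NOTE_TRACKERR)
			warnx("failed to attach knote to the child");
		printf("ident %ju fflags %#x data %jd\n",
		    (uintmax_t)ev.ident, ev.fflags, (intmax_t)ev.data);
	}

	waitpid(child, NULL, 0);
	return (0);
}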