--- sys/compat/linprocfs/linprocfs.c.orig +++ sys/compat/linprocfs/linprocfs.c @@ -1317,19 +1317,13 @@ struct vattr vat; bool private; - PROC_LOCK(p); - error = p_candebug(td, p); - PROC_UNLOCK(p); - if (error) - return (error); - if (uio->uio_rw != UIO_READ) return (EOPNOTSUPP); - error = 0; - vm = vmspace_acquire_ref(p); - if (vm == NULL) - return (ESRCH); + error = proc_vmspace_ref(td, p, PRVM_BLOCK_EXEC | PRVM_CHECK_DEBUG, + &vm); + if (error != 0) + return (error); if (SV_CURPROC_FLAG(SV_LP64)) l_map_str = l64_map_str; @@ -1427,7 +1421,7 @@ } } vm_map_unlock_read(map); - vmspace_free(vm); + proc_vmspace_unref(td, p, PRVM_CHECK_DEBUG | PRVM_BLOCK_EXEC, vm); return (error); } --- sys/compat/linux/linux_misc.c.orig +++ sys/compat/linux/linux_misc.c @@ -2007,6 +2007,7 @@ u_int which; int flags; int error; + bool exec_blocked; if (args->new == NULL && args->old != NULL) { if (linux_get_dummy_limit(args->resource, &rlim)) { @@ -2034,6 +2035,7 @@ return (error); } + exec_blocked = false; flags = PGET_HOLD | PGET_NOTWEXIT; if (args->new != NULL) flags |= PGET_CANDEBUG; @@ -2046,6 +2048,14 @@ error = pget(args->pid, flags, &p); if (error != 0) return (error); + exec_blocked = true; + PROC_LOCK(p); + execve_block_wait(td, p); + error = args->new != NULL ? p_candebug(td, p) : + p_cansee(td, p); + PROC_UNLOCK(p); + if (error != 0) + goto out; } if (args->old != NULL) { PROC_LOCK(p); @@ -2068,6 +2078,11 @@ error = kern_proc_setrlimit(td, p, which, &nrlim); out: + if (exec_blocked) { + PROC_LOCK(p); + execve_unblock(td, p); + PROC_UNLOCK(p); + } PRELE(p); return (error); } --- sys/fs/cuse/cuse.c.orig +++ sys/fs/cuse/cuse.c @@ -914,7 +914,7 @@ }; PHOLD(proc_s); - error = proc_rwmem(proc_s, &uio); + error = proc_rwmem(proc_s, &uio, 0); PRELE(proc_s); } else if (proc_cur == proc_s) { @@ -933,7 +933,7 @@ }; PHOLD(proc_d); - error = proc_rwmem(proc_d, &uio); + error = proc_rwmem(proc_d, &uio, 0); PRELE(proc_d); } else { error = EINVAL; --- sys/fs/procfs/procfs_map.c.orig +++ sys/fs/procfs/procfs_map.c @@ -42,6 +42,7 @@ #include #include #include +#include #include #include #include @@ -95,15 +96,14 @@ bool wrap32; #endif - PROC_LOCK(p); - error = p_candebug(td, p); - PROC_UNLOCK(p); - if (error) - return (error); - if (uio->uio_rw != UIO_READ) return (EOPNOTSUPP); + error = proc_vmspace_ref(td, p, PRVM_BLOCK_EXEC | PRVM_CHECK_DEBUG, + &vm); + if (error != 0) + return (error); + #ifdef COMPAT_FREEBSD32 wrap32 = false; if (SV_CURPROC_FLAG(SV_ILP32)) { @@ -113,9 +113,6 @@ } #endif - vm = vmspace_acquire_ref(p); - if (vm == NULL) - return (ESRCH); map = &vm->vm_map; vm_map_lock_read(map); VM_MAP_ENTRY_FOREACH(entry, map) { @@ -240,6 +237,6 @@ } } vm_map_unlock_read(map); - vmspace_free(vm); + proc_vmspace_unref(td, p, PRVM_BLOCK_EXEC | PRVM_CHECK_DEBUG, vm); return (error); } --- sys/fs/procfs/procfs_mem.c.orig +++ sys/fs/procfs/procfs_mem.c @@ -61,11 +61,7 @@ if (uio->uio_resid == 0) return (0); - PROC_LOCK(p); - error = p_candebug(td, p); - PROC_UNLOCK(p); - if (error == 0) - error = proc_rwmem(p, uio); + error = proc_rwmem(p, uio, PRVM_CHECK_DEBUG | PRVM_BLOCK_EXEC); return (error); } --- sys/fs/pseudofs/pseudofs_vnops.c.orig +++ sys/fs/pseudofs/pseudofs_vnops.c @@ -37,6 +37,7 @@ #include #include #include +#include #include #include #include @@ -132,6 +133,7 @@ pfs_lookup_proc(pid_t pid, struct proc **p) { struct proc *proc; + struct thread *td; proc = pfind(pid); if (proc == NULL) @@ -141,8 +143,10 @@ return (0); } _PHOLD(proc); - PROC_UNLOCK(proc); + td = curthread; + execve_block_wait(td, proc); *p = proc; + PROC_UNLOCK(proc); return (1); } @@ -672,6 +676,7 @@ struct pfs_node *pn = pvd->pvd_pn; struct uio *uio = va->a_uio; struct proc *proc; + struct thread *td; struct sbuf *sb = NULL; int error, locked; off_t buflen, buflim; @@ -690,21 +695,30 @@ if (pn->pn_fill == NULL) PFS_RETURN (EIO); + td = curthread; + /* * This is necessary because either process' privileges may * have changed since the open() call. */ - if (!pfs_visible(curthread, pn, pvd->pvd_pid, &proc)) + if (!pfs_visible(td, pn, pvd->pvd_pid, &proc)) PFS_RETURN (EIO); - if (proc != NULL) { - _PHOLD(proc); - PROC_UNLOCK(proc); - } vhold(vn); locked = VOP_ISLOCKED(vn); VOP_UNLOCK(vn); + if (proc != NULL) { + _PHOLD(proc); + execve_block_wait(td, proc); + if (!pfs_visible_proc(td, pn, proc)) { + PROC_UNLOCK(proc); + error = EIO; + goto ret; + } + PROC_UNLOCK(proc); + } + if (pn->pn_flags & PFS_RAWRD) { PFS_TRACE(("%zd resid", uio->uio_resid)); error = pn_fill(curthread, proc, pn, NULL, uio); @@ -774,8 +788,12 @@ ret: vn_lock(vn, locked | LK_RETRY); vdrop(vn); - if (proc != NULL) - PRELE(proc); + if (proc != NULL) { + PROC_LOCK(proc); + execve_unblock(td, proc); + _PRELE(proc); + PROC_UNLOCK(proc); + } PFS_RETURN (error); } @@ -846,6 +864,7 @@ struct pfs_node *pd = pvd->pvd_pn; pid_t pid = pvd->pvd_pid; struct proc *p, *proc; + struct thread *td; struct pfs_node *pn; struct uio *uio; struct pfsentry *pfsent, *pfsent2; @@ -884,11 +903,13 @@ KASSERT(pid == NO_PID || proc != NULL, ("%s(): no process for pid %lu", __func__, (unsigned long)pid)); + td = curthread; if (pid != NO_PID) { PROC_LOCK(proc); /* check if the directory is visible to the caller */ if (!pfs_visible_proc(curthread, pd, proc)) { + execve_unblock(td, proc); _PRELE(proc); PROC_UNLOCK(proc); pfs_unlock(pd); @@ -956,6 +977,7 @@ resid -= PFS_DELEN; } if (proc != NULL) { + execve_unblock(td, proc); _PRELE(proc); PROC_UNLOCK(proc); } @@ -1080,6 +1102,7 @@ struct pfs_node *pn = pvd->pvd_pn; struct uio *uio = va->a_uio; struct proc *proc; + struct thread *td; struct sbuf sb; int error; @@ -1099,36 +1122,44 @@ if (uio->uio_resid > PFS_MAXBUFSIZ) PFS_RETURN (EIO); + td = curthread; + /* * This is necessary because either process' privileges may * have changed since the open() call. */ - if (!pfs_visible(curthread, pn, pvd->pvd_pid, &proc)) + if (!pfs_visible(td, pn, pvd->pvd_pid, &proc)) PFS_RETURN (EIO); if (proc != NULL) { _PHOLD(proc); + execve_block_wait(td, proc); + if (!pfs_visible_proc(td, pn, proc)) { + PROC_UNLOCK(proc); + error = EIO; + goto out; + } PROC_UNLOCK(proc); } if (pn->pn_flags & PFS_RAWWR) { error = pn_fill(curthread, proc, pn, NULL, uio); - if (proc != NULL) - PRELE(proc); - PFS_RETURN (error); + goto out; } sbuf_uionew(&sb, uio, &error); - if (error) { - if (proc != NULL) - PRELE(proc); - PFS_RETURN (error); - } + if (error != 0) + goto out; error = pn_fill(curthread, proc, pn, &sb, uio); sbuf_delete(&sb); - if (proc != NULL) - PRELE(proc); +out: + if (proc != NULL) { + PROC_LOCK(proc); + execve_unblock(td, proc); + _PRELE(proc); + PROC_UNLOCK(proc); + } PFS_RETURN (error); } --- sys/kern/kern_event.c.orig +++ sys/kern/kern_event.c @@ -50,6 +50,7 @@ #include #include #include +#include #include #include #include @@ -3028,10 +3029,6 @@ if ((u_int)arg2 > 2 || (u_int)arg2 == 0) return (EINVAL); - error = pget((pid_t)name[0], PGET_HOLD | PGET_CANDEBUG, &p); - if (error != 0) - return (error); - td = curthread; #ifdef FREEBSD_COMPAT32 compat32 = SV_CURPROC_FLAG(SV_ILP32); @@ -3039,6 +3036,17 @@ compat32 = false; #endif + error = pget((pid_t)name[0], PGET_NOTWEXIT, &p); + if (error != 0) + return (error); + + _PHOLD(p); + execve_block_wait(td, p); + error = p_candebug(td, p); + if (error != 0) + goto out1; + PROC_UNLOCK(p); + s = sbuf_new_for_sysctl(&sm, NULL, 0, req); if (s == NULL) { error = ENOMEM; @@ -3059,7 +3067,11 @@ sbuf_delete(s); out: - PRELE(p); + PROC_LOCK(p); +out1: + execve_unblock(td, p); + _PRELE(p); + PROC_UNLOCK(p); return (error); } --- sys/kern/kern_exec.c.orig +++ sys/kern/kern_exec.c @@ -26,7 +26,6 @@ * SUCH DAMAGE. */ -#include #include "opt_capsicum.h" #include "opt_hwpmc_hooks.h" #include "opt_ktrace.h" @@ -45,6 +44,7 @@ #include #include #include +#include #include #include #include @@ -373,6 +373,77 @@ } } +/* + * Returns true if the execblock was obtained, in this case the + * process lock is kept. Returns false if the execblock was not + * obtained, but the function slept and the lock was dropped. + */ +bool +execve_block(struct thread *td, struct proc *p) +{ + PROC_LOCK_ASSERT(p, MA_OWNED); + MPASS(td == curthread); + MPASS(p != td->td_proc || (p->p_flag & P_INEXEC) == 0); + + if (p != td->td_proc && (p->p_flag & P_INEXEC) != 0) { + p->p_flag2 |= P2_INEXEC_WAIT; + msleep(&p->p_execblock, &p->p_mtx, PDROP, "inexec", 0); + return (false); + } + MPASS(p->p_execblock < UINT_MAX); + p->p_execblock++; + return (true); +} + +/* + * Might drop the process lock internally, callers must re-check the + * invariants afterward. + */ +void +execve_block_wait(struct thread *td, struct proc *p) +{ + bool first; + + PROC_ASSERT_HELD(p); + PROC_LOCK_ASSERT(p, MA_OWNED); + + for (first = true;; first = false) { + if (!first) + PROC_LOCK(p); + if (execve_block(td, p)) + return; + } +} + +void +execve_unblock(struct thread *td, struct proc *p) +{ + PROC_LOCK_ASSERT(p, MA_OWNED); + MPASS(td == curthread); + + MPASS(p->p_execblock > 0); + p->p_execblock--; + if (p->p_execblock == 0 && (p->p_flag2 & P2_INEXEC_WAIT) != 0) { + p->p_flag2 &= ~P2_INEXEC_WAIT; + wakeup(&p->p_execblock); + } +} + +void +execve_block_pass(struct thread *td) +{ + struct proc *p; + + MPASS(td == curthread); + p = td->td_proc; + PROC_LOCK_ASSERT(p, MA_OWNED); + + while (p->p_execblock != 0) { + p->p_flag2 |= P2_INEXEC_WAIT; + msleep(&p->p_execblock, &p->p_mtx, 0, "exeblk", 0); + } +} + /* * In-kernel implementation of execve(). All arguments are assumed to be * userspace pointers from the passed thread. @@ -428,6 +499,7 @@ PROC_LOCK(p); KASSERT((p->p_flag & P_INEXEC) == 0, ("%s(): process already has P_INEXEC flag", __func__)); + execve_block_pass(td); p->p_flag |= P_INEXEC; PROC_UNLOCK(p); @@ -896,7 +968,11 @@ * as we're now a bona fide freshly-execed process. */ KNOTE_LOCKED(p->p_klist, NOTE_EXEC); + MPASS(p->p_execblock == 0); + if ((p->p_flag2 & P2_INEXEC_WAIT) != 0) + wakeup(&p->p_execblock); p->p_flag &= ~P_INEXEC; + p->p_flag2 &= ~P2_INEXEC_WAIT; /* clear "fork but no exec" flag, as we _are_ execing */ p->p_acflag &= ~AFORK; @@ -978,7 +1054,10 @@ exec_fail: /* we're done here, clear P_INEXEC */ PROC_LOCK(p); + if ((p->p_flag2 & P2_INEXEC_WAIT) != 0) + wakeup(&p->p_execblock); p->p_flag &= ~P_INEXEC; + p->p_flag2 &= ~P2_INEXEC_WAIT; PROC_UNLOCK(p); SDT_PROBE1(proc, , , exec__failure, error); --- sys/kern/kern_exit.c.orig +++ sys/kern/kern_exit.c @@ -325,6 +325,7 @@ while (p->p_lock > 0) msleep(&p->p_lock, &p->p_mtx, PWAIT, "exithold", 0); + MPASS(p->p_execblock == 0); PROC_UNLOCK(p); /* Drain the limit callout while we don't have the proc locked */ callout_drain(&p->p_limco); --- sys/kern/kern_fork.c.orig +++ sys/kern/kern_fork.c @@ -384,6 +384,7 @@ bzero(&p2->p_startzero, __rangeof(struct proc, p_startzero, p_endzero)); + p2->p_execblock = 0; /* Tell the prison that we exist. */ prison_proc_hold(p2->p_ucred->cr_prison); --- sys/kern/kern_proc.c.orig +++ sys/kern/kern_proc.c @@ -45,6 +45,7 @@ #include #include #include +#include #include #include #include @@ -1833,8 +1834,8 @@ } static int -proc_read_string(struct thread *td, struct proc *p, const char *sptr, char *buf, - size_t len) +proc_read_string(struct thread *td, struct vmspace *vm, const char *sptr, + char *buf, size_t len) { ssize_t n; @@ -1843,7 +1844,7 @@ * and is aligned at the end of the page, and the following page is not * mapped. */ - n = proc_readmem(td, p, (vm_offset_t)sptr, buf, len); + n = vmspace_iop(td, vm, (vm_offset_t)sptr, buf, len, UIO_READ); if (n <= 0) return (ENOMEM); return (0); @@ -1859,8 +1860,8 @@ #ifdef COMPAT_FREEBSD32 static int -get_proc_vector32(struct thread *td, struct proc *p, char ***proc_vectorp, - size_t *vsizep, enum proc_vector_type type) +get_proc_vector32(struct thread *td, struct proc *p, struct vmspace *vm, + char ***proc_vectorp, size_t *vsizep, enum proc_vector_type type) { struct freebsd32_ps_strings pss; Elf32_Auxinfo aux; @@ -1871,8 +1872,8 @@ int i, error; error = 0; - if (proc_readmem(td, p, PROC_PS_STRINGS(p), &pss, sizeof(pss)) != - sizeof(pss)) + if (vmspace_iop(td, vm, PROC_PS_STRINGS(p), &pss, sizeof(pss), + UIO_READ) != sizeof(pss)) return (ENOMEM); switch (type) { case PROC_ARG: @@ -1895,8 +1896,8 @@ if (vptr % 4 != 0) return (ENOEXEC); for (ptr = vptr, i = 0; i < PROC_AUXV_MAX; i++) { - if (proc_readmem(td, p, ptr, &aux, sizeof(aux)) != - sizeof(aux)) + if (vmspace_iop(td, vm, ptr, &aux, sizeof(aux), + UIO_READ) != sizeof(aux)) return (ENOMEM); if (aux.a_type == AT_NULL) break; @@ -1912,7 +1913,7 @@ return (EINVAL); } proc_vector32 = malloc(size, M_TEMP, M_WAITOK); - if (proc_readmem(td, p, vptr, proc_vector32, size) != size) { + if (vmspace_iop(td, vm, vptr, proc_vector32, size, UIO_READ) != size) { error = ENOMEM; goto done; } @@ -1933,8 +1934,8 @@ #endif static int -get_proc_vector(struct thread *td, struct proc *p, char ***proc_vectorp, - size_t *vsizep, enum proc_vector_type type) +get_proc_vector(struct thread *td, struct proc *p, struct vmspace *vm, + char ***proc_vectorp, size_t *vsizep, enum proc_vector_type type) { struct ps_strings pss; Elf_Auxinfo aux; @@ -1944,11 +1945,13 @@ int i; #ifdef COMPAT_FREEBSD32 - if (SV_PROC_FLAG(p, SV_ILP32) != 0) - return (get_proc_vector32(td, p, proc_vectorp, vsizep, type)); + if (SV_PROC_FLAG(p, SV_ILP32) != 0) { + return (get_proc_vector32(td, p, vm, proc_vectorp, + vsizep, type)); + } #endif - if (proc_readmem(td, p, PROC_PS_STRINGS(p), &pss, sizeof(pss)) != - sizeof(pss)) + if (vmspace_iop(td, vm, PROC_PS_STRINGS(p), &pss, sizeof(pss), + UIO_READ) != sizeof(pss)) return (ENOMEM); switch (type) { case PROC_ARG: @@ -1986,8 +1989,8 @@ * to the allocated proc_vector. */ for (ptr = vptr, i = 0; i < PROC_AUXV_MAX; i++) { - if (proc_readmem(td, p, ptr, &aux, sizeof(aux)) != - sizeof(aux)) + if (vmspace_iop(td, vm, ptr, &aux, sizeof(aux), + UIO_READ) != sizeof(aux)) return (ENOMEM); if (aux.a_type == AT_NULL) break; @@ -2009,7 +2012,7 @@ return (EINVAL); /* In case we are built without INVARIANTS. */ } proc_vector = malloc(size, M_TEMP, M_WAITOK); - if (proc_readmem(td, p, vptr, proc_vector, size) != size) { + if (vmspace_iop(td, vm, vptr, proc_vector, size, UIO_READ) != size) { free(proc_vector, M_TEMP); return (ENOMEM); } @@ -2025,6 +2028,7 @@ get_ps_strings(struct thread *td, struct proc *p, struct sbuf *sb, enum proc_vector_type type) { + struct vmspace *vm; size_t done, len, nchr, vsize; int error, i; char **proc_vector, *sptr; @@ -2037,9 +2041,14 @@ */ nchr = 2 * (PATH_MAX + ARG_MAX); - error = get_proc_vector(td, p, &proc_vector, &vsize, type); + error = proc_vmspace_ref(td, p, PRVM_BLOCK_EXEC | + PRVM_CHECK_VISIBILITY, &vm); if (error != 0) return (error); + + error = get_proc_vector(td, p, vm, &proc_vector, &vsize, type); + if (error != 0) + goto out; for (done = 0, i = 0; i < (int)vsize && done < nchr; i++) { /* * The program may have scribbled into its argv array, e.g. to @@ -2049,7 +2058,7 @@ if (proc_vector[i] == NULL) break; for (sptr = proc_vector[i]; ; sptr += GET_PS_STRINGS_CHUNK_SZ) { - error = proc_read_string(td, p, sptr, pss_string, + error = proc_read_string(td, vm, sptr, pss_string, sizeof(pss_string)); if (error != 0) goto done; @@ -2066,6 +2075,8 @@ } done: free(proc_vector, M_TEMP); +out: + proc_vmspace_unref(td, p, PRVM_BLOCK_EXEC | PRVM_CHECK_VISIBILITY, vm); return (error); } @@ -2086,11 +2097,17 @@ int proc_getauxv(struct thread *td, struct proc *p, struct sbuf *sb) { + struct vmspace *vm; size_t vsize, size; char **auxv; int error; - error = get_proc_vector(td, p, &auxv, &vsize, PROC_AUX); + error = proc_vmspace_ref(td, p, PRVM_BLOCK_EXEC | PRVM_CHECK_DEBUG, + &vm); + if (error != 0) + return (error); + error = get_proc_vector(td, p, vm, &auxv, &vsize, PROC_AUX); + proc_vmspace_unref(td, p, PRVM_BLOCK_EXEC | PRVM_CHECK_DEBUG, vm); if (error == 0) { #ifdef COMPAT_FREEBSD32 if (SV_PROC_FLAG(p, SV_ILP32) != 0) @@ -2403,6 +2420,7 @@ int error, *name; struct vnode *vp; struct proc *p; + struct thread *td; vm_map_t map; struct vmspace *vm; @@ -2411,11 +2429,12 @@ return (EINVAL); name = (int *)arg1; + td = curthread; error = pget((pid_t)name[0], PGET_WANTREAD, &p); if (error != 0) return (error); - vm = vmspace_acquire_ref(p); - if (vm == NULL) { + error = proc_vmspace_ref(td, p, PRVM_CHECK_DEBUG, &vm); + if (error != 0) { PRELE(p); return (ESRCH); } @@ -2527,7 +2546,7 @@ } } vm_map_unlock_read(map); - vmspace_free(vm); + proc_vmspace_unref(td, p, PRVM_CHECK_DEBUG, vm); PRELE(p); free(kve, M_TEMP); return (error); @@ -2618,6 +2637,7 @@ struct vmspace *vm; struct cdev *cdev; struct cdevsw *csw; + struct thread *td; vm_offset_t addr; unsigned int last_timestamp; int error, ref; @@ -2629,10 +2649,11 @@ _PHOLD(p); PROC_UNLOCK(p); - vm = vmspace_acquire_ref(p); - if (vm == NULL) { + td = curthread; + error = proc_vmspace_ref(td, p, PRVM_CHECK_DEBUG, &vm); + if (error != 0) { PRELE(p); - return (ESRCH); + return (error); } kve = malloc(sizeof(*kve), M_TEMP, M_WAITOK | M_ZERO); @@ -2747,7 +2768,7 @@ if (vp != NULL) { vn_fullpath(vp, &fullpath, &freepath); kve->kve_vn_type = vntype_to_kinfo(vp->v_type); - cred = curthread->td_ucred; + cred = td->td_ucred; vn_lock(vp, LK_SHARED | LK_RETRY); if (VOP_GETATTR(vp, &va, cred) == 0) { kve->kve_vn_fileid = va.va_fileid; @@ -2803,7 +2824,7 @@ } } vm_map_unlock_read(map); - vmspace_free(vm); + proc_vmspace_unref(td, p, PRVM_CHECK_DEBUG, vm); PRELE(p); free(kve, M_TEMP); return (error); @@ -2842,7 +2863,7 @@ struct kinfo_kstack *kkstp; int error, i, *name, numthreads; lwpid_t *lwpidarray; - struct thread *td; + struct thread *td, *ctd; struct stack *st; struct sbuf sb; struct proc *p; @@ -2853,7 +2874,8 @@ return (EINVAL); name = (int *)arg1; - error = pget((pid_t)name[0], PGET_NOTINEXEC | PGET_WANTREAD, &p); + ctd = curthread; + error = pget((pid_t)name[0], PGET_WANTREAD, &p); if (error != 0) return (error); @@ -2862,6 +2884,14 @@ lwpidarray = NULL; PROC_LOCK(p); + execve_block_wait(ctd, p); + error = p_candebug(ctd, p); + if (error != 0) { + execve_unblock(ctd, p); + _PRELE(p); + PROC_UNLOCK(p); + return (error); + } do { if (lwpidarray != NULL) { free(lwpidarray, M_TEMP); @@ -2874,15 +2904,6 @@ PROC_LOCK(p); } while (numthreads < p->p_numthreads); - /* - * XXXRW: During the below loop, execve(2) and countless other sorts - * of changes could have taken place. Should we check to see if the - * vmspace has been replaced, or the like, in order to prevent - * giving a snapshot that spans, say, execve(2), with some threads - * before and some after? Among other things, the credentials could - * have changed, in which case the right to extract debug info might - * no longer be assured. - */ i = 0; FOREACH_THREAD_IN_PROC(p, td) { KASSERT(i < numthreads, @@ -2917,7 +2938,10 @@ if (error) break; } - PRELE(p); + PROC_LOCK(p); + execve_unblock(ctd, p); + _PRELE(p); + PROC_UNLOCK(p); if (lwpidarray != NULL) free(lwpidarray, M_TEMP); stack_destroy(st); @@ -2970,8 +2994,9 @@ u_int namelen = arg2; struct rlimit rlim; struct proc *p; + struct thread *td; u_int which; - int flags, error; + int error; if (namelen != 2) return (EINVAL); @@ -2983,23 +3008,24 @@ if (req->newptr != NULL && req->newlen != sizeof(rlim)) return (EINVAL); - flags = PGET_HOLD | PGET_NOTWEXIT; - if (req->newptr != NULL) - flags |= PGET_CANDEBUG; - else - flags |= PGET_CANSEE; - error = pget((pid_t)name[0], flags, &p); + td = curthread; + error = pget((pid_t)name[0], PGET_NOTWEXIT, &p); if (error != 0) return (error); + _PHOLD(p); + execve_block_wait(td, p); + error = req->newptr != NULL ? p_candebug(td, p) : p_cansee(td, p); + if (error != 0) + goto errout1; /* * Retrieve limit. */ if (req->oldptr != NULL) { - PROC_LOCK(p); lim_rlimit_proc(p, which, &rlim); - PROC_UNLOCK(p); } + PROC_UNLOCK(p); + error = SYSCTL_OUT(req, &rlim, sizeof(rlim)); if (error != 0) goto errout; @@ -3014,7 +3040,11 @@ } errout: - PRELE(p); + PROC_LOCK(p); +errout1: + _PRELE(p); + execve_unblock(td, p); + PROC_UNLOCK(p); return (error); } @@ -3103,39 +3133,38 @@ int *name = (int *)arg1; u_int namelen = arg2; struct proc *p; - int flags, error, osrel; + int flags, error, old_osrel, osrel; if (namelen != 1) return (EINVAL); - if (req->newptr != NULL && req->newlen != sizeof(osrel)) - return (EINVAL); - - flags = PGET_HOLD | PGET_NOTWEXIT; - if (req->newptr != NULL) + flags = PGET_NOTWEXIT; + if (req->newptr != NULL) { + if (req->newlen != sizeof(osrel)) + return (EINVAL); + error = SYSCTL_IN(req, &osrel, sizeof(osrel)); + if (error != 0) + return (error); + if (osrel < 0) + return (EINVAL); flags |= PGET_CANDEBUG; - else + } else { flags |= PGET_CANSEE; + } error = pget((pid_t)name[0], flags, &p); if (error != 0) return (error); - - error = SYSCTL_OUT(req, &p->p_osrel, sizeof(p->p_osrel)); - if (error != 0) - goto errout; - - if (req->newptr != NULL) { - error = SYSCTL_IN(req, &osrel, sizeof(osrel)); - if (error != 0) - goto errout; - if (osrel < 0) { - error = EINVAL; - goto errout; - } - p->p_osrel = osrel; + if ((p->p_flag & P_INEXEC) != 0) { + error = EBUSY; + } else { + old_osrel = p->p_osrel; + if (req->newptr != NULL) + p->p_osrel = osrel; } -errout: - PRELE(p); + PROC_UNLOCK(p); + + if (error == 0) + error = SYSCTL_OUT(req, &old_osrel, sizeof(old_osrel)); return (error); } @@ -3272,6 +3301,7 @@ { struct kinfo_vm_layout kvm; struct proc *p; + struct thread *td; struct vmspace *vmspace; int error, *name; @@ -3279,6 +3309,7 @@ if ((u_int)arg2 != 1) return (EINVAL); + td = curthread; error = pget((pid_t)name[0], PGET_CANDEBUG, &p); if (error != 0) return (error); @@ -3290,8 +3321,13 @@ } } #endif - vmspace = vmspace_acquire_ref(p); + _PHOLD(p); PROC_UNLOCK(p); + error = proc_vmspace_ref(td, p, PRVM_CHECK_DEBUG, &vmspace); + if (error != 0) { + PRELE(p); + return (error); + } memset(&kvm, 0, sizeof(kvm)); kvm.kvm_min_user_addr = vm_map_min(&vmspace->vm_map); @@ -3343,7 +3379,8 @@ #ifdef COMPAT_FREEBSD32 out: #endif - vmspace_free(vmspace); + proc_vmspace_unref(td, p, PRVM_CHECK_DEBUG, vmspace); + PRELE(p); return (error); } --- sys/kern/kern_procctl.c.orig +++ sys/kern/kern_procctl.c @@ -40,6 +40,7 @@ #include #include #include +#include #include #include #include @@ -872,37 +873,41 @@ { struct vmspace *vm; vm_map_t map; - int state; + int error, state; PROC_LOCK_ASSERT(p, MA_OWNED); if ((p->p_flag & P_WEXIT) != 0) return (ESRCH); state = *(int *)data; + error = 0; switch (state) { case PROC_WX_MAPPINGS_PERMIT: - p->p_flag2 |= P2_WXORX_DISABLE; - _PHOLD(p); PROC_UNLOCK(p); - vm = vmspace_acquire_ref(p); - if (vm != NULL) { + error = proc_vmspace_ref(td, p, PRVM_BLOCK_EXEC | + PRVM_CHECK_DEBUG, &vm); + if (error == 0) { map = &vm->vm_map; vm_map_lock(map); map->flags &= ~MAP_WXORX; vm_map_unlock(map); - vmspace_free(vm); + PROC_LOCK(p); + p->p_flag2 |= P2_WXORX_DISABLE; + PROC_UNLOCK(p); + proc_vmspace_unref(td, p, PRVM_BLOCK_EXEC | + PRVM_CHECK_DEBUG, vm); } PROC_LOCK(p); - _PRELE(p); break; case PROC_WX_MAPPINGS_DISALLOW_EXEC: p->p_flag2 |= P2_WXORX_ENABLE_EXEC; break; default: - return (EINVAL); + error = EINVAL; + break; } - return (0); + return (error); } static int --- sys/kern/kern_prot.c.orig +++ sys/kern/kern_prot.c @@ -51,6 +51,7 @@ #include #include #include +#include #include #include #include @@ -1002,6 +1003,8 @@ newcred = crget(); euip = uifind(euid); PROC_LOCK(p); + execve_block_pass(td); + /* * Copy credentials so other references do not see our changes. */ @@ -1056,6 +1059,7 @@ AUDIT_ARG_GID(gid); newcred = crget(); PROC_LOCK(p); + execve_block_pass(td); oldcred = crcopysafe(p, newcred); #ifdef MAC @@ -1154,6 +1158,7 @@ AUDIT_ARG_EGID(egid); newcred = crget(); PROC_LOCK(p); + execve_block_pass(td); oldcred = crcopysafe(p, newcred); #ifdef MAC @@ -1249,6 +1254,7 @@ if (ngrp != 0) crextend(newcred, ngrp); PROC_LOCK(p); + execve_block_pass(td); oldcred = crcopysafe(p, newcred); #ifdef MAC @@ -1311,6 +1317,7 @@ euip = uifind(euid); ruip = uifind(ruid); PROC_LOCK(p); + execve_block_pass(td); oldcred = crcopysafe(p, newcred); #ifdef MAC @@ -1383,6 +1390,7 @@ AUDIT_ARG_RGID(rgid); newcred = crget(); PROC_LOCK(p); + execve_block_pass(td); oldcred = crcopysafe(p, newcred); #ifdef MAC @@ -1453,6 +1461,7 @@ euip = uifind(euid); ruip = uifind(ruid); PROC_LOCK(p); + execve_block_pass(td); oldcred = crcopysafe(p, newcred); #ifdef MAC @@ -1537,6 +1546,7 @@ AUDIT_ARG_SGID(sgid); newcred = crget(); PROC_LOCK(p); + execve_block_pass(td); oldcred = crcopysafe(p, newcred); #ifdef MAC @@ -2229,11 +2239,11 @@ } /* - * Can't trace a process that's currently exec'ing. - * - * XXX: Note, this is not a security policy decision, it's a - * basic correctness/functionality decision. Therefore, this check - * should be moved to the caller's of p_candebug(). + * Can't trace a process that's currently exec'ing. Otherwise + * the process vmspace might change, and the target might be + * loading a setugid image. The execve_block(9) and + * proc_vmspace_ref(9) allow to get the stable credentials and + * vmspace reference. */ if ((p->p_flag & P_INEXEC) != 0) return (EBUSY); --- sys/kern/kern_resource.c.orig +++ sys/kern/kern_resource.c @@ -48,6 +48,7 @@ #include #include #include +#include #include #include #include @@ -810,11 +811,11 @@ } static int -getrlimitusage_one(struct proc *p, u_int which, int flags, rlim_t *res) +getrlimitusage_one(struct proc *p, struct vmspace *vm, u_int which, int flags, + rlim_t *res) { struct thread *td; struct uidinfo *ui; - struct vmspace *vm; uid_t uid; int error; @@ -825,7 +826,6 @@ PROC_UNLOCK(p); ui = uifind(uid); - vm = vmspace_acquire_ref(p); switch (which) { case RLIMIT_CPU: @@ -903,7 +903,6 @@ break; } - vmspace_free(vm); uifree(ui); return (error); } @@ -911,12 +910,15 @@ int sys_getrlimitusage(struct thread *td, struct getrlimitusage_args *uap) { + struct proc *p; rlim_t res; int error; if ((uap->flags & ~(GETRLIMITUSAGE_EUID)) != 0) return (EINVAL); - error = getrlimitusage_one(curproc, uap->which, uap->flags, &res); + p = curproc; + error = getrlimitusage_one(p, p->p_vmspace, uap->which, uap->flags, + &res); if (error == 0) error = copyout(&res, uap->res, sizeof(res)); return (error); @@ -1750,6 +1752,8 @@ { rlim_t resval[RLIM_NLIMITS]; struct proc *p; + struct thread *td; + struct vmspace *vm; size_t len; int error, *name, i; @@ -1759,15 +1763,20 @@ if (req->newptr != NULL) return (EINVAL); - error = pget((pid_t)name[0], PGET_WANTREAD, &p); + td = curthread; + error = pget((pid_t)name[0], PGET_HOLD | PGET_NOTWEXIT, &p); if (error != 0) return (error); + error = proc_vmspace_ref(td, p, PRVM_BLOCK_EXEC | + PRVM_CHECK_VISIBILITY, &vm); + if (error != 0) + goto out; if ((u_int)arg2 == 1) { len = sizeof(resval); memset(resval, 0, sizeof(resval)); for (i = 0; i < RLIM_NLIMITS; i++) { - error = getrlimitusage_one(p, (unsigned)i, 0, + error = getrlimitusage_one(p, vm, (unsigned)i, 0, &resval[i]); if (error == ENXIO) { resval[i] = -1; @@ -1778,7 +1787,7 @@ } } else { len = sizeof(resval[0]); - error = getrlimitusage_one(p, (unsigned)name[1], 0, + error = getrlimitusage_one(p, vm, (unsigned)name[1], 0, &resval[0]); if (error == ENXIO) { resval[0] = -1; @@ -1787,6 +1796,8 @@ } if (error == 0) error = SYSCTL_OUT(req, resval, len); + proc_vmspace_unref(td, p, PRVM_BLOCK_EXEC | PRVM_CHECK_VISIBILITY, vm); +out: PRELE(p); return (error); } --- sys/kern/sys_process.c.orig +++ sys/kern/sys_process.c @@ -34,6 +34,7 @@ #include #include #include +#include #include #include #include @@ -344,25 +345,93 @@ PROC_ACTION(ptrace_single_step(td)); } +static int +proc_vmspace_check_access(struct thread *td, struct proc *p, int flags) +{ + PROC_ASSERT_HELD(p); + if ((flags & PRVM_CHECK_DEBUG) != 0) + return (p_candebug(td, p)); + if ((flags & PRVM_CHECK_VISIBILITY) != 0) + return (p_cansee(td, p)); + return (0); +} + int -proc_rwmem(struct proc *p, struct uio *uio) +proc_vmspace_ref(struct thread *td, struct proc *p, int flags, + struct vmspace **vmp) +{ + struct vmspace *vm; + int error; + + MPASS((flags & ~(PRVM_BLOCK_EXEC | PRVM_CHECK_VISIBILITY | + PRVM_CHECK_DEBUG)) == 0); + MPASS((flags & (PRVM_CHECK_VISIBILITY | PRVM_CHECK_DEBUG)) != + (PRVM_CHECK_VISIBILITY | PRVM_CHECK_DEBUG)); + + PROC_LOCK(p); + if (p != td->td_proc) { + PROC_ASSERT_HELD(p); + + /* + * Make sure that the vmspace doesn't switch out from + * under us. + */ + if ((flags & PRVM_BLOCK_EXEC) != 0) { + for (;;) { + if (!execve_block(td, p)) { + PROC_LOCK(p); + continue; + } + error = proc_vmspace_check_access(td, p, flags); + if (error != 0) { + execve_unblock(td, p); + PROC_UNLOCK(p); + return (error); + } + break; + } + } else { + error = proc_vmspace_check_access(td, p, flags); + if (error != 0) { + PROC_UNLOCK(p); + return (error); + } + } + } + vm = vmspace_acquire_ref(p); + if (vm == NULL) { + if (p != td->td_proc && (flags & PRVM_BLOCK_EXEC) != 0) + execve_unblock(td, p); + PROC_UNLOCK(p); + return (ESRCH); + } + PROC_UNLOCK(p); + *vmp = vm; + return (0); +} + +void +proc_vmspace_unref(struct thread *td, struct proc *p, int flags, + struct vmspace *vm) +{ + vmspace_free(vm); + if (p != td->td_proc && (flags & PRVM_BLOCK_EXEC) != 0) { + PROC_LOCK(p); + PROC_ASSERT_HELD(p); + execve_unblock(td, p); + PROC_UNLOCK(p); + } +} + +static int +vmspace_rwmem(struct vmspace *vm, struct uio *uio) { vm_map_t map; vm_offset_t pageno; /* page number */ vm_prot_t reqprot; int error, fault_flags, page_offset, writing; - /* - * Make sure that the process' vmspace remains live. - */ - if (p != curproc) - PROC_ASSERT_HELD(p); - PROC_LOCK_ASSERT(p, MA_NOTOWNED); - - /* - * The map we want... - */ - map = &p->p_vmspace->vm_map; + map = &vm->vm_map; /* * If we are writing, then we request vm_fault() to create a private @@ -431,13 +500,30 @@ return (error); } -static ssize_t -proc_iop(struct thread *td, struct proc *p, vm_offset_t va, void *buf, +int +proc_rwmem(struct proc *p, struct uio *uio, int flags) +{ + struct vmspace *vm; + struct thread *td; + int error; + + td = curthread; + error = proc_vmspace_ref(td, p, flags, &vm); + if (error != 0) + return (error); + error = vmspace_rwmem(vm, uio); + proc_vmspace_unref(td, p, flags, vm); + return (error); +} + +ssize_t +vmspace_iop(struct thread *td, struct vmspace *vm, vm_offset_t va, void *buf, size_t len, enum uio_rw rw) { struct iovec iov; struct uio uio; ssize_t slen; + int error; MPASS(len < SSIZE_MAX); slen = (ssize_t)len; @@ -451,8 +537,8 @@ uio.uio_segflg = UIO_SYSSPACE; uio.uio_rw = rw; uio.uio_td = td; - proc_rwmem(p, &uio); - if (uio.uio_resid == slen) + error = vmspace_rwmem(vm, &uio); + if (error != 0 || uio.uio_resid == slen) return (-1); return (slen - uio.uio_resid); } @@ -462,7 +548,7 @@ size_t len) { - return (proc_iop(td, p, va, buf, len, UIO_READ)); + return (vmspace_iop(td, p->p_vmspace, va, buf, len, UIO_READ)); } ssize_t @@ -470,7 +556,7 @@ size_t len) { - return (proc_iop(td, p, va, buf, len, UIO_WRITE)); + return (vmspace_iop(td, p->p_vmspace, va, buf, len, UIO_WRITE)); } static int @@ -1419,7 +1505,7 @@ goto out; } PROC_UNLOCK(p); - error = proc_rwmem(p, &uio); + error = proc_rwmem(p, &uio, 0); piod->piod_len -= uio.uio_resid; PROC_LOCK(p); break; --- sys/sys/imgact.h.orig +++ sys/sys/imgact.h @@ -123,6 +123,10 @@ char **, char **); int pre_execve(struct thread *td, struct vmspace **oldvmspace); void post_execve(struct thread *td, int error, struct vmspace *oldvmspace); +bool execve_block(struct thread *td, struct proc *p); +void execve_block_wait(struct thread *td, struct proc *p); +void execve_unblock(struct thread *td, struct proc *p); +void execve_block_pass(struct thread *td); #endif #endif /* !_SYS_IMGACT_H_ */ --- sys/sys/proc.h.orig +++ sys/sys/proc.h @@ -778,6 +778,7 @@ TAILQ_HEAD(, kq_timer_cb_data) p_kqtim_stop; /* (c) */ LIST_ENTRY(proc) p_jaillist; /* (d) Jail process linkage. */ + u_int p_execblock; /* (c) Blockers for execve. */ }; #define p_session p_pgrp->pg_session @@ -891,6 +892,8 @@ sync core registered */ #define P2_MEMBAR_GLOBE 0x00400000 /* membar global expedited registered */ +#define P2_INEXEC_WAIT 0x80000000 /* Not same as in HEAD. + Waiters for P_INEXEC/p_execblock */ /* Flags protected by proctree_lock, kept in p_treeflags. */ #define P_TREE_ORPHANED 0x00000001 /* Reparented, on orphan list */ --- sys/sys/ptrace.h.orig +++ sys/sys/ptrace.h @@ -241,7 +241,20 @@ int proc_read_dbregs(struct thread *_td, struct dbreg *_dbreg); int proc_write_dbregs(struct thread *_td, struct dbreg *_dbreg); int proc_sstep(struct thread *_td); -int proc_rwmem(struct proc *_p, struct uio *_uio); + +#define PRVM_BLOCK_EXEC 0x00000001 +#define PRVM_CHECK_VISIBILITY 0x00000002 +#define PRVM_CHECK_DEBUG 0x00000004 + +#include +struct vmspace; +int proc_vmspace_ref(struct thread *_td, struct proc *_p, int _flags, + struct vmspace **_vmp); +void proc_vmspace_unref(struct thread *_td, struct proc *_p, int _flags, + struct vmspace *_vm); +ssize_t vmspace_iop(struct thread *td, struct vmspace *vm, vm_offset_t va, + void *buf, size_t len, enum uio_rw rw); +int proc_rwmem(struct proc *_p, struct uio *_uio, int _flags); ssize_t proc_readmem(struct thread *_td, struct proc *_p, vm_offset_t _va, void *_buf, size_t _len); ssize_t proc_writemem(struct thread *_td, struct proc *_p, vm_offset_t _va,