--- sys/compat/linprocfs/linprocfs.c.orig +++ sys/compat/linprocfs/linprocfs.c @@ -1315,19 +1315,13 @@ struct vattr vat; bool private; - PROC_LOCK(p); - error = p_candebug(td, p); - PROC_UNLOCK(p); - if (error) - return (error); - if (uio->uio_rw != UIO_READ) return (EOPNOTSUPP); - error = 0; - vm = vmspace_acquire_ref(p); - if (vm == NULL) - return (ESRCH); + error = proc_vmspace_ref(td, p, PRVM_BLOCK_EXEC | PRVM_CHECK_DEBUG, + &vm); + if (error != 0) + return (error); if (SV_CURPROC_FLAG(SV_LP64)) l_map_str = l64_map_str; @@ -1425,7 +1419,7 @@ } } vm_map_unlock_read(map); - vmspace_free(vm); + proc_vmspace_unref(td, p, PRVM_CHECK_DEBUG | PRVM_BLOCK_EXEC, vm); return (error); } --- sys/compat/linux/linux_misc.c.orig +++ sys/compat/linux/linux_misc.c @@ -2010,6 +2010,7 @@ u_int which; int flags; int error; + bool exec_blocked; if (args->new == NULL && args->old != NULL) { if (linux_get_dummy_limit(td, args->resource, &rlim)) { @@ -2037,6 +2038,7 @@ return (error); } + exec_blocked = false; flags = PGET_HOLD | PGET_NOTWEXIT; if (args->new != NULL) flags |= PGET_CANDEBUG; @@ -2049,6 +2051,14 @@ error = pget(args->pid, flags, &p); if (error != 0) return (error); + exec_blocked = true; + PROC_LOCK(p); + execve_block_wait(td, p); + error = args->new != NULL ? 
p_candebug(td, p) : + p_cansee(td, p); + PROC_UNLOCK(p); + if (error != 0) + goto out; } if (args->old != NULL) { PROC_LOCK(p); @@ -2071,6 +2081,11 @@ error = kern_proc_setrlimit(td, p, which, &nrlim); out: + if (exec_blocked) { + PROC_LOCK(p); + execve_unblock(td, p); + PROC_UNLOCK(p); + } PRELE(p); return (error); } --- sys/fs/cuse/cuse.c.orig +++ sys/fs/cuse/cuse.c @@ -916,7 +916,7 @@ }; PHOLD(proc_s); - error = proc_rwmem(proc_s, &uio); + error = proc_rwmem(proc_s, &uio, 0); PRELE(proc_s); } else if (proc_cur == proc_s) { @@ -935,7 +935,7 @@ }; PHOLD(proc_d); - error = proc_rwmem(proc_d, &uio); + error = proc_rwmem(proc_d, &uio, 0); PRELE(proc_d); } else { error = EINVAL; --- sys/fs/procfs/procfs_map.c.orig +++ sys/fs/procfs/procfs_map.c @@ -40,6 +40,7 @@ #include #include #include +#include #include #include #include @@ -93,15 +94,14 @@ bool wrap32; #endif - PROC_LOCK(p); - error = p_candebug(td, p); - PROC_UNLOCK(p); - if (error) - return (error); - if (uio->uio_rw != UIO_READ) return (EOPNOTSUPP); + error = proc_vmspace_ref(td, p, PRVM_BLOCK_EXEC | PRVM_CHECK_DEBUG, + &vm); + if (error != 0) + return (error); + #ifdef COMPAT_FREEBSD32 wrap32 = false; if (SV_CURPROC_FLAG(SV_ILP32)) { @@ -111,9 +111,6 @@ } #endif - vm = vmspace_acquire_ref(p); - if (vm == NULL) - return (ESRCH); map = &vm->vm_map; vm_map_lock_read(map); VM_MAP_ENTRY_FOREACH(entry, map) { @@ -238,6 +235,6 @@ } } vm_map_unlock_read(map); - vmspace_free(vm); + proc_vmspace_unref(td, p, PRVM_BLOCK_EXEC | PRVM_CHECK_DEBUG, vm); return (error); } --- sys/fs/procfs/procfs_mem.c.orig +++ sys/fs/procfs/procfs_mem.c @@ -60,11 +60,7 @@ if (uio->uio_resid == 0) return (0); - PROC_LOCK(p); - error = p_candebug(td, p); - PROC_UNLOCK(p); - if (error == 0) - error = proc_rwmem(p, uio); + error = proc_rwmem(p, uio, PRVM_CHECK_DEBUG | PRVM_BLOCK_EXEC); return (error); } --- sys/fs/pseudofs/pseudofs_vnops.c.orig +++ sys/fs/pseudofs/pseudofs_vnops.c @@ -37,6 +37,7 @@ #include #include #include +#include #include 
#include #include @@ -132,6 +133,7 @@ pfs_lookup_proc(pid_t pid, struct proc **p) { struct proc *proc; + struct thread *td; proc = pfind(pid); if (proc == NULL) @@ -141,8 +143,10 @@ return (0); } _PHOLD(proc); - PROC_UNLOCK(proc); + td = curthread; + execve_block_wait(td, proc); *p = proc; + PROC_UNLOCK(proc); return (1); } @@ -672,6 +676,7 @@ struct pfs_node *pn = pvd->pvd_pn; struct uio *uio = va->a_uio; struct proc *proc; + struct thread *td; struct sbuf *sb = NULL; int error, locked; off_t buflen, buflim; @@ -690,21 +695,30 @@ if (pn->pn_fill == NULL) PFS_RETURN (EIO); + td = curthread; + /* * This is necessary because either process' privileges may * have changed since the open() call. */ - if (!pfs_visible(curthread, pn, pvd->pvd_pid, &proc)) + if (!pfs_visible(td, pn, pvd->pvd_pid, &proc)) PFS_RETURN (EIO); - if (proc != NULL) { - _PHOLD(proc); - PROC_UNLOCK(proc); - } vhold(vn); locked = VOP_ISLOCKED(vn); VOP_UNLOCK(vn); + if (proc != NULL) { + _PHOLD(proc); + execve_block_wait(td, proc); + if (!pfs_visible_proc(td, pn, proc)) { + PROC_UNLOCK(proc); + error = EIO; + goto ret; + } + PROC_UNLOCK(proc); + } + if (pn->pn_flags & PFS_RAWRD) { PFS_TRACE(("%zd resid", uio->uio_resid)); error = pn_fill(curthread, proc, pn, NULL, uio); @@ -774,8 +788,12 @@ ret: vn_lock(vn, locked | LK_RETRY); vdrop(vn); - if (proc != NULL) - PRELE(proc); + if (proc != NULL) { + PROC_LOCK(proc); + execve_unblock(td, proc); + _PRELE(proc); + PROC_UNLOCK(proc); + } PFS_RETURN (error); } @@ -846,6 +864,7 @@ struct pfs_node *pd = pvd->pvd_pn; pid_t pid = pvd->pvd_pid; struct proc *p, *proc; + struct thread *td; struct pfs_node *pn; struct uio *uio; struct pfsentry *pfsent, *pfsent2; @@ -891,11 +910,13 @@ KASSERT(pid == NO_PID || proc != NULL, ("%s(): no process for pid %lu", __func__, (unsigned long)pid)); + td = curthread; if (pid != NO_PID) { PROC_LOCK(proc); /* check if the directory is visible to the caller */ if (!pfs_visible_proc(curthread, pd, proc)) { + execve_unblock(td, proc); 
_PRELE(proc); PROC_UNLOCK(proc); pfs_unlock(pd); @@ -955,6 +976,7 @@ resid -= PFS_DELEN; } if (proc != NULL) { + execve_unblock(td, proc); _PRELE(proc); PROC_UNLOCK(proc); } @@ -1079,6 +1101,7 @@ struct pfs_node *pn = pvd->pvd_pn; struct uio *uio = va->a_uio; struct proc *proc; + struct thread *td; struct sbuf sb; int error; @@ -1098,36 +1121,44 @@ if (uio->uio_resid > PFS_MAXBUFSIZ) PFS_RETURN (EIO); + td = curthread; + /* * This is necessary because either process' privileges may * have changed since the open() call. */ - if (!pfs_visible(curthread, pn, pvd->pvd_pid, &proc)) + if (!pfs_visible(td, pn, pvd->pvd_pid, &proc)) PFS_RETURN (EIO); if (proc != NULL) { _PHOLD(proc); + execve_block_wait(td, proc); + if (!pfs_visible_proc(td, pn, proc)) { + PROC_UNLOCK(proc); + error = EIO; + goto out; + } PROC_UNLOCK(proc); } if (pn->pn_flags & PFS_RAWWR) { error = pn_fill(curthread, proc, pn, NULL, uio); - if (proc != NULL) - PRELE(proc); - PFS_RETURN (error); + goto out; } sbuf_uionew(&sb, uio, &error); - if (error) { - if (proc != NULL) - PRELE(proc); - PFS_RETURN (error); - } + if (error != 0) + goto out; error = pn_fill(curthread, proc, pn, &sb, uio); sbuf_delete(&sb); - if (proc != NULL) - PRELE(proc); +out: + if (proc != NULL) { + PROC_LOCK(proc); + execve_unblock(td, proc); + _PRELE(proc); + PROC_UNLOCK(proc); + } PFS_RETURN (error); } --- sys/kern/kern_event.c.orig +++ sys/kern/kern_event.c @@ -49,6 +49,7 @@ #include #include #include +#include #include #include #include @@ -3381,10 +3382,6 @@ if ((u_int)arg2 > 2 || (u_int)arg2 == 0) return (EINVAL); - error = pget((pid_t)name[0], PGET_HOLD | PGET_CANDEBUG, &p); - if (error != 0) - return (error); - td = curthread; #ifdef COMPAT_FREEBSD32 compat32 = SV_CURPROC_FLAG(SV_ILP32); @@ -3392,6 +3389,17 @@ compat32 = false; #endif + error = pget((pid_t)name[0], PGET_NOTWEXIT, &p); + if (error != 0) + return (error); + + _PHOLD(p); + execve_block_wait(td, p); + error = p_candebug(td, p); + if (error != 0) + goto out1; + 
PROC_UNLOCK(p); + s = sbuf_new_for_sysctl(&sm, NULL, 0, req); if (s == NULL) { error = ENOMEM; @@ -3412,7 +3420,11 @@ sbuf_delete(s); out: - PRELE(p); + PROC_LOCK(p); +out1: + execve_unblock(td, p); + _PRELE(p); + PROC_UNLOCK(p); return (error); } --- sys/kern/kern_exec.c.orig +++ sys/kern/kern_exec.c @@ -26,7 +26,6 @@ * SUCH DAMAGE. */ -#include #include "opt_capsicum.h" #include "opt_hwpmc_hooks.h" #include "opt_hwt_hooks.h" @@ -46,6 +45,7 @@ #include #include #include +#include #include #include #include @@ -385,6 +385,77 @@ } } +/* + * Returns true if the execblock was obtained, in this case the + * process lock is kept. Returns false if the execblock was not + * obtained, but the function slept and the lock was dropped. + */ +bool +execve_block(struct thread *td, struct proc *p) +{ + PROC_LOCK_ASSERT(p, MA_OWNED); + MPASS(td == curthread); + MPASS(p != td->td_proc || (p->p_flag & P_INEXEC) == 0); + + if (p != td->td_proc && (p->p_flag & P_INEXEC) != 0) { + p->p_flag |= P_INEXEC_WAIT; + msleep(&p->p_execblock, &p->p_mtx, PDROP, "inexec", 0); + return (false); + } + MPASS(p->p_execblock < UINT_MAX); + p->p_execblock++; + return (true); +} + +/* + * Might drop the process lock internally, callers must re-check the + * invariants afterward. 
+ */ +void +execve_block_wait(struct thread *td, struct proc *p) +{ + bool first; + + PROC_ASSERT_HELD(p); + PROC_LOCK_ASSERT(p, MA_OWNED); + + for (first = true;; first = false) { + if (!first) + PROC_LOCK(p); + if (execve_block(td, p)) + return; + } +} + +void +execve_unblock(struct thread *td, struct proc *p) +{ + PROC_LOCK_ASSERT(p, MA_OWNED); + MPASS(td == curthread); + + MPASS(p->p_execblock > 0); + p->p_execblock--; + if (p->p_execblock == 0 && (p->p_flag & P_INEXEC_WAIT) != 0) { + p->p_flag &= ~P_INEXEC_WAIT; + wakeup(&p->p_execblock); + } +} + +void +execve_block_pass(struct thread *td) +{ + struct proc *p; + + MPASS(td == curthread); + p = td->td_proc; + PROC_LOCK_ASSERT(p, MA_OWNED); + + while (p->p_execblock != 0) { + p->p_flag |= P_INEXEC_WAIT; + msleep(&p->p_execblock, &p->p_mtx, 0, "exeblk", 0); + } +} + /* * In-kernel implementation of execve(). All arguments are assumed to be * userspace pointers from the passed thread. @@ -440,6 +511,7 @@ PROC_LOCK(p); KASSERT((p->p_flag & P_INEXEC) == 0, ("%s(): process already has P_INEXEC flag", __func__)); + execve_block_pass(td); p->p_flag |= P_INEXEC; PROC_UNLOCK(p); @@ -909,7 +981,10 @@ * as we're now a bona fide freshly-execed process. 
*/ KNOTE_LOCKED(p->p_klist, NOTE_EXEC); - p->p_flag &= ~P_INEXEC; + MPASS(p->p_execblock == 0); + if ((p->p_flag & P_INEXEC_WAIT) != 0) + wakeup(&p->p_execblock); + p->p_flag &= ~(P_INEXEC | P_INEXEC_WAIT); /* clear "fork but no exec" flag, as we _are_ execing */ p->p_acflag &= ~AFORK; @@ -1005,7 +1080,9 @@ exec_fail: /* we're done here, clear P_INEXEC */ PROC_LOCK(p); - p->p_flag &= ~P_INEXEC; + if ((p->p_flag & P_INEXEC_WAIT) != 0) + wakeup(&p->p_execblock); + p->p_flag &= ~(P_INEXEC | P_INEXEC_WAIT); PROC_UNLOCK(p); SDT_PROBE1(proc, , , exec__failure, error); --- sys/kern/kern_exit.c.orig +++ sys/kern/kern_exit.c @@ -323,6 +323,7 @@ while (p->p_lock > 0) msleep(&p->p_lock, &p->p_mtx, PWAIT, "exithold", 0); + MPASS(p->p_execblock == 0); PROC_UNLOCK(p); /* Drain the limit callout while we don't have the proc locked */ callout_drain(&p->p_limco); --- sys/kern/kern_fork.c.orig +++ sys/kern/kern_fork.c @@ -430,6 +430,7 @@ bzero(&p2->p_startzero, __rangeof(struct proc, p_startzero, p_endzero)); + p2->p_execblock = 0; /* Tell the prison that we exist. */ prison_proc_hold(p2->p_ucred->cr_prison); --- sys/kern/kern_proc.c.orig +++ sys/kern/kern_proc.c @@ -43,6 +43,7 @@ #include #include #include +#include #include #include #include @@ -1838,8 +1839,8 @@ } static int -proc_read_string(struct thread *td, struct proc *p, const char *sptr, char *buf, - size_t len) +proc_read_string(struct thread *td, struct vmspace *vm, const char *sptr, + char *buf, size_t len) { ssize_t n; @@ -1848,7 +1849,7 @@ * and is aligned at the end of the page, and the following page is not * mapped. 
*/ - n = proc_readmem(td, p, (vm_offset_t)sptr, buf, len); + n = vmspace_iop(td, vm, (vm_offset_t)sptr, buf, len, UIO_READ); if (n <= 0) return (ENOMEM); return (0); @@ -1864,8 +1865,8 @@ #ifdef COMPAT_FREEBSD32 static int -get_proc_vector32(struct thread *td, struct proc *p, char ***proc_vectorp, - size_t *vsizep, enum proc_vector_type type) +get_proc_vector32(struct thread *td, struct proc *p, struct vmspace *vm, + char ***proc_vectorp, size_t *vsizep, enum proc_vector_type type) { struct freebsd32_ps_strings pss; Elf32_Auxinfo aux; @@ -1876,8 +1877,8 @@ int i, error; error = 0; - if (proc_readmem(td, p, PROC_PS_STRINGS(p), &pss, sizeof(pss)) != - sizeof(pss)) + if (vmspace_iop(td, vm, PROC_PS_STRINGS(p), &pss, sizeof(pss), + UIO_READ) != sizeof(pss)) return (ENOMEM); switch (type) { case PROC_ARG: @@ -1900,8 +1901,8 @@ if (vptr % 4 != 0) return (ENOEXEC); for (ptr = vptr, i = 0; i < PROC_AUXV_MAX; i++) { - if (proc_readmem(td, p, ptr, &aux, sizeof(aux)) != - sizeof(aux)) + if (vmspace_iop(td, vm, ptr, &aux, sizeof(aux), + UIO_READ) != sizeof(aux)) return (ENOMEM); if (aux.a_type == AT_NULL) break; @@ -1917,7 +1918,7 @@ return (EINVAL); } proc_vector32 = malloc(size, M_TEMP, M_WAITOK); - if (proc_readmem(td, p, vptr, proc_vector32, size) != size) { + if (vmspace_iop(td, vm, vptr, proc_vector32, size, UIO_READ) != size) { error = ENOMEM; goto done; } @@ -1938,8 +1939,8 @@ #endif static int -get_proc_vector(struct thread *td, struct proc *p, char ***proc_vectorp, - size_t *vsizep, enum proc_vector_type type) +get_proc_vector(struct thread *td, struct proc *p, struct vmspace *vm, + char ***proc_vectorp, size_t *vsizep, enum proc_vector_type type) { struct ps_strings pss; Elf_Auxinfo aux; @@ -1949,11 +1950,13 @@ int i; #ifdef COMPAT_FREEBSD32 - if (SV_PROC_FLAG(p, SV_ILP32) != 0) - return (get_proc_vector32(td, p, proc_vectorp, vsizep, type)); + if (SV_PROC_FLAG(p, SV_ILP32) != 0) { + return (get_proc_vector32(td, p, vm, proc_vectorp, + vsizep, type)); + } #endif - 
if (proc_readmem(td, p, PROC_PS_STRINGS(p), &pss, sizeof(pss)) != - sizeof(pss)) + if (vmspace_iop(td, vm, PROC_PS_STRINGS(p), &pss, sizeof(pss), + UIO_READ) != sizeof(pss)) return (ENOMEM); switch (type) { case PROC_ARG: @@ -1991,8 +1994,8 @@ * to the allocated proc_vector. */ for (ptr = vptr, i = 0; i < PROC_AUXV_MAX; i++) { - if (proc_readmem(td, p, ptr, &aux, sizeof(aux)) != - sizeof(aux)) + if (vmspace_iop(td, vm, ptr, &aux, sizeof(aux), + UIO_READ) != sizeof(aux)) return (ENOMEM); if (aux.a_type == AT_NULL) break; @@ -2014,7 +2017,7 @@ return (EINVAL); /* In case we are built without INVARIANTS. */ } proc_vector = malloc(size, M_TEMP, M_WAITOK); - if (proc_readmem(td, p, vptr, proc_vector, size) != size) { + if (vmspace_iop(td, vm, vptr, proc_vector, size, UIO_READ) != size) { free(proc_vector, M_TEMP); return (ENOMEM); } @@ -2030,6 +2033,7 @@ get_ps_strings(struct thread *td, struct proc *p, struct sbuf *sb, enum proc_vector_type type) { + struct vmspace *vm; size_t done, len, nchr, vsize; int error, i; char **proc_vector, *sptr; @@ -2042,9 +2046,14 @@ */ nchr = 2 * (PATH_MAX + ARG_MAX); - error = get_proc_vector(td, p, &proc_vector, &vsize, type); + error = proc_vmspace_ref(td, p, PRVM_BLOCK_EXEC | + PRVM_CHECK_VISIBILITY, &vm); if (error != 0) return (error); + + error = get_proc_vector(td, p, vm, &proc_vector, &vsize, type); + if (error != 0) + goto out; for (done = 0, i = 0; i < (int)vsize && done < nchr; i++) { /* * The program may have scribbled into its argv array, e.g. 
to @@ -2054,7 +2063,7 @@ if (proc_vector[i] == NULL) break; for (sptr = proc_vector[i]; ; sptr += GET_PS_STRINGS_CHUNK_SZ) { - error = proc_read_string(td, p, sptr, pss_string, + error = proc_read_string(td, vm, sptr, pss_string, sizeof(pss_string)); if (error != 0) goto done; @@ -2071,6 +2080,8 @@ } done: free(proc_vector, M_TEMP); +out: + proc_vmspace_unref(td, p, PRVM_BLOCK_EXEC | PRVM_CHECK_VISIBILITY, vm); return (error); } @@ -2091,11 +2102,17 @@ int proc_getauxv(struct thread *td, struct proc *p, struct sbuf *sb) { + struct vmspace *vm; size_t vsize, size; char **auxv; int error; - error = get_proc_vector(td, p, &auxv, &vsize, PROC_AUX); + error = proc_vmspace_ref(td, p, PRVM_BLOCK_EXEC | PRVM_CHECK_DEBUG, + &vm); + if (error != 0) + return (error); + error = get_proc_vector(td, p, vm, &auxv, &vsize, PROC_AUX); + proc_vmspace_unref(td, p, PRVM_BLOCK_EXEC | PRVM_CHECK_DEBUG, vm); if (error == 0) { #ifdef COMPAT_FREEBSD32 if (SV_PROC_FLAG(p, SV_ILP32) != 0) @@ -2408,6 +2425,7 @@ int error, *name; struct vnode *vp; struct proc *p; + struct thread *td; vm_map_t map; struct vmspace *vm; @@ -2416,11 +2434,12 @@ return (EINVAL); name = (int *)arg1; + td = curthread; error = pget((pid_t)name[0], PGET_WANTREAD, &p); if (error != 0) return (error); - vm = vmspace_acquire_ref(p); - if (vm == NULL) { + error = proc_vmspace_ref(td, p, PRVM_CHECK_DEBUG, &vm); + if (error != 0) { PRELE(p); - return (ESRCH); + return (error); } @@ -2532,7 +2551,7 @@ } } vm_map_unlock_read(map); - vmspace_free(vm); + proc_vmspace_unref(td, p, PRVM_CHECK_DEBUG, vm); PRELE(p); free(kve, M_TEMP); return (error); @@ -2627,6 +2646,7 @@ struct ucred *cred; struct vnode *vp; struct vmspace *vm; + struct thread *td; vm_offset_t addr; unsigned int last_timestamp; int error; @@ -2638,10 +2658,11 @@ _PHOLD(p); PROC_UNLOCK(p); - vm = vmspace_acquire_ref(p); - if (vm == NULL) { + td = curthread; + error = proc_vmspace_ref(td, p, PRVM_CHECK_DEBUG, &vm); + if (error != 0) { PRELE(p); - return (ESRCH); + return (error); } kve
= malloc(sizeof(*kve), M_TEMP, M_WAITOK | M_ZERO); @@ -2745,7 +2766,7 @@ if (vp != NULL) { vn_fullpath(vp, &fullpath, &freepath); kve->kve_vn_type = vntype_to_kinfo(vp->v_type); - cred = curthread->td_ucred; + cred = td->td_ucred; vn_lock(vp, LK_SHARED | LK_RETRY); if (VOP_GETATTR(vp, &va, cred) == 0) { kve->kve_vn_fileid = va.va_fileid; @@ -2801,7 +2822,7 @@ } } vm_map_unlock_read(map); - vmspace_free(vm); + proc_vmspace_unref(td, p, PRVM_CHECK_DEBUG, vm); PRELE(p); free(kve, M_TEMP); return (error); @@ -2840,7 +2861,7 @@ struct kinfo_kstack *kkstp; int error, i, *name, numthreads; lwpid_t *lwpidarray; - struct thread *td; + struct thread *td, *ctd; struct stack *st; struct sbuf sb; struct proc *p; @@ -2851,7 +2872,8 @@ return (EINVAL); name = (int *)arg1; - error = pget((pid_t)name[0], PGET_NOTINEXEC | PGET_WANTREAD, &p); + ctd = curthread; + error = pget((pid_t)name[0], PGET_WANTREAD, &p); if (error != 0) return (error); @@ -2860,6 +2882,16 @@ lwpidarray = NULL; PROC_LOCK(p); + execve_block_wait(ctd, p); + error = p_candebug(ctd, p); + if (error != 0) { + execve_unblock(ctd, p); + _PRELE(p); + PROC_UNLOCK(p); + stack_destroy(st); + free(kkstp, M_TEMP); + return (error); + } do { if (lwpidarray != NULL) { free(lwpidarray, M_TEMP); @@ -2872,15 +2904,6 @@ PROC_LOCK(p); } while (numthreads < p->p_numthreads); - /* - * XXXRW: During the below loop, execve(2) and countless other sorts - * of changes could have taken place. Should we check to see if the - * vmspace has been replaced, or the like, in order to prevent - * giving a snapshot that spans, say, execve(2), with some threads - * before and some after? Among other things, the credentials could - * have changed, in which case the right to extract debug info might - * no longer be assured.
- */ i = 0; FOREACH_THREAD_IN_PROC(p, td) { KASSERT(i < numthreads, @@ -2913,7 +2934,10 @@ if (error) break; } - PRELE(p); + PROC_LOCK(p); + execve_unblock(ctd, p); + _PRELE(p); + PROC_UNLOCK(p); if (lwpidarray != NULL) free(lwpidarray, M_TEMP); stack_destroy(st); @@ -2969,8 +2993,9 @@ u_int namelen = arg2; struct rlimit rlim; struct proc *p; + struct thread *td; u_int which; - int flags, error; + int error; if (namelen != 2) return (EINVAL); @@ -2982,23 +3007,24 @@ if (req->newptr != NULL && req->newlen != sizeof(rlim)) return (EINVAL); - flags = PGET_HOLD | PGET_NOTWEXIT; - if (req->newptr != NULL) - flags |= PGET_CANDEBUG; - else - flags |= PGET_CANSEE; - error = pget((pid_t)name[0], flags, &p); + td = curthread; + error = pget((pid_t)name[0], PGET_NOTWEXIT, &p); if (error != 0) return (error); + _PHOLD(p); + execve_block_wait(td, p); + error = req->newptr != NULL ? p_candebug(td, p) : p_cansee(td, p); + if (error != 0) + goto errout1; /* * Retrieve limit. */ if (req->oldptr != NULL) { - PROC_LOCK(p); lim_rlimit_proc(p, which, &rlim); - PROC_UNLOCK(p); } + PROC_UNLOCK(p); + error = SYSCTL_OUT(req, &rlim, sizeof(rlim)); if (error != 0) goto errout; @@ -3013,7 +3039,11 @@ } errout: - PRELE(p); + PROC_LOCK(p); +errout1: + _PRELE(p); + execve_unblock(td, p); + PROC_UNLOCK(p); return (error); } @@ -3102,39 +3132,38 @@ int *name = (int *)arg1; u_int namelen = arg2; struct proc *p; - int flags, error, osrel; + int flags, error, old_osrel, osrel; if (namelen != 1) return (EINVAL); - if (req->newptr != NULL && req->newlen != sizeof(osrel)) - return (EINVAL); - - flags = PGET_HOLD | PGET_NOTWEXIT; - if (req->newptr != NULL) + flags = PGET_NOTWEXIT; + if (req->newptr != NULL) { + if (req->newlen != sizeof(osrel)) + return (EINVAL); + error = SYSCTL_IN(req, &osrel, sizeof(osrel)); + if (error != 0) + return (error); + if (osrel < 0) + return (EINVAL); flags |= PGET_CANDEBUG; - else + } else { flags |= PGET_CANSEE; + } error = pget((pid_t)name[0], flags, &p); if (error != 0) 
return (error); - - error = SYSCTL_OUT(req, &p->p_osrel, sizeof(p->p_osrel)); - if (error != 0) - goto errout; - - if (req->newptr != NULL) { - error = SYSCTL_IN(req, &osrel, sizeof(osrel)); - if (error != 0) - goto errout; - if (osrel < 0) { - error = EINVAL; - goto errout; - } - p->p_osrel = osrel; + if ((p->p_flag & P_INEXEC) != 0) { + error = EBUSY; + } else { + old_osrel = p->p_osrel; + if (req->newptr != NULL) + p->p_osrel = osrel; } -errout: - PRELE(p); + PROC_UNLOCK(p); + + if (error == 0) + error = SYSCTL_OUT(req, &old_osrel, sizeof(old_osrel)); return (error); } @@ -3271,6 +3300,7 @@ { struct kinfo_vm_layout kvm; struct proc *p; + struct thread *td; struct vmspace *vmspace; int error, *name; @@ -3278,6 +3308,7 @@ if ((u_int)arg2 != 1) return (EINVAL); + td = curthread; error = pget((pid_t)name[0], PGET_CANDEBUG, &p); if (error != 0) return (error); @@ -3289,8 +3320,13 @@ } } #endif - vmspace = vmspace_acquire_ref(p); + _PHOLD(p); PROC_UNLOCK(p); + error = proc_vmspace_ref(td, p, PRVM_CHECK_DEBUG, &vmspace); + if (error != 0) { + PRELE(p); + return (error); + } memset(&kvm, 0, sizeof(kvm)); kvm.kvm_min_user_addr = vm_map_min(&vmspace->vm_map); @@ -3342,7 +3378,8 @@ #ifdef COMPAT_FREEBSD32 out: #endif - vmspace_free(vmspace); + proc_vmspace_unref(td, p, PRVM_CHECK_DEBUG, vmspace); + PRELE(p); return (error); } --- sys/kern/kern_procctl.c.orig +++ sys/kern/kern_procctl.c @@ -40,6 +40,7 @@ #include #include #include +#include #include #include #include @@ -841,25 +842,30 @@ { struct vmspace *vm; vm_map_t map; - int state; + int error, state; PROC_LOCK_ASSERT(p, MA_OWNED); if ((p->p_flag & P_WEXIT) != 0) return (ESRCH); state = *(int *)data; + error = 0; switch (state) { case PROC_WX_MAPPINGS_PERMIT: - p->p_flag2 |= P2_WXORX_DISABLE; _PHOLD(p); PROC_UNLOCK(p); - vm = vmspace_acquire_ref(p); - if (vm != NULL) { + error = proc_vmspace_ref(td, p, PRVM_BLOCK_EXEC | + PRVM_CHECK_DEBUG, &vm); + if (error == 0) { map = &vm->vm_map; vm_map_lock(map); map->flags &= 
~MAP_WXORX; vm_map_unlock(map); - vmspace_free(vm); + PROC_LOCK(p); + p->p_flag2 |= P2_WXORX_DISABLE; + PROC_UNLOCK(p); + proc_vmspace_unref(td, p, PRVM_BLOCK_EXEC | + PRVM_CHECK_DEBUG, vm); } PROC_LOCK(p); _PRELE(p); @@ -868,10 +874,11 @@ p->p_flag2 |= P2_WXORX_ENABLE_EXEC; break; default: - return (EINVAL); + error = EINVAL; + break; } - return (0); + return (error); } static int --- sys/kern/kern_prot.c.orig +++ sys/kern/kern_prot.c @@ -49,6 +49,7 @@ #include #include #include +#include #include #include #include @@ -1016,6 +1017,8 @@ newcred = crget(); euip = uifind(euid); PROC_LOCK(p); + execve_block_pass(td); + /* * Copy credentials so other references do not see our changes. */ @@ -1070,6 +1073,7 @@ AUDIT_ARG_GID(gid); newcred = crget(); PROC_LOCK(p); + execve_block_pass(td); oldcred = crcopysafe(p, newcred); #ifdef MAC @@ -1168,6 +1172,7 @@ AUDIT_ARG_EGID(egid); newcred = crget(); PROC_LOCK(p); + execve_block_pass(td); oldcred = crcopysafe(p, newcred); #ifdef MAC @@ -1299,6 +1304,7 @@ newcred = crget(); crextend(newcred, ngrp); PROC_LOCK(p); + execve_block_pass(td); oldcred = crcopysafe(p, newcred); #ifdef MAC @@ -1355,6 +1361,7 @@ euip = uifind(euid); ruip = uifind(ruid); PROC_LOCK(p); + execve_block_pass(td); oldcred = crcopysafe(p, newcred); #ifdef MAC @@ -1434,6 +1441,7 @@ AUDIT_ARG_RGID(rgid); newcred = crget(); PROC_LOCK(p); + execve_block_pass(td); oldcred = crcopysafe(p, newcred); #ifdef MAC @@ -1504,6 +1512,7 @@ euip = uifind(euid); ruip = uifind(ruid); PROC_LOCK(p); + execve_block_pass(td); oldcred = crcopysafe(p, newcred); #ifdef MAC @@ -1595,6 +1604,7 @@ AUDIT_ARG_SGID(sgid); newcred = crget(); PROC_LOCK(p); + execve_block_pass(td); oldcred = crcopysafe(p, newcred); #ifdef MAC @@ -2355,11 +2365,11 @@ } /* - * Can't trace a process that's currently exec'ing. - * - * XXX: Note, this is not a security policy decision, it's a - * basic correctness/functionality decision. Therefore, this check - * should be moved to the caller's of p_candebug(). 
+ * Can't trace a process that's currently exec'ing. Otherwise + * the process vmspace might change, and the target might be + * loading a setugid image. The execve_block(9) and + * proc_vmspace_ref(9) allow to get the stable credentials and + * vmspace reference. */ if ((p->p_flag & P_INEXEC) != 0) return (EBUSY); --- sys/kern/kern_resource.c.orig +++ sys/kern/kern_resource.c @@ -45,6 +45,7 @@ #include #include #include +#include #include #include #include @@ -807,11 +808,11 @@ } static int -getrlimitusage_one(struct proc *p, u_int which, int flags, rlim_t *res) +getrlimitusage_one(struct proc *p, struct vmspace *vm, u_int which, int flags, + rlim_t *res) { struct thread *td; struct uidinfo *ui; - struct vmspace *vm; uid_t uid; int error; @@ -822,7 +823,6 @@ PROC_UNLOCK(p); ui = uifind(uid); - vm = vmspace_acquire_ref(p); switch (which) { case RLIMIT_CPU: @@ -900,7 +900,6 @@ break; } - vmspace_free(vm); uifree(ui); return (error); } @@ -908,12 +907,15 @@ int sys_getrlimitusage(struct thread *td, struct getrlimitusage_args *uap) { + struct proc *p; rlim_t res; int error; if ((uap->flags & ~(GETRLIMITUSAGE_EUID)) != 0) return (EINVAL); - error = getrlimitusage_one(curproc, uap->which, uap->flags, &res); + p = curproc; + error = getrlimitusage_one(p, p->p_vmspace, uap->which, uap->flags, + &res); if (error == 0) error = copyout(&res, uap->res, sizeof(res)); return (error); @@ -1768,6 +1770,8 @@ { rlim_t resval[RLIM_NLIMITS]; struct proc *p; + struct thread *td; + struct vmspace *vm; size_t len; int error, *name, i; @@ -1777,15 +1781,20 @@ if (req->newptr != NULL) return (EINVAL); - error = pget((pid_t)name[0], PGET_WANTREAD, &p); + td = curthread; + error = pget((pid_t)name[0], PGET_HOLD | PGET_NOTWEXIT, &p); if (error != 0) return (error); + error = proc_vmspace_ref(td, p, PRVM_BLOCK_EXEC | + PRVM_CHECK_VISIBILITY, &vm); + if (error != 0) + goto out; if ((u_int)arg2 == 1) { len = sizeof(resval); memset(resval, 0, sizeof(resval)); for (i = 0; i < RLIM_NLIMITS; i++) { 
- error = getrlimitusage_one(p, (unsigned)i, 0, + error = getrlimitusage_one(p, vm, (unsigned)i, 0, &resval[i]); if (error == ENXIO) { resval[i] = -1; @@ -1796,7 +1805,7 @@ } } else { len = sizeof(resval[0]); - error = getrlimitusage_one(p, (unsigned)name[1], 0, + error = getrlimitusage_one(p, vm, (unsigned)name[1], 0, &resval[0]); if (error == ENXIO) { resval[0] = -1; @@ -1805,6 +1814,8 @@ } if (error == 0) error = SYSCTL_OUT(req, resval, len); + proc_vmspace_unref(td, p, PRVM_BLOCK_EXEC | PRVM_CHECK_VISIBILITY, vm); +out: PRELE(p); return (error); } --- sys/kern/sys_process.c.orig +++ sys/kern/sys_process.c @@ -33,6 +33,7 @@ #include #include +#include #include #include #include @@ -333,25 +334,93 @@ return (ptrace_single_step(td)); } +static int +proc_vmspace_check_access(struct thread *td, struct proc *p, int flags) +{ + PROC_ASSERT_HELD(p); + if ((flags & PRVM_CHECK_DEBUG) != 0) + return (p_candebug(td, p)); + if ((flags & PRVM_CHECK_VISIBILITY) != 0) + return (p_cansee(td, p)); + return (0); +} + int -proc_rwmem(struct proc *p, struct uio *uio) +proc_vmspace_ref(struct thread *td, struct proc *p, int flags, + struct vmspace **vmp) +{ + struct vmspace *vm; + int error; + + MPASS((flags & ~(PRVM_BLOCK_EXEC | PRVM_CHECK_VISIBILITY | + PRVM_CHECK_DEBUG)) == 0); + MPASS((flags & (PRVM_CHECK_VISIBILITY | PRVM_CHECK_DEBUG)) != + (PRVM_CHECK_VISIBILITY | PRVM_CHECK_DEBUG)); + + PROC_LOCK(p); + if (p != td->td_proc) { + PROC_ASSERT_HELD(p); + + /* + * Make sure that the vmspace doesn't switch out from + * under us. 
+ */ + if ((flags & PRVM_BLOCK_EXEC) != 0) { + for (;;) { + if (!execve_block(td, p)) { + PROC_LOCK(p); + continue; + } + error = proc_vmspace_check_access(td, p, flags); + if (error != 0) { + execve_unblock(td, p); + PROC_UNLOCK(p); + return (error); + } + break; + } + } else { + error = proc_vmspace_check_access(td, p, flags); + if (error != 0) { + PROC_UNLOCK(p); + return (error); + } + } + } + vm = vmspace_acquire_ref(p); + if (vm == NULL) { + if (p != td->td_proc && (flags & PRVM_BLOCK_EXEC) != 0) + execve_unblock(td, p); + PROC_UNLOCK(p); + return (ESRCH); + } + PROC_UNLOCK(p); + *vmp = vm; + return (0); +} + +void +proc_vmspace_unref(struct thread *td, struct proc *p, int flags, + struct vmspace *vm) +{ + vmspace_free(vm); + if (p != td->td_proc && (flags & PRVM_BLOCK_EXEC) != 0) { + PROC_LOCK(p); + PROC_ASSERT_HELD(p); + execve_unblock(td, p); + PROC_UNLOCK(p); + } +} + +static int +vmspace_rwmem(struct vmspace *vm, struct uio *uio) { vm_map_t map; vm_offset_t pageno; /* page number */ vm_prot_t reqprot; int error, fault_flags, page_offset, writing; - /* - * Make sure that the process' vmspace remains live. - */ - if (p != curproc) - PROC_ASSERT_HELD(p); - PROC_LOCK_ASSERT(p, MA_NOTOWNED); - - /* - * The map we want... - */ - map = &p->p_vmspace->vm_map; + map = &vm->vm_map; /* * If we are writing, then we request vm_fault() to create a private @@ -363,9 +432,9 @@ fault_flags = writing ? 
VM_FAULT_DIRTY : VM_FAULT_NORMAL; if (writing) { - error = priv_check_cred(p->p_ucred, PRIV_PROC_MEM_WRITE); - if (error) - return (error); + error = priv_check(curthread, PRIV_PROC_MEM_WRITE); + if (error != 0) + goto out; } /* @@ -423,16 +492,34 @@ } while (error == 0 && uio->uio_resid > 0); +out: return (error); } -static ssize_t -proc_iop(struct thread *td, struct proc *p, vm_offset_t va, void *buf, +int +proc_rwmem(struct proc *p, struct uio *uio, int flags) +{ + struct vmspace *vm; + struct thread *td; + int error; + + td = curthread; + error = proc_vmspace_ref(td, p, flags, &vm); + if (error != 0) + return (error); + error = vmspace_rwmem(vm, uio); + proc_vmspace_unref(td, p, flags, vm); + return (error); +} + +ssize_t +vmspace_iop(struct thread *td, struct vmspace *vm, vm_offset_t va, void *buf, size_t len, enum uio_rw rw) { struct iovec iov; struct uio uio; ssize_t slen; + int error; MPASS(len < SSIZE_MAX); slen = (ssize_t)len; @@ -446,8 +533,8 @@ uio.uio_segflg = UIO_SYSSPACE; uio.uio_rw = rw; uio.uio_td = td; - proc_rwmem(p, &uio); - if (uio.uio_resid == slen) + error = vmspace_rwmem(vm, &uio); + if (error != 0 || uio.uio_resid == slen) return (-1); return (slen - uio.uio_resid); } @@ -457,7 +544,7 @@ size_t len) { - return (proc_iop(td, p, va, buf, len, UIO_READ)); + return (vmspace_iop(td, p->p_vmspace, va, buf, len, UIO_READ)); } ssize_t @@ -465,7 +552,7 @@ size_t len) { - return (proc_iop(td, p, va, buf, len, UIO_WRITE)); + return (vmspace_iop(td, p->p_vmspace, va, buf, len, UIO_WRITE)); } static int @@ -1458,7 +1545,7 @@ goto out; } PROC_UNLOCK(p); - error = proc_rwmem(p, &uio); + error = proc_rwmem(p, &uio, 0); piod->piod_len -= uio.uio_resid; PROC_LOCK(p); break; --- sys/sys/imgact.h.orig +++ sys/sys/imgact.h @@ -122,6 +122,10 @@ int exec_copyin_args(struct image_args *, const char *, char **, char **); int pre_execve(struct thread *td, struct vmspace **oldvmspace); void post_execve(struct thread *td, int error, struct vmspace *oldvmspace); +bool 
execve_block(struct thread *td, struct proc *p); +void execve_block_wait(struct thread *td, struct proc *p); +void execve_unblock(struct thread *td, struct proc *p); +void execve_block_pass(struct thread *td); #endif #endif /* !_SYS_IMGACT_H_ */ --- sys/sys/proc.h.orig +++ sys/sys/proc.h @@ -777,6 +777,7 @@ TAILQ_HEAD(, kq_timer_cb_data) p_kqtim_stop; /* (c) */ LIST_ENTRY(proc) p_jaillist; /* (d) Jail process linkage. */ + u_int p_execblock; /* (c) Blockers for execve. */ }; #define p_session p_pgrp->pg_session @@ -843,7 +844,7 @@ #define P_STATCHILD 0x08000000 /* Child process stopped or exited. */ #define P_INMEM 0x10000000 /* Loaded into memory, always set. */ #define P_UNUSED1 0x20000000 /* --available-- */ -#define P_UNUSED2 0x40000000 /* --available-- */ +#define P_INEXEC_WAIT 0x40000000 /* Waiters for P_INEXEC/p_execblock */ #define P_PPTRACE 0x80000000 /* PT_TRACEME by vforked child. */ #define P_STOPPED (P_STOPPED_SIG|P_STOPPED_SINGLE|P_STOPPED_TRACE) --- sys/sys/ptrace.h.orig +++ sys/sys/ptrace.h @@ -247,7 +247,20 @@ int proc_read_dbregs(struct thread *_td, struct dbreg *_dbreg); int proc_write_dbregs(struct thread *_td, struct dbreg *_dbreg); int proc_sstep(struct thread *_td); -int proc_rwmem(struct proc *_p, struct uio *_uio); + +#define PRVM_BLOCK_EXEC 0x00000001 +#define PRVM_CHECK_VISIBILITY 0x00000002 +#define PRVM_CHECK_DEBUG 0x00000004 + +#include +struct vmspace; +int proc_vmspace_ref(struct thread *_td, struct proc *_p, int _flags, + struct vmspace **_vmp); +void proc_vmspace_unref(struct thread *_td, struct proc *_p, int _flags, + struct vmspace *_vm); +ssize_t vmspace_iop(struct thread *td, struct vmspace *vm, vm_offset_t va, + void *buf, size_t len, enum uio_rw rw); +int proc_rwmem(struct proc *_p, struct uio *_uio, int _flags); ssize_t proc_readmem(struct thread *_td, struct proc *_p, vm_offset_t _va, void *_buf, size_t _len); ssize_t proc_writemem(struct thread *_td, struct proc *_p, vm_offset_t _va,