Index: compat/freebsd32/syscalls.master =================================================================== RCS file: /usr/cvs/src/sys/compat/freebsd32/syscalls.master,v retrieving revision 1.88 diff -u -r1.88 syscalls.master --- compat/freebsd32/syscalls.master 20 Dec 2006 19:36:03 -0000 1.88 +++ compat/freebsd32/syscalls.master 8 Mar 2007 22:52:04 -0000 @@ -781,3 +781,6 @@ 474 AUE_NULL NOPROTO { int sctp_generic_recvmsg(int sd, struct iovec *iov, int iovlen, \ struct sockaddr * from, __socklen_t *fromlenaddr, \ struct sctp_sndrcvinfo *sinfo, int *msg_flags); } +475 AUE_NULL NOPROTO { int shm_open(const char *path, int flags, \ + mode_t mode); } +476 AUE_NULL NOPROTO { int shm_unlink(const char *path); } Index: conf/files =================================================================== RCS file: /usr/cvs/src/sys/conf/files,v retrieving revision 1.1181 diff -u -r1.1181 files --- conf/files 5 Mar 2007 13:24:01 -0000 1.1181 +++ conf/files 8 Mar 2007 22:52:04 -0000 @@ -1449,6 +1449,7 @@ kern/uipc_mbuf2.c standard kern/uipc_mqueue.c optional p1003_1b_mqueue kern/uipc_sem.c optional p1003_1b_semaphores +kern/uipc_shm.c standard kern/uipc_sockbuf.c standard kern/uipc_socket.c standard kern/uipc_socket2.c standard Index: kern/kern_descrip.c =================================================================== RCS file: /usr/cvs/src/sys/kern/kern_descrip.c,v retrieving revision 1.305 diff -u -r1.305 kern_descrip.c --- kern/kern_descrip.c 5 Mar 2007 13:10:57 -0000 1.305 +++ kern/kern_descrip.c 8 Mar 2007 22:52:04 -0000 @@ -2520,6 +2520,8 @@ return ("crpt"); case DTYPE_MQUEUE: return ("mque"); + case DTYPE_SHM: + return ("shm"); default: return ("unkn"); } Index: kern/sys_generic.c =================================================================== RCS file: /usr/cvs/src/sys/kern/sys_generic.c,v retrieving revision 1.154 diff -u -r1.154 sys_generic.c --- kern/sys_generic.c 5 Mar 2007 13:10:57 -0000 1.154 +++ kern/sys_generic.c 8 Mar 2007 22:52:04 -0000 @@ -68,6 +68,9 @@ 
#ifdef KTRACE #include #endif + +#include + #include #include @@ -476,6 +479,60 @@ return (error); } +/* + * Truncate a file given a file descriptor. + * + * Can't use fget_write() here, since must return EINVAL and not EBADF if the + * descriptor isn't writable. + */ +int +kern_ftruncate(td, fd, length) + struct thread *td; + int fd; + off_t length; +{ + struct file *fp; + int error; + + AUDIT_ARG(fd, fd); + if (length < 0) + return (EINVAL); + error = fget(td, fd, &fp); + if (error) + return (error); + AUDIT_ARG(file, td->td_proc, fp); + if (!(fp->f_flag & FWRITE)) { + fdrop(fp, td); + return (EINVAL); + } + if (!(fp->f_ops->fo_flags & DFLAG_TRUNCATABLE)) + error = EINVAL; + else + error = fo_truncate(fp, length, td->td_ucred, td); + fdrop(fp, td); + return (error); +} + +int +ftruncate(td, uap) + struct thread *td; + struct ftruncate_args *uap; +{ + + return (kern_ftruncate(td, uap->fd, uap->length)); +} + +#if defined(COMPAT_43) +int +oftruncate(td, uap) + struct thread *td; + struct oftruncate_args *uap; +{ + + return (kern_ftruncate(td, uap->fd, uap->length)); +} +#endif /* COMPAT_43 */ + #ifndef _SYS_SYSPROTO_H_ struct ioctl_args { int fd; Index: kern/syscalls.master =================================================================== RCS file: /usr/cvs/src/sys/kern/syscalls.master,v retrieving revision 1.231 diff -u -r1.231 syscalls.master --- kern/syscalls.master 3 Nov 2006 15:23:14 -0000 1.231 +++ kern/syscalls.master 8 Mar 2007 22:52:04 -0000 @@ -835,5 +835,8 @@ 474 AUE_NULL STD { int sctp_generic_recvmsg(int sd, struct iovec *iov, int iovlen, \ struct sockaddr * from, __socklen_t *fromlenaddr, \ struct sctp_sndrcvinfo *sinfo, int *msg_flags); } +475 AUE_NULL STD { int shm_open(const char *path, int flags, \ + mode_t mode); } +476 AUE_NULL STD { int shm_unlink(const char *path); } ; Please copy any additions and changes to the following compatability tables: ; sys/compat/freebsd32/syscalls.master Index: kern/uipc_shm.c 
=================================================================== RCS file: kern/uipc_shm.c diff -N kern/uipc_shm.c --- /dev/null 1 Jan 1970 00:00:00 -0000 +++ kern/uipc_shm.c 12 Mar 2007 20:39:51 -0000 @@ -0,0 +1,567 @@ +/*- + * Copyright (c) 2006 Robert N. M. Watson + * Copyright (c) 2007 John H. Baldwin + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +/* + * Support for shared swap-backed anonymous memory objects via + * shm_open(2) and shm_unlink(2). While most of the implementation is + * here, vm_mmap.c contains mapping logic changes. + * + * TODO: + * + * (3) Resource limits? Does this need its own resource limits or are the + * existing limits in mmap(2) sufficient? 
+ * + * (4) Partial page truncation. vnode_pager_setsize() will zero any parts + * of a partially mapped page as a result of ftruncate(2)/truncate(2). + * We can do the same (with the same pmap evil), but do we need to + * worry about the bits on disk if the page is swapped out or will the + * swapper zero the parts of a page that are invalid if the page is + * swapped back in for us? + */ + +#include +__FBSDID("$FreeBSD$"); + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +struct shmfd { + size_t shm_size; + vm_object_t shm_object; + int shm_refs; + uid_t shm_uid; + gid_t shm_gid; + mode_t shm_mode; + + /* + * Values maintained solely to make this a better-behaved file + * descriptor for fstat() to run on. + */ + struct timespec shm_atime; + struct timespec shm_mtime; + struct timespec shm_ctime; +}; + +struct shm_mapping { + char *sm_path; + Fnv32_t sm_fnv; + struct shmfd *sm_shmfd; + LIST_ENTRY(shm_mapping) sm_link; +}; + +static MALLOC_DEFINE(M_SHMFD, "shmfd", "shared memory file descriptor"); +static LIST_HEAD(, shm_mapping) *shm_dictionary; +static struct sx shm_dict_lock; +static u_long shm_hash; + +#define SHM_HASH(fnv) (&shm_dictionary[(fnv) & shm_hash]) + +static int shm_access(struct shmfd *shmfd, struct ucred *ucred, int flags); +static struct shmfd *shm_alloc(struct ucred *ucred, mode_t mode); +static void shm_dict_init(void *arg); +static void shm_drop(struct shmfd *shmfd); +static struct shmfd *shm_hold(struct shmfd *shmfd); +static void shm_insert(char *path, Fnv32_t fnv, struct shmfd *shmfd); +static struct shmfd *shm_lookup(char *path, Fnv32_t fnv); +static int shm_remove(char *path, Fnv32_t fnv, struct ucred *ucred); +static void shm_trunc(struct shmfd *shmfd, off_t length); + +static fo_rdwr_t shm_read; +static fo_rdwr_t 
shm_write; +static fo_ioctl_t shm_ioctl; +static fo_poll_t shm_poll; +static fo_kqfilter_t shm_kqfilter; +static fo_stat_t shm_stat; +static fo_close_t shm_close; +static fo_truncate_t shm_truncate; + +/* File descriptor operations. */ +static struct fileops shm_ops = { + .fo_read = shm_read, + .fo_write = shm_write, + .fo_ioctl = shm_ioctl, + .fo_poll = shm_poll, + .fo_kqfilter = shm_kqfilter, + .fo_stat = shm_stat, + .fo_close = shm_close, + .fo_truncate = shm_truncate, + .fo_flags = DFLAG_PASSABLE | DFLAG_TRUNCATABLE +}; + +static int +shm_read(struct file *fp, struct uio *uio, struct ucred *active_cred, + int flags, struct thread *td) +{ + + return (EOPNOTSUPP); +} + +static int +shm_write(struct file *fp, struct uio *uio, struct ucred *active_cred, + int flags, struct thread *td) +{ + + return (EOPNOTSUPP); +} + +static int +shm_ioctl(struct file *fp, u_long com, void *data, + struct ucred *active_cred, struct thread *td) +{ + + return (EOPNOTSUPP); +} + +static int +shm_poll(struct file *fp, int events, struct ucred *active_cred, + struct thread *td) +{ + + return (EOPNOTSUPP); +} + +static int +shm_kqfilter(struct file *fp, struct knote *kn) +{ + + return (EOPNOTSUPP); +} + +static int +shm_stat(struct file *fp, struct stat *sb, struct ucred *active_cred, + struct thread *td) +{ + struct shmfd *shmfd; + + shmfd = fp->f_data; + + /* + * Attempt to return sanish values for fstat() on a memory file + * descriptor. 
+ */ + bzero(sb, sizeof(*sb)); + sb->st_mode = S_IFREG | shmfd->shm_mode; /* XXX */ + sb->st_blksize = PAGE_SIZE; + sb->st_size = shmfd->shm_size; + sb->st_blocks = (sb->st_size + sb->st_blksize - 1) / sb->st_blksize; + sb->st_atimespec = shmfd->shm_atime; + sb->st_ctimespec = shmfd->shm_ctime; + sb->st_mtimespec = shmfd->shm_mtime; + sb->st_uid = shmfd->shm_uid; + sb->st_gid = shmfd->shm_gid; + + return (0); +} + +static int +shm_close(struct file *fp, struct thread *td) +{ + struct shmfd *shmfd; + + shmfd = fp->f_data; + fp->f_data = NULL; + shm_drop(shmfd); + + return (0); +} + +static void +shm_trunc(struct shmfd *shmfd, off_t length) +{ + vm_object_t object; + vm_page_t m; + vm_pindex_t nobjsize; + + object = shmfd->shm_object; + VM_OBJECT_LOCK(object); + if (length == shmfd->shm_size) { + VM_OBJECT_UNLOCK(object); + return; + } + nobjsize = OFF_TO_IDX(length + PAGE_MASK); + + /* Are we shrinking? If so, trim the end. */ + if (length < shmfd->shm_size) { + /* Toss in memory pages. */ + if (nobjsize < object->size) + vm_object_page_remove(object, nobjsize, object->size, + FALSE); + + /* Toss pages from swap. */ + if (object->type == OBJT_SWAP) + swap_pager_freespace(object, nobjsize, + object->size - nobjsize); + + /* + * If the last page is partially mapped, then zero out + * the garbage at the end of the page. See comments + * in vnode_pager_setsize() for more details. + * + * XXXJHB: This handles in memory pages, but what about + * a page swapped out to disk? 
+ */ + if ((length & PAGE_MASK) && + (m = vm_page_lookup(object, OFF_TO_IDX(length))) != NULL && + m->valid != 0) { + int base = (int)length & PAGE_MASK; + int size = PAGE_SIZE - base; + + pmap_zero_page_area(m, base, size); + vm_page_lock_queues(); + pmap_remove_all(m); + vm_page_set_validclean(m, base, size); + if (m->dirty != 0) + m->dirty = VM_PAGE_BITS_ALL; + vm_page_unlock_queues(); + } + } + shmfd->shm_size = length; + object->size = nobjsize; + VM_OBJECT_UNLOCK(object); +} + +static int +shm_truncate(struct file *fp, off_t length, struct ucred *active_cred, + struct thread *td) +{ + struct shmfd *shmfd; + + shmfd = fp->f_data; + shm_trunc(shmfd, length); + return (0); +} + +/* + * shmfd object management including creation and reference counting + * routines. + */ +static struct shmfd * +shm_alloc(struct ucred *ucred, mode_t mode) +{ + struct shmfd *shmfd; + + shmfd = malloc(sizeof(*shmfd), M_SHMFD, M_WAITOK | M_ZERO); + shmfd->shm_size = 0; + shmfd->shm_uid = ucred->cr_uid; + shmfd->shm_gid = ucred->cr_gid; + shmfd->shm_mode = mode; + shmfd->shm_object = vm_pager_allocate(OBJT_DEFAULT, NULL, + shmfd->shm_size, VM_PROT_DEFAULT, 0); + KASSERT(shmfd->shm_object != NULL, ("shm_create: vm_pager_allocate")); + vfs_timestamp(&shmfd->shm_ctime); + shmfd->shm_atime = shmfd->shm_mtime = shmfd->shm_ctime; + refcount_init(&shmfd->shm_refs, 1); + + return (shmfd); +} + +static struct shmfd * +shm_hold(struct shmfd *shmfd) +{ + + refcount_acquire(&shmfd->shm_refs); + return (shmfd); +} + +static void +shm_drop(struct shmfd *shmfd) +{ + + if (refcount_release(&shmfd->shm_refs)) { + vm_object_deallocate(shmfd->shm_object); + free(shmfd, M_SHMFD); + } +} + +/* + * Determine if the credentials have sufficient permissions for a + * specified combination of FREAD and FWRITE. 
+ */ +static int +shm_access(struct shmfd *shmfd, struct ucred *ucred, int flags) +{ + int acc_mode; + + acc_mode = 0; + if (flags & FREAD) + acc_mode |= VREAD; + if (flags & FWRITE) + acc_mode |= VWRITE; + return (vaccess(VREG, shmfd->shm_mode, shmfd->shm_uid, shmfd->shm_gid, + acc_mode, ucred, NULL)); +} + +/* + * Dictionary management. We maintain an in-kernel dictionary to map + * paths to shmfd objects. We use the FNV hash on the path to store + * the mappings in a hash table. + */ +static void +shm_dict_init(void *arg) +{ + + sx_init(&shm_dict_lock, "shm dictionary"); + shm_dictionary = hashinit(1024, M_SHMFD, &shm_hash); +} +SYSINIT(shm_dict_init, SI_SUB_SYSV_SHM, SI_ORDER_ANY, shm_dict_init, NULL); + +static struct shmfd * +shm_lookup(char *path, Fnv32_t fnv) +{ + struct shm_mapping *map; + + LIST_FOREACH(map, SHM_HASH(fnv), sm_link) { + if (map->sm_fnv != fnv) + continue; + if (strcmp(map->sm_path, path) == 0) + return (map->sm_shmfd); + } + + return (NULL); +} + +static void +shm_insert(char *path, Fnv32_t fnv, struct shmfd *shmfd) +{ + struct shm_mapping *map; + + map = malloc(sizeof(struct shm_mapping), M_SHMFD, M_WAITOK); + map->sm_path = path; + map->sm_fnv = fnv; + map->sm_shmfd = shm_hold(shmfd); + LIST_INSERT_HEAD(SHM_HASH(fnv), map, sm_link); +} + +static int +shm_remove(char *path, Fnv32_t fnv, struct ucred *ucred) +{ + struct shm_mapping *map; + int error; + + LIST_FOREACH(map, SHM_HASH(fnv), sm_link) { + if (map->sm_fnv != fnv) + continue; + if (strcmp(map->sm_path, path) == 0) { + /* XXX: Should this require FWRITE? */ + error = shm_access(map->sm_shmfd, ucred, FREAD); + if (error) + return (error); + LIST_REMOVE(map, sm_link); + shm_drop(map->sm_shmfd); + free(map->sm_path, M_SHMFD); + free(map, M_SHMFD); + return (0); + } + } + + return (ENOENT); +} + +/* System calls. 
*/ +int +shm_open(struct thread *td, struct shm_open_args *uap) +{ + struct filedesc *fdp; + struct shmfd *shmfd; + struct file *fp; + char *path; + Fnv32_t fnv; + mode_t cmode; + int fd, error; + + if ((uap->flags & O_ACCMODE) != O_RDONLY && + (uap->flags & O_ACCMODE) != O_RDWR) + return (EINVAL); + + if ((uap->flags & ~(O_ACCMODE | O_CREAT | O_EXCL | O_TRUNC)) != 0) + return (EINVAL); + + fdp = td->td_proc->p_fd; + cmode = (uap->mode & ~fdp->fd_cmask) & ACCESSPERMS; + + error = falloc(td, &fp, &fd); + if (error) + return (error); + + /* A NULL path pointer creates an anonymous object. */ + if (uap->path == NULL) { + /* A read-only anonymous object is pointless. */ + if ((uap->flags & O_ACCMODE) == O_RDONLY) { + fdclose(fdp, fp, fd, td); + fdrop(fp, td); + return (EINVAL); + } + shmfd = shm_alloc(td->td_ucred, cmode); + } else { + path = malloc(MAXPATHLEN + 1, M_SHMFD, M_WAITOK); + error = copyinstr(uap->path, path, MAXPATHLEN + 1, NULL); + + /* Require paths to start with a '/' character. */ + if (error == 0 && path[0] != '/') + error = EINVAL; + if (error) { + fdclose(fdp, fp, fd, td); + fdrop(fp, td); + free(path, M_SHMFD); + return (error); + } + + fnv = fnv_32_str(path, FNV1_32_INIT); + sx_xlock(&shm_dict_lock); + shmfd = shm_lookup(path, fnv); + if (shmfd == NULL) { + /* Object does not yet exist, create it if requested. */ + if (uap->flags & O_CREAT) { + shmfd = shm_alloc(td->td_ucred, cmode); + shm_insert(path, fnv, shmfd); + } else { + free(path, M_SHMFD); + error = ENOENT; + } + } else { + /* + * Object already exists, obtain a new + * reference if requested and permitted. + */ + free(path, M_SHMFD); + if ((uap->flags & (O_CREAT | O_EXCL)) == + (O_CREAT | O_EXCL)) + error = EEXIST; + else + error = shm_access(shmfd, td->td_ucred, + FFLAGS(uap->flags & O_ACCMODE)); + + if (error == 0) { + shm_hold(shmfd); + + /* + * Truncate the file back to zero + * length if O_TRUNC was specified and + * the object was opened with + * read/write. 
+ */ + if ((uap->flags & (O_ACCMODE | O_TRUNC)) == + (O_RDWR | O_TRUNC)) + shm_trunc(shmfd, 0); + } + } + sx_xunlock(&shm_dict_lock); + + if (error) { + fdclose(fdp, fp, fd, td); + fdrop(fp, td); + return (error); + } + } + + FILE_LOCK(fp); + fp->f_flag = FFLAGS(uap->flags & O_ACCMODE); + fp->f_type = DTYPE_SHM; + fp->f_data = shmfd; + fp->f_ops = &shm_ops; + FILE_UNLOCK(fp); + + FILEDESC_LOCK_FAST(fdp); + if (fdp->fd_ofiles[fd] == fp) + fdp->fd_ofileflags[fd] |= UF_EXCLOSE; + FILEDESC_UNLOCK_FAST(fdp); + td->td_retval[0] = fd; + fdrop(fp, td); + + return (0); +} + +int +shm_unlink(struct thread *td, struct shm_unlink_args *uap) +{ + char *path; + Fnv32_t fnv; + int error; + + path = malloc(MAXPATHLEN + 1, M_TEMP, M_WAITOK); + error = copyinstr(uap->path, path, MAXPATHLEN + 1, NULL); + if (error) { + free(path, M_TEMP); + return (error); + } + + fnv = fnv_32_str(path, FNV1_32_INIT); + sx_xlock(&shm_dict_lock); + error = shm_remove(path, fnv, td->td_ucred); + sx_xunlock(&shm_dict_lock); + free(path, M_TEMP); + + return (error); +} + +/* + * mmap() helper to validate mmap() requests against shm object state + * and give mmap() the vm_object to use for the mapping. + */ +int +shm_mmap(struct shmfd *shmfd, vm_size_t objsize, vm_ooffset_t foff, + vm_object_t *obj) +{ + + /* + * XXXRW: This validation is probably insufficient, and subject to + * sign errors. It should be fixed. 
+ */ + if (foff >= shmfd->shm_size || foff + objsize > shmfd->shm_size) + return (EINVAL); + + vfs_timestamp(&shmfd->shm_atime); + vm_object_reference(shmfd->shm_object); + *obj = shmfd->shm_object; + return (0); +} Index: kern/vfs_syscalls.c =================================================================== RCS file: /usr/cvs/src/sys/kern/vfs_syscalls.c,v retrieving revision 1.432 diff -u -r1.432 vfs_syscalls.c --- kern/vfs_syscalls.c 5 Mar 2007 13:10:58 -0000 1.432 +++ kern/vfs_syscalls.c 8 Mar 2007 22:52:04 -0000 @@ -3053,68 +3053,6 @@ return (error); } -/* - * Truncate a file given a file descriptor. - */ -#ifndef _SYS_SYSPROTO_H_ -struct ftruncate_args { - int fd; - int pad; - off_t length; -}; -#endif -int -ftruncate(td, uap) - struct thread *td; - register struct ftruncate_args /* { - int fd; - int pad; - off_t length; - } */ *uap; -{ - struct mount *mp; - struct vattr vattr; - struct vnode *vp; - struct file *fp; - int vfslocked; - int error; - - AUDIT_ARG(fd, uap->fd); - if (uap->length < 0) - return(EINVAL); - if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0) - return (error); - if ((fp->f_flag & FWRITE) == 0) { - fdrop(fp, td); - return (EINVAL); - } - vp = fp->f_vnode; - vfslocked = VFS_LOCK_GIANT(vp->v_mount); - if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) - goto drop; - VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE); - vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td); - AUDIT_ARG(vnode, vp, ARG_VNODE1); - if (vp->v_type == VDIR) - error = EISDIR; -#ifdef MAC - else if ((error = mac_check_vnode_write(td->td_ucred, fp->f_cred, - vp))) { - } -#endif - else if ((error = vn_writechk(vp)) == 0) { - VATTR_NULL(&vattr); - vattr.va_size = uap->length; - error = VOP_SETATTR(vp, &vattr, fp->f_cred, td); - } - VOP_UNLOCK(vp, 0, td); - vn_finished_write(mp); -drop: - VFS_UNLOCK_GIANT(vfslocked); - fdrop(fp, td); - return (error); -} - #if defined(COMPAT_43) /* * Truncate a file given its path name. 
@@ -3143,34 +3081,6 @@ nuap.length = uap->length; return (truncate(td, &nuap)); } - -/* - * Truncate a file given a file descriptor. - */ -#ifndef _SYS_SYSPROTO_H_ -struct oftruncate_args { - int fd; - long length; -}; -#endif -int -oftruncate(td, uap) - struct thread *td; - register struct oftruncate_args /* { - int fd; - long length; - } */ *uap; -{ - struct ftruncate_args /* { - int fd; - int pad; - off_t length; - } */ nuap; - - nuap.fd = uap->fd; - nuap.length = uap->length; - return (ftruncate(td, &nuap)); -} #endif /* COMPAT_43 */ /* Index: kern/vfs_vnops.c =================================================================== RCS file: /usr/cvs/src/sys/kern/vfs_vnops.c,v retrieving revision 1.248 diff -u -r1.248 vfs_vnops.c --- kern/vfs_vnops.c 12 Feb 2007 22:53:01 -0000 1.248 +++ kern/vfs_vnops.c 8 Mar 2007 22:52:04 -0000 @@ -71,6 +71,7 @@ static fo_kqfilter_t vn_kqfilter; static fo_stat_t vn_statfile; static fo_close_t vn_closefile; +static fo_truncate_t vn_truncate; struct fileops vnops = { .fo_read = vn_read, @@ -80,7 +81,8 @@ .fo_kqfilter = vn_kqfilter, .fo_stat = vn_statfile, .fo_close = vn_closefile, - .fo_flags = DFLAG_PASSABLE | DFLAG_SEEKABLE + .fo_truncate = vn_truncate, + .fo_flags = DFLAG_PASSABLE | DFLAG_SEEKABLE | DFLAG_TRUNCATABLE }; int @@ -875,6 +877,53 @@ } /* + * File table truncate routine. 
+ */ +static int +vn_truncate(fp, length, active_cred, td) + struct file *fp; + off_t length; + struct ucred *active_cred; + struct thread *td; +{ + struct vattr vattr; + struct mount *mp; + struct vnode *vp; + int vfslocked; + int error; + + vp = fp->f_vnode; + vfslocked = VFS_LOCK_GIANT(vp->v_mount); + error = vn_start_write(vp, &mp, V_WAIT | PCATCH); + if (error) { + VFS_UNLOCK_GIANT(vfslocked); + return (error); + } + VOP_LEASE(vp, td, active_cred, LEASE_WRITE); + vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td); + if (vp->v_type == VDIR) { + error = EISDIR; + goto out; + } +#ifdef MAC + error = mac_check_vnode_write(active_cred, fp->f_cred, vp); + if (error) + goto out; +#endif + error = vn_writechk(vp); + if (error == 0) { + VATTR_NULL(&vattr); + vattr.va_size = length; + error = VOP_SETATTR(vp, &vattr, fp->f_cred, td); + } +out: + VOP_UNLOCK(vp, 0, td); + vn_finished_write(mp); + VFS_UNLOCK_GIANT(vfslocked); + return (error); +} + +/* * Preparing to start a filesystem write operation. If the operation is * permitted, then we bump the count of operations in progress and * proceed. 
If a suspend request is in progress, we wait until the Index: sys/file.h =================================================================== RCS file: /usr/cvs/src/sys/sys/file.h,v retrieving revision 1.73 diff -u -r1.73 file.h --- sys/file.h 5 Jan 2007 19:59:46 -0000 1.73 +++ sys/file.h 8 Mar 2007 22:52:04 -0000 @@ -59,6 +59,7 @@ #define DTYPE_KQUEUE 5 /* event queue */ #define DTYPE_CRYPTO 6 /* crypto */ #define DTYPE_MQUEUE 7 /* posix message queue */ +#define DTYPE_SHM 8 /* swap-backed shared memory */ #ifdef _KERNEL @@ -77,6 +78,8 @@ typedef int fo_stat_t(struct file *fp, struct stat *sb, struct ucred *active_cred, struct thread *td); typedef int fo_close_t(struct file *fp, struct thread *td); +typedef int fo_truncate_t(struct file *fp, off_t length, + struct ucred *active_cred, struct thread *td); typedef int fo_flags_t; struct fileops { @@ -87,11 +90,13 @@ fo_kqfilter_t *fo_kqfilter; fo_stat_t *fo_stat; fo_close_t *fo_close; + fo_truncate_t *fo_truncate; fo_flags_t fo_flags; /* DFLAG_* below */ }; #define DFLAG_PASSABLE 0x01 /* may be passed via unix sockets. */ #define DFLAG_SEEKABLE 0x02 /* seekable / nonsequential */ +#define DFLAG_TRUNCATABLE 0x04 /* truncate supported */ /* * Kernel descriptor table. 
@@ -229,6 +234,7 @@ static __inline fo_kqfilter_t fo_kqfilter; static __inline fo_stat_t fo_stat; static __inline fo_close_t fo_close; +static __inline fo_truncate_t fo_truncate; static __inline int fo_read(fp, uio, active_cred, flags, td) @@ -306,6 +312,17 @@ return ((*fp->f_ops->fo_kqfilter)(fp, kn)); } +static __inline int +fo_truncate(fp, length, active_cred, td) + struct file *fp; + off_t length; + struct ucred *active_cred; + struct thread *td; +{ + + return ((*fp->f_ops->fo_truncate)(fp, length, active_cred, td)); +} + #endif /* _KERNEL */ #endif /* !SYS_FILE_H */ Index: sys/mman.h =================================================================== RCS file: /usr/cvs/src/sys/sys/mman.h,v retrieving revision 1.40 diff -u -r1.40 mman.h --- sys/mman.h 2 Apr 2005 12:33:31 -0000 1.40 +++ sys/mman.h 8 Mar 2007 22:52:04 -0000 @@ -168,7 +168,15 @@ #define _SIZE_T_DECLARED #endif -#ifndef _KERNEL +#ifdef _KERNEL +#include + +struct shmfd; + +int shm_mmap(struct shmfd *shmfd, vm_size_t objsize, vm_ooffset_t foff, + vm_object_t *obj); + +#else /* !_KERNEL */ __BEGIN_DECLS /* Index: sys/syscallsubr.h =================================================================== RCS file: /usr/cvs/src/sys/sys/syscallsubr.h,v retrieving revision 1.45 diff -u -r1.45 syscallsubr.h --- sys/syscallsubr.h 20 Dec 2006 19:26:30 -0000 1.45 +++ sys/syscallsubr.h 8 Mar 2007 22:52:04 -0000 @@ -81,6 +81,7 @@ int kern_fhstatfs(struct thread *td, fhandle_t fh, struct statfs *buf); int kern_fstat(struct thread *td, int fd, struct stat *sbp); int kern_fstatfs(struct thread *td, int fd, struct statfs *buf); +int kern_ftruncate(struct thread *td, int fd, off_t length); int kern_futimes(struct thread *td, int fd, struct timeval *tptr, enum uio_seg tptrseg); int kern_getfsstat(struct thread *td, struct statfs **buf, size_t bufsize, Index: vm/vm_mmap.c =================================================================== RCS file: /usr/cvs/src/sys/vm/vm_mmap.c,v retrieving revision 1.208 diff -u -r1.208 
vm_mmap.c --- vm/vm_mmap.c 6 Nov 2006 13:42:10 -0000 1.208 +++ vm/vm_mmap.c 8 Mar 2007 22:52:04 -0000 @@ -118,6 +118,8 @@ int *, struct vnode *, vm_ooffset_t, vm_object_t *); static int vm_mmap_cdev(struct thread *, vm_size_t, vm_prot_t, vm_prot_t *, int *, struct cdev *, vm_ooffset_t, vm_object_t *); +static int vm_mmap_shm(struct thread *, vm_size_t, vm_prot_t, vm_prot_t *, + int *, struct shmfd *, vm_ooffset_t, vm_object_t *); /* * MPSAFE @@ -300,12 +302,23 @@ pos = 0; } else { /* - * Mapping file, get fp for validation. Obtain vnode and make - * sure it is of appropriate type. - * don't let the descriptor disappear on us if we block + * Mapping file, get fp for validation and + * don't let the descriptor disappear on us if we block. */ if ((error = fget(td, uap->fd, &fp)) != 0) goto done; + if (fp->f_type == DTYPE_SHM) { + handle = fp->f_data; + handle_type = OBJT_SWAP; + maxprot = VM_PROT_NONE; + + /* FREAD should always be set. */ + if (fp->f_flag & FREAD) + maxprot |= VM_PROT_EXECUTE | VM_PROT_READ; + if (fp->f_flag & FWRITE) + maxprot |= VM_PROT_WRITE; + goto map; + } if (fp->f_type != DTYPE_VNODE) { error = ENODEV; goto done; @@ -360,6 +373,7 @@ handle = (void *)vp; handle_type = OBJT_VNODE; } +map: /* * Do not allow more then a certain number of vm_map_entry structures @@ -1277,6 +1291,49 @@ } /* + * vm_mmap_shm() + * + * MPSAFE + * + * Helper function for vm_mmap. Perform sanity check specific for mmap + * operations on shm file descriptors. + */ +int +vm_mmap_shm(struct thread *td, vm_size_t objsize, + vm_prot_t prot, vm_prot_t *maxprotp, int *flagsp, + struct shmfd *shmfd, vm_ooffset_t foff, vm_object_t *objp) +{ + int error, flags; + + flags = *flagsp; + + /* + * shm_open() does not provide private mappings of any kind. + * + * XXX: actually, not sure about this now? 
+ */ + if ((*maxprotp & VM_PROT_WRITE) == 0 && + (prot & PROT_WRITE) != 0) + return (EACCES); + if (flags & (MAP_PRIVATE|MAP_COPY)) + return (EINVAL); + /* + * Force shm mappings to be shared. + */ + flags |= MAP_SHARED; +#ifdef MAC_XXX + error = mac_check_shm_mmap(td->td_ucred, shmfd, prot); + if (error != 0) + return (error); +#endif + error = shm_mmap(shmfd, objsize, foff, objp); + if (error) + return (error); + *flagsp = flags; + return (0); +} + +/* * vm_mmap() * * MPSAFE @@ -1341,6 +1398,10 @@ error = vm_mmap_vnode(td, size, prot, &maxprot, &flags, handle, foff, &object); break; + case OBJT_SWAP: + error = vm_mmap_shm(td, size, prot, &maxprot, &flags, + handle, foff, &object); + break; case OBJT_DEFAULT: if (handle == NULL) { error = 0;