svn commit: r227573 - in stable/8: lib/libc/sys
sys/compat/freebsd32 sys/kern sys/sys
John Baldwin
jhb at FreeBSD.org
Wed Nov 16 18:33:18 UTC 2011
Author: jhb
Date: Wed Nov 16 18:33:17 2011
New Revision: 227573
URL: http://svn.freebsd.org/changeset/base/227573
Log:
MFC 220791,220793,220846,221836,226364:
Add the posix_fallocate(2) syscall. The default implementation in
vop_stdallocate() is filesystem agnostic and will run as slow as a
read/write loop in userspace; however, it serves to correctly
implement the functionality for filesystems that do not implement a
VOP_ALLOCATE.
Allow VOP_ALLOCATE to be iterative, and have kern_posix_fallocate(9)
drive looping and potentially yielding.
Reviewed by: mdf
Added:
stable/8/lib/libc/sys/posix_fallocate.2
- copied unchanged from r220791, head/lib/libc/sys/posix_fallocate.2
Modified:
stable/8/lib/libc/sys/Makefile.inc
stable/8/lib/libc/sys/Symbol.map
stable/8/sys/compat/freebsd32/freebsd32_misc.c
stable/8/sys/compat/freebsd32/syscalls.master
stable/8/sys/kern/syscalls.master
stable/8/sys/kern/vfs_default.c
stable/8/sys/kern/vfs_syscalls.c
stable/8/sys/kern/vnode_if.src
stable/8/sys/sys/fcntl.h
stable/8/sys/sys/param.h
stable/8/sys/sys/vnode.h
Directory Properties:
stable/8/lib/libc/ (props changed)
stable/8/lib/libc/stdtime/ (props changed)
stable/8/sys/ (props changed)
stable/8/sys/amd64/include/xen/ (props changed)
stable/8/sys/cddl/contrib/opensolaris/ (props changed)
stable/8/sys/contrib/dev/acpica/ (props changed)
stable/8/sys/contrib/pf/ (props changed)
Modified: stable/8/lib/libc/sys/Makefile.inc
==============================================================================
--- stable/8/lib/libc/sys/Makefile.inc Wed Nov 16 17:48:05 2011 (r227572)
+++ stable/8/lib/libc/sys/Makefile.inc Wed Nov 16 18:33:17 2011 (r227573)
@@ -86,7 +86,7 @@ MAN+= abort2.2 accept.2 access.2 acct.2
mq_setattr.2 \
msgctl.2 msgget.2 msgrcv.2 msgsnd.2 \
msync.2 munmap.2 nanosleep.2 nfssvc.2 ntp_adjtime.2 open.2 \
- pathconf.2 pipe.2 poll.2 posix_openpt.2 profil.2 \
+ pathconf.2 pipe.2 poll.2 posix_fallocate.2 posix_openpt.2 profil.2 \
pselect.2 ptrace.2 quotactl.2 \
read.2 readlink.2 reboot.2 recv.2 rename.2 revoke.2 rfork.2 rmdir.2 \
rtprio.2
Modified: stable/8/lib/libc/sys/Symbol.map
==============================================================================
--- stable/8/lib/libc/sys/Symbol.map Wed Nov 16 17:48:05 2011 (r227572)
+++ stable/8/lib/libc/sys/Symbol.map Wed Nov 16 18:33:17 2011 (r227573)
@@ -360,6 +360,10 @@ FBSD_1.1 {
unlinkat;
};
+FBSD_1.2 {
+ posix_fallocate;
+};
+
FBSDprivate_1.0 {
___acl_aclcheck_fd;
__sys___acl_aclcheck_fd;
Copied: stable/8/lib/libc/sys/posix_fallocate.2 (from r220791, head/lib/libc/sys/posix_fallocate.2)
==============================================================================
--- /dev/null 00:00:00 1970 (empty, because file is newly added)
+++ stable/8/lib/libc/sys/posix_fallocate.2 Wed Nov 16 18:33:17 2011 (r227573, copy of r220791, head/lib/libc/sys/posix_fallocate.2)
@@ -0,0 +1,146 @@
+.\" Copyright (c) 1980, 1991, 1993
+.\" The Regents of the University of California. All rights reserved.
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\" notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\" notice, this list of conditions and the following disclaimer in the
+.\" documentation and/or other materials provided with the distribution.
+.\" 4. Neither the name of the University nor the names of its contributors
+.\" may be used to endorse or promote products derived from this software
+.\" without specific prior written permission.
+.\"
+.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+.\" SUCH DAMAGE.
+.\"
+.\" @(#)open.2 8.2 (Berkeley) 11/16/93
+.\" $FreeBSD$
+.\"
+.Dd April 13, 2011
+.Dt POSIX_FALLOCATE 2
+.Os
+.Sh NAME
+.Nm posix_fallocate
+.Nd pre-allocate storage for a range in a file
+.Sh LIBRARY
+.Lb libc
+.Sh SYNOPSIS
+.In fcntl.h
+.Ft int
+.Fn posix_fallocate "int fd" "off_t offset" "off_t len"
+.Sh DESCRIPTION
+Required storage for the range
+.Fa offset
+to
+.Fa offset +
+.Fa len
+in the file referenced by
+.Fa fd
+is guaranteed to be allocated upon successful return.
+That is, if
+.Fn posix_fallocate
+returns successfully, subsequent writes to the specified file data
+will not fail due to lack of free space on the file system storage
+media.
+Any existing file data in the specified range is unmodified.
+If
+.Fa offset +
+.Fa len
+is beyond the current file size, then
+.Fn posix_fallocate
+will adjust the file size to
+.Fa offset +
+.Fa len .
+Otherwise, the file size will not be changed.
+.Pp
+Space allocated by
+.Fn posix_fallocate
+will be freed by a successful call to
+.Xr creat 2
+or
+.Xr open 2
+that truncates the size of the file.
+Space allocated via
+.Fn posix_fallocate
+may be freed by a successful call to
+.Xr ftruncate 2
+that reduces the file size to a size smaller than
+.Fa offset +
+.Fa len .
+.Pp
+.Sh RETURN VALUES
+If successful,
+.Fn posix_fallocate
+returns zero.
+It returns -1 on failure, and sets
+.Va errno
+to indicate the error.
+.Sh ERRORS
+Possible failure conditions:
+.Bl -tag -width Er
+.It Bq Er EBADF
+The
+.Fa fd
+argument is not a valid file descriptor.
+.It Bq Er EBADF
+The
+.Fa fd
+argument references a file that was opened without write permission.
+.It Bq Er EFBIG
+The value of
+.Fa offset +
+.Fa len
+is greater than the maximum file size.
+.It Bq Er EINTR
+A signal was caught during execution.
+.It Bq Er EINVAL
+The
+.Fa len
+argument was less than or equal to zero or the
+.Fa offset
+argument was less than zero.
+.It Bq Er EIO
+An I/O error occurred while reading from or writing to a file system.
+.It Bq Er ENODEV
+The
+.Fa fd
+argument does not refer to a regular file.
+.It Bq Er ENOSPC
+There is insufficient free space remaining on the file system storage
+media.
+.It Bq Er ESPIPE
+The
+.Fa fd
+argument is associated with a pipe or FIFO.
+.El
+.Sh SEE ALSO
+.Xr creat 2 ,
+.Xr ftruncate 2 ,
+.Xr open 2 ,
+.Xr unlink 2
+.Sh STANDARDS
+The
+.Fn posix_fallocate
+system call conforms to
+.St -p1003.1-2004 .
+.Sh HISTORY
+The
+.Fn posix_fallocate
+function appeared in
+.Fx 9.0 .
+.Sh AUTHORS
+.Fn posix_fallocate
+and this manual page were initially written by
+.An Matthew Fleming Aq mdf at FreeBSD.org .
Modified: stable/8/sys/compat/freebsd32/freebsd32_misc.c
==============================================================================
--- stable/8/sys/compat/freebsd32/freebsd32_misc.c Wed Nov 16 17:48:05 2011 (r227572)
+++ stable/8/sys/compat/freebsd32/freebsd32_misc.c Wed Nov 16 18:33:17 2011 (r227573)
@@ -2672,3 +2672,15 @@ freebsd32_kldstat(struct thread *td, str
bcopy(&stat.pathname[0], &stat32.pathname[0], sizeof(stat.pathname));
return (copyout(&stat32, uap->stat, version));
}
+
+int
+freebsd32_posix_fallocate(struct thread *td,
+ struct freebsd32_posix_fallocate_args *uap)
+{
+ struct posix_fallocate_args ap;
+
+ ap.fd = uap->fd;
+ ap.offset = PAIR32TO64(off_t, uap->offset);
+ ap.len = PAIR32TO64(off_t, uap->len);
+ return (posix_fallocate(td, &ap));
+}
Modified: stable/8/sys/compat/freebsd32/syscalls.master
==============================================================================
--- stable/8/sys/compat/freebsd32/syscalls.master Wed Nov 16 17:48:05 2011 (r227572)
+++ stable/8/sys/compat/freebsd32/syscalls.master Wed Nov 16 18:33:17 2011 (r227573)
@@ -963,3 +963,14 @@
fd_set *ou, fd_set *ex, \
const struct timespec32 *ts, \
const sigset_t *sm); }
+523 AUE_NULL UNIMPL getloginclass
+524 AUE_NULL UNIMPL setloginclass
+525 AUE_NULL UNIMPL rctl_get_racct
+526 AUE_NULL UNIMPL rctl_get_rules
+527 AUE_NULL UNIMPL rctl_get_limits
+528 AUE_NULL UNIMPL rctl_add_rule
+529 AUE_NULL UNIMPL rctl_remove_rule
+530 AUE_NULL STD { int freebsd32_posix_fallocate(int fd,\
+ uint32_t offset1, uint32_t offset2,\
+ uint32_t len1, uint32_t len2); }
+531 AUE_NULL UNIMPL posix_fadvise
Modified: stable/8/sys/kern/syscalls.master
==============================================================================
--- stable/8/sys/kern/syscalls.master Wed Nov 16 17:48:05 2011 (r227572)
+++ stable/8/sys/kern/syscalls.master Wed Nov 16 18:33:17 2011 (r227573)
@@ -927,5 +927,15 @@
fd_set *ou, fd_set *ex, \
const struct timespec *ts, \
const sigset_t *sm); }
+523 AUE_NULL UNIMPL getloginclass
+524 AUE_NULL UNIMPL setloginclass
+525 AUE_NULL UNIMPL rctl_get_racct
+526 AUE_NULL UNIMPL rctl_get_rules
+527 AUE_NULL UNIMPL rctl_get_limits
+528 AUE_NULL UNIMPL rctl_add_rule
+529 AUE_NULL UNIMPL rctl_remove_rule
+530 AUE_NULL STD { int posix_fallocate(int fd, \
+ off_t offset, off_t len); }
+531 AUE_NULL UNIMPL posix_fadvise
; Please copy any additions and changes to the following compatibility tables:
; sys/compat/freebsd32/syscalls.master
Modified: stable/8/sys/kern/vfs_default.c
==============================================================================
--- stable/8/sys/kern/vfs_default.c Wed Nov 16 17:48:05 2011 (r227572)
+++ stable/8/sys/kern/vfs_default.c Wed Nov 16 18:33:17 2011 (r227573)
@@ -98,6 +98,7 @@ struct vop_vector default_vnodeops = {
.vop_accessx = vop_stdaccessx,
.vop_advlock = vop_stdadvlock,
.vop_advlockasync = vop_stdadvlockasync,
+ .vop_allocate = vop_stdallocate,
.vop_bmap = vop_stdbmap,
.vop_close = VOP_NULL,
.vop_fsync = VOP_NULL,
@@ -844,6 +845,134 @@ out:
return (error);
}
+int
+vop_stdallocate(struct vop_allocate_args *ap)
+{
+#ifdef __notyet__
+ struct statfs sfs;
+#endif
+ struct iovec aiov;
+ struct vattr vattr, *vap;
+ struct uio auio;
+ off_t fsize, len, cur, offset;
+ uint8_t *buf;
+ struct thread *td;
+ struct vnode *vp;
+ size_t iosize;
+ int error;
+
+ buf = NULL;
+ error = 0;
+ td = curthread;
+ vap = &vattr;
+ vp = ap->a_vp;
+ len = *ap->a_len;
+ offset = *ap->a_offset;
+
+ error = VOP_GETATTR(vp, vap, td->td_ucred);
+ if (error != 0)
+ goto out;
+ fsize = vap->va_size;
+ iosize = vap->va_blocksize;
+ if (iosize == 0)
+ iosize = BLKDEV_IOSIZE;
+ if (iosize > MAXPHYS)
+ iosize = MAXPHYS;
+ buf = malloc(iosize, M_TEMP, M_WAITOK);
+
+#ifdef __notyet__
+ /*
+ * Check if the filesystem sets f_maxfilesize; if not use
+ * VOP_SETATTR to perform the check.
+ */
+ error = VFS_STATFS(vp->v_mount, &sfs, td);
+ if (error != 0)
+ goto out;
+ if (sfs.f_maxfilesize) {
+ if (offset > sfs.f_maxfilesize || len > sfs.f_maxfilesize ||
+ offset + len > sfs.f_maxfilesize) {
+ error = EFBIG;
+ goto out;
+ }
+ } else
+#endif
+ if (offset + len > vap->va_size) {
+ /*
+ * Test offset + len against the filesystem's maxfilesize.
+ */
+ VATTR_NULL(vap);
+ vap->va_size = offset + len;
+ error = VOP_SETATTR(vp, vap, td->td_ucred);
+ if (error != 0)
+ goto out;
+ VATTR_NULL(vap);
+ vap->va_size = fsize;
+ error = VOP_SETATTR(vp, vap, td->td_ucred);
+ if (error != 0)
+ goto out;
+ }
+
+ for (;;) {
+ /*
+ * Read and write back anything below the nominal file
+ * size. There's currently no way outside the filesystem
+ * to know whether this area is sparse or not.
+ */
+ cur = iosize;
+ if ((offset % iosize) != 0)
+ cur -= (offset % iosize);
+ if (cur > len)
+ cur = len;
+ if (offset < fsize) {
+ aiov.iov_base = buf;
+ aiov.iov_len = cur;
+ auio.uio_iov = &aiov;
+ auio.uio_iovcnt = 1;
+ auio.uio_offset = offset;
+ auio.uio_resid = cur;
+ auio.uio_segflg = UIO_SYSSPACE;
+ auio.uio_rw = UIO_READ;
+ auio.uio_td = td;
+ error = VOP_READ(vp, &auio, 0, td->td_ucred);
+ if (error != 0)
+ break;
+ if (auio.uio_resid > 0) {
+ bzero(buf + cur - auio.uio_resid,
+ auio.uio_resid);
+ }
+ } else {
+ bzero(buf, cur);
+ }
+
+ aiov.iov_base = buf;
+ aiov.iov_len = cur;
+ auio.uio_iov = &aiov;
+ auio.uio_iovcnt = 1;
+ auio.uio_offset = offset;
+ auio.uio_resid = cur;
+ auio.uio_segflg = UIO_SYSSPACE;
+ auio.uio_rw = UIO_WRITE;
+ auio.uio_td = td;
+
+ error = VOP_WRITE(vp, &auio, 0, td->td_ucred);
+ if (error != 0)
+ break;
+
+ len -= cur;
+ offset += cur;
+ if (len == 0)
+ break;
+ if (should_yield())
+ break;
+ }
+
+ out:
+ *ap->a_len = len;
+ *ap->a_offset = offset;
+ free(buf, M_TEMP);
+ return (error);
+}
+
/*
* vfs default ops
* used to fill the vfs function table to get reasonable default return values.
Modified: stable/8/sys/kern/vfs_syscalls.c
==============================================================================
--- stable/8/sys/kern/vfs_syscalls.c Wed Nov 16 17:48:05 2011 (r227572)
+++ stable/8/sys/kern/vfs_syscalls.c Wed Nov 16 18:33:17 2011 (r227573)
@@ -4654,3 +4654,98 @@ out:
VFS_UNLOCK_GIANT(vfslocked);
return (error);
}
+
+static int
+kern_posix_fallocate(struct thread *td, int fd, off_t offset, off_t len)
+{
+ struct file *fp;
+ struct mount *mp;
+ struct vnode *vp;
+ off_t olen, ooffset;
+ int error, vfslocked;
+
+ fp = NULL;
+ vfslocked = 0;
+ error = fget(td, fd, &fp);
+ if (error != 0)
+ goto out;
+
+ switch (fp->f_type) {
+ case DTYPE_VNODE:
+ break;
+ case DTYPE_PIPE:
+ case DTYPE_FIFO:
+ error = ESPIPE;
+ goto out;
+ default:
+ error = ENODEV;
+ goto out;
+ }
+ if ((fp->f_flag & FWRITE) == 0) {
+ error = EBADF;
+ goto out;
+ }
+ vp = fp->f_vnode;
+ if (vp->v_type != VREG) {
+ error = ENODEV;
+ goto out;
+ }
+ if (offset < 0 || len <= 0) {
+ error = EINVAL;
+ goto out;
+ }
+ /* Check for wrap. */
+ if (offset > OFF_MAX - len) {
+ error = EFBIG;
+ goto out;
+ }
+
+ /* Allocating blocks may take a long time, so iterate. */
+ for (;;) {
+ olen = len;
+ ooffset = offset;
+
+ bwillwrite();
+ vfslocked = VFS_LOCK_GIANT(vp->v_mount);
+ mp = NULL;
+ error = vn_start_write(vp, &mp, V_WAIT | PCATCH);
+ if (error != 0) {
+ VFS_UNLOCK_GIANT(vfslocked);
+ break;
+ }
+ error = vn_lock(vp, LK_EXCLUSIVE);
+ if (error != 0) {
+ vn_finished_write(mp);
+ VFS_UNLOCK_GIANT(vfslocked);
+ break;
+ }
+#ifdef MAC
+ error = mac_vnode_check_write(td->td_ucred, fp->f_cred, vp);
+ if (error == 0)
+#endif
+ error = VOP_ALLOCATE(vp, &offset, &len);
+ VOP_UNLOCK(vp, 0);
+ vn_finished_write(mp);
+ VFS_UNLOCK_GIANT(vfslocked);
+
+ if (olen + ooffset != offset + len) {
+ panic("offset + len changed from %jx/%jx to %jx/%jx",
+ ooffset, olen, offset, len);
+ }
+ if (error != 0 || len == 0)
+ break;
+ KASSERT(olen > len, ("Iteration did not make progress?"));
+ maybe_yield();
+ }
+ out:
+ if (fp != NULL)
+ fdrop(fp, td);
+ return (error);
+}
+
+int
+posix_fallocate(struct thread *td, struct posix_fallocate_args *uap)
+{
+
+ return (kern_posix_fallocate(td, uap->fd, uap->offset, uap->len));
+}
Modified: stable/8/sys/kern/vnode_if.src
==============================================================================
--- stable/8/sys/kern/vnode_if.src Wed Nov 16 17:48:05 2011 (r227572)
+++ stable/8/sys/kern/vnode_if.src Wed Nov 16 18:33:17 2011 (r227573)
@@ -601,6 +601,7 @@ vop_vptofh {
IN struct fid *fhp;
};
+
%% vptocnp vp L L L
%% vptocnp vpp - U -
@@ -611,3 +612,12 @@ vop_vptocnp {
INOUT char *buf;
INOUT int *buflen;
};
+
+
+%% allocate vp E E E
+
+vop_allocate {
+ IN struct vnode *vp;
+ INOUT off_t *offset;
+ INOUT off_t *len;
+};
Modified: stable/8/sys/sys/fcntl.h
==============================================================================
--- stable/8/sys/sys/fcntl.h Wed Nov 16 17:48:05 2011 (r227572)
+++ stable/8/sys/sys/fcntl.h Wed Nov 16 18:33:17 2011 (r227573)
@@ -278,7 +278,7 @@ struct oflock {
#endif
/*
- * XXX missing posix_fadvise() and posix_fallocate(), and POSIX_FADV_* macros.
+ * XXX missing posix_fadvise() and POSIX_FADV_* macros.
*/
#ifndef _KERNEL
@@ -289,6 +289,9 @@ int fcntl(int, int, ...);
#if __BSD_VISIBLE || __POSIX_VISIBLE >= 200809
int openat(int, const char *, int, ...);
#endif
+#if __BSD_VISIBLE || __POSIX_VISIBLE >= 200112
+int posix_fallocate(int, off_t, off_t);
+#endif
#if __BSD_VISIBLE
int flock(int, int);
#endif
Modified: stable/8/sys/sys/param.h
==============================================================================
--- stable/8/sys/sys/param.h Wed Nov 16 17:48:05 2011 (r227572)
+++ stable/8/sys/sys/param.h Wed Nov 16 18:33:17 2011 (r227573)
@@ -58,7 +58,7 @@
* in the range 5 to 9.
*/
#undef __FreeBSD_version
-#define __FreeBSD_version 802513 /* Master, propagated to newvers */
+#define __FreeBSD_version 802514 /* Master, propagated to newvers */
#ifdef _KERNEL
#define P_OSREL_SIGWAIT 700000
Modified: stable/8/sys/sys/vnode.h
==============================================================================
--- stable/8/sys/sys/vnode.h Wed Nov 16 17:48:05 2011 (r227572)
+++ stable/8/sys/sys/vnode.h Wed Nov 16 18:33:17 2011 (r227573)
@@ -688,6 +688,7 @@ int vop_stdaccess(struct vop_access_args
int vop_stdaccessx(struct vop_accessx_args *ap);
int vop_stdadvlock(struct vop_advlock_args *ap);
int vop_stdadvlockasync(struct vop_advlockasync_args *ap);
+int vop_stdallocate(struct vop_allocate_args *ap);
int vop_stdpathconf(struct vop_pathconf_args *);
int vop_stdpoll(struct vop_poll_args *);
int vop_stdvptocnp(struct vop_vptocnp_args *ap);
More information about the svn-src-stable-8
mailing list