svn commit: r227573 - in stable/8: lib/libc/sys sys/compat/freebsd32 sys/kern sys/sys

John Baldwin jhb at FreeBSD.org
Wed Nov 16 18:33:18 UTC 2011


Author: jhb
Date: Wed Nov 16 18:33:17 2011
New Revision: 227573
URL: http://svn.freebsd.org/changeset/base/227573

Log:
  MFC 220791,220793,220846,221836,226364:
  Add the posix_fallocate(2) syscall.  The default implementation in
  vop_stdallocate() is filesystem agnostic and will run as slow as a
  read/write loop in userspace; however, it serves to correctly
  implement the functionality for filesystems that do not implement a
  VOP_ALLOCATE.
  
  Allow VOP_ALLOCATE to be iterative, and have kern_posix_fallocate(9)
  drive looping and potentially yielding.
  
  Reviewed by:	mdf

Added:
  stable/8/lib/libc/sys/posix_fallocate.2
     - copied unchanged from r220791, head/lib/libc/sys/posix_fallocate.2
Modified:
  stable/8/lib/libc/sys/Makefile.inc
  stable/8/lib/libc/sys/Symbol.map
  stable/8/sys/compat/freebsd32/freebsd32_misc.c
  stable/8/sys/compat/freebsd32/syscalls.master
  stable/8/sys/kern/syscalls.master
  stable/8/sys/kern/vfs_default.c
  stable/8/sys/kern/vfs_syscalls.c
  stable/8/sys/kern/vnode_if.src
  stable/8/sys/sys/fcntl.h
  stable/8/sys/sys/param.h
  stable/8/sys/sys/vnode.h
Directory Properties:
  stable/8/lib/libc/   (props changed)
  stable/8/lib/libc/stdtime/   (props changed)
  stable/8/sys/   (props changed)
  stable/8/sys/amd64/include/xen/   (props changed)
  stable/8/sys/cddl/contrib/opensolaris/   (props changed)
  stable/8/sys/contrib/dev/acpica/   (props changed)
  stable/8/sys/contrib/pf/   (props changed)

Modified: stable/8/lib/libc/sys/Makefile.inc
==============================================================================
--- stable/8/lib/libc/sys/Makefile.inc	Wed Nov 16 17:48:05 2011	(r227572)
+++ stable/8/lib/libc/sys/Makefile.inc	Wed Nov 16 18:33:17 2011	(r227573)
@@ -86,7 +86,7 @@ MAN+=	abort2.2 accept.2 access.2 acct.2 
 	mq_setattr.2 \
 	msgctl.2 msgget.2 msgrcv.2 msgsnd.2 \
 	msync.2 munmap.2 nanosleep.2 nfssvc.2 ntp_adjtime.2 open.2 \
-	pathconf.2 pipe.2 poll.2 posix_openpt.2 profil.2 \
+	pathconf.2 pipe.2 poll.2 posix_fallocate.2 posix_openpt.2 profil.2 \
 	pselect.2 ptrace.2 quotactl.2 \
 	read.2 readlink.2 reboot.2 recv.2 rename.2 revoke.2 rfork.2 rmdir.2 \
 	rtprio.2

Modified: stable/8/lib/libc/sys/Symbol.map
==============================================================================
--- stable/8/lib/libc/sys/Symbol.map	Wed Nov 16 17:48:05 2011	(r227572)
+++ stable/8/lib/libc/sys/Symbol.map	Wed Nov 16 18:33:17 2011	(r227573)
@@ -360,6 +360,10 @@ FBSD_1.1 {
 	unlinkat;
 };
 
+FBSD_1.2 {
+	posix_fallocate;
+};
+
 FBSDprivate_1.0 {
 	___acl_aclcheck_fd;
 	__sys___acl_aclcheck_fd;

Copied: stable/8/lib/libc/sys/posix_fallocate.2 (from r220791, head/lib/libc/sys/posix_fallocate.2)
==============================================================================
--- /dev/null	00:00:00 1970	(empty, because file is newly added)
+++ stable/8/lib/libc/sys/posix_fallocate.2	Wed Nov 16 18:33:17 2011	(r227573, copy of r220791, head/lib/libc/sys/posix_fallocate.2)
@@ -0,0 +1,146 @@
+.\" Copyright (c) 1980, 1991, 1993
+.\"	The Regents of the University of California.  All rights reserved.
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\"    notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\"    notice, this list of conditions and the following disclaimer in the
+.\"    documentation and/or other materials provided with the distribution.
+.\" 4. Neither the name of the University nor the names of its contributors
+.\"    may be used to endorse or promote products derived from this software
+.\"    without specific prior written permission.
+.\"
+.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+.\" ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+.\" SUCH DAMAGE.
+.\"
+.\"     @(#)open.2	8.2 (Berkeley) 11/16/93
+.\" $FreeBSD$
+.\"
+.Dd April 13, 2011
+.Dt POSIX_FALLOCATE 2
+.Os
+.Sh NAME
+.Nm posix_fallocate
+.Nd pre-allocate storage for a range in a file
+.Sh LIBRARY
+.Lb libc
+.Sh SYNOPSIS
+.In fcntl.h
+.Ft int
+.Fn posix_fallocate "int fd" "off_t offset" "off_t len"
+.Sh DESCRIPTION
+Required storage for the range
+.Fa offset
+to
+.Fa offset +
+.Fa len
+in the file referenced by
+.Fa fd
+is guaranteed to be allocated upon successful return.
+That is, if
+.Fn posix_fallocate
+returns successfully, subsequent writes to the specified file data
+will not fail due to lack of free space on the file system storage
+media.
+Any existing file data in the specified range is unmodified.
+If
+.Fa offset +
+.Fa len
+is beyond the current file size, then
+.Fn posix_fallocate
+will adjust the file size to
+.Fa offset +
+.Fa len .
+Otherwise, the file size will not be changed.
+.Pp
+Space allocated by
+.Fn posix_fallocate
+will be freed by a successful call to
+.Xr creat 2
+or
+.Xr open 2
+that truncates the size of the file.
+Space allocated via
+.Fn posix_fallocate
+may be freed by a successful call to
+.Xr ftruncate 2
+that reduces the file size to a size smaller than
+.Fa offset +
+.Fa len .
+.Pp
+.Sh RETURN VALUES
+If successful,
+.Fn posix_fallocate
+returns zero.
+It returns -1 on failure, and sets
+.Va errno
+to indicate the error.
+.Sh ERRORS
+Possible failure conditions:
+.Bl -tag -width Er
+.It Bq Er EBADF
+The
+.Fa fd
+argument is not a valid file descriptor.
+.It Bq Er EBADF
+The
+.Fa fd
+argument references a file that was opened without write permission.
+.It Bq Er EFBIG
+The value of
+.Fa offset +
+.Fa len
+is greater than the maximum file size.
+.It Bq Er EINTR
+A signal was caught during execution.
+.It Bq Er EINVAL
+The
+.Fa len
+argument was less than or equal to zero or the
+.Fa offset
+argument was less than zero.
+.It Bq Er EIO
+An I/O error occurred while reading from or writing to a file system.
+.It Bq Er ENODEV
+The
+.Fa fd
+argument does not refer to a regular file.
+.It Bq Er ENOSPC
+There is insufficient free space remaining on the file system storage
+media.
+.It Bq Er ESPIPE
+The
+.Fa fd
+argument is associated with a pipe or FIFO.
+.El
+.Sh SEE ALSO
+.Xr creat 2 ,
+.Xr ftruncate 2 ,
+.Xr open 2 ,
+.Xr unlink 2
+.Sh STANDARDS
+The
+.Fn posix_fallocate
+system call conforms to
+.St -p1003.1-2004 .
+.Sh HISTORY
+The
+.Fn posix_fallocate
+function appeared in
+.Fx 9.0 .
+.Sh AUTHORS
+.Fn posix_fallocate
+and this manual page were initially written by
+.An Matthew Fleming Aq mdf at FreeBSD.org .

Modified: stable/8/sys/compat/freebsd32/freebsd32_misc.c
==============================================================================
--- stable/8/sys/compat/freebsd32/freebsd32_misc.c	Wed Nov 16 17:48:05 2011	(r227572)
+++ stable/8/sys/compat/freebsd32/freebsd32_misc.c	Wed Nov 16 18:33:17 2011	(r227573)
@@ -2672,3 +2672,15 @@ freebsd32_kldstat(struct thread *td, str
 	bcopy(&stat.pathname[0], &stat32.pathname[0], sizeof(stat.pathname));
 	return (copyout(&stat32, uap->stat, version));
 }
+
+int
+freebsd32_posix_fallocate(struct thread *td,
+    struct freebsd32_posix_fallocate_args *uap)
+{
+	struct posix_fallocate_args ap;
+
+	ap.fd = uap->fd;
+	ap.offset = PAIR32TO64(off_t, uap->offset);
+	ap.len = PAIR32TO64(off_t, uap->len);
+	return (posix_fallocate(td, &ap));
+}

Modified: stable/8/sys/compat/freebsd32/syscalls.master
==============================================================================
--- stable/8/sys/compat/freebsd32/syscalls.master	Wed Nov 16 17:48:05 2011	(r227572)
+++ stable/8/sys/compat/freebsd32/syscalls.master	Wed Nov 16 18:33:17 2011	(r227573)
@@ -963,3 +963,14 @@
 				    fd_set *ou, fd_set *ex, \
 				    const struct timespec32 *ts, \
 				    const sigset_t *sm); }
+523	AUE_NULL	UNIMPL	getloginclass
+524	AUE_NULL	UNIMPL	setloginclass
+525	AUE_NULL	UNIMPL	rctl_get_racct
+526	AUE_NULL	UNIMPL	rctl_get_rules
+527	AUE_NULL	UNIMPL	rctl_get_limits
+528	AUE_NULL	UNIMPL	rctl_add_rule
+529	AUE_NULL	UNIMPL	rctl_remove_rule
+530	AUE_NULL	STD	{ int freebsd32_posix_fallocate(int fd,\
+				    uint32_t offset1, uint32_t offset2,\
+				    uint32_t len1, uint32_t len2); }
+531	AUE_NULL	UNIMPL	posix_fadvise

Modified: stable/8/sys/kern/syscalls.master
==============================================================================
--- stable/8/sys/kern/syscalls.master	Wed Nov 16 17:48:05 2011	(r227572)
+++ stable/8/sys/kern/syscalls.master	Wed Nov 16 18:33:17 2011	(r227573)
@@ -927,5 +927,15 @@
 				    fd_set *ou, fd_set *ex, \
 				    const struct timespec *ts, \
 				    const sigset_t *sm); }
+523	AUE_NULL	UNIMPL	getloginclass
+524	AUE_NULL	UNIMPL	setloginclass
+525	AUE_NULL	UNIMPL	rctl_get_racct
+526	AUE_NULL	UNIMPL	rctl_get_rules
+527	AUE_NULL	UNIMPL	rctl_get_limits
+528	AUE_NULL	UNIMPL	rctl_add_rule
+529	AUE_NULL	UNIMPL	rctl_remove_rule
+530	AUE_NULL	STD	{ int posix_fallocate(int fd, \
+				    off_t offset, off_t len); }
+531	AUE_NULL	UNIMPL	posix_fadvise
 ; Please copy any additions and changes to the following compatability tables:
 ; sys/compat/freebsd32/syscalls.master

Modified: stable/8/sys/kern/vfs_default.c
==============================================================================
--- stable/8/sys/kern/vfs_default.c	Wed Nov 16 17:48:05 2011	(r227572)
+++ stable/8/sys/kern/vfs_default.c	Wed Nov 16 18:33:17 2011	(r227573)
@@ -98,6 +98,7 @@ struct vop_vector default_vnodeops = {
 	.vop_accessx =		vop_stdaccessx,
 	.vop_advlock =		vop_stdadvlock,
 	.vop_advlockasync =	vop_stdadvlockasync,
+	.vop_allocate =		vop_stdallocate,
 	.vop_bmap =		vop_stdbmap,
 	.vop_close =		VOP_NULL,
 	.vop_fsync =		VOP_NULL,
@@ -844,6 +845,134 @@ out:
 	return (error);
 }
 
+int
+vop_stdallocate(struct vop_allocate_args *ap)
+{
+#ifdef __notyet__
+	struct statfs sfs;
+#endif
+	struct iovec aiov;
+	struct vattr vattr, *vap;
+	struct uio auio;
+	off_t fsize, len, cur, offset;
+	uint8_t *buf;
+	struct thread *td;
+	struct vnode *vp;
+	size_t iosize;
+	int error;
+
+	buf = NULL;
+	error = 0;
+	td = curthread;
+	vap = &vattr;
+	vp = ap->a_vp;
+	len = *ap->a_len;
+	offset = *ap->a_offset;
+
+	error = VOP_GETATTR(vp, vap, td->td_ucred);
+	if (error != 0)
+		goto out;
+	fsize = vap->va_size;
+	iosize = vap->va_blocksize;
+	if (iosize == 0)
+		iosize = BLKDEV_IOSIZE;
+	if (iosize > MAXPHYS)
+		iosize = MAXPHYS;
+	buf = malloc(iosize, M_TEMP, M_WAITOK);
+
+#ifdef __notyet__
+	/*
+	 * Check if the filesystem sets f_maxfilesize; if not use
+	 * VOP_SETATTR to perform the check.
+	 */
+	error = VFS_STATFS(vp->v_mount, &sfs, td);
+	if (error != 0)
+		goto out;
+	if (sfs.f_maxfilesize) {
+		if (offset > sfs.f_maxfilesize || len > sfs.f_maxfilesize ||
+		    offset + len > sfs.f_maxfilesize) {
+			error = EFBIG;
+			goto out;
+		}
+	} else
+#endif
+	if (offset + len > vap->va_size) {
+		/*
+		 * Test offset + len against the filesystem's maxfilesize.
+		 */
+		VATTR_NULL(vap);
+		vap->va_size = offset + len;
+		error = VOP_SETATTR(vp, vap, td->td_ucred);
+		if (error != 0)
+			goto out;
+		VATTR_NULL(vap);
+		vap->va_size = fsize;
+		error = VOP_SETATTR(vp, vap, td->td_ucred);
+		if (error != 0)
+			goto out;
+	}
+
+	for (;;) {
+		/*
+		 * Read and write back anything below the nominal file
+		 * size.  There's currently no way outside the filesystem
+		 * to know whether this area is sparse or not.
+		 */
+		cur = iosize;
+		if ((offset % iosize) != 0)
+			cur -= (offset % iosize);
+		if (cur > len)
+			cur = len;
+		if (offset < fsize) {
+			aiov.iov_base = buf;
+			aiov.iov_len = cur;
+			auio.uio_iov = &aiov;
+			auio.uio_iovcnt = 1;
+			auio.uio_offset = offset;
+			auio.uio_resid = cur;
+			auio.uio_segflg = UIO_SYSSPACE;
+			auio.uio_rw = UIO_READ;
+			auio.uio_td = td;
+			error = VOP_READ(vp, &auio, 0, td->td_ucred);
+			if (error != 0)
+				break;
+			if (auio.uio_resid > 0) {
+				bzero(buf + cur - auio.uio_resid,
+				    auio.uio_resid);
+			}
+		} else {
+			bzero(buf, cur);
+		}
+
+		aiov.iov_base = buf;
+		aiov.iov_len = cur;
+		auio.uio_iov = &aiov;
+		auio.uio_iovcnt = 1;
+		auio.uio_offset = offset;
+		auio.uio_resid = cur;
+		auio.uio_segflg = UIO_SYSSPACE;
+		auio.uio_rw = UIO_WRITE;
+		auio.uio_td = td;
+
+		error = VOP_WRITE(vp, &auio, 0, td->td_ucred);
+		if (error != 0)
+			break;
+
+		len -= cur;
+		offset += cur;
+		if (len == 0)
+			break;
+		if (should_yield())
+			break;
+	}
+
+ out:
+	*ap->a_len = len;
+	*ap->a_offset = offset;
+	free(buf, M_TEMP);
+	return (error);
+}
+
 /*
  * vfs default ops
  * used to fill the vfs function table to get reasonable default return values.

Modified: stable/8/sys/kern/vfs_syscalls.c
==============================================================================
--- stable/8/sys/kern/vfs_syscalls.c	Wed Nov 16 17:48:05 2011	(r227572)
+++ stable/8/sys/kern/vfs_syscalls.c	Wed Nov 16 18:33:17 2011	(r227573)
@@ -4654,3 +4654,98 @@ out:
 	VFS_UNLOCK_GIANT(vfslocked);
 	return (error);
 }
+
+static int
+kern_posix_fallocate(struct thread *td, int fd, off_t offset, off_t len)
+{
+	struct file *fp;
+	struct mount *mp;
+	struct vnode *vp;
+	off_t olen, ooffset;
+	int error, vfslocked;
+
+	fp = NULL;
+	vfslocked = 0;
+	error = fget(td, fd, &fp);
+	if (error != 0)
+		goto out;
+
+	switch (fp->f_type) {
+	case DTYPE_VNODE:
+		break;
+	case DTYPE_PIPE:
+	case DTYPE_FIFO:
+		error = ESPIPE;
+		goto out;
+	default:
+		error = ENODEV;
+		goto out;
+	}
+	if ((fp->f_flag & FWRITE) == 0) {
+		error = EBADF;
+		goto out;
+	}
+	vp = fp->f_vnode;
+	if (vp->v_type != VREG) {
+		error = ENODEV;
+		goto out;
+	}
+	if (offset < 0 || len <= 0) {
+		error = EINVAL;
+		goto out;
+	}
+	/* Check for wrap. */
+	if (offset > OFF_MAX - len) {
+		error = EFBIG;
+		goto out;
+	}
+
+	/* Allocating blocks may take a long time, so iterate. */
+	for (;;) {
+		olen = len;
+		ooffset = offset;
+
+		bwillwrite();
+		vfslocked = VFS_LOCK_GIANT(vp->v_mount);
+		mp = NULL;
+		error = vn_start_write(vp, &mp, V_WAIT | PCATCH);
+		if (error != 0) {
+			VFS_UNLOCK_GIANT(vfslocked);
+			break;
+		}
+		error = vn_lock(vp, LK_EXCLUSIVE);
+		if (error != 0) {
+			vn_finished_write(mp);
+			VFS_UNLOCK_GIANT(vfslocked);
+			break;
+		}
+#ifdef MAC
+		error = mac_vnode_check_write(td->td_ucred, fp->f_cred, vp);
+		if (error == 0)
+#endif
+			error = VOP_ALLOCATE(vp, &offset, &len);
+		VOP_UNLOCK(vp, 0);
+		vn_finished_write(mp);
+		VFS_UNLOCK_GIANT(vfslocked);
+
+		if (olen + ooffset != offset + len) {
+			panic("offset + len changed from %jx/%jx to %jx/%jx",
+			    ooffset, olen, offset, len);
+		}
+		if (error != 0 || len == 0)
+			break;
+		KASSERT(olen > len, ("Iteration did not make progress?"));
+		maybe_yield();
+	}
+ out:
+	if (fp != NULL)
+		fdrop(fp, td);
+	return (error);
+}
+
+int
+posix_fallocate(struct thread *td, struct posix_fallocate_args *uap)
+{
+
+	return (kern_posix_fallocate(td, uap->fd, uap->offset, uap->len));
+}

Modified: stable/8/sys/kern/vnode_if.src
==============================================================================
--- stable/8/sys/kern/vnode_if.src	Wed Nov 16 17:48:05 2011	(r227572)
+++ stable/8/sys/kern/vnode_if.src	Wed Nov 16 18:33:17 2011	(r227573)
@@ -601,6 +601,7 @@ vop_vptofh {
 	IN struct fid *fhp;
 };
 
+
 %% vptocnp		vp	L L L
 %% vptocnp		vpp	- U -
 
@@ -611,3 +612,12 @@ vop_vptocnp {
 	INOUT char *buf;
 	INOUT int *buflen;
 };
+
+
+%% allocate	vp	E E E
+
+vop_allocate {
+	IN struct vnode *vp;
+	INOUT off_t *offset;
+	INOUT off_t *len;
+};

Modified: stable/8/sys/sys/fcntl.h
==============================================================================
--- stable/8/sys/sys/fcntl.h	Wed Nov 16 17:48:05 2011	(r227572)
+++ stable/8/sys/sys/fcntl.h	Wed Nov 16 18:33:17 2011	(r227573)
@@ -278,7 +278,7 @@ struct oflock {
 #endif
 
 /*
- * XXX missing posix_fadvise() and posix_fallocate(), and POSIX_FADV_* macros.
+ * XXX missing posix_fadvise() and POSIX_FADV_* macros.
  */
 
 #ifndef _KERNEL
@@ -289,6 +289,9 @@ int	fcntl(int, int, ...);
 #if __BSD_VISIBLE || __POSIX_VISIBLE >= 200809
 int	openat(int, const char *, int, ...);
 #endif
+#if __BSD_VISIBLE || __POSIX_VISIBLE >= 200112
+int	posix_fallocate(int, off_t, off_t);
+#endif
 #if __BSD_VISIBLE
 int	flock(int, int);
 #endif

Modified: stable/8/sys/sys/param.h
==============================================================================
--- stable/8/sys/sys/param.h	Wed Nov 16 17:48:05 2011	(r227572)
+++ stable/8/sys/sys/param.h	Wed Nov 16 18:33:17 2011	(r227573)
@@ -58,7 +58,7 @@
  *		in the range 5 to 9.
  */
 #undef __FreeBSD_version
-#define __FreeBSD_version 802513	/* Master, propagated to newvers */
+#define __FreeBSD_version 802514	/* Master, propagated to newvers */
 
 #ifdef _KERNEL
 #define	P_OSREL_SIGWAIT		700000

Modified: stable/8/sys/sys/vnode.h
==============================================================================
--- stable/8/sys/sys/vnode.h	Wed Nov 16 17:48:05 2011	(r227572)
+++ stable/8/sys/sys/vnode.h	Wed Nov 16 18:33:17 2011	(r227573)
@@ -688,6 +688,7 @@ int	vop_stdaccess(struct vop_access_args
 int	vop_stdaccessx(struct vop_accessx_args *ap);
 int	vop_stdadvlock(struct vop_advlock_args *ap);
 int	vop_stdadvlockasync(struct vop_advlockasync_args *ap);
+int	vop_stdallocate(struct vop_allocate_args *ap);
 int	vop_stdpathconf(struct vop_pathconf_args *);
 int	vop_stdpoll(struct vop_poll_args *);
 int	vop_stdvptocnp(struct vop_vptocnp_args *ap);


More information about the svn-src-stable-8 mailing list