git: 197997a4c36d - main - file: Fix offset handling in kern_copy_file_range()

From: Mark Johnston <markj_at_FreeBSD.org>
Date: Mon, 31 Mar 2025 09:06:38 UTC
The branch main has been updated by markj:

URL: https://cgit.FreeBSD.org/src/commit/?id=197997a4c36d8be5807688a4f973ebe8ae807a6e

commit 197997a4c36d8be5807688a4f973ebe8ae807a6e
Author:     Mark Johnston <markj@FreeBSD.org>
AuthorDate: 2025-03-31 01:23:30 +0000
Commit:     Mark Johnston <markj@FreeBSD.org>
CommitDate: 2025-03-31 09:01:09 +0000

    file: Fix offset handling in kern_copy_file_range()
    
    One can ask copy_file_range(2) to use the file offsets of the file
    descriptions that it copies from and to.  We were updating those offsets
    without any locking, which is incorrect and can lead to unkillable loops
    in the event of a race (e.g., the check for overlapping ranges in
    kern_copy_file_range() is subject to a TOCTOU race with the following
    loop which range-locks the input and output file).
    
    Use foffset_lock() to serialize updates to the file descriptions, as we
    do for other, similar system calls.
    
    Reported by:    syzkaller
    Reviewed by:    rmacklem, kib
    MFC after:      2 weeks
    Fixes:          bbbbeca3e9a3 ("Add kernel support for a Linux compatible copy_file_range(2) syscall.")
    Differential Revision:  https://reviews.freebsd.org/D49440
---
 sys/kern/vfs_syscalls.c | 73 +++++++++++++++++++++++++++++++++++--------------
 1 file changed, 52 insertions(+), 21 deletions(-)

diff --git a/sys/kern/vfs_syscalls.c b/sys/kern/vfs_syscalls.c
index 41609d7c5351..a6c216d61d8c 100644
--- a/sys/kern/vfs_syscalls.c
+++ b/sys/kern/vfs_syscalls.c
@@ -4940,16 +4940,17 @@ int
 kern_copy_file_range(struct thread *td, int infd, off_t *inoffp, int outfd,
     off_t *outoffp, size_t len, unsigned int flags)
 {
-	struct file *infp, *outfp;
+	struct file *infp, *infp1, *outfp, *outfp1;
 	struct vnode *invp, *outvp;
 	int error;
 	size_t retlen;
 	void *rl_rcookie, *rl_wcookie;
-	off_t savinoff, savoutoff;
+	off_t inoff, outoff, savinoff, savoutoff;
+	bool foffsets_locked;
 
 	infp = outfp = NULL;
 	rl_rcookie = rl_wcookie = NULL;
-	savinoff = -1;
+	foffsets_locked = false;
 	error = 0;
 	retlen = 0;
 
@@ -4991,13 +4992,35 @@ kern_copy_file_range(struct thread *td, int infd, off_t *inoffp, int outfd,
 		goto out;
 	}
 
-	/* Set the offset pointers to the correct place. */
-	if (inoffp == NULL)
-		inoffp = &infp->f_offset;
-	if (outoffp == NULL)
-		outoffp = &outfp->f_offset;
-	savinoff = *inoffp;
-	savoutoff = *outoffp;
+	/*
+	 * Figure out which file offsets we're reading from and writing to.
+	 * If the offsets come from the file descriptions, we need to lock them,
+	 * and locking both offsets requires a loop to avoid deadlocks.
+	 */
+	infp1 = outfp1 = NULL;
+	if (inoffp != NULL)
+		inoff = *inoffp;
+	else
+		infp1 = infp;
+	if (outoffp != NULL)
+		outoff = *outoffp;
+	else
+		outfp1 = outfp;
+	if (infp1 != NULL || outfp1 != NULL) {
+		if (infp1 == outfp1) {
+			/*
+			 * Overlapping ranges are not allowed.  A more thorough
+			 * check appears below, but we must not lock the same
+			 * offset twice.
+			 */
+			error = EINVAL;
+			goto out;
+		}
+		foffset_lock_pair(infp1, &inoff, outfp1, &outoff, 0);
+		foffsets_locked = true;
+	}
+	savinoff = inoff;
+	savoutoff = outoff;
 
 	invp = infp->f_vnode;
 	outvp = outfp->f_vnode;
@@ -5017,8 +5040,8 @@ kern_copy_file_range(struct thread *td, int infd, off_t *inoffp, int outfd,
 	 * overlap.
 	 */
 	if (invp == outvp) {
-		if ((savinoff <= savoutoff && savinoff + len > savoutoff) ||
-		    (savinoff > savoutoff && savoutoff + len > savinoff)) {
+		if ((inoff <= outoff && inoff + len > outoff) ||
+		    (inoff > outoff && outoff + len > inoff)) {
 			error = EINVAL;
 			goto out;
 		}
@@ -5027,28 +5050,36 @@ kern_copy_file_range(struct thread *td, int infd, off_t *inoffp, int outfd,
 
 	/* Range lock the byte ranges for both invp and outvp. */
 	for (;;) {
-		rl_wcookie = vn_rangelock_wlock(outvp, *outoffp, *outoffp +
-		    len);
-		rl_rcookie = vn_rangelock_tryrlock(invp, *inoffp, *inoffp +
-		    len);
+		rl_wcookie = vn_rangelock_wlock(outvp, outoff, outoff + len);
+		rl_rcookie = vn_rangelock_tryrlock(invp, inoff, inoff + len);
 		if (rl_rcookie != NULL)
 			break;
 		vn_rangelock_unlock(outvp, rl_wcookie);
-		rl_rcookie = vn_rangelock_rlock(invp, *inoffp, *inoffp + len);
+		rl_rcookie = vn_rangelock_rlock(invp, inoff, inoff + len);
 		vn_rangelock_unlock(invp, rl_rcookie);
 	}
 
 	retlen = len;
-	error = vn_copy_file_range(invp, inoffp, outvp, outoffp, &retlen,
+	error = vn_copy_file_range(invp, &inoff, outvp, &outoff, &retlen,
 	    flags, infp->f_cred, outfp->f_cred, td);
 out:
 	if (rl_rcookie != NULL)
 		vn_rangelock_unlock(invp, rl_rcookie);
 	if (rl_wcookie != NULL)
 		vn_rangelock_unlock(outvp, rl_wcookie);
-	if (savinoff != -1 && (error == EINTR || error == ERESTART)) {
-		*inoffp = savinoff;
-		*outoffp = savoutoff;
+	if (foffsets_locked) {
+		if (error == EINTR || error == ERESTART) {
+			inoff = savinoff;
+			outoff = savoutoff;
+		}
+		if (inoffp == NULL)
+			foffset_unlock(infp, inoff, 0);
+		else
+			*inoffp = inoff;
+		if (outoffp == NULL)
+			foffset_unlock(outfp, outoff, 0);
+		else
+			*outoffp = outoff;
 	}
 	if (outfp != NULL)
 		fdrop(outfp, td);