git: b57954717ddf - stable/12 - cp: fix some cases with infinite recursion

From: Kyle Evans <kevans_at_FreeBSD.org>
Date: Sun, 24 Apr 2022 13:29:23 UTC
The branch stable/12 has been updated by kevans:

URL: https://cgit.FreeBSD.org/src/commit/?id=b57954717ddfe52be9e18591d302c61e50db0aa6

commit b57954717ddfe52be9e18591d302c61e50db0aa6
Author:     Kyle Evans <kevans@FreeBSD.org>
AuthorDate: 2022-01-27 18:02:17 +0000
Commit:     Kyle Evans <kevans@FreeBSD.org>
CommitDate: 2022-04-24 03:48:56 +0000

    cp: fix some cases with infinite recursion
    
    As noted in the PR, cp -R has some surprising behavior.  Typically, when
    you `cp -R foo bar` where both foo and bar exist, foo is cleanly copied
    to foo/bar.  When you `cp -R foo foo` (where foo clearly exists), cp(1)
    goes a little off the rails as it creates foo/foo, then discovers that
    and creates foo/foo/foo, so on and so forth, until it eventually fails.
    
    POSIX doesn't seem to disallow this behavior, but it isn't very useful.
    GNU cp(1) will detect the recursion and squash it, but emit a message in
    the process that it has done so.
    
    This change seemingly follows the GNU behavior, but it currently doesn't
    warn about the situation -- the author feels that the final product is
    about what one might expect from doing this and thus, doesn't need a
    warning.  The author doesn't feel strongly about this.
    
    PR:             235438
    Reviewed by:    bapt
    Sponsored by:   Klara, Inc.
    
    (cherry picked from commit 848263aad129c8f9de75b58a5ab9a010611b75ac)
---
 bin/cp/cp.c             | 75 +++++++++++++++++++++++++++++++++++++++++++----
 bin/cp/tests/cp_test.sh | 77 +++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 147 insertions(+), 5 deletions(-)

diff --git a/bin/cp/cp.c b/bin/cp/cp.c
index 6c9a57d30d78..6255b7e2a0c0 100644
--- a/bin/cp/cp.c
+++ b/bin/cp/cp.c
@@ -64,6 +64,7 @@ __FBSDID("$FreeBSD$");
 #include <sys/types.h>
 #include <sys/stat.h>
 
+#include <assert.h>
 #include <err.h>
 #include <errno.h>
 #include <fts.h>
@@ -91,7 +92,7 @@ volatile sig_atomic_t info;
 
 enum op { FILE_TO_FILE, FILE_TO_DIR, DIR_TO_DNE };
 
-static int copy(char *[], enum op, int);
+static int copy(char *[], enum op, int, struct stat *);
 static void siginfo(int __unused);
 
 int
@@ -259,18 +260,26 @@ main(int argc, char *argv[])
 		 */
 		type = FILE_TO_DIR;
 
-	exit (copy(argv, type, fts_options));
+	/*
+	 * For DIR_TO_DNE, we could provide copy() with the to_stat we've
+	 * already allocated on the stack here that isn't being used for
+	 * anything.  Not doing so, though, simplifies later logic a little bit
+	 * as we need to skip checking root_stat on the first iteration and
+	 * ensure that we set it with the first mkdir().
+	 */
+	exit (copy(argv, type, fts_options, (type == DIR_TO_DNE ? NULL :
+	    &to_stat)));
 }
 
 static int
-copy(char *argv[], enum op type, int fts_options)
+copy(char *argv[], enum op type, int fts_options, struct stat *root_stat)
 {
-	struct stat to_stat;
+	struct stat created_root_stat, to_stat;
 	FTS *ftsp;
 	FTSENT *curr;
 	int base = 0, dne, badcp, rval;
 	size_t nlen;
-	char *p, *target_mid;
+	char *p, *recurse_path, *target_mid;
 	mode_t mask, mode;
 
 	/*
@@ -280,6 +289,7 @@ copy(char *argv[], enum op type, int fts_options)
 	mask = ~umask(0777);
 	umask(~mask);
 
+	recurse_path = NULL;
 	if ((ftsp = fts_open(argv, fts_options, NULL)) == NULL)
 		err(1, "fts_open");
 	for (badcp = rval = 0; (curr = fts_read(ftsp)) != NULL; badcp = 0) {
@@ -299,6 +309,47 @@ copy(char *argv[], enum op type, int fts_options)
 			;
 		}
 
+		if (curr->fts_info == FTS_D && type != FILE_TO_FILE &&
+		    root_stat != NULL &&
+		    root_stat->st_dev == curr->fts_statp->st_dev &&
+		    root_stat->st_ino == curr->fts_statp->st_ino) {
+			assert(recurse_path == NULL);
+			if (curr->fts_level > FTS_ROOTLEVEL) {
+				/*
+				 * If the recursion isn't at the immediate
+				 * level, we can just not traverse into this
+				 * directory.
+				 */
+				fts_set(ftsp, curr, FTS_SKIP);
+				continue;
+			} else {
+				const char *slash;
+
+				/*
+				 * Grab the last path component and double it,
+				 * to make life easier later and ensure that
+				 * we work even with fts_level == 0 is a couple
+				 * of components deep in fts_path.  No path
+				 * separators are fine and expected in the
+				 * common case, though.
+				 */
+				slash = strrchr(curr->fts_path, '/');
+				if (slash != NULL)
+					slash++;
+				else
+					slash = curr->fts_path;
+				if (asprintf(&recurse_path, "%s/%s",
+				    curr->fts_path, slash) == -1)
+					err(1, "asprintf");
+			}
+		}
+
+		if (recurse_path != NULL &&
+		    strcmp(curr->fts_path, recurse_path) == 0) {
+			fts_set(ftsp, curr, FTS_SKIP);
+			continue;
+		}
+
 		/*
 		 * If we are in case (2) or (3) above, we need to append the
 		 * source name to the target name.
@@ -447,6 +498,19 @@ copy(char *argv[], enum op type, int fts_options)
 				if (mkdir(to.p_path,
 				    curr->fts_statp->st_mode | S_IRWXU) < 0)
 					err(1, "%s", to.p_path);
+				/*
+				 * First DNE with a NULL root_stat is the root
+				 * path, so set root_stat.  We can't really
+				 * tell in all cases if the target path is
+				 * within the src path, so we just stat() the
+				 * first directory we created and use that.
+				 */
+				if (root_stat == NULL &&
+				    stat(to.p_path, &created_root_stat) == -1) {
+					err(1, "stat");
+				} else if (root_stat == NULL) {
+					root_stat = &created_root_stat;
+				}
 			} else if (!S_ISDIR(to_stat.st_mode)) {
 				errno = ENOTDIR;
 				err(1, "%s", to.p_path);
@@ -492,6 +556,7 @@ copy(char *argv[], enum op type, int fts_options)
 	if (errno)
 		err(1, "fts_read");
 	fts_close(ftsp);
+	free(recurse_path);
 	return (rval);
 }
 
diff --git a/bin/cp/tests/cp_test.sh b/bin/cp/tests/cp_test.sh
index 753e69f2d619..3220b45b4438 100755
--- a/bin/cp/tests/cp_test.sh
+++ b/bin/cp/tests/cp_test.sh
@@ -57,8 +57,85 @@ chrdev_body()
 	check_size trunc 0
 }
 
+atf_test_case matching_srctgt
+matching_srctgt_body()
+{
+
+	# PR235438: `cp -R foo foo` would previously infinitely recurse and
+	# eventually error out.
+	mkdir foo
+	echo "qux" > foo/bar
+	cp foo/bar foo/zoo
+
+	atf_check cp -R foo foo
+	atf_check -o inline:"qux\n" cat foo/foo/bar
+	atf_check -o inline:"qux\n" cat foo/foo/zoo
+	atf_check -e not-empty -s not-exit:0 stat foo/foo/foo
+}
+
+atf_test_case matching_srctgt_contained
+matching_srctgt_contained_body()
+{
+
+	# Let's do the same thing, except we'll try to recursively copy foo into
+	# one of its subdirectories.
+	mkdir foo
+	echo "qux" > foo/bar
+	mkdir foo/loo
+	mkdir foo/moo
+	mkdir foo/roo
+	cp foo/bar foo/zoo
+
+	atf_check cp -R foo foo/moo
+	atf_check -o inline:"qux\n" cat foo/moo/foo/bar
+	atf_check -o inline:"qux\n" cat foo/moo/foo/zoo
+	atf_check -e not-empty -s not-exit:0 stat foo/moo/foo/moo
+}
+
+atf_test_case matching_srctgt_link
+matching_srctgt_link_body()
+{
+
+	mkdir foo
+	echo "qux" > foo/bar
+	cp foo/bar foo/zoo
+
+	atf_check ln -s foo roo
+	atf_check cp -RH roo foo
+	atf_check -o inline:"qux\n" cat foo/roo/bar
+	atf_check -o inline:"qux\n" cat foo/roo/zoo
+}
+
+atf_test_case matching_srctgt_nonexistent
+matching_srctgt_nonexistent_body()
+{
+
+	# We'll copy foo to a nonexistent subdirectory; ideally, we would
+	# skip just the directory and end up with a layout like;
+	#
+	# foo/
+	#     bar
+	#     dne/
+	#         bar
+	#         zoo
+	#     zoo
+	#
+	mkdir foo
+	echo "qux" > foo/bar
+	cp foo/bar foo/zoo
+
+	atf_check cp -R foo foo/dne
+	atf_check -o inline:"qux\n" cat foo/dne/bar
+	atf_check -o inline:"qux\n" cat foo/dne/zoo
+	atf_check -e not-empty -s not-exit:0 stat foo/dne/foo
+}
+
 atf_init_test_cases()
 {
 	atf_add_test_case basic
 	atf_add_test_case chrdev
+	atf_add_test_case matching_srctgt
+	atf_add_test_case matching_srctgt_contained
+	atf_add_test_case matching_srctgt_link
+	atf_add_test_case matching_srctgt_nonexistent
 }