git: 7a605ba8f799 - stable/14 - lib/libc/amd64/string/strcat.S: enable use of SIMD

From: Robert Clausecker <fuz_at_FreeBSD.org>
Date: Wed, 24 Jan 2024 19:44:45 UTC
The branch stable/14 has been updated by fuz:

URL: https://cgit.FreeBSD.org/src/commit/?id=7a605ba8f7996f38ba7b353a0120d84bae48da0f

commit 7a605ba8f7996f38ba7b353a0120d84bae48da0f
Author:     Robert Clausecker <fuz@FreeBSD.org>
AuthorDate: 2023-11-14 18:09:08 +0000
Commit:     Robert Clausecker <fuz@FreeBSD.org>
CommitDate: 2024-01-24 19:39:28 +0000

    lib/libc/amd64/string/strcat.S: enable use of SIMD
    
    strcat has a bespoke scalar assembly implementation we
    inherited from NetBSD.  While it performs well, it is
    better to call into our SIMD implementations if any SIMD
    features are available at all.  So do that and implement
    strcat() by calling into strlen() and strcpy() if these
    are available.
    
    Sponsored by:   The FreeBSD Foundation
    Tested by:      developers@, exp-run
    Approved by:    mjg
    MFC after:      1 month
    MFC to:         stable/14
    PR:             275785
    Differential Revision: https://reviews.freebsd.org/D42600
    
    (cherry picked from commit aff9143a242c0012b0195b3666e03fa3b7cd33e8)
---
 lib/libc/amd64/string/strcat.S | 47 +++++++++++++++++++++++++++++++++++++-----
 1 file changed, 42 insertions(+), 5 deletions(-)

diff --git a/lib/libc/amd64/string/strcat.S b/lib/libc/amd64/string/strcat.S
index 0834408acfb7..081e98840cee 100644
--- a/lib/libc/amd64/string/strcat.S
+++ b/lib/libc/amd64/string/strcat.S
@@ -1,6 +1,14 @@
-/*
- * Written by J.T. Conklin <jtc@acorntoolworks.com>
- * Public domain.
+/*-
+ * Copyright (c) 2023, The FreeBSD Foundation
+ *
+ * SPDX-License-Expression: BSD-2-Clause
+ *
+ * Portions of this software were developed by Robert Clausecker
+ * <fuz@FreeBSD.org> under sponsorship from the FreeBSD Foundation.
+ *
+ * Adapted from NetBSD's common/lib/libc/arch/x86_64/string/strcat.S
+ * written by J.T. Conklin <jtc@acorntoolworks.com>
+ * that was originally dedicated to the public domain
  */
 
 #include <machine/asm.h>
@@ -8,7 +16,14 @@
 	RCSID("$NetBSD: strcat.S,v 1.4 2004/07/26 18:51:21 drochner Exp $")
 #endif
 
-ENTRY(strcat)
+#include "amd64_archlevel.h"
+
+ARCHFUNCS(strcat)
+	ARCHFUNC(strcat, scalar)
+	ARCHFUNC(strcat, baseline)
+ENDARCHFUNCS(strcat)
+
+ARCHENTRY(strcat, scalar)
 	movq	%rdi,%rax
 	movabsq	$0x0101010101010101,%r8
 	movabsq	$0x8080808080808080,%r9
@@ -161,6 +176,28 @@ ENTRY(strcat)
 
 .Ldone:
 	ret
-END(strcat)
+ARCHEND(strcat, scalar)
+
+/*
+ * Call into strlen + strcpy if we have any SIMD at all.
+ * The scalar implementation above is better for the scalar
+ * case as it avoids the function call overhead, but pessimal
+ * if we could call SIMD routines instead.
+ */
+ARCHENTRY(strcat, baseline)
+	push	%rbp			# establish a frame pointer so src can be found at -8(%rbp)
+	mov	%rsp, %rbp
+	push	%rsi			# save src at -8(%rbp); %rsi is caller-saved and may be clobbered by strlen
+	push	%rbx			# preserve callee-saved %rbx; third push leaves %rsp 16-byte aligned for the calls
+	mov	%rdi, %rbx		# remember destination for later
+	call	CNAME(strlen)		# strlen(dest)
+	mov	-8(%rbp), %rsi		# reload src saved above
+	lea	(%rbx, %rax, 1), %rdi	# dest + strlen(dest)
+	call	CNAME(__stpcpy)		# stpcpy(dest + strlen(dest), src)
+	mov	%rbx, %rax		# return dest
+	pop	%rbx			# restore caller's %rbx
+	leave				# %rsp = %rbp (drops the saved src slot), then pop %rbp
+	ret
+ARCHEND(strcat, baseline)
 
 	.section .note.GNU-stack,"",%progbits