git: 52d4a4d4e0de - main - lib/libc/amd64/string/strcspn.S: fix behaviour with sets of 17--32

From: Robert Clausecker <fuz_at_FreeBSD.org>
Date: Tue, 12 Sep 2023 03:41:46 UTC
The branch main has been updated by fuz:

URL: https://cgit.FreeBSD.org/src/commit/?id=52d4a4d4e0dedc72bc33082a3f84c2d0fd6f2cbb

commit 52d4a4d4e0dedc72bc33082a3f84c2d0fd6f2cbb
Author:     Robert Clausecker <fuz@FreeBSD.org>
AuthorDate: 2023-09-11 23:56:30 +0000
Commit:     Robert Clausecker <fuz@FreeBSD.org>
CommitDate: 2023-09-12 02:58:43 +0000

    lib/libc/amd64/string/strcspn.S: fix behaviour with sets of 17--32
    
    When a string is matched against a set of 17--32 characters, each chunk
    of the string is matched first against the first 16 characters of the
    set and then against the remaining characters.  We also check at the
    same time if the string has a nul byte in the current chunk, terminating
    the search if it does.
    
    Due to misconceived logic, the order of checks was "first half of set,
    nul byte, second half of set", meaning that a match with the second half
    of the set was ignored when the string ended in the same 16 bytes.
    Swap the last two checks, so that the order becomes "first half of set,
    second half of set, nul byte", to fix this problem.
    
    Sponsored by:   The FreeBSD Foundation
    Approved by:    mjg (blanket, via IRC)
    MFC after:      1 week
    MFC to:         stable/14
---
 lib/libc/amd64/string/strcspn.S | 25 +++++++++++++++----------
 1 file changed, 15 insertions(+), 10 deletions(-)

diff --git a/lib/libc/amd64/string/strcspn.S b/lib/libc/amd64/string/strcspn.S
index de409db6d472..53100eeea9a5 100644
--- a/lib/libc/amd64/string/strcspn.S
+++ b/lib/libc/amd64/string/strcspn.S
@@ -259,27 +259,32 @@ ARCHENTRY(strcspn, x86_64_v2)
 	movdqu		48(%rsp, %rcx, 1), %xmm3 # second part of set
 
 	/* set is 17--32 bytes in size */
-	pcmpistri	$0, %xmm0, %xmm2	# match in head?
-	jbe		.Lheadmatchv2
-	pcmpistri	$0, %xmm0, %xmm3	# ZF=1 not possible here
+	pcmpistri	$0, %xmm0, %xmm2	# match in first set half?
 	jb		.Lheadmatchv2
+	pcmpistri	$0, %xmm0, %xmm3	# match in second set half or end of string?
+	jbe		.Lheadmatchv2
 
 	ALIGN_TEXT
 0:	movdqa		(%rax), %xmm0
 	pcmpistri	$0, %xmm0, %xmm2
-	jbe		1b
+	jb		2f			# match in first set half?
 	pcmpistri	$0, %xmm0, %xmm3
-	jb		1f			# ZF=1 not possible here
+	jbe		1f			# match in second set half or end of string?
 	movdqa		16(%rax), %xmm0
 	add		$32, %rax
 	pcmpistri	$0, %xmm0, %xmm2
-	jbe		3b
+	jb		3f			# match in first set half?
 	pcmpistri	$0, %xmm0, %xmm3
-	jae		0b			# ZF=1 not possible here
+	ja		0b			# neither match in 2nd half nor string end?
 
-	sub		$16, %rax		# go back to second half
-1:	add		%rcx, %rax
-	sub		%rdi, %rax
+3:	lea		-16(%rax), %rax		# go back to second half
+1:	jc		2f			# jump if match found
+	pxor		%xmm1, %xmm1
+	pcmpeqb		%xmm1, %xmm0		# where is the NUL byte?
+	pmovmskb	%xmm0, %ecx
+	tzcnt		%ecx, %ecx		# location of NUL byte in (%rax)
+2:	sub		%rdi, %rax		# offset of %xmm0 from beginning of string
+	add		%rcx, %rax		# prefix length before match/NUL
 	leave
 	ret