git: 17631aa22f13 - main - devel/dyncall: Fix build on arm

From: Mikael Urankar <mikael_at_FreeBSD.org>
Date: Fri, 08 Oct 2021 10:48:21 UTC
The branch main has been updated by mikael:

URL: https://cgit.FreeBSD.org/ports/commit/?id=17631aa22f13fa4cecfed6cf92bd3ac15334454b

commit 17631aa22f13fa4cecfed6cf92bd3ac15334454b
Author:     Tassilo Philipp <tphilipp@potion-studios.com>
AuthorDate: 2021-10-08 10:46:15 +0000
Commit:     Mikael Urankar <mikael@FreeBSD.org>
CommitDate: 2021-10-08 10:46:15 +0000

    devel/dyncall: Fix build on arm
    
    PR:             258846
    Reported by:    Robert Clausecker
---
 devel/dyncall/files/patch-arm | 176 ++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 176 insertions(+)

diff --git a/devel/dyncall/files/patch-arm b/devel/dyncall/files/patch-arm
new file mode 100644
index 000000000000..0433091778d8
--- /dev/null
+++ b/devel/dyncall/files/patch-arm
@@ -0,0 +1,176 @@
+Fix arm32 builds when using clang's integrated assembler (syntax-only fix for v1.2, needed only until v1.3 is released).
+
+--- dyncall/dyncall_call_arm32_arm_armhf.S.orig	2021-01-22 15:43:00 UTC
++++ dyncall/dyncall_call_arm32_arm_armhf.S
+@@ -59,7 +59,7 @@ ENTRY_C(dcCall_arm32_armhf)
+ 	add r5,  r1, #16 /* r5 = stack args (after intreg ones) */
+ 
+ 	/* Load 16 single-precision registers (= 8 double-precision registers). */
+-	fldmiad r3, {d0-d7}
++	vldmia r3, {d0-d7}
+ 
+ 	/* prep stack parameter area (includes room for spill area, callee spills if needed) */
+ 	sub r13, r13, r2
+@@ -77,7 +77,7 @@ armhf_pushArgs:
+ 
+ armhf_call:
+ 	ldmia r1, {r0-r3}  /* Load first 4 arguments for new call into r0-r3. */
+-	                   /* 'blx %r4' workaround for ARMv4t: */
++	                   /* 'blx r4' workaround for ARMv4t: */
+ 	mov r14, r15       /* Branch return address(r15) -> link register (r14) -- r15 always points to address of current + 2 instructions (= Epilog code). */ 
+ 	bx  r4             /* Call (ARM/THUMB), available for ARMv4t. */
+ 
+--- dyncall/dyncall_call_arm32_thumb_armhf.S.orig	2021-01-22 15:43:00 UTC
++++ dyncall/dyncall_call_arm32_thumb_armhf.S
+@@ -68,7 +68,7 @@ ENTRY_C(dcCall_arm32_armhf)
+ 	mov	r5  , r1	 /* r5 = 'args' (2nd argument is passed in r1). */
+ 	
+ 	/* Load 16 single-precision registers (= 8 double-precision registers). */
+-	fldmiad	r3, {d0-d7}
++	vldmia	r3, {d0-d7}
+ 
+ 	sub	r2 , #16	
+ 	cmp     r2, #0
+@@ -99,7 +99,7 @@ armhf_pushArgs:
+ 
+ armhf_call:
+ 	ldmia	r5!, {r0-r3}	/* Load first 4 arguments for new call into r0-r3. */
+-				/* 'blx %r4' workaround for ARMv4t: */
++				/* 'blx r4' workaround for ARMv4t: */
+ 	// mov	r14, r15	/*   Branch return address(r15) -> link register (r14) -- r15 always points to address of current + 2 instructions (= Epilog code). */ 
+ 	mov	r6, r15
+ 	add	r6, #5
+--- dyncall/dyncall_call_arm32_thumb_gas.s.orig	2021-01-22 15:43:00 UTC
++++ dyncall/dyncall_call_arm32_thumb_gas.s
+@@ -37,45 +37,45 @@ dcCall_arm32_thumb:
+ 
+ 	/* Prolog. This function never needs to spill inside its prolog, so just store the permanent registers. */
+ 	/* Code below is not using high registers, so not storing them in prolog, which is more involved with thumb, anyways. */
+-	push	{%r4-%r7, %r14}		/* Frame ptr, permanent registers, link register -> save area on stack. */
+-	mov		%r7, %r13	/* Set frame ptr. */
+-	sub		%r13, #4	/* Realign stack to 8 bytes (b/c we stored 5 regs = 20b). */
++	push	{r4-r7, r14}		/* Frame ptr, permanent registers, link register -> save area on stack. */
++	mov		r7, r13	/* Set frame ptr. */
++	sub		r13, #4	/* Realign stack to 8 bytes (b/c we stored 5 regs = 20b). */
+ 
+ 	/* Call. */
+-	mov		%r4, %r0	/* Move 'fptr' to r4 (1st argument is passed in r0). */
+-	mov		%r5, %r1	/* Move 'args' to r5 (2nd argument is passed in r1). */
+-	mov		%r6, %r2	/* Move 'size' to r6 (3rd argument is passed in r2). */
++	mov		r4, r0	/* Move 'fptr' to r4 (1st argument is passed in r0). */
++	mov		r5, r1	/* Move 'args' to r5 (2nd argument is passed in r1). */
++	mov		r6, r2	/* Move 'size' to r6 (3rd argument is passed in r2). */
+ 
+-	cmp		%r6, #16	/* Jump to call if no more than 4 arguments. */
++	cmp		r6, #16	/* Jump to call if no more than 4 arguments. */
+ 	ble		call
+ 
+-	sub		%r6, #16	/* Size of remaining arguments. */
+-	mov		%r0, %r13	/* Set stack pointer to top of stack. */
+-	sub		%r0, %r0, %r6
+-	lsr		%r0, #3		/* Align stack on 8 byte boundaries. */
+-	lsl		%r0, #3
+-	mov		%r13, %r0
++	sub		r6, #16	/* Size of remaining arguments. */
++	mov		r0, r13	/* Set stack pointer to top of stack. */
++	sub		r0, r0, r6
++	lsr		r0, #3		/* Align stack on 8 byte boundaries. */
++	lsl		r0, #3
++	mov		r13, r0
+ 
+-	add		%r1, #16	/* Let r1 point to remaining arguments. */
+-	mov		%r2, #0		/* Init byte counter to 0. */
++	add		r1, #16	/* Let r1 point to remaining arguments. */
++	mov		r2, #0		/* Init byte counter to 0. */
+ .thumb_func
+ pushArgs:
+-	ldrb	%r3, [%r1, %r2]		/* Load a byte into r3. */
+-	strb	%r3, [%r0, %r2]		/* Push byte onto stack. */
+-	add		%r2, %r2, #1	/* Increment byte counter. */
+-	cmp		%r2, %r6
++	ldrb	r3, [r1, r2]		/* Load a byte into r3. */
++	strb	r3, [r0, r2]		/* Push byte onto stack. */
++	add		r2, r2, #1	/* Increment byte counter. */
++	cmp		r2, r6
+ 	bne		pushArgs
+ .thumb_func
+ call:
+-	ldmia	%r5!, {%r0-%r3}		/* Load first 4 arguments for new call into r0-r3. */
++	ldmia	r5!, {r0-r3}		/* Load first 4 arguments for new call into r0-r3. */
+ 					
+-					/* 'blx %r4' workaround for ARMv4t in THUMB: */
+-	mov		%r6,  %r15	/* Load PC+2 instructions from here */
+-	add		%r6,  #5	/* Increment by 2 instructions (Address of 'Epilog') and set bit 0 (THUMB) */
+-	mov		%r14, %r6	/* Store in link register. */
+-	bx		%r4		/* Branch and force THUMB-mode return (LR bit 0 set). */
++					/* 'blx r4' workaround for ARMv4t in THUMB: */
++	mov		r6,  r15	/* Load PC+2 instructions from here */
++	add		r6,  #5	/* Increment by 2 instructions (Address of 'Epilog') and set bit 0 (THUMB) */
++	mov		r14, r6	/* Store in link register. */
++	bx		r4		/* Branch and force THUMB-mode return (LR bit 0 set). */
+ 
+ 	/* Epilog. */
+-	mov		%r13, %r7	/* Reset stack ptr. */
+-	pop		{%r4-%r7, %r15}	/* Restore permanent registers and program counter. (Force a stay in THUMB in ARMv4, whether ARMv5 can return in ARM or THUMB depending on the bit 0. */
++	mov		r13, r7	/* Reset stack ptr. */
++	pop		{r4-r7, r15}	/* Restore permanent registers and program counter. (Forces a stay in THUMB on ARMv4, whereas ARMv5 can return in ARM or THUMB depending on bit 0.) */
+ 
+--- dyncallback/dyncall_callback_arm32_arm_gas.S.orig	2021-01-22 15:43:00 UTC
++++ dyncallback/dyncall_callback_arm32_arm_gas.S
+@@ -47,37 +47,37 @@ CTX_userdata    =  16
+ dcCallbackThunkEntry:
+ 
+ 	/* Prolog. This function never needs to spill inside its prolog, so just store the permanent registers. */
+-	stmdb	%r13, {%r4-%r11, %r13, %r14}	/* Permanent registers and stack pointer, etc... -> save area on stack (except counter). */
+-	mov		%r11, %r13						/* Set frame pointer. */
+-	sub		%r13, %r13, #40					/* Adjust stack pointer. */
++	stmdb	r13, {r4-r11, r13, r14}	/* Permanent registers and stack pointer, etc... -> save area on stack (except counter). */
++	mov		r11, r13						/* Set frame pointer. */
++	sub		r13, r13, #40					/* Adjust stack pointer. */
+ 
+ 	/* Grab arguments. */
+-	mov		%r4, #0
++	mov		r4, #0
+ #if defined(DC__ABI_ARM_HF)
+-	stmdb	%r13!, {%r4}					/* Init freg_count and dreg_count to 0 */
+-	stmdb	%r13!, {%r4}
+-	fstmdbd	%r13!, {d0-d7}					/* Store all fp-registers in DCArgs' f[16] */
++	stmdb	r13!, {r4}					/* Init freg_count and dreg_count to 0 */
++	stmdb	r13!, {r4}
++	vstmdb	r13!, {d0-d7}					/* Store all fp-registers in DCArgs' f[16] */
+ #endif
+-	stmdb	%r13!, {%r0-%r4, %r11}			/* Spill first 4 args to DCArgs, along with reg_count (init to 0) and (stack) pointer to remaining args. */
++	stmdb	r13!, {r0-r4, r11}			/* Spill first 4 args to DCArgs, along with reg_count (init to 0) and (stack) pointer to remaining args. */
+ 
+ 	/* Prepare callback handler call. */
+-	mov		%r0, %r12						/* Parameter 0 (r0) = DCCallback pointer (r12, pointer to thunk). */
+-	mov		%r1, %r13						/* Parameter 1 (r1) = DCArgs pointer (r13, stack pointer). */
+-	sub		%r13, %r13, #DCValue_size		/* Make room for return value. */
+-	mov		%r2, %r13						/* Parameter 2 (r2) = results pointer. */
+-	ldr		%r3, [%r12, #CTX_userdata]		/* Parameter 3 (r3) = userdata pointer. */
++	mov		r0, r12						/* Parameter 0 (r0) = DCCallback pointer (r12, pointer to thunk). */
++	mov		r1, r13						/* Parameter 1 (r1) = DCArgs pointer (r13, stack pointer). */
++	sub		r13, r13, #DCValue_size		/* Make room for return value. */
++	mov		r2, r13						/* Parameter 2 (r2) = results pointer. */
++	ldr		r3, [r12, #CTX_userdata]		/* Parameter 3 (r3) = userdata pointer. */
+ 
+ 	/* Call. */
+-	ldr		%r4, [%r12, #CTX_handler]		/* Load callback handler pointer into r4. */
+-	mov		%r14, %r15						/* Branch return address(r15) -> link register (r14) -- r15 always points to address of current + 2 instructions (= Epilog code). */
+-	bx		%r4								/* Call. */
++	ldr		r4, [r12, #CTX_handler]		/* Load callback handler pointer into r4. */
++	mov		r14, r15						/* Branch return address(r15) -> link register (r14) -- r15 always points to address of current + 2 instructions (= Epilog code). */
++	bx		r4								/* Call. */
+ 
+ 	/* Return value. */
+-	ldmia	%r13, {%r0, %r1}				/* Load return value in r0 and r1. */
++	ldmia	r13, {r0, r1}				/* Load return value in r0 and r1. */
+ #if defined(DC__ABI_ARM_HF)
+-	fldmiad	%r13, {%d0}						/* Same for floating point return value (if any). */
++	vldmia	r13, {d0}						/* Same for floating point return value (if any). */
+ #endif
+ 
+ 	/* Epilog. */
+-	ldmdb	%r11, {%r4-%r11, %r13, %r15}	/* Restore permanent registers (restore stack ptr and program counter).@@@db not needed since we rewrite r13? */
++	ldmdb	r11, {r4-r11, r13, r15}	/* Restore permanent registers (restore stack ptr and program counter).@@@db not needed since we rewrite r13? */
+