catrig[fl].c and inexact
Dimitry Andric
dimitry at andric.com
Sat May 13 13:08:37 UTC 2017
On 13 May 2017, at 08:08, Steve Kargl <sgk at troutmask.apl.washington.edu> wrote:
>
> On Sat, May 13, 2017 at 11:35:49AM +1000, Bruce Evans wrote:
>> On Fri, 12 May 2017, Steve Kargl wrote:
...
>> required for the standard magic. I planned to fix all this magic using
>> macros like raise_inexact().
>
> If you plan to fix the magic with raise_inexact, then please
> test with a suite of compilers. AFAICT, clang is optimizing
> out the code. I haven't written a testcase to demonstrate this
> as I have other irons in the fire.
Using the full catrig.c and -O3, I tried gcc 4.2.1, 4.7.4, 4.8.5, 4.9.4,
5.4.0, 6.3.0 and 7.0.1, in addition to clang 3.4.1, 3.8.0, 3.9.1, 4.0.0
and 5.0.0. All versions of gcc produced something similar to the
following for i386:
# /usr/src/lib/msun/src/catrig.c:314: if (x == 0 && y == 0)
.loc 1 314 0
fldz
fucom %st(3) #
fnstsw %ax # tmp262
sahf
setne %al #, tmp270
setnp %dl #, tmp259
subl $1, %eax #, tmp270
testb %al, %dl # tmp270, tmp259
je .L176 #,
fucomp %st(1) #
fnstsw %ax # tmp281
sahf
setne %al #, tmp289
setnp %dl #, tmp278
subl $1, %eax #, tmp289
testb %al, %dl # tmp289, tmp278
je .L37 #,
fstp %st(3) #
fstp %st(0) #
jmp .L153 #
[...]
.L176:
fstp %st(0) #
.L37:
.LBB25:
# /usr/src/lib/msun/src/catrig.c:318: raise_inexact();
flds tiny # tiny
fadds .LC2 #
fstps 120(%esp) # junk
and for amd64:
# /usr/src/lib/msun/src/catrig.c:314: if (x == 0 && y == 0)
.loc 1 314 0
pxor %xmm7, %xmm7 # tmp386
ucomisd %xmm7, %xmm3 # tmp386, z
setnp %dl #, tmp258
cmovne %eax, %edx # tmp258,, tmp207, tmp254
testb %dl, %dl # tmp254
je .L34 #,
ucomisd %xmm7, %xmm1 # tmp386, z
setnp %dl #, tmp266
cmove %edx, %eax # tmp266,, tmp262
testb %al, %al # tmp262
je .L34 #,
[...]
.L34:
.LBB33:
# /usr/src/lib/msun/src/catrig.c:318: raise_inexact();
movss tiny(%rip), %xmm0 # tiny, tiny.0_28
addss .LC13(%rip), %xmm0 #, _29
movss %xmm0, 188(%rsp) # _29, junk
All versions of clang produced something similar to the following for
i386:
.loc 1 314 8 is_stmt 1 # /usr/src/lib/msun/src/catrig.c:314:8
fldz
.loc 1 314 13 is_stmt 0 # /usr/src/lib/msun/src/catrig.c:314:13
fxch %st(1)
fucom %st(1)
fnstsw %ax
sahf
jne .LBB0_19
jp .LBB0_19
.loc 1 0 13 # /usr/src/lib/msun/src/catrig.c:0:13
fxch %st(3)
fucom %st(1)
fstp %st(1)
fnstsw %ax
sahf
fldz
fxch %st(1)
fxch %st(3)
jne .LBB0_19
jp .LBB0_19
[...]
.LBB0_19: # %do.body
.loc 1 0 8 is_stmt 0 # /usr/src/lib/msun/src/catrig.c:0:8
fstp %st(1)
.loc 1 318 2 is_stmt 1 # /usr/src/lib/msun/src/catrig.c:318:2
fld1
fadds tiny
fstps 168(%esp)
and for amd64:
.loc 1 314 8 is_stmt 1 # /usr/src/lib/msun/src/catrig.c:314:8
pxor %xmm2, %xmm2
.loc 1 314 13 is_stmt 0 # /usr/src/lib/msun/src/catrig.c:314:13
ucomisd %xmm2, %xmm4
jne .LBB0_15
jp .LBB0_15
.loc 1 0 13 # /usr/src/lib/msun/src/catrig.c:0:13
ucomisd %xmm2, %xmm3
jne .LBB0_15
jnp .LBB0_21
.LBB0_15: # %do.body
.loc 1 318 2 is_stmt 1 # /usr/src/lib/msun/src/catrig.c:318:2
movss tiny(%rip), %xmm2 # xmm2 = mem[0],zero,zero,zero
addss .LCPI0_2(%rip), %xmm2
.Ltmp11:
movss %xmm2, -16(%rbp)
I.e., these all look good, at least with regard to not optimizing out
the desired addition.
The only compiler I could find that does optimize everything away (at
least in the simplified test case) is the Intel compiler:
https://godbolt.org/g/g1UT2m
-Dimitry
-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 194 bytes
Desc: Message signed with OpenPGP
URL: <http://lists.freebsd.org/pipermail/freebsd-numerics/attachments/20170513/6ccea7c2/attachment.sig>
More information about the freebsd-numerics
mailing list