svn commit: r252080 - in stable/9: contrib/gcc contrib/gcc/config/i386 contrib/gcc/doc gnu/usr.bin/cc/include
Pedro F. Giffuni
pfg at FreeBSD.org
Sat Jun 22 02:22:14 UTC 2013
Author: pfg
Date: Sat Jun 22 02:22:12 2013
New Revision: 252080
URL: http://svnweb.freebsd.org/changeset/base/252080
Log:
MFC r251212, r251480:
GCC: bring back experimental support for amdfam10/barcelona CPUs.
This is a revised subset of the support initially brought in in r236962
and later reverted. The collateral efects seem to have disappeared but
it is still recommended to set the CPUTYPE with caution.
While here also install AMD intrinsics header ammintrin.h
Reviewed by: jkim (ages ago)
Added:
stable/9/contrib/gcc/config/i386/ammintrin.h
- copied unchanged from r251212, head/contrib/gcc/config/i386/ammintrin.h
Modified:
stable/9/contrib/gcc/ChangeLog.gcc43
stable/9/contrib/gcc/config.gcc
stable/9/contrib/gcc/config/i386/athlon.md
stable/9/contrib/gcc/config/i386/driver-i386.c
stable/9/contrib/gcc/config/i386/emmintrin.h
stable/9/contrib/gcc/config/i386/i386.c
stable/9/contrib/gcc/config/i386/i386.h
stable/9/contrib/gcc/config/i386/i386.md
stable/9/contrib/gcc/config/i386/i386.opt
stable/9/contrib/gcc/config/i386/pmmintrin.h
stable/9/contrib/gcc/config/i386/sse.md
stable/9/contrib/gcc/config/i386/tmmintrin.h
stable/9/contrib/gcc/doc/extend.texi
stable/9/contrib/gcc/doc/invoke.texi
stable/9/gnu/usr.bin/cc/include/Makefile
Directory Properties:
stable/9/ (props changed)
stable/9/contrib/gcc/ (props changed)
stable/9/gnu/usr.bin/cc/include/ (props changed)
Modified: stable/9/contrib/gcc/ChangeLog.gcc43
==============================================================================
--- stable/9/contrib/gcc/ChangeLog.gcc43 Fri Jun 21 23:21:16 2013 (r252079)
+++ stable/9/contrib/gcc/ChangeLog.gcc43 Sat Jun 22 02:22:12 2013 (r252080)
@@ -1,3 +1,8 @@
+2007-05-01 Dwarakanath Rajagopal <dwarak.rajagopal at amd.com> (r124341)
+
+ * doc/invoke.texi: Fix typo, 'AMD Family 10h core' instead of
+ 'AMD Family 10 core'.
+
2007-05-01 Dwarakanath Rajagopal <dwarak.rajagopal at amd.com> (r124339)
* config/i386/i386.c (override_options): Accept k8-sse3, opteron-sse3
@@ -5,6 +10,12 @@
with SSE3 instruction set support.
* doc/invoke.texi: Likewise.
+2007-05-01 Dwarakanath Rajagopal <dwarak.rajagopal at amd.com> (r124330)
+
+ * config/i386/i386.c (override_options): Tuning 32-byte loop
+ alignment for amdfam10 architecture. Increasing the max loop
+ alignment to 24 bytes.
+
2007-04-12 Richard Guenther <rguenther at suse.de> (r123736)
PR tree-optimization/24689
@@ -21,6 +32,17 @@
* config/i386/i386.c (ix86_handle_option): Handle SSSE3.
+2007-03-28 Dwarakanath Rajagopal <dwarak.rajagopal at amd.com> (r123313)
+
+ * config.gcc: Accept barcelona as a variant of amdfam10.
+ * config/i386/i386.c (override_options): Likewise.
+ * doc/invoke.texi: Likewise.
+
+2007-02-09 Dwarakanath Rajagopal <dwarak.rajagopal at amd.com> (r121763)
+
+ * config/i386/driver-i386.c: Turn on -mtune=native for AMDFAM10.
+ (bit_SSE4a): New.
+
2007-02-08 Harsha Jagasia <harsha.jagasia at amd.com> (r121726)
* config/i386/xmmintrin.h: Make inclusion of emmintrin.h
@@ -38,6 +60,168 @@
* config/i386/i386.c (override_options): Set PTA_SSSE3 for core2.
+2007-02-05 Harsha Jagasia <harsha.jagasia at amd.com> (r121625)
+
+ * config/i386/athlon.md (athlon_fldxf_k8, athlon_fld_k8,
+ athlon_fstxf_k8, athlon_fst_k8, athlon_fist, athlon_fmov,
+ athlon_fadd_load, athlon_fadd_load_k8, athlon_fadd, athlon_fmul,
+ athlon_fmul_load, athlon_fmul_load_k8, athlon_fsgn,
+ athlon_fdiv_load, athlon_fdiv_load_k8, athlon_fdiv_k8,
+ athlon_fpspc_load, athlon_fpspc, athlon_fcmov_load,
+ athlon_fcmov_load_k8, athlon_fcmov_k8, athlon_fcomi_load_k8,
+ athlon_fcomi, athlon_fcom_load_k8, athlon_fcom): Added amdfam10.
+
+2007-02-05 Harsha Jagasia <harsha.jagasia at amd.com> (r121625)
+
+ * config/i386/i386.md (x86_sahf_1, cmpfp_i_mixed, cmpfp_i_sse,
+ cmpfp_i_i387, cmpfp_iu_mixed, cmpfp_iu_sse, cmpfp_iu_387,
+ swapsi, swaphi_1, swapqi_1, swapdi_rex64, fix_truncsfdi_sse,
+ fix_truncdfdi_sse, fix_truncsfsi_sse, fix_truncdfsi_sse,
+ x86_fldcw_1, floatsisf2_mixed, floatsisf2_sse, floatdisf2_mixed,
+ floatdisf2_sse, floatsidf2_mixed, floatsidf2_sse,
+ floatdidf2_mixed, floatdidf2_sse, muldi3_1_rex64, mulsi3_1,
+ mulsi3_1_zext, mulhi3_1, mulqi3_1, umulqihi3_1, mulqihi3_insn,
+ umulditi3_insn, umulsidi3_insn, mulditi3_insn, mulsidi3_insn,
+ umuldi3_highpart_rex64, umulsi3_highpart_insn,
+ umulsi3_highpart_zext, smuldi3_highpart_rex64,
+ smulsi3_highpart_insn, smulsi3_highpart_zext, x86_64_shld,
+ x86_shld_1, x86_64_shrd, sqrtsf2_mixed, sqrtsf2_sse,
+ sqrtsf2_i387, sqrtdf2_mixed, sqrtdf2_sse, sqrtdf2_i387,
+ sqrtextendsfdf2_i387, sqrtxf2, sqrtextendsfxf2_i387,
+ sqrtextenddfxf2_i387): Added amdfam10_decode.
+
+ * config/i386/athlon.md (athlon_idirect_amdfam10,
+ athlon_ivector_amdfam10, athlon_idirect_load_amdfam10,
+ athlon_ivector_load_amdfam10, athlon_idirect_both_amdfam10,
+ athlon_ivector_both_amdfam10, athlon_idirect_store_amdfam10,
+ athlon_ivector_store_amdfam10): New define_insn_reservation.
+ (athlon_idirect_loadmov, athlon_idirect_movstore): Added
+ amdfam10.
+
+2007-02-05 Harsha Jagasia <harsha.jagasia at amd.com> (r121625)
+
+ * config/i386/athlon.md (athlon_call_amdfam10,
+ athlon_pop_amdfam10, athlon_lea_amdfam10): New
+ define_insn_reservation.
+ (athlon_branch, athlon_push, athlon_leave_k8, athlon_imul_k8,
+ athlon_imul_k8_DI, athlon_imul_mem_k8, athlon_imul_mem_k8_DI,
+ athlon_idiv, athlon_idiv_mem, athlon_str): Added amdfam10.
+
+2007-02-05 Harsha Jagasia <harsha.jagasia at amd.com> (r121625)
+
+ * config/i386/athlon.md (athlon_sseld_amdfam10,
+ athlon_mmxld_amdfam10, athlon_ssest_amdfam10,
+ athlon_mmxssest_short_amdfam10): New define_insn_reservation.
+
+2007-02-05 Harsha Jagasia <harsha.jagasia at amd.com> (r121625)
+
+ * config/i386/athlon.md (athlon_sseins_amdfam10): New
+ define_insn_reservation.
+ * config/i386/i386.md (sseins): Added sseins to define_attr type
+ and define_attr unit.
+ * config/i386/sse.md: Set type attribute to sseins for insertq
+ and insertqi.
+
+2007-02-05 Harsha Jagasia <harsha.jagasia at amd.com> (r121625)
+
+ * config/i386/athlon.md (sselog_load_amdfam10, sselog_amdfam10,
+ ssecmpvector_load_amdfam10, ssecmpvector_amdfam10,
+ ssecomi_load_amdfam10, ssecomi_amdfam10,
+ sseaddvector_load_amdfam10, sseaddvector_amdfam10): New
+ define_insn_reservation.
+ (ssecmp_load_k8, ssecmp, sseadd_load_k8, seadd): Added amdfam10.
+
+2007-02-05 Harsha Jagasia <harsha.jagasia at amd.com> (r121625)
+
+ * config/i386/athlon.md (cvtss2sd_load_amdfam10,
+ cvtss2sd_amdfam10, cvtps2pd_load_amdfam10, cvtps2pd_amdfam10,
+ cvtsi2sd_load_amdfam10, cvtsi2ss_load_amdfam10,
+ cvtsi2sd_amdfam10, cvtsi2ss_amdfam10, cvtsd2ss_load_amdfam10,
+ cvtsd2ss_amdfam10, cvtpd2ps_load_amdfam10, cvtpd2ps_amdfam10,
+ cvtsX2si_load_amdfam10, cvtsX2si_amdfam10): New
+ define_insn_reservation.
+
+ * config/i386/sse.md (cvtsi2ss, cvtsi2ssq, cvtss2si,
+ cvtss2siq, cvttss2si, cvttss2siq, cvtsi2sd, cvtsi2sdq,
+ cvtsd2si, cvtsd2siq, cvttsd2si, cvttsd2siq,
+ cvtpd2dq, cvttpd2dq, cvtsd2ss, cvtss2sd,
+ cvtpd2ps, cvtps2pd): Added amdfam10_decode attribute.
+
+2007-02-05 Harsha Jagasia <harsha.jagasia at amd.com> (r121625)
+
+ * config/i386/athlon.md (athlon_ssedivvector_amdfam10,
+ athlon_ssedivvector_load_amdfam10, athlon_ssemulvector_amdfam10,
+ athlon_ssemulvector_load_amdfam10): New define_insn_reservation.
+ (athlon_ssediv, athlon_ssediv_load_k8, athlon_ssemul,
+ athlon_ssemul_load_k8): Added amdfam10.
+
+2007-02-05 Harsha Jagasia <harsha.jagasia at amd.com> (r121625)
+
+ * config/i386/i386.h (TARGET_SSE_UNALIGNED_MOVE_OPTIMAL): New macro.
+ (x86_sse_unaligned_move_optimal): New variable.
+
+ * config/i386/i386.c (x86_sse_unaligned_move_optimal): Enable for
+ m_AMDFAM10.
+ (ix86_expand_vector_move_misalign): Add code to generate movupd/movups
+ for unaligned vector SSE double/single precision loads for AMDFAM10.
+
+2007-02-05 Harsha Jagasia <harsha.jagasia at amd.com> (r121625)
+
+ * config/i386/i386.h (TARGET_AMDFAM10): New macro.
+ (TARGET_CPU_CPP_BUILTINS): Add code for amdfam10.
+ Define TARGET_CPU_DEFAULT_amdfam10.
+ (TARGET_CPU_DEFAULT_NAMES): Add amdfam10.
+ (processor_type): Add PROCESSOR_AMDFAM10.
+
+ * config/i386/i386.md: Add amdfam10 as a new cpu attribute to match
+ processor_type in config/i386/i386.h.
+ Enable imul peepholes for TARGET_AMDFAM10.
+
+ * config.gcc: Add support for --with-cpu option for amdfam10.
+
+ * config/i386/i386.c (amdfam10_cost): New variable.
+ (m_AMDFAM10): New macro.
+ (m_ATHLON_K8_AMDFAM10): New macro.
+ (x86_use_leave, x86_push_memory, x86_movx, x86_unroll_strlen,
+ x86_cmove, x86_3dnow_a, x86_deep_branch, x86_use_simode_fiop,
+ x86_promote_QImode, x86_integer_DFmode_moves,
+ x86_partial_reg_dependency, x86_memory_mismatch_stall,
+ x86_accumulate_outgoing_args, x86_arch_always_fancy_math_387,
+ x86_sse_partial_reg_dependency, x86_sse_typeless_stores,
+ x86_use_ffreep, x86_use_incdec, x86_four_jump_limit,
+ x86_schedule, x86_use_bt, x86_cmpxchg16b, x86_pad_returns):
+ Enable/disable for amdfam10.
+ (override_options): Add amdfam10_cost to processor_target_table.
+ Set up PROCESSOR_AMDFAM10 for amdfam10 entry in
+ processor_alias_table.
+ (ix86_issue_rate): Add PROCESSOR_AMDFAM10.
+ (ix86_adjust_cost): Add code for amdfam10.
+
+2007-02-05 Harsha Jagasia <harsha.jagasia at amd.com> (r121625)
+
+ * config/i386/i386.opt: Add new Advanced Bit Manipulation (-mabm)
+ instruction set feature flag. Add new (-mpopcnt) flag for popcnt
+ instruction. Add new SSE4A (-msse4a) instruction set feature flag.
+ * config/i386/i386.h: Add builtin definition for SSE4A.
+ * config/i386/i386.md: Add support for ABM instructions
+ (popcnt and lzcnt).
+ * config/i386/sse.md: Add support for SSE4A instructions
+ (movntss, movntsd, extrq, insertq).
+ * config/i386/i386.c: Add support for ABM and SSE4A builtins.
+ Add -march=amdfam10 flag.
+ * config/i386/ammintrin.h: Add support for SSE4A intrinsics.
+ * doc/invoke.texi: Add documentation on flags for sse4a, abm, popcnt
+ and amdfam10.
+ * doc/extend.texi: Add documentation for SSE4A builtins.
+
+2007-01-24 Jakub Jelinek <jakub at redhat.com> (r121140)
+
+ * config/i386/i386.h (x86_cmpxchg16b): Remove const.
+ (TARGET_CMPXCHG16B): Define to x86_cmpxchg16b.
+ * config/i386/i386.c (x86_cmpxchg16b): Remove const.
+ (override_options): Add PTA_CX16 flag. Set x86_cmpxchg16b
+ for CPUs that have PTA_CX16 set.
+
2007-01-17 Eric Christopher <echristo at apple.com> (r120846)
* config.gcc: Support core2 processor.
@@ -47,6 +231,11 @@
PR target/30040
* config/i386/driver-i386.c (bit_SSSE3): New.
+2006-11-27 Uros Bizjak <ubizjak at gmail.com> (r119260)
+
+ * config/i386/i386.c (x86_ext_80387_constants): Add m_K8, m_CORE2
+ and m_GENERIC64.
+
2006-11-18 Vladimir Makarov <vmakarov at redhat.com> (r118973)
* doc/invoke.texi (core2): Add item.
@@ -182,7 +371,7 @@
* doc/invoke.texi: Document -mssse3/-mno-ssse3 switches.
-2006-10-22 H.J. Lu <hongjiu.lu at intel.com>
+2006-10-22 H.J. Lu <hongjiu.lu at intel.com> (r117959)
* config/i386/tmmintrin.h: Remove the duplicated content.
Modified: stable/9/contrib/gcc/config.gcc
==============================================================================
--- stable/9/contrib/gcc/config.gcc Fri Jun 21 23:21:16 2013 (r252079)
+++ stable/9/contrib/gcc/config.gcc Sat Jun 22 02:22:12 2013 (r252080)
@@ -269,12 +269,12 @@ xscale-*-*)
i[34567]86-*-*)
cpu_type=i386
extra_headers="mmintrin.h mm3dnow.h xmmintrin.h emmintrin.h
- pmmintrin.h tmmintrin.h"
+ pmmintrin.h tmmintrin.h ammintrin.h"
;;
x86_64-*-*)
cpu_type=i386
extra_headers="mmintrin.h mm3dnow.h xmmintrin.h emmintrin.h
- pmmintrin.h tmmintrin.h"
+ pmmintrin.h tmmintrin.h ammintrin.h"
need_64bit_hwint=yes
;;
ia64-*-*)
@@ -1209,14 +1209,14 @@ i[34567]86-*-solaris2*)
# FIXME: -m64 for i[34567]86-*-* should be allowed just
# like -m32 for x86_64-*-*.
case X"${with_cpu}" in
- Xgeneric|Xcore2|Xnocona|Xx86-64|Xk8|Xopteron|Xathlon64|Xathlon-fx)
+ Xgeneric|Xcore2|Xnocona|Xx86-64|Xamdfam10|Xbarcelona|Xk8|Xopteron|Xathlon64|Xathlon-fx)
;;
X)
with_cpu=generic
;;
*)
echo "Unsupported CPU used in --with-cpu=$with_cpu, supported values:" 1>&2
- echo "generic core2 nocona x86-64 k8 opteron athlon64 athlon-fx" 1>&2
+ echo "generic core2 nocona x86-64 amdfam10 barcelona k8 opteron athlon64 athlon-fx" 1>&2
exit 1
;;
esac
@@ -2515,6 +2515,9 @@ if test x$with_cpu = x ; then
;;
i686-*-* | i786-*-*)
case ${target_noncanonical} in
+ amdfam10-*|barcelona-*)
+ with_cpu=amdfam10
+ ;;
k8-*|opteron-*|athlon_64-*)
with_cpu=k8
;;
@@ -2555,6 +2558,9 @@ if test x$with_cpu = x ; then
;;
x86_64-*-*)
case ${target_noncanonical} in
+ amdfam10-*|barcelona-*)
+ with_cpu=amdfam10
+ ;;
k8-*|opteron-*|athlon_64-*)
with_cpu=k8
;;
@@ -2795,7 +2801,7 @@ case "${target}" in
esac
# OK
;;
- "" | k8 | opteron | athlon64 | athlon-fx | nocona | core2 | generic)
+ "" | amdfam10 | barcelona | k8 | opteron | athlon64 | athlon-fx | nocona | core2 | generic)
# OK
;;
*)
Copied: stable/9/contrib/gcc/config/i386/ammintrin.h (from r251212, head/contrib/gcc/config/i386/ammintrin.h)
==============================================================================
--- /dev/null 00:00:00 1970 (empty, because file is newly added)
+++ stable/9/contrib/gcc/config/i386/ammintrin.h Sat Jun 22 02:22:12 2013 (r252080, copy of r251212, head/contrib/gcc/config/i386/ammintrin.h)
@@ -0,0 +1,73 @@
+/* Copyright (C) 2007 Free Software Foundation, Inc.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2, or (at your option)
+ any later version.
+
+ GCC is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with GCC; see the file COPYING. If not, write to
+ the Free Software Foundation, 51 Franklin Street, Fifth Floor,
+ Boston, MA 02110-1301, USA. */
+
+/* As a special exception, if you include this header file into source
+ files compiled by GCC, this header file does not by itself cause
+ the resulting executable to be covered by the GNU General Public
+ License. This exception does not however invalidate any other
+ reasons why the executable file might be covered by the GNU General
+ Public License. */
+
+/* Implemented from the specification included in the AMD Programmers
+ Manual Update, version 2.x */
+
+#ifndef _AMMINTRIN_H_INCLUDED
+#define _AMMINTRIN_H_INCLUDED
+
+#ifndef __SSE4A__
+# error "SSE4A instruction set not enabled"
+#else
+
+/* We need definitions from the SSE3, SSE2 and SSE header files*/
+#include <pmmintrin.h>
+
+static __inline void __attribute__((__always_inline__))
+_mm_stream_sd (double * __P, __m128d __Y)
+{
+ __builtin_ia32_movntsd (__P, (__v2df) __Y);
+}
+
+static __inline void __attribute__((__always_inline__))
+_mm_stream_ss (float * __P, __m128 __Y)
+{
+ __builtin_ia32_movntss (__P, (__v4sf) __Y);
+}
+
+static __inline __m128i __attribute__((__always_inline__))
+_mm_extract_si64 (__m128i __X, __m128i __Y)
+{
+ return (__m128i) __builtin_ia32_extrq ((__v2di) __X, (__v16qi) __Y);
+}
+
+#define _mm_extracti_si64(X, I, L) \
+((__m128i) __builtin_ia32_extrqi ((__v2di)(X), I, L))
+
+static __inline __m128i __attribute__((__always_inline__))
+_mm_insert_si64 (__m128i __X,__m128i __Y)
+{
+ return (__m128i) __builtin_ia32_insertq ((__v2di)__X, (__v2di)__Y);
+}
+
+#define _mm_inserti_si64(X, Y, I, L) \
+((__m128i) __builtin_ia32_insertqi ((__v2di)(X), (__v2di)(Y), I, L))
+
+
+#endif /* __SSE4A__ */
+
+#endif /* _AMMINTRIN_H_INCLUDED */
Modified: stable/9/contrib/gcc/config/i386/athlon.md
==============================================================================
--- stable/9/contrib/gcc/config/i386/athlon.md Fri Jun 21 23:21:16 2013 (r252079)
+++ stable/9/contrib/gcc/config/i386/athlon.md Sat Jun 22 02:22:12 2013 (r252080)
@@ -29,6 +29,8 @@
(const_string "vector")]
(const_string "direct")))
+(define_attr "amdfam10_decode" "direct,vector,double"
+ (const_string "direct"))
;;
;; decode0 decode1 decode2
;; \ | /
@@ -131,18 +133,22 @@
;; Jump instructions are executed in the branch unit completely transparent to us
(define_insn_reservation "athlon_branch" 0
- (and (eq_attr "cpu" "athlon,k8,generic64")
+ (and (eq_attr "cpu" "athlon,k8,generic64,amdfam10")
(eq_attr "type" "ibr"))
"athlon-direct,athlon-ieu")
(define_insn_reservation "athlon_call" 0
(and (eq_attr "cpu" "athlon,k8,generic64")
(eq_attr "type" "call,callv"))
"athlon-vector,athlon-ieu")
+(define_insn_reservation "athlon_call_amdfam10" 0
+ (and (eq_attr "cpu" "amdfam10")
+ (eq_attr "type" "call,callv"))
+ "athlon-double,athlon-ieu")
;; Latency of push operation is 3 cycles, but ESP value is available
;; earlier
(define_insn_reservation "athlon_push" 2
- (and (eq_attr "cpu" "athlon,k8,generic64")
+ (and (eq_attr "cpu" "athlon,k8,generic64,amdfam10")
(eq_attr "type" "push"))
"athlon-direct,athlon-agu,athlon-store")
(define_insn_reservation "athlon_pop" 4
@@ -153,12 +159,16 @@
(and (eq_attr "cpu" "k8,generic64")
(eq_attr "type" "pop"))
"athlon-double,(athlon-ieu+athlon-load)")
+(define_insn_reservation "athlon_pop_amdfam10" 3
+ (and (eq_attr "cpu" "amdfam10")
+ (eq_attr "type" "pop"))
+ "athlon-direct,(athlon-ieu+athlon-load)")
(define_insn_reservation "athlon_leave" 3
(and (eq_attr "cpu" "athlon")
(eq_attr "type" "leave"))
"athlon-vector,(athlon-ieu+athlon-load)")
(define_insn_reservation "athlon_leave_k8" 3
- (and (eq_attr "cpu" "k8,generic64")
+ (and (eq_attr "cpu" "k8,generic64,amdfam10")
(eq_attr "type" "leave"))
"athlon-double,(athlon-ieu+athlon-load)")
@@ -167,6 +177,11 @@
(and (eq_attr "cpu" "athlon,k8,generic64")
(eq_attr "type" "lea"))
"athlon-direct,athlon-agu,nothing")
+;; Lea executes in AGU unit with 1 cycle latency on AMDFAM10
+(define_insn_reservation "athlon_lea_amdfam10" 1
+ (and (eq_attr "cpu" "amdfam10")
+ (eq_attr "type" "lea"))
+ "athlon-direct,athlon-agu,nothing")
;; Mul executes in special multiplier unit attached to IEU0
(define_insn_reservation "athlon_imul" 5
@@ -176,29 +191,35 @@
"athlon-vector,athlon-ieu0,athlon-mult,nothing,nothing,athlon-ieu0")
;; ??? Widening multiply is vector or double.
(define_insn_reservation "athlon_imul_k8_DI" 4
- (and (eq_attr "cpu" "k8,generic64")
+ (and (eq_attr "cpu" "k8,generic64,amdfam10")
(and (eq_attr "type" "imul")
(and (eq_attr "mode" "DI")
(eq_attr "memory" "none,unknown"))))
"athlon-direct0,athlon-ieu0,athlon-mult,nothing,athlon-ieu0")
(define_insn_reservation "athlon_imul_k8" 3
- (and (eq_attr "cpu" "k8,generic64")
+ (and (eq_attr "cpu" "k8,generic64,amdfam10")
(and (eq_attr "type" "imul")
(eq_attr "memory" "none,unknown")))
"athlon-direct0,athlon-ieu0,athlon-mult,athlon-ieu0")
+(define_insn_reservation "athlon_imul_amdfam10_HI" 4
+ (and (eq_attr "cpu" "amdfam10")
+ (and (eq_attr "type" "imul")
+ (and (eq_attr "mode" "HI")
+ (eq_attr "memory" "none,unknown"))))
+ "athlon-vector,athlon-ieu0,athlon-mult,nothing,athlon-ieu0")
(define_insn_reservation "athlon_imul_mem" 8
(and (eq_attr "cpu" "athlon")
(and (eq_attr "type" "imul")
(eq_attr "memory" "load,both")))
"athlon-vector,athlon-load,athlon-ieu,athlon-mult,nothing,nothing,athlon-ieu")
(define_insn_reservation "athlon_imul_mem_k8_DI" 7
- (and (eq_attr "cpu" "k8,generic64")
+ (and (eq_attr "cpu" "k8,generic64,amdfam10")
(and (eq_attr "type" "imul")
(and (eq_attr "mode" "DI")
(eq_attr "memory" "load,both"))))
"athlon-vector,athlon-load,athlon-ieu,athlon-mult,nothing,athlon-ieu")
(define_insn_reservation "athlon_imul_mem_k8" 6
- (and (eq_attr "cpu" "k8,generic64")
+ (and (eq_attr "cpu" "k8,generic64,amdfam10")
(and (eq_attr "type" "imul")
(eq_attr "memory" "load,both")))
"athlon-vector,athlon-load,athlon-ieu,athlon-mult,athlon-ieu")
@@ -209,21 +230,23 @@
;; other instructions.
;; ??? Experiments show that the idiv can overlap with roughly 6 cycles
;; of the other code
+;; Using the same heuristics for amdfam10 as K8 with idiv
(define_insn_reservation "athlon_idiv" 6
- (and (eq_attr "cpu" "athlon,k8,generic64")
+ (and (eq_attr "cpu" "athlon,k8,generic64,amdfam10")
(and (eq_attr "type" "idiv")
(eq_attr "memory" "none,unknown")))
"athlon-vector,(athlon-ieu0*6+(athlon-fpsched,athlon-fvector))")
(define_insn_reservation "athlon_idiv_mem" 9
- (and (eq_attr "cpu" "athlon,k8,generic64")
+ (and (eq_attr "cpu" "athlon,k8,generic64,amdfam10")
(and (eq_attr "type" "idiv")
(eq_attr "memory" "load,both")))
"athlon-vector,((athlon-load,athlon-ieu0*6)+(athlon-fpsched,athlon-fvector))")
;; The parallelism of string instructions is not documented. Model it same way
;; as idiv to create smaller automata. This probably does not matter much.
+;; Using the same heuristics for amdfam10 as K8 with idiv
(define_insn_reservation "athlon_str" 6
- (and (eq_attr "cpu" "athlon,k8,generic64")
+ (and (eq_attr "cpu" "athlon,k8,generic64,amdfam10")
(and (eq_attr "type" "str")
(eq_attr "memory" "load,both,store")))
"athlon-vector,athlon-load,athlon-ieu0*6")
@@ -234,34 +257,62 @@
(and (eq_attr "unit" "integer,unknown")
(eq_attr "memory" "none,unknown"))))
"athlon-direct,athlon-ieu")
+(define_insn_reservation "athlon_idirect_amdfam10" 1
+ (and (eq_attr "cpu" "amdfam10")
+ (and (eq_attr "amdfam10_decode" "direct")
+ (and (eq_attr "unit" "integer,unknown")
+ (eq_attr "memory" "none,unknown"))))
+ "athlon-direct,athlon-ieu")
(define_insn_reservation "athlon_ivector" 2
(and (eq_attr "cpu" "athlon,k8,generic64")
(and (eq_attr "athlon_decode" "vector")
(and (eq_attr "unit" "integer,unknown")
(eq_attr "memory" "none,unknown"))))
"athlon-vector,athlon-ieu,athlon-ieu")
+(define_insn_reservation "athlon_ivector_amdfam10" 2
+ (and (eq_attr "cpu" "amdfam10")
+ (and (eq_attr "amdfam10_decode" "vector")
+ (and (eq_attr "unit" "integer,unknown")
+ (eq_attr "memory" "none,unknown"))))
+ "athlon-vector,athlon-ieu,athlon-ieu")
+
(define_insn_reservation "athlon_idirect_loadmov" 3
- (and (eq_attr "cpu" "athlon,k8,generic64")
+ (and (eq_attr "cpu" "athlon,k8,generic64,amdfam10")
(and (eq_attr "type" "imov")
(eq_attr "memory" "load")))
"athlon-direct,athlon-load")
+
(define_insn_reservation "athlon_idirect_load" 4
(and (eq_attr "cpu" "athlon,k8,generic64")
(and (eq_attr "athlon_decode" "direct")
(and (eq_attr "unit" "integer,unknown")
(eq_attr "memory" "load"))))
"athlon-direct,athlon-load,athlon-ieu")
+(define_insn_reservation "athlon_idirect_load_amdfam10" 4
+ (and (eq_attr "cpu" "amdfam10")
+ (and (eq_attr "amdfam10_decode" "direct")
+ (and (eq_attr "unit" "integer,unknown")
+ (eq_attr "memory" "load"))))
+ "athlon-direct,athlon-load,athlon-ieu")
(define_insn_reservation "athlon_ivector_load" 6
(and (eq_attr "cpu" "athlon,k8,generic64")
(and (eq_attr "athlon_decode" "vector")
(and (eq_attr "unit" "integer,unknown")
(eq_attr "memory" "load"))))
"athlon-vector,athlon-load,athlon-ieu,athlon-ieu")
+(define_insn_reservation "athlon_ivector_load_amdfam10" 6
+ (and (eq_attr "cpu" "amdfam10")
+ (and (eq_attr "amdfam10_decode" "vector")
+ (and (eq_attr "unit" "integer,unknown")
+ (eq_attr "memory" "load"))))
+ "athlon-vector,athlon-load,athlon-ieu,athlon-ieu")
+
(define_insn_reservation "athlon_idirect_movstore" 1
- (and (eq_attr "cpu" "athlon,k8,generic64")
+ (and (eq_attr "cpu" "athlon,k8,generic64,amdfam10")
(and (eq_attr "type" "imov")
(eq_attr "memory" "store")))
"athlon-direct,athlon-agu,athlon-store")
+
(define_insn_reservation "athlon_idirect_both" 4
(and (eq_attr "cpu" "athlon,k8,generic64")
(and (eq_attr "athlon_decode" "direct")
@@ -270,6 +321,15 @@
"athlon-direct,athlon-load,
athlon-ieu,athlon-store,
athlon-store")
+(define_insn_reservation "athlon_idirect_both_amdfam10" 4
+ (and (eq_attr "cpu" "amdfam10")
+ (and (eq_attr "amdfam10_decode" "direct")
+ (and (eq_attr "unit" "integer,unknown")
+ (eq_attr "memory" "both"))))
+ "athlon-direct,athlon-load,
+ athlon-ieu,athlon-store,
+ athlon-store")
+
(define_insn_reservation "athlon_ivector_both" 6
(and (eq_attr "cpu" "athlon,k8,generic64")
(and (eq_attr "athlon_decode" "vector")
@@ -279,6 +339,16 @@
athlon-ieu,
athlon-ieu,
athlon-store")
+(define_insn_reservation "athlon_ivector_both_amdfam10" 6
+ (and (eq_attr "cpu" "amdfam10")
+ (and (eq_attr "amdfam10_decode" "vector")
+ (and (eq_attr "unit" "integer,unknown")
+ (eq_attr "memory" "both"))))
+ "athlon-vector,athlon-load,
+ athlon-ieu,
+ athlon-ieu,
+ athlon-store")
+
(define_insn_reservation "athlon_idirect_store" 1
(and (eq_attr "cpu" "athlon,k8,generic64")
(and (eq_attr "athlon_decode" "direct")
@@ -286,6 +356,14 @@
(eq_attr "memory" "store"))))
"athlon-direct,(athlon-ieu+athlon-agu),
athlon-store")
+(define_insn_reservation "athlon_idirect_store_amdfam10" 1
+ (and (eq_attr "cpu" "amdfam10")
+ (and (eq_attr "amdfam10_decode" "direct")
+ (and (eq_attr "unit" "integer,unknown")
+ (eq_attr "memory" "store"))))
+ "athlon-direct,(athlon-ieu+athlon-agu),
+ athlon-store")
+
(define_insn_reservation "athlon_ivector_store" 2
(and (eq_attr "cpu" "athlon,k8,generic64")
(and (eq_attr "athlon_decode" "vector")
@@ -293,6 +371,13 @@
(eq_attr "memory" "store"))))
"athlon-vector,(athlon-ieu+athlon-agu),athlon-ieu,
athlon-store")
+(define_insn_reservation "athlon_ivector_store_amdfam10" 2
+ (and (eq_attr "cpu" "amdfam10")
+ (and (eq_attr "amdfam10_decode" "vector")
+ (and (eq_attr "unit" "integer,unknown")
+ (eq_attr "memory" "store"))))
+ "athlon-vector,(athlon-ieu+athlon-agu),athlon-ieu,
+ athlon-store")
;; Athlon floatin point unit
(define_insn_reservation "athlon_fldxf" 12
@@ -302,7 +387,7 @@
(eq_attr "mode" "XF"))))
"athlon-vector,athlon-fpload2,athlon-fvector*9")
(define_insn_reservation "athlon_fldxf_k8" 13
- (and (eq_attr "cpu" "k8,generic64")
+ (and (eq_attr "cpu" "k8,generic64,amdfam10")
(and (eq_attr "type" "fmov")
(and (eq_attr "memory" "load")
(eq_attr "mode" "XF"))))
@@ -314,7 +399,7 @@
(eq_attr "memory" "load")))
"athlon-direct,athlon-fpload,athlon-fany")
(define_insn_reservation "athlon_fld_k8" 2
- (and (eq_attr "cpu" "k8,generic64")
+ (and (eq_attr "cpu" "k8,generic64,amdfam10")
(and (eq_attr "type" "fmov")
(eq_attr "memory" "load")))
"athlon-direct,athlon-fploadk8,athlon-fstore")
@@ -326,7 +411,7 @@
(eq_attr "mode" "XF"))))
"athlon-vector,(athlon-fpsched+athlon-agu),(athlon-store2+(athlon-fvector*7))")
(define_insn_reservation "athlon_fstxf_k8" 8
- (and (eq_attr "cpu" "k8,generic64")
+ (and (eq_attr "cpu" "k8,generic64,amdfam10")
(and (eq_attr "type" "fmov")
(and (eq_attr "memory" "store,both")
(eq_attr "mode" "XF"))))
@@ -337,16 +422,16 @@
(eq_attr "memory" "store,both")))
"athlon-direct,(athlon-fpsched+athlon-agu),(athlon-fstore+athlon-store)")
(define_insn_reservation "athlon_fst_k8" 2
- (and (eq_attr "cpu" "k8,generic64")
+ (and (eq_attr "cpu" "k8,generic64,amdfam10")
(and (eq_attr "type" "fmov")
(eq_attr "memory" "store,both")))
"athlon-direct,(athlon-fpsched+athlon-agu),(athlon-fstore+athlon-store)")
(define_insn_reservation "athlon_fist" 4
- (and (eq_attr "cpu" "athlon,k8,generic64")
+ (and (eq_attr "cpu" "athlon,k8,generic64,amdfam10")
(eq_attr "type" "fistp,fisttp"))
"athlon-direct,(athlon-fpsched+athlon-agu),(athlon-fstore+athlon-store)")
(define_insn_reservation "athlon_fmov" 2
- (and (eq_attr "cpu" "athlon,k8,generic64")
+ (and (eq_attr "cpu" "athlon,k8,generic64,amdfam10")
(eq_attr "type" "fmov"))
"athlon-direct,athlon-fpsched,athlon-faddmul")
(define_insn_reservation "athlon_fadd_load" 4
@@ -355,12 +440,12 @@
(eq_attr "memory" "load")))
"athlon-direct,athlon-fpload,athlon-fadd")
(define_insn_reservation "athlon_fadd_load_k8" 6
- (and (eq_attr "cpu" "k8,generic64")
+ (and (eq_attr "cpu" "k8,generic64,amdfam10")
(and (eq_attr "type" "fop")
(eq_attr "memory" "load")))
"athlon-direct,athlon-fploadk8,athlon-fadd")
(define_insn_reservation "athlon_fadd" 4
- (and (eq_attr "cpu" "athlon,k8,generic64")
+ (and (eq_attr "cpu" "athlon,k8,generic64,amdfam10")
(eq_attr "type" "fop"))
"athlon-direct,athlon-fpsched,athlon-fadd")
(define_insn_reservation "athlon_fmul_load" 4
@@ -369,16 +454,16 @@
(eq_attr "memory" "load")))
"athlon-direct,athlon-fpload,athlon-fmul")
(define_insn_reservation "athlon_fmul_load_k8" 6
- (and (eq_attr "cpu" "k8,generic64")
+ (and (eq_attr "cpu" "k8,generic64,amdfam10")
(and (eq_attr "type" "fmul")
(eq_attr "memory" "load")))
"athlon-direct,athlon-fploadk8,athlon-fmul")
(define_insn_reservation "athlon_fmul" 4
- (and (eq_attr "cpu" "athlon,k8,generic64")
+ (and (eq_attr "cpu" "athlon,k8,generic64,amdfam10")
(eq_attr "type" "fmul"))
"athlon-direct,athlon-fpsched,athlon-fmul")
(define_insn_reservation "athlon_fsgn" 2
- (and (eq_attr "cpu" "athlon,k8,generic64")
+ (and (eq_attr "cpu" "athlon,k8,generic64,amdfam10")
(eq_attr "type" "fsgn"))
"athlon-direct,athlon-fpsched,athlon-fmul")
(define_insn_reservation "athlon_fdiv_load" 24
@@ -387,7 +472,7 @@
(eq_attr "memory" "load")))
"athlon-direct,athlon-fpload,athlon-fmul")
(define_insn_reservation "athlon_fdiv_load_k8" 13
- (and (eq_attr "cpu" "k8,generic64")
+ (and (eq_attr "cpu" "k8,generic64,amdfam10")
(and (eq_attr "type" "fdiv")
(eq_attr "memory" "load")))
"athlon-direct,athlon-fploadk8,athlon-fmul")
@@ -396,16 +481,16 @@
(eq_attr "type" "fdiv"))
"athlon-direct,athlon-fpsched,athlon-fmul")
(define_insn_reservation "athlon_fdiv_k8" 11
- (and (eq_attr "cpu" "k8,generic64")
+ (and (eq_attr "cpu" "k8,generic64,amdfam10")
(eq_attr "type" "fdiv"))
"athlon-direct,athlon-fpsched,athlon-fmul")
(define_insn_reservation "athlon_fpspc_load" 103
- (and (eq_attr "cpu" "athlon,k8,generic64")
+ (and (eq_attr "cpu" "athlon,k8,generic64,amdfam10")
(and (eq_attr "type" "fpspc")
(eq_attr "memory" "load")))
"athlon-vector,athlon-fpload,athlon-fvector")
(define_insn_reservation "athlon_fpspc" 100
- (and (eq_attr "cpu" "athlon,k8,generic64")
+ (and (eq_attr "cpu" "athlon,k8,generic64,amdfam10")
(eq_attr "type" "fpspc"))
"athlon-vector,athlon-fpsched,athlon-fvector")
(define_insn_reservation "athlon_fcmov_load" 7
@@ -418,12 +503,12 @@
(eq_attr "type" "fcmov"))
"athlon-vector,athlon-fpsched,athlon-fvector")
(define_insn_reservation "athlon_fcmov_load_k8" 17
- (and (eq_attr "cpu" "k8,generic64")
+ (and (eq_attr "cpu" "k8,generic64,amdfam10")
(and (eq_attr "type" "fcmov")
(eq_attr "memory" "load")))
"athlon-vector,athlon-fploadk8,athlon-fvector")
(define_insn_reservation "athlon_fcmov_k8" 15
- (and (eq_attr "cpu" "k8,generic64")
+ (and (eq_attr "cpu" "k8,generic64,amdfam10")
(eq_attr "type" "fcmov"))
"athlon-vector,athlon-fpsched,athlon-fvector")
;; fcomi is vector decoded by uses only one pipe.
@@ -434,13 +519,13 @@
(eq_attr "memory" "load"))))
"athlon-vector,athlon-fpload,athlon-fadd")
(define_insn_reservation "athlon_fcomi_load_k8" 5
- (and (eq_attr "cpu" "k8,generic64")
+ (and (eq_attr "cpu" "k8,generic64,amdfam10")
(and (eq_attr "type" "fcmp")
(and (eq_attr "athlon_decode" "vector")
(eq_attr "memory" "load"))))
"athlon-vector,athlon-fploadk8,athlon-fadd")
(define_insn_reservation "athlon_fcomi" 3
- (and (eq_attr "cpu" "athlon,k8,generic64")
+ (and (eq_attr "cpu" "athlon,k8,generic64,amdfam10")
(and (eq_attr "athlon_decode" "vector")
(eq_attr "type" "fcmp")))
"athlon-vector,athlon-fpsched,athlon-fadd")
@@ -450,18 +535,18 @@
(eq_attr "memory" "load")))
"athlon-direct,athlon-fpload,athlon-fadd")
(define_insn_reservation "athlon_fcom_load_k8" 4
- (and (eq_attr "cpu" "k8,generic64")
+ (and (eq_attr "cpu" "k8,generic64,amdfam10")
(and (eq_attr "type" "fcmp")
(eq_attr "memory" "load")))
"athlon-direct,athlon-fploadk8,athlon-fadd")
(define_insn_reservation "athlon_fcom" 2
- (and (eq_attr "cpu" "athlon,k8,generic64")
+ (and (eq_attr "cpu" "athlon,k8,generic64,amdfam10")
(eq_attr "type" "fcmp"))
"athlon-direct,athlon-fpsched,athlon-fadd")
;; Never seen by the scheduler because we still don't do post reg-stack
;; scheduling.
;(define_insn_reservation "athlon_fxch" 2
-; (and (eq_attr "cpu" "athlon,k8,generic64")
+; (and (eq_attr "cpu" "athlon,k8,generic64,amdfam10")
; (eq_attr "type" "fxch"))
; "athlon-direct,athlon-fpsched,athlon-fany")
@@ -516,6 +601,23 @@
(and (eq_attr "type" "mmxmov,ssemov")
(eq_attr "memory" "load")))
"athlon-direct,athlon-fploadk8,athlon-fstore")
+;; On AMDFAM10 all double, single and integer packed and scalar SSEx data
+;; loads generated are direct path, latency of 2 and do not use any FP
+;; executions units. No seperate entries for movlpx/movhpx loads, which
+;; are direct path, latency of 4 and use the FADD/FMUL FP execution units,
+;; as they will not be generated.
+(define_insn_reservation "athlon_sseld_amdfam10" 2
+ (and (eq_attr "cpu" "amdfam10")
+ (and (eq_attr "type" "ssemov")
+ (eq_attr "memory" "load")))
+ "athlon-direct,athlon-fploadk8")
+;; On AMDFAM10 MMX data loads generated are direct path, latency of 4
+;; and can use any FP executions units
+(define_insn_reservation "athlon_mmxld_amdfam10" 4
+ (and (eq_attr "cpu" "amdfam10")
+ (and (eq_attr "type" "mmxmov")
+ (eq_attr "memory" "load")))
+ "athlon-direct,athlon-fploadk8, athlon-fany")
(define_insn_reservation "athlon_mmxssest" 3
(and (eq_attr "cpu" "k8,generic64")
(and (eq_attr "type" "mmxmov,ssemov")
@@ -533,6 +635,25 @@
(and (eq_attr "type" "mmxmov,ssemov")
(eq_attr "memory" "store,both")))
"athlon-direct,(athlon-fpsched+athlon-agu),(athlon-fstore+athlon-store)")
+;; On AMDFAM10 all double, single and integer packed SSEx data stores
+;; generated are all double path, latency of 2 and use the FSTORE FP
+;; execution unit. No entries seperate for movupx/movdqu, which are
+;; vector path, latency of 3 and use the FSTORE*2 FP execution unit,
+;; as they will not be generated.
+(define_insn_reservation "athlon_ssest_amdfam10" 2
+ (and (eq_attr "cpu" "amdfam10")
+ (and (eq_attr "type" "ssemov")
+ (and (eq_attr "mode" "V4SF,V2DF,TI")
+ (eq_attr "memory" "store,both"))))
+ "athlon-double,(athlon-fpsched+athlon-agu),((athlon-fstore+athlon-store)*2)")
+;; On AMDFAM10 all double, single and integer scalar SSEx and MMX
+;; data stores generated are all direct path, latency of 2 and use
+;; the FSTORE FP execution unit
+(define_insn_reservation "athlon_mmxssest_short_amdfam10" 2
+ (and (eq_attr "cpu" "amdfam10")
+ (and (eq_attr "type" "mmxmov,ssemov")
+ (eq_attr "memory" "store,both")))
+ "athlon-direct,(athlon-fpsched+athlon-agu),(athlon-fstore+athlon-store)")
(define_insn_reservation "athlon_movaps_k8" 2
(and (eq_attr "cpu" "k8,generic64")
(and (eq_attr "type" "ssemov")
@@ -578,6 +699,11 @@
(and (eq_attr "type" "sselog,sselog1")
(eq_attr "memory" "load")))
"athlon-double,athlon-fpload2k8,(athlon-fmul*2)")
+(define_insn_reservation "athlon_sselog_load_amdfam10" 4
+ (and (eq_attr "cpu" "amdfam10")
+ (and (eq_attr "type" "sselog,sselog1")
+ (eq_attr "memory" "load")))
+ "athlon-direct,athlon-fploadk8,(athlon-fadd|athlon-fmul)")
(define_insn_reservation "athlon_sselog" 3
(and (eq_attr "cpu" "athlon")
(eq_attr "type" "sselog,sselog1"))
@@ -586,6 +712,11 @@
(and (eq_attr "cpu" "k8,generic64")
(eq_attr "type" "sselog,sselog1"))
"athlon-double,athlon-fpsched,athlon-fmul")
+(define_insn_reservation "athlon_sselog_amdfam10" 2
+ (and (eq_attr "cpu" "amdfam10")
+ (eq_attr "type" "sselog,sselog1"))
+ "athlon-direct,athlon-fpsched,(athlon-fadd|athlon-fmul)")
+
;; ??? pcmp executes in addmul, probably not worthwhile to bother about that.
(define_insn_reservation "athlon_ssecmp_load" 2
(and (eq_attr "cpu" "athlon")
@@ -594,13 +725,13 @@
(eq_attr "memory" "load"))))
"athlon-direct,athlon-fpload,athlon-fadd")
(define_insn_reservation "athlon_ssecmp_load_k8" 4
- (and (eq_attr "cpu" "k8,generic64")
+ (and (eq_attr "cpu" "k8,generic64,amdfam10")
(and (eq_attr "type" "ssecmp")
(and (eq_attr "mode" "SF,DF,DI,TI")
(eq_attr "memory" "load"))))
"athlon-direct,athlon-fploadk8,athlon-fadd")
(define_insn_reservation "athlon_ssecmp" 2
- (and (eq_attr "cpu" "athlon,k8,generic64")
+ (and (eq_attr "cpu" "athlon,k8,generic64,amdfam10")
(and (eq_attr "type" "ssecmp")
(eq_attr "mode" "SF,DF,DI,TI")))
"athlon-direct,athlon-fpsched,athlon-fadd")
@@ -614,6 +745,11 @@
(and (eq_attr "type" "ssecmp")
(eq_attr "memory" "load")))
"athlon-double,athlon-fpload2k8,(athlon-fadd*2)")
+(define_insn_reservation "athlon_ssecmpvector_load_amdfam10" 4
+ (and (eq_attr "cpu" "amdfam10")
+ (and (eq_attr "type" "ssecmp")
+ (eq_attr "memory" "load")))
+ "athlon-direct,athlon-fploadk8,athlon-fadd")
(define_insn_reservation "athlon_ssecmpvector" 3
(and (eq_attr "cpu" "athlon")
(eq_attr "type" "ssecmp"))
@@ -622,6 +758,10 @@
(and (eq_attr "cpu" "k8,generic64")
(eq_attr "type" "ssecmp"))
"athlon-double,athlon-fpsched,(athlon-fadd*2)")
+(define_insn_reservation "athlon_ssecmpvector_amdfam10" 2
+ (and (eq_attr "cpu" "amdfam10")
+ (eq_attr "type" "ssecmp"))
+ "athlon-direct,athlon-fpsched,athlon-fadd")
(define_insn_reservation "athlon_ssecomi_load" 4
(and (eq_attr "cpu" "athlon")
(and (eq_attr "type" "ssecomi")
@@ -632,10 +772,20 @@
(and (eq_attr "type" "ssecomi")
(eq_attr "memory" "load")))
"athlon-vector,athlon-fploadk8,athlon-fadd")
+(define_insn_reservation "athlon_ssecomi_load_amdfam10" 5
+ (and (eq_attr "cpu" "amdfam10")
+ (and (eq_attr "type" "ssecomi")
+ (eq_attr "memory" "load")))
+ "athlon-direct,athlon-fploadk8,athlon-fadd")
(define_insn_reservation "athlon_ssecomi" 4
(and (eq_attr "cpu" "athlon,k8,generic64")
(eq_attr "type" "ssecmp"))
"athlon-vector,athlon-fpsched,athlon-fadd")
+(define_insn_reservation "athlon_ssecomi_amdfam10" 3
+ (and (eq_attr "cpu" "amdfam10")
+;; It seems athlon_ssecomi has a bug in the attr_type, fixed for amdfam10
+ (eq_attr "type" "ssecomi"))
+ "athlon-direct,athlon-fpsched,athlon-fadd")
(define_insn_reservation "athlon_sseadd_load" 4
(and (eq_attr "cpu" "athlon")
(and (eq_attr "type" "sseadd")
@@ -643,13 +793,13 @@
(eq_attr "memory" "load"))))
"athlon-direct,athlon-fpload,athlon-fadd")
(define_insn_reservation "athlon_sseadd_load_k8" 6
- (and (eq_attr "cpu" "k8,generic64")
+ (and (eq_attr "cpu" "k8,generic64,amdfam10")
(and (eq_attr "type" "sseadd")
(and (eq_attr "mode" "SF,DF,DI")
(eq_attr "memory" "load"))))
"athlon-direct,athlon-fploadk8,athlon-fadd")
(define_insn_reservation "athlon_sseadd" 4
- (and (eq_attr "cpu" "athlon,k8,generic64")
+ (and (eq_attr "cpu" "athlon,k8,generic64,amdfam10")
(and (eq_attr "type" "sseadd")
(eq_attr "mode" "SF,DF,DI")))
"athlon-direct,athlon-fpsched,athlon-fadd")
@@ -663,6 +813,11 @@
(and (eq_attr "type" "sseadd")
(eq_attr "memory" "load")))
"athlon-double,athlon-fpload2k8,(athlon-fadd*2)")
+(define_insn_reservation "athlon_sseaddvector_load_amdfam10" 6
+ (and (eq_attr "cpu" "amdfam10")
+ (and (eq_attr "type" "sseadd")
+ (eq_attr "memory" "load")))
+ "athlon-direct,athlon-fploadk8,athlon-fadd")
(define_insn_reservation "athlon_sseaddvector" 5
(and (eq_attr "cpu" "athlon")
(eq_attr "type" "sseadd"))
@@ -671,6 +826,10 @@
(and (eq_attr "cpu" "k8,generic64")
(eq_attr "type" "sseadd"))
"athlon-double,athlon-fpsched,(athlon-fadd*2)")
+(define_insn_reservation "athlon_sseaddvector_amdfam10" 4
+ (and (eq_attr "cpu" "amdfam10")
+ (eq_attr "type" "sseadd"))
+ "athlon-direct,athlon-fpsched,athlon-fadd")
;; Conversions behaves very irregularly and the scheduling is critical here.
;; Take each instruction separately. Assume that the mode is always set to the
@@ -684,12 +843,25 @@
(and (eq_attr "mode" "DF")
(eq_attr "memory" "load")))))
"athlon-direct,athlon-fploadk8,athlon-fstore")
+(define_insn_reservation "athlon_ssecvt_cvtss2sd_load_amdfam10" 7
+ (and (eq_attr "cpu" "amdfam10")
+ (and (eq_attr "type" "ssecvt")
+ (and (eq_attr "amdfam10_decode" "double")
+ (and (eq_attr "mode" "DF")
+ (eq_attr "memory" "load")))))
+ "athlon-double,athlon-fploadk8,(athlon-faddmul+athlon-fstore)")
(define_insn_reservation "athlon_ssecvt_cvtss2sd" 2
(and (eq_attr "cpu" "athlon,k8,generic64")
(and (eq_attr "type" "ssecvt")
(and (eq_attr "athlon_decode" "direct")
(eq_attr "mode" "DF"))))
"athlon-direct,athlon-fpsched,athlon-fstore")
+(define_insn_reservation "athlon_ssecvt_cvtss2sd_amdfam10" 7
+ (and (eq_attr "cpu" "amdfam10")
+ (and (eq_attr "type" "ssecvt")
+ (and (eq_attr "amdfam10_decode" "vector")
+ (eq_attr "mode" "DF"))))
+ "athlon-vector,athlon-fpsched,athlon-faddmul,(athlon-fstore*2)")
;; cvtps2pd. Model same way the other double decoded FP conversions.
(define_insn_reservation "athlon_ssecvt_cvtps2pd_load_k8" 5
(and (eq_attr "cpu" "k8,athlon,generic64")
@@ -698,12 +870,25 @@
(and (eq_attr "mode" "V2DF,V4SF,TI")
(eq_attr "memory" "load")))))
"athlon-double,athlon-fpload2k8,(athlon-fstore*2)")
+(define_insn_reservation "athlon_ssecvt_cvtps2pd_load_amdfam10" 4
+ (and (eq_attr "cpu" "amdfam10")
*** DIFF OUTPUT TRUNCATED AT 1000 LINES ***
More information about the svn-src-stable
mailing list