!521 [Sync] sync commits from openEuler-24.03-LTS-SP1

From: @li-yancheng 
Reviewed-by: @huang-xiaoquan 
Signed-off-by: @huang-xiaoquan
This commit is contained in:
openeuler-ci-bot 2024-12-09 11:33:34 +00:00 committed by Gitee
commit 246220291d
No known key found for this signature in database
GPG Key ID: 173E9B9CA92EEF8F
244 changed files with 66762 additions and 699 deletions

View File

@ -0,0 +1,703 @@
From 64050ef082f7f3af78cc136c17c995d62cec14b5 Mon Sep 17 00:00:00 2001
From: swcompiler <lc@wxiat.com>
Date: Mon, 25 Nov 2024 16:25:58 +0800
Subject: [PATCH 01/16] Sw64 Port: add configure support for sw64
---
Makefile.in | 1 +
Makefile.tpl | 1 +
config.guess | 12 ++++
config.sub | 1 +
config/intdiv0.m4 | 2 +-
config/mt-sw_64ieee | 3 +
config/tcl.m4 | 6 ++
configure | 16 ++++-
configure.ac | 19 +++++-
contrib/config-list.mk | 1 +
gcc/config.gcc | 35 +++++++++++
gcc/config.host | 8 +++
gcc/config.in | 17 +++++
gcc/config/host-linux.cc | 2 +
gcc/configure | 131 ++++++++++++++++++++++++++++++++++++++-
gcc/configure.ac | 84 ++++++++++++++++++++++++-
gcc/doc/install.texi | 9 +++
17 files changed, 342 insertions(+), 6 deletions(-)
create mode 100644 config/mt-sw_64ieee
diff --git a/Makefile.in b/Makefile.in
index 7785b3d9a..283c16c4e 100644
--- a/Makefile.in
+++ b/Makefile.in
@@ -707,6 +707,7 @@ all:
@target_makefile_frag@
@alphaieee_frag@
@ospace_frag@
+@sw_64ieee_frag@
@host_makefile_frag@
###
diff --git a/Makefile.tpl b/Makefile.tpl
index ef58fac2b..d629bca8b 100644
--- a/Makefile.tpl
+++ b/Makefile.tpl
@@ -630,6 +630,7 @@ all:
@target_makefile_frag@
@alphaieee_frag@
@ospace_frag@
+@sw_64ieee_frag@
@host_makefile_frag@
###
diff --git a/config.guess b/config.guess
index 1972fda8e..0275a0ef8 100755
--- a/config.guess
+++ b/config.guess
@@ -1101,6 +1101,21 @@ EOF
sparc:Linux:*:* | sparc64:Linux:*:*)
echo "$UNAME_MACHINE"-unknown-linux-"$LIBC"
exit ;;
+ sw_64:Linux:*:*)
+ # Refine the machine name from the "cpu model" field of /proc/cpuinfo;
+ # an unreadable file or an unlisted model keeps the uname value.
+ case `sed -n '/^cpu model/s/^.*: \(.*\)/\1/p' /proc/cpuinfo 2>/dev/null` in
+ SW6) UNAME_MACHINE=sw_64sw6 ;;
+ SW6A) UNAME_MACHINE=sw_64sw6a ;;
+ SW6B) UNAME_MACHINE=sw_64sw6b ;;
+ SW8A) UNAME_MACHINE=sw_64sw8a ;;
+ SW) UNAME_MACHINE=sw_64 ;;
+ esac
+ # A /bin/sh whose private headers mention ld.so.1 selects gnulibc1.
+ objdump --private-headers /bin/sh | grep -q ld.so.1
+ if test "$?" = 0 ; then LIBC=gnulibc1 ; fi
+ echo "$UNAME_MACHINE"-unknown-linux-"$LIBC"
+ exit ;;
tile*:Linux:*:*)
echo "$UNAME_MACHINE"-unknown-linux-"$LIBC"
exit ;;
diff --git a/config.sub b/config.sub
index 38f3d037a..70ff2e6e4 100755
--- a/config.sub
+++ b/config.sub
@@ -1262,6 +1262,7 @@ case $cpu-$vendor in
| sparclite \
| sparcv8 | sparcv9 | sparcv9b | sparcv9v | sv1 | sx* \
| spu \
+ | sw_64 | sw_64sw6 | sw_64sw6a | sw_64sw6b | sw_64sw8a \
| tahoe \
| thumbv7* \
| tic30 | tic4x | tic54x | tic55x | tic6x | tic80 \
diff --git a/config/intdiv0.m4 b/config/intdiv0.m4
index 55dddcf1c..bfe1bdcdf 100644
--- a/config/intdiv0.m4
+++ b/config/intdiv0.m4
@@ -56,7 +56,7 @@ int main ()
[
# Guess based on the CPU.
case "$host_cpu" in
- alpha* | i[34567]86 | m68k | s390*)
+ alpha* | i[34567]86 | m68k | s390* | sw_64*)
gt_cv_int_divbyzero_sigfpe="guessing yes";;
*)
gt_cv_int_divbyzero_sigfpe="guessing no";;
diff --git a/config/mt-sw_64ieee b/config/mt-sw_64ieee
new file mode 100644
index 000000000..80c17cdc6
--- /dev/null
+++ b/config/mt-sw_64ieee
@@ -0,0 +1,3 @@
+CFLAGS_FOR_TARGET += -mieee
+CXXFLAGS_FOR_TARGET += -mieee
+GOCFLAGS_FOR_TARGET += -mieee
diff --git a/config/tcl.m4 b/config/tcl.m4
index 4542a4b23..c58bf5343 100644
--- a/config/tcl.m4
+++ b/config/tcl.m4
@@ -1368,6 +1368,9 @@ dnl AC_CHECK_TOOL(AR, ar)
if test "`uname -m`" = "alpha" ; then
CFLAGS="$CFLAGS -mieee"
fi
+ if test "`uname -m`" = "sw_64" ; then
+ CFLAGS="$CFLAGS -mieee"
+ fi
if test $do64bit = yes; then
AC_CACHE_CHECK([if compiler accepts -m64 flag], tcl_cv_cc_m64, [
hold_cflags=$CFLAGS
@@ -1418,6 +1421,9 @@ dnl AC_CHECK_TOOL(AR, ar)
if test "`uname -m`" = "alpha" ; then
CFLAGS="$CFLAGS -mieee"
fi
+ if test "`uname -m`" = "sw_64" ; then
+ CFLAGS="$CFLAGS -mieee"
+ fi
;;
Lynx*)
SHLIB_CFLAGS="-fPIC"
diff --git a/configure b/configure
index aff62c464..1b7c11292 100755
--- a/configure
+++ b/configure
@@ -789,6 +789,7 @@ ac_subst_files='serialization_dependencies
host_makefile_frag
target_makefile_frag
alphaieee_frag
+sw_64ieee_frag
ospace_frag'
ac_user_opts='
enable_option_checking
@@ -4016,6 +4017,10 @@ case "${target}" in
use_gnu_ld=no
fi
;;
+ sw_64*-*-*)
+ # newlib is not 64 bit ready
+ noconfigdirs="$noconfigdirs target-newlib target-libgloss"
+ ;;
tic6x-*-*)
noconfigdirs="$noconfigdirs sim"
;;
@@ -9584,6 +9589,15 @@ case $target in
;;
esac
+sw_64ieee_frag=/dev/null
+case $target in
+ sw_64*-*-*)
+ # This just makes sure to use the -mieee option to build target libs.
+ # This should probably be set individually by each library.
+ sw_64ieee_frag="config/mt-sw_64ieee"
+ ;;
+esac
+
# If --enable-target-optspace always use -Os instead of -O2 to build
# the target libraries, similarly if it is not specified, use -Os
# on selected platforms.
@@ -10299,7 +10313,7 @@ case "${target}" in
esac
# Makefile fragments.
-for frag in host_makefile_frag target_makefile_frag alphaieee_frag ospace_frag;
+for frag in host_makefile_frag target_makefile_frag alphaieee_frag sw_64ieee_frag ospace_frag;
do
eval fragval=\$$frag
if test $fragval != /dev/null; then
diff --git a/configure.ac b/configure.ac
index f310d75ca..c4a65a49d 100644
--- a/configure.ac
+++ b/configure.ac
@@ -1271,6 +1271,10 @@ case "${target}" in
use_gnu_ld=no
fi
;;
+ sw_64*-*-*)
+ # newlib is not 64 bit ready
+ noconfigdirs="$noconfigdirs target-newlib target-libgloss"
+ ;;
tic6x-*-*)
noconfigdirs="$noconfigdirs sim"
;;
@@ -1335,6 +1339,9 @@ case "${host}" in
rs6000-*-aix*)
host_makefile_frag="config/mh-ppc-aix"
;;
+ sw_64*-linux*)
+ host_makefile_frag="config/mh-sw_64-linux"
+ ;;
esac
fi
@@ -2765,6 +2772,15 @@ case $target in
;;
esac
+sw_64ieee_frag=/dev/null
+case $target in
+ sw_64*-*-*)
+ # This just makes sure to use the -mieee option to build target libs.
+ # This should probably be set individually by each library.
+ sw_64ieee_frag="config/mt-sw_64ieee"
+ ;;
+esac
+
# If --enable-target-optspace always use -Os instead of -O2 to build
# the target libraries, similarly if it is not specified, use -Os
# on selected platforms.
@@ -3475,7 +3491,7 @@ case "${target}" in
esac
# Makefile fragments.
-for frag in host_makefile_frag target_makefile_frag alphaieee_frag ospace_frag;
+for frag in host_makefile_frag target_makefile_frag alphaieee_frag ospace_frag sw_64ieee_frag;
do
eval fragval=\$$frag
if test $fragval != /dev/null; then
@@ -3486,6 +3502,7 @@ AC_SUBST_FILE(host_makefile_frag)
AC_SUBST_FILE(target_makefile_frag)
AC_SUBST_FILE(alphaieee_frag)
AC_SUBST_FILE(ospace_frag)
+AC_SUBST_FILE(sw_64ieee_frag)
# Miscellanea: directories, flags, etc.
AC_SUBST(RPATH_ENVVAR)
diff --git a/contrib/config-list.mk b/contrib/config-list.mk
index e04210556..730a49e64 100644
--- a/contrib/config-list.mk
+++ b/contrib/config-list.mk
@@ -96,6 +96,7 @@ LIST = aarch64-elf aarch64-linux-gnu aarch64-rtems \
sparc64-sun-solaris2.11OPT-with-gnu-ldOPT-with-gnu-asOPT-enable-threads=posix \
sparc-wrs-vxworks sparc64-elf sparc64-rtems sparc64-linux sparc64-freebsd6 \
sparc64-netbsd sparc64-openbsd \
+ sw_64-linux-gnu sw_64-netbsd sw_64-openbsd \
tilegx-linux-gnuOPT-enable-obsolete tilegxbe-linux-gnuOPT-enable-obsolete \
tilepro-linux-gnuOPT-enable-obsolete \
v850e1-elf v850e-elf v850-elf v850-rtems vax-linux-gnu \
diff --git a/gcc/config.gcc b/gcc/config.gcc
index 23c5bee2b..d55645381 100644
--- a/gcc/config.gcc
+++ b/gcc/config.gcc
@@ -547,6 +547,10 @@ sh[123456789lbe]*-*-* | sh-*-*)
extra_options="${extra_options} fused-madd.opt"
extra_objs="${extra_objs} sh_treg_combine.o sh-mem.o sh_optimize_sett_clrt.o"
;;
+sw_64*-*-*)
+ cpu_type=sw_64
+ extra_options="${extra_options} g.opt"
+ ;;
v850*-*-*)
cpu_type=v850
;;
@@ -3471,6 +3475,11 @@ sparc64-*-openbsd*)
with_cpu=ultrasparc
tmake_file="${tmake_file} sparc/t-sparc"
;;
+sw_64*-*-linux*)
+ tm_file="elfos.h ${tm_file} sw_64/gnu-user.h sw_64/elf.h sw_64/linux.h sw_64/linux-elf.h glibc-stdint.h"
+ tmake_file="${tmake_file} sw_64/t-linux sw_64/t-sw_64"
+ extra_options="${extra_options} sw_64/elf.opt"
+ ;;
tic6x-*-elf)
tm_file="elfos.h ${tm_file} c6x/elf-common.h c6x/elf.h"
tm_file="${tm_file} tm-dwarf2.h newlib-stdint.h"
@@ -3999,6 +4008,15 @@ if test x$with_cpu = x ; then
;;
esac
;;
+ sw_64sw6a*-*-*)
+ with_cpu=sw6a
+ ;;
+ sw_64sw6b*-*-*)
+ with_cpu=sw6b
+ ;;
+ sw_64sw8a*-*-*)
+ with_cpu=sw8a
+ ;;
visium-*-*)
with_cpu=gr5
;;
@@ -5571,6 +5589,23 @@ case "${target}" in
esac
;;
+ sw_64*-*-*)
+ supported_defaults="cpu tune"
+ for which in cpu tune; do
+ eval "val=\$with_$which"
+ case "$val" in
+ "" \
+ | sw6 | sw6a | sw6b | sw8a \
+ | sw6c)
+ ;;
+ *)
+ echo "Unknown CPU used in --with-$which=$val" 1>&2
+ exit 1
+ ;;
+ esac
+ done
+ ;;
+
tic6x-*-*)
supported_defaults="arch"
diff --git a/gcc/config.host b/gcc/config.host
index bf7dcb4cc..081ce29c2 100644
--- a/gcc/config.host
+++ b/gcc/config.host
@@ -198,6 +198,14 @@ case ${host} in
;;
esac
;;
+ sw_64*-*-linux*)
+ case ${target} in
+ sw_64*-*-linux*)
+ host_extra_gcc_objs="driver-sw_64.o"
+ host_xmake_file="${host_xmake_file} sw_64/x-sw_64"
+ ;;
+ esac
+ ;;
esac
# Machine-specific settings.
diff --git a/gcc/config.in b/gcc/config.in
index 91328572b..caf0d6492 100644
--- a/gcc/config.in
+++ b/gcc/config.in
@@ -205,6 +205,11 @@
#undef ENABLE_LD_BUILDID
#endif
+/* Define if gcc should always pass --no-relax to linker for sw_64. */
+#ifndef USED_FOR_TARGET
+#undef ENABLE_LD_NORELAX
+#endif
+
/* Define to 1 to enable libquadmath support */
#ifndef USED_FOR_TARGET
@@ -422,6 +427,10 @@
#undef HAVE_AS_EXPLICIT_RELOCS
#endif
+/* Define if the target fully enables asan support. */
+#ifndef USED_FOR_TARGET
+#undef SW_64_ENABLE_ASAN
+#endif
/* Define if your assembler supports FMAF, HPC, and VIS 3.0 instructions. */
#ifndef USED_FOR_TARGET
@@ -2726,3 +2735,11 @@
#undef vfork
#endif
+/* Define only sw64 target */
+#undef FLAG_SW64_ATOMIC
+#undef FLAG_SW64_90139
+#undef FLAG_SW64_PREFETCH
+#undef FLAG_SW64_PROTECT
+#undef FLAG_SW64_INC_DEC
+#undef FLAG_SW64_DELNOP
+#undef FLAG_SW64_FM
diff --git a/gcc/config/host-linux.cc b/gcc/config/host-linux.cc
index 817d3c087..a65468272 100644
--- a/gcc/config/host-linux.cc
+++ b/gcc/config/host-linux.cc
@@ -100,6 +100,8 @@
# define TRY_EMPTY_VM_SPACE 0x1000000000
#elif defined(__loongarch__) && defined(__LP64__)
# define TRY_EMPTY_VM_SPACE 0x8000000000
+#elif defined(__sw_64)
+#define TRY_EMPTY_VM_SPACE 0x10000000000
#else
# define TRY_EMPTY_VM_SPACE 0
#endif
diff --git a/gcc/configure b/gcc/configure
index ef0449edd..4a70b7c66 100755
--- a/gcc/configure
+++ b/gcc/configure
@@ -25775,6 +25775,29 @@ foo: .long 25
xor %l1, %tle_lox10(foo), %o5
ld [%g7 + %o5], %o1"
;;
+ sw_64*-*-*)
+ conftest_s='
+ .section ".tdata","awT",@progbits
+foo: .long 25
+ .text
+ ldl $27,__tls_get_addr($29) !literal!1
+ ldi $16,foo($29) !tlsgd!1
+ call $26,($27),__tls_get_addr !lituse_tlsgd!1
+ ldl $27,__tls_get_addr($29) !literal!2
+ ldi $16,foo($29) !tlsldm!2
+ call $26,($27),__tls_get_addr !lituse_tlsldm!2
+ ldl $1,foo($29) !gotdtprel
+ ldih $2,foo($29) !dtprelhi
+ ldi $3,foo($2) !dtprello
+ ldi $4,foo($29) !dtprel
+ ldl $1,foo($29) !gottprel
+ ldih $2,foo($29) !tprelhi
+ ldi $3,foo($2) !tprello
+ ldi $4,foo($29) !tprel'
+ tls_first_major=2
+ tls_first_minor=13
+ tls_as_opt=--fatal-warnings
+ ;;
tilepro*-*-*)
conftest_s='
.section ".tdata","awT",@progbits
@@ -26313,6 +26336,101 @@ fi
;;
+ sw_64*-*-linux* | sw_64*-*-*bsd*)
+ { $as_echo "$as_me:${as_lineno-$LINENO}: checking assembler for explicit relocation support" >&5
+$as_echo_n "checking assembler for explicit relocation support... " >&6; }
+if ${gcc_cv_as_sw_64_explicit_relocs+:} false; then :
+ $as_echo_n "(cached) " >&6
+else
+ gcc_cv_as_sw_64_explicit_relocs=no
+ if test x$gcc_cv_as != x; then
+ $as_echo ' .set nomacro
+ .text
+ ext0b $3, $2, $3 !lituse_bytoff!1
+ ldl $2, a($29) !literal!1
+ ldl $4, b($29) !literal!2
+ ldl_u $3, 0($2) !lituse_base!1
+ ldl $27, f($29) !literal!5
+ call $26, ($27), f !lituse_jsr!5
+ ldih $29, 0($26) !gpdisp!3
+ ldi $0, c($29) !gprel
+ ldih $1, d($29) !gprelhigh
+ ldi $1, d($1) !gprellow
+ ldi $29, 0($29) !gpdisp!3' > conftest.s
+ if { ac_try='$gcc_cv_as $gcc_cv_as_flags -o conftest.o conftest.s >&5'
+ { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5
+ (eval $ac_try) 2>&5
+ ac_status=$?
+ $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+ test $ac_status = 0; }; }
+ then
+ gcc_cv_as_sw_64_explicit_relocs=yes
+ else
+ echo "configure: failed program was" >&5
+ cat conftest.s >&5
+ fi
+ rm -f conftest.o conftest.s
+ fi
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $gcc_cv_as_sw_64_explicit_relocs" >&5
+$as_echo "$gcc_cv_as_sw_64_explicit_relocs" >&6; }
+if test $gcc_cv_as_sw_64_explicit_relocs = yes; then
+
+$as_echo "#define HAVE_AS_EXPLICIT_RELOCS 1" >>confdefs.h
+
+fi
+
+
+ { $as_echo "$as_me:${as_lineno-$LINENO}: checking assembler for jsrdirect relocation support" >&5
+$as_echo_n "checking assembler for jsrdirect relocation support... " >&6; }
+if ${gcc_cv_as_sw_64_jsrdirect_relocs+:} false; then :
+ $as_echo_n "(cached) " >&6
+else
+ gcc_cv_as_sw_64_jsrdirect_relocs=no
+ if test $in_tree_gas = yes; then
+ if test $gcc_cv_gas_vers -ge `expr \( \( 2 \* 1000 \) + 16 \) \* 1000 + 90`
+ then gcc_cv_as_sw_64_jsrdirect_relocs=yes
+fi
+#trouble#
+ elif test x$gcc_cv_as != x; then
+ $as_echo ' .set nomacro
+ .text
+ ldl $27, a($29) !literal!1
+ call $26, ($27), a !lituse_jsrdirect!1' > conftest.s
+ if { ac_try='$gcc_cv_as $gcc_cv_as_flags -o conftest.o conftest.s >&5'
+ { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5
+ (eval $ac_try) 2>&5
+ ac_status=$?
+ $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+ test $ac_status = 0; }; }
+ then
+ gcc_cv_as_sw_64_jsrdirect_relocs=yes
+ else
+ echo "configure: failed program was" >&5
+ cat conftest.s >&5
+ fi
+ rm -f conftest.o conftest.s
+ fi
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $gcc_cv_as_sw_64_jsrdirect_relocs" >&5
+$as_echo "$gcc_cv_as_sw_64_jsrdirect_relocs" >&6; }
+if test $gcc_cv_as_sw_64_jsrdirect_relocs = yes; then
+
+$as_echo "#define HAVE_AS_JSRDIRECT_RELOCS 1" >>confdefs.h
+
+fi
+cat >> confdefs.h <<_ACEOF
+#define FLAG_SW64_ATOMIC 1
+#define FLAG_SW64_90139 1
+#define FLAG_SW64_PREFETCH 1
+#define FLAG_SW64_PROTECT 1
+#define FLAG_SW64_INC_DEC 1
+#define FLAG_SW64_DELNOP 1
+#define FLAG_SW64_FM 1
+_ACEOF
+
+ ;;
+
avr-*-*)
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking assembler for --mlink-relax option" >&5
$as_echo_n "checking assembler for --mlink-relax option... " >&6; }
@@ -29377,7 +29495,7 @@ esac
case "$cpu_type" in
aarch64 | alpha | arc | arm | avr | bfin | cris | csky | i386 | loongarch | m32c \
| m68k | microblaze | mips | nds32 | nios2 | pa | riscv | rs6000 | score | sparc \
- | tilegx | tilepro | visium | xstormy16 | xtensa)
+ | sw_64 | tilegx | tilepro | visium | xstormy16 | xtensa)
insn="nop"
;;
ia64 | s390)
@@ -31151,6 +31269,17 @@ $as_echo "$as_me: WARNING: --build-id is not supported by your linker; --enable-
fi
fi
+# sw_64 add --enable-linker-no-relax to support linker -Wl,-no-relax
+# Check whether --enable-linker-no-relax was given.
+if test "${enable_linker_no_relax+set}" = set; then :
+ enableval=$enable_linker_no_relax;
+else
+ enable_linker_no_relax=no
+fi
+
+if test x"$enable_linker_no_relax" = xyes; then
+ $as_echo "#define ENABLE_LD_NORELAX 1" >>confdefs.h
+fi
# In binutils 2.21, GNU ld gained support for new emulations fully
# supporting the Solaris 2 ABI. Detect their presence in the linker used.
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking linker *_sol2 emulation support" >&5
diff --git a/gcc/configure.ac b/gcc/configure.ac
index 708ec3fd3..9683ab156 100644
--- a/gcc/configure.ac
+++ b/gcc/configure.ac
@@ -23,6 +23,7 @@
# Initialization and sanity checks
# --------------------------------
+AC_PREREQ(2.64)
AC_INIT
AC_CONFIG_SRCDIR(tree.cc)
AC_CONFIG_HEADER(auto-host.h:config.in)
@@ -4076,6 +4077,29 @@ foo: .long 25
xor %l1, %tle_lox10(foo), %o5
ld [%g7 + %o5], %o1"
;;
+ sw_64*-*-*)
+ conftest_s='
+ .section ".tdata","awT",@progbits
+foo: .long 25
+ .text
+ ldl $27,__tls_get_addr($29) !literal!1
+ ldi $16,foo($29) !tlsgd!1
+ call $26,($27),__tls_get_addr !lituse_tlsgd!1
+ ldl $27,__tls_get_addr($29) !literal!2
+ ldi $16,foo($29) !tlsldm!2
+ call $26,($27),__tls_get_addr !lituse_tlsldm!2
+ ldl $1,foo($29) !gotdtprel
+ ldih $2,foo($29) !dtprelhi
+ ldi $3,foo($2) !dtprello
+ ldi $4,foo($29) !dtprel
+ ldl $1,foo($29) !gottprel
+ ldih $2,foo($29) !tprelhi
+ ldi $3,foo($2) !tprello
+ ldi $4,foo($29) !tprel'
+ tls_first_major=2
+ tls_first_minor=13
+ tls_as_opt=--fatal-warnings
+ ;;
tilepro*-*-*)
conftest_s='
.section ".tdata","awT",@progbits
@@ -4677,6 +4701,36 @@ foo:
[Define if your assembler supports LEON instructions.])])
;;
+ sw_64*-*-linux* | sw_64*-*-*bsd*)
+ gcc_GAS_CHECK_FEATURE([explicit relocation support],
+ gcc_cv_as_sw_64_explicit_relocs, [2,12,0],,
+[ .set nomacro
+ .text
+ ext0b $3, $2, $3 !lituse_bytoff!1
+ ldl $2, a($29) !literal!1
+ ldl $4, b($29) !literal!2
+ ldl_u $3, 0($2) !lituse_base!1
+ ldl $27, f($29) !literal!5
+ call $26, ($27), f !lituse_jsr!5
+ ldih $29, 0($26) !gpdisp!3
+ ldi $0, c($29) !gprel
+ ldih $1, d($29) !gprelhigh
+ ldi $1, d($1) !gprellow
+ ldi $29, 0($29) !gpdisp!3],,
+ [AC_DEFINE(HAVE_AS_EXPLICIT_RELOCS, 1,
+ [Define if your assembler supports explicit relocations.])])
+ gcc_GAS_CHECK_FEATURE([jsrdirect relocation support],
+ gcc_cv_as_sw_64_jsrdirect_relocs,,
+[ .set nomacro
+ .text
+ ldl $27, a($29) !literal!1
+ call $26, ($27), a !lituse_jsrdirect!1],,
+ [AC_DEFINE(HAVE_AS_JSRDIRECT_RELOCS, 1,
+ [Define if your assembler supports the lituse_jsrdirect relocation.])])
+# [AC_DEFINE(SW_64_ENABLE_ASAN, 1,
+# [Define if your target fully enable asan supports.])])
+ ;;
+
changequote(,)dnl
i[34567]86-*-* | x86_64-*-*)
changequote([,])dnl
@@ -5505,7 +5559,7 @@ esac
# version to the per-target configury.
case "$cpu_type" in
aarch64 | alpha | arc | arm | avr | bfin | cris | csky | i386 | loongarch | m32c \
- | m68k | microblaze | mips | nds32 | nios2 | pa | riscv | rs6000 | score | sparc \
+ | m68k | microblaze | mips | nds32 | nios2 | pa | riscv | rs6000 | score | sparc | sw_64 \
| tilegx | tilepro | visium | xstormy16 | xtensa)
insn="nop"
;;
@@ -6758,6 +6812,21 @@ if test x"$enable_linker_build_id" = xyes; then
fi
fi
+# --no-relax
+# Let the user request that --no-relax is passed to the linker on every
+# link.  No linker probe for this option exists in this hunk, so the define
+# depends only on the configure switch.
+AC_ARG_ENABLE(linker-no-relax,
+[AS_HELP_STRING([--enable-linker-no-relax],
+ [compiler will always pass --no-relax to linker])],
+[],
+enable_linker_no_relax=no)
+
+if test x"$enable_linker_no_relax" = xyes; then
+ AC_DEFINE(ENABLE_LD_NORELAX, 1,
+ [Define if gcc should always pass --no-relax to linker.])
+fi
+
# In binutils 2.21, GNU ld gained support for new emulations fully
# supporting the Solaris 2 ABI. Detect their presence in the linker used.
AC_CACHE_CHECK(linker *_sol2 emulation support,
@@ -6930,7 +7009,8 @@ case "$target" in
powerpc*-*-linux* | \
sparc*-*-linux* | \
s390*-*-linux* | \
- alpha*-*-linux*)
+ alpha*-*-linux* | \
+ sw_64*-*-linux*)
AC_ARG_WITH(long-double-128,
[AS_HELP_STRING([--with-long-double-128],
[use 128-bit long double by default])],
diff --git a/gcc/doc/install.texi b/gcc/doc/install.texi
index a650f60c7..40dc7ae75 100644
--- a/gcc/doc/install.texi
+++ b/gcc/doc/install.texi
@@ -5005,6 +5005,15 @@ on a Solaris 11 system:
@heading sparcv9-*-solaris2*
This is a synonym for @samp{sparc64-*-solaris2*}.
+@html
+<hr />
+@end html
+@anchor{sw_64-x-x}
+@heading sw_64*-*-*
+This section contains general configuration information for all
+SW64-based platforms using ELF@. In addition to reading this
+section, please read all other sections that match your target.
+
@html
<hr />
@end html
--
2.25.1

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,528 @@
From bfa51c2eda3e40fdfd64601f5e7df19049a006cd Mon Sep 17 00:00:00 2001
From: swcompiler <lc@wxiat.com>
Date: Mon, 25 Nov 2024 16:33:10 +0800
Subject: [PATCH 03/16] Sw64 Port: add multi-prefetch support for sw64
---
gcc/builtins.cc | 161 +++++++++++++++++++++++++++++++++-
gcc/builtins.def | 2 +
gcc/ipa-pure-const.cc | 2 +
gcc/opt-functions.awk | 4 +-
gcc/params.opt | 12 +++
gcc/target-insns.def | 3 +
gcc/tree-ssa-loop-prefetch.cc | 155 +++++++++++++++++++++++++++++++-
7 files changed, 336 insertions(+), 3 deletions(-)
diff --git a/gcc/builtins.cc b/gcc/builtins.cc
index 57929a42b..c2589f316 100644
--- a/gcc/builtins.cc
+++ b/gcc/builtins.cc
@@ -109,6 +109,8 @@ static int apply_args_size (void);
static int apply_result_size (void);
static rtx result_vector (int, rtx);
static void expand_builtin_prefetch (tree);
+static void expand_builtin_prefetch_sc (tree);
+static void expand_builtin_prefetch_tc (tree);
static rtx expand_builtin_apply_args (void);
static rtx expand_builtin_apply_args_1 (void);
static rtx expand_builtin_apply (rtx, rtx, rtx);
@@ -1352,6 +1354,170 @@ expand_builtin_prefetch (tree exp)
emit_insn (op0);
}
+/* Expand a call EXP to __builtin_prefetch_sc.  Operand 0 is the address,
+ optional operand 1 is a constant read/write flag (0 or 1, default 0) and
+ optional operand 2 is a constant locality hint (0..3, default 3).  The
+ target's prefetch_sc pattern is emitted when the target provides one;
+ otherwise only side effects of evaluating the address are preserved.  */
+static void
+expand_builtin_prefetch_sc (tree exp)
+{
+ tree arg0, arg1, arg2;
+ int nargs;
+ rtx op0, op1, op2;
+
+ if (!validate_arglist (exp, POINTER_TYPE, 0))
+ return;
+
+ arg0 = CALL_EXPR_ARG (exp, 0);
+
+ /* Arguments 1 and 2 are optional; argument 1 (read/write) defaults to
+ * zero (read) and argument 2 (locality) defaults to 3 (high degree of
+ * locality). */
+ nargs = call_expr_nargs (exp);
+ if (nargs > 1)
+ arg1 = CALL_EXPR_ARG (exp, 1);
+ else
+ arg1 = integer_zero_node;
+ if (nargs > 2)
+ arg2 = CALL_EXPR_ARG (exp, 2);
+ else
+ arg2 = integer_three_node;
+
+ /* Argument 0 is an address. */
+ op0 = expand_expr (arg0, NULL_RTX, Pmode, EXPAND_NORMAL);
+
+ /* Argument 1 (read/write flag) must be a compile-time constant int. */
+ if (TREE_CODE (arg1) != INTEGER_CST)
+ {
+ error ("second argument to %<__builtin_prefetch_sc%> must be a constant");
+ arg1 = integer_zero_node;
+ }
+ op1 = expand_normal (arg1);
+ /* Argument 1 must be either zero or one. */
+ if (INTVAL (op1) != 0 && INTVAL (op1) != 1)
+ {
+ warning (0, "invalid second argument to %<__builtin_prefetch_sc%>;"
+ " using zero");
+ op1 = const0_rtx;
+ }
+
+ /* Argument 2 (locality) must be a compile-time constant int. */
+ if (TREE_CODE (arg2) != INTEGER_CST)
+ {
+ error ("third argument to %<__builtin_prefetch_sc%> must be a constant");
+ arg2 = integer_zero_node;
+ }
+ op2 = expand_normal (arg2);
+ /* Argument 2 must be 0, 1, 2, or 3. */
+ if (INTVAL (op2) < 0 || INTVAL (op2) > 3)
+ {
+ warning (
+ 0, "invalid third argument to %<__builtin_prefetch_sc%>; using zero");
+ op2 = const0_rtx;
+ }
+
+ /* Test the hook of the pattern expanded below; a target may provide
+ prefetch without providing prefetch_sc.  */
+ if (targetm.have_prefetch_sc ())
+ {
+ class expand_operand ops[3];
+
+ create_address_operand (&ops[0], op0);
+ create_integer_operand (&ops[1], INTVAL (op1));
+ create_integer_operand (&ops[2], INTVAL (op2));
+ if (maybe_expand_insn (targetm.code_for_prefetch_sc, 3, ops))
+ return;
+ }
+
+ /* Don't do anything with direct references to volatile memory, but
+ * generate code to handle other side effects. */
+ if (!MEM_P (op0) && side_effects_p (op0))
+ emit_insn (op0);
+}
+
+/* Expand a call EXP to __builtin_prefetch_tc.  Operand 0 is the address,
+ optional operand 1 is a constant read/write flag (0 or 1, default 0) and
+ optional operand 2 is a constant locality hint (0..3, default 3).  The
+ target's prefetch_tc pattern is emitted when the target provides one;
+ otherwise only side effects of evaluating the address are preserved.  */
+static void
+expand_builtin_prefetch_tc (tree exp)
+{
+ tree arg0, arg1, arg2;
+ int nargs;
+ rtx op0, op1, op2;
+
+ if (!validate_arglist (exp, POINTER_TYPE, 0))
+ return;
+
+ arg0 = CALL_EXPR_ARG (exp, 0);
+
+ /* Arguments 1 and 2 are optional; argument 1 (read/write) defaults to
+ * zero (read) and argument 2 (locality) defaults to 3 (high degree of
+ * locality). */
+ nargs = call_expr_nargs (exp);
+ if (nargs > 1)
+ arg1 = CALL_EXPR_ARG (exp, 1);
+ else
+ arg1 = integer_zero_node;
+ if (nargs > 2)
+ arg2 = CALL_EXPR_ARG (exp, 2);
+ else
+ arg2 = integer_three_node;
+
+ /* Argument 0 is an address. */
+ op0 = expand_expr (arg0, NULL_RTX, Pmode, EXPAND_NORMAL);
+
+ /* Argument 1 (read/write flag) must be a compile-time constant int. */
+ if (TREE_CODE (arg1) != INTEGER_CST)
+ {
+ error ("second argument to %<__builtin_prefetch_tc%> must be a constant");
+ arg1 = integer_zero_node;
+ }
+ op1 = expand_normal (arg1);
+ /* Argument 1 must be either zero or one. */
+ if (INTVAL (op1) != 0 && INTVAL (op1) != 1)
+ {
+ warning (0, "invalid second argument to %<__builtin_prefetch_tc%>;"
+ " using zero");
+ op1 = const0_rtx;
+ }
+
+ /* Argument 2 (locality) must be a compile-time constant int. */
+ if (TREE_CODE (arg2) != INTEGER_CST)
+ {
+ error ("third argument to %<__builtin_prefetch_tc%> must be a constant");
+ arg2 = integer_zero_node;
+ }
+ op2 = expand_normal (arg2);
+ /* Argument 2 must be 0, 1, 2, or 3. */
+ if (INTVAL (op2) < 0 || INTVAL (op2) > 3)
+ {
+ warning (0,
+ "invalid third argument to %<__builtin_prefetch_tc%>; using zero");
+ op2 = const0_rtx;
+ }
+
+ /* Test the hook of the pattern expanded below; a target may provide
+ prefetch without providing prefetch_tc.  */
+ if (targetm.have_prefetch_tc ())
+ {
+ class expand_operand ops[3];
+
+ create_address_operand (&ops[0], op0);
+ create_integer_operand (&ops[1], INTVAL (op1));
+ create_integer_operand (&ops[2], INTVAL (op2));
+ if (maybe_expand_insn (targetm.code_for_prefetch_tc, 3, ops))
+ return;
+ }
+
+ /* Don't do anything with direct references to volatile memory, but
+ * generate code to handle other side effects. */
+ if (!MEM_P (op0) && side_effects_p (op0))
+ emit_insn (op0);
+}
+
/* Get a MEM rtx for expression EXP which is the address of an operand
to be used in a string instruction (cmpstrsi, cpymemsi, ..). LEN is
the maximum length of the block of memory that might be accessed or
@@ -7598,7 +7750,12 @@ expand_builtin (tree exp, rtx target, rtx subtarget, machine_mode mode,
case BUILT_IN_PREFETCH:
expand_builtin_prefetch (exp);
return const0_rtx;
-
+ case BUILT_IN_PREFETCH_SC:
+ expand_builtin_prefetch_sc (exp);
+ return const0_rtx;
+ case BUILT_IN_PREFETCH_TC:
+ expand_builtin_prefetch_tc (exp);
+ return const0_rtx;
case BUILT_IN_INIT_TRAMPOLINE:
return expand_builtin_init_trampoline (exp, true);
case BUILT_IN_INIT_HEAP_TRAMPOLINE:
@@ -10989,6 +11146,8 @@ is_inexpensive_builtin (tree decl)
case BUILT_IN_LABS:
case BUILT_IN_LLABS:
case BUILT_IN_PREFETCH:
+ case BUILT_IN_PREFETCH_SC:
+ case BUILT_IN_PREFETCH_TC:
case BUILT_IN_ACC_ON_DEVICE:
return true;
diff --git a/gcc/builtins.def b/gcc/builtins.def
index 005976f34..983de293e 100644
--- a/gcc/builtins.def
+++ b/gcc/builtins.def
@@ -924,6 +924,8 @@ DEF_GCC_BUILTIN (BUILT_IN_POPCOUNTL, "popcountl", BT_FN_INT_ULONG, ATTR_C
DEF_GCC_BUILTIN (BUILT_IN_POPCOUNTLL, "popcountll", BT_FN_INT_ULONGLONG, ATTR_CONST_NOTHROW_LEAF_LIST)
DEF_EXT_LIB_BUILTIN (BUILT_IN_POSIX_MEMALIGN, "posix_memalign", BT_FN_INT_PTRPTR_SIZE_SIZE, ATTR_NOTHROW_NONNULL_LEAF)
DEF_GCC_BUILTIN (BUILT_IN_PREFETCH, "prefetch", BT_FN_VOID_CONST_PTR_VAR, ATTR_NOVOPS_LEAF_LIST)
+DEF_GCC_BUILTIN (BUILT_IN_PREFETCH_SC, "prefetch_sc", BT_FN_VOID_CONST_PTR_VAR, ATTR_NOVOPS_LEAF_LIST)
+DEF_GCC_BUILTIN (BUILT_IN_PREFETCH_TC, "prefetch_tc", BT_FN_VOID_CONST_PTR_VAR, ATTR_NOVOPS_LEAF_LIST)
DEF_LIB_BUILTIN (BUILT_IN_REALLOC, "realloc", BT_FN_PTR_PTR_SIZE, ATTR_ALLOC_WARN_UNUSED_RESULT_SIZE_2_NOTHROW_LEAF_LIST)
DEF_GCC_BUILTIN (BUILT_IN_RETURN, "return", BT_FN_VOID_PTR, ATTR_NORETURN_NOTHROW_LEAF_LIST)
DEF_GCC_BUILTIN (BUILT_IN_RETURN_ADDRESS, "return_address", BT_FN_PTR_UINT, ATTR_LEAF_LIST)
diff --git a/gcc/ipa-pure-const.cc b/gcc/ipa-pure-const.cc
index 2642df91e..89a950966 100644
--- a/gcc/ipa-pure-const.cc
+++ b/gcc/ipa-pure-const.cc
@@ -534,6 +534,8 @@ builtin_safe_for_const_function_p (bool *looping, tree callee)
*looping = false;
return true;
case BUILT_IN_PREFETCH:
+ case BUILT_IN_PREFETCH_SC:
+ case BUILT_IN_PREFETCH_TC:
*looping = true;
return true;
default:
diff --git a/gcc/opt-functions.awk b/gcc/opt-functions.awk
index 2aee0b9f1..0dabde89d 100644
--- a/gcc/opt-functions.awk
+++ b/gcc/opt-functions.awk
@@ -247,6 +247,8 @@ function var_type(flags)
return "HOST_WIDE_INT "
else if (flag_set_p("UInteger", flags))
return "int "
+ else if (flag_set_p("UInteger", flags))
+ return "int "
else
return "const char *"
}
@@ -256,7 +258,7 @@ function var_type(flags)
# type instead of int to save space.
function var_type_struct(flags)
{
- if (flag_set_p("UInteger", flags)) {
+ if (flag_set_p("UInteger", flags)) {
if (host_wide_int[var_name(flags)] == "yes")
return "HOST_WIDE_INT ";
if (flag_set_p("ByteSize", flags))
diff --git a/gcc/params.opt b/gcc/params.opt
index 3ddfaf5b2..5abc8ce82 100644
--- a/gcc/params.opt
+++ b/gcc/params.opt
@@ -385,6 +385,18 @@ The size of L1 cache.
Common Joined UInteger Var(param_l2_cache_size) Init(512) Param Optimization
The size of L2 cache.
+-param=pf1=
+Common Joined UInteger Var(PF1) Init(0) IntegerRange(0,200) Param Optimization
+The number of Cache lines add to L1 prefetch delta.
+
+-param=pf2=
+Common Joined UInteger Var(PF2) Init(0) IntegerRange(0,200) Param Optimization
+The number of Cache lines add to L2 prefetch delta.
+
+-param=pf3=
+Common Joined UInteger Var(PF3) Init(0) IntegerRange(0,200) Param Optimization
+The number of Cache lines add to L3 prefetch delta.
+
-param=large-function-growth=
Common Joined UInteger Var(param_large_function_growth) Optimization Init(100) Param
Maximal growth due to inlining of large function (in percent).
diff --git a/gcc/target-insns.def b/gcc/target-insns.def
index de8c0092f..8b4da8bc4 100644
--- a/gcc/target-insns.def
+++ b/gcc/target-insns.def
@@ -77,6 +77,9 @@ DEF_TARGET_INSN (omp_simt_vote_any, (rtx x0, rtx x1))
DEF_TARGET_INSN (omp_simt_xchg_bfly, (rtx x0, rtx x1, rtx x2))
DEF_TARGET_INSN (omp_simt_xchg_idx, (rtx x0, rtx x1, rtx x2))
DEF_TARGET_INSN (prefetch, (rtx x0, rtx x1, rtx x2))
+DEF_TARGET_INSN (prefetch_sc, (rtx x0, rtx x1, rtx x2))
+DEF_TARGET_INSN (prefetch_tc, (rtx x0, rtx x1, rtx x2))
+/*********************/
DEF_TARGET_INSN (probe_stack, (rtx x0))
DEF_TARGET_INSN (probe_stack_address, (rtx x0))
DEF_TARGET_INSN (prologue, (void))
diff --git a/gcc/tree-ssa-loop-prefetch.cc b/gcc/tree-ssa-loop-prefetch.cc
index aebd7c920..6aa242260 100644
--- a/gcc/tree-ssa-loop-prefetch.cc
+++ b/gcc/tree-ssa-loop-prefetch.cc
@@ -193,6 +193,9 @@ along with GCC; see the file COPYING3. If not see
#define L1_CACHE_SIZE_BYTES ((unsigned) (param_l1_cache_size * 1024))
#define L2_CACHE_SIZE_BYTES ((unsigned) (param_l2_cache_size * 1024))
+#ifdef FLAG_SW64_PREFETCH
+#define L1_CACHE_LINE_SIZE ((unsigned) (param_l1_cache_line_size))
+#endif
/* We consider a memory access nontemporal if it is not reused sooner than
after L2_CACHE_SIZE_BYTES of memory are accessed. However, we ignore
accesses closer than L1_CACHE_SIZE_BYTES / NONTEMPORAL_FRACTION,
@@ -1057,7 +1060,11 @@ schedule_prefetches (struct mem_ref_group *groups, unsigned unroll_factor,
/* At most param_simultaneous_prefetches should be running
at the same time. */
+#ifdef FLAG_SW64_PREFETCH
+ remaining_prefetch_slots = param_simultaneous_prefetches * 5;
+#else
remaining_prefetch_slots = param_simultaneous_prefetches;
+#endif
/* The prefetch will run for AHEAD iterations of the original loop, i.e.,
AHEAD / UNROLL_FACTOR iterations of the unrolled loop. In each iteration,
@@ -1081,8 +1088,10 @@ schedule_prefetches (struct mem_ref_group *groups, unsigned unroll_factor,
/* The loop is far from being sufficiently unrolled for this
prefetch. Do not generate prefetch to avoid many redudant
prefetches. */
- if (ref->prefetch_mod / unroll_factor > PREFETCH_MOD_TO_UNROLL_FACTOR_RATIO)
+#ifndef FLAG_SW64_PREFETCH
+ if (ref->prefetch_mod / unroll_factor > PREFETCH_MOD_TO_UNROLL_FACTOR_RATIO)
continue;
+#endif
/* If we need to prefetch the reference each PREFETCH_MOD iterations,
and we unroll the loop UNROLL_FACTOR times, we need to insert
@@ -1153,6 +1162,19 @@ estimate_prefetch_count (struct mem_ref_group *groups, unsigned unroll_factor)
return prefetch_count;
}
+#ifdef FLAG_SW64_PREFETCH
+/* SW64 needs to adjust the PF value dynamically during prefetching, so PF
+ * must be able to represent negative values.  However, since PFX is declared
+ * with "Common Joined UInteger Var(PFX)", it is unsigned; this function maps
+ * 0..100 to itself and 101..200 to -1..-100.  */
+int
+convert_default_to_sw (unsigned int pf_value)
+{
+ if (pf_value > 100)
+ return 100 - (int) pf_value;
+ return pf_value;
+}
+#endif
/* Issue prefetches for the reference REF into loop as decided before.
HEAD is the number of iterations to prefetch ahead. UNROLL_FACTOR
is the factor by which LOOP was unrolled. */
@@ -1184,11 +1206,21 @@ issue_prefetch_ref (struct mem_ref *ref, unsigned unroll_factor, unsigned ahead)
for (ap = 0; ap < n_prefetches; ap++)
{
+#ifdef FLAG_SW64_PREFETCH
+ if (flag_sw_prefetch_dc == 1)
+ {
+#endif
if (cst_and_fits_in_hwi (ref->group->step))
{
/* Determine the address to prefetch. */
+#ifdef FLAG_SW64_PREFETCH
+ delta = (ahead + ap * ref->prefetch_mod) *
+ int_cst_value (ref->group->step) * 2
+ + convert_default_to_sw (PF1) * L1_CACHE_LINE_SIZE;
+#else
delta = (ahead + ap * ref->prefetch_mod) *
int_cst_value (ref->group->step);
+#endif
addr = fold_build_pointer_plus_hwi (addr_base, delta);
addr = force_gimple_operand_gsi (&bsi, unshare_expr (addr), true,
NULL, true, GSI_SAME_STMT);
@@ -1220,6 +1252,86 @@ issue_prefetch_ref (struct mem_ref *ref, unsigned unroll_factor, unsigned ahead)
prefetch = gimple_build_call (builtin_decl_explicit (BUILT_IN_PREFETCH),
3, addr, write_p, local);
gsi_insert_before (&bsi, prefetch, GSI_SAME_STMT);
+#ifdef FLAG_SW64_PREFETCH
+ }
+ /* Generate L2 prefetch if the option is enabled. */
+ if (flag_sw_prefetch_sc == 1)
+ {
+ if (cst_and_fits_in_hwi (ref->group->step))
+ {
+ delta = (ahead + ap * ref->prefetch_mod) *
+ int_cst_value (ref->group->step) * 2
+ + (4 + convert_default_to_sw (PF2)) * L1_CACHE_LINE_SIZE;
+
+ addr = fold_build_pointer_plus_hwi (addr_base, delta);
+ addr = force_gimple_operand_gsi (&bsi, unshare_expr (addr), true,
+ NULL, true, GSI_SAME_STMT);
+ }
+ else
+ {
+ ahead += (unsigned) (convert_default_to_sw (PF2)
+ - convert_default_to_sw (PF1));
+ forward = fold_build2 (MULT_EXPR, sizetype,
+ fold_convert (sizetype, ref->group->step),
+ fold_convert (sizetype, size_int (ahead)));
+ addr = fold_build_pointer_plus (addr_base, forward);
+ addr = force_gimple_operand_gsi (&bsi, unshare_expr (addr), true,
+ NULL, true, GSI_SAME_STMT);
+ }
+
+ if (addr_base != addr && TREE_CODE (addr_base) == SSA_NAME
+ && TREE_CODE (addr) == SSA_NAME)
+ {
+ duplicate_ssa_name_ptr_info (addr, SSA_NAME_PTR_INFO (addr_base));
+ if (SSA_NAME_PTR_INFO (addr))
+ mark_ptr_info_alignment_unknown (SSA_NAME_PTR_INFO (addr));
+ }
+
+ /* Create the L2 prefetch instruction. */
+ prefetch
+ = gimple_build_call (builtin_decl_explicit (BUILT_IN_PREFETCH_SC),
+ 3, addr, write_p, local);
+ gsi_insert_before (&bsi, prefetch, GSI_SAME_STMT);
+ }
+ /* Generate L3 prefetch if the option is enabled. */
+ if (flag_sw_prefetch_tc == 1)
+ {
+ if (cst_and_fits_in_hwi (ref->group->step))
+ {
+ delta = (ahead + ap * ref->prefetch_mod) *
+ int_cst_value (ref->group->step) * 2
+ + (10 + convert_default_to_sw (PF3)) * L1_CACHE_LINE_SIZE;
+
+ addr = fold_build_pointer_plus_hwi (addr_base, delta);
+ addr = force_gimple_operand_gsi (&bsi, unshare_expr (addr), true,
+ NULL, true, GSI_SAME_STMT);
+ }
+ else
+ {
+ ahead += (unsigned) (convert_default_to_sw (PF3)
+ - convert_default_to_sw (PF1));
+ forward = fold_build2 (MULT_EXPR, sizetype,
+ fold_convert (sizetype, ref->group->step),
+ fold_convert (sizetype, size_int (ahead)));
+ addr = fold_build_pointer_plus (addr_base, forward);
+ addr = force_gimple_operand_gsi (&bsi, unshare_expr (addr), true,
+ NULL, true, GSI_SAME_STMT);
+ }
+
+ if (addr_base != addr && TREE_CODE (addr_base) == SSA_NAME
+ && TREE_CODE (addr) == SSA_NAME)
+ {
+ duplicate_ssa_name_ptr_info (addr, SSA_NAME_PTR_INFO (addr_base));
+ if (SSA_NAME_PTR_INFO (addr))
+ mark_ptr_info_alignment_unknown (SSA_NAME_PTR_INFO (addr));
+ }
+ /* Create the L3 prefetch instruction. */
+ prefetch
+ = gimple_build_call (builtin_decl_explicit (BUILT_IN_PREFETCH_TC),
+ 3, addr, write_p, local);
+ gsi_insert_before (&bsi, prefetch, GSI_SAME_STMT);
+ }
+#endif
}
}
@@ -1375,9 +1487,22 @@ should_unroll_loop_p (class loop *loop, class tree_niter_desc *desc,
as well; but the unrolling/prefetching is usually more profitable for
loops consisting of a single basic block, and we want to limit the
code growth. */
+#ifdef FLAG_SW64_PREFETCH
+ if (flag_sw_prefetch_unroll == 1)
+ {
+ if (loop->num_nodes > 7)
+ return false;
+ }
+ else
+ {
+ if (loop->num_nodes > 2)
+ return false;
+ }
+#else
if (loop->num_nodes > 2)
return false;
+#endif
return true;
}
@@ -1422,6 +1547,12 @@ determine_unroll_factor (class loop *loop, struct mem_ref_group *refs,
if (should_issue_prefetch_p (ref))
{
mod_constraint = ref->prefetch_mod;
+#ifdef FLAG_SW64_PREFETCH
+ /* TODO: the fallback value 4 for mod_constraint was chosen empirically;
+ * it should be derived more precisely. */
+ if (mod_constraint > upper_bound)
+ mod_constraint = 4;
+#endif
nfactor = least_common_multiple (mod_constraint, factor);
if (nfactor <= upper_bound)
factor = nfactor;
@@ -2022,6 +2153,28 @@ tree_ssa_prefetch_arrays (void)
DECL_IS_NOVOPS (decl) = true;
set_builtin_decl (BUILT_IN_PREFETCH, decl, false);
}
+#ifdef FLAG_SW64_PREFETCH
+ if (!builtin_decl_explicit_p (BUILT_IN_PREFETCH_SC))
+ {
+ tree type = build_function_type_list (void_type_node, const_ptr_type_node,
+ NULL_TREE);
+ tree decl = add_builtin_function ("__builtin_prefetch_sc", type,
+ BUILT_IN_PREFETCH_SC, BUILT_IN_NORMAL,
+ NULL, NULL_TREE);
+ DECL_IS_NOVOPS (decl) = true;
+ set_builtin_decl (BUILT_IN_PREFETCH_SC, decl, false);
+ }
+ if (!builtin_decl_explicit_p (BUILT_IN_PREFETCH_TC))
+ {
+ tree type = build_function_type_list (void_type_node, const_ptr_type_node,
+ NULL_TREE);
+ tree decl = add_builtin_function ("__builtin_prefetch_tc", type,
+ BUILT_IN_PREFETCH_TC, BUILT_IN_NORMAL,
+ NULL, NULL_TREE);
+ DECL_IS_NOVOPS (decl) = true;
+ set_builtin_decl (BUILT_IN_PREFETCH_TC, decl, false);
+ }
+#endif
for (auto loop : loops_list (cfun, LI_FROM_INNERMOST))
{
--
2.25.1

View File

@ -0,0 +1,688 @@
From f0e14563ae35b0e0c52bed8f091a750028a42e67 Mon Sep 17 00:00:00 2001
From: swcompiler <lc@wxiat.com>
Date: Mon, 25 Nov 2024 16:44:09 +0800
Subject: [PATCH 04/16] Sw64 Port: update gcc/testsuite for sw64
---
contrib/compare-all-tests | 3 +-
.../c-c++-common/torture/asm-inline.c | 2 +-
gcc/testsuite/g++.dg/cpp0x/constexpr-rom.C | 2 +-
.../g++.dg/no-stack-protector-attr-3.C | 4 +--
gcc/testsuite/g++.dg/opt/devirt2.C | 3 +-
gcc/testsuite/g++.dg/pr49718.C | 2 +-
.../gcc.c-torture/execute/20101011-1.c | 3 ++
gcc/testsuite/gcc.dg/20020312-2.c | 2 ++
.../gcc.dg/atomic/c11-atomic-exec-5.c | 2 +-
gcc/testsuite/gcc.dg/attr-alloc_size-11.c | 4 +--
gcc/testsuite/gcc.dg/cpp/assert4.c | 4 +--
gcc/testsuite/gcc.dg/pr44194-1.c | 2 +-
gcc/testsuite/gcc.dg/stack-usage-1.c | 2 ++
gcc/testsuite/gcc.dg/torture/restrict-8.c | 2 +-
gcc/testsuite/gcc.dg/tree-ssa/20040204-1.c | 2 +-
gcc/testsuite/gcc.dg/tree-ssa/ssa-dom-cse-2.c | 2 +-
gcc/testsuite/gcc.dg/tree-ssa/stdarg-2.c | 30 +++++++++----------
gcc/testsuite/gcc.dg/tree-ssa/stdarg-3.c | 22 +++++++-------
gcc/testsuite/gcc.dg/tree-ssa/stdarg-4.c | 8 ++---
gcc/testsuite/gcc.dg/tree-ssa/stdarg-5.c | 14 ++++-----
gcc/testsuite/go.test/go-test.exp | 3 ++
gcc/testsuite/lib/target-supports.exp | 11 ++++++-
22 files changed, 75 insertions(+), 54 deletions(-)
diff --git a/contrib/compare-all-tests b/contrib/compare-all-tests
index 502cc64f5..02519a1f3 100644
--- a/contrib/compare-all-tests
+++ b/contrib/compare-all-tests
@@ -33,8 +33,9 @@ ppc_opts='-m32 -m64'
s390_opts='-m31 -m31/-mzarch -m64'
sh_opts='-m3 -m3e -m4 -m4a -m4al -m4/-mieee -m1 -m1/-mno-cbranchdi -m2a -m2a/-mieee -m2e -m2e/-mieee'
sparc_opts='-mcpu=v8/-m32 -mcpu=v9/-m32 -m64'
+sw_64_opts='-mlong-double-64/-mieee -mlong-double-64 -mlong-double-128/-mieee -mlong-double-128'
-all_targets='alpha arm avr bfin cris fr30 frv h8300 ia64 iq2000 m32c m32r m68k mcore mips mmix mn10300 pa pdp11 ppc sh sparc v850 vax xstormy16 xtensa' # e500
+all_targets='alpha arm avr bfin cris fr30 frv h8300 ia64 iq2000 m32c m32r m68k mcore mips mmix mn10300 pa pdp11 ppc sh sparc sw_64 v850 vax xstormy16 xtensa' # e500
test_one_file ()
{
diff --git a/gcc/testsuite/c-c++-common/torture/asm-inline.c b/gcc/testsuite/c-c++-common/torture/asm-inline.c
index dea89658b..f860b3a7b 100644
--- a/gcc/testsuite/c-c++-common/torture/asm-inline.c
+++ b/gcc/testsuite/c-c++-common/torture/asm-inline.c
@@ -1,6 +1,6 @@
/* { dg-do compile } */
/* -O0 does no inlining, and -O3 does it too aggressively for this test: */
-/* { dg-skip-if "" { *-*-* } { "-O0" "-O3" } { "" } }
+/* { dg-skip-if "" { *-*-* } { "-O0" "-O3" "-Og -g" } { "" } }
/* The normal asm is not inlined: */
/* { dg-final { scan-assembler-times "w.w.w.w.w.w.w.w.w.w.w.w.w.w.w.w.w.w.w.w.w.w.w.w.w.w.w.w.w.w" 2 } } */
/* But the asm inline is inlined: */
diff --git a/gcc/testsuite/g++.dg/cpp0x/constexpr-rom.C b/gcc/testsuite/g++.dg/cpp0x/constexpr-rom.C
index 424979a60..37c539a54 100644
--- a/gcc/testsuite/g++.dg/cpp0x/constexpr-rom.C
+++ b/gcc/testsuite/g++.dg/cpp0x/constexpr-rom.C
@@ -1,6 +1,6 @@
// PR c++/49673: check that test_data goes into .rodata
// { dg-do compile { target c++11 } }
-// { dg-additional-options -G0 { target { { alpha*-*-* frv*-*-* ia64-*-* lm32*-*-* m32r*-*-* microblaze*-*-* mips*-*-* loongarch*-*-* nios2-*-* powerpc*-*-* rs6000*-*-* } && { ! { *-*-darwin* *-*-aix* alpha*-*-*vms* } } } } }
+// { dg-additional-options -G0 { target { { alpha*-*-* sw_64*-*-* frv*-*-* ia64-*-* lm32*-*-* m32r*-*-* microblaze*-*-* mips*-*-* loongarch*-*-* nios2-*-* powerpc*-*-* rs6000*-*-* } && { ! { *-*-darwin* *-*-aix* alpha*-*-*vms* } } } } }
// { dg-final { scan-assembler "\\.rdata" { target mips*-*-* } } }
// { dg-final { scan-assembler "rodata" { target { { *-*-linux-gnu *-*-gnu* *-*-elf } && { ! { mips*-*-* riscv*-*-* } } } } } }
diff --git a/gcc/testsuite/g++.dg/no-stack-protector-attr-3.C b/gcc/testsuite/g++.dg/no-stack-protector-attr-3.C
index 76a5ec086..982dd9856 100644
--- a/gcc/testsuite/g++.dg/no-stack-protector-attr-3.C
+++ b/gcc/testsuite/g++.dg/no-stack-protector-attr-3.C
@@ -20,5 +20,5 @@ int __attribute__((stack_protect)) bar()
return 0;
}
-/* { dg-final { scan-assembler-times "stack_chk_fail" 1 { target { ! mips*-*-* } } } }*/
-/* { dg-final { scan-assembler-times "stack_chk_fail" 2 { target { mips*-*-* } } } }*/
+/* { dg-final { scan-assembler-times "stack_chk_fail" 1 { target { ! { mips*-*-* sw_64*-*-* } } } } }*/
+/* { dg-final { scan-assembler-times "stack_chk_fail" 2 { target { mips*-*-* sw_64*-*-* } } } }*/
diff --git a/gcc/testsuite/g++.dg/opt/devirt2.C b/gcc/testsuite/g++.dg/opt/devirt2.C
index cf4842bd4..c6b5a19fa 100644
--- a/gcc/testsuite/g++.dg/opt/devirt2.C
+++ b/gcc/testsuite/g++.dg/opt/devirt2.C
@@ -5,7 +5,7 @@
// { dg-additional-options "-mshort-calls" {target epiphany-*-*} }
// Using -mno-abicalls avoids a R_MIPS_JALR .reloc.
// { dg-additional-options "-mno-abicalls" { target mips*-*-* } }
-// { dg-final { scan-assembler-times "xyzzy" 2 { target { ! { alpha*-*-* hppa*-*-* ia64*-*-hpux* sparc*-*-* *-*-mingw* } } } } }
+// { dg-final { scan-assembler-times "xyzzy" 2 { target { ! { alpha*-*-* sw_64*-*-* hppa*-*-* ia64*-*-hpux* sparc*-*-* *-*-mingw* } } } } }
// For *-*-mingw* there is additionally one .def match
// { dg-final { scan-assembler-times "xyzzy" 3 { target *-*-mingw* } } }
// The IA64 and HPPA compilers generate external declarations in addition
@@ -15,6 +15,7 @@
// If assembler supports explicit relocation directives, the alpha compiler generates
// literal/lituse_jsr pairs, so the scans need to be more specific.
// { dg-final { scan-assembler-times "jsr\[^\n\]*xyzzy" 2 { target alpha*-*-* } } }
+// { dg-final { scan-assembler-times "call\[^\n\]*xyzzy" 2 { target sw_64*-*-* } } }
// Unless the assembler supports -relax, the 32-bit SPARC compiler generates
// sethi/jmp instead of just call, so the scans need to be more specific.
// With subexpressions, Tcl regexp -inline -all returns both the complete
diff --git a/gcc/testsuite/g++.dg/pr49718.C b/gcc/testsuite/g++.dg/pr49718.C
index b1cc5deb7..13c661642 100644
--- a/gcc/testsuite/g++.dg/pr49718.C
+++ b/gcc/testsuite/g++.dg/pr49718.C
@@ -1,6 +1,6 @@
/* { dg-do compile } */
/* { dg-options "-O2 -finstrument-functions" } */
-/* { dg-additional-options "-mno-explicit-relocs" { target alpha*-*-* } } */
+/* { dg-additional-options "-mno-explicit-relocs" { target alpha*-*-* sw_64*-*-* } } */
/* { dg-additional-options "-mno-relax-pic-calls" { target mips*-*-* } } */
/* { dg-final { scan-assembler-times "__cyg_profile_func_enter" 1 { target { ! { hppa*-*-hpux* } } } } } */
/* { dg-final { scan-assembler-times "__cyg_profile_func_enter,%r" 1 { target hppa*-*-hpux* } } } */
diff --git a/gcc/testsuite/gcc.c-torture/execute/20101011-1.c b/gcc/testsuite/gcc.c-torture/execute/20101011-1.c
index d2c0f9ab7..878be5eab 100644
--- a/gcc/testsuite/gcc.c-torture/execute/20101011-1.c
+++ b/gcc/testsuite/gcc.c-torture/execute/20101011-1.c
@@ -29,6 +29,9 @@
#elif defined (__aarch64__)
/* On AArch64 integer division by zero does not trap. */
# define DO_TEST 0
+#elif defined (__sw_64__)
+ /* On sw_64 integer division by zero does not trap. */
+# define DO_TEST 0
#elif defined (__TMS320C6X__)
/* On TI C6X division by zero does not trap. */
# define DO_TEST 0
diff --git a/gcc/testsuite/gcc.dg/20020312-2.c b/gcc/testsuite/gcc.dg/20020312-2.c
index 92bc150df..292964e02 100644
--- a/gcc/testsuite/gcc.dg/20020312-2.c
+++ b/gcc/testsuite/gcc.dg/20020312-2.c
@@ -15,6 +15,8 @@ extern void abort (void);
#if defined(__alpha__)
/* PIC register is $29, but is used even without -fpic. */
+#elif defined(__sw_64__)
+/* PIC register is $29, but is used even without -fpic. */
#elif defined(__arc__)
# define PIC_REG "26"
#elif defined(__arm__)
diff --git a/gcc/testsuite/gcc.dg/atomic/c11-atomic-exec-5.c b/gcc/testsuite/gcc.dg/atomic/c11-atomic-exec-5.c
index 692c64ad2..2f5457645 100644
--- a/gcc/testsuite/gcc.dg/atomic/c11-atomic-exec-5.c
+++ b/gcc/testsuite/gcc.dg/atomic/c11-atomic-exec-5.c
@@ -24,7 +24,7 @@
| FE_OVERFLOW \
| FE_UNDERFLOW)
-#if defined __alpha__ || defined __aarch64__
+#if defined __alpha__ || defined __aarch64__ || defined __sw_64__
#define ITER_COUNT 100
#else
#define ITER_COUNT 10000
diff --git a/gcc/testsuite/gcc.dg/attr-alloc_size-11.c b/gcc/testsuite/gcc.dg/attr-alloc_size-11.c
index 8332b3993..3d5a2e28a 100644
--- a/gcc/testsuite/gcc.dg/attr-alloc_size-11.c
+++ b/gcc/testsuite/gcc.dg/attr-alloc_size-11.c
@@ -47,8 +47,8 @@ typedef __SIZE_TYPE__ size_t;
/* The following tests fail because of missing range information. The xfail
exclusions are PR79356. */
-TEST (signed char, SCHAR_MIN + 2, ALLOC_MAX); /* { dg-warning "argument 1 range \\\[13, \[0-9\]+\\\] exceeds maximum object size 12" "missing range info for signed char" { xfail { ! { aarch64*-*-* arm*-*-* avr-*-* alpha*-*-* ia64-*-* mips*-*-* or1k*-*-* pdp11*-*-* powerpc*-*-* sparc*-*-* s390*-*-* visium-*-* msp430-*-* nvptx*-*-*} } } } */
-TEST (short, SHRT_MIN + 2, ALLOC_MAX); /* { dg-warning "argument 1 range \\\[13, \[0-9\]+\\\] exceeds maximum object size 12" "missing range info for short" { xfail { ! { aarch64*-*-* arm*-*-* alpha*-*-* avr-*-* ia64-*-* mips*-*-* or1k*-*-* pdp11*-*-* powerpc*-*-* sparc*-*-* s390x-*-* visium-*-* msp430-*-* nvptx*-*-* } } } } */
+TEST (signed char, SCHAR_MIN + 2, ALLOC_MAX); /* { dg-warning "argument 1 range \\\[13, \[0-9\]+\\\] exceeds maximum object size 12" "missing range info for signed char" { xfail { ! { aarch64*-*-* arm*-*-* avr-*-* alpha*-*-* sw_64*-*-* ia64-*-* mips*-*-* or1k*-*-* pdp11*-*-* powerpc*-*-* sparc*-*-* s390*-*-* visium-*-* msp430-*-* nvptx*-*-*} } } } */
+TEST (short, SHRT_MIN + 2, ALLOC_MAX); /* { dg-warning "argument 1 range \\\[13, \[0-9\]+\\\] exceeds maximum object size 12" "missing range info for short" { xfail { ! { aarch64*-*-* arm*-*-* alpha*-*-* sw_64*-*-* avr-*-* ia64-*-* mips*-*-* or1k*-*-* pdp11*-*-* powerpc*-*-* sparc*-*-* s390x-*-* visium-*-* msp430-*-* nvptx*-*-* } } } } */
TEST (int, INT_MIN + 2, ALLOC_MAX); /* { dg-warning "argument 1 range \\\[13, \[0-9\]+\\\] exceeds maximum object size 12" } */
TEST (int, -3, ALLOC_MAX); /* { dg-warning "argument 1 range \\\[13, \[0-9\]+\\\] exceeds maximum object size 12" } */
TEST (int, -2, ALLOC_MAX); /* { dg-warning "argument 1 range \\\[13, \[0-9\]+\\\] exceeds maximum object size 12" } */
diff --git a/gcc/testsuite/gcc.dg/cpp/assert4.c b/gcc/testsuite/gcc.dg/cpp/assert4.c
index 92e3dba5c..1b40ddeb6 100644
--- a/gcc/testsuite/gcc.dg/cpp/assert4.c
+++ b/gcc/testsuite/gcc.dg/cpp/assert4.c
@@ -151,8 +151,8 @@
|| (!defined __alpha_ev4__ && #cpu(ev4))
# error
# endif
-#elif #cpu(alpha) || #machine(alpha) || #cpu(cix) || #cpu(fix) || #cpu(bwx) \
- || #cpu(max) || #cpu(ev6) || #cpu(ev5) || #cpu(ev4)
+#elif (#cpu(alpha) || #machine(alpha) || #cpu(cix) || #cpu(fix) || #cpu(bwx) \
+ || #cpu(max) || #cpu(ev6) || #cpu(ev5) || #cpu(ev4)) && !#cpu(sw_64)
# error
#endif
diff --git a/gcc/testsuite/gcc.dg/pr44194-1.c b/gcc/testsuite/gcc.dg/pr44194-1.c
index a38270b79..13709e727 100644
--- a/gcc/testsuite/gcc.dg/pr44194-1.c
+++ b/gcc/testsuite/gcc.dg/pr44194-1.c
@@ -1,4 +1,4 @@
-/* { dg-do compile { target { { { { { { { { { { i?86-*-* x86_64-*-* } && x32 } || lp64 } && { ! s390*-*-* } } && { ! hppa*64*-*-* } } && { ! alpha*-*-* } } && { { ! powerpc*-*-linux* } || powerpc_elfv2 } } && { ! nvptx-*-* } } } } } } */
+/* { dg-do compile { target { { { { { { { { { { i?86-*-* x86_64-*-* } && x32 } || lp64 } && { ! s390*-*-* } } && { ! hppa*64*-*-* } } && { ! alpha*-*-* } } && { ! sw_64*-*-* } } && { { ! powerpc*-*-linux* } || powerpc_elfv2 } } && { ! nvptx-*-* } } } } } */
/* { dg-skip-if "returns that struct in memory" { mmix-*-* } } */
/* { dg-options "-O2 -fdump-rtl-dse1 -fdump-rtl-final" } */
diff --git a/gcc/testsuite/gcc.dg/stack-usage-1.c b/gcc/testsuite/gcc.dg/stack-usage-1.c
index 21cce0f44..618be5603 100644
--- a/gcc/testsuite/gcc.dg/stack-usage-1.c
+++ b/gcc/testsuite/gcc.dg/stack-usage-1.c
@@ -31,6 +31,8 @@
# define SIZE 192
#elif defined (__alpha__)
# define SIZE 240
+#elif defined (__sw_64__)
+# define SIZE 240
#elif defined (__ia64__)
# define SIZE 272
#elif defined(__mips__)
diff --git a/gcc/testsuite/gcc.dg/torture/restrict-8.c b/gcc/testsuite/gcc.dg/torture/restrict-8.c
index 0118de013..7cb50980c 100644
--- a/gcc/testsuite/gcc.dg/torture/restrict-8.c
+++ b/gcc/testsuite/gcc.dg/torture/restrict-8.c
@@ -1,5 +1,5 @@
/* { dg-do compile } */
-/* { dg-skip-if "" { *-*-* } { "-O0" } { "" } } */
+/* { dg-skip-if "" { *-*-* } { "-O0" "-Og -g" } { "" } } */
/* { dg-options "-fdump-tree-fre1" } */
struct S { int i; void *p; int j; };
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/20040204-1.c b/gcc/testsuite/gcc.dg/tree-ssa/20040204-1.c
index b9f8fd21a..ba487e689 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/20040204-1.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/20040204-1.c
@@ -33,4 +33,4 @@ void test55 (int x, int y)
that the && should be emitted (based on BRANCH_COST). Fix this
by teaching dom to look through && and register all components
as true. */
-/* { dg-final { scan-tree-dump-times "link_error" 0 "optimized" { xfail { ! "alpha*-*-* arm*-*-* aarch64*-*-* powerpc*-*-* cris-*-* hppa*-*-* i?86-*-* mmix-*-* mips*-*-* m68k*-*-* moxie-*-* nds32*-*-* s390*-*-* sh*-*-* sparc*-*-* visium-*-* x86_64-*-* riscv*-*-* or1k*-*-* msp430-*-* pru*-*-* nvptx*-*-*" } } } } */
+/* { dg-final { scan-tree-dump-times "link_error" 0 "optimized" { xfail { ! "alpha*-*-* sw_64*-*-* arm*-*-* aarch64*-*-* powerpc*-*-* cris-*-* hppa*-*-* i?86-*-* mmix-*-* mips*-*-* m68k*-*-* moxie-*-* nds32*-*-* s390*-*-* sh*-*-* sparc*-*-* visium-*-* x86_64-*-* riscv*-*-* or1k*-*-* msp430-*-* pru*-*-* nvptx*-*-*" } } } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ssa-dom-cse-2.c b/gcc/testsuite/gcc.dg/tree-ssa/ssa-dom-cse-2.c
index a879d3059..8e9391c11 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/ssa-dom-cse-2.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/ssa-dom-cse-2.c
@@ -27,4 +27,4 @@ foo ()
but the loop reads only one element at a time, and DOM cannot resolve these.
The same happens on powerpc depending on the SIMD support available. */
-/* { dg-final { scan-tree-dump "return 28;" "optimized" { xfail { { alpha*-*-* hppa*64*-*-* nvptx*-*-* mmix-knuth-mmixware } || { { { lp64 && { powerpc*-*-* sparc*-*-* riscv*-*-* } } || aarch64_sve } || { arm*-*-* && { ! arm_neon } } } } } } } */
+/* { dg-final { scan-tree-dump "return 28;" "optimized" { xfail { { alpha*-*-* sw_64*-*-* hppa*64*-*-* nvptx*-*-* mmix-knuth-mmixware } || { { { lp64 && { powerpc*-*-* sparc*-*-* riscv*-*-* } } || aarch64_sve } || { arm*-*-* && { ! arm_neon } } } } } } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/stdarg-2.c b/gcc/testsuite/gcc.dg/tree-ssa/stdarg-2.c
index 0224997f1..d6bda1658 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/stdarg-2.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/stdarg-2.c
@@ -23,7 +23,7 @@ f1 (int i, ...)
}
/* { dg-final { scan-tree-dump "f1: va_list escapes 0, needs to save 0 GPR units and 0 FPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && { ! { ia32 || llp64 } } } } } } */
/* { dg-final { scan-tree-dump "f1: va_list escapes 0, needs to save 0 GPR units and 0 FPR units" "stdarg" { target { powerpc*-*-linux* && ilp32 } } } } */
-/* { dg-final { scan-tree-dump "f1: va_list escapes 0, needs to save 0 GPR units and 0 FPR units" "stdarg" { target alpha*-*-linux* } } } */
+/* { dg-final { scan-tree-dump "f1: va_list escapes 0, needs to save 0 GPR units and 0 FPR units" "stdarg" { target alpha*-*-linux* sw_64*-*-* } } } */
/* { dg-final { scan-tree-dump "f1: va_list escapes 0, needs to save 0 GPR units and 0 FPR units" "stdarg" { target s390*-*-linux* } } } */
/* { dg-final { scan-tree-dump "f1: va_list escapes 0, needs to save 0 GPR units and 0 FPR units" "stdarg" { target aarch64*-*-* } } } */
/* { dg-final { scan-tree-dump "f1: va_list escapes 0, needs to save 0 GPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && ia32 } } } } */
@@ -44,7 +44,7 @@ f2 (int i, ...)
architecture or bytes on 64-bit architecture. */
/* { dg-final { scan-tree-dump "f2: va_list escapes 0, needs to save \[148\] GPR units and 0 FPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && { ! { ia32 || llp64 } } } } } } */
/* { dg-final { scan-tree-dump "f2: va_list escapes 0, needs to save \[148\] GPR units and 0 FPR units" "stdarg" { target { powerpc*-*-linux* && ilp32 } } } } */
-/* { dg-final { scan-tree-dump "f2: va_list escapes 0, needs to save 8 GPR units and 1" "stdarg" { target alpha*-*-linux* } } } */
+/* { dg-final { scan-tree-dump "f2: va_list escapes 0, needs to save 8 GPR units and 1" "stdarg" { target alpha*-*-linux* sw_64*-*-* } } } */
/* { dg-final { scan-tree-dump "f2: va_list escapes 0, needs to save 1 GPR units and 0 FPR units" "stdarg" { target s390*-*-linux* } } } */
/* { dg-final { scan-tree-dump "f2: va_list escapes 0, needs to save 8 GPR units and 0 FPR units" "stdarg" { target aarch64*-*-* } } } */
/* { dg-final { scan-tree-dump "f2: va_list escapes 0, needs to save \[148\] GPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && ia32 } } } } */
@@ -63,7 +63,7 @@ f3 (int i, ...)
/* { dg-final { scan-tree-dump "f3: va_list escapes 0, needs to save 0 GPR units and \[1-9\]\[0-9\]* FPR units" "stdarg" { target { powerpc*-*-linux* && { powerpc_fprs && ilp32 } } } } } */
/* { dg-final { scan-tree-dump "f3: va_list escapes 0, needs to save 0 GPR units and 1 FPR units" "stdarg" { target s390*-*-linux* } } } */
/* { dg-final { scan-tree-dump "f3: va_list escapes 0, needs to save 0 GPR units and 16 FPR units" "stdarg" { target aarch64*-*-* } } } */
-/* { dg-final { scan-tree-dump "f3: va_list escapes 0, needs to save 8 GPR units and 2" "stdarg" { target alpha*-*-linux* } } } */
+/* { dg-final { scan-tree-dump "f3: va_list escapes 0, needs to save 8 GPR units and 2" "stdarg" { target alpha*-*-linux* sw_64*-*-* } } } */
/* { dg-final { scan-tree-dump "f3: va_list escapes 0, needs to save \[1-9\]\[0-9\]* GPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && ia32 } } } } */
/* { dg-final { scan-tree-dump "f3: va_list escapes 0, needs to save \[1-9\]\[0-9\]* GPR units" "stdarg" { target ia64-*-* } } } */
/* { dg-final { scan-tree-dump "f3: va_list escapes 0, needs to save \[1-9\]\[0-9\]* GPR units" "stdarg" { target { powerpc*-*-* && lp64 } } } } */
@@ -79,7 +79,7 @@ f4 (int i, ...)
}
/* { dg-final { scan-tree-dump "f4: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && { ! { ia32 || llp64 } } } } } } */
/* { dg-final { scan-tree-dump "f4: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target { powerpc*-*-linux* && ilp32 } } } } */
-/* { dg-final { scan-tree-dump "f4: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target alpha*-*-linux* } } } */
+/* { dg-final { scan-tree-dump "f4: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target alpha*-*-linux* sw_64*-*-* } } } */
/* { dg-final { scan-tree-dump "f4: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target s390*-*-linux* } } } */
/* { dg-final { scan-tree-dump "f4: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target aarch64*-*-* } } } */
/* { dg-final { scan-tree-dump "f4: va_list escapes 1, needs to save all GPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && ia32 } } } } */
@@ -98,7 +98,7 @@ f5 (int i, ...)
}
/* { dg-final { scan-tree-dump "f5: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && { ! { ia32 || llp64 } } } } } } */
/* { dg-final { scan-tree-dump "f5: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target { powerpc*-*-linux* && ilp32 } } } } */
-/* { dg-final { scan-tree-dump "f5: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target alpha*-*-linux* } } } */
+/* { dg-final { scan-tree-dump "f5: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target alpha*-*-linux* sw_64*-*-* } } } */
/* { dg-final { scan-tree-dump "f5: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target s390*-*-linux* } } } */
/* { dg-final { scan-tree-dump "f5: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target aarch64*-*-* } } } */
/* { dg-final { scan-tree-dump "f5: va_list escapes 1, needs to save all GPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && ia32 } } } } */
@@ -119,7 +119,7 @@ f6 (int i, ...)
}
/* { dg-final { scan-tree-dump "f6: va_list escapes 0, needs to save (3|12|24) GPR units and 0 FPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && { ! { ia32 || llp64 } } } } } } */
/* { dg-final { scan-tree-dump "f6: va_list escapes 0, needs to save (3|12|24) GPR units and 0 FPR units" "stdarg" { target { powerpc*-*-linux* && ilp32 } } } } */
-/* { dg-final { scan-tree-dump "f6: va_list escapes 0, needs to save 24 GPR units and 1" "stdarg" { target alpha*-*-linux* } } } */
+/* { dg-final { scan-tree-dump "f6: va_list escapes 0, needs to save 24 GPR units and 1" "stdarg" { target alpha*-*-linux* sw_64*-*-* } } } */
/* { dg-final { scan-tree-dump "f6: va_list escapes 0, needs to save 3 GPR units and 0 FPR units" "stdarg" { target s390*-*-linux* } } } */
/* { dg-final { scan-tree-dump "f6: va_list escapes 0, needs to save 24 GPR units and 0 FPR units" "stdarg" { target aarch64*-*-* } } } */
/* { dg-final { scan-tree-dump "f6: va_list escapes 0, needs to save (3|12|24) GPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && ia32 } } } } */
@@ -137,7 +137,7 @@ f7 (int i, ...)
}
/* { dg-final { scan-tree-dump "f7: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && { ! { ia32 || llp64 } } } } } } */
/* { dg-final { scan-tree-dump "f7: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target { powerpc*-*-linux* && ilp32 } } } } */
-/* { dg-final { scan-tree-dump "f7: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target alpha*-*-linux* } } } */
+/* { dg-final { scan-tree-dump "f7: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target alpha*-*-linux* sw_64*-*-* } } } */
/* { dg-final { scan-tree-dump "f7: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target s390*-*-linux* } } } */
/* { dg-final { scan-tree-dump "f7: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target aarch64*-*-* } } } */
/* { dg-final { scan-tree-dump "f7: va_list escapes 1, needs to save all GPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && ia32 } } } } */
@@ -157,7 +157,7 @@ f8 (int i, ...)
}
/* { dg-final { scan-tree-dump "f8: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && { ! { ia32 || llp64 } } } } } } */
/* { dg-final { scan-tree-dump "f8: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target { powerpc*-*-linux* && ilp32 } } } } */
-/* { dg-final { scan-tree-dump "f8: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target alpha*-*-linux* } } } */
+/* { dg-final { scan-tree-dump "f8: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target alpha*-*-linux* sw_64*-*-* } } } */
/* { dg-final { scan-tree-dump "f8: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target s390*-*-linux* } } } */
/* { dg-final { scan-tree-dump "f8: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target aarch64*-*-* } } } */
/* { dg-final { scan-tree-dump "f8: va_list escapes 1, needs to save all GPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && ia32 } } } } */
@@ -175,7 +175,7 @@ f9 (int i, ...)
}
/* { dg-final { scan-tree-dump "f9: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && { ! { ia32 || llp64 } } } } } } */
/* { dg-final { scan-tree-dump "f9: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target { powerpc*-*-linux* && ilp32 } } } } */
-/* { dg-final { scan-tree-dump "f9: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target alpha*-*-linux* } } } */
+/* { dg-final { scan-tree-dump "f9: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target alpha*-*-linux* sw_64*-*-* } } } */
/* { dg-final { scan-tree-dump "f9: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target s390*-*-linux* } } } */
/* { dg-final { scan-tree-dump "f9: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target aarch64*-*-* } } } */
/* { dg-final { scan-tree-dump "f9: va_list escapes 1, needs to save all GPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && ia32 } } } } */
@@ -195,7 +195,7 @@ f10 (int i, ...)
}
/* { dg-final { scan-tree-dump "f10: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && { ! { ia32 || llp64 } } } } } } */
/* { dg-final { scan-tree-dump "f10: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target { powerpc*-*-linux* && ilp32 } } } } */
-/* { dg-final { scan-tree-dump "f10: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target alpha*-*-linux* } } } */
+/* { dg-final { scan-tree-dump "f10: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target alpha*-*-linux* sw_64*-*-* } } } */
/* { dg-final { scan-tree-dump "f10: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target s390*-*-linux* } } } */
/* { dg-final { scan-tree-dump "f10: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target aarch64*-*-* } } } */
/* { dg-final { scan-tree-dump "f10: va_list escapes 1, needs to save all GPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && ia32 } } } } */
@@ -216,7 +216,7 @@ f11 (int i, ...)
}
/* { dg-final { scan-tree-dump "f11: va_list escapes 0, needs to save (3|12|24) GPR units and 0 FPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && { ! { ia32 || llp64 } } } } } } */
/* { dg-final { scan-tree-dump "f11: va_list escapes 0, needs to save (3|12|24) GPR units and 0 FPR units" "stdarg" { target { powerpc*-*-linux* && ilp32 } } } } */
-/* { dg-final { scan-tree-dump "f11: va_list escapes 0, needs to save 24 GPR units and 1" "stdarg" { target alpha*-*-linux* } } } */
+/* { dg-final { scan-tree-dump "f11: va_list escapes 0, needs to save 24 GPR units and 1" "stdarg" { target alpha*-*-linux* sw_64*-*-* } } } */
/* { dg-final { scan-tree-dump "f11: va_list escapes 0, needs to save 3 GPR units and 0 FPR units" "stdarg" { target s390*-*-linux* } } } */
/* { dg-final { scan-tree-dump "f11: va_list escapes 0, needs to save 24 GPR units and 0 FPR units" "stdarg" { target aarch64*-*-* } } } */
/* { dg-final { scan-tree-dump "f11: va_list escapes 0, needs to save (3|12|24) GPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && ia32 } } } } */
@@ -237,7 +237,7 @@ f12 (int i, ...)
}
/* { dg-final { scan-tree-dump "f12: va_list escapes 0, needs to save 0 GPR units and \[1-9\]\[0-9\]* FPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && { ! { ia32 || llp64 } } } } } } */
/* { dg-final { scan-tree-dump "f12: va_list escapes 0, needs to save 0 GPR units and \[1-9\]\[0-9\]* FPR units" "stdarg" { target { powerpc*-*-linux* && { powerpc_fprs && ilp32 } } } } } */
-/* { dg-final { scan-tree-dump "f12: va_list escapes 0, needs to save 24 GPR units and 2" "stdarg" { target alpha*-*-linux* } } } */
+/* { dg-final { scan-tree-dump "f12: va_list escapes 0, needs to save 24 GPR units and 2" "stdarg" { target alpha*-*-linux* sw_64*-*-* } } } */
/* { dg-final { scan-tree-dump "f12: va_list escapes 0, needs to save 0 GPR units and 3 FPR units" "stdarg" { target s390*-*-linux* } } } */
/* { dg-final { scan-tree-dump "f12: va_list escapes 0, needs to save 0 GPR units and 48 FPR units" "stdarg" { target aarch64*-*-* } } } */
/* { dg-final { scan-tree-dump "f12: va_list escapes 0, needs to save \[1-9]\[0-9\]* GPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && ia32 } } } } */
@@ -258,7 +258,7 @@ f13 (int i, ...)
}
/* { dg-final { scan-tree-dump "f13: va_list escapes 0, needs to save 0 GPR units and \[1-9\]\[0-9\]* FPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && { ! { ia32 || llp64 } } } } } } */
/* { dg-final { scan-tree-dump "f13: va_list escapes 0, needs to save 0 GPR units and \[1-9\]\[0-9\]* FPR units" "stdarg" { target { powerpc*-*-linux* && { powerpc_fprs && ilp32 } } } } } */
-/* { dg-final { scan-tree-dump "f13: va_list escapes 0, needs to save 24 GPR units and 2" "stdarg" { target alpha*-*-linux* } } } */
+/* { dg-final { scan-tree-dump "f13: va_list escapes 0, needs to save 24 GPR units and 2" "stdarg" { target alpha*-*-linux* sw_64*-*-* } } } */
/* { dg-final { scan-tree-dump "f13: va_list escapes 0, needs to save 0 GPR units and 3 FPR units" "stdarg" { target s390*-*-linux* } } } */
/* { dg-final { scan-tree-dump "f13: va_list escapes 0, needs to save 0 GPR units and 48 FPR units" "stdarg" { target aarch64*-*-* } } } */
/* { dg-final { scan-tree-dump "f13: va_list escapes 0, needs to save \[1-9]\[0-9\]* GPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && ia32 } } } } */
@@ -279,7 +279,7 @@ f14 (int i, ...)
}
/* { dg-final { scan-tree-dump "f14: va_list escapes 0, needs to save \[148\] GPR units and \[1-9\]\[0-9\]* FPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && { ! { ia32 || llp64 } } } } } } */
/* { dg-final { scan-tree-dump "f14: va_list escapes 0, needs to save \[148\] GPR units and \[1-9\]\[0-9\]* FPR units" "stdarg" { target { powerpc*-*-linux* && { powerpc_fprs && ilp32 } } } } } */
-/* { dg-final { scan-tree-dump "f14: va_list escapes 0, needs to save 24 GPR units and 3" "stdarg" { target alpha*-*-linux* } } } */
+/* { dg-final { scan-tree-dump "f14: va_list escapes 0, needs to save 24 GPR units and 3" "stdarg" { target alpha*-*-linux* sw_64*-*-* } } } */
/* { dg-final { scan-tree-dump "f14: va_list escapes 0, needs to save 1 GPR units and 2 FPR units" "stdarg" { target s390*-*-linux* } } } */
/* { dg-final { scan-tree-dump "f14: va_list escapes 0, needs to save 8 GPR units and 32 FPR units" "stdarg" { target aarch64*-*-* } } } */
/* { dg-final { scan-tree-dump "f14: va_list escapes 0, needs to save \[1-9]\[0-9\]* GPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && ia32 } } } } */
@@ -308,7 +308,7 @@ f15 (int i, ...)
/* { dg-final { scan-tree-dump "f15: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target aarch64*-*-* } } } */
/* We may be able to improve upon this after fixing PR66010/PR66013. */
-/* { dg-final { scan-tree-dump "f15: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target alpha*-*-linux* } } } */
+/* { dg-final { scan-tree-dump "f15: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target alpha*-*-linux* sw_64*-*-* } } } */
/* { dg-final { scan-tree-dump-not "f15: va_list escapes 0, needs to save 0 GPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && ia32 } } } } */
/* { dg-final { scan-tree-dump-not "f15: va_list escapes 0, needs to save 0 GPR units" "stdarg" { target ia64-*-* } } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/stdarg-3.c b/gcc/testsuite/gcc.dg/tree-ssa/stdarg-3.c
index d044654e0..d92290bb0 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/stdarg-3.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/stdarg-3.c
@@ -22,7 +22,7 @@ f1 (int i, ...)
}
/* { dg-final { scan-tree-dump "f1: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && { ! { ia32 } } } } } } */
/* { dg-final { scan-tree-dump "f1: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target { powerpc*-*-linux* && ilp32 } } } } */
-/* { dg-final { scan-tree-dump "f1: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target alpha*-*-linux* } } } */
+/* { dg-final { scan-tree-dump "f1: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target alpha*-*-linux* sw_64*-*-* } } } */
/* { dg-final { scan-tree-dump "f1: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target s390*-*-linux* } } } */
/* { dg-final { scan-tree-dump "f1: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target aarch64*-*-* } } } */
/* { dg-final { scan-tree-dump "f1: va_list escapes 1, needs to save all GPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && ia32 } } } } */
@@ -38,7 +38,7 @@ f2 (int i, ...)
}
/* { dg-final { scan-tree-dump "f2: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && { ! { ia32 } } } } } } */
/* { dg-final { scan-tree-dump "f2: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target { powerpc*-*-linux* && ilp32 } } } } */
-/* { dg-final { scan-tree-dump "f2: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target alpha*-*-linux* } } } */
+/* { dg-final { scan-tree-dump "f2: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target alpha*-*-linux* sw_64*-*-* } } } */
/* { dg-final { scan-tree-dump "f2: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target s390*-*-linux* } } } */
/* { dg-final { scan-tree-dump "f2: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target aarch64*-*-* } } } */
/* { dg-final { scan-tree-dump "f2: va_list escapes 1, needs to save all GPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && ia32 } } } } */
@@ -57,7 +57,7 @@ f3 (int i, ...)
}
/* { dg-final { scan-tree-dump "f3: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && { ! { ia32 } } } } } } */
/* { dg-final { scan-tree-dump "f3: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target { powerpc*-*-linux* && ilp32 } } } } */
-/* { dg-final { scan-tree-dump "f3: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target alpha*-*-linux* } } } */
+/* { dg-final { scan-tree-dump "f3: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target alpha*-*-linux* sw_64*-*-* } } } */
/* { dg-final { scan-tree-dump "f3: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target s390*-*-linux* } } } */
/* { dg-final { scan-tree-dump "f3: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target aarch64*-*-* } } } */
/* { dg-final { scan-tree-dump "f3: va_list escapes 1, needs to save all GPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && ia32 } } } } */
@@ -74,7 +74,7 @@ f4 (int i, ...)
}
/* { dg-final { scan-tree-dump "f4: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && { ! { ia32 } } } } } } */
/* { dg-final { scan-tree-dump "f4: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target { powerpc*-*-linux* && ilp32 } } } } */
-/* { dg-final { scan-tree-dump "f4: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target alpha*-*-linux* } } } */
+/* { dg-final { scan-tree-dump "f4: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target alpha*-*-linux* sw_64*-*-* } } } */
/* { dg-final { scan-tree-dump "f4: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target s390*-*-linux* } } } */
/* { dg-final { scan-tree-dump "f4: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target aarch64*-*-* } } } */
/* { dg-final { scan-tree-dump "f4: va_list escapes 1, needs to save all GPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && ia32 } } } } */
@@ -91,7 +91,7 @@ f5 (int i, ...)
}
/* { dg-final { scan-tree-dump "f5: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && { ! { ia32 } } } } } } */
/* { dg-final { scan-tree-dump "f5: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target { powerpc*-*-linux* && ilp32 } } } } */
-/* { dg-final { scan-tree-dump "f5: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target alpha*-*-linux* } } } */
+/* { dg-final { scan-tree-dump "f5: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target alpha*-*-linux* sw_64*-*-* } } } */
/* { dg-final { scan-tree-dump "f5: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target s390*-*-linux* } } } */
/* { dg-final { scan-tree-dump "f5: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target aarch64*-*-* } } } */
/* { dg-final { scan-tree-dump "f5: va_list escapes 1, needs to save all GPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && ia32 } } } } */
@@ -110,7 +110,7 @@ f6 (int i, ...)
}
/* { dg-final { scan-tree-dump "f6: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && { ! { ia32 } } } } } } */
/* { dg-final { scan-tree-dump "f6: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target { powerpc*-*-linux* && ilp32 } } } } */
-/* { dg-final { scan-tree-dump "f6: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target alpha*-*-linux* } } } */
+/* { dg-final { scan-tree-dump "f6: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target alpha*-*-linux* sw_64*-*-* } } } */
/* { dg-final { scan-tree-dump "f6: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target s390*-*-linux* } } } */
/* { dg-final { scan-tree-dump "f6: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target aarch64*-*-* } } } */
/* { dg-final { scan-tree-dump "f6: va_list escapes 1, needs to save all GPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && ia32 } } } } */
@@ -127,7 +127,7 @@ f7 (int i, ...)
}
/* { dg-final { scan-tree-dump "f7: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && { ! { ia32 } } } } } } */
/* { dg-final { scan-tree-dump "f7: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target { powerpc*-*-linux* && ilp32 } } } } */
-/* { dg-final { scan-tree-dump "f7: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target alpha*-*-linux* } } } */
+/* { dg-final { scan-tree-dump "f7: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target alpha*-*-linux* sw_64*-*-* } } } */
/* { dg-final { scan-tree-dump "f7: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target s390*-*-linux* } } } */
/* { dg-final { scan-tree-dump "f7: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target aarch64*-*-* } } } */
/* { dg-final { scan-tree-dump "f7: va_list escapes 1, needs to save all GPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && ia32 } } } } */
@@ -144,7 +144,7 @@ f8 (int i, ...)
}
/* { dg-final { scan-tree-dump "f8: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && { ! { ia32 } } } } } } */
/* { dg-final { scan-tree-dump "f8: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target { powerpc*-*-linux* && ilp32 } } } } */
-/* { dg-final { scan-tree-dump "f8: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target alpha*-*-linux* } } } */
+/* { dg-final { scan-tree-dump "f8: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target alpha*-*-linux* sw_64*-*-* } } } */
/* { dg-final { scan-tree-dump "f8: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target s390*-*-linux* } } } */
/* { dg-final { scan-tree-dump "f8: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target aarch64*-*-* } } } */
/* { dg-final { scan-tree-dump "f8: va_list escapes 1, needs to save all GPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && ia32 } } } } */
@@ -161,7 +161,7 @@ f10 (int i, ...)
}
/* { dg-final { scan-tree-dump "f10: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && { ! { ia32 } } } } } } */
/* { dg-final { scan-tree-dump "f10: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target { powerpc*-*-linux* && ilp32 } } } } */
-/* { dg-final { scan-tree-dump "f10: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target alpha*-*-linux* } } } */
+/* { dg-final { scan-tree-dump "f10: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target alpha*-*-linux* sw_64*-*-* } } } */
/* { dg-final { scan-tree-dump "f10: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target s390*-*-linux* } } } */
/* { dg-final { scan-tree-dump "f10: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target aarch64*-*-* } } } */
/* { dg-final { scan-tree-dump "f10: va_list escapes 1, needs to save all GPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && ia32 } } } } */
@@ -178,7 +178,7 @@ f11 (int i, ...)
}
/* { dg-final { scan-tree-dump "f11: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && { ! { ia32 } } } } } } */
/* { dg-final { scan-tree-dump "f11: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target { powerpc*-*-linux* && ilp32 } } } } */
-/* { dg-final { scan-tree-dump "f11: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target alpha*-*-linux* } } } */
+/* { dg-final { scan-tree-dump "f11: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target alpha*-*-linux* sw_64*-*-* } } } */
/* { dg-final { scan-tree-dump "f11: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target s390*-*-linux* } } } */
/* { dg-final { scan-tree-dump "f11: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target aarch64*-*-* } } } */
/* { dg-final { scan-tree-dump "f11: va_list escapes 1, needs to save all GPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && ia32 } } } } */
@@ -195,7 +195,7 @@ f12 (int i, ...)
}
/* { dg-final { scan-tree-dump "f12: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && { ! { ia32 } } } } } } */
/* { dg-final { scan-tree-dump "f12: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target { powerpc*-*-linux* && ilp32 } } } } */
-/* { dg-final { scan-tree-dump "f12: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target alpha*-*-linux* } } } */
+/* { dg-final { scan-tree-dump "f12: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target alpha*-*-linux* sw_64*-*-* } } } */
/* { dg-final { scan-tree-dump "f12: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target s390*-*-linux* } } } */
/* { dg-final { scan-tree-dump "f12: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target aarch64*-*-* } } } */
/* { dg-final { scan-tree-dump "f12: va_list escapes 1, needs to save all GPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && ia32 } } } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/stdarg-4.c b/gcc/testsuite/gcc.dg/tree-ssa/stdarg-4.c
index 1a637d6ef..8b2f38929 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/stdarg-4.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/stdarg-4.c
@@ -25,7 +25,7 @@ f1 (int i, ...)
}
/* { dg-final { scan-tree-dump "f1: va_list escapes 0, needs to save all GPR units and 0 FPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && { x32 || { ! { ia32 || llp64 } } } } } } } */
/* { dg-final { scan-tree-dump "f1: va_list escapes 0, needs to save all GPR units and 0 FPR units" "stdarg" { target { powerpc*-*-linux* && ilp32 } } } } */
-/* { dg-final { scan-tree-dump "f1: va_list escapes 0, needs to save all GPR units and 1" "stdarg" { target alpha*-*-linux* } } } */
+/* { dg-final { scan-tree-dump "f1: va_list escapes 0, needs to save all GPR units and 1" "stdarg" { target alpha*-*-linux* sw_64*-*-* } } } */
/* { dg-final { scan-tree-dump "f1: va_list escapes 0, needs to save all GPR units and 0 FPR units" "stdarg" { target s390*-*-linux* } } } */
/* { dg-final { scan-tree-dump "f1: va_list escapes 0, needs to save all GPR units and 0 FPR units" "stdarg" { target aarch64*-*-* } } } */
/* { dg-final { scan-tree-dump "f1: va_list escapes \[01\], needs to save all GPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && ia32 } } } } */
@@ -43,7 +43,7 @@ f2 (int i, ...)
}
/* { dg-final { scan-tree-dump "f2: va_list escapes 0, needs to save 0 GPR units and all FPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && { x32 || { ! { ia32 || llp64 } } } } } } } */
/* { dg-final { scan-tree-dump "f2: va_list escapes 0, needs to save 0 GPR units and all FPR units" "stdarg" { target { powerpc*-*-linux* && { powerpc_fprs && ilp32 } } } } } */
-/* { dg-final { scan-tree-dump "f2: va_list escapes 0, needs to save all GPR units and 2" "stdarg" { target alpha*-*-linux* } } } */
+/* { dg-final { scan-tree-dump "f2: va_list escapes 0, needs to save all GPR units and 2" "stdarg" { target alpha*-*-linux* sw_64*-*-* } } } */
/* { dg-final { scan-tree-dump "f2: va_list escapes 0, needs to save 0 GPR units and all FPR units" "stdarg" { target s390*-*-linux* } } } */
/* { dg-final { scan-tree-dump "f2: va_list escapes 0, needs to save 0 GPR units and all FPR units" "stdarg" { target aarch64*-*-* } } } */
/* { dg-final { scan-tree-dump "f2: va_list escapes \[01\], needs to save all GPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && ia32 } } } } */
@@ -67,7 +67,7 @@ f3 (int i, ...)
}
/* { dg-final { scan-tree-dump "f3: va_list escapes 0, needs to save \[148\] GPR units and 0 FPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && { ! { ia32 || llp64 } } } } } } */
/* { dg-final { scan-tree-dump "f3: va_list escapes 0, needs to save \[148\] GPR units and 0 FPR units" "stdarg" { target { powerpc*-*-linux* && ilp32 } } } } */
-/* { dg-final { scan-tree-dump "f3: va_list escapes 0, needs to save 8 GPR units and 1" "stdarg" { target alpha*-*-linux* } } } */
+/* { dg-final { scan-tree-dump "f3: va_list escapes 0, needs to save 8 GPR units and 1" "stdarg" { target alpha*-*-linux* sw_64*-*-* } } } */
/* { dg-final { scan-tree-dump "f3: va_list escapes 0, needs to save 1 GPR units and 0 FPR units" "stdarg" { target s390*-*-linux* } } } */
/* { dg-final { scan-tree-dump "f3: va_list escapes 0, needs to save 8 GPR units and 0 FPR units" "stdarg" { target aarch64*-*-* } } } */
/* { dg-final { scan-tree-dump "f3: va_list escapes 0, needs to save \[148\] GPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && ia32 } } } } */
@@ -89,7 +89,7 @@ f4 (int i, ...)
}
/* { dg-final { scan-tree-dump "f4: va_list escapes 0, needs to save 0 GPR units and \[1-9\]\[0-9\]* FPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && { ! { ia32 || llp64 } } } } } } */
/* { dg-final { scan-tree-dump "f4: va_list escapes 0, needs to save 0 GPR units and \[1-9\]\[0-9\]* FPR units" "stdarg" { target { powerpc*-*-linux* && { powerpc_fprs && ilp32 } } } } } */
-/* { dg-final { scan-tree-dump "f4: va_list escapes 0, needs to save 8 GPR units and 2" "stdarg" { target alpha*-*-linux* } } } */
+/* { dg-final { scan-tree-dump "f4: va_list escapes 0, needs to save 8 GPR units and 2" "stdarg" { target alpha*-*-linux* sw_64*-*-* } } } */
/* { dg-final { scan-tree-dump "f4: va_list escapes 0, needs to save 0 GPR units and 1 FPR units" "stdarg" { target s390*-*-linux* } } } */
/* { dg-final { scan-tree-dump "f4: va_list escapes 0, needs to save 0 GPR units and 16 FPR units" "stdarg" { target aarch64*-*-* } } } */
/* { dg-final { scan-tree-dump "f4: va_list escapes 0, needs to save \[148\] GPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && ia32 } } } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/stdarg-5.c b/gcc/testsuite/gcc.dg/tree-ssa/stdarg-5.c
index c8ad4fe32..c3eba1e21 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/stdarg-5.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/stdarg-5.c
@@ -23,7 +23,7 @@ f1 (int i, ...)
va_end (ap);
}
/* { dg-final { scan-tree-dump "f1: va_list escapes 0, needs to save 0 GPR units and 0 FPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && { ! { ia32 || llp64 } } } } } } */
-/* { dg-final { scan-tree-dump "f1: va_list escapes 0, needs to save all GPR units and 1" "stdarg" { target alpha*-*-linux* } } } */
+/* { dg-final { scan-tree-dump "f1: va_list escapes 0, needs to save all GPR units and 1" "stdarg" { target alpha*-*-linux* sw_64*-*-* } } } */
/* { dg-final { scan-tree-dump "f1: va_list escapes 0, needs to save all GPR units and 0 FPR units" "stdarg" { target s390*-*-linux* } } } */
/* { dg-final { scan-tree-dump "f1: va_list escapes 0, needs to save all GPR units and 0 FPR units" "stdarg" { target aarch64*-*-* } } } */
@@ -37,7 +37,7 @@ f2 (int i, ...)
va_end (ap);
}
/* { dg-final { scan-tree-dump "f2: va_list escapes 0, needs to save all GPR units and all FPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && { ! { ia32 || llp64 } } } } } } */
-/* { dg-final { scan-tree-dump "f2: va_list escapes 0, needs to save all GPR units and 1" "stdarg" { target alpha*-*-linux* } } } */
+/* { dg-final { scan-tree-dump "f2: va_list escapes 0, needs to save all GPR units and 1" "stdarg" { target alpha*-*-linux* sw_64*-*-* } } } */
/* { dg-final { scan-tree-dump "f2: va_list escapes 0, needs to save all GPR units and 0 FPR units" "stdarg" { target s390*-*-linux* } } } */
/* { dg-final { scan-tree-dump "f2: va_list escapes 0, needs to save all GPR units and 0 FPR units" "stdarg" { target aarch64*-*-* } } } */
@@ -56,7 +56,7 @@ f3 (int i, ...)
}
}
/* { dg-final { scan-tree-dump "f3: va_list escapes 0, needs to save 0 GPR units and 0 FPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && { ! { ia32 || llp64 } } } } } } */
-/* { dg-final { scan-tree-dump "f3: va_list escapes 0, needs to save 32 GPR units and 1" "stdarg" { target alpha*-*-linux* } } } */
+/* { dg-final { scan-tree-dump "f3: va_list escapes 0, needs to save 32 GPR units and 1" "stdarg" { target alpha*-*-linux* sw_64*-*-* } } } */
/* { dg-final { scan-tree-dump "f3: va_list escapes 0, needs to save 1 GPR units and 0 FPR units" "stdarg" { target s390*-*-linux* } } } */
/* { dg-final { scan-tree-dump "f3: va_list escapes 0, needs to save 8 GPR units and 0 FPR units" "stdarg" { target aarch64*-*-* } } } */
@@ -75,7 +75,7 @@ f4 (int i, ...)
}
}
/* { dg-final { scan-tree-dump "f4: va_list escapes 0, needs to save 16 GPR units and 16 FPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && { ! { ia32 || llp64 } } } } } } */
-/* { dg-final { scan-tree-dump "f4: va_list escapes 0, needs to save 24 GPR units and 1" "stdarg" { target alpha*-*-linux* } } } */
+/* { dg-final { scan-tree-dump "f4: va_list escapes 0, needs to save 24 GPR units and 1" "stdarg" { target alpha*-*-linux* sw_64*-*-* } } } */
/* { dg-final { scan-tree-dump "f4: va_list escapes 0, needs to save 2 GPR units and 0 FPR units" "stdarg" { target s390*-*-linux* } } } */
/* { dg-final { scan-tree-dump "f4: va_list escapes 0, needs to save 24 GPR units and 0 FPR units" "stdarg" { target aarch64*-*-* } } } */
@@ -90,7 +90,7 @@ f5 (int i, ...)
bar (__real__ ci + __imag__ ci);
}
/* { dg-final { scan-tree-dump "f5: va_list escapes 0, needs to save 16 GPR units and 0 FPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && { ! { ia32 || llp64 } } } } } } */
-/* { dg-final { scan-tree-dump "f5: va_list escapes 0, needs to save 32 GPR units and 1" "stdarg" { target alpha*-*-linux* } } } */
+/* { dg-final { scan-tree-dump "f5: va_list escapes 0, needs to save 32 GPR units and 1" "stdarg" { target alpha*-*-linux* sw_64*-*-* } } } */
/* { dg-final { scan-tree-dump "f5: va_list escapes 0, needs to save (4|2) GPR units and 0 FPR units" "stdarg" { target s390*-*-linux* } } } */
/* { dg-final { scan-tree-dump "f5: va_list escapes 0, needs to save 16 GPR units and 0 FPR units" "stdarg" { target aarch64*-*-* } } } */
@@ -105,7 +105,7 @@ f6 (int i, ...)
bar (__real__ ci + __imag__ cd);
}
/* { dg-final { scan-tree-dump "f6: va_list escapes 0, needs to save 8 GPR units and 32 FPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && { ! { ia32 || llp64 } } } } } } */
-/* { dg-final { scan-tree-dump "f6: va_list escapes 0, needs to save 32 GPR units and 3" "stdarg" { target alpha*-*-linux* } } } */
+/* { dg-final { scan-tree-dump "f6: va_list escapes 0, needs to save 32 GPR units and 3" "stdarg" { target alpha*-*-linux* sw_64*-*-* } } } */
/* { dg-final { scan-tree-dump "f6: va_list escapes 0, needs to save (3|2) GPR units and 0 FPR units" "stdarg" { target s390*-*-linux* } } } */
/* { dg-final { scan-tree-dump "f6: va_list escapes 0, needs to save 8 GPR units and 32 FPR units" "stdarg" { target aarch64*-*-* } } } */
@@ -120,6 +120,6 @@ f7 (int i, ...)
bar (__real__ cd + __imag__ cd);
}
/* { dg-final { scan-tree-dump "f7: va_list escapes 0, needs to save 0 GPR units and 64 FPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && { ! { ia32 || llp64 } } } } } } */
-/* { dg-final { scan-tree-dump "f7: va_list escapes 0, needs to save 32 GPR units and 2" "stdarg" { target alpha*-*-linux* } } } */
+/* { dg-final { scan-tree-dump "f7: va_list escapes 0, needs to save 32 GPR units and 2" "stdarg" { target alpha*-*-linux* sw_64*-*-* } } } */
/* { dg-final { scan-tree-dump "f7: va_list escapes 0, needs to save 2 GPR units and 0 FPR units" "stdarg" { target s390*-*-linux* } } } */
/* { dg-final { scan-tree-dump "f7: va_list escapes 0, needs to save 0 GPR units and 64 FPR units" "stdarg" { target aarch64*-*-* } } } */
diff --git a/gcc/testsuite/go.test/go-test.exp b/gcc/testsuite/go.test/go-test.exp
index 11c178ad7..d405316e1 100644
--- a/gcc/testsuite/go.test/go-test.exp
+++ b/gcc/testsuite/go.test/go-test.exp
@@ -174,6 +174,9 @@ proc go-set-goarch { } {
"alpha*-*-*" {
set goarch "alpha"
}
+ "sw_64*-*-*" {
+ set goarch "sw_64"
+ }
"arm*-*-*" -
"ep9312*-*-*" -
"strongarm*-*-*" -
diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp
index 292737dae..534405aa7 100644
--- a/gcc/testsuite/lib/target-supports.exp
+++ b/gcc/testsuite/lib/target-supports.exp
@@ -3526,6 +3526,7 @@ proc check_effective_target_vect_cmdline_needed { } {
return [check_cached_effective_target vect_cmdline_needed {
if { [istarget alpha*-*-*]
|| [istarget ia64-*-*]
+ || [istarget sw_64*-*-*]
|| (([istarget i?86-*-*] || [istarget x86_64-*-*])
&& ![is-effective-target ia32])
|| ([istarget powerpc*-*-*]
@@ -3554,6 +3555,7 @@ proc check_effective_target_vect_int { } {
|| [istarget amdgcn-*-*]
|| [istarget sparc*-*-*]
|| [istarget alpha*-*-*]
+ || [istarget sw_64*-*-*]
|| [istarget ia64-*-*]
|| [istarget aarch64*-*-*]
|| [is-effective-target arm_neon]
@@ -7019,6 +7021,7 @@ proc check_effective_target_vect_no_int_min_max { } {
return [check_cached_effective_target_indexed vect_no_int_min_max {
expr { [istarget sparc*-*-*]
|| [istarget alpha*-*-*]
+ || [istarget sw_64*-*-*]
|| ([istarget mips*-*-*]
&& [et-is-effective-target mips_loongson_mmi]) }}]
}
@@ -7031,7 +7034,7 @@ proc check_effective_target_vect_no_int_min_max { } {
proc check_effective_target_vect_no_int_add { } {
# Alpha only supports vector add on V8QI and V4HI.
return [check_cached_effective_target_indexed vect_no_int_add {
- expr { [istarget alpha*-*-*] }}]
+ expr { [istarget alpha*-*-*] || [istarget sw_64*-*-*] }}]
}
# Return 1 if the target plus current options does not support vector
@@ -8551,6 +8554,7 @@ proc check_effective_target_sync_long_long { } {
|| [istarget aarch64*-*-*]
|| [istarget arm*-*-*]
|| [istarget alpha*-*-*]
+ || [istarget sw_64*-*-*]
|| ([istarget sparc*-*-*] && [check_effective_target_lp64])
|| [istarget s390*-*-*] } {
return 1
@@ -8632,6 +8636,7 @@ proc check_effective_target_sync_long_long_runtime { } {
}
} "" ])
|| [istarget alpha*-*-*]
+ || [istarget sw_64*-*-*]
|| ([istarget sparc*-*-*]
&& [check_effective_target_lp64]
&& [check_effective_target_ultrasparc_hw])
@@ -8648,6 +8653,7 @@ proc check_effective_target_bswap { } {
return [check_cached_effective_target bswap {
expr { [istarget aarch64*-*-*]
|| [istarget alpha*-*-*]
+ || [istarget sw_64*-*-*]
|| [istarget i?86-*-*] || [istarget x86_64-*-*]
|| [istarget m68k-*-*]
|| [istarget powerpc*-*-*]
@@ -8672,6 +8678,7 @@ proc check_effective_target_sync_int_long { } {
|| [istarget i?86-*-*] || [istarget x86_64-*-*]
|| [istarget aarch64*-*-*]
|| [istarget alpha*-*-*]
+ || [istarget sw_64*-*-*]
|| [istarget arm*-*-linux-*]
|| [istarget arm*-*-uclinuxfdpiceabi]
|| ([istarget arm*-*-*]
@@ -8708,6 +8715,7 @@ proc check_effective_target_sync_char_short { } {
|| [istarget ia64-*-*]
|| [istarget i?86-*-*] || [istarget x86_64-*-*]
|| [istarget alpha*-*-*]
+ || [istarget sw_64*-*-*]
|| [istarget arm*-*-linux-*]
|| [istarget arm*-*-uclinuxfdpiceabi]
|| ([istarget arm*-*-*]
@@ -9149,6 +9157,7 @@ proc check_effective_target_fd_truncate { } {
proc add_options_for_ieee { flags } {
if { [istarget alpha*-*-*]
+ || [istarget sw_64*-*-*]
|| [istarget sh*-*-*] } {
return "$flags -mieee"
}
--
2.25.1

View File

@ -0,0 +1,29 @@
From 76693fb016acae2a7a1e130e196a5793f2b2f23b Mon Sep 17 00:00:00 2001
From: swcompiler <lc@wxiat.com>
Date: Mon, 25 Nov 2024 16:50:36 +0800
Subject: [PATCH 05/16] Sw64 Port: libatomic
---
libatomic/configure.tgt | 6 ++++++
1 file changed, 6 insertions(+)
diff --git a/libatomic/configure.tgt b/libatomic/configure.tgt
index 33f8c91ce..f9dbd2d35 100644
--- a/libatomic/configure.tgt
+++ b/libatomic/configure.tgt
@@ -81,6 +81,12 @@ case "${target_cpu}" in
ARCH=sparc
;;
+ sw_64*)
+ # fenv.c needs this option to generate inexact exceptions.
+ XCFLAGS="${XCFLAGS} -mfp-trap-mode=sui"
+ ARCH=sw_64
+ ;;
+
i[3456]86 | x86_64)
cat > conftestx.c <<EOF
#ifndef __GCC_HAVE_SYNC_COMPARE_AND_SWAP_4
--
2.25.1

497
0006-Sw64-Port-libgcc.patch Normal file
View File

@ -0,0 +1,497 @@
From d78f11c5eeabd79f798399a345e92b065c4e7e75 Mon Sep 17 00:00:00 2001
From: swcompiler <lc@wxiat.com>
Date: Mon, 25 Nov 2024 16:51:15 +0800
Subject: [PATCH 06/16] Sw64 Port: libgcc
---
libgcc/config.host | 18 +++
libgcc/config/sw_64/crtfastmath.c | 36 +++++
libgcc/config/sw_64/libgcc-sw_64-ldbl.ver | 50 ++++++
libgcc/config/sw_64/linux-unwind.h | 103 ++++++++++++
libgcc/config/sw_64/qrnnd.S | 181 ++++++++++++++++++++++
libgcc/config/sw_64/t-ieee | 2 +
libgcc/config/sw_64/t-linux | 1 +
libgcc/config/sw_64/t-sw_64 | 6 +
libgcc/libgcc2.c | 2 +-
9 files changed, 398 insertions(+), 1 deletion(-)
create mode 100644 libgcc/config/sw_64/crtfastmath.c
create mode 100644 libgcc/config/sw_64/libgcc-sw_64-ldbl.ver
create mode 100644 libgcc/config/sw_64/linux-unwind.h
create mode 100644 libgcc/config/sw_64/qrnnd.S
create mode 100644 libgcc/config/sw_64/t-ieee
create mode 100644 libgcc/config/sw_64/t-linux
create mode 100644 libgcc/config/sw_64/t-sw_64
diff --git a/libgcc/config.host b/libgcc/config.host
index 8c56fcae5..01cb28d2e 100644
--- a/libgcc/config.host
+++ b/libgcc/config.host
@@ -212,6 +212,9 @@ s390*-*-*)
sh[123456789lbe]*-*-*)
cpu_type=sh
;;
+sw_64*-*-*)
+ cpu_type=sw_64
+ ;;
tilegx*-*-*)
cpu_type=tilegx
;;
@@ -1467,6 +1470,21 @@ sparc64-*-linux*) # 64-bit SPARC's running GNU/Linux
;;
sparc64-*-netbsd*)
;;
+sw_64*-*-linux*)
+ tmake_file="${tmake_file} sw_64/t-sw_64 sw_64/t-ieee t-crtfm sw_64/t-linux"
+ extra_parts="$extra_parts crtfastmath.o"
+ md_unwind_header=sw_64/linux-unwind.h
+ ;;
+sw_64*-*-freebsd*)
+ tmake_file="${tmake_file} sw_64/t-sw_64 sw_64/t-ieee t-crtfm"
+ extra_parts="$extra_parts crtbeginT.o crtfastmath.o"
+ ;;
+sw_64*-*-netbsd*)
+ tmake_file="${tmake_file} sw_64/t-sw_64 sw_64/t-ieee"
+ ;;
+sw_64*-*-openbsd*)
+ tmake_file="${tmake_file} sw_64/t-sw_64 sw_64/t-ieee"
+ ;;
tic6x-*-uclinux)
tmake_file="${tmake_file} t-softfp-sfdf t-softfp-excl t-softfp \
c6x/t-elf c6x/t-uclinux t-crtstuff-pic t-libgcc-pic \
diff --git a/libgcc/config/sw_64/crtfastmath.c b/libgcc/config/sw_64/crtfastmath.c
new file mode 100644
index 000000000..aec92c819
--- /dev/null
+++ b/libgcc/config/sw_64/crtfastmath.c
@@ -0,0 +1,36 @@
+/*
+ * Copyright (C) 2001-2022 Free Software Foundation, Inc.
+ * Contributed by Richard Henderson (rth@redhat.com)
+ *
+ * This file is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 3, or (at your option) any
+ * later version.
+ *
+ * This file is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * Under Section 7 of GPL version 3, you are granted additional
+ * permissions described in the GCC Runtime Library Exception, version
+ * 3.1, as published by the Free Software Foundation.
+ *
+ * You should have received a copy of the GNU General Public License and
+ * a copy of the GCC Runtime Library Exception along with this program;
+ * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ * <http://www.gnu.org/licenses/>.
+ */
+
+/* Assume SYSV/1 compatible interfaces. */
+
+extern void
+__ieee_set_fp_control (unsigned long int);
+
+#define IEEE_MAP_DMZ (1UL << 12) /* Map denorm inputs to zero */
+#define IEEE_MAP_UMZ (1UL << 13) /* Map underflowed outputs to zero */
+
+static void __attribute__ ((constructor)) set_fast_math (void)
+{
+ __ieee_set_fp_control (IEEE_MAP_DMZ | IEEE_MAP_UMZ);
+}
diff --git a/libgcc/config/sw_64/libgcc-sw_64-ldbl.ver b/libgcc/config/sw_64/libgcc-sw_64-ldbl.ver
new file mode 100644
index 000000000..6666bc639
--- /dev/null
+++ b/libgcc/config/sw_64/libgcc-sw_64-ldbl.ver
@@ -0,0 +1,50 @@
+# Copyright (C) 2006-2019 Free Software Foundation, Inc.
+#
+# This file is part of GCC.
+#
+# GCC is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3, or (at your option)
+# any later version.
+#
+# GCC is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with GCC; see the file COPYING3. If not see
+# <http://www.gnu.org/licenses/>.
+
+%ifdef __LONG_DOUBLE_128__
+
+# long double 128 bit support in libgcc_s.so.1 is only available
+# when configured with --with-long-double-128. Make sure all the
+# symbols are available at @@GCC_LDBL_* versions to make it clear
+# there is a configurable symbol set.
+
+%exclude {
+ __fixtfdi
+ __fixunstfdi
+ __floatditf
+
+ __divtc3
+ __multc3
+ __powitf2
+}
+
+%inherit GCC_LDBL_3.0 GCC_3.0
+GCC_LDBL_3.0 {
+ __fixtfdi
+ __fixunstfdi
+ __floatditf
+}
+
+%inherit GCC_LDBL_4.0.0 GCC_4.0.0
+GCC_LDBL_4.0.0 {
+ __divtc3
+ __multc3
+ __powitf2
+}
+
+%endif
diff --git a/libgcc/config/sw_64/linux-unwind.h b/libgcc/config/sw_64/linux-unwind.h
new file mode 100644
index 000000000..d446c123f
--- /dev/null
+++ b/libgcc/config/sw_64/linux-unwind.h
@@ -0,0 +1,103 @@
+/* DWARF2 EH unwinding support for Sw_64 Linux.
+ Copyright (C) 2004-2022 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+<http://www.gnu.org/licenses/>. */
+
+#ifndef inhibit_libc
+/* Do code reading to identify a signal frame, and set the frame
+ state data appropriately. See unwind-dw2.c for the structs. */
+
+#include <signal.h>
+#include <sys/ucontext.h>
+
+#define MD_FALLBACK_FRAME_STATE_FOR sw_64_fallback_frame_state
+
+static _Unwind_Reason_Code
+sw_64_fallback_frame_state (struct _Unwind_Context *context,
+ _Unwind_FrameState *fs)
+{
+ unsigned int *pc = context->ra;
+ struct sigcontext *sc;
+ long new_cfa;
+ int i;
+
+ if (pc[0] != 0x47fe0410 /* mov $30,$16 */
+ || pc[2] != 0x00000083) /* callsys */
+ return _URC_END_OF_STACK;
+ if (context->cfa == 0)
+ return _URC_END_OF_STACK;
+ if (pc[1] == 0x201f0067) /* lda $0,NR_sigreturn */
+ sc = context->cfa;
+ else if (pc[1] == 0x201f015f) /* lda $0,NR_rt_sigreturn */
+ {
+ struct rt_sigframe
+ {
+ siginfo_t info;
+ ucontext_t uc;
+ } *rt_ = context->cfa;
+ /* The void * cast is necessary to avoid an aliasing warning.
+ The aliasing warning is correct, but should not be a problem
+ because it does not alias anything. */
+ sc = (struct sigcontext *) (void *) &rt_->uc.uc_mcontext;
+ }
+ else
+ return _URC_END_OF_STACK;
+
+ new_cfa = sc->sc_regs[30];
+ fs->regs.cfa_how = CFA_REG_OFFSET;
+ fs->regs.cfa_reg = 30;
+ fs->regs.cfa_offset = new_cfa - (long) context->cfa;
+ for (i = 0; i < 30; ++i)
+ {
+ fs->regs.reg[i].how = REG_SAVED_OFFSET;
+ fs->regs.reg[i].loc.offset = (long) &sc->sc_regs[i] - new_cfa;
+ }
+ for (i = 0; i < 31; ++i)
+ {
+ fs->regs.reg[i + 32].how = REG_SAVED_OFFSET;
+ fs->regs.reg[i + 32].loc.offset = (long) &sc->sc_fpregs[i] - new_cfa;
+ }
+ fs->regs.reg[64].how = REG_SAVED_OFFSET;
+ fs->regs.reg[64].loc.offset = (long) &sc->sc_pc - new_cfa;
+ fs->retaddr_column = 64;
+ fs->signal_frame = 1;
+
+ return _URC_NO_REASON;
+}
+
+#define MD_FROB_UPDATE_CONTEXT sw_64_frob_update_context
+
+/* Fix up for signal handlers that don't have S flag set. */
+
+static void
+sw_64_frob_update_context (struct _Unwind_Context *context,
+ _Unwind_FrameState *fs ATTRIBUTE_UNUSED)
+{
+ unsigned int *pc = context->ra;
+
+ if (pc[0] == 0x47fe0410 /* mov $30,$16 */
+ && pc[2] == 0x00000083 /* callsys */
+ && (pc[1] == 0x201f0067 /* lda $0,NR_sigreturn */
+ || pc[1] == 0x201f015f)) /* lda $0,NR_rt_sigreturn */
+ _Unwind_SetSignalFrame (context, 1);
+}
+#endif
diff --git a/libgcc/config/sw_64/qrnnd.S b/libgcc/config/sw_64/qrnnd.S
new file mode 100644
index 000000000..ab2e3d0bc
--- /dev/null
+++ b/libgcc/config/sw_64/qrnnd.S
@@ -0,0 +1,181 @@
+ # Sw_64 __udiv_qrnnd
+ # Copyright (C) 1992-2022 Free Software Foundation, Inc.
+
+ # This file is part of GCC.
+
+ # The GNU MP Library is free software; you can redistribute it and/or modify
+ # it under the terms of the GNU General Public License as published by
+ # the Free Software Foundation; either version 3 of the License, or (at your
+ # option) any later version.
+
+ # This file is distributed in the hope that it will be useful, but
+ # WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ # or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
+ # License for more details.
+
+ # Under Section 7 of GPL version 3, you are granted additional
+ # permissions described in the GCC Runtime Library Exception, version
+ # 3.1, as published by the Free Software Foundation.
+
+ # You should have received a copy of the GNU General Public License and
+ # a copy of the GCC Runtime Library Exception along with this program;
+ # see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ # <http://www.gnu.org/licenses/>.
+
+#ifdef __ELF__
+.section .note.GNU-stack,""
+#endif
+
+ .set noreorder
+ .set noat
+
+ .text
+
+ .globl __udiv_qrnnd
+ .ent __udiv_qrnnd
+#ifdef __VMS__
+__udiv_qrnnd..en:
+ .frame $29,0,$26,0
+ .prologue
+#else
+__udiv_qrnnd:
+ .frame $30,0,$26,0
+ .prologue 0
+#endif
+/*
+ ldiq -> ldi
+ addq->addl
+ subq->subl
+ cmovne qb,tmp,n1->selne qb,tmp,n1,n1
+ stq ->stl
+ cmoveq tmp,AT,n1(n0)->seleq tmp,AT,n1,n1(n0,n0) */
+#define cnt $2
+#define tmp $3
+#define rem_ptr $16
+#define n1 $17
+#define n0 $18
+#define d $19
+#define qb $20
+#define AT $at
+
+ ldi cnt,16
+ blt d,$largedivisor
+
+$loop1: cmplt n0,0,tmp
+ addl n1,n1,n1
+ bis n1,tmp,n1
+ addl n0,n0,n0
+ cmpule d,n1,qb
+ subl n1,d,tmp
+ selne qb,tmp,n1,n1
+ bis n0,qb,n0
+ cmplt n0,0,tmp
+ addl n1,n1,n1
+ bis n1,tmp,n1
+ addl n0,n0,n0
+ cmpule d,n1,qb
+ subl n1,d,tmp
+ selne qb,tmp,n1,n1
+ bis n0,qb,n0
+ cmplt n0,0,tmp
+ addl n1,n1,n1
+ bis n1,tmp,n1
+ addl n0,n0,n0
+ cmpule d,n1,qb
+ subl n1,d,tmp
+ selne qb,tmp,n1,n1
+ bis n0,qb,n0
+ cmplt n0,0,tmp
+ addl n1,n1,n1
+ bis n1,tmp,n1
+ addl n0,n0,n0
+ cmpule d,n1,qb
+ subl n1,d,tmp
+ selne qb,tmp,n1,n1
+ bis n0,qb,n0
+ subl cnt,1,cnt
+ bgt cnt,$loop1
+ stl n1,0(rem_ptr)
+ bis $31,n0,$0
+ ret $31,($26),1
+
+$largedivisor:
+ and n0,1,$4
+
+ srl n0,1,n0
+ sll n1,63,tmp
+ or tmp,n0,n0
+ srl n1,1,n1
+
+ and d,1,$6
+ srl d,1,$5
+ addl $5,$6,$5
+
+$loop2: cmplt n0,0,tmp
+ addl n1,n1,n1
+ bis n1,tmp,n1
+ addl n0,n0,n0
+ cmpule $5,n1,qb
+ subl n1,$5,tmp
+ selne qb,tmp,n1,n1
+ bis n0,qb,n0
+ cmplt n0,0,tmp
+ addl n1,n1,n1
+ bis n1,tmp,n1
+ addl n0,n0,n0
+ cmpule $5,n1,qb
+ subl n1,$5,tmp
+ selne qb,tmp,n1,n1
+ bis n0,qb,n0
+ cmplt n0,0,tmp
+ addl n1,n1,n1
+ bis n1,tmp,n1
+ addl n0,n0,n0
+ cmpule $5,n1,qb
+ subl n1,$5,tmp
+ selne qb,tmp,n1,n1
+ bis n0,qb,n0
+ cmplt n0,0,tmp
+ addl n1,n1,n1
+ bis n1,tmp,n1
+ addl n0,n0,n0
+ cmpule $5,n1,qb
+ subl n1,$5,tmp
+ selne qb,tmp,n1,n1
+ bis n0,qb,n0
+ subl cnt,1,cnt
+ bgt cnt,$loop2
+
+ addl n1,n1,n1
+ addl $4,n1,n1
+ bne $6,$Odd
+ stl n1,0(rem_ptr)
+ bis $31,n0,$0
+ ret $31,($26),1
+
+$Odd:
+ /* q' in n0. r' in n1 */
+ addl n1,n0,n1
+
+ cmpult n1,n0,tmp # tmp := carry from addl
+ subl n1,d,AT
+ addl n0,tmp,n0
+ selne tmp,AT,n1,n1
+
+ cmpult n1,d,tmp
+ addl n0,1,AT
+ seleq tmp,AT,n0,n0
+ subl n1,d,AT
+ seleq tmp,AT,n1,n1
+
+ stl n1,0(rem_ptr)
+ bis $31,n0,$0
+ ret $31,($26),1
+
+#ifdef __VMS__
+ .link
+ .align 3
+__udiv_qrnnd:
+ .pdesc __udiv_qrnnd..en,null
+#endif
+ .end __udiv_qrnnd
diff --git a/libgcc/config/sw_64/t-ieee b/libgcc/config/sw_64/t-ieee
new file mode 100644
index 000000000..9b66e50ac
--- /dev/null
+++ b/libgcc/config/sw_64/t-ieee
@@ -0,0 +1,2 @@
+# All sw_64s get an IEEE compliant set of libraries.
+#HOST_LIBGCC2_CFLAGS += -mieee
diff --git a/libgcc/config/sw_64/t-linux b/libgcc/config/sw_64/t-linux
new file mode 100644
index 000000000..fe9d20e9a
--- /dev/null
+++ b/libgcc/config/sw_64/t-linux
@@ -0,0 +1 @@
+SHLIB_MAPFILES += $(srcdir)/config/sw_64/libgcc-sw_64-ldbl.ver
diff --git a/libgcc/config/sw_64/t-sw_64 b/libgcc/config/sw_64/t-sw_64
new file mode 100644
index 000000000..dffba8ee7
--- /dev/null
+++ b/libgcc/config/sw_64/t-sw_64
@@ -0,0 +1,6 @@
+# This is a support routine for longlong.h, used by libgcc2.c.
+LIB2ADD += $(srcdir)/config/sw_64/qrnnd.S
+
+# When GAS-generated unwind tables are created, they get created
+# after the __FRAME_END__ terminator, which causes an ld error.
+CRTSTUFF_T_CFLAGS = -fno-unwind-tables
diff --git a/libgcc/libgcc2.c b/libgcc/libgcc2.c
index 3ebfcc83f..f01a150c4 100644
--- a/libgcc/libgcc2.c
+++ b/libgcc/libgcc2.c
@@ -2280,7 +2280,7 @@ int mprotect (char *,int, int);
int
getpagesize (void)
{
-#ifdef _ALPHA_
+#if defined (_ALPHA_) || defined (_SW_64_)
return 8192;
#else
return 4096;
--
2.25.1

1054
0007-Sw64-Port-libffi.patch Normal file

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,57 @@
From 5920c7d65bf452ddec031bfcbe610404324a38bc Mon Sep 17 00:00:00 2001
From: swcompiler <lc@wxiat.com>
Date: Mon, 25 Nov 2024 16:52:12 +0800
Subject: [PATCH 08/16] Sw64 Port: libgfortran
---
libgfortran/config/fpu-glibc.h | 6 +++---
libgfortran/configure.host | 2 ++
2 files changed, 5 insertions(+), 3 deletions(-)
diff --git a/libgfortran/config/fpu-glibc.h b/libgfortran/config/fpu-glibc.h
index 265ef6938..8a5eb3c99 100644
--- a/libgfortran/config/fpu-glibc.h
+++ b/libgfortran/config/fpu-glibc.h
@@ -446,7 +446,7 @@ set_fpu_state (void *state)
int
support_fpu_underflow_control (int kind __attribute__((unused)))
{
-#if defined(__alpha__) && defined(FE_MAP_UMZ)
+#if (defined(__alpha__) || defined(__sw_64__)) && defined(FE_MAP_UMZ)
return (kind == 4 || kind == 8) ? 1 : 0;
#else
return 0;
@@ -457,7 +457,7 @@ support_fpu_underflow_control (int kind __attribute__((unused)))
int
get_fpu_underflow_mode (void)
{
-#if defined(__alpha__) && defined(FE_MAP_UMZ)
+#if (defined(__alpha__) || defined(__sw_64__)) && defined(FE_MAP_UMZ)
fenv_t state = __ieee_get_fp_control ();
@@ -475,7 +475,7 @@ get_fpu_underflow_mode (void)
void
set_fpu_underflow_mode (int gradual __attribute__((unused)))
{
-#if defined(__alpha__) && defined(FE_MAP_UMZ)
+#if (defined(__alpha__) || defined(__sw_64__)) && defined(FE_MAP_UMZ)
fenv_t state = __ieee_get_fp_control ();
diff --git a/libgfortran/configure.host b/libgfortran/configure.host
index 3d6c2db77..ddd24ac12 100644
--- a/libgfortran/configure.host
+++ b/libgfortran/configure.host
@@ -71,6 +71,8 @@ case "${host_cpu}" in
ieee_flags="-mieee" ;;
sh*)
ieee_flags="-mieee" ;;
+ sw_64*)
+ ieee_flags="-mieee" ;;
esac
tmake_file=
--
2.25.1

629
0009-Sw64-Port-libgo.patch Normal file
View File

@ -0,0 +1,629 @@
From 9e32a64afd05cb18a5dcb09a27322e243cd245f4 Mon Sep 17 00:00:00 2001
From: swcompiler <lc@wxiat.com>
Date: Mon, 25 Nov 2024 16:52:50 +0800
Subject: [PATCH 09/16] Sw64 Port: libgo
---
libgo/configure | 7 +-
libgo/configure.ac | 7 +-
libgo/go/cmd/cgo/main.go | 2 +
libgo/go/cmd/internal/sys/arch.go | 11 +++
libgo/go/debug/elf/elf.go | 72 ++++++++++++++++++
libgo/go/debug/elf/elf_test.go | 1 +
libgo/go/debug/elf/file.go | 47 ++++++++++++
libgo/go/encoding/xml/xml.go | 1 +
libgo/go/go/build/syslist.go | 2 +-
.../syscall/unix/getrandom_linux_sw_64.go | 9 +++
.../syscall/unix/sysnum_linux_sw_64.go | 10 +++
libgo/go/net/listen_test.go | 2 +-
libgo/go/regexp/testdata/basic.dat | 1 +
libgo/go/runtime/hash64.go | 2 +-
libgo/go/runtime/lfstack_64bit.go | 2 +-
libgo/go/runtime/mpagealloc_64bit.go | 2 +-
libgo/go/syscall/endian_little.go | 2 +-
libgo/go/syscall/libcall_linux_sw_64.go | 13 ++++
libgo/go/syscall/syscall_linux_sw_64.go | 25 ++++++
libgo/goarch.sh | 5 ++
libgo/mksysinfo.sh | 5 ++
libgo/runtime/go-signal.c | 76 ++++++++++---------
22 files changed, 257 insertions(+), 47 deletions(-)
create mode 100644 libgo/go/internal/syscall/unix/getrandom_linux_sw_64.go
create mode 100644 libgo/go/internal/syscall/unix/sysnum_linux_sw_64.go
create mode 100644 libgo/go/syscall/libcall_linux_sw_64.go
create mode 100644 libgo/go/syscall/syscall_linux_sw_64.go
diff --git a/libgo/configure b/libgo/configure
index ffe17c9be..b90dd9dae 100755
--- a/libgo/configure
+++ b/libgo/configure
@@ -14124,10 +14124,10 @@ esac
# - libgo/go/syscall/endian_XX.go
# - possibly others
# - possibly update files in libgo/go/internal/syscall/unix
-ALLGOARCH="386 alpha amd64 amd64p32 arm armbe arm64 arm64be ia64 m68k mips mipsle mips64 mips64le mips64p32 mips64p32le nios2 ppc ppc64 ppc64le riscv riscv64 s390 s390x sh shbe sparc sparc64 wasm"
+ALLGOARCH="386 alpha amd64 amd64p32 arm armbe arm64 arm64be ia64 m68k mips mipsle mips64 mips64le mips64p32 mips64p32le nios2 ppc ppc64 ppc64le riscv riscv64 s390 s390x sh shbe sparc sparc64 sw_64 wasm"
# All known GOARCH family values.
-ALLGOARCHFAMILY="I386 ALPHA AMD64 ARM ARM64 IA64 M68K MIPS MIPS64 NIOS2 PPC PPC64 RISCV RISCV64 S390 S390X SH SPARC SPARC64 WASM"
+ALLGOARCHFAMILY="I386 ALPHA AMD64 ARM ARM64 IA64 M68K MIPS MIPS64 NIOS2 PPC PPC64 RISCV RISCV64 S390 S390X SH SPARC SPARC64 SW_64 WASM"
GOARCH=unknown
case ${host} in
@@ -14323,6 +14323,9 @@ else
fi
rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
;;
+ sw_64*-*-*)
+ GOARCH=sw_64
+ ;;
esac
diff --git a/libgo/configure.ac b/libgo/configure.ac
index 7e2b98ba6..9f903c64e 100644
--- a/libgo/configure.ac
+++ b/libgo/configure.ac
@@ -239,10 +239,10 @@ AC_SUBST(USE_DEJAGNU)
# - libgo/go/syscall/endian_XX.go
# - possibly others
# - possibly update files in libgo/go/internal/syscall/unix
-ALLGOARCH="386 alpha amd64 amd64p32 arm armbe arm64 arm64be ia64 m68k mips mipsle mips64 mips64le mips64p32 mips64p32le nios2 ppc ppc64 ppc64le riscv riscv64 s390 s390x sh shbe sparc sparc64 wasm"
+ALLGOARCH="386 alpha amd64 amd64p32 arm armbe arm64 arm64be ia64 m68k mips mipsle mips64 mips64le mips64p32 mips64p32le nios2 ppc ppc64 ppc64le riscv riscv64 s390 s390x sh shbe sparc sparc64 sw_64 wasm"
# All known GOARCH family values.
-ALLGOARCHFAMILY="I386 ALPHA AMD64 ARM ARM64 IA64 M68K MIPS MIPS64 NIOS2 PPC PPC64 RISCV RISCV64 S390 S390X SH SPARC SPARC64 WASM"
+ALLGOARCHFAMILY="I386 ALPHA AMD64 ARM ARM64 IA64 M68K MIPS MIPS64 NIOS2 PPC PPC64 RISCV RISCV64 S390 S390X SH SPARC SPARC64 SW_64 WASM"
GOARCH=unknown
case ${host} in
@@ -370,6 +370,9 @@ AC_COMPILE_IFELSE([AC_LANG_SOURCE([
[GOARCH=sparc],
[GOARCH=sparc64])
;;
+ sw_64*-*-*)
+ GOARCH=sw_64
+ ;;
esac
AC_SUBST(GOARCH)
AC_SUBST(ALLGOARCH)
diff --git a/libgo/go/cmd/cgo/main.go b/libgo/go/cmd/cgo/main.go
index 58477e470..842237774 100644
--- a/libgo/go/cmd/cgo/main.go
+++ b/libgo/go/cmd/cgo/main.go
@@ -194,6 +194,7 @@ var ptrSizeMap = map[string]int64{
"shbe": 4,
"sparc": 4,
"sparc64": 8,
+ "sw_64": 8,
}
var intSizeMap = map[string]int64{
@@ -221,6 +222,7 @@ var intSizeMap = map[string]int64{
"shbe": 4,
"sparc": 4,
"sparc64": 8,
+ "sw_64": 8,
}
var cPrefix string
diff --git a/libgo/go/cmd/internal/sys/arch.go b/libgo/go/cmd/internal/sys/arch.go
index 97d0ac9bb..dea328a34 100644
--- a/libgo/go/cmd/internal/sys/arch.go
+++ b/libgo/go/cmd/internal/sys/arch.go
@@ -12,6 +12,7 @@ type ArchFamily byte
const (
NoArch ArchFamily = iota
+ SW_64
AMD64
ARM
ARM64
@@ -229,7 +230,17 @@ var ArchWasm = &Arch{
CanMergeLoads: false,
}
+/*TODO*/
+var ArchSW_64 = &Arch{
+ Name: "sw_64",
+ Family: SW_64,
+ ByteOrder: binary.LittleEndian,
+ PtrSize: 8,
+ RegSize: 8,
+ MinLC: 1,
+}
var Archs = [...]*Arch{
+ ArchSW_64,
Arch386,
ArchAMD64,
ArchARM,
diff --git a/libgo/go/debug/elf/elf.go b/libgo/go/debug/elf/elf.go
index 4c51bc4de..1899a4245 100644
--- a/libgo/go/debug/elf/elf.go
+++ b/libgo/go/debug/elf/elf.go
@@ -6,6 +6,7 @@
* $FreeBSD: src/sys/sys/elf64.h,v 1.10.14.1 2005/12/30 22:13:58 marcel Exp $
* $FreeBSD: src/sys/sys/elf_common.h,v 1.15.8.1 2005/12/30 22:13:58 marcel Exp $
* $FreeBSD: src/sys/alpha/include/elf.h,v 1.14 2003/09/25 01:10:22 peter Exp $
+ * $FreeBSD: src/sys/sw_64/include/elf.h,v 1.14 2003/09/25 01:10:22 peter Exp $
* $FreeBSD: src/sys/amd64/include/elf.h,v 1.18 2004/08/03 08:21:48 dfr Exp $
* $FreeBSD: src/sys/arm/include/elf.h,v 1.5.2.1 2006/06/30 21:42:52 cognet Exp $
* $FreeBSD: src/sys/i386/include/elf.h,v 1.16 2004/08/02 19:12:17 dfr Exp $
@@ -390,6 +391,8 @@ const (
EM_MIPS_RS4_BE Machine = 10 /* MIPS R4000 Big-Endian */
EM_ALPHA_STD Machine = 41 /* Digital Alpha (standard value). */
EM_ALPHA Machine = 0x9026 /* Alpha (written in the absence of an ABI) */
+ EM_SW_64_STD Machine = 41 /* Digital Sw_64 (standard value). */
+ EM_SW_64 Machine = 0x9916 /* Mieee-opt Sw_64 (written in the absence of an ABI) */
)
var machineStrings = []intName{
@@ -581,6 +584,8 @@ var machineStrings = []intName{
{10, "EM_MIPS_RS4_BE"},
{41, "EM_ALPHA_STD"},
{0x9026, "EM_ALPHA"},
+ {41, "EM_SW_64_STD"},
+ {0x9916, "EM_SW_64"},
}
func (i Machine) String() string { return stringName(uint32(i), machineStrings, false) }
@@ -1645,6 +1650,73 @@ var ralphaStrings = []intName{
func (i R_ALPHA) String() string { return stringName(uint32(i), ralphaStrings, false) }
func (i R_ALPHA) GoString() string { return stringName(uint32(i), ralphaStrings, true) }
+// Relocation types for SW_64.
+type R_SW_64 int
+
+const (
+ R_SW_64_NONE R_SW_64 = 0 /* No reloc */
+ R_SW_64_REFLONG R_SW_64 = 1 /* Direct 32 bit */
+ R_SW_64_REFQUAD R_SW_64 = 2 /* Direct 64 bit */
+ R_SW_64_GPREL32 R_SW_64 = 3 /* GP relative 32 bit */
+ R_SW_64_LITERAL R_SW_64 = 4 /* GP relative 16 bit w/optimization */
+ R_SW_64_LITUSE R_SW_64 = 5 /* Optimization hint for LITERAL */
+ R_SW_64_GPDISP R_SW_64 = 6 /* Add displacement to GP */
+ R_SW_64_BRADDR R_SW_64 = 7 /* PC+4 relative 23 bit shifted */
+ R_SW_64_HINT R_SW_64 = 8 /* PC+4 relative 16 bit shifted */
+ R_SW_64_SREL16 R_SW_64 = 9 /* PC relative 16 bit */
+ R_SW_64_SREL32 R_SW_64 = 10 /* PC relative 32 bit */
+ R_SW_64_SREL64 R_SW_64 = 11 /* PC relative 64 bit */
+ R_SW_64_OP_PUSH R_SW_64 = 12 /* OP stack push */
+ R_SW_64_OP_STORE R_SW_64 = 13 /* OP stack pop and store */
+ R_SW_64_OP_PSUB R_SW_64 = 14 /* OP stack subtract */
+ R_SW_64_OP_PRSHIFT R_SW_64 = 15 /* OP stack right shift */
+ R_SW_64_GPVALUE R_SW_64 = 16
+ R_SW_64_GPRELHIGH R_SW_64 = 17
+ R_SW_64_GPRELLOW R_SW_64 = 18
+ R_SW_64_IMMED_GP_16 R_SW_64 = 19
+ R_SW_64_IMMED_GP_HI32 R_SW_64 = 20
+ R_SW_64_IMMED_SCN_HI32 R_SW_64 = 21
+ R_SW_64_IMMED_BR_HI32 R_SW_64 = 22
+ R_SW_64_IMMED_LO32 R_SW_64 = 23
+ R_SW_64_COPY R_SW_64 = 24 /* Copy symbol at runtime */
+ R_SW_64_GLOB_DAT R_SW_64 = 25 /* Create GOT entry */
+ R_SW_64_JMP_SLOT R_SW_64 = 26 /* Create PLT entry */
+ R_SW_64_RELATIVE R_SW_64 = 27 /* Adjust by program base */
+)
+var rsw_64Strings = []intName{
+ {0, "R_SW_64_NONE"},
+ {1, "R_SW_64_REFLONG"},
+ {2, "R_SW_64_REFQUAD"},
+ {3, "R_SW_64_GPREL32"},
+ {4, "R_SW_64_LITERAL"},
+ {5, "R_SW_64_LITUSE"},
+ {6, "R_SW_64_GPDISP"},
+ {7, "R_SW_64_BRADDR"},
+ {8, "R_SW_64_HINT"},
+ {9, "R_SW_64_SREL16"},
+ {10, "R_SW_64_SREL32"},
+ {11, "R_SW_64_SREL64"},
+ {12, "R_SW_64_OP_PUSH"},
+ {13, "R_SW_64_OP_STORE"},
+ {14, "R_SW_64_OP_PSUB"},
+ {15, "R_SW_64_OP_PRSHIFT"},
+ {16, "R_SW_64_GPVALUE"},
+ {17, "R_SW_64_GPRELHIGH"},
+ {18, "R_SW_64_GPRELLOW"},
+ {19, "R_SW_64_IMMED_GP_16"},
+ {20, "R_SW_64_IMMED_GP_HI32"},
+ {21, "R_SW_64_IMMED_SCN_HI32"},
+ {22, "R_SW_64_IMMED_BR_HI32"},
+ {23, "R_SW_64_IMMED_LO32"},
+ {24, "R_SW_64_COPY"},
+ {25, "R_SW_64_GLOB_DAT"},
+ {26, "R_SW_64_JMP_SLOT"},
+ {27, "R_SW_64_RELATIVE"},
+}
+
+func (i R_SW_64) String() string { return stringName(uint32(i), rsw_64Strings, false) }
+func (i R_SW_64) GoString() string { return stringName(uint32(i), rsw_64Strings, true) }
+
// Relocation types for ARM.
type R_ARM int
diff --git a/libgo/go/debug/elf/elf_test.go b/libgo/go/debug/elf/elf_test.go
index b8c310dba..940af9c51 100644
--- a/libgo/go/debug/elf/elf_test.go
+++ b/libgo/go/debug/elf/elf_test.go
@@ -31,6 +31,7 @@ var nameTests = []nameTest{
{STV_HIDDEN, "STV_HIDDEN"},
{R_X86_64_PC32, "R_X86_64_PC32"},
{R_ALPHA_OP_PUSH, "R_ALPHA_OP_PUSH"},
+ {R_SW_64_OP_PUSH, "R_SW_64_OP_PUSH"},
{R_ARM_THM_ABS5, "R_ARM_THM_ABS5"},
{R_386_GOT32, "R_386_GOT32"},
{R_PPC_GOT16_HI, "R_PPC_GOT16_HI"},
diff --git a/libgo/go/debug/elf/file.go b/libgo/go/debug/elf/file.go
index 60d2788c9..53f34d78c 100644
--- a/libgo/go/debug/elf/file.go
+++ b/libgo/go/debug/elf/file.go
@@ -632,6 +632,8 @@ func (f *File) applyRelocations(dst []byte, rels []byte) error {
return f.applyRelocationsSPARC64(dst, rels)
case f.Class == ELFCLASS64 && f.Machine == EM_ALPHA:
return f.applyRelocationsALPHA(dst, rels)
+ case f.Class == ELFCLASS64 && f.Machine == EM_SW_64:
+ return f.applyRelocationsSW_64(dst, rels)
default:
return errors.New("applyRelocations: not implemented")
}
@@ -1266,6 +1268,51 @@ func (f *File) applyRelocationsALPHA(dst []byte, rels []byte) error {
return nil
}
+func (f *File) applyRelocationsSW_64(dst []byte, rels []byte) error {
+ // 24 is the size of Rela64.
+ if len(rels)%24 != 0 {
+ return errors.New("length of relocation section is not a multiple of 24")
+ }
+
+ symbols, _, err := f.getSymbols(SHT_SYMTAB)
+ if err != nil {
+ return err
+ }
+
+ b := bytes.NewReader(rels)
+ var rela Rela64
+ for b.Len() > 0 {
+ binary.Read(b, f.ByteOrder, &rela)
+ symNo := rela.Info >> 32
+ t := R_SW_64(rela.Info & 0xffff)
+
+ if symNo == 0 || symNo > uint64(len(symbols)) {
+ continue
+ }
+ sym := &symbols[symNo-1]
+ if SymType(sym.Info&0xf) != STT_SECTION {
+ // We don't handle non-section relocations for now.
+ continue
+ }
+
+ // There are relocations, so this must be a normal
+ // object file, and we only look at section symbols,
+ // so we assume that the symbol value is 0.
+ switch t {
+ case R_SW_64_REFQUAD:
+ if rela.Off+8 >= uint64(len(dst)) || rela.Addend < 0 {
+ continue
+ }
+ f.ByteOrder.PutUint64(dst[rela.Off:rela.Off+8], uint64(rela.Addend))
+ case R_SW_64_REFLONG:
+ if rela.Off+4 < uint64(len(dst)) && rela.Addend >= 0 {
+ f.ByteOrder.PutUint32(dst[rela.Off:rela.Off+4], uint32(rela.Addend))
+ }
+ }
+ }
+ return nil
+}
+
func (f *File) DWARF() (*dwarf.Data, error) {
dwarfSuffix := func(s *Section) string {
switch {
diff --git a/libgo/go/encoding/xml/xml.go b/libgo/go/encoding/xml/xml.go
index 8a0a9c253..f40099a1b 100644
--- a/libgo/go/encoding/xml/xml.go
+++ b/libgo/go/encoding/xml/xml.go
@@ -1727,6 +1727,7 @@ var htmlEntity = map[string]string{
"Psi": "\u03A8",
"Omega": "\u03A9",
"alpha": "\u03B1",
+ "sw_64": "\u03B1",
"beta": "\u03B2",
"gamma": "\u03B3",
"delta": "\u03B4",
diff --git a/libgo/go/go/build/syslist.go b/libgo/go/go/build/syslist.go
index 1b11365f5..74d7fec11 100644
--- a/libgo/go/go/build/syslist.go
+++ b/libgo/go/go/build/syslist.go
@@ -8,4 +8,4 @@ package build
// Do not remove from this list, as these are used for go/build filename matching.
const goosList = "aix android darwin dragonfly freebsd hurd illumos ios js linux nacl netbsd openbsd plan9 solaris windows zos "
-const goarchList = "386 amd64 amd64p32 arm armbe arm64 arm64be loong64 mips mipsle mips64 mips64le ppc ppc64 ppc64le riscv riscv64 s390 s390x sparc sparc64 wasm alpha m68k nios2 sh shbe "
+const goarchList = "386 amd64 amd64p32 arm armbe arm64 arm64be loong64 mips mipsle mips64 mips64le ppc ppc64 ppc64le riscv riscv64 s390 s390x sparc sparc64 wasm alpha m68k nios2 sh shbe sw_64"
diff --git a/libgo/go/internal/syscall/unix/getrandom_linux_sw_64.go b/libgo/go/internal/syscall/unix/getrandom_linux_sw_64.go
new file mode 100644
index 000000000..9587b5aa4
--- /dev/null
+++ b/libgo/go/internal/syscall/unix/getrandom_linux_sw_64.go
@@ -0,0 +1,9 @@
+// Copyright 2016 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package unix
+
+// Linux getrandom system call number.
+// See GetRandom in getrandom_linux.go.
+const randomTrap uintptr = 511
diff --git a/libgo/go/internal/syscall/unix/sysnum_linux_sw_64.go b/libgo/go/internal/syscall/unix/sysnum_linux_sw_64.go
new file mode 100644
index 000000000..c40bc8488
--- /dev/null
+++ b/libgo/go/internal/syscall/unix/sysnum_linux_sw_64.go
@@ -0,0 +1,10 @@
+// Copyright 2016 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package unix
+
+const (
+ getrandomTrap uintptr = 511
+ copyFileRangeTrap uintptr = 519
+)
diff --git a/libgo/go/net/listen_test.go b/libgo/go/net/listen_test.go
index 59c011212..d61055a04 100644
--- a/libgo/go/net/listen_test.go
+++ b/libgo/go/net/listen_test.go
@@ -673,7 +673,7 @@ func multicastRIBContains(ip IP) (bool, error) {
case "aix", "dragonfly", "netbsd", "openbsd", "plan9", "solaris", "illumos", "windows":
return true, nil // not implemented yet
case "linux":
- if runtime.GOARCH == "arm" || runtime.GOARCH == "alpha" {
+ if runtime.GOARCH == "arm" || runtime.GOARCH == "alpha" || runtime.GOARCH == "sw_64" {
return true, nil // not implemented yet
}
}
diff --git a/libgo/go/regexp/testdata/basic.dat b/libgo/go/regexp/testdata/basic.dat
index 1776b1ff9..b53926812 100644
--- a/libgo/go/regexp/testdata/basic.dat
+++ b/libgo/go/regexp/testdata/basic.dat
@@ -153,6 +153,7 @@ E a[bcd]*dcdcde adcdcde (0,7)
E (ab|a)b*c abc (0,3)(0,2)
E ((a)(b)c)(d) abcd (0,4)(0,3)(0,1)(1,2)(3,4)
BE [A-Za-z_][A-Za-z0-9_]* alpha (0,5)
+BE [A-Za-z_][A-Za-z0-9_]* sw_64 (0,5)
E ^a(bc+|b[eh])g|.h$ abh (1,3)
E (bc+d$|ef*g.|h?i(j|k)) effgz (0,5)(0,5)
E (bc+d$|ef*g.|h?i(j|k)) ij (0,2)(0,2)(1,2)
diff --git a/libgo/go/runtime/hash64.go b/libgo/go/runtime/hash64.go
index a1d2529e7..ee793552c 100644
--- a/libgo/go/runtime/hash64.go
+++ b/libgo/go/runtime/hash64.go
@@ -5,7 +5,7 @@
// Hashing algorithm inspired by
// wyhash: https://github.com/wangyi-fudan/wyhash
-//go:build amd64 || arm64 || mips64 || mips64le || ppc64 || ppc64le || riscv64 || s390x || wasm || alpha || arm64be || ia64 || sparc64
+//go:build amd64 || arm64 || mips64 || mips64le || ppc64 || ppc64le || riscv64 || s390x || wasm || alpha || sw_64 || arm64be || ia64 || sparc64
package runtime
diff --git a/libgo/go/runtime/lfstack_64bit.go b/libgo/go/runtime/lfstack_64bit.go
index 8e0883094..0e87c5059 100644
--- a/libgo/go/runtime/lfstack_64bit.go
+++ b/libgo/go/runtime/lfstack_64bit.go
@@ -2,7 +2,7 @@
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
-//go:build amd64 || arm64 || mips64 || mips64le || ppc64 || ppc64le || riscv64 || s390x || wasm || arm64be || alpha || sparc64 || ia64
+//go:build amd64 || arm64 || mips64 || mips64le || ppc64 || ppc64le || riscv64 || s390x || wasm || arm64be || alpha || sw_64 || sparc64 || ia64
package runtime
diff --git a/libgo/go/runtime/mpagealloc_64bit.go b/libgo/go/runtime/mpagealloc_64bit.go
index 3d0d4c608..aca127d7c 100644
--- a/libgo/go/runtime/mpagealloc_64bit.go
+++ b/libgo/go/runtime/mpagealloc_64bit.go
@@ -2,7 +2,7 @@
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
-//go:build amd64 || arm64 || mips64 || mips64le || ppc64 || ppc64le || riscv64 || s390x || arm64be || alpha || sparc64 || ia64
+//go:build amd64 || arm64 || mips64 || mips64le || ppc64 || ppc64le || riscv64 || s390x || arm64be || alpha || sw_64 || sparc64 || ia64
package runtime
diff --git a/libgo/go/syscall/endian_little.go b/libgo/go/syscall/endian_little.go
index 63e46d8b1..37af34bce 100644
--- a/libgo/go/syscall/endian_little.go
+++ b/libgo/go/syscall/endian_little.go
@@ -2,7 +2,7 @@
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
-//go:build 386 || alpha || amd64 || amd64p32 || arm || arm64 || ia64 || ppc64le || mips64le || mipsle || mips64p32le || nios2 || riscv || riscv64 || sh || wasm
+//go:build 386 || alpha || sw_64 || amd64 || amd64p32 || arm || arm64 || ia64 || ppc64le || mips64le || mipsle || mips64p32le || nios2 || riscv || riscv64 || sh || wasm
package syscall
diff --git a/libgo/go/syscall/libcall_linux_sw_64.go b/libgo/go/syscall/libcall_linux_sw_64.go
new file mode 100644
index 000000000..13ccf05a6
--- /dev/null
+++ b/libgo/go/syscall/libcall_linux_sw_64.go
@@ -0,0 +1,13 @@
+// Copyright 2012 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// GNU/Linux library calls SW_64 specific.
+
+package syscall
+
+//sys Ioperm(from int, num int, on int) (err error)
+//ioperm(from _C_long, num _C_long, on _C_int) _C_int
+
+//sys Iopl(level int) (err error)
+//iopl(level _C_int) _C_int
diff --git a/libgo/go/syscall/syscall_linux_sw_64.go b/libgo/go/syscall/syscall_linux_sw_64.go
new file mode 100644
index 000000000..5115b9b7c
--- /dev/null
+++ b/libgo/go/syscall/syscall_linux_sw_64.go
@@ -0,0 +1,25 @@
+// syscall_linux_sw_64.go -- GNU/Linux SW_64 specific support
+
+// Copyright 2011 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package syscall
+
+import "unsafe"
+
+func (r *PtraceRegs) PC() uint64 {
+ return r.Pc
+}
+
+func (r *PtraceRegs) SetPC(pc uint64) {
+ r.Pc = pc
+}
+
+func PtraceGetRegs(pid int, regsout *PtraceRegs) (err error) {
+ return ptrace(PTRACE_GETREGS, pid, 0, uintptr(unsafe.Pointer(regsout)))
+}
+
+func PtraceSetRegs(pid int, regs *PtraceRegs) (err error) {
+ return ptrace(PTRACE_SETREGS, pid, 0, uintptr(unsafe.Pointer(regs)))
+}
diff --git a/libgo/goarch.sh b/libgo/goarch.sh
index 977f318b3..a0cdcf17e 100755
--- a/libgo/goarch.sh
+++ b/libgo/goarch.sh
@@ -54,6 +54,11 @@ case $goarch in
defaultphyspagesize=8192
pcquantum=4
;;
+ sw_64)
+ family=SW_64
+ defaultphyspagesize=8192
+ pcquantum=4
+ ;;
amd64)
family=AMD64
;;
diff --git a/libgo/mksysinfo.sh b/libgo/mksysinfo.sh
index 0c52ea5d7..11031f5a0 100755
--- a/libgo/mksysinfo.sh
+++ b/libgo/mksysinfo.sh
@@ -377,7 +377,12 @@ if test "$regs" = ""; then
# mips*
regs=`grep '^type _pt_regs struct' gen-sysinfo.go || true`
fi
+if test "$regs" = ""; then
+ # sw_64*
+ regs=`grep '^type _user_pt_regs struct' gen-sysinfo.go || true`
+fi
if test "$regs" != ""; then
+ regs=`echo $regs | sed -e 's/type _user_pt_regs struct//'`
regs=`echo $regs | sed -e 's/type _pt_regs struct//'`
regs=`echo $regs |
sed -e 's/type __*user_regs_struct struct //' -e 's/[{}]//g'`
diff --git a/libgo/runtime/go-signal.c b/libgo/runtime/go-signal.c
index 528d9b6d9..20e6947b5 100644
--- a/libgo/runtime/go-signal.c
+++ b/libgo/runtime/go-signal.c
@@ -230,6 +230,8 @@ getSiginfo(siginfo_t *info, void *context __attribute__((unused)))
ret.sigpc = ((ucontext_t*)(context))->uc_mcontext.gregs[REG_EIP];
#elif defined(__alpha__) && defined(__linux__)
ret.sigpc = ((ucontext_t*)(context))->uc_mcontext.sc_pc;
+#elif defined(__sw_64__) && defined(__linux__)
+ ret.sigpc = ((ucontext_t *) (context))->uc_mcontext.sc_pc;
#elif defined(__PPC64__) && defined(__linux__)
ret.sigpc = ((ucontext_t*)(context))->uc_mcontext.gp_regs[32];
#elif defined(__PPC__) && defined(__linux__)
@@ -311,43 +313,43 @@ dumpregs(siginfo_t *info __attribute__((unused)), void *context __attribute__((u
runtime_printf("fs %x\n", m->gregs[REG_FS]);
runtime_printf("gs %x\n", m->gregs[REG_GS]);
}
-#elif defined(__alpha__) && defined(__linux__)
- {
- mcontext_t *m = &((ucontext_t*)(context))->uc_mcontext;
-
- runtime_printf("v0 %X\n", m->sc_regs[0]);
- runtime_printf("t0 %X\n", m->sc_regs[1]);
- runtime_printf("t1 %X\n", m->sc_regs[2]);
- runtime_printf("t2 %X\n", m->sc_regs[3]);
- runtime_printf("t3 %X\n", m->sc_regs[4]);
- runtime_printf("t4 %X\n", m->sc_regs[5]);
- runtime_printf("t5 %X\n", m->sc_regs[6]);
- runtime_printf("t6 %X\n", m->sc_regs[7]);
- runtime_printf("t7 %X\n", m->sc_regs[8]);
- runtime_printf("s0 %X\n", m->sc_regs[9]);
- runtime_printf("s1 %X\n", m->sc_regs[10]);
- runtime_printf("s2 %X\n", m->sc_regs[11]);
- runtime_printf("s3 %X\n", m->sc_regs[12]);
- runtime_printf("s4 %X\n", m->sc_regs[13]);
- runtime_printf("s5 %X\n", m->sc_regs[14]);
- runtime_printf("fp %X\n", m->sc_regs[15]);
- runtime_printf("a0 %X\n", m->sc_regs[16]);
- runtime_printf("a1 %X\n", m->sc_regs[17]);
- runtime_printf("a2 %X\n", m->sc_regs[18]);
- runtime_printf("a3 %X\n", m->sc_regs[19]);
- runtime_printf("a4 %X\n", m->sc_regs[20]);
- runtime_printf("a5 %X\n", m->sc_regs[21]);
- runtime_printf("t8 %X\n", m->sc_regs[22]);
- runtime_printf("t9 %X\n", m->sc_regs[23]);
- runtime_printf("t10 %X\n", m->sc_regs[24]);
- runtime_printf("t11 %X\n", m->sc_regs[25]);
- runtime_printf("ra %X\n", m->sc_regs[26]);
- runtime_printf("t12 %X\n", m->sc_regs[27]);
- runtime_printf("at %X\n", m->sc_regs[28]);
- runtime_printf("gp %X\n", m->sc_regs[29]);
- runtime_printf("sp %X\n", m->sc_regs[30]);
- runtime_printf("pc %X\n", m->sc_pc);
- }
+#elif (defined(__alpha__) || defined(__sw_64__)) && defined(__linux__)
+ {
+ mcontext_t *m = &((ucontext_t *) (context))->uc_mcontext;
+
+ runtime_printf ("v0 %X\n", m->sc_regs[0]);
+ runtime_printf ("t0 %X\n", m->sc_regs[1]);
+ runtime_printf ("t1 %X\n", m->sc_regs[2]);
+ runtime_printf ("t2 %X\n", m->sc_regs[3]);
+ runtime_printf ("t3 %X\n", m->sc_regs[4]);
+ runtime_printf ("t4 %X\n", m->sc_regs[5]);
+ runtime_printf ("t5 %X\n", m->sc_regs[6]);
+ runtime_printf ("t6 %X\n", m->sc_regs[7]);
+ runtime_printf ("t7 %X\n", m->sc_regs[8]);
+ runtime_printf ("s0 %X\n", m->sc_regs[9]);
+ runtime_printf ("s1 %X\n", m->sc_regs[10]);
+ runtime_printf ("s2 %X\n", m->sc_regs[11]);
+ runtime_printf ("s3 %X\n", m->sc_regs[12]);
+ runtime_printf ("s4 %X\n", m->sc_regs[13]);
+ runtime_printf ("s5 %X\n", m->sc_regs[14]);
+ runtime_printf ("fp %X\n", m->sc_regs[15]);
+ runtime_printf ("a0 %X\n", m->sc_regs[16]);
+ runtime_printf ("a1 %X\n", m->sc_regs[17]);
+ runtime_printf ("a2 %X\n", m->sc_regs[18]);
+ runtime_printf ("a3 %X\n", m->sc_regs[19]);
+ runtime_printf ("a4 %X\n", m->sc_regs[20]);
+ runtime_printf ("a5 %X\n", m->sc_regs[21]);
+ runtime_printf ("t8 %X\n", m->sc_regs[22]);
+ runtime_printf ("t9 %X\n", m->sc_regs[23]);
+ runtime_printf ("t10 %X\n", m->sc_regs[24]);
+ runtime_printf ("t11 %X\n", m->sc_regs[25]);
+ runtime_printf ("ra %X\n", m->sc_regs[26]);
+ runtime_printf ("t12 %X\n", m->sc_regs[27]);
+ runtime_printf ("at %X\n", m->sc_regs[28]);
+ runtime_printf ("gp %X\n", m->sc_regs[29]);
+ runtime_printf ("sp %X\n", m->sc_regs[30]);
+ runtime_printf ("pc %X\n", m->sc_pc);
+ }
#elif defined(__PPC__) && defined(__linux__)
{
int i;
--
2.25.1

View File

@ -0,0 +1,165 @@
From 6ecc701c02c54cd1af013e70aef7ccf768f42da2 Mon Sep 17 00:00:00 2001
From: swcompiler <lc@wxiat.com>
Date: Mon, 25 Nov 2024 16:53:22 +0800
Subject: [PATCH 10/16] Sw64 Port: libgomp
---
libgomp/config/linux/sw_64/futex.h | 102 +++++++++++++++++++++++++++++
libgomp/configure | 6 ++
libgomp/configure.tgt | 4 ++
libgomp/libgomp.spec.in | 2 +-
4 files changed, 113 insertions(+), 1 deletion(-)
create mode 100644 libgomp/config/linux/sw_64/futex.h
diff --git a/libgomp/config/linux/sw_64/futex.h b/libgomp/config/linux/sw_64/futex.h
new file mode 100644
index 000000000..258f38289
--- /dev/null
+++ b/libgomp/config/linux/sw_64/futex.h
@@ -0,0 +1,102 @@
+/* Copyright (C) 2005-2022 Free Software Foundation, Inc.
+ Contributed by Richard Henderson <rth@redhat.com>.
+
+ This file is part of the GNU Offloading and Multi Processing Library
+ (libgomp).
+
+ Libgomp is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
+ WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+/* Provide target-specific access to the futex system call. */
+
+#ifndef SYS_futex
+#define SYS_futex 394
+#endif
+
+static inline void
+futex_wait (int *addr, int val)
+{
+ register long sc_0 __asm__("$0");
+ register long sc_16 __asm__("$16");
+ register long sc_17 __asm__("$17");
+ register long sc_18 __asm__("$18");
+ register long sc_19 __asm__("$19");
+
+ sc_0 = SYS_futex;
+ sc_16 = (long) addr;
+ sc_17 = gomp_futex_wait;
+ sc_18 = val;
+ sc_19 = 0;
+ __asm volatile ("callsys"
+ : "=r"(sc_0), "=r"(sc_19)
+ : "0"(sc_0), "r"(sc_16), "r"(sc_17), "r"(sc_18), "1"(sc_19)
+ : "$1", "$2", "$3", "$4", "$5", "$6", "$7", "$8", "$22", "$23",
+ "$24", "$25", "$27", "$28", "memory");
+ if (__builtin_expect (sc_19, 0) && sc_0 == ENOSYS)
+ {
+ gomp_futex_wait &= ~FUTEX_PRIVATE_FLAG;
+ gomp_futex_wake &= ~FUTEX_PRIVATE_FLAG;
+ sc_0 = SYS_futex;
+ sc_17 &= ~FUTEX_PRIVATE_FLAG;
+ sc_19 = 0;
+ __asm volatile ("callsys"
+ : "=r"(sc_0), "=r"(sc_19)
+ : "0"(sc_0), "r"(sc_16), "r"(sc_17), "r"(sc_18), "1"(sc_19)
+ : "$1", "$2", "$3", "$4", "$5", "$6", "$7", "$8", "$22",
+ "$23", "$24", "$25", "$27", "$28", "memory");
+ }
+}
+
+static inline void
+futex_wake (int *addr, int count)
+{
+ register long sc_0 __asm__("$0");
+ register long sc_16 __asm__("$16");
+ register long sc_17 __asm__("$17");
+ register long sc_18 __asm__("$18");
+ register long sc_19 __asm__("$19");
+
+ sc_0 = SYS_futex;
+ sc_16 = (long) addr;
+ sc_17 = gomp_futex_wake;
+ sc_18 = count;
+ __asm volatile ("callsys"
+ : "=r"(sc_0), "=r"(sc_19)
+ : "0"(sc_0), "r"(sc_16), "r"(sc_17), "r"(sc_18)
+ : "$1", "$2", "$3", "$4", "$5", "$6", "$7", "$8", "$22", "$23",
+ "$24", "$25", "$27", "$28", "memory");
+ if (__builtin_expect (sc_19, 0) && sc_0 == ENOSYS)
+ {
+ gomp_futex_wait &= ~FUTEX_PRIVATE_FLAG;
+ gomp_futex_wake &= ~FUTEX_PRIVATE_FLAG;
+ sc_0 = SYS_futex;
+ sc_17 &= ~FUTEX_PRIVATE_FLAG;
+ __asm volatile ("callsys"
+ : "=r"(sc_0), "=r"(sc_19)
+ : "0"(sc_0), "r"(sc_16), "r"(sc_17), "r"(sc_18)
+ : "$1", "$2", "$3", "$4", "$5", "$6", "$7", "$8", "$22",
+ "$23", "$24", "$25", "$27", "$28", "memory");
+ }
+}
+
+static inline void
+cpu_relax (void)
+{
+ __asm volatile ("" : : : "memory");
+}
diff --git a/libgomp/configure b/libgomp/configure
index 471c957b7..a1df23705 100755
--- a/libgomp/configure
+++ b/libgomp/configure
@@ -11841,6 +11841,12 @@ case `echo $GFORTRAN` in
FC=no
fi ;;
esac
+case "${target}" in
+ sw_64-*-*)
+ FC="$GFORTRAN"
+ ;;
+*)
+esac
ac_ext=${ac_fc_srcext-f}
ac_compile='$FC -c $FCFLAGS $ac_fcflags_srcext conftest.$ac_ext >&5'
ac_link='$FC -o conftest$ac_exeext $FCFLAGS $LDFLAGS $ac_fcflags_srcext conftest.$ac_ext $LIBS >&5'
diff --git a/libgomp/configure.tgt b/libgomp/configure.tgt
index f924e9f98..a8023d0f2 100644
--- a/libgomp/configure.tgt
+++ b/libgomp/configure.tgt
@@ -87,6 +87,10 @@ if test x$enable_linux_futex = xyes; then
config_path="linux/s390 linux posix"
;;
+ sw_64*-*-linux*)
+ config_path="linux/sw_64 linux posix"
+ ;;
+
tile*-*-linux*)
config_path="linux/tile linux posix"
;;
diff --git a/libgomp/libgomp.spec.in b/libgomp/libgomp.spec.in
index 5651603f4..8442e6313 100644
--- a/libgomp/libgomp.spec.in
+++ b/libgomp/libgomp.spec.in
@@ -1,3 +1,3 @@
# This spec file is read by gcc when linking. It is used to specify the
# standard libraries we need in order to link with libgomp.
-*link_gomp: @link_gomp@
+*link_gomp: @link_gomp@ --whole-archive -lpthread --no-whole-archive
--
2.25.1

260
0011-Sw64-Port-libitm.patch Normal file
View File

@ -0,0 +1,260 @@
From c506f4995a68274efbd31ede3751b14dc0fa0718 Mon Sep 17 00:00:00 2001
From: swcompiler <lc@wxiat.com>
Date: Mon, 25 Nov 2024 16:53:43 +0800
Subject: [PATCH 11/16] Sw64 Port: libitm
---
libitm/config/linux/sw_64/futex_bits.h | 56 +++++++++++++
libitm/config/sw_64/sjlj.S | 112 +++++++++++++++++++++++++
libitm/config/sw_64/target.h | 44 ++++++++++
libitm/configure.tgt | 1 +
4 files changed, 213 insertions(+)
create mode 100644 libitm/config/linux/sw_64/futex_bits.h
create mode 100644 libitm/config/sw_64/sjlj.S
create mode 100644 libitm/config/sw_64/target.h
diff --git a/libitm/config/linux/sw_64/futex_bits.h b/libitm/config/linux/sw_64/futex_bits.h
new file mode 100644
index 000000000..478a3078a
--- /dev/null
+++ b/libitm/config/linux/sw_64/futex_bits.h
@@ -0,0 +1,56 @@
+/* Copyright (C) 2008-2022 Free Software Foundation, Inc.
+ Contributed by Richard Henderson <rth@redhat.com>.
+
+ This file is part of the GNU Transactional Memory Library (libitm).
+
+ Libitm is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ Libitm is distributed in the hope that it will be useful, but WITHOUT ANY
+ WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+/* Provide target-specific access to the futex system call. */
+
+#ifndef SYS_futex
+#define SYS_futex 394
+#endif
+
+static inline long
+sys_futex0 (std::atomic<int> *addr, int op, int val)
+{
+ register long sc_0 __asm__("$0");
+ register long sc_16 __asm__("$16");
+ register long sc_17 __asm__("$17");
+ register long sc_18 __asm__("$18");
+ register long sc_19 __asm__("$19");
+ long res;
+
+ sc_0 = SYS_futex;
+ sc_16 = (long) addr;
+ sc_17 = op;
+ sc_18 = val;
+ sc_19 = 0;
+ __asm volatile("callsys"
+ : "=r"(sc_0), "=r"(sc_19)
+ : "0"(sc_0), "r"(sc_16), "r"(sc_17), "r"(sc_18), "1"(sc_19)
+ : "$1", "$2", "$3", "$4", "$5", "$6", "$7", "$8", "$22", "$23",
+ "$24", "$25", "$27", "$28", "memory");
+
+ res = sc_0;
+ if (__builtin_expect (sc_19, 0))
+ res = -res;
+ return res;
+}
diff --git a/libitm/config/sw_64/sjlj.S b/libitm/config/sw_64/sjlj.S
new file mode 100644
index 000000000..c4b74d76b
--- /dev/null
+++ b/libitm/config/sw_64/sjlj.S
@@ -0,0 +1,112 @@
+/* Copyright (C) 2009-2022 Free Software Foundation, Inc.
+ Contributed by Richard Henderson <rth@redhat.com>.
+
+ This file is part of the GNU Transactional Memory Library (libitm).
+
+ Libitm is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ Libitm is distributed in the hope that it will be useful, but WITHOUT ANY
+ WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+ .text
+ .align 4
+ .globl _ITM_beginTransaction
+ .ent _ITM_beginTransaction
+
+#define FRAME 144
+
+_ITM_beginTransaction:
+ ldgp $29, 0($27)
+ subl $30, FRAME, $30
+ .frame $30, FRAME, $26, 0
+ .mask 0x04000000, 0
+ stl $26, 0($30)
+ .prologue 1
+
+ stl $9, 8($30)
+ stl $10, 16($30)
+ addl $30, FRAME, $0
+ stl $11, 24($30)
+
+ stl $12, 32($30)
+ stl $13, 40($30)
+ stl $14, 48($30)
+ stl $15, 56($30)
+
+ stl $0, 64($30)
+ fstd $f2, 72($30)
+ fstd $f3, 80($30)
+ fstd $f4, 88($30)
+
+ fstd $f5, 96($30)
+ fstd $f6, 104($30)
+ fstd $f7, 112($30)
+ fstd $f8, 120($30)
+
+ fstd $f9, 128($30)
+ mov $30, $17
+#ifdef __PIC__
+ unop
+ bsr $26, GTM_begin_transaction !samegp
+#else
+ call $26, GTM_begin_transaction
+ ldgp $29, 0($26)
+#endif
+
+ ldl $26, 0($30)
+ addl $30, FRAME, $30
+ ret
+.end _ITM_beginTransaction
+
+ .align 4
+ .globl GTM_longjmp
+#ifdef __ELF__
+ .hidden GTM_longjmp
+#endif
+ .ent GTM_longjmp
+
+GTM_longjmp:
+ .prologue 0
+ ldl $26, 0($17)
+ ldl $9, 8($17)
+ ldl $10, 16($17)
+ ldl $11, 24($17)
+
+ ldl $12, 32($17)
+ ldl $13, 40($17)
+ ldl $14, 48($17)
+ ldl $15, 56($17)
+
+ ldl $1, 64($17)
+ fldd $f2, 72($17)
+ fldd $f3, 80($17)
+ fldd $f4, 88($17)
+
+ fldd $f5, 96($17)
+ fldd $f6, 104($17)
+ fldd $f7, 112($17)
+ fldd $f8, 120($17)
+
+ fldd $f9, 128($17)
+ mov $16, $0
+ mov $1, $30
+ ret
+.end GTM_longjmp
+
+#ifdef __linux__
+.section .note.GNU-stack, "", @progbits
+#endif
diff --git a/libitm/config/sw_64/target.h b/libitm/config/sw_64/target.h
new file mode 100644
index 000000000..7c3f1e3bd
--- /dev/null
+++ b/libitm/config/sw_64/target.h
@@ -0,0 +1,44 @@
+/* Copyright (C) 2009-2022 Free Software Foundation, Inc.
+ Contributed by Richard Henderson <rth@redhat.com>.
+
+ This file is part of the GNU Transactional Memory Library (libitm).
+
+ Libitm is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ Libitm is distributed in the hope that it will be useful, but WITHOUT ANY
+ WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+namespace GTM HIDDEN {
+
+typedef struct gtm_jmpbuf
+{
+ unsigned long pc;
+ unsigned long s[7];
+ void *cfa;
+ unsigned long f[8];
+} gtm_jmpbuf;
+
+/* The size of one line in hardware caches (in bytes). */
+#define HW_CACHELINE_SIZE 64
+
+static inline void
+cpu_relax (void)
+{
+ __asm volatile ("" : : : "memory");
+}
+
+} // namespace GTM
diff --git a/libitm/configure.tgt b/libitm/configure.tgt
index 06e90973e..d831da5b0 100644
--- a/libitm/configure.tgt
+++ b/libitm/configure.tgt
@@ -118,6 +118,7 @@ EOF
*)
ARCH="${target_cpu}"
;;
+ sw_64*) ARCH=sw_64 ;;
esac
# For the benefit of top-level configure, determine if the cpu is supported.
--
2.25.1

6237
0012-Sw64-Port-libstdc.patch Normal file

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,28 @@
From e8813c5a4ba57493f92214f6d97433208ac30d9e Mon Sep 17 00:00:00 2001
From: swcompiler <lc@wxiat.com>
Date: Mon, 25 Nov 2024 16:55:30 +0800
Subject: [PATCH 13/16] Sw64 Port: set raise FPE when DivbyZero on Sw_64
platform
---
intl/dcigettext.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/intl/dcigettext.c b/intl/dcigettext.c
index a8d4a14d2..a828f0419 100644
--- a/intl/dcigettext.c
+++ b/intl/dcigettext.c
@@ -72,8 +72,8 @@ extern int errno;
#ifdef _LIBC
/* Guess whether integer division by zero raises signal SIGFPE.
Set to 1 only if you know for sure. In case of doubt, set to 0. */
-# if defined __alpha__ || defined __arm__ || defined __i386__ \
- || defined __m68k__ || defined __s390__
+#if defined __alpha__ || defined __arm__ || defined __i386__ \
+ || defined __m68k__ || defined __s390__ || defined __sw_64__
# define INTDIV0_RAISES_SIGFPE 1
# else
# define INTDIV0_RAISES_SIGFPE 0
--
2.25.1

View File

@ -0,0 +1,35 @@
From b85fb9eca6a6a7612ae6451995f9908bfbbba72f Mon Sep 17 00:00:00 2001
From: swcompiler <lc@wxiat.com>
Date: Mon, 25 Nov 2024 16:56:12 +0800
Subject: [PATCH 14/16] Sw64 Port: add lex builtin support in libcpp
---
libcpp/lex.cc | 5 ++++-
1 file changed, 4 insertions(+), 1 deletion(-)
diff --git a/libcpp/lex.cc b/libcpp/lex.cc
index fb1dfabb7..bc2541ef9 100644
--- a/libcpp/lex.cc
+++ b/libcpp/lex.cc
@@ -168,6 +168,8 @@ acc_char_cmp (word_type val, word_type c)
/* We can get exact results using a compare-bytes instruction.
Get (val == c) via (0 >= (val ^ c)). */
return __builtin_alpha_cmpbge (0, val ^ c);
+#elif defined(__GNUC__) && defined(__sw_64__)
+ return __builtin_sw_64_cmpbge (0, val ^ c);
#else
word_type magic = 0x7efefefeU;
if (sizeof(word_type) == 8)
@@ -186,7 +188,8 @@ static inline int
acc_char_index (word_type cmp ATTRIBUTE_UNUSED,
word_type val ATTRIBUTE_UNUSED)
{
-#if defined(__GNUC__) && defined(__alpha__) && !WORDS_BIGENDIAN
+#if defined(__GNUC__) && (defined(__alpha__) || defined(__sw_64__)) \
+ && !WORDS_BIGENDIAN
/* The cmpbge instruction sets *bits* of the result corresponding to
matches in the bytes with no false positives. */
return __builtin_ctzl (cmp);
--
2.25.1

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,104 @@
From add6d92567cfdc16e0acfaf73fb4b8cbc213661f Mon Sep 17 00:00:00 2001
From: swcompiler <lc@wxiat.com>
Date: Mon, 25 Nov 2024 17:15:16 +0800
Subject: [PATCH 16/16] libsanitizer: fix isoc23 function interception for
glibc-2.38
---
libsanitizer/asan/asan_interceptors.cpp | 55 +++++++++++++++----------
1 file changed, 33 insertions(+), 22 deletions(-)
diff --git a/libsanitizer/asan/asan_interceptors.cpp b/libsanitizer/asan/asan_interceptors.cpp
index b28909152..ae1c9bfcb 100644
--- a/libsanitizer/asan/asan_interceptors.cpp
+++ b/libsanitizer/asan/asan_interceptors.cpp
@@ -41,6 +41,8 @@
# define ASAN_PTHREAD_CREATE_VERSION "GLIBC_2.1"
# elif defined(__mips__) && SANITIZER_LINUX
# define ASAN_PTHREAD_CREATE_VERSION "GLIBC_2.2"
+# elif defined(__sw_64__)
+# define ASAN_PTHREAD_CREATE_VERSION "GLIBC_2.1"
# endif
namespace __asan {
@@ -472,19 +474,32 @@ INTERCEPTOR(char*, strncpy, char *to, const char *from, uptr size) {
return REAL(strncpy)(to, from, size);
}
-INTERCEPTOR(long, strtol, const char *nptr, char **endptr, int base) {
- void *ctx;
- ASAN_INTERCEPTOR_ENTER(ctx, strtol);
- ENSURE_ASAN_INITED();
- if (!flags()->replace_str) {
- return REAL(strtol)(nptr, endptr, base);
- }
+template <typename Fn>
+static ALWAYS_INLINE auto StrtolImpl(void *ctx, Fn real, const char *nptr,
+ char **endptr, int base)
+ -> decltype(real(nullptr, nullptr, 0)) {
+ if (!flags()->replace_str)
+ return real(nptr, endptr, base);
char *real_endptr;
- long result = REAL(strtol)(nptr, &real_endptr, base);
+ auto res = real(nptr, &real_endptr, base);
StrtolFixAndCheck(ctx, nptr, endptr, real_endptr, base);
- return result;
+ return res;
}
+# define INTERCEPTOR_STRTO_BASE(ret_type, func) \
+ INTERCEPTOR(ret_type, func, const char *nptr, char **endptr, int base) { \
+ void *ctx; \
+ ASAN_INTERCEPTOR_ENTER(ctx, func); \
+ ENSURE_ASAN_INITED(); \
+ return StrtolImpl(ctx, REAL(func), nptr, endptr, base); \
+ }
+
+INTERCEPTOR_STRTO_BASE(long, strtol)
+
+#if SANITIZER_GLIBC
+INTERCEPTOR_STRTO_BASE(long, __isoc23_strtol)
+#endif
+
INTERCEPTOR(int, atoi, const char *nptr) {
void *ctx;
ASAN_INTERCEPTOR_ENTER(ctx, atoi);
@@ -524,18 +539,11 @@ INTERCEPTOR(long, atol, const char *nptr) {
}
#if ASAN_INTERCEPT_ATOLL_AND_STRTOLL
-INTERCEPTOR(long long, strtoll, const char *nptr, char **endptr, int base) {
- void *ctx;
- ASAN_INTERCEPTOR_ENTER(ctx, strtoll);
- ENSURE_ASAN_INITED();
- if (!flags()->replace_str) {
- return REAL(strtoll)(nptr, endptr, base);
- }
- char *real_endptr;
- long long result = REAL(strtoll)(nptr, &real_endptr, base);
- StrtolFixAndCheck(ctx, nptr, endptr, real_endptr, base);
- return result;
-}
+INTERCEPTOR_STRTO_BASE(long long, strtoll)
+
+#if SANITIZER_GLIBC
+INTERCEPTOR_STRTO_BASE(long long, __isoc23_strtoll)
+#endif
INTERCEPTOR(long long, atoll, const char *nptr) {
void *ctx;
@@ -639,7 +647,10 @@ void InitializeAsanInterceptors() {
ASAN_INTERCEPT_FUNC(atoll);
ASAN_INTERCEPT_FUNC(strtoll);
#endif
-
+#if SANITIZER_GLIBC
+ ASAN_INTERCEPT_FUNC(__isoc23_strtol);
+ ASAN_INTERCEPT_FUNC(__isoc23_strtoll);
+#endif
// Intecept jump-related functions.
ASAN_INTERCEPT_FUNC(longjmp);
--
2.25.1

View File

@ -0,0 +1,14 @@
diff --git a/gcc/config/sw_64/sw_64.opt b/gcc/config/sw_64/sw_64.opt
index c818dff40..22d0cdd5d 100644
--- a/gcc/config/sw_64/sw_64.opt
+++ b/gcc/config/sw_64/sw_64.opt
@@ -21,6 +21,9 @@ msw-use-32align
C C++ Fortran LTO Driver Target Mask(SW_32ALIGN) Save
Use or not use 32align.
+TargetVariable
+uint64_t sw_64_dummy_target = 0
+
fsw-sf-cmpsel
Target Var(flag_sw_sf_cmpsel) Init(0)
use or not use SF cmp/br/selcet instructions.

View File

@ -0,0 +1,101 @@
From 6de2e0d400cbe46da482a672810c37b1832c408c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E9=83=91=E6=99=A8=E5=8D=89?= <zhengchenhui1@huawei.com>
Date: Thu, 25 Jul 2024 19:45:43 +0800
Subject: [PATCH] Improve non-loop disambiguation
This optimization is brought from https://gcc.gnu.org/git/gitweb.cgi?p=gcc.git;h=038b077689bb5310386b04d40a2cea234f01e6aa.
When dr_may_alias_p is called without a loop context, it tries
to use the tree-affine interface to calculate the difference
between the two addresses and use that difference to check whether
the gap between the accesses is known at compile time. However, as the
example in the PR shows, this doesn't expand SSA_NAMEs and so can easily
be defeated by things like reassociation.
One fix would have been to use aff_combination_expand to expand the
SSA_NAMEs, but we'd then need some way of maintaining the associated
cache. This patch instead reuses the innermost_loop_behavior fields
(which exist even when no loop context is provided).
It might still be useful to do the aff_combination_expand thing too,
if an example turns out to need it.
---
gcc/common.opt | 4 ++++
gcc/testsuite/gcc.dg/vect/bb-slp-pr106019.c | 16 +++++++++++++++
gcc/tree-data-ref.cc | 22 +++++++++++++++++++++
3 files changed, 42 insertions(+)
create mode 100644 gcc/testsuite/gcc.dg/vect/bb-slp-pr106019.c
diff --git a/gcc/common.opt b/gcc/common.opt
index b18f0b944..75bf9c9c1 100644
--- a/gcc/common.opt
+++ b/gcc/common.opt
@@ -3217,6 +3217,10 @@ ftree-loop-vectorize
Common Var(flag_tree_loop_vectorize) Optimization EnabledBy(ftree-vectorize)
Enable loop vectorization on trees.
+falias-analysis-expand-ssa
+Common Var(flag_alias_analysis_expand_ssa) Init(0)
+Enable expanded SSA name analysis during alias analysis.
+
ftree-slp-vectorize
Common Var(flag_tree_slp_vectorize) Optimization EnabledBy(ftree-vectorize)
Enable basic block vectorization (SLP) on trees.
diff --git a/gcc/testsuite/gcc.dg/vect/bb-slp-pr106019.c b/gcc/testsuite/gcc.dg/vect/bb-slp-pr106019.c
new file mode 100644
index 000000000..5ff8a8a62
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/bb-slp-pr106019.c
@@ -0,0 +1,16 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-falias-analysis-expand-ssa" } */
+
+void f(double *p, long i)
+{
+ p[i+0] += 1;
+ p[i+1] += 1;
+}
+void g(double *p, long i)
+{
+ double *q = p + i;
+ q[0] += 1;
+ q[1] += 1;
+}
+
+/* { dg-final { scan-tree-dump-not "can't determine dependence" slp2 } } */
diff --git a/gcc/tree-data-ref.cc b/gcc/tree-data-ref.cc
index e6ae9e847..a05073c51 100644
--- a/gcc/tree-data-ref.cc
+++ b/gcc/tree-data-ref.cc
@@ -2993,6 +2993,28 @@ dr_may_alias_p (const struct data_reference *a, const struct data_reference *b,
disambiguation. */
if (!loop_nest)
{
+ if (flag_alias_analysis_expand_ssa)
+ {
+ tree tree_size_a = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (a)));
+ tree tree_size_b = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (b)));
+
+ if (DR_BASE_ADDRESS (a)
+ && DR_BASE_ADDRESS (b)
+ && operand_equal_p (DR_BASE_ADDRESS (a), DR_BASE_ADDRESS (b))
+ && operand_equal_p (DR_OFFSET (a), DR_OFFSET (b))
+ && poly_int_tree_p (tree_size_a)
+ && poly_int_tree_p (tree_size_b)
+ && !ranges_maybe_overlap_p (wi::to_widest (DR_INIT (a)),
+ wi::to_widest (tree_size_a),
+ wi::to_widest (DR_INIT (b)),
+ wi::to_widest (tree_size_b)))
+ {
+ gcc_assert (integer_zerop (DR_STEP (a))
+ && integer_zerop (DR_STEP (b)));
+ return false;
+ }
+ }
+
aff_tree off1, off2;
poly_widest_int size1, size2;
get_inner_reference_aff (DR_REF (a), &off1, &size1);
--
2.33.0

View File

@ -0,0 +1,265 @@
From c4e4fef145c1e402f0558cc35f6c1ed0a08beffb Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E9=83=91=E6=99=A8=E5=8D=89?= <zhengchenhui1@huawei.com>
Date: Thu, 25 Jul 2024 20:16:52 +0800
Subject: [PATCH] CHREC multiplication and undefined overflow
This optimization is brought from https://gcc.gnu.org/pipermail/gcc-patches/2024-February/646531.html
When folding a multiply CHRECs are handled like {a, +, b} * c
is {a*c, +, b*c} but that isn't generally correct when overflow
invokes undefined behavior. The following uses unsigned arithmetic
unless either a is zero or a and b have the same sign.
I've used simple early outs for INTEGER_CSTs and otherwise use
a range-query since we lack a tree_expr_nonpositive_p and
get_range_pos_neg isn't a good fit.
---
gcc/common.opt | 4 ++
gcc/testsuite/gcc.dg/pr68317.c | 6 +-
gcc/testsuite/gcc.dg/torture/pr114074.c | 31 ++++++++++
gcc/tree-chrec.cc | 81 +++++++++++++++++++++----
gcc/tree-chrec.h | 2 +-
gcc/value-range.cc | 12 ++++
gcc/value-range.h | 2 +
7 files changed, 123 insertions(+), 15 deletions(-)
create mode 100644 gcc/testsuite/gcc.dg/torture/pr114074.c
diff --git a/gcc/common.opt b/gcc/common.opt
index b18f0b944..d3af3ba39 100644
--- a/gcc/common.opt
+++ b/gcc/common.opt
@@ -1771,6 +1771,10 @@ floop-interchange
Common Var(flag_loop_interchange) Optimization
Enable loop interchange on trees.
+fchrec-mul-fold-strict-overflow
+Common Var(flag_chrec_mul_fold_strict_overflow) Init(0)
+Enable strict overflow handling during constant folding of multiply CHRECs.
+
floop-block
Common Alias(floop-nest-optimize)
Enable loop nest transforms. Same as -floop-nest-optimize.
diff --git a/gcc/testsuite/gcc.dg/pr68317.c b/gcc/testsuite/gcc.dg/pr68317.c
index bd053a752..671a67d95 100644
--- a/gcc/testsuite/gcc.dg/pr68317.c
+++ b/gcc/testsuite/gcc.dg/pr68317.c
@@ -1,5 +1,5 @@
/* { dg-do compile } */
-/* { dg-options "-O2 -fdisable-tree-ethread" } */
+/* { dg-options "-O2 -fdisable-tree-ethread -fchrec-mul-fold-strict-overflow" } */
/* Note: Threader will collapse loop. */
@@ -12,8 +12,8 @@ foo ()
{
int32_t index = 0;
- for (index; index <= 10; index--) // expected warning here
+ for (index; index <= 10; index--) /* { dg-warning "iteration \[0-9\]+ invokes undefined behavior" } */
/* Result of the following multiply will overflow
when converted to signed int32_t. */
- bar ((0xcafe + index) * 0xdead); /* { dg-warning "iteration \[0-9\]+ invokes undefined behavior" } */
+ bar ((0xcafe + index) * 0xdead);
}
diff --git a/gcc/testsuite/gcc.dg/torture/pr114074.c b/gcc/testsuite/gcc.dg/torture/pr114074.c
new file mode 100644
index 000000000..9a383d8fc
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/torture/pr114074.c
@@ -0,0 +1,27 @@
+/* { dg-do run } */
+/* { dg-options "-fchrec-mul-fold-strict-overflow" } */
+int a, b, d;
+
+__attribute__((noipa)) void
+foo (void)
+{
+ ++d;
+}
+
+int
+main ()
+{
+ for (a = 0; a > -3; a -= 2)
+ {
+ int c = a;
+ b = __INT_MAX__ - 3000;
+ a = ~c * b;
+ foo ();
+ if (!a)
+ break;
+ a = c;
+ }
+ if (d != 2)
+ __builtin_abort ();
+ return 0;
+}
diff --git a/gcc/tree-chrec.cc b/gcc/tree-chrec.cc
index c44cea754..3323901bc 100644
--- a/gcc/tree-chrec.cc
+++ b/gcc/tree-chrec.cc
@@ -38,6 +38,8 @@ along with GCC; see the file COPYING3. If not see
#include "gimple.h"
#include "tree-ssa-loop.h"
#include "dumpfile.h"
+#include "value-range.h"
+#include "value-query.h"
#include "tree-scalar-evolution.h"
/* Extended folder for chrecs. */
@@ -404,6 +406,13 @@ chrec_fold_multiply (tree type,
|| automatically_generated_chrec_p (op1))
return chrec_fold_automatically_generated_operands (op0, op1);
+ if (flag_chrec_mul_fold_strict_overflow)
+ {
+ if (TREE_CODE (op0) != POLYNOMIAL_CHREC
+ && TREE_CODE (op1) == POLYNOMIAL_CHREC)
+ std::swap (op0, op1);
+ }
+
switch (TREE_CODE (op0))
{
case POLYNOMIAL_CHREC:
@@ -428,10 +437,53 @@ chrec_fold_multiply (tree type,
if (integer_zerop (op1))
return build_int_cst (type, 0);
- return build_polynomial_chrec
- (CHREC_VARIABLE (op0),
- chrec_fold_multiply (type, CHREC_LEFT (op0), op1),
- chrec_fold_multiply (type, CHREC_RIGHT (op0), op1));
+ if (flag_chrec_mul_fold_strict_overflow)
+ {
+ /* When overflow is undefined and CHREC_LEFT/RIGHT do not have the
+ same sign or CHREC_LEFT is zero then folding the multiply into
+ the addition does not have the same behavior on overflow. Use
+ unsigned arithmetic in that case. */
+ value_range rl, rr;
+ if (!ANY_INTEGRAL_TYPE_P (type)
+ || TYPE_OVERFLOW_WRAPS (type)
+ || integer_zerop (CHREC_LEFT (op0))
+ || (TREE_CODE (CHREC_LEFT (op0)) == INTEGER_CST
+ && TREE_CODE (CHREC_RIGHT (op0)) == INTEGER_CST
+ && (tree_int_cst_sgn (CHREC_LEFT (op0))
+ == tree_int_cst_sgn (CHREC_RIGHT (op0))))
+ || (get_range_query (cfun)->range_of_expr (rl, CHREC_LEFT (op0))
+ && !rl.undefined_p ()
+ && (rl.nonpositive_p () || rl.nonnegative_p ())
+ && get_range_query (cfun)->range_of_expr (rr,
+ CHREC_RIGHT (op0))
+ && !rr.undefined_p ()
+ && ((rl.nonpositive_p () && rr.nonpositive_p ())
+ || (rl.nonnegative_p () && rr.nonnegative_p ()))))
+ {
+ tree left = chrec_fold_multiply (type, CHREC_LEFT (op0), op1);
+ tree right = chrec_fold_multiply (type, CHREC_RIGHT (op0), op1);
+ return build_polynomial_chrec (CHREC_VARIABLE (op0), left, right);
+ }
+ else
+ {
+ tree utype = unsigned_type_for (type);
+ tree uop1 = chrec_convert_rhs (utype, op1);
+ tree uleft0 = chrec_convert_rhs (utype, CHREC_LEFT (op0));
+ tree uright0 = chrec_convert_rhs (utype, CHREC_RIGHT (op0));
+ tree left = chrec_fold_multiply (utype, uleft0, uop1);
+ tree right = chrec_fold_multiply (utype, uright0, uop1);
+ tree tem = build_polynomial_chrec (CHREC_VARIABLE (op0),
+ left, right);
+ return chrec_convert_rhs (type, tem);
+ }
+ }
+ else
+ {
+ return build_polynomial_chrec
+ (CHREC_VARIABLE (op0),
+ chrec_fold_multiply (type, CHREC_LEFT (op0), op1),
+ chrec_fold_multiply (type, CHREC_RIGHT (op0), op1));
+ }
}
CASE_CONVERT:
@@ -449,13 +501,20 @@ chrec_fold_multiply (tree type,
switch (TREE_CODE (op1))
{
case POLYNOMIAL_CHREC:
- gcc_checking_assert
- (!chrec_contains_symbols_defined_in_loop (op1,
- CHREC_VARIABLE (op1)));
- return build_polynomial_chrec
- (CHREC_VARIABLE (op1),
- chrec_fold_multiply (type, CHREC_LEFT (op1), op0),
- chrec_fold_multiply (type, CHREC_RIGHT (op1), op0));
+ if (flag_chrec_mul_fold_strict_overflow)
+ {
+ gcc_unreachable ();
+ }
+ else
+ {
+ gcc_checking_assert
+ (!chrec_contains_symbols_defined_in_loop (op1,
+ CHREC_VARIABLE (op1)));
+ return build_polynomial_chrec
+ (CHREC_VARIABLE (op1),
+ chrec_fold_multiply (type, CHREC_LEFT (op1), op0),
+ chrec_fold_multiply (type, CHREC_RIGHT (op1), op0));
+ }
CASE_CONVERT:
if (tree_contains_chrecs (op1, NULL))
diff --git a/gcc/tree-chrec.h b/gcc/tree-chrec.h
index fcf41710d..cdc97d5d9 100644
--- a/gcc/tree-chrec.h
+++ b/gcc/tree-chrec.h
@@ -63,7 +63,7 @@ extern tree chrec_fold_plus (tree, tree, tree);
extern tree chrec_fold_minus (tree, tree, tree);
extern tree chrec_fold_multiply (tree, tree, tree);
extern tree chrec_convert (tree, tree, gimple *, bool = true, tree = NULL);
-extern tree chrec_convert_rhs (tree, tree, gimple *);
+extern tree chrec_convert_rhs (tree, tree, gimple * = NULL);
extern tree chrec_convert_aggressive (tree, tree, bool *);
/* Operations. */
diff --git a/gcc/value-range.cc b/gcc/value-range.cc
index 000bbcf89..a1dc10a24 100644
--- a/gcc/value-range.cc
+++ b/gcc/value-range.cc
@@ -656,6 +656,18 @@ irange::contains_p (tree cst) const
return false;
}
+bool
+irange::nonnegative_p () const
+{
+ return wi::ge_p (lower_bound (), 0, TYPE_SIGN (type ()));
+}
+
+bool
+irange::nonpositive_p () const
+{
+ return wi::le_p (upper_bound (), 0, TYPE_SIGN (type ()));
+}
+
/* Normalize addresses into constants. */
diff --git a/gcc/value-range.h b/gcc/value-range.h
index d4cba22d5..2dc0907de 100644
--- a/gcc/value-range.h
+++ b/gcc/value-range.h
@@ -69,6 +69,8 @@ public:
bool varying_p () const;
bool singleton_p (tree *result = NULL) const;
bool contains_p (tree) const;
+ bool nonnegative_p () const;
+ bool nonpositive_p () const;
// In-place operators.
void union_ (const irange &);
--
2.33.0

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,882 @@
From d9131757175667d35e74d9ee84689039990af768 Mon Sep 17 00:00:00 2001
From: xingyushuai <xingyushuai@huawei.com>
Date: Fri, 3 Mar 2023 09:31:04 +0800
Subject: [PATCH 001/157] Add hip09 machine description
Here is the patch introducing the hip09 machine model
for the scheduler.
---
gcc/config/aarch64/aarch64-cores.def | 1 +
gcc/config/aarch64/aarch64-cost-tables.h | 104 +++++
gcc/config/aarch64/aarch64-tune.md | 2 +-
gcc/config/aarch64/aarch64.cc | 109 +++++
gcc/config/aarch64/aarch64.md | 1 +
gcc/config/aarch64/hip09.md | 558 +++++++++++++++++++++++
6 files changed, 774 insertions(+), 1 deletion(-)
create mode 100644 gcc/config/aarch64/hip09.md
diff --git a/gcc/config/aarch64/aarch64-cores.def b/gcc/config/aarch64/aarch64-cores.def
index 70b11eb80..a854bdb24 100644
--- a/gcc/config/aarch64/aarch64-cores.def
+++ b/gcc/config/aarch64/aarch64-cores.def
@@ -130,6 +130,7 @@ AARCH64_CORE("a64fx", a64fx, a64fx, 8_2A, AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_F
/* HiSilicon ('H') cores. */
AARCH64_CORE("tsv110", tsv110, tsv110, 8_2A, AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_CRYPTO | AARCH64_FL_F16 | AARCH64_FL_AES | AARCH64_FL_SHA2, tsv110, 0x48, 0xd01, -1)
+AARCH64_CORE("hip09", hip09, hip09, 8_5A, AARCH64_FL_FOR_ARCH8_5 | AARCH64_FL_SVE | AARCH64_FL_I8MM | AARCH64_FL_F32MM | AARCH64_FL_F64MM | AARCH64_FL_PROFILE | AARCH64_FL_PREDRES, hip09, 0x48, 0xd02, 0x0)
/* ARMv8.3-A Architecture Processors. */
diff --git a/gcc/config/aarch64/aarch64-cost-tables.h b/gcc/config/aarch64/aarch64-cost-tables.h
index 48522606f..fc5a3cbe4 100644
--- a/gcc/config/aarch64/aarch64-cost-tables.h
+++ b/gcc/config/aarch64/aarch64-cost-tables.h
@@ -668,6 +668,110 @@ const struct cpu_cost_table a64fx_extra_costs =
}
};
+const struct cpu_cost_table hip09_extra_costs =
+{
+ /* ALU */
+ {
+ 0, /* arith. */
+ 0, /* logical. */
+ 0, /* shift. */
+ 0, /* shift_reg. */
+ COSTS_N_INSNS (1), /* arith_shift. */
+ COSTS_N_INSNS (1), /* arith_shift_reg. */
+ COSTS_N_INSNS (1), /* log_shift. */
+ COSTS_N_INSNS (1), /* log_shift_reg. */
+ 0, /* extend. */
+ COSTS_N_INSNS (1), /* extend_arith. */
+ 0, /* bfi. */
+ 0, /* bfx. */
+ 0, /* clz. */
+ 0, /* rev. */
+ 0, /* non_exec. */
+ true /* non_exec_costs_exec. */
+ },
+
+ {
+ /* MULT SImode */
+ {
+ COSTS_N_INSNS (2), /* simple. */
+ COSTS_N_INSNS (2), /* flag_setting. */
+ COSTS_N_INSNS (2), /* extend. */
+ COSTS_N_INSNS (2), /* add. */
+ COSTS_N_INSNS (2), /* extend_add. */
+ COSTS_N_INSNS (11) /* idiv. */
+ },
+ /* MULT DImode */
+ {
+ COSTS_N_INSNS (3), /* simple. */
+ 0, /* flag_setting (N/A). */
+ COSTS_N_INSNS (3), /* extend. */
+ COSTS_N_INSNS (3), /* add. */
+ COSTS_N_INSNS (3), /* extend_add. */
+ COSTS_N_INSNS (19) /* idiv. */
+ }
+ },
+ /* LD/ST */
+ {
+ COSTS_N_INSNS (3), /* load. */
+ COSTS_N_INSNS (4), /* load_sign_extend. */
+ COSTS_N_INSNS (3), /* ldrd. */
+ COSTS_N_INSNS (3), /* ldm_1st. */
+ 1, /* ldm_regs_per_insn_1st. */
+ 2, /* ldm_regs_per_insn_subsequent. */
+ COSTS_N_INSNS (4), /* loadf. */
+ COSTS_N_INSNS (4), /* loadd. */
+ COSTS_N_INSNS (4), /* load_unaligned. */
+ 0, /* store. */
+ 0, /* strd. */
+ 0, /* stm_1st. */
+ 1, /* stm_regs_per_insn_1st. */
+ 2, /* stm_regs_per_insn_subsequent. */
+ 0, /* storef. */
+ 0, /* stored. */
+ COSTS_N_INSNS (1), /* store_unaligned. */
+ COSTS_N_INSNS (4), /* loadv. */
+ COSTS_N_INSNS (4) /* storev. */
+ },
+ {
+ /* FP SFmode */
+ {
+ COSTS_N_INSNS (10), /* div. */
+ COSTS_N_INSNS (4), /* mult. */
+ COSTS_N_INSNS (4), /* mult_addsub. */
+ COSTS_N_INSNS (4), /* fma. */
+ COSTS_N_INSNS (4), /* addsub. */
+ COSTS_N_INSNS (1), /* fpconst. */
+ COSTS_N_INSNS (1), /* neg. */
+ COSTS_N_INSNS (1), /* compare. */
+ COSTS_N_INSNS (2), /* widen. */
+ COSTS_N_INSNS (2), /* narrow. */
+ COSTS_N_INSNS (2), /* toint. */
+ COSTS_N_INSNS (1), /* fromint. */
+ COSTS_N_INSNS (2) /* roundint. */
+ },
+ /* FP DFmode */
+ {
+ COSTS_N_INSNS (17), /* div. */
+ COSTS_N_INSNS (4), /* mult. */
+ COSTS_N_INSNS (6), /* mult_addsub. */
+ COSTS_N_INSNS (6), /* fma. */
+ COSTS_N_INSNS (3), /* addsub. */
+ COSTS_N_INSNS (1), /* fpconst. */
+ COSTS_N_INSNS (1), /* neg. */
+ COSTS_N_INSNS (1), /* compare. */
+ COSTS_N_INSNS (2), /* widen. */
+ COSTS_N_INSNS (2), /* narrow. */
+ COSTS_N_INSNS (2), /* toint. */
+ COSTS_N_INSNS (1), /* fromint. */
+ COSTS_N_INSNS (2) /* roundint. */
+ }
+ },
+ /* Vector */
+ {
+ COSTS_N_INSNS (1) /* alu. */
+ }
+};
+
const struct cpu_cost_table ampere1_extra_costs =
{
/* ALU */
diff --git a/gcc/config/aarch64/aarch64-tune.md b/gcc/config/aarch64/aarch64-tune.md
index 9dc9adc70..238bb6e31 100644
--- a/gcc/config/aarch64/aarch64-tune.md
+++ b/gcc/config/aarch64/aarch64-tune.md
@@ -1,5 +1,5 @@
;; -*- buffer-read-only: t -*-
;; Generated automatically by gentune.sh from aarch64-cores.def
(define_attr "tune"
- "cortexa34,cortexa35,cortexa53,cortexa57,cortexa72,cortexa73,thunderx,thunderxt88p1,thunderxt88,octeontx,octeontxt81,octeontxt83,thunderxt81,thunderxt83,ampere1,ampere1a,emag,xgene1,falkor,qdf24xx,exynosm1,phecda,thunderx2t99p1,vulcan,thunderx2t99,cortexa55,cortexa75,cortexa76,cortexa76ae,cortexa77,cortexa78,cortexa78ae,cortexa78c,cortexa65,cortexa65ae,cortexx1,ares,neoversen1,neoversee1,octeontx2,octeontx2t98,octeontx2t96,octeontx2t93,octeontx2f95,octeontx2f95n,octeontx2f95mm,a64fx,tsv110,thunderx3t110,zeus,neoversev1,neoverse512tvb,saphira,cortexa57cortexa53,cortexa72cortexa53,cortexa73cortexa35,cortexa73cortexa53,cortexa75cortexa55,cortexa76cortexa55,cortexr82,cortexa510,cortexa710,cortexx2,neoversen2,demeter,neoversev2"
+ "cortexa34,cortexa35,cortexa53,cortexa57,cortexa72,cortexa73,thunderx,thunderxt88p1,thunderxt88,octeontx,octeontxt81,octeontxt83,thunderxt81,thunderxt83,ampere1,ampere1a,emag,xgene1,falkor,qdf24xx,exynosm1,phecda,thunderx2t99p1,vulcan,thunderx2t99,cortexa55,cortexa75,cortexa76,cortexa76ae,cortexa77,cortexa78,cortexa78ae,cortexa78c,cortexa65,cortexa65ae,cortexx1,ares,neoversen1,neoversee1,octeontx2,octeontx2t98,octeontx2t96,octeontx2t93,octeontx2f95,octeontx2f95n,octeontx2f95mm,a64fx,tsv110,hip09,thunderx3t110,zeus,neoversev1,neoverse512tvb,saphira,cortexa57cortexa53,cortexa72cortexa53,cortexa73cortexa35,cortexa73cortexa53,cortexa75cortexa55,cortexa76cortexa55,cortexr82,cortexa510,cortexa710,cortexx2,neoversen2,demeter,neoversev2"
(const (symbol_ref "((enum attr_tune) aarch64_tune)")))
diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
index 5537a537c..e9b3980c4 100644
--- a/gcc/config/aarch64/aarch64.cc
+++ b/gcc/config/aarch64/aarch64.cc
@@ -465,6 +465,22 @@ static const struct cpu_addrcost_table tsv110_addrcost_table =
0, /* imm_offset */
};
+static const struct cpu_addrcost_table hip09_addrcost_table =
+{
+ {
+ 1, /* hi */
+ 0, /* si */
+ 0, /* di */
+ 1, /* ti */
+ },
+ 0, /* pre_modify */
+ 0, /* post_modify */
+ 0, /* register_offset */
+ 1, /* register_sextend */
+ 1, /* register_zextend */
+ 0, /* imm_offset */
+};
+
static const struct cpu_addrcost_table qdf24xx_addrcost_table =
{
{
@@ -660,6 +676,16 @@ static const struct cpu_regmove_cost a64fx_regmove_cost =
2 /* FP2FP */
};
+static const struct cpu_regmove_cost hip09_regmove_cost =
+{
+ 1, /* GP2GP */
+ /* Avoid the use of slow int<->fp moves for spilling by setting
+ their cost higher than memmov_cost. */
+ 2, /* GP2FP */
+ 3, /* FP2GP */
+ 2 /* FP2FP */
+};
+
static const struct cpu_regmove_cost neoversen2_regmove_cost =
{
1, /* GP2GP */
@@ -947,6 +973,43 @@ static const struct cpu_vector_cost tsv110_vector_cost =
nullptr /* issue_info */
};
+static const advsimd_vec_cost hip09_advsimd_vector_cost =
+{
+ 2, /* int_stmt_cost */
+ 2, /* fp_stmt_cost */
+ 0, /* ld2_st2_permute_cost */
+ 0, /* ld3_st3_permute_cost */
+ 0, /* ld4_st4_permute_cost */
+ 2, /* permute_cost */
+ 3, /* reduc_i8_cost */
+ 3, /* reduc_i16_cost */
+ 3, /* reduc_i32_cost */
+ 3, /* reduc_i64_cost */
+ 3, /* reduc_f16_cost */
+ 3, /* reduc_f32_cost */
+ 3, /* reduc_f64_cost */
+ 3, /* store_elt_extra_cost */
+ 3, /* vec_to_scalar_cost */
+ 2, /* scalar_to_vec_cost */
+ 5, /* align_load_cost */
+ 5, /* unalign_load_cost */
+ 1, /* unalign_store_cost */
+ 1 /* store_cost */
+};
+
+static const struct cpu_vector_cost hip09_vector_cost =
+{
+ 1, /* scalar_int_stmt_cost */
+ 1, /* scalar_fp_stmt_cost */
+ 5, /* scalar_load_cost */
+ 1, /* scalar_store_cost */
+ 1, /* cond_taken_branch_cost */
+ 1, /* cond_not_taken_branch_cost */
+ &hip09_advsimd_vector_cost, /* advsimd */
+ nullptr, /* sve */
+ nullptr /* issue_info */
+};
+
static const advsimd_vec_cost cortexa57_advsimd_vector_cost =
{
2, /* int_stmt_cost */
@@ -1293,6 +1356,18 @@ static const cpu_prefetch_tune tsv110_prefetch_tune =
-1 /* default_opt_level */
};
+
+static const cpu_prefetch_tune hip09_prefetch_tune =
+{
+ 0, /* num_slots */
+ 64, /* l1_cache_size */
+ 64, /* l1_cache_line_size */
+ 512, /* l2_cache_size */
+ true, /* prefetch_dynamic_strides */
+ -1, /* minimum_stride */
+ -1 /* default_opt_level */
+};
+
static const cpu_prefetch_tune xgene1_prefetch_tune =
{
8, /* num_slots */
@@ -1658,6 +1733,40 @@ static const struct tune_params tsv110_tunings =
&tsv110_prefetch_tune
};
+static const struct tune_params hip09_tunings =
+{
+ &hip09_extra_costs,
+ &hip09_addrcost_table,
+ &hip09_regmove_cost,
+ &hip09_vector_cost,
+ &generic_branch_cost,
+ &generic_approx_modes,
+ SVE_256, /* sve_width */
+ { 4, /* load_int. */
+ 4, /* store_int. */
+ 4, /* load_fp. */
+ 4, /* store_fp. */
+ 4, /* load_pred. */
+ 4 /* store_pred. */
+ }, /* memmov_cost. */
+ 4, /* issue_rate */
+ (AARCH64_FUSE_AES_AESMC | AARCH64_FUSE_ALU_BRANCH
+ | AARCH64_FUSE_ALU_CBZ), /* fusible_ops */
+ "16", /* function_align. */
+ "4", /* jump_align. */
+ "8", /* loop_align. */
+ 2, /* int_reassoc_width. */
+ 4, /* fp_reassoc_width. */
+ 1, /* vec_reassoc_width. */
+ 2, /* min_div_recip_mul_sf. */
+ 2, /* min_div_recip_mul_df. */
+ 0, /* max_case_values. */
+ tune_params::AUTOPREFETCHER_WEAK, /* autoprefetcher_model. */
+ (AARCH64_EXTRA_TUNE_USE_NEW_VECTOR_COSTS
+ | AARCH64_EXTRA_TUNE_MATCHED_VECTOR_THROUGHPUT), /* tune_flags. */
+ &hip09_prefetch_tune
+};
+
static const struct tune_params xgene1_tunings =
{
&xgene1_extra_costs,
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index d24c8afcf..cf699e4c7 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -477,6 +477,7 @@
(include "thunderx2t99.md")
(include "tsv110.md")
(include "thunderx3t110.md")
+(include "hip09.md")
;; -------------------------------------------------------------------
;; Jumps and other miscellaneous insns
diff --git a/gcc/config/aarch64/hip09.md b/gcc/config/aarch64/hip09.md
new file mode 100644
index 000000000..25428de9a
--- /dev/null
+++ b/gcc/config/aarch64/hip09.md
@@ -0,0 +1,558 @@
+;; hip09 pipeline description
+;; Copyright (C) 2023 Free Software Foundation, Inc.
+;;
+;;Contributed by Yushuai Xing
+;;
+;; This file is part of GCC.
+;;
+;; GCC is free software; you can redistribute it and/or modify it
+;; under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 3, or (at your option)
+;; any later version.
+;;
+;; GCC is distributed in the hope that it will be useful, but
+;; WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+;; General Public License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+
+(define_automaton "hip09")
+(define_automaton "hip09_ldst")
+(define_automaton "hip09_fsu")
+
+(define_attr "hip09_type"
+ "hip09_neon_abs, hip09_neon_fp_arith, hip09_neon_mul, hip09_neon_mla,
+ hip09_neon_dot, hip09_neon_fp_div, hip09_neon_fp_sqrt,
+ hip09_neon_ins, hip09_neon_load1, hip09_neon_load1_lanes,
+ hip09_neon_load2and4, hip09_neon_load3_3reg,
+ hip09_neon_load4_4reg, hip09_neon_store1and2,
+ hip09_neon_store1_1reg, hip09_neon_store1_2reg,
+ hip09_neon_store1_3reg, hip09_neon_store1_4reg,
+ hip09_neon_store3and4_lane, hip09_neon_store3_3reg,
+ hip09_neon_store4_4reg, unknown"
+ (cond [
+ (eq_attr "type" "neon_abs,neon_abs_q,neon_add,neon_add_q,\
+ neon_neg,neon_neg_q,neon_sub,neon_sub_q,neon_add_widen,\
+ neon_sub_widen,neon_qadd,neon_qadd_q,\
+ neon_add_long,neon_sub_long,\
+ neon_qabs,neon_qabs_q,neon_qneg,\
+ neon_qneg_q,neon_qsub,neon_qsub_q,neon_compare,\
+ neon_compare_q,neon_compare_zero,\
+ neon_compare_zero_q,neon_logic,neon_logic_q,\
+ neon_minmax,neon_minmax_q,neon_tst,\
+ neon_tst_q,neon_bsl,neon_bsl_q,\
+ neon_cls,neon_cls_q,neon_ext,\
+ neon_ext_q,neon_rev,neon_rev_q,\
+ neon_tbl1,neon_tbl1_q,neon_fp_abs_s,\
+ neon_fp_abs_s_q,neon_fp_abs_d,\
+ neon_fp_neg_s,neon_fp_neg_s_q,\
+ neon_fp_neg_d,neon_fp_neg_d_q,\
+ neon_shift_imm_narrow_q,neon_move,neon_move_q")
+ (const_string "hip09_neon_abs")
+ (eq_attr "type" "neon_abd,neon_abd_q,\
+ neon_arith_acc,neon_arith_acc_q,\
+ neon_add_halve,neon_add_halve_q,\
+ neon_sub_halve,neon_sub_halve_q,\
+ neon_add_halve_narrow_q,\
+ neon_sub_halve_narrow_q,neon_reduc_add,\
+ neon_reduc_add_q,\
+ neon_sat_mul_b,neon_sat_mul_b_q,\
+ neon_sat_mul_b_long,neon_mul_b,neon_mul_b_q,\
+ neon_mul_b_long,neon_mla_b,neon_mla_b_q,\
+ neon_mla_b_long,neon_sat_mla_b_long,\
+ neon_sat_shift_imm,\
+ neon_sat_shift_imm_q,neon_shift_imm_long,\
+ neon_shift_imm,neon_shift_imm_q,neon_cnt,\
+ neon_cnt_q,neon_fp_recpe_s,neon_fp_recpe_s_q,\
+ neon_fp_recpe_d,neon_fp_recpe_d_q,\
+ neon_fp_rsqrte_s,neon_fp_rsqrte_s_q,\
+ neon_fp_rsqrte_d,neon_fp_rsqrte_d_q,\
+ neon_fp_recpx_s,neon_fp_recpx_s_q,\
+ neon_fp_recpx_d,neon_fp_recpx_d_q,\
+ neon_tbl2,neon_tbl2_q,neon_to_gp,\
+ neon_to_gp_q,neon_fp_abd_s,neon_fp_abd_s_q,\
+ neon_fp_abd_d,neon_fp_abd_d_q,\
+ neon_fp_addsub_s,neon_fp_addsub_s_q,\
+ neon_fp_addsub_d,neon_fp_addsub_d_q,\
+ neon_fp_compare_s,neon_fp_compare_s_q,\
+ neon_fp_compare_d,neon_fp_compare_d_q,\
+ neon_fp_cvt_widen_s,neon_fp_to_int_s,\
+ neon_fp_to_int_s_q,neon_fp_to_int_d,\
+ neon_fp_to_int_d_q,neon_fp_minmax_s,\
+ neon_fp_minmax_s_q,neon_fp_minmax_d,\
+ neon_fp_minmax_d_q,neon_fp_round_s,\
+ neon_fp_round_s_q,neon_fp_cvt_narrow_d_q,\
+ neon_fp_round_d,neon_fp_round_d_q,\
+ neon_fp_cvt_narrow_s_q")
+ (const_string "hip09_neon_fp_arith")
+ (eq_attr "type" "neon_sat_mul_h,neon_sat_mul_h_q,\
+ neon_sat_mul_s,neon_sat_mul_s_q,\
+ neon_sat_mul_h_scalar,neon_sat_mul_s_scalar,\
+ neon_sat_mul_h_scalar_q,neon_sat_mul_h_long,\
+ neon_sat_mul_s_long,neon_sat_mul_h_scalar_long,\
+ neon_sat_mul_s_scalar_long,neon_mul_h,neon_mul_h_q,\
+ neon_mul_s,neon_mul_s_q,neon_mul_h_long,\
+ neon_mul_s_long,neon_mul_h_scalar_long,\
+ neon_mul_s_scalar_long,neon_mla_h,neon_mla_h_q,\
+ neon_mla_s,neon_mla_h_scalar,\
+ neon_mla_h_scalar_q,neon_mla_s_scalar,\
+ neon_mla_h_long,\
+ neon_mla_s_long,neon_sat_mla_h_long,\
+ neon_sat_mla_s_long,neon_sat_mla_h_scalar_long,\
+ neon_sat_mla_s_scalar_long,neon_mla_s_scalar_long,\
+ neon_mla_h_scalar_long,neon_mla_s_scalar_q,\
+ neon_shift_acc,neon_shift_acc_q,neon_shift_reg,\
+ neon_shift_reg_q,neon_sat_shift_reg,\
+ neon_sat_shift_reg_q,neon_sat_shift_imm_narrow_q,\
+ neon_tbl3,neon_tbl3_q,neon_fp_reduc_add_s,\
+ neon_fp_reduc_add_s_q,neon_fp_reduc_add_d,\
+ neon_fp_reduc_add_d_q,neon_fp_reduc_minmax_s,\
+ neon_fp_reduc_minmax_d,neon_fp_reduc_minmax_s_q,\
+ neon_fp_reduc_minmax_d_q,\
+ neon_fp_mul_s_q,\
+ neon_fp_mul_d,neon_fp_mul_d_q,\
+ neon_fp_mul_d_scalar_q,neon_fp_mul_s_scalar,\
+ neon_fp_mul_s_scalar_q")
+ (const_string "hip09_neon_mul")
+ (eq_attr "type" "neon_mla_s_q,neon_reduc_minmax,\
+ neon_reduc_minmax_q,neon_fp_recps_s,\
+ neon_fp_recps_s_q,neon_fp_recps_d,\
+ neon_fp_recps_d_q,neon_tbl4,neon_tbl4_q,\
+ neon_fp_mla_s,\
+ neon_fp_mla_d,neon_fp_mla_d_q,\
+ neon_fp_mla_s_scalar,neon_fp_mla_s_scalar_q,\
+ neon_fp_mla_d_scalar_q")
+ (const_string "hip09_neon_mla")
+ (eq_attr "type" "neon_dot,neon_dot_q")
+ (const_string "hip09_neon_dot")
+ (eq_attr "type" "neon_fp_div_s,neon_fp_div_s_q,\
+ neon_fp_div_d,neon_fp_div_d_q")
+ (const_string "hip09_neon_fp_div")
+ (eq_attr "type" "neon_fp_sqrt_s,neon_fp_sqrt_s_q,\
+ neon_fp_sqrt_d,neon_fp_sqrt_d_q")
+ (const_string "hip09_neon_fp_sqrt")
+ (eq_attr "type" "neon_dup,neon_dup_q,\
+ neon_ins,neon_ins_q")
+ (const_string "hip09_neon_ins")
+ (eq_attr "type" "neon_load1_1reg,neon_load1_1reg_q,\
+ neon_load1_2reg,neon_load1_2reg_q,\
+ neon_load1_3reg,neon_load1_3reg_q,\
+ neon_load1_4reg,neon_load1_4reg_q")
+ (const_string "hip09_neon_load1")
+ (eq_attr "type" "neon_load1_one_lane,\
+ neon_load1_one_lane_q,\
+ neon_load1_all_lanes,neon_load1_all_lanes_q")
+ (const_string "hip09_neon_load1_lanes")
+ (eq_attr "type" "neon_load2_all_lanes,\
+ neon_load2_all_lanes_q,\
+ neon_load2_one_lane,neon_load2_2reg,\
+ neon_load2_2reg_q,neon_load3_one_lane,\
+ neon_load3_all_lanes,neon_load3_all_lanes_q,\
+ neon_load4_one_lane,neon_load4_all_lanes,\
+ neon_load4_all_lanes_q")
+ (const_string "hip09_neon_load2and4")
+ (eq_attr "type" "neon_load3_3reg,neon_load3_3reg_q")
+ (const_string "hip09_neon_load3_3reg")
+ (eq_attr "type" "neon_load4_4reg,neon_load4_4reg_q")
+ (const_string "hip09_neon_load4_4reg")
+ (eq_attr "type" "neon_store1_one_lane,\
+ neon_store1_one_lane_q,neon_store2_one_lane,\
+ neon_store2_one_lane_q,neon_store2_2reg,\
+ neon_store2_2reg_q")
+ (const_string "hip09_neon_store1and2")
+ (eq_attr "type" "neon_store1_1reg,neon_store1_1reg_q")
+ (const_string "hip09_neon_store1_1reg")
+ (eq_attr "type" "neon_store1_2reg,neon_store1_2reg_q")
+ (const_string "hip09_neon_store1_2reg")
+ (eq_attr "type" "neon_store1_3reg,neon_store1_3reg_q")
+ (const_string "hip09_neon_store1_3reg")
+ (eq_attr "type" "neon_store1_4reg,neon_store1_4reg_q")
+ (const_string "hip09_neon_store1_4reg")
+ (eq_attr "type" "neon_store3_one_lane,\
+ neon_store3_one_lane_q,neon_store4_one_lane,\
+ neon_store4_one_lane_q")
+ (const_string "hip09_neon_store3and4_lane")
+ (eq_attr "type" "neon_store3_3reg,\
+ neon_store3_3reg_q")
+ (const_string "hip09_neon_store3_3reg")
+ (eq_attr "type" "neon_store4_4reg,\
+ neon_store4_4reg_q")
+ (const_string "hip09_neon_store4_4reg")]
+ (const_string "unknown")))
+
+; The hip09 core is modelled as a multiple-issue pipeline that has
+; the following functional units.
+; 1. Two pipelines for branch micro operations: BRU1, BRU2
+
+(define_cpu_unit "hip09_bru0" "hip09")
+(define_cpu_unit "hip09_bru1" "hip09")
+
+(define_reservation "hip09_bru01" "hip09_bru0|hip09_bru1")
+
+; 2. Four pipelines for single cycle integer micro operations: ALUs1, ALUs2, ALUs3, ALUs4
+
+(define_cpu_unit "hip09_alus0" "hip09")
+(define_cpu_unit "hip09_alus1" "hip09")
+(define_cpu_unit "hip09_alus2" "hip09")
+(define_cpu_unit "hip09_alus3" "hip09")
+
+(define_reservation "hip09_alus0123" "hip09_alus0|hip09_alus1|hip09_alus2|hip09_alus3")
+(define_reservation "hip09_alus01" "hip09_alus0|hip09_alus1")
+(define_reservation "hip09_alus23" "hip09_alus2|hip09_alus3")
+
+; 3. Two pipelines for multi cycles integer micro operations: ALUm1, ALUm2
+
+(define_cpu_unit "hip09_alum0" "hip09")
+(define_cpu_unit "hip09_alum1" "hip09")
+
+(define_reservation "hip09_alum01" "hip09_alum0|hip09_alum1")
+
+; 4. Two pipelines for load micro operations: Load1, Load2
+
+(define_cpu_unit "hip09_load0" "hip09_ldst")
+(define_cpu_unit "hip09_load1" "hip09_ldst")
+
+(define_reservation "hip09_ld01" "hip09_load0|hip09_load1")
+
+; 5. Two pipelines for store micro operations: Store1, Store2
+
+(define_cpu_unit "hip09_store0" "hip09_ldst")
+(define_cpu_unit "hip09_store1" "hip09_ldst")
+
+(define_reservation "hip09_st01" "hip09_store0|hip09_store1")
+
+; 6. Two pipelines for store data micro operations: STD0,STD1
+
+(define_cpu_unit "hip09_store_data0" "hip09_ldst")
+(define_cpu_unit "hip09_store_data1" "hip09_ldst")
+
+(define_reservation "hip09_std01" "hip09_store_data0|hip09_store_data1")
+
+; 7. Four asymmetric pipelines for Asimd and FP micro operations: FSU1, FSU2, FSU3, FSU4
+
+(define_cpu_unit "hip09_fsu0" "hip09_fsu")
+(define_cpu_unit "hip09_fsu1" "hip09_fsu")
+(define_cpu_unit "hip09_fsu2" "hip09_fsu")
+(define_cpu_unit "hip09_fsu3" "hip09_fsu")
+
+(define_reservation "hip09_fsu0123" "hip09_fsu0|hip09_fsu1|hip09_fsu2|hip09_fsu3")
+(define_reservation "hip09_fsu02" "hip09_fsu0|hip09_fsu2")
+
+
+; 8. Two pipelines for SVE operations, which are the same as fsu1 and fsu3: SVE1, SVE2
+
+;; Simple Execution Unit:
+;
+;; Simple ALU without shift
+(define_insn_reservation "hip09_alu" 1
+ (and (eq_attr "tune" "hip09")
+ (eq_attr "type" "alu_imm,logic_imm,\
+ adc_imm,adc_reg,\
+ alu_sreg,logic_reg,\
+ mov_imm,mov_reg,\
+ csel,rotate_imm,bfm,mov_imm,\
+ clz,rbit,rev"))
+ "hip09_alus0123")
+
+(define_insn_reservation "hip09_alus" 1
+ (and (eq_attr "tune" "hip09")
+ (eq_attr "type" "alus_sreg,alus_imm,\
+ adcs_reg,adcs_imm,\
+ logics_imm,logics_reg,adr"))
+ "hip09_alus23")
+
+;; ALU ops with shift and extend
+(define_insn_reservation "hip09_alu_ext_shift" 2
+ (and (eq_attr "tune" "hip09")
+ (eq_attr "type" "alu_ext,alus_ext,\
+ logics_shift_imm,logics_shift_reg,\
+ logic_shift_reg,logic_shift_imm,\
+ "))
+ "hip09_alum01")
+
+;; Multiplies instructions
+(define_insn_reservation "hip09_mult" 3
+ (and (eq_attr "tune" "hip09")
+ (ior (eq_attr "mul32" "yes")
+ (eq_attr "widen_mul64" "yes")))
+ "hip09_alum01")
+
+;; Integer divide
+(define_insn_reservation "hip09_div" 10
+ (and (eq_attr "tune" "hip09")
+ (eq_attr "type" "udiv,sdiv"))
+ "hip09_alum0")
+
+;; Branch execution Unit
+;
+; Branches take two issue slots.
+; No latency as there is no result
+(define_insn_reservation "hip09_branch" 2
+ (and (eq_attr "tune" "hip09")
+ (eq_attr "type" "branch,call"))
+ "hip09_bru01 + hip09_alus23")
+
+;; Load execution Unit
+;
+; Loads of up to two words.
+(define_insn_reservation "hip09_load1" 4
+ (and (eq_attr "tune" "hip09")
+ (eq_attr "type" "load_4,load_8"))
+ "hip09_ld01")
+
+; Stores of up to two words.
+(define_insn_reservation "hip09_store1" 1
+ (and (eq_attr "tune" "hip09")
+ (eq_attr "type" "store_4,store_8"))
+ "hip09_st01")
+
+;; FP data processing instructions.
+
+(define_insn_reservation "hip09_fp_arith" 1
+ (and (eq_attr "tune" "hip09")
+ (eq_attr "type" "ffariths,ffarithd,fmov,fconsts,fconstd,\
+ f_mrc"))
+ "hip09_fsu0123")
+
+(define_insn_reservation "hip09_fp_cmp" 4
+ (and (eq_attr "tune" "hip09")
+ (eq_attr "type" "fcmps,fcmpd"))
+ "hip09_fsu0123+hip09_alus23")
+
+(define_insn_reservation "hip09_fp_ccmp" 7
+ (and (eq_attr "tune" "hip09")
+ (eq_attr "type" "fccmps,fccmpd"))
+ "hip09_alus01+hip09_fsu0123+hip09_alus23")
+
+(define_insn_reservation "hip09_fp_csel" 4
+ (and (eq_attr "tune" "hip09")
+ (eq_attr "type" "fcsel,f_mcr"))
+ "hip09_alus01+hip09_fsu0123")
+
+(define_insn_reservation "hip09_fp_divs" 7
+ (and (eq_attr "tune" "hip09")
+ (eq_attr "type" "fdivs"))
+ "hip09_fsu0123")
+
+(define_insn_reservation "hip09_fp_divd" 10
+ (and (eq_attr "tune" "hip09")
+ (eq_attr "type" "fdivd"))
+ "hip09_fsu0123")
+
+(define_insn_reservation "hip09_fp_sqrts" 9
+ (and (eq_attr "tune" "hip09")
+ (eq_attr "type" "fsqrts"))
+ "hip09_fsu0123")
+
+(define_insn_reservation "hip09_fp_sqrtd" 15
+ (and (eq_attr "tune" "hip09")
+ (eq_attr "type" "fsqrtd"))
+ "hip09_fsu0123")
+
+(define_insn_reservation "hip09_fp_mul" 3
+ (and (eq_attr "tune" "hip09")
+ (eq_attr "type" "fmuls,fmuld"))
+ "hip09_fsu0123")
+
+(define_insn_reservation "hip09_fp_add" 2
+ (and (eq_attr "tune" "hip09")
+ (eq_attr "type" "fadds,faddd,f_minmaxs,f_minmaxd,f_cvt,\
+ f_rints,f_rintd"))
+ "hip09_fsu0123")
+
+(define_insn_reservation "hip09_fp_mac" 4
+ (and (eq_attr "tune" "hip09")
+ (eq_attr "type" "fmacs,fmacd"))
+ "hip09_fsu0123")
+
+;; FP miscellaneous instructions.
+
+(define_insn_reservation "hip09_fp_cvt" 5
+ (and (eq_attr "tune" "hip09")
+ (eq_attr "type" "f_cvtf2i"))
+ "hip09_fsu0123+hip09_alus23")
+
+(define_insn_reservation "hip09_fp_cvt2" 5
+ (and (eq_attr "tune" "hip09")
+ (eq_attr "type" "f_cvti2f"))
+ "hip09_alus01+hip09_fsu0123")
+
+;; FP Load Instructions
+
+(define_insn_reservation "hip09_fp_load" 7
+ (and (eq_attr "tune" "hip09")
+ (eq_attr "type" "f_loads,f_loadd"))
+ "hip09_ld01")
+
+(define_insn_reservation "hip09_fp_load2" 6
+ (and (eq_attr "tune" "hip09")
+ (eq_attr "type" "neon_ldp_q,neon_ldp"))
+ "hip09_ld01+hip09_alus01")
+
+;; FP store instructions
+
+(define_insn_reservation "hip09_fp_store" 2
+ (and (eq_attr "tune" "hip09")
+ (eq_attr "type" "f_stores,f_stored"))
+ "hip09_st01+hip09_std01")
+
+;; ASIMD integer instructions
+
+(define_insn_reservation "hip09_asimd_base1" 1
+ (and (eq_attr "tune" "hip09")
+ (eq_attr "hip09_type" "hip09_neon_abs"))
+ "hip09_fsu0123")
+
+(define_insn_reservation "hip09_asimd_base2" 2
+ (and (eq_attr "tune" "hip09")
+ (eq_attr "hip09_type" "hip09_neon_fp_arith"))
+ "hip09_fsu0123")
+
+(define_insn_reservation "hip09_asimd_base3" 3
+ (and (eq_attr "tune" "hip09")
+ (eq_attr "hip09_type" "hip09_neon_mul"))
+ "hip09_fsu0123")
+
+(define_insn_reservation "hip09_asimd_base4" 4
+ (and (eq_attr "tune" "hip09")
+ (eq_attr "hip09_type" "hip09_neon_mla"))
+ "hip09_fsu0123")
+
+(define_insn_reservation "hip09_asimd_base5" 5
+ (and (eq_attr "tune" "hip09")
+ (eq_attr "type" "neon_fp_mul_s"))
+ "hip09_fsu0123")
+
+(define_insn_reservation "hip09_asimd_dot" 6
+ (and (eq_attr "tune" "hip09")
+ (eq_attr "hip09_type" "hip09_neon_dot"))
+ "hip09_fsu0123")
+
+(define_insn_reservation "hip09_asimd_bfmmla" 9
+ (and (eq_attr "tune" "hip09")
+ (eq_attr "type" "neon_fp_mla_s_q"))
+ "hip09_fsu0123")
+
+(define_insn_reservation "hip09_asimd_fdiv" 15
+ (and (eq_attr "tune" "hip09")
+ (eq_attr "hip09_type" "hip09_neon_fp_div"))
+ "hip09_fsu0123")
+
+(define_insn_reservation "hip09_asimd_fsqrt" 25
+ (and (eq_attr "tune" "hip09")
+ (eq_attr "hip09_type" "hip09_neon_fp_sqrt"))
+ "hip09_fsu0123")
+
+(define_insn_reservation "hip09_asimd_pmull" 2
+ (and (eq_attr "tune" "hip09")
+ (eq_attr "type" "crypto_pmull"))
+ "hip09_fsu2")
+
+(define_insn_reservation "hip09_asimd_dup" 4
+ (and (eq_attr "tune" "hip09")
+ (eq_attr "hip09_type" "hip09_neon_ins"))
+ "hip09_alus01+hip09_fsu0123")
+
+;; ASIMD load instructions
+
+(define_insn_reservation "hip09_asimd_ld1_reg" 6
+ (and (eq_attr "tune" "hip09")
+ (eq_attr "hip09_type" "hip09_neon_load1"))
+ "hip09_ld01")
+
+(define_insn_reservation "hip09_asimd_ld1_lane" 7
+ (and (eq_attr "tune" "hip09")
+ (eq_attr "hip09_type" "hip09_neon_load1_lanes"))
+ "hip09_ld01+hip09_fsu0123")
+
+(define_insn_reservation "hip09_asimd_ld23" 8
+ (and (eq_attr "tune" "hip09")
+ (eq_attr "hip09_type" "hip09_neon_load2and4"))
+"hip09_ld01+hip09_fsu0123")
+
+(define_insn_reservation "hip09_asimd_ld3_mtp" 9
+ (and (eq_attr "tune" "hip09")
+ (eq_attr "hip09_type" "hip09_neon_load3_3reg"))
+ "hip09_ld01+hip09_fsu0123")
+
+(define_insn_reservation "hip09_asimd_ld4_mtp" 13
+ (and (eq_attr "tune" "hip09")
+ (eq_attr "hip09_type" "hip09_neon_load4_4reg"))
+ "hip09_ld01+hip09_fsu0123")
+
+;; ASIMD store instructions
+
+(define_insn_reservation "hip09_asimd_st12" 1
+ (and (eq_attr "tune" "hip09")
+ (eq_attr "hip09_type" "hip09_neon_store1and2"))
+ "hip09_st01+hip09_std01")
+
+(define_insn_reservation "hip09_asimd_st1_1reg" 2
+ (and (eq_attr "tune" "hip09")
+ (eq_attr "hip09_type" "hip09_neon_store1_1reg"))
+ "hip09_st01+hip09_std01")
+
+(define_insn_reservation "hip09_asimd_st1_2reg" 3
+ (and (eq_attr "tune" "hip09")
+ (eq_attr "hip09_type" "hip09_neon_store1_2reg"))
+ "hip09_st01+hip09_std01")
+
+(define_insn_reservation "hip09_asimd_st1_3reg" 4
+ (and (eq_attr "tune" "hip09")
+ (eq_attr "hip09_type" "hip09_neon_store1_3reg"))
+ "hip09_st01+hip09_std01")
+
+(define_insn_reservation "hip09_asimd_st1_4reg" 5
+ (and (eq_attr "tune" "hip09")
+ (eq_attr "hip09_type" "hip09_neon_store1_4reg"))
+ "hip09_st01+hip09_std01")
+
+(define_insn_reservation "hip09_asimd_st34_lane" 4
+ (and (eq_attr "tune" "hip09")
+ (eq_attr "hip09_type" "hip09_neon_store3and4_lane"))
+ "hip09_fsu0123+hip09_st01+hip09_std01")
+
+(define_insn_reservation "hip09_asimd_st3_mtp" 7
+ (and (eq_attr "tune" "hip09")
+ (eq_attr "hip09_type" "hip09_neon_store3_3reg"))
+ "hip09_fsu0123+hip09_st01+hip09_std01")
+
+(define_insn_reservation "hip09_asimd_st4_mtp" 10
+ (and (eq_attr "tune" "hip09")
+ (eq_attr "hip09_type" "hip09_neon_store4_4reg"))
+ "hip09_fsu0123+hip09_st01+hip09_std01")
+
+;; Cryptography extensions
+
+(define_insn_reservation "hip09_asimd_aes" 2
+ (and (eq_attr "tune" "hip09")
+ (eq_attr "type" "crypto_aese,crypto_aesmc"))
+ "hip09_fsu02")
+
+(define_insn_reservation "hip09_asimd_sha3" 1
+ (and (eq_attr "tune" "hip09")
+ (eq_attr "type" "crypto_sha3"))
+ "hip09_fsu2")
+
+(define_insn_reservation "hip09_asimd_sha1" 2
+ (and (eq_attr "tune" "hip09")
+ (eq_attr "type" "crypto_sha1_fast,crypto_sha1_xor,\
+ crypto_sha256_fast,crypto_sha512,\
+ crypto_sm3"))
+ "hip09_fsu2")
+
+(define_insn_reservation "hip09_asimd_sha1_and256" 4
+ (and (eq_attr "tune" "hip09")
+ (eq_attr "type" "crypto_sha1_slow,crypto_sha256_slow,\
+ crypto_sm4"))
+ "hip09_fsu2")
+
+;; CRC extension.
+
+(define_insn_reservation "hip09_crc" 2
+ (and (eq_attr "tune" "hip09")
+ (eq_attr "type" "crc"))
+ "hip09_alum01")
--
2.33.0

View File

@ -0,0 +1,755 @@
From 824fccdab1d3c5e87fb88b31f0eeb7abd1b35c1f Mon Sep 17 00:00:00 2001
From: XingYuShuai <1150775134@qq.com>
Date: Mon, 26 Feb 2024 20:34:06 +0800
Subject: [PATCH 002/157] Add hip11 CPU pipeline scheduling
This patch adds an mcpu: hip11. It has been tested on aarch64
and no regressions from this patch.
---
gcc/config/aarch64/aarch64-cores.def | 1 +
gcc/config/aarch64/aarch64-cost-tables.h | 104 ++++++
gcc/config/aarch64/aarch64-tune.md | 2 +-
gcc/config/aarch64/aarch64.cc | 108 ++++++
gcc/config/aarch64/aarch64.md | 1 +
gcc/config/aarch64/hip11.md | 418 +++++++++++++++++++++++
gcc/doc/invoke.texi | 2 +-
7 files changed, 634 insertions(+), 2 deletions(-)
create mode 100644 gcc/config/aarch64/hip11.md
diff --git a/gcc/config/aarch64/aarch64-cores.def b/gcc/config/aarch64/aarch64-cores.def
index a854bdb24..601b72abb 100644
--- a/gcc/config/aarch64/aarch64-cores.def
+++ b/gcc/config/aarch64/aarch64-cores.def
@@ -173,6 +173,7 @@ AARCH64_CORE("cortex-a710", cortexa710, cortexa57, 9A, AARCH64_FL_FOR_ARCH9 |
AARCH64_CORE("cortex-x2", cortexx2, cortexa57, 9A, AARCH64_FL_FOR_ARCH9 | AARCH64_FL_SVE2_BITPERM | AARCH64_FL_MEMTAG | AARCH64_FL_I8MM | AARCH64_FL_BF16, neoversen2, 0x41, 0xd48, -1)
AARCH64_CORE("neoverse-n2", neoversen2, cortexa57, 9A, AARCH64_FL_FOR_ARCH9 | AARCH64_FL_I8MM | AARCH64_FL_BF16 | AARCH64_FL_SVE2_BITPERM | AARCH64_FL_RNG | AARCH64_FL_MEMTAG | AARCH64_FL_PROFILE, neoversen2, 0x41, 0xd49, -1)
+AARCH64_CORE("hip11", hip11, hip11, 8_5A, AARCH64_FL_FOR_ARCH8_5| AARCH64_FL_SVE | AARCH64_FL_SVE2 | AARCH64_FL_F16, hip11, 0x48, 0xd22, -1)
AARCH64_CORE("demeter", demeter, cortexa57, 9A, AARCH64_FL_FOR_ARCH9 | AARCH64_FL_I8MM | AARCH64_FL_BF16 | AARCH64_FL_SVE2_BITPERM | AARCH64_FL_RNG | AARCH64_FL_MEMTAG | AARCH64_FL_PROFILE, neoversev2, 0x41, 0xd4f, -1)
AARCH64_CORE("neoverse-v2", neoversev2, cortexa57, 9A, AARCH64_FL_FOR_ARCH9 | AARCH64_FL_I8MM | AARCH64_FL_BF16 | AARCH64_FL_SVE2_BITPERM | AARCH64_FL_RNG | AARCH64_FL_MEMTAG | AARCH64_FL_PROFILE, neoversev2, 0x41, 0xd4f, -1)
diff --git a/gcc/config/aarch64/aarch64-cost-tables.h b/gcc/config/aarch64/aarch64-cost-tables.h
index fc5a3cbe4..0ee427b61 100644
--- a/gcc/config/aarch64/aarch64-cost-tables.h
+++ b/gcc/config/aarch64/aarch64-cost-tables.h
@@ -561,6 +561,110 @@ const struct cpu_cost_table tsv110_extra_costs =
}
};
+const struct cpu_cost_table hip11_extra_costs =
+{
+ /* ALU */
+ {
+ 0, /* arith. */
+ 0, /* logical. */
+ 0, /* shift. */
+ 0, /* shift_reg. */
+ COSTS_N_INSNS (1), /* arith_shift. */
+ COSTS_N_INSNS (1), /* arith_shift_reg. */
+ COSTS_N_INSNS (1), /* log_shift. */
+ COSTS_N_INSNS (1), /* log_shift_reg. */
+ 0, /* extend. */
+ COSTS_N_INSNS (1), /* extend_arith. */
+ 0, /* bfi. */
+ 0, /* bfx. */
+ 0, /* clz. */
+ 0, /* rev. */
+ 0, /* non_exec. */
+ true /* non_exec_costs_exec. */
+ },
+
+ {
+ /* MULT SImode */
+ {
+ COSTS_N_INSNS (2), /* simple. */
+ COSTS_N_INSNS (2), /* flag_setting. */
+ COSTS_N_INSNS (2), /* extend. */
+ COSTS_N_INSNS (2), /* add. */
+ COSTS_N_INSNS (2), /* extend_add. */
+ COSTS_N_INSNS (11) /* idiv. */
+ },
+ /* MULT DImode */
+ {
+ COSTS_N_INSNS (3), /* simple. */
+ 0, /* flag_setting (N/A). */
+ COSTS_N_INSNS (3), /* extend. */
+ COSTS_N_INSNS (3), /* add. */
+ COSTS_N_INSNS (3), /* extend_add. */
+ COSTS_N_INSNS (19) /* idiv. */
+ }
+ },
+ /* LD/ST */
+ {
+ COSTS_N_INSNS (3), /* load. */
+ COSTS_N_INSNS (4), /* load_sign_extend. */
+ COSTS_N_INSNS (3), /* ldrd. */
+ COSTS_N_INSNS (3), /* ldm_1st. */
+ 1, /* ldm_regs_per_insn_1st. */
+ 2, /* ldm_regs_per_insn_subsequent. */
+ COSTS_N_INSNS (4), /* loadf. */
+ COSTS_N_INSNS (4), /* loadd. */
+ COSTS_N_INSNS (4), /* load_unaligned. */
+ 0, /* store. */
+ 0, /* strd. */
+ 0, /* stm_1st. */
+ 1, /* stm_regs_per_insn_1st. */
+ 2, /* stm_regs_per_insn_subsequent. */
+ 0, /* storef. */
+ 0, /* stored. */
+ COSTS_N_INSNS (1), /* store_unaligned. */
+ COSTS_N_INSNS (4), /* loadv. */
+ COSTS_N_INSNS (4) /* storev. */
+ },
+ {
+ /* FP SFmode */
+ {
+ COSTS_N_INSNS (10), /* div. */
+ COSTS_N_INSNS (4), /* mult. */
+ COSTS_N_INSNS (4), /* mult_addsub. */
+ COSTS_N_INSNS (4), /* fma. */
+ COSTS_N_INSNS (4), /* addsub. */
+ COSTS_N_INSNS (1), /* fpconst. */
+ COSTS_N_INSNS (1), /* neg. */
+ COSTS_N_INSNS (1), /* compare. */
+ COSTS_N_INSNS (2), /* widen. */
+ COSTS_N_INSNS (2), /* narrow. */
+ COSTS_N_INSNS (2), /* toint. */
+ COSTS_N_INSNS (1), /* fromint. */
+ COSTS_N_INSNS (2) /* roundint. */
+ },
+ /* FP DFmode */
+ {
+ COSTS_N_INSNS (17), /* div. */
+ COSTS_N_INSNS (4), /* mult. */
+ COSTS_N_INSNS (6), /* mult_addsub. */
+ COSTS_N_INSNS (6), /* fma. */
+ COSTS_N_INSNS (3), /* addsub. */
+ COSTS_N_INSNS (1), /* fpconst. */
+ COSTS_N_INSNS (1), /* neg. */
+ COSTS_N_INSNS (1), /* compare. */
+ COSTS_N_INSNS (2), /* widen. */
+ COSTS_N_INSNS (2), /* narrow. */
+ COSTS_N_INSNS (2), /* toint. */
+ COSTS_N_INSNS (1), /* fromint. */
+ COSTS_N_INSNS (2) /* roundint. */
+ }
+ },
+ /* Vector */
+ {
+ COSTS_N_INSNS (1) /* alu. */
+ }
+};
+
const struct cpu_cost_table a64fx_extra_costs =
{
/* ALU */
diff --git a/gcc/config/aarch64/aarch64-tune.md b/gcc/config/aarch64/aarch64-tune.md
index 238bb6e31..511422081 100644
--- a/gcc/config/aarch64/aarch64-tune.md
+++ b/gcc/config/aarch64/aarch64-tune.md
@@ -1,5 +1,5 @@
;; -*- buffer-read-only: t -*-
;; Generated automatically by gentune.sh from aarch64-cores.def
(define_attr "tune"
- "cortexa34,cortexa35,cortexa53,cortexa57,cortexa72,cortexa73,thunderx,thunderxt88p1,thunderxt88,octeontx,octeontxt81,octeontxt83,thunderxt81,thunderxt83,ampere1,ampere1a,emag,xgene1,falkor,qdf24xx,exynosm1,phecda,thunderx2t99p1,vulcan,thunderx2t99,cortexa55,cortexa75,cortexa76,cortexa76ae,cortexa77,cortexa78,cortexa78ae,cortexa78c,cortexa65,cortexa65ae,cortexx1,ares,neoversen1,neoversee1,octeontx2,octeontx2t98,octeontx2t96,octeontx2t93,octeontx2f95,octeontx2f95n,octeontx2f95mm,a64fx,tsv110,hip09,thunderx3t110,zeus,neoversev1,neoverse512tvb,saphira,cortexa57cortexa53,cortexa72cortexa53,cortexa73cortexa35,cortexa73cortexa53,cortexa75cortexa55,cortexa76cortexa55,cortexr82,cortexa510,cortexa710,cortexx2,neoversen2,demeter,neoversev2"
+ "cortexa34,cortexa35,cortexa53,cortexa57,cortexa72,cortexa73,thunderx,thunderxt88p1,thunderxt88,octeontx,octeontxt81,octeontxt83,thunderxt81,thunderxt83,ampere1,ampere1a,emag,xgene1,falkor,qdf24xx,exynosm1,phecda,thunderx2t99p1,vulcan,thunderx2t99,cortexa55,cortexa75,cortexa76,cortexa76ae,cortexa77,cortexa78,cortexa78ae,cortexa78c,cortexa65,cortexa65ae,cortexx1,ares,neoversen1,neoversee1,octeontx2,octeontx2t98,octeontx2t96,octeontx2t93,octeontx2f95,octeontx2f95n,octeontx2f95mm,a64fx,tsv110,hip09,thunderx3t110,zeus,neoversev1,neoverse512tvb,saphira,cortexa57cortexa53,cortexa72cortexa53,cortexa73cortexa35,cortexa73cortexa53,cortexa75cortexa55,cortexa76cortexa55,cortexr82,cortexa510,cortexa710,cortexx2,neoversen2,hip11,demeter,neoversev2"
(const (symbol_ref "((enum attr_tune) aarch64_tune)")))
diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
index e9b3980c4..7c62ddb2a 100644
--- a/gcc/config/aarch64/aarch64.cc
+++ b/gcc/config/aarch64/aarch64.cc
@@ -481,6 +481,22 @@ static const struct cpu_addrcost_table hip09_addrcost_table =
0, /* imm_offset */
};
+static const struct cpu_addrcost_table hip11_addrcost_table =
+{
+ {
+ 1, /* hi */
+ 0, /* si */
+ 0, /* di */
+ 1, /* ti */
+ },
+ 0, /* pre_modify */
+ 0, /* post_modify */
+ 0, /* register_offset */
+ 1, /* register_sextend */
+ 1, /* register_zextend */
+ 0, /* imm_offset */
+};
+
static const struct cpu_addrcost_table qdf24xx_addrcost_table =
{
{
@@ -666,6 +682,16 @@ static const struct cpu_regmove_cost tsv110_regmove_cost =
2 /* FP2FP */
};
+static const struct cpu_regmove_cost hip11_regmove_cost =
+{
+ 1, /* GP2GP */
+ /* Avoid the use of slow int<->fp moves for spilling by setting
+ their cost higher than memmov_cost. */
+ 2, /* GP2FP */
+ 3, /* FP2GP */
+ 2 /* FP2FP */
+};
+
static const struct cpu_regmove_cost a64fx_regmove_cost =
{
1, /* GP2GP */
@@ -1010,6 +1036,43 @@ static const struct cpu_vector_cost hip09_vector_cost =
nullptr /* issue_info */
};
+static const advsimd_vec_cost hip11_advsimd_vector_cost =
+{
+ 2, /* int_stmt_cost */
+ 2, /* fp_stmt_cost */
+ 0, /* ld2_st2_permute_cost */
+ 0, /* ld3_st3_permute_cost */
+ 0, /* ld4_st4_permute_cost */
+ 2, /* permute_cost */
+ 3, /* reduc_i8_cost */
+ 3, /* reduc_i16_cost */
+ 3, /* reduc_i32_cost */
+ 3, /* reduc_i64_cost */
+ 3, /* reduc_f16_cost */
+ 3, /* reduc_f32_cost */
+ 3, /* reduc_f64_cost */
+ 3, /* store_elt_extra_cost */
+ 5, /* vec_to_scalar_cost */
+ 5, /* scalar_to_vec_cost */
+ 5, /* align_load_cost */
+ 5, /* unalign_load_cost */
+ 1, /* unalign_store_cost */
+ 1 /* store_cost */
+};
+
+static const struct cpu_vector_cost hip11_vector_cost =
+{
+ 1, /* scalar_int_stmt_cost */
+ 1, /* scalar_fp_stmt_cost */
+ 5, /* scalar_load_cost */
+ 1, /* scalar_store_cost */
+ 1, /* cond_taken_branch_cost */
+ 1, /* cond_not_taken_branch_cost */
+ &hip11_advsimd_vector_cost, /* advsimd */
+ nullptr, /* sve */
+ nullptr /* issue_info */
+};
+
static const advsimd_vec_cost cortexa57_advsimd_vector_cost =
{
2, /* int_stmt_cost */
@@ -1368,6 +1431,17 @@ static const cpu_prefetch_tune hip09_prefetch_tune =
-1 /* default_opt_level */
};
+static const cpu_prefetch_tune hip11_prefetch_tune =
+{
+ 0, /* num_slots */
+ 64, /* l1_cache_size */
+ 64, /* l1_cache_line_size */
+ 512, /* l2_cache_size */
+ true, /* prefetch_dynamic_strides */
+ -1, /* minimum_stride */
+ -1 /* default_opt_level */
+};
+
static const cpu_prefetch_tune xgene1_prefetch_tune =
{
8, /* num_slots */
@@ -1767,6 +1841,40 @@ static const struct tune_params hip09_tunings =
&hip09_prefetch_tune
};
+static const struct tune_params hip11_tunings =
+{
+ &hip11_extra_costs,
+ &hip11_addrcost_table,
+ &hip11_regmove_cost,
+ &hip11_vector_cost,
+ &generic_branch_cost,
+ &generic_approx_modes,
+ SVE_512, /* sve_width */
+ { 4, /* load_int. */
+ 4, /* store_int. */
+ 4, /* load_fp. */
+ 4, /* store_fp. */
+ 4, /* load_pred. */
+ 4 /* store_pred. */
+ }, /* memmov_cost. */
+ 4, /* issue_rate */
+ (AARCH64_FUSE_AES_AESMC | AARCH64_FUSE_ALU_BRANCH
+ | AARCH64_FUSE_ALU_CBZ), /* fusible_ops */
+ "16", /* function_align. */
+ "4", /* jump_align. */
+ "8", /* loop_align. */
+ 2, /* int_reassoc_width. */
+ 4, /* fp_reassoc_width. */
+ 1, /* vec_reassoc_width. */
+ 2, /* min_div_recip_mul_sf. */
+ 2, /* min_div_recip_mul_df. */
+ 0, /* max_case_values. */
+ tune_params::AUTOPREFETCHER_WEAK, /* autoprefetcher_model. */
+ (AARCH64_EXTRA_TUNE_USE_NEW_VECTOR_COSTS
+ | AARCH64_EXTRA_TUNE_MATCHED_VECTOR_THROUGHPUT), /* tune_flags. */
+ &hip11_prefetch_tune
+};
+
static const struct tune_params xgene1_tunings =
{
&xgene1_extra_costs,
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index cf699e4c7..c0c64a798 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -478,6 +478,7 @@
(include "tsv110.md")
(include "thunderx3t110.md")
(include "hip09.md")
+(include "hip11.md")
;; -------------------------------------------------------------------
;; Jumps and other miscellaneous insns
diff --git a/gcc/config/aarch64/hip11.md b/gcc/config/aarch64/hip11.md
new file mode 100644
index 000000000..45f91e65b
--- /dev/null
+++ b/gcc/config/aarch64/hip11.md
@@ -0,0 +1,418 @@
+;; hip11 pipeline description
+;; Copyright (C) 2018-2024 Free Software Foundation, Inc.
+;;
+;; This file is part of GCC.
+;;
+;; GCC is free software; you can redistribute it and/or modify it
+;; under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 3, or (at your option)
+;; any later version.
+;;
+;; GCC is distributed in the hope that it will be useful, but
+;; WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+;; General Public License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+
+(define_automaton "hip11")
+
+;; The hip11 core is modelled as an issue pipeline that has
+;; the following functional units.
+;; 1. Three pipelines for integer operations: ALU1, ALU2, ALU3
+
+(define_cpu_unit "hip11_alu1_issue" "hip11")
+(define_reservation "hip11_alu1" "hip11_alu1_issue")
+
+(define_cpu_unit "hip11_alu2_issue" "hip11")
+(define_reservation "hip11_alu2" "hip11_alu2_issue")
+
+(define_cpu_unit "hip11_alu3_issue" "hip11")
+(define_reservation "hip11_alu3" "hip11_alu3_issue")
+
+(define_reservation "hip11alu" "hip11_alu1|hip11_alu2|hip11_alu3")
+
+;; 2. One pipeline for complex integer operations: MDU
+
+(define_cpu_unit "hip11_mdu_issue" "hip11")
+(define_reservation "hip11_mdu" "hip11_mdu_issue")
+
+;; 3. Two asymmetric pipelines for Asimd and FP operations: FSU1, FSU2
+(define_automaton "hip11_fsu")
+
+(define_cpu_unit "hip11_fsu1_issue"
+ "hip11_fsu")
+(define_cpu_unit "hip11_fsu2_issue"
+ "hip11_fsu")
+
+(define_reservation "hip11_fsu1" "hip11_fsu1_issue")
+(define_reservation "hip11_fsu2" "hip11_fsu2_issue")
+(define_reservation "hip11_fsu_pipe" "hip11_fsu1|hip11_fsu2")
+
+;; 4. Two pipelines for branch operations, shared with alu2 and alu3: BRU1, BRU2
+
+;; 5. Two pipelines for load and store operations: LS1, LS2.
+
+(define_cpu_unit "hip11_ls1_issue" "hip11")
+(define_cpu_unit "hip11_ls2_issue" "hip11")
+(define_reservation "hip11_ls1" "hip11_ls1_issue")
+(define_reservation "hip11_ls2" "hip11_ls2_issue")
+
+;; Block all issue queues.
+
+(define_reservation "hip11_block" "hip11_fsu1_issue + hip11_fsu2_issue
+ + hip11_mdu_issue + hip11_alu1_issue
+ + hip11_alu2_issue + hip11_alu3_issue + hip11_ls1_issue + hip11_ls2_issue")
+
+;; Branch execution Unit
+;;
+(define_insn_reservation "hip11_branch" 1
+ (and (eq_attr "tune" "hip11")
+ (eq_attr "type" "branch"))
+ "hip11_alu2|hip11_alu3")
+
+(define_insn_reservation "hip11_return_from_subroutine" 6
+ (and (eq_attr "tune" "hip11")
+ (eq_attr "type" "branch")
+ (eq_attr "sls_length" "retbr"))
+ "hip11_mdu,(hip11_alu2|hip11_alu3)")
+
+;; Simple Execution Unit:
+;;
+;; Simple ALU without shift
+(define_insn_reservation "hip11_alu" 1
+ (and (eq_attr "tune" "hip11")
+ (eq_attr "type" "alu_imm,logic_imm,\
+ alu_sreg,logic_reg,\
+ adc_imm,adc_reg,\
+ adr,bfm,clz,rbit,rev,\
+ shift_imm,shift_reg,\
+ mov_imm,mov_reg,\
+ mvn_imm,mvn_reg,\
+ mrs,multiple,csel,\
+ rotate_imm"))
+ "hip11_alu1|hip11_alu2|hip11_alu3")
+
+(define_insn_reservation "hip11_alus" 1
+ (and (eq_attr "tune" "hip11")
+ (eq_attr "type" "alus_imm,logics_imm,\
+ alus_sreg,logics_reg,\
+ adcs_imm,adcs_reg"))
+ "hip11_alu2|hip11_alu3")
+
+;; ALU ops with shift
+(define_insn_reservation "hip11_alu_shift" 2
+ (and (eq_attr "tune" "hip11")
+ (eq_attr "type" "extend,\
+ alu_shift_imm_lsl_1to4,alu_shift_imm_other,alu_shift_reg,\
+ crc,logic_shift_imm,logic_shift_reg,\
+ mov_shift,mvn_shift,\
+ mov_shift_reg,mvn_shift_reg"))
+ "hip11_mdu")
+
+(define_insn_reservation "hip11_alus_shift" 2
+ (and (eq_attr "tune" "hip11")
+ (eq_attr "type" "alus_shift_imm,alus_shift_reg,\
+ logics_shift_imm,logics_shift_reg"))
+ "hip11_alu2|hip11_alu3")
+
+;; Multiply instructions
+(define_insn_reservation "hip11_mult" 3
+ (and (eq_attr "tune" "hip11")
+ (ior (eq_attr "mul32" "yes")
+ (eq_attr "widen_mul64" "yes")))
+ "hip11_mdu")
+
+;; Integer divide
+(define_insn_reservation "hip11_div" 10
+ (and (eq_attr "tune" "hip11")
+ (eq_attr "type" "udiv,sdiv"))
+ "hip11_mdu")
+
+(define_insn_reservation "hip11_mla" 4
+ (and (eq_attr "tune" "hip11")
+ (eq_attr "type" "mla,smlal,umlal,smull,umull"))
+ "hip11_mdu")
+
+;; Block all issue pipes for a cycle
+(define_insn_reservation "hip11_block" 1
+ (and (eq_attr "tune" "hip11")
+ (eq_attr "type" "block"))
+ "hip11_block")
+
+;; Load-store execution Unit
+;;
+(define_insn_reservation "hip11_load1" 4
+ (and (eq_attr "tune" "hip11")
+ (eq_attr "type" "load_4,load_8,load_16"))
+ "hip11_ls1|hip11_ls2")
+
+(define_insn_reservation "hip11_fp_load" 5
+ (and (eq_attr "tune" "hip11")
+ (eq_attr "type" "f_loads,f_loadd"))
+ "hip11_ls1|hip11_ls2")
+
+(define_insn_reservation "hip11_neon_ld1_single" 7
+ (and (eq_attr "tune" "hip11")
+ (eq_attr "type" "neon_load1_one_lane,neon_load1_one_lane_q,\
+ neon_load1_all_lanes,neon_load1_all_lanes_q"))
+ "(hip11_ls1|hip11_ls2)+hip11_fsu1")
+
+(define_insn_reservation "hip11_neon_ld1_1reg" 5
+ (and (eq_attr "tune" "hip11")
+ (eq_attr "type" "neon_load1_1reg,neon_load1_1reg_q"))
+ "hip11_ls1|hip11_ls2")
+
+(define_insn_reservation "hip11_neon_ld1_2reg" 6
+ (and (eq_attr "tune" "hip11")
+ (eq_attr "type" "neon_load1_2reg,neon_load1_2reg_q"))
+ "hip11_ls1|hip11_ls2")
+
+(define_insn_reservation "hip11_neon_ld1_3reg" 7
+ (and (eq_attr "tune" "hip11")
+ (eq_attr "type" "neon_load1_3reg,neon_load1_3reg_q"))
+ "hip11_ls1|hip11_ls2")
+
+(define_insn_reservation "hip11_neon_ld1_4reg" 8
+ (and (eq_attr "tune" "hip11")
+ (eq_attr "type" "neon_load1_4reg,neon_load1_4reg_q"))
+ "hip11_ls1|hip11_ls2")
+
+(define_insn_reservation "hip11_neon_ld2" 8
+ (and (eq_attr "tune" "hip11")
+ (eq_attr "type" "neon_load2_one_lane,neon_load2_one_lane_q,\
+ neon_load2_all_lanes,neon_load2_all_lanes_q,\
+ neon_load2_2reg,neon_load2_2reg_q,\
+ neon_load2_4reg,neon_load2_4reg_q"))
+ "(hip11_ls1|hip11_ls2)+hip11_fsu1")
+
+(define_insn_reservation "hip11_neon_ld3_single" 9
+ (and (eq_attr "tune" "hip11")
+ (eq_attr "type" "neon_load3_one_lane,neon_load3_one_lane_q,\
+ neon_load3_all_lanes,neon_load3_all_lanes_q"))
+ "(hip11_ls1|hip11_ls2)+hip11_fsu1")
+
+(define_insn_reservation "hip11_neon_ld3_multiple" 13
+ (and (eq_attr "tune" "hip11")
+ (eq_attr "type" "neon_load3_3reg,neon_load3_3reg_q"))
+ "(hip11_ls1|hip11_ls2)+hip11_fsu1")
+
+(define_insn_reservation "hip11_neon_ld4_single" 10
+ (and (eq_attr "tune" "hip11")
+ (eq_attr "type" "neon_load4_one_lane,neon_load4_one_lane_q,\
+ neon_load4_all_lanes,neon_load4_all_lanes_q"))
+ "(hip11_ls1|hip11_ls2)+hip11_fsu1")
+
+(define_insn_reservation "hip11_neon_ld4_multiple" 11
+ (and (eq_attr "tune" "hip11")
+ (eq_attr "type" "neon_load4_4reg,neon_load4_4reg_q"))
+ "(hip11_ls1|hip11_ls2)+hip11_fsu1")
+
+;; Stores of up to two words.
+(define_insn_reservation "hip11_store1" 1
+ (and (eq_attr "tune" "hip11")
+ (eq_attr "type" "store_4,store_8,store_16,\
+ f_stored,f_stores"))
+ "hip11_ls1|hip11_ls2")
+
+;; Floating-Point Operations.
+(define_insn_reservation "hip11_fp_arith" 2
+ (and (eq_attr "tune" "hip11")
+ (eq_attr "type" "ffariths,ffarithd,f_minmaxs,\
+ f_minmaxd,fadds,faddd,neon_fcadd"))
+ "hip11_fsu_pipe")
+
+(define_insn_reservation "hip11_fp_mul" 3
+ (and (eq_attr "tune" "hip11")
+ (eq_attr "type" "neon_fp_mul_d,neon_fp_mul_d_q,\
+ neon_fp_mul_s_scalar,neon_fp_mul_s_scalar_q,\
+ neon_fp_mul_d_scalar_q,fmuld,fmuls"))
+ "hip11_fsu_pipe")
+
+(define_insn_reservation "hip11_fp_cmp" 2
+ (and (eq_attr "tune" "hip11")
+ (eq_attr "type" "fccmpd,fccmps"))
+ "hip11alu,hip11_fsu_pipe")
+
+(define_insn_reservation "hip11_fp_csel" 2
+ (and (eq_attr "tune" "hip11")
+ (eq_attr "type" "fcsel"))
+ "hip11alu,hip11_fsu1")
+
+(define_insn_reservation "hip11_fp_fcmp" 1
+ (and (eq_attr "tune" "hip11")
+ (eq_attr "type" "fcmpd,fcmps"))
+ "hip11_fsu_pipe")
+
+(define_insn_reservation "hip11_fp_divs" 7
+ (and (eq_attr "tune" "hip11")
+ (eq_attr "type" "fdivs"))
+ "hip11_fsu1")
+
+(define_insn_reservation "hip11_fp_divd" 10
+ (and (eq_attr "tune" "hip11")
+ (eq_attr "type" "fdivd"))
+ "hip11_fsu1")
+
+(define_insn_reservation "hip11_fp_sqrts" 9
+ (and (eq_attr "tune" "hip11")
+ (eq_attr "type" "fsqrts"))
+ "hip11_fsu1")
+
+(define_insn_reservation "hip11_fp_sqrtd" 15
+ (and (eq_attr "tune" "hip11")
+ (eq_attr "type" "fsqrtd"))
+ "hip11_fsu1")
+
+(define_insn_reservation "hip11_fp_mac" 4
+ (and (eq_attr "tune" "hip11")
+ (eq_attr "type" "fmacs,ffmas,fmacd,ffmad"))
+ "hip11_fsu_pipe")
+
+(define_insn_reservation "hip11_fp_mov" 1
+ (and (eq_attr "tune" "hip11")
+ (eq_attr "type" "fmov,neon_dup,neon_dup_q,\
+ neon_from_gp,neon_from_gp_q,\
+ neon_ins,neon_ins_q,\
+ neon_to_gp,neon_to_gp_q,\
+ neon_move,neon_move_q,\
+ neon_rev,neon_rev_q,\
+ neon_permute,neon_permute_q,\
+ neon_shift_imm_narrow_q,\
+ neon_ext,neon_ext_q,\
+ neon_rbit,\
+ crypto_sha3,neon_tbl1,neon_tbl1_q,\
+ neon_tbl2_q,f_mcr,neon_tst,neon_tst_q,\
+ neon_move_narrow_q"))
+ "hip11_fsu1")
+
+;; ASIMD instructions
+(define_insn_reservation "hip11_asimd_simple_arithmetic" 2
+ (and (eq_attr "tune" "hip11")
+ (eq_attr "type" "neon_abs,neon_abs_q,neon_neg,neon_neg_q,\
+ neon_abd,neon_abd_q,\
+ neon_add_long,neon_sub_long,neon_sub_widen,neon_add_widen,\
+ neon_add_halve_narrow_q,neon_sub_halve_narrow_q,\
+ neon_arith_acc,neon_arith_acc_q,\
+ neon_compare,neon_compare_q,\
+ neon_compare_zero,neon_compare_zero_q,\
+ neon_minmax,neon_minmax_q,\
+ neon_logic,neon_logic_q,\
+ neon_reduc_add,neon_reduc_add_q,\
+ neon_reduc_minmax,neon_reduc_minmax_q,\
+ neon_fp_to_int_s,neon_fp_to_int_s_q,\
+ neon_fp_to_int_d,neon_fp_to_int_d_q,\
+ neon_fp_cvt_widen_s,\
+ neon_fp_cvt_narrow_d_q,\
+ neon_cls,neon_cls_q,\
+ neon_cnt,neon_cnt_q,\
+ f_rints,f_rintd,f_cvtf2i,f_cvt,\
+ neon_tbl3,neon_fp_round_s,neon_fp_round_s_q,\
+ neon_fp_round_d,neon_fp_round_d_q,\
+ neon_int_to_fp_s,neon_fp_recpe_s,neon_fp_recpe_s_q,\
+ neon_fp_recpe_d,neon_fp_recpe_d_q,\
+ neon_fp_cvt_narrow_s_q,\
+ crypto_aese,crypto_aesmc,\
+ crypto_sha1_fast,crypto_sha1_xor,\
+ crypto_sha1_slow,\
+ crypto_sha256_fast,\
+ crypto_sha512,crypto_sm3,\
+ neon_qabs,neon_qabs_q,\
+ neon_qneg,neon_qneg_q,\
+ neon_qadd,neon_qadd_q,\
+ neon_qsub,neon_qsub_q,\
+ neon_add_halve,neon_add_halve_q,\
+ neon_sub_halve,neon_sub_halve_q,\
+ neon_fp_reduc_minmax_s,neon_fp_reduc_minmax_s_q,\
+ neon_fp_reduc_minmax_d,neon_fp_reduc_minmax_d_q,\
+ neon_fp_rsqrte_s,neon_fp_rsqrte_s_q,\
+ neon_fp_rsqrte_d,neon_fp_rsqrte_d_q"))
+ "hip11_fsu1")
+
+(define_insn_reservation "hip11_asimd_complex_arithmetic" 4
+ (and (eq_attr "tune" "hip11")
+ (eq_attr "type" "neon_mul_b,neon_mul_b_q,\
+ neon_mul_h,neon_mul_h_q,\
+ neon_mul_s,neon_mul_s_q,\
+ neon_mla_b,neon_mla_b_q,\
+ neon_mla_h,neon_mla_h_q,\
+ neon_mla_s,\
+ neon_mla_h_scalar,neon_mla_h_scalar_q,\
+ neon_mla_s_scalar,neon_mla_s_scalar_q,\
+ neon_sat_mul_h_scalar,neon_sat_mul_h_scalar_q,\
+ neon_sat_mul_s_scalar,neon_sat_mul_s_scalar_q,\
+ neon_sat_mul_b,neon_sat_mul_b_q,\
+ neon_sat_mul_h,neon_sat_mul_h_q,\
+ neon_sat_mul_s,neon_sat_mul_s_q,\
+ neon_mla_b_long,neon_mla_h_long,neon_mla_s_long,\
+ neon_mul_b_long,neon_mul_h_long,neon_mul_s_long,\
+ neon_sat_mla_b_long,neon_sat_mla_h_long,neon_sat_mla_s_long,\
+ neon_sat_mla_h_scalar_long,neon_sat_mla_s_scalar_long,\
+ neon_sat_mul_b_long,neon_sat_mul_h_long,neon_sat_mul_s_long,\
+ neon_sat_mul_h_scalar_long,neon_sat_mul_s_scalar_long,\
+ crypto_pmull,\
+ neon_sat_shift_reg,neon_sat_shift_reg_q,\
+ neon_shift_reg,neon_shift_reg_q,\
+ neon_shift_imm,neon_shift_imm_q,\
+ neon_shift_imm_long,\
+ neon_sat_shift_imm,neon_sat_shift_imm_q,\
+ neon_sat_shift_imm_narrow_q,\
+ neon_shift_acc,neon_shift_acc_q,\
+ crypto_sha256_slow"))
+ "hip11_fsu1")
+
+(define_insn_reservation "hip11_asimd_fp_compare" 2
+ (and (eq_attr "tune" "hip11")
+ (eq_attr "type" "neon_fp_abs_s,neon_fp_abs_s_q,\
+ neon_fp_abs_d,neon_fp_abs_d_q,\
+ neon_fp_neg_s,neon_fp_neg_s_q,\
+ neon_fp_neg_d,neon_fp_neg_d_q,\
+ neon_fp_compare_s,neon_fp_compare_s_q,\
+ neon_fp_compare_d,neon_fp_compare_d_q,\
+ neon_fp_minmax_s,neon_fp_minmax_s_q,\
+ neon_fp_minmax_d,neon_fp_minmax_d_q,\
+ neon_fp_addsub_s,neon_fp_addsub_s_q,\
+ neon_fp_addsub_d,neon_fp_addsub_d_q,\
+ neon_fp_reduc_add_s,neon_fp_reduc_add_s_q,\
+ neon_fp_reduc_add_d,neon_fp_reduc_add_d_q,\
+ neon_fp_abd_s,neon_fp_abd_s_q,\
+ neon_fp_abd_d,neon_fp_abd_d_q"))
+ "hip11_fsu_pipe")
+
+(define_insn_reservation "hip11_asimd_fdiv" 10
+ (and (eq_attr "tune" "hip11")
+ (eq_attr "type" "neon_fp_div_s,neon_fp_div_s_q,\
+ neon_fp_div_d,neon_fp_div_d_q"))
+ "hip11_fsu1")
+
+(define_insn_reservation "hip11_asimd_fsqrt" 15
+ (and (eq_attr "tune" "hip11")
+ (eq_attr "type" "neon_fp_sqrt_s,neon_fp_sqrt_s_q,\
+ neon_fp_sqrt_d,neon_fp_sqrt_d_q"))
+ "hip11_fsu1")
+
+(define_insn_reservation "hip11_asimd_fp_multiply_add" 4
+ (and (eq_attr "tune" "hip11")
+ (eq_attr "type" "neon_fp_mla_s,neon_fp_mla_s_q,\
+ neon_fp_mla_d,neon_fp_mla_d_q,\
+ neon_fp_mla_s_scalar,neon_fp_mla_s_scalar_q,\
+ neon_fp_mul_s,neon_fp_mul_s_q,neon_fcmla,\
+ neon_fp_recps_s,neon_fp_recps_s_q,\
+ neon_fp_recps_d,neon_fp_recps_d_q,\
+ neon_fp_rsqrts_s,neon_fp_rsqrts_s_q,\
+ neon_fp_rsqrts_d,neon_fp_rsqrts_d_q"))
+ "hip11_fsu_pipe")
+
+(define_insn_reservation "hip11_asimd_frecpx" 3
+ (and (eq_attr "tune" "hip11")
+ (eq_attr "type" "neon_fp_recpx_s,neon_fp_recpx_s_q,\
+ neon_fp_recpx_d,neon_fp_recpx_d_q,neon_tbl4,\
+ neon_dot,neon_dot_q"))
+ "hip11_fsu1")
+
+(define_insn_reservation "hip11_asimd_mmla" 6
+ (and (eq_attr "tune" "hip11")
+ (eq_attr "type" "neon_mla_s_q"))
+ "hip11_fsu1")
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index 7ca60dd64..17d9e4126 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -19212,7 +19212,7 @@ performance of the code. Permissible values for this option are:
@samp{octeontx2}, @samp{octeontx2t98}, @samp{octeontx2t96}
@samp{octeontx2t93}, @samp{octeontx2f95}, @samp{octeontx2f95n},
@samp{octeontx2f95mm},
-@samp{a64fx},
+@samp{a64fx}, @samp{hip11},
@samp{thunderx}, @samp{thunderxt88},
@samp{thunderxt88p1}, @samp{thunderxt81}, @samp{tsv110},
@samp{thunderxt83}, @samp{thunderx2t99}, @samp{thunderx3t110}, @samp{zeus},
--
2.33.0

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,34 @@
From 72c48ade495ef99ef032a6c44365eb102b74888e Mon Sep 17 00:00:00 2001
From: xiezhiheng <xiezhiheng@huawei.com>
Date: Fri, 23 Aug 2024 15:14:04 +0800
Subject: [PATCH 004/157] [SME] Remove hip09 and hip11 in aarch64-cores.def to
backport SME
Will apply it in the end.
---
gcc/config/aarch64/aarch64-cores.def | 2 --
1 file changed, 2 deletions(-)
diff --git a/gcc/config/aarch64/aarch64-cores.def b/gcc/config/aarch64/aarch64-cores.def
index 601b72abb..70b11eb80 100644
--- a/gcc/config/aarch64/aarch64-cores.def
+++ b/gcc/config/aarch64/aarch64-cores.def
@@ -130,7 +130,6 @@ AARCH64_CORE("a64fx", a64fx, a64fx, 8_2A, AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_F
/* HiSilicon ('H') cores. */
AARCH64_CORE("tsv110", tsv110, tsv110, 8_2A, AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_CRYPTO | AARCH64_FL_F16 | AARCH64_FL_AES | AARCH64_FL_SHA2, tsv110, 0x48, 0xd01, -1)
-AARCH64_CORE("hip09", hip09, hip09, 8_5A, AARCH64_FL_FOR_ARCH8_5 | AARCH64_FL_SVE | AARCH64_FL_I8MM | AARCH64_FL_F32MM | AARCH64_FL_F64MM | AARCH64_FL_PROFILE | AARCH64_FL_PREDRES, hip09, 0x48, 0xd02, 0x0)
/* ARMv8.3-A Architecture Processors. */
@@ -173,7 +172,6 @@ AARCH64_CORE("cortex-a710", cortexa710, cortexa57, 9A, AARCH64_FL_FOR_ARCH9 |
AARCH64_CORE("cortex-x2", cortexx2, cortexa57, 9A, AARCH64_FL_FOR_ARCH9 | AARCH64_FL_SVE2_BITPERM | AARCH64_FL_MEMTAG | AARCH64_FL_I8MM | AARCH64_FL_BF16, neoversen2, 0x41, 0xd48, -1)
AARCH64_CORE("neoverse-n2", neoversen2, cortexa57, 9A, AARCH64_FL_FOR_ARCH9 | AARCH64_FL_I8MM | AARCH64_FL_BF16 | AARCH64_FL_SVE2_BITPERM | AARCH64_FL_RNG | AARCH64_FL_MEMTAG | AARCH64_FL_PROFILE, neoversen2, 0x41, 0xd49, -1)
-AARCH64_CORE("hip11", hip11, hip11, 8_5A, AARCH64_FL_FOR_ARCH8_5| AARCH64_FL_SVE | AARCH64_FL_SVE2 | AARCH64_FL_F16, hip11, 0x48, 0xd22, -1)
AARCH64_CORE("demeter", demeter, cortexa57, 9A, AARCH64_FL_FOR_ARCH9 | AARCH64_FL_I8MM | AARCH64_FL_BF16 | AARCH64_FL_SVE2_BITPERM | AARCH64_FL_RNG | AARCH64_FL_MEMTAG | AARCH64_FL_PROFILE, neoversev2, 0x41, 0xd4f, -1)
AARCH64_CORE("neoverse-v2", neoversev2, cortexa57, 9A, AARCH64_FL_FOR_ARCH9 | AARCH64_FL_I8MM | AARCH64_FL_BF16 | AARCH64_FL_SVE2_BITPERM | AARCH64_FL_RNG | AARCH64_FL_MEMTAG | AARCH64_FL_PROFILE, neoversev2, 0x41, 0xd4f, -1)
--
2.33.0

Some files were not shown because too many files have changed in this diff Show More