Compare commits
59 Commits
d792f61745
...
4244eb1cd9
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
4244eb1cd9 | ||
|
|
f08325c08c | ||
|
|
ad1341bec0 | ||
|
|
b3f456737d | ||
|
|
8883917445 | ||
|
|
791c03733c | ||
|
|
d8e386d7fe | ||
|
|
b38cde7035 | ||
|
|
b974980003 | ||
|
|
a4deed733e | ||
|
|
05329f777b | ||
|
|
4af6572f3a | ||
|
|
d54dd2eaf0 | ||
|
|
6a68b69cf9 | ||
|
|
45ff6f68c3 | ||
|
|
8943f6328a | ||
|
|
623acebaa6 | ||
|
|
f7be2638ca | ||
|
|
80bf5380a0 | ||
|
|
a0725de2c4 | ||
|
|
a8cfa61489 | ||
|
|
4d72b8e986 | ||
|
|
406b179b49 | ||
|
|
552f089b68 | ||
|
|
3e4b4fcc87 | ||
|
|
b76688231e | ||
|
|
99903bd672 | ||
|
|
590b980ffc | ||
|
|
f3670876e5 | ||
|
|
e5ad654e46 | ||
|
|
069f629df7 | ||
|
|
520e062268 | ||
|
|
aca9c09071 | ||
|
|
1087136411 | ||
|
|
88f3e45ed8 | ||
|
|
a5110dd911 | ||
|
|
51f4a7d312 | ||
|
|
6d7af1becf | ||
|
|
700751006e | ||
|
|
ede988ff44 | ||
|
|
9327ad0fab | ||
|
|
1a7a96c62f | ||
|
|
73499e9115 | ||
|
|
a8517f9424 | ||
|
|
29111d6e55 | ||
|
|
5b03eda1dd | ||
|
|
cb05b56921 | ||
|
|
df9493e61b | ||
|
|
ce0a2fbef7 | ||
|
|
80348b01a4 | ||
|
|
f6bb1b6dd7 | ||
|
|
410a2c1e56 | ||
|
|
ab7ce29778 | ||
|
|
b78fcfa91d | ||
|
|
5eae06c472 | ||
|
|
e8b73631a6 | ||
|
|
190a438b45 | ||
|
|
dea14417fd | ||
|
|
d72a7d64b1 |
9915
0021-Add-BiSheng-Autotuner-support-for-LLVM-compiler.patch
Normal file
9915
0021-Add-BiSheng-Autotuner-support-for-LLVM-compiler.patch
Normal file
File diff suppressed because it is too large
Load Diff
517
0023-AArch64-Support-HiSilicon-s-HIP09-Processor.patch
Normal file
517
0023-AArch64-Support-HiSilicon-s-HIP09-Processor.patch
Normal file
@ -0,0 +1,517 @@
|
||||
From cac43828d26b178807d194b4bd7c5df69603df29 Mon Sep 17 00:00:00 2001
|
||||
From: xiajingze <xiajingze1@huawei.com>
|
||||
Date: Wed, 31 Jul 2024 18:37:29 +0800
|
||||
Subject: [PATCH] [AArch64] Support HiSilicon's HIP09 Processor
|
||||
|
||||
Signed-off-by: xiajingze <xiajingze1@huawei.com>
|
||||
---
|
||||
llvm/cmake/modules/HandleLLVMOptions.cmake | 8 ++
|
||||
.../llvm/TargetParser/AArch64TargetParser.h | 7 ++
|
||||
llvm/lib/Target/AArch64/AArch64.td | 36 +++++++
|
||||
.../lib/Target/AArch64/AArch64MacroFusion.cpp | 55 +++++++++++
|
||||
llvm/lib/Target/AArch64/AArch64Subtarget.cpp | 9 ++
|
||||
llvm/lib/Target/AArch64/AArch64Subtarget.h | 9 +-
|
||||
llvm/lib/Target/CMakeLists.txt | 4 +
|
||||
llvm/lib/TargetParser/Host.cpp | 3 +
|
||||
llvm/test/CodeGen/AArch64/cpus-hip09.ll | 11 +++
|
||||
.../CodeGen/AArch64/macro-fusion-mvnclz.mir | 20 ++++
|
||||
.../AArch64/misched-fusion-lit-hip09.ll | 73 ++++++++++++++
|
||||
llvm/test/CodeGen/AArch64/remat-hip09.ll | 18 ++++
|
||||
llvm/test/lit.site.cfg.py.in | 4 +
|
||||
llvm/unittests/TargetParser/Host.cpp | 5 +
|
||||
.../TargetParser/TargetParserTest.cpp | 16 +++
|
||||
15 files changed, 277 insertions(+), 1 deletion(-)
|
||||
create mode 100644 llvm/test/CodeGen/AArch64/cpus-hip09.ll
|
||||
create mode 100644 llvm/test/CodeGen/AArch64/macro-fusion-mvnclz.mir
|
||||
create mode 100644 llvm/test/CodeGen/AArch64/misched-fusion-lit-hip09.ll
|
||||
create mode 100644 llvm/test/CodeGen/AArch64/remat-hip09.ll
|
||||
|
||||
diff --git a/llvm/cmake/modules/HandleLLVMOptions.cmake b/llvm/cmake/modules/HandleLLVMOptions.cmake
|
||||
index 8be5d4ba5..74e68e25d 100644
|
||||
--- a/llvm/cmake/modules/HandleLLVMOptions.cmake
|
||||
+++ b/llvm/cmake/modules/HandleLLVMOptions.cmake
|
||||
@@ -112,6 +112,14 @@ else()
|
||||
set(LLVM_ENABLE_AUTOTUNER 0)
|
||||
endif()
|
||||
|
||||
+option(LLVM_ENABLE_AARCH64_HIP09 "Enable HIP09 Processor" ON)
|
||||
+if(LLVM_ENABLE_AARCH64_HIP09)
|
||||
+ set(LLVM_ENABLE_AARCH64_HIP09 1)
|
||||
+ add_definitions( -DENABLE_AARCH64_HIP09 )
|
||||
+else()
|
||||
+ set(LLVM_ENABLE_AARCH64_HIP09 0)
|
||||
+endif()
|
||||
+
|
||||
if(LLVM_ENABLE_EXPENSIVE_CHECKS)
|
||||
add_compile_definitions(EXPENSIVE_CHECKS)
|
||||
|
||||
diff --git a/llvm/include/llvm/TargetParser/AArch64TargetParser.h b/llvm/include/llvm/TargetParser/AArch64TargetParser.h
|
||||
index dc4cdfa8e..07cd2fcbb 100644
|
||||
--- a/llvm/include/llvm/TargetParser/AArch64TargetParser.h
|
||||
+++ b/llvm/include/llvm/TargetParser/AArch64TargetParser.h
|
||||
@@ -542,6 +542,13 @@ inline constexpr CpuInfo CpuInfos[] = {
|
||||
(AArch64::AEK_FP16 | AArch64::AEK_RAND | AArch64::AEK_SM4 |
|
||||
AArch64::AEK_SHA3 | AArch64::AEK_SHA2 | AArch64::AEK_AES |
|
||||
AArch64::AEK_MTE | AArch64::AEK_SB | AArch64::AEK_SSBS)},
|
||||
+#if defined(ENABLE_AARCH64_HIP09)
|
||||
+ {"hip09", ARMV8_5A,
|
||||
+ (AArch64::AEK_AES | AArch64::AEK_SM4 | AArch64::AEK_SHA2 |
|
||||
+ AArch64::AEK_SHA3 | AArch64::AEK_FP16 | AArch64::AEK_PROFILE |
|
||||
+ AArch64::AEK_FP16FML | AArch64::AEK_SVE | AArch64::AEK_I8MM |
|
||||
+ AArch64::AEK_F32MM | AArch64::AEK_F64MM | AArch64::AEK_BF16)},
|
||||
+#endif
|
||||
};
|
||||
|
||||
// An alias for a CPU.
|
||||
diff --git a/llvm/lib/Target/AArch64/AArch64.td b/llvm/lib/Target/AArch64/AArch64.td
|
||||
index 8f50af4b7..c8bfd770f 100644
|
||||
--- a/llvm/lib/Target/AArch64/AArch64.td
|
||||
+++ b/llvm/lib/Target/AArch64/AArch64.td
|
||||
@@ -296,6 +296,12 @@ def FeatureFuseAddSub2RegAndConstOne : SubtargetFeature<
|
||||
"fuse-addsub-2reg-const1", "HasFuseAddSub2RegAndConstOne", "true",
|
||||
"CPU fuses (a + b + 1) and (a - b - 1)">;
|
||||
|
||||
+#ifdef ENABLE_AARCH64_HIP09
|
||||
+def FeatureFuseMvnClz : SubtargetFeature<
|
||||
+ "fuse-mvn-clz", "HasFuseMvnClz", "true",
|
||||
+ "CPU fuses mvn+clz operations">;
|
||||
+#endif
|
||||
+
|
||||
def FeatureDisableLatencySchedHeuristic : SubtargetFeature<
|
||||
"disable-latency-sched-heuristic", "DisableLatencySchedHeuristic", "true",
|
||||
"Disable latency scheduling heuristic">;
|
||||
@@ -1205,6 +1211,21 @@ def TuneTSV110 : SubtargetFeature<"tsv110", "ARMProcFamily", "TSV110",
|
||||
FeatureFuseAES,
|
||||
FeaturePostRAScheduler]>;
|
||||
|
||||
+#ifdef ENABLE_AARCH64_HIP09
|
||||
+def TuneHIP09 : SubtargetFeature<"hip09", "ARMProcFamily", "HIP09",
|
||||
+ "HiSilicon HIP-09 processors", [
|
||||
+ FeatureCustomCheapAsMoveHandling,
|
||||
+ FeatureExperimentalZeroingPseudos,
|
||||
+ FeatureFuseAES,
|
||||
+ FeatureLSLFast,
|
||||
+ FeatureAscendStoreAddress,
|
||||
+ FeatureCmpBccFusion,
|
||||
+ FeatureArithmeticBccFusion,
|
||||
+ FeatureFuseLiterals,
|
||||
+ FeatureFuseMvnClz,
|
||||
+ FeaturePostRAScheduler]>;
|
||||
+#endif
|
||||
+
|
||||
def TuneAmpere1 : SubtargetFeature<"ampere1", "ARMProcFamily", "Ampere1",
|
||||
"Ampere Computing Ampere-1 processors", [
|
||||
FeaturePostRAScheduler,
|
||||
@@ -1359,6 +1380,14 @@ def ProcessorFeatures {
|
||||
list<SubtargetFeature> TSV110 = [HasV8_2aOps, FeatureCrypto, FeatureFPARMv8,
|
||||
FeatureNEON, FeaturePerfMon, FeatureSPE,
|
||||
FeatureFullFP16, FeatureFP16FML, FeatureDotProd];
|
||||
+#ifdef ENABLE_AARCH64_HIP09
|
||||
+ list<SubtargetFeature> HIP09 = [HasV8_5aOps, FeatureBF16, FeatureCrypto, FeatureFPARMv8,
|
||||
+ FeatureMatMulInt8, FeatureMatMulFP32, FeatureMatMulFP64,
|
||||
+ FeatureNEON, FeaturePerfMon, FeatureRandGen, FeatureSPE,
|
||||
+ FeatureFullFP16, FeatureFP16FML, FeatureDotProd,
|
||||
+ FeatureJS, FeatureComplxNum, FeatureSHA3, FeatureSM4,
|
||||
+ FeatureSVE];
|
||||
+#endif
|
||||
list<SubtargetFeature> Ampere1 = [HasV8_6aOps, FeatureNEON, FeaturePerfMon,
|
||||
FeatureSSBS, FeatureRandGen, FeatureSB,
|
||||
FeatureSHA2, FeatureSHA3, FeatureAES];
|
||||
@@ -1464,8 +1493,15 @@ def : ProcessorModel<"thunderx2t99", ThunderX2T99Model,
|
||||
// Marvell ThunderX3T110 Processors.
|
||||
def : ProcessorModel<"thunderx3t110", ThunderX3T110Model,
|
||||
ProcessorFeatures.ThunderX3T110, [TuneThunderX3T110]>;
|
||||
+
|
||||
+// HiSilicon Processors.
|
||||
def : ProcessorModel<"tsv110", TSV110Model, ProcessorFeatures.TSV110,
|
||||
[TuneTSV110]>;
|
||||
+#ifdef ENABLE_AARCH64_HIP09
|
||||
+// FIXME: HiSilicon HIP09 is currently modeled as a Cortex-A57.
|
||||
+def : ProcessorModel<"hip09", CortexA57Model, ProcessorFeatures.HIP09,
|
||||
+ [TuneHIP09]>;
|
||||
+#endif
|
||||
|
||||
// Support cyclone as an alias for apple-a7 so we can still LTO old bitcode.
|
||||
def : ProcessorModel<"cyclone", CycloneModel, ProcessorFeatures.AppleA7,
|
||||
diff --git a/llvm/lib/Target/AArch64/AArch64MacroFusion.cpp b/llvm/lib/Target/AArch64/AArch64MacroFusion.cpp
|
||||
index 05d60872b..4963ec350 100644
|
||||
--- a/llvm/lib/Target/AArch64/AArch64MacroFusion.cpp
|
||||
+++ b/llvm/lib/Target/AArch64/AArch64MacroFusion.cpp
|
||||
@@ -51,6 +51,12 @@ static bool isArithmeticBccPair(const MachineInstr *FirstMI,
|
||||
case AArch64::SUBSXrr:
|
||||
case AArch64::BICSWrr:
|
||||
case AArch64::BICSXrr:
|
||||
+#if defined(ENABLE_AARCH64_HIP09)
|
||||
+ case AArch64::ADCSWr:
|
||||
+ case AArch64::ADCSXr:
|
||||
+ case AArch64::SBCSWr:
|
||||
+ case AArch64::SBCSXr:
|
||||
+#endif
|
||||
return true;
|
||||
case AArch64::ADDSWrs:
|
||||
case AArch64::ADDSXrs:
|
||||
@@ -183,6 +189,20 @@ static bool isLiteralsPair(const MachineInstr *FirstMI,
|
||||
SecondMI.getOperand(3).getImm() == 16))
|
||||
return true;
|
||||
|
||||
+#if defined(ENABLE_AARCH64_HIP09)
|
||||
+ // 32 bit immediate.
|
||||
+ if ((FirstMI == nullptr || FirstMI->getOpcode() == AArch64::MOVNWi) &&
|
||||
+ (SecondMI.getOpcode() == AArch64::MOVKWi &&
|
||||
+ SecondMI.getOperand(3).getImm() == 16))
|
||||
+ return true;
|
||||
+
|
||||
+ // Lower half of 64 bit immediate.
|
||||
+ if ((FirstMI == nullptr || FirstMI->getOpcode() == AArch64::MOVNXi) &&
|
||||
+ (SecondMI.getOpcode() == AArch64::MOVKWi &&
|
||||
+ SecondMI.getOperand(3).getImm() == 16))
|
||||
+ return true;
|
||||
+#endif
|
||||
+
|
||||
// Upper half of 64 bit immediate.
|
||||
if ((FirstMI == nullptr ||
|
||||
(FirstMI->getOpcode() == AArch64::MOVKXi &&
|
||||
@@ -437,6 +457,37 @@ static bool isAddSub2RegAndConstOnePair(const MachineInstr *FirstMI,
|
||||
return false;
|
||||
}
|
||||
|
||||
+#if defined(ENABLE_AARCH64_HIP09)
|
||||
+static bool isMvnClzPair(const MachineInstr *FirstMI,
|
||||
+ const MachineInstr &SecondMI) {
|
||||
+ // HIP09 supports fusion of MVN + CLZ.
|
||||
+ // The CLZ can be fused with MVN and make execution faster.
|
||||
+ // And the fusion is not allowed for shifted forms.
|
||||
+ //
|
||||
+ // Instruction alias info:
|
||||
+ // 1. MVN <Wd>, <Wm>{, <shift> #<amount>} is equivalent to
|
||||
+ // ORN <Wd>, WZR, <Wm>{, <shift> #<amount>}
|
||||
+ // 2. MVN <Xd>, <Xm>{, <shift> #<amount>} is equivalent to
|
||||
+ // ORN <Xd>, XZR, <Xm>{, <shift> #<amount>}
|
||||
+ // Assume the 1st instr to be a wildcard if it is unspecified.
|
||||
+ if ((FirstMI == nullptr ||
|
||||
+ ((FirstMI->getOpcode() == AArch64::ORNWrs) &&
|
||||
+ (FirstMI->getOperand(1).getReg() == AArch64::WZR) &&
|
||||
+ (!AArch64InstrInfo::hasShiftedReg(*FirstMI)))) &&
|
||||
+ (SecondMI.getOpcode() == AArch64::CLZWr))
|
||||
+ return true;
|
||||
+
|
||||
+ if ((FirstMI == nullptr ||
|
||||
+ ((FirstMI->getOpcode() == AArch64::ORNXrs) &&
|
||||
+ (FirstMI->getOperand(1).getReg() == AArch64::XZR) &&
|
||||
+ (!AArch64InstrInfo::hasShiftedReg(*FirstMI)))) &&
|
||||
+ (SecondMI.getOpcode() == AArch64::CLZXr))
|
||||
+ return true;
|
||||
+
|
||||
+ return false;
|
||||
+}
|
||||
+#endif
|
||||
+
|
||||
/// \brief Check if the instr pair, FirstMI and SecondMI, should be fused
|
||||
/// together. Given SecondMI, when FirstMI is unspecified, then check if
|
||||
/// SecondMI may be part of a fused pair at all.
|
||||
@@ -472,6 +523,10 @@ static bool shouldScheduleAdjacent(const TargetInstrInfo &TII,
|
||||
if (ST.hasFuseAddSub2RegAndConstOne() &&
|
||||
isAddSub2RegAndConstOnePair(FirstMI, SecondMI))
|
||||
return true;
|
||||
+#if defined(ENABLE_AARCH64_HIP09)
|
||||
+ if (ST.hasFuseMvnClz() && isMvnClzPair(FirstMI, SecondMI))
|
||||
+ return true;
|
||||
+#endif
|
||||
|
||||
return false;
|
||||
}
|
||||
diff --git a/llvm/lib/Target/AArch64/AArch64Subtarget.cpp b/llvm/lib/Target/AArch64/AArch64Subtarget.cpp
|
||||
index 450e27b8a..ddf22364c 100644
|
||||
--- a/llvm/lib/Target/AArch64/AArch64Subtarget.cpp
|
||||
+++ b/llvm/lib/Target/AArch64/AArch64Subtarget.cpp
|
||||
@@ -266,6 +266,15 @@ void AArch64Subtarget::initializeProperties() {
|
||||
PrefFunctionAlignment = Align(16);
|
||||
PrefLoopAlignment = Align(4);
|
||||
break;
|
||||
+#if defined(ENABLE_AARCH64_HIP09)
|
||||
+ case HIP09:
|
||||
+ CacheLineSize = 64;
|
||||
+ PrefFunctionAlignment = Align(16);
|
||||
+ PrefLoopAlignment = Align(4);
|
||||
+ VScaleForTuning = 2;
|
||||
+ DefaultSVETFOpts = TailFoldingOpts::Simple;
|
||||
+ break;
|
||||
+#endif
|
||||
case ThunderX3T110:
|
||||
CacheLineSize = 64;
|
||||
PrefFunctionAlignment = Align(16);
|
||||
diff --git a/llvm/lib/Target/AArch64/AArch64Subtarget.h b/llvm/lib/Target/AArch64/AArch64Subtarget.h
|
||||
index 5e20d1646..5f481f4f9 100644
|
||||
--- a/llvm/lib/Target/AArch64/AArch64Subtarget.h
|
||||
+++ b/llvm/lib/Target/AArch64/AArch64Subtarget.h
|
||||
@@ -87,7 +87,10 @@ public:
|
||||
ThunderXT83,
|
||||
ThunderXT88,
|
||||
ThunderX3T110,
|
||||
- TSV110
|
||||
+ TSV110,
|
||||
+#if defined(ENABLE_AARCH64_HIP09)
|
||||
+ HIP09
|
||||
+#endif
|
||||
};
|
||||
|
||||
protected:
|
||||
@@ -239,7 +242,11 @@ public:
|
||||
bool hasFusion() const {
|
||||
return hasArithmeticBccFusion() || hasArithmeticCbzFusion() ||
|
||||
hasFuseAES() || hasFuseArithmeticLogic() || hasFuseCCSelect() ||
|
||||
+#if defined(ENABLE_AARCH64_HIP09)
|
||||
+ hasFuseAdrpAdd() || hasFuseLiterals() || hasFuseMvnClz();
|
||||
+#else
|
||||
hasFuseAdrpAdd() || hasFuseLiterals();
|
||||
+#endif
|
||||
}
|
||||
|
||||
unsigned getMaxInterleaveFactor() const { return MaxInterleaveFactor; }
|
||||
diff --git a/llvm/lib/Target/CMakeLists.txt b/llvm/lib/Target/CMakeLists.txt
|
||||
index 2739233f9..501ce1f2f 100644
|
||||
--- a/llvm/lib/Target/CMakeLists.txt
|
||||
+++ b/llvm/lib/Target/CMakeLists.txt
|
||||
@@ -2,6 +2,10 @@ list(APPEND LLVM_COMMON_DEPENDS intrinsics_gen)
|
||||
|
||||
list(APPEND LLVM_TABLEGEN_FLAGS -I ${LLVM_MAIN_SRC_DIR}/lib/Target)
|
||||
|
||||
+if(LLVM_ENABLE_AARCH64_HIP09)
|
||||
+ list(APPEND LLVM_TABLEGEN_FLAGS "-DENABLE_AARCH64_HIP09")
|
||||
+endif()
|
||||
+
|
||||
add_llvm_component_library(LLVMTarget
|
||||
Target.cpp
|
||||
TargetIntrinsicInfo.cpp
|
||||
diff --git a/llvm/lib/TargetParser/Host.cpp b/llvm/lib/TargetParser/Host.cpp
|
||||
index d11dc605e..8b23be02e 100644
|
||||
--- a/llvm/lib/TargetParser/Host.cpp
|
||||
+++ b/llvm/lib/TargetParser/Host.cpp
|
||||
@@ -257,6 +257,9 @@ StringRef sys::detail::getHostCPUNameForARM(StringRef ProcCpuinfoContent) {
|
||||
// contents are specified in the various processor manuals.
|
||||
return StringSwitch<const char *>(Part)
|
||||
.Case("0xd01", "tsv110")
|
||||
+#if defined(ENABLE_AARCH64_HIP09)
|
||||
+ .Case("0xd02", "hip09")
|
||||
+#endif
|
||||
.Default("generic");
|
||||
|
||||
if (Implementer == "0x51") // Qualcomm Technologies, Inc.
|
||||
diff --git a/llvm/test/CodeGen/AArch64/cpus-hip09.ll b/llvm/test/CodeGen/AArch64/cpus-hip09.ll
|
||||
new file mode 100644
|
||||
index 000000000..dcf32e4dc
|
||||
--- /dev/null
|
||||
+++ b/llvm/test/CodeGen/AArch64/cpus-hip09.ll
|
||||
@@ -0,0 +1,11 @@
|
||||
+; REQUIRES: enable_enable_aarch64_hip09
|
||||
+; This tests that llc accepts all valid AArch64 CPUs
|
||||
+
|
||||
+; RUN: llc < %s -mtriple=arm64-unknown-unknown -mcpu=hip09 2>&1 | FileCheck %s
|
||||
+
|
||||
+; CHECK-NOT: {{.*}} is not a recognized processor for this target
|
||||
+; INVALID: {{.*}} is not a recognized processor for this target
|
||||
+
|
||||
+define i32 @f(i64 %z) {
|
||||
+ ret i32 0
|
||||
+}
|
||||
diff --git a/llvm/test/CodeGen/AArch64/macro-fusion-mvnclz.mir b/llvm/test/CodeGen/AArch64/macro-fusion-mvnclz.mir
|
||||
new file mode 100644
|
||||
index 000000000..64bf15937
|
||||
--- /dev/null
|
||||
+++ b/llvm/test/CodeGen/AArch64/macro-fusion-mvnclz.mir
|
||||
@@ -0,0 +1,20 @@
|
||||
+# REQUIRES: enable_enable_aarch64_hip09
|
||||
+# RUN: llc -o - %s -mtriple=aarch64-- -mattr=+fuse-mvn-clz -run-pass postmisched | FileCheck %s --check-prefixes=CHECK,FUSION
|
||||
+# RUN: llc -o - %s -mtriple=aarch64-- -mattr=-fuse-mvn-clz -run-pass postmisched | FileCheck %s --check-prefixes=CHECK,NOFUSION
|
||||
+---
|
||||
+# CHECK-LABEL: name: fuse-mvn-clz
|
||||
+# CHECK: $w2 = ORNWrs $wzr, $w1, 0
|
||||
+# FUSION: $w0 = CLZWr killed renamable $w2
|
||||
+# CHECK: $w3 = ADDWri killed renamable $w1, 1, 0
|
||||
+# NOFUSION: $w0 = CLZWr killed renamable $w2
|
||||
+name: fuse-mvn-clz
|
||||
+tracksRegLiveness: true
|
||||
+body: |
|
||||
+ bb.0:
|
||||
+ liveins: $w0, $w1, $w2, $w3
|
||||
+
|
||||
+ $w2 = ORNWrs $wzr, $w1, 0
|
||||
+ $w3 = ADDWri killed renamable $w1, 1, 0
|
||||
+ $w0 = CLZWr killed renamable $w2
|
||||
+ RET undef $lr, implicit $w0
|
||||
+...
|
||||
diff --git a/llvm/test/CodeGen/AArch64/misched-fusion-lit-hip09.ll b/llvm/test/CodeGen/AArch64/misched-fusion-lit-hip09.ll
|
||||
new file mode 100644
|
||||
index 000000000..d67fa5b43
|
||||
--- /dev/null
|
||||
+++ b/llvm/test/CodeGen/AArch64/misched-fusion-lit-hip09.ll
|
||||
@@ -0,0 +1,73 @@
|
||||
+; REQUIRES: enable_enable_aarch64_hip09
|
||||
+; RUN: llc %s -o - -mtriple=aarch64-unknown -mcpu=hip09 | FileCheck %s --check-prefix=CHECK --check-prefix=CHECKFUSE-HIP09
|
||||
+
|
||||
+@g = common local_unnamed_addr global ptr null, align 8
|
||||
+
|
||||
+define dso_local ptr @litp(i32 %a, i32 %b) {
|
||||
+entry:
|
||||
+ %add = add nsw i32 %b, %a
|
||||
+ %idx.ext = sext i32 %add to i64
|
||||
+ %add.ptr = getelementptr i8, ptr @litp, i64 %idx.ext
|
||||
+ store ptr %add.ptr, ptr @g, align 8
|
||||
+ ret ptr %add.ptr
|
||||
+
|
||||
+; CHECK-LABEL: litp:
|
||||
+; CHECK: adrp [[R:x[0-9]+]], litp
|
||||
+; CHECKFUSE-NEXT: add {{x[0-9]+}}, [[R]], :lo12:litp
|
||||
+}
|
||||
+
|
||||
+define dso_local ptr @litp_tune_generic(i32 %a, i32 %b) "tune-cpu"="generic" {
|
||||
+entry:
|
||||
+ %add = add nsw i32 %b, %a
|
||||
+ %idx.ext = sext i32 %add to i64
|
||||
+ %add.ptr = getelementptr i8, ptr @litp_tune_generic, i64 %idx.ext
|
||||
+ store ptr %add.ptr, ptr @g, align 8
|
||||
+ ret ptr %add.ptr
|
||||
+
|
||||
+; CHECK-LABEL: litp_tune_generic:
|
||||
+; CHECK: adrp [[R:x[0-9]+]], litp_tune_generic
|
||||
+; CHECK-NEXT: add {{x[0-9]+}}, [[R]], :lo12:litp_tune_generic
|
||||
+}
|
||||
+
|
||||
+define dso_local i32 @liti(i32 %a, i32 %b) {
|
||||
+entry:
|
||||
+ %add = add i32 %a, -262095121
|
||||
+ %add1 = add i32 %add, %b
|
||||
+ ret i32 %add1
|
||||
+
|
||||
+; CHECK-LABEL: liti:
|
||||
+; CHECK: mov [[R:w[0-9]+]], {{#[0-9]+}}
|
||||
+; CHECKDONT-NEXT: add {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
|
||||
+; CHECKFUSE-NEXT: movk [[R]], {{#[0-9]+}}, lsl #16
|
||||
+; CHECKFUSE-HIP09: movk [[R]], {{#[0-9]+}}, lsl #16
|
||||
+}
|
||||
+
|
||||
+; Function Attrs: norecurse nounwind readnone
|
||||
+define dso_local i64 @litl(i64 %a, i64 %b) {
|
||||
+entry:
|
||||
+ %add = add i64 %a, 2208998440489107183
|
||||
+ %add1 = add i64 %add, %b
|
||||
+ ret i64 %add1
|
||||
+
|
||||
+; CHECK-LABEL: litl:
|
||||
+; CHECK: mov [[R:x[0-9]+]], {{#[0-9]+}}
|
||||
+; CHECKDONT-NEXT: add {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}
|
||||
+; CHECK-NEXT: movk [[R]], {{#[0-9]+}}, lsl #16
|
||||
+; CHECK: movk [[R]], {{#[0-9]+}}, lsl #32
|
||||
+; CHECK-NEXT: movk [[R]], {{#[0-9]+}}, lsl #48
|
||||
+}
|
||||
+
|
||||
+; Function Attrs: norecurse nounwind readnone
|
||||
+define dso_local double @litf() {
|
||||
+entry:
|
||||
+ ret double 0x400921FB54442D18
|
||||
+
|
||||
+; CHECK-LABEL: litf:
|
||||
+; CHECK-DONT: adrp [[ADDR:x[0-9]+]], [[CSTLABEL:.LCP.*]]
|
||||
+; CHECK-DONT-NEXT: ldr {{d[0-9]+}}, {{[[]}}[[ADDR]], :lo12:[[CSTLABEL]]{{[]]}}
|
||||
+; CHECKFUSE-HIP09: mov [[R:x[0-9]+]], #11544
|
||||
+; CHECKFUSE-HIP09: movk [[R]], #21572, lsl #16
|
||||
+; CHECKFUSE-HIP09: movk [[R]], #8699, lsl #32
|
||||
+; CHECKFUSE-HIP09: movk [[R]], #16393, lsl #48
|
||||
+; CHECKFUSE-HIP09: fmov {{d[0-9]+}}, [[R]]
|
||||
+}
|
||||
diff --git a/llvm/test/CodeGen/AArch64/remat-hip09.ll b/llvm/test/CodeGen/AArch64/remat-hip09.ll
|
||||
new file mode 100644
|
||||
index 000000000..aec0d18ae
|
||||
--- /dev/null
|
||||
+++ b/llvm/test/CodeGen/AArch64/remat-hip09.ll
|
||||
@@ -0,0 +1,18 @@
|
||||
+; REQUIRES: enable_enable_aarch64_hip09
|
||||
+; RUN: llc -mtriple=aarch64-linux-gnuabi -mcpu=hip09 -o - %s | FileCheck %s
|
||||
+
|
||||
+%X = type { i64, i64, i64 }
|
||||
+declare void @f(ptr)
|
||||
+define void @t() {
|
||||
+entry:
|
||||
+ %tmp = alloca %X
|
||||
+ call void @f(ptr %tmp)
|
||||
+; CHECK: add x0, sp, #8
|
||||
+; CHECK-NOT: mov
|
||||
+; CHECK-NEXT: bl f
|
||||
+ call void @f(ptr %tmp)
|
||||
+; CHECK: add x0, sp, #8
|
||||
+; CHECK-NOT: mov
|
||||
+; CHECK-NEXT: bl f
|
||||
+ ret void
|
||||
+}
|
||||
diff --git a/llvm/test/lit.site.cfg.py.in b/llvm/test/lit.site.cfg.py.in
|
||||
index 20c1ecca1..6145a514f 100644
|
||||
--- a/llvm/test/lit.site.cfg.py.in
|
||||
+++ b/llvm/test/lit.site.cfg.py.in
|
||||
@@ -64,9 +64,13 @@ config.have_llvm_driver = @LLVM_TOOL_LLVM_DRIVER_BUILD@
|
||||
config.use_classic_flang = @LLVM_ENABLE_CLASSIC_FLANG@
|
||||
config.enable_enable_autotuner = @LLVM_ENABLE_AUTOTUNER@
|
||||
+config.enable_enable_aarch64_hip09 = @LLVM_ENABLE_AARCH64_HIP09@
|
||||
|
||||
import lit.llvm
|
||||
lit.llvm.initialize(lit_config, config)
|
||||
|
||||
+if config.enable_enable_aarch64_hip09:
|
||||
+ config.available_features.add("enable_enable_aarch64_hip09")
|
||||
+
|
||||
# Let the main config do the real work.
|
||||
lit_config.load_config(
|
||||
config, os.path.join(config.llvm_src_root, "test/lit.cfg.py"))
|
||||
diff --git a/llvm/unittests/TargetParser/Host.cpp b/llvm/unittests/TargetParser/Host.cpp
|
||||
index 452d0326c..4b4c81514 100644
|
||||
--- a/llvm/unittests/TargetParser/Host.cpp
|
||||
+++ b/llvm/unittests/TargetParser/Host.cpp
|
||||
@@ -250,6 +250,11 @@ CPU part : 0x0a1
|
||||
EXPECT_EQ(sys::detail::getHostCPUNameForARM("CPU implementer : 0x48\n"
|
||||
"CPU part : 0xd01"),
|
||||
"tsv110");
|
||||
+#if defined(ENABLE_AARCH64_HIP09)
|
||||
+ EXPECT_EQ(sys::detail::getHostCPUNameForARM("CPU implementer : 0x48\n"
|
||||
+ "CPU part : 0xd02"),
|
||||
+ "hip09");
|
||||
+#endif
|
||||
|
||||
// Verify A64FX.
|
||||
const std::string A64FXProcCpuInfo = R"(
|
||||
diff --git a/llvm/unittests/TargetParser/TargetParserTest.cpp b/llvm/unittests/TargetParser/TargetParserTest.cpp
|
||||
index 741d5a2d4..94e0047e5 100644
|
||||
--- a/llvm/unittests/TargetParser/TargetParserTest.cpp
|
||||
+++ b/llvm/unittests/TargetParser/TargetParserTest.cpp
|
||||
@@ -1421,6 +1421,18 @@ INSTANTIATE_TEST_SUITE_P(
|
||||
AArch64::AEK_PROFILE | AArch64::AEK_FP16 |
|
||||
AArch64::AEK_FP16FML | AArch64::AEK_DOTPROD,
|
||||
"8.2-A"),
|
||||
+#if defined(ENABLE_AARCH64_HIP09)
|
||||
+ ARMCPUTestParams(
|
||||
+ "hip09", "armv8.5-a", "crypto-neon-fp-armv8",
|
||||
+ AArch64::AEK_CRC | AArch64::AEK_FP | AArch64::AEK_SIMD |
|
||||
+ AArch64::AEK_RAS | AArch64::AEK_LSE | AArch64::AEK_RDM |
|
||||
+ AArch64::AEK_RCPC | AArch64::AEK_DOTPROD | AArch64::AEK_AES |
|
||||
+ AArch64::AEK_SM4 | AArch64::AEK_SHA2 | AArch64::AEK_SHA3 |
|
||||
+ AArch64::AEK_FP16 | AArch64::AEK_PROFILE |
|
||||
+ AArch64::AEK_FP16FML | AArch64::AEK_SVE | AArch64::AEK_I8MM |
|
||||
+ AArch64::AEK_F32MM | AArch64::AEK_F64MM | AArch64::AEK_BF16,
|
||||
+ "8.5-A"),
|
||||
+#endif
|
||||
ARMCPUTestParams("a64fx", "armv8.2-a", "crypto-neon-fp-armv8",
|
||||
AArch64::AEK_CRC | AArch64::AEK_AES |
|
||||
AArch64::AEK_SHA2 | AArch64::AEK_FP |
|
||||
@@ -1437,7 +1449,11 @@ INSTANTIATE_TEST_SUITE_P(
|
||||
"8.2-A")));
|
||||
|
||||
// Note: number of CPUs includes aliases.
|
||||
+#if defined(ENABLE_AARCH64_HIP09)
|
||||
+static constexpr unsigned NumAArch64CPUArchs = 63;
|
||||
+#else
|
||||
static constexpr unsigned NumAArch64CPUArchs = 62;
|
||||
+#endif
|
||||
|
||||
TEST(TargetParserTest, testAArch64CPUArchList) {
|
||||
SmallVector<StringRef, NumAArch64CPUArchs> List;
|
||||
--
|
||||
2.19.1
|
||||
|
||||
5463
0024-Backport-LoongArch-fix-and-add-some-new-support.patch
Normal file
5463
0024-Backport-LoongArch-fix-and-add-some-new-support.patch
Normal file
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,30 @@
|
||||
From cf9d549f2c40d548587f8d2d3cda0d32f13c9256 Mon Sep 17 00:00:00 2001
|
||||
From: Temperatureblock <102174059+Temperature-block@users.noreply.github.com>
|
||||
Date: Mon, 12 Aug 2024 20:06:58 +0530
|
||||
Subject: [PATCH] Simple check to ignore Inline asm fwait insertion (#101686)
|
||||
|
||||
Just a simple check to ignore Inline asm fwait insertion
|
||||
|
||||
Fixes #101613
|
||||
---
|
||||
llvm/lib/Target/X86/X86InstrInfo.cpp | 5 +++++
|
||||
1 file changed, 5 insertions(+)
|
||||
|
||||
diff --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp
|
||||
index 10a0ccdcb023..e615fa09608c 100644
|
||||
--- a/llvm/lib/Target/X86/X86InstrInfo.cpp
|
||||
+++ b/llvm/lib/Target/X86/X86InstrInfo.cpp
|
||||
@@ -2947,6 +2947,11 @@ static bool isX87Reg(unsigned Reg) {
|
||||
|
||||
/// check if the instruction is X87 instruction
|
||||
bool X86::isX87Instruction(MachineInstr &MI) {
|
||||
+ // Call and inlineasm defs X87 register, so we special case it here because
|
||||
+ // otherwise calls are incorrectly flagged as x87 instructions
|
||||
+ // as a result.
|
||||
+ if (MI.isInlineAsm())
|
||||
+ return false;
|
||||
for (const MachineOperand &MO : MI.operands()) {
|
||||
if (!MO.isReg())
|
||||
continue;
|
||||
--
|
||||
Gitee
|
||||
24
0026-Add-arch-restriction-for-BiSheng-Autotuner.patch
Normal file
24
0026-Add-arch-restriction-for-BiSheng-Autotuner.patch
Normal file
@ -0,0 +1,24 @@
|
||||
From 2513e90fd317bbe5854a06213e43cdf7029c3ee2 Mon Sep 17 00:00:00 2001
|
||||
From: liyunfei <liyunfei33@huawei.com>
|
||||
Date: Tue, 5 Nov 2024 18:18:19 +0800
|
||||
Subject: [PATCH] Add arch restriction for BiSheng Autotuner
|
||||
|
||||
BiSheng Autotuner currently supports only x86_64 and aarch64.
|
||||
|
||||
Signed-off-by: liyunfei <liyunfei33@huawei.com>
|
||||
---
|
||||
llvm/test/AutoTuning/lit.local.cfg | 2 ++
|
||||
1 file changed, 2 insertions(+)
|
||||
|
||||
diff --git a/llvm/test/AutoTuning/lit.local.cfg b/llvm/test/AutoTuning/lit.local.cfg
|
||||
index 13b4927257ab..c48c2c9eab6f 100644
|
||||
--- a/llvm/test/AutoTuning/lit.local.cfg
|
||||
+++ b/llvm/test/AutoTuning/lit.local.cfg
|
||||
@@ -1,2 +1,4 @@
|
||||
if not config.enable_enable_autotuner:
|
||||
config.unsupported = True
|
||||
+if config.host_arch not in ["x86", "X86", 'x86_64', 'aarch64']:
|
||||
+ config.unsupported = True
|
||||
\ No newline at end of file
|
||||
--
|
||||
Gitee
|
||||
514
0027-AArch64-Delete-hip09-macro.patch
Normal file
514
0027-AArch64-Delete-hip09-macro.patch
Normal file
@ -0,0 +1,514 @@
|
||||
From 42b0d16ab1ced5720e017fa9f6059c32489ab1bd Mon Sep 17 00:00:00 2001
|
||||
From: xiajingze <xiajingze1@huawei.com>
|
||||
Date: Wed, 9 Oct 2024 17:13:49 +0800
|
||||
Subject: [PATCH] [AArch64] Delete hip09 macro
|
||||
|
||||
Signed-off-by: xiajingze <xiajingze1@huawei.com>
|
||||
---
|
||||
llvm/cmake/modules/HandleLLVMOptions.cmake | 8 --
|
||||
.../llvm/TargetParser/AArch64TargetParser.h | 2 -
|
||||
llvm/lib/Target/AArch64/AArch64.td | 8 --
|
||||
.../lib/Target/AArch64/AArch64MacroFusion.cpp | 8 --
|
||||
llvm/lib/Target/AArch64/AArch64Subtarget.cpp | 2 -
|
||||
llvm/lib/Target/AArch64/AArch64Subtarget.h | 6 --
|
||||
llvm/lib/Target/CMakeLists.txt | 4 -
|
||||
llvm/lib/TargetParser/Host.cpp | 2 -
|
||||
llvm/test/CodeGen/AArch64/cpus-hip09.ll | 11 ---
|
||||
llvm/test/CodeGen/AArch64/cpus.ll | 1 +
|
||||
.../CodeGen/AArch64/macro-fusion-mvnclz.mir | 1 -
|
||||
.../AArch64/misched-fusion-lit-hip09.ll | 73 --------------
|
||||
.../CodeGen/AArch64/misched-fusion-lit.ll | 7 ++
|
||||
llvm/test/CodeGen/AArch64/remat-hip09.ll | 18 ----
|
||||
llvm/test/CodeGen/AArch64/remat.ll | 1 +
|
||||
llvm/test/lit.site.cfg.py.in | 4 -
|
||||
llvm/unittests/TargetParser/Host.cpp | 2 -
|
||||
.../TargetParser/TargetParserTest.cpp | 6 --
|
||||
18 files changed, 9 insertions(+), 155 deletions(-)
|
||||
delete mode 100644 llvm/test/CodeGen/AArch64/cpus-hip09.ll
|
||||
delete mode 100644 llvm/test/CodeGen/AArch64/misched-fusion-lit-hip09.ll
|
||||
delete mode 100644 llvm/test/CodeGen/AArch64/remat-hip09.ll
|
||||
|
||||
diff --git a/llvm/cmake/modules/HandleLLVMOptions.cmake b/llvm/cmake/modules/HandleLLVMOptions.cmake
|
||||
index 74e68e25d85c..8be5d4ba52c2 100644
|
||||
--- a/llvm/cmake/modules/HandleLLVMOptions.cmake
|
||||
+++ b/llvm/cmake/modules/HandleLLVMOptions.cmake
|
||||
@@ -112,14 +112,6 @@ else()
|
||||
set(LLVM_ENABLE_AUTOTUNER 0)
|
||||
endif()
|
||||
|
||||
-option(LLVM_ENABLE_AARCH64_HIP09 "Enable HIP09 Processor" ON)
|
||||
-if(LLVM_ENABLE_AARCH64_HIP09)
|
||||
- set(LLVM_ENABLE_AARCH64_HIP09 1)
|
||||
- add_definitions( -DENABLE_AARCH64_HIP09 )
|
||||
-else()
|
||||
- set(LLVM_ENABLE_AARCH64_HIP09 0)
|
||||
-endif()
|
||||
-
|
||||
if(LLVM_ENABLE_EXPENSIVE_CHECKS)
|
||||
add_compile_definitions(EXPENSIVE_CHECKS)
|
||||
|
||||
diff --git a/llvm/include/llvm/TargetParser/AArch64TargetParser.h b/llvm/include/llvm/TargetParser/AArch64TargetParser.h
|
||||
index 07cd2fcbb68d..8b25cce0abdc 100644
|
||||
--- a/llvm/include/llvm/TargetParser/AArch64TargetParser.h
|
||||
+++ b/llvm/include/llvm/TargetParser/AArch64TargetParser.h
|
||||
@@ -542,13 +542,11 @@ inline constexpr CpuInfo CpuInfos[] = {
|
||||
(AArch64::AEK_FP16 | AArch64::AEK_RAND | AArch64::AEK_SM4 |
|
||||
AArch64::AEK_SHA3 | AArch64::AEK_SHA2 | AArch64::AEK_AES |
|
||||
AArch64::AEK_MTE | AArch64::AEK_SB | AArch64::AEK_SSBS)},
|
||||
-#if defined(ENABLE_AARCH64_HIP09)
|
||||
{"hip09", ARMV8_5A,
|
||||
(AArch64::AEK_AES | AArch64::AEK_SM4 | AArch64::AEK_SHA2 |
|
||||
AArch64::AEK_SHA3 | AArch64::AEK_FP16 | AArch64::AEK_PROFILE |
|
||||
AArch64::AEK_FP16FML | AArch64::AEK_SVE | AArch64::AEK_I8MM |
|
||||
AArch64::AEK_F32MM | AArch64::AEK_F64MM | AArch64::AEK_BF16)},
|
||||
-#endif
|
||||
};
|
||||
|
||||
// An alias for a CPU.
|
||||
diff --git a/llvm/lib/Target/AArch64/AArch64.td b/llvm/lib/Target/AArch64/AArch64.td
|
||||
index c8bfd770f55f..fdb931a0fe6c 100644
|
||||
--- a/llvm/lib/Target/AArch64/AArch64.td
|
||||
+++ b/llvm/lib/Target/AArch64/AArch64.td
|
||||
@@ -296,11 +296,9 @@ def FeatureFuseAddSub2RegAndConstOne : SubtargetFeature<
|
||||
"fuse-addsub-2reg-const1", "HasFuseAddSub2RegAndConstOne", "true",
|
||||
"CPU fuses (a + b + 1) and (a - b - 1)">;
|
||||
|
||||
-#ifdef ENABLE_AARCH64_HIP09
|
||||
def FeatureFuseMvnClz : SubtargetFeature<
|
||||
"fuse-mvn-clz", "HasFuseMvnClz", "true",
|
||||
"CPU fuses mvn+clz operations">;
|
||||
-#endif
|
||||
|
||||
def FeatureDisableLatencySchedHeuristic : SubtargetFeature<
|
||||
"disable-latency-sched-heuristic", "DisableLatencySchedHeuristic", "true",
|
||||
@@ -1211,7 +1209,6 @@ def TuneTSV110 : SubtargetFeature<"tsv110", "ARMProcFamily", "TSV110",
|
||||
FeatureFuseAES,
|
||||
FeaturePostRAScheduler]>;
|
||||
|
||||
-#ifdef ENABLE_AARCH64_HIP09
|
||||
def TuneHIP09 : SubtargetFeature<"hip09", "ARMProcFamily", "HIP09",
|
||||
"HiSilicon HIP-09 processors", [
|
||||
FeatureCustomCheapAsMoveHandling,
|
||||
@@ -1224,7 +1221,6 @@ def TuneHIP09 : SubtargetFeature<"hip09", "ARMProcFamily", "HIP09",
|
||||
FeatureFuseLiterals,
|
||||
FeatureFuseMvnClz,
|
||||
FeaturePostRAScheduler]>;
|
||||
-#endif
|
||||
|
||||
def TuneAmpere1 : SubtargetFeature<"ampere1", "ARMProcFamily", "Ampere1",
|
||||
"Ampere Computing Ampere-1 processors", [
|
||||
@@ -1380,14 +1376,12 @@ def ProcessorFeatures {
|
||||
list<SubtargetFeature> TSV110 = [HasV8_2aOps, FeatureCrypto, FeatureFPARMv8,
|
||||
FeatureNEON, FeaturePerfMon, FeatureSPE,
|
||||
FeatureFullFP16, FeatureFP16FML, FeatureDotProd];
|
||||
-#ifdef ENABLE_AARCH64_HIP09
|
||||
list<SubtargetFeature> HIP09 = [HasV8_5aOps, FeatureBF16, FeatureCrypto, FeatureFPARMv8,
|
||||
FeatureMatMulInt8, FeatureMatMulFP32, FeatureMatMulFP64,
|
||||
FeatureNEON, FeaturePerfMon, FeatureRandGen, FeatureSPE,
|
||||
FeatureFullFP16, FeatureFP16FML, FeatureDotProd,
|
||||
FeatureJS, FeatureComplxNum, FeatureSHA3, FeatureSM4,
|
||||
FeatureSVE];
|
||||
-#endif
|
||||
list<SubtargetFeature> Ampere1 = [HasV8_6aOps, FeatureNEON, FeaturePerfMon,
|
||||
FeatureSSBS, FeatureRandGen, FeatureSB,
|
||||
FeatureSHA2, FeatureSHA3, FeatureAES];
|
||||
@@ -1497,11 +1491,9 @@ def : ProcessorModel<"thunderx3t110", ThunderX3T110Model,
|
||||
// HiSilicon Processors.
|
||||
def : ProcessorModel<"tsv110", TSV110Model, ProcessorFeatures.TSV110,
|
||||
[TuneTSV110]>;
|
||||
-#ifdef ENABLE_AARCH64_HIP09
|
||||
// FIXME: HiSilicon HIP09 is currently modeled as a Cortex-A57.
|
||||
def : ProcessorModel<"hip09", CortexA57Model, ProcessorFeatures.HIP09,
|
||||
[TuneHIP09]>;
|
||||
-#endif
|
||||
|
||||
// Support cyclone as an alias for apple-a7 so we can still LTO old bitcode.
|
||||
def : ProcessorModel<"cyclone", CycloneModel, ProcessorFeatures.AppleA7,
|
||||
diff --git a/llvm/lib/Target/AArch64/AArch64MacroFusion.cpp b/llvm/lib/Target/AArch64/AArch64MacroFusion.cpp
|
||||
index 4963ec350db2..44daa06468c5 100644
|
||||
--- a/llvm/lib/Target/AArch64/AArch64MacroFusion.cpp
|
||||
+++ b/llvm/lib/Target/AArch64/AArch64MacroFusion.cpp
|
||||
@@ -51,12 +51,10 @@ static bool isArithmeticBccPair(const MachineInstr *FirstMI,
|
||||
case AArch64::SUBSXrr:
|
||||
case AArch64::BICSWrr:
|
||||
case AArch64::BICSXrr:
|
||||
-#if defined(ENABLE_AARCH64_HIP09)
|
||||
case AArch64::ADCSWr:
|
||||
case AArch64::ADCSXr:
|
||||
case AArch64::SBCSWr:
|
||||
case AArch64::SBCSXr:
|
||||
-#endif
|
||||
return true;
|
||||
case AArch64::ADDSWrs:
|
||||
case AArch64::ADDSXrs:
|
||||
@@ -189,7 +187,6 @@ static bool isLiteralsPair(const MachineInstr *FirstMI,
|
||||
SecondMI.getOperand(3).getImm() == 16))
|
||||
return true;
|
||||
|
||||
-#if defined(ENABLE_AARCH64_HIP09)
|
||||
// 32 bit immediate.
|
||||
if ((FirstMI == nullptr || FirstMI->getOpcode() == AArch64::MOVNWi) &&
|
||||
(SecondMI.getOpcode() == AArch64::MOVKWi &&
|
||||
@@ -201,7 +198,6 @@ static bool isLiteralsPair(const MachineInstr *FirstMI,
|
||||
(SecondMI.getOpcode() == AArch64::MOVKWi &&
|
||||
SecondMI.getOperand(3).getImm() == 16))
|
||||
return true;
|
||||
-#endif
|
||||
|
||||
// Upper half of 64 bit immediate.
|
||||
if ((FirstMI == nullptr ||
|
||||
@@ -457,7 +453,6 @@ static bool isAddSub2RegAndConstOnePair(const MachineInstr *FirstMI,
|
||||
return false;
|
||||
}
|
||||
|
||||
-#if defined(ENABLE_AARCH64_HIP09)
|
||||
static bool isMvnClzPair(const MachineInstr *FirstMI,
|
||||
const MachineInstr &SecondMI) {
|
||||
// HIP09 supports fusion of MVN + CLZ.
|
||||
@@ -486,7 +481,6 @@ static bool isMvnClzPair(const MachineInstr *FirstMI,
|
||||
|
||||
return false;
|
||||
}
|
||||
-#endif
|
||||
|
||||
/// \brief Check if the instr pair, FirstMI and SecondMI, should be fused
|
||||
/// together. Given SecondMI, when FirstMI is unspecified, then check if
|
||||
@@ -523,10 +517,8 @@ static bool shouldScheduleAdjacent(const TargetInstrInfo &TII,
|
||||
if (ST.hasFuseAddSub2RegAndConstOne() &&
|
||||
isAddSub2RegAndConstOnePair(FirstMI, SecondMI))
|
||||
return true;
|
||||
-#if defined(ENABLE_AARCH64_HIP09)
|
||||
if (ST.hasFuseMvnClz() && isMvnClzPair(FirstMI, SecondMI))
|
||||
return true;
|
||||
-#endif
|
||||
|
||||
return false;
|
||||
}
|
||||
diff --git a/llvm/lib/Target/AArch64/AArch64Subtarget.cpp b/llvm/lib/Target/AArch64/AArch64Subtarget.cpp
|
||||
index ddf22364c78e..1aff7e30a0cf 100644
|
||||
--- a/llvm/lib/Target/AArch64/AArch64Subtarget.cpp
|
||||
+++ b/llvm/lib/Target/AArch64/AArch64Subtarget.cpp
|
||||
@@ -266,7 +266,6 @@ void AArch64Subtarget::initializeProperties() {
|
||||
PrefFunctionAlignment = Align(16);
|
||||
PrefLoopAlignment = Align(4);
|
||||
break;
|
||||
-#if defined(ENABLE_AARCH64_HIP09)
|
||||
case HIP09:
|
||||
CacheLineSize = 64;
|
||||
PrefFunctionAlignment = Align(16);
|
||||
@@ -274,7 +273,6 @@ void AArch64Subtarget::initializeProperties() {
|
||||
VScaleForTuning = 2;
|
||||
DefaultSVETFOpts = TailFoldingOpts::Simple;
|
||||
break;
|
||||
-#endif
|
||||
case ThunderX3T110:
|
||||
CacheLineSize = 64;
|
||||
PrefFunctionAlignment = Align(16);
|
||||
diff --git a/llvm/lib/Target/AArch64/AArch64Subtarget.h b/llvm/lib/Target/AArch64/AArch64Subtarget.h
|
||||
index 5f481f4f976a..8a1cebe96894 100644
|
||||
--- a/llvm/lib/Target/AArch64/AArch64Subtarget.h
|
||||
+++ b/llvm/lib/Target/AArch64/AArch64Subtarget.h
|
||||
@@ -88,9 +88,7 @@ public:
|
||||
ThunderXT88,
|
||||
ThunderX3T110,
|
||||
TSV110,
|
||||
-#if defined(ENABLE_AARCH64_HIP09)
|
||||
HIP09
|
||||
-#endif
|
||||
};
|
||||
|
||||
protected:
|
||||
@@ -242,11 +240,7 @@ public:
|
||||
bool hasFusion() const {
|
||||
return hasArithmeticBccFusion() || hasArithmeticCbzFusion() ||
|
||||
hasFuseAES() || hasFuseArithmeticLogic() || hasFuseCCSelect() ||
|
||||
-#if defined(ENABLE_AARCH64_HIP09)
|
||||
hasFuseAdrpAdd() || hasFuseLiterals() || hasFuseMvnClz();
|
||||
-#else
|
||||
- hasFuseAdrpAdd() || hasFuseLiterals();
|
||||
-#endif
|
||||
}
|
||||
|
||||
unsigned getMaxInterleaveFactor() const { return MaxInterleaveFactor; }
|
||||
diff --git a/llvm/lib/Target/CMakeLists.txt b/llvm/lib/Target/CMakeLists.txt
|
||||
index 501ce1f2fe53..2739233f9ccb 100644
|
||||
--- a/llvm/lib/Target/CMakeLists.txt
|
||||
+++ b/llvm/lib/Target/CMakeLists.txt
|
||||
@@ -2,10 +2,6 @@ list(APPEND LLVM_COMMON_DEPENDS intrinsics_gen)
|
||||
|
||||
list(APPEND LLVM_TABLEGEN_FLAGS -I ${LLVM_MAIN_SRC_DIR}/lib/Target)
|
||||
|
||||
-if(LLVM_ENABLE_AARCH64_HIP09)
|
||||
- list(APPEND LLVM_TABLEGEN_FLAGS "-DENABLE_AARCH64_HIP09")
|
||||
-endif()
|
||||
-
|
||||
add_llvm_component_library(LLVMTarget
|
||||
Target.cpp
|
||||
TargetIntrinsicInfo.cpp
|
||||
diff --git a/llvm/lib/TargetParser/Host.cpp b/llvm/lib/TargetParser/Host.cpp
|
||||
index 8b23be02edc0..8b1191a5b442 100644
|
||||
--- a/llvm/lib/TargetParser/Host.cpp
|
||||
+++ b/llvm/lib/TargetParser/Host.cpp
|
||||
@@ -257,9 +257,7 @@ StringRef sys::detail::getHostCPUNameForARM(StringRef ProcCpuinfoContent) {
|
||||
// contents are specified in the various processor manuals.
|
||||
return StringSwitch<const char *>(Part)
|
||||
.Case("0xd01", "tsv110")
|
||||
-#if defined(ENABLE_AARCH64_HIP09)
|
||||
.Case("0xd02", "hip09")
|
||||
-#endif
|
||||
.Default("generic");
|
||||
|
||||
if (Implementer == "0x51") // Qualcomm Technologies, Inc.
|
||||
diff --git a/llvm/test/CodeGen/AArch64/cpus-hip09.ll b/llvm/test/CodeGen/AArch64/cpus-hip09.ll
|
||||
deleted file mode 100644
|
||||
index dcf32e4dca89..000000000000
|
||||
--- a/llvm/test/CodeGen/AArch64/cpus-hip09.ll
|
||||
+++ /dev/null
|
||||
@@ -1,11 +0,0 @@
|
||||
-; REQUIRES: enable_enable_aarch64_hip09
|
||||
-; This tests that llc accepts all valid AArch64 CPUs
|
||||
-
|
||||
-; RUN: llc < %s -mtriple=arm64-unknown-unknown -mcpu=hip09 2>&1 | FileCheck %s
|
||||
-
|
||||
-; CHECK-NOT: {{.*}} is not a recognized processor for this target
|
||||
-; INVALID: {{.*}} is not a recognized processor for this target
|
||||
-
|
||||
-define i32 @f(i64 %z) {
|
||||
- ret i32 0
|
||||
-}
|
||||
diff --git a/llvm/test/CodeGen/AArch64/cpus.ll b/llvm/test/CodeGen/AArch64/cpus.ll
|
||||
index b24866064efa..56772f6c6049 100644
|
||||
--- a/llvm/test/CodeGen/AArch64/cpus.ll
|
||||
+++ b/llvm/test/CodeGen/AArch64/cpus.ll
|
||||
@@ -33,6 +33,7 @@
|
||||
; RUN: llc < %s -mtriple=arm64-unknown-unknown -mcpu=thunderx2t99 2>&1 | FileCheck %s
|
||||
; RUN: llc < %s -mtriple=arm64-unknown-unknown -mcpu=thunderx3t110 2>&1 | FileCheck %s
|
||||
; RUN: llc < %s -mtriple=arm64-unknown-unknown -mcpu=tsv110 2>&1 | FileCheck %s
|
||||
+; RUN: llc < %s -mtriple=arm64-unknown-unknown -mcpu=hip09 2>&1 | FileCheck %s
|
||||
; RUN: llc < %s -mtriple=arm64-unknown-unknown -mcpu=apple-latest 2>&1 | FileCheck %s
|
||||
; RUN: llc < %s -mtriple=arm64-unknown-unknown -mcpu=a64fx 2>&1 | FileCheck %s
|
||||
; RUN: llc < %s -mtriple=arm64-unknown-unknown -mcpu=ampere1 2>&1 | FileCheck %s
|
||||
diff --git a/llvm/test/CodeGen/AArch64/macro-fusion-mvnclz.mir b/llvm/test/CodeGen/AArch64/macro-fusion-mvnclz.mir
|
||||
index 64bf159370f9..26ba76ef0af5 100644
|
||||
--- a/llvm/test/CodeGen/AArch64/macro-fusion-mvnclz.mir
|
||||
+++ b/llvm/test/CodeGen/AArch64/macro-fusion-mvnclz.mir
|
||||
@@ -1,4 +1,3 @@
|
||||
-# REQUIRES: enable_enable_aarch64_hip09
|
||||
# RUN: llc -o - %s -mtriple=aarch64-- -mattr=+fuse-mvn-clz -run-pass postmisched | FileCheck %s --check-prefixes=CHECK,FUSION
|
||||
# RUN: llc -o - %s -mtriple=aarch64-- -mattr=-fuse-mvn-clz -run-pass postmisched | FileCheck %s --check-prefixes=CHECK,NOFUSION
|
||||
---
|
||||
diff --git a/llvm/test/CodeGen/AArch64/misched-fusion-lit-hip09.ll b/llvm/test/CodeGen/AArch64/misched-fusion-lit-hip09.ll
|
||||
deleted file mode 100644
|
||||
index d67fa5b4374c..000000000000
|
||||
--- a/llvm/test/CodeGen/AArch64/misched-fusion-lit-hip09.ll
|
||||
+++ /dev/null
|
||||
@@ -1,73 +0,0 @@
|
||||
-; REQUIRES: enable_enable_aarch64_hip09
|
||||
-; RUN: llc %s -o - -mtriple=aarch64-unknown -mcpu=hip09 | FileCheck %s --check-prefix=CHECK --check-prefix=CHECKFUSE-HIP09
|
||||
-
|
||||
-@g = common local_unnamed_addr global ptr null, align 8
|
||||
-
|
||||
-define dso_local ptr @litp(i32 %a, i32 %b) {
|
||||
-entry:
|
||||
- %add = add nsw i32 %b, %a
|
||||
- %idx.ext = sext i32 %add to i64
|
||||
- %add.ptr = getelementptr i8, ptr @litp, i64 %idx.ext
|
||||
- store ptr %add.ptr, ptr @g, align 8
|
||||
- ret ptr %add.ptr
|
||||
-
|
||||
-; CHECK-LABEL: litp:
|
||||
-; CHECK: adrp [[R:x[0-9]+]], litp
|
||||
-; CHECKFUSE-NEXT: add {{x[0-9]+}}, [[R]], :lo12:litp
|
||||
-}
|
||||
-
|
||||
-define dso_local ptr @litp_tune_generic(i32 %a, i32 %b) "tune-cpu"="generic" {
|
||||
-entry:
|
||||
- %add = add nsw i32 %b, %a
|
||||
- %idx.ext = sext i32 %add to i64
|
||||
- %add.ptr = getelementptr i8, ptr @litp_tune_generic, i64 %idx.ext
|
||||
- store ptr %add.ptr, ptr @g, align 8
|
||||
- ret ptr %add.ptr
|
||||
-
|
||||
-; CHECK-LABEL: litp_tune_generic:
|
||||
-; CHECK: adrp [[R:x[0-9]+]], litp_tune_generic
|
||||
-; CHECK-NEXT: add {{x[0-9]+}}, [[R]], :lo12:litp_tune_generic
|
||||
-}
|
||||
-
|
||||
-define dso_local i32 @liti(i32 %a, i32 %b) {
|
||||
-entry:
|
||||
- %add = add i32 %a, -262095121
|
||||
- %add1 = add i32 %add, %b
|
||||
- ret i32 %add1
|
||||
-
|
||||
-; CHECK-LABEL: liti:
|
||||
-; CHECK: mov [[R:w[0-9]+]], {{#[0-9]+}}
|
||||
-; CHECKDONT-NEXT: add {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
|
||||
-; CHECKFUSE-NEXT: movk [[R]], {{#[0-9]+}}, lsl #16
|
||||
-; CHECKFUSE-HIP09: movk [[R]], {{#[0-9]+}}, lsl #16
|
||||
-}
|
||||
-
|
||||
-; Function Attrs: norecurse nounwind readnone
|
||||
-define dso_local i64 @litl(i64 %a, i64 %b) {
|
||||
-entry:
|
||||
- %add = add i64 %a, 2208998440489107183
|
||||
- %add1 = add i64 %add, %b
|
||||
- ret i64 %add1
|
||||
-
|
||||
-; CHECK-LABEL: litl:
|
||||
-; CHECK: mov [[R:x[0-9]+]], {{#[0-9]+}}
|
||||
-; CHECKDONT-NEXT: add {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}
|
||||
-; CHECK-NEXT: movk [[R]], {{#[0-9]+}}, lsl #16
|
||||
-; CHECK: movk [[R]], {{#[0-9]+}}, lsl #32
|
||||
-; CHECK-NEXT: movk [[R]], {{#[0-9]+}}, lsl #48
|
||||
-}
|
||||
-
|
||||
-; Function Attrs: norecurse nounwind readnone
|
||||
-define dso_local double @litf() {
|
||||
-entry:
|
||||
- ret double 0x400921FB54442D18
|
||||
-
|
||||
-; CHECK-LABEL: litf:
|
||||
-; CHECK-DONT: adrp [[ADDR:x[0-9]+]], [[CSTLABEL:.LCP.*]]
|
||||
-; CHECK-DONT-NEXT: ldr {{d[0-9]+}}, {{[[]}}[[ADDR]], :lo12:[[CSTLABEL]]{{[]]}}
|
||||
-; CHECKFUSE-HIP09: mov [[R:x[0-9]+]], #11544
|
||||
-; CHECKFUSE-HIP09: movk [[R]], #21572, lsl #16
|
||||
-; CHECKFUSE-HIP09: movk [[R]], #8699, lsl #32
|
||||
-; CHECKFUSE-HIP09: movk [[R]], #16393, lsl #48
|
||||
-; CHECKFUSE-HIP09: fmov {{d[0-9]+}}, [[R]]
|
||||
-}
|
||||
diff --git a/llvm/test/CodeGen/AArch64/misched-fusion-lit.ll b/llvm/test/CodeGen/AArch64/misched-fusion-lit.ll
|
||||
index ad244d30df11..67cc7aa503b6 100644
|
||||
--- a/llvm/test/CodeGen/AArch64/misched-fusion-lit.ll
|
||||
+++ b/llvm/test/CodeGen/AArch64/misched-fusion-lit.ll
|
||||
@@ -7,6 +7,7 @@
|
||||
; RUN: llc %s -o - -mtriple=aarch64-unknown -mcpu=exynos-m4 | FileCheck %s --check-prefix=CHECK --check-prefix=CHECKFUSE
|
||||
; RUN: llc %s -o - -mtriple=aarch64-unknown -mcpu=exynos-m5 | FileCheck %s --check-prefix=CHECK --check-prefix=CHECKFUSE
|
||||
; RUN: llc %s -o - -mtriple=aarch64-unknown -mcpu=neoverse-n1 | FileCheck %s --check-prefix=CHECKFUSE-NEOVERSE
|
||||
+; RUN: llc %s -o - -mtriple=aarch64-unknown -mcpu=hip09 | FileCheck %s --check-prefix=CHECK --check-prefix=CHECKFUSE-HIP09
|
||||
|
||||
@g = common local_unnamed_addr global ptr null, align 8
|
||||
|
||||
@@ -59,6 +60,7 @@ entry:
|
||||
; CHECK: mov [[R:w[0-9]+]], {{#[0-9]+}}
|
||||
; CHECKDONT-NEXT: add {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
|
||||
; CHECKFUSE-NEXT: movk [[R]], {{#[0-9]+}}, lsl #16
|
||||
+; CHECKFUSE-HIP09: movk [[R]], {{#[0-9]+}}, lsl #16
|
||||
}
|
||||
|
||||
; Function Attrs: norecurse nounwind readnone
|
||||
@@ -89,4 +91,9 @@ entry:
|
||||
; CHECK-FUSE: movk [[R]], #8699, lsl #32
|
||||
; CHECK-FUSE: movk [[R]], #16393, lsl #48
|
||||
; CHECK-FUSE: fmov {{d[0-9]+}}, [[R]]
|
||||
+; CHECKFUSE-HIP09: mov [[R:x[0-9]+]], #11544
|
||||
+; CHECKFUSE-HIP09: movk [[R]], #21572, lsl #16
|
||||
+; CHECKFUSE-HIP09: movk [[R]], #8699, lsl #32
|
||||
+; CHECKFUSE-HIP09: movk [[R]], #16393, lsl #48
|
||||
+; CHECKFUSE-HIP09: fmov {{d[0-9]+}}, [[R]]
|
||||
}
|
||||
diff --git a/llvm/test/CodeGen/AArch64/remat-hip09.ll b/llvm/test/CodeGen/AArch64/remat-hip09.ll
|
||||
deleted file mode 100644
|
||||
index aec0d18ae73f..000000000000
|
||||
--- a/llvm/test/CodeGen/AArch64/remat-hip09.ll
|
||||
+++ /dev/null
|
||||
@@ -1,18 +0,0 @@
|
||||
-; REQUIRES: enable_enable_aarch64_hip09
|
||||
-; RUN: llc -mtriple=aarch64-linux-gnuabi -mcpu=hip09 -o - %s | FileCheck %s
|
||||
-
|
||||
-%X = type { i64, i64, i64 }
|
||||
-declare void @f(ptr)
|
||||
-define void @t() {
|
||||
-entry:
|
||||
- %tmp = alloca %X
|
||||
- call void @f(ptr %tmp)
|
||||
-; CHECK: add x0, sp, #8
|
||||
-; CHECK-NOT: mov
|
||||
-; CHECK-NEXT: bl f
|
||||
- call void @f(ptr %tmp)
|
||||
-; CHECK: add x0, sp, #8
|
||||
-; CHECK-NOT: mov
|
||||
-; CHECK-NEXT: bl f
|
||||
- ret void
|
||||
-}
|
||||
diff --git a/llvm/test/CodeGen/AArch64/remat.ll b/llvm/test/CodeGen/AArch64/remat.ll
|
||||
index 483c4d71ee21..fa039246c7f5 100644
|
||||
--- a/llvm/test/CodeGen/AArch64/remat.ll
|
||||
+++ b/llvm/test/CodeGen/AArch64/remat.ll
|
||||
@@ -22,6 +22,7 @@
|
||||
; RUN: llc -mtriple=aarch64-linux-gnuabi -mcpu=kryo -o - %s | FileCheck %s
|
||||
; RUN: llc -mtriple=aarch64-linux-gnuabi -mcpu=thunderx2t99 -o - %s | FileCheck %s
|
||||
; RUN: llc -mtriple=aarch64-linux-gnuabi -mcpu=tsv110 -o - %s | FileCheck %s
|
||||
+; RUN: llc -mtriple=aarch64-linux-gnuabi -mcpu=hip09 -o - %s | FileCheck %s
|
||||
; RUN: llc -mtriple=aarch64-linux-gnuabi -mattr=+custom-cheap-as-move -o - %s | FileCheck %s
|
||||
; RUN: llc -mtriple=aarch64-linux-gnuabi -mcpu=thunderx3t110 -o - %s | FileCheck %s
|
||||
; RUN: llc -mtriple=aarch64-linux-gnuabi -mcpu=ampere1 -o - %s | FileCheck %s
|
||||
diff --git a/llvm/test/lit.site.cfg.py.in b/llvm/test/lit.site.cfg.py.in
|
||||
index 6145a514f008..20c1ecca1d43 100644
|
||||
--- a/llvm/test/lit.site.cfg.py.in
|
||||
+++ b/llvm/test/lit.site.cfg.py.in
|
||||
@@ -63,14 +63,10 @@ config.dxil_tests = @LLVM_INCLUDE_DXIL_TESTS@
|
||||
config.have_llvm_driver = @LLVM_TOOL_LLVM_DRIVER_BUILD@
|
||||
config.use_classic_flang = @LLVM_ENABLE_CLASSIC_FLANG@
|
||||
config.enable_enable_autotuner = @LLVM_ENABLE_AUTOTUNER@
|
||||
-config.enable_enable_aarch64_hip09 = @LLVM_ENABLE_AARCH64_HIP09@
|
||||
|
||||
import lit.llvm
|
||||
lit.llvm.initialize(lit_config, config)
|
||||
|
||||
-if config.enable_enable_aarch64_hip09:
|
||||
- config.available_features.add("enable_enable_aarch64_hip09")
|
||||
-
|
||||
# Let the main config do the real work.
|
||||
lit_config.load_config(
|
||||
config, os.path.join(config.llvm_src_root, "test/lit.cfg.py"))
|
||||
diff --git a/llvm/unittests/TargetParser/Host.cpp b/llvm/unittests/TargetParser/Host.cpp
|
||||
index 4b4c81514896..cfc41486b173 100644
|
||||
--- a/llvm/unittests/TargetParser/Host.cpp
|
||||
+++ b/llvm/unittests/TargetParser/Host.cpp
|
||||
@@ -250,11 +250,9 @@ CPU part : 0x0a1
|
||||
EXPECT_EQ(sys::detail::getHostCPUNameForARM("CPU implementer : 0x48\n"
|
||||
"CPU part : 0xd01"),
|
||||
"tsv110");
|
||||
-#if defined(ENABLE_AARCH64_HIP09)
|
||||
EXPECT_EQ(sys::detail::getHostCPUNameForARM("CPU implementer : 0x48\n"
|
||||
"CPU part : 0xd02"),
|
||||
"hip09");
|
||||
-#endif
|
||||
|
||||
// Verify A64FX.
|
||||
const std::string A64FXProcCpuInfo = R"(
|
||||
diff --git a/llvm/unittests/TargetParser/TargetParserTest.cpp b/llvm/unittests/TargetParser/TargetParserTest.cpp
|
||||
index 94e0047e567b..daa38474004e 100644
|
||||
--- a/llvm/unittests/TargetParser/TargetParserTest.cpp
|
||||
+++ b/llvm/unittests/TargetParser/TargetParserTest.cpp
|
||||
@@ -1421,7 +1421,6 @@ INSTANTIATE_TEST_SUITE_P(
|
||||
AArch64::AEK_PROFILE | AArch64::AEK_FP16 |
|
||||
AArch64::AEK_FP16FML | AArch64::AEK_DOTPROD,
|
||||
"8.2-A"),
|
||||
-#if defined(ENABLE_AARCH64_HIP09)
|
||||
ARMCPUTestParams(
|
||||
"hip09", "armv8.5-a", "crypto-neon-fp-armv8",
|
||||
AArch64::AEK_CRC | AArch64::AEK_FP | AArch64::AEK_SIMD |
|
||||
@@ -1432,7 +1431,6 @@ INSTANTIATE_TEST_SUITE_P(
|
||||
AArch64::AEK_FP16FML | AArch64::AEK_SVE | AArch64::AEK_I8MM |
|
||||
AArch64::AEK_F32MM | AArch64::AEK_F64MM | AArch64::AEK_BF16,
|
||||
"8.5-A"),
|
||||
-#endif
|
||||
ARMCPUTestParams("a64fx", "armv8.2-a", "crypto-neon-fp-armv8",
|
||||
AArch64::AEK_CRC | AArch64::AEK_AES |
|
||||
AArch64::AEK_SHA2 | AArch64::AEK_FP |
|
||||
@@ -1449,11 +1447,7 @@ INSTANTIATE_TEST_SUITE_P(
|
||||
"8.2-A")));
|
||||
|
||||
// Note: number of CPUs includes aliases.
|
||||
-#if defined(ENABLE_AARCH64_HIP09)
|
||||
static constexpr unsigned NumAArch64CPUArchs = 63;
|
||||
-#else
|
||||
-static constexpr unsigned NumAArch64CPUArchs = 62;
|
||||
-#endif
|
||||
|
||||
TEST(TargetParserTest, testAArch64CPUArchList) {
|
||||
SmallVector<StringRef, NumAArch64CPUArchs> List;
|
||||
--
|
||||
2.43.0
|
||||
|
||||
@ -0,0 +1,72 @@
|
||||
From 28e3fc80336935bc8bed372e78616ef5be9f4908 Mon Sep 17 00:00:00 2001
|
||||
From: Arthur Eubanks <aeubanks@google.com>
|
||||
Date: Thu, 27 Jul 2023 13:27:58 -0700
|
||||
Subject: [PATCH] Don't zero out noreg operands
|
||||
|
||||
A tail call may have $noreg operands.
|
||||
|
||||
Fixes a crash.
|
||||
|
||||
Reviewed By: xgupta
|
||||
|
||||
Differential Revision: https://reviews.llvm.org/D156485
|
||||
---
|
||||
llvm/lib/CodeGen/PrologEpilogInserter.cpp | 9 +++++++--
|
||||
llvm/test/CodeGen/X86/zero-call-used-regs.ll | 14 ++++++++++++++
|
||||
2 files changed, 21 insertions(+), 2 deletions(-)
|
||||
|
||||
diff --git a/llvm/lib/CodeGen/PrologEpilogInserter.cpp b/llvm/lib/CodeGen/PrologEpilogInserter.cpp
|
||||
index e323aaaeefaf..49047719fdaa 100644
|
||||
--- a/llvm/lib/CodeGen/PrologEpilogInserter.cpp
|
||||
+++ b/llvm/lib/CodeGen/PrologEpilogInserter.cpp
|
||||
@@ -1285,6 +1285,8 @@ void PEI::insertZeroCallUsedRegs(MachineFunction &MF) {
|
||||
continue;
|
||||
|
||||
MCRegister Reg = MO.getReg();
|
||||
+ if (!Reg)
|
||||
+ continue;
|
||||
|
||||
// This picks up sibling registers (e.q. %al -> %ah).
|
||||
for (MCRegUnit Unit : TRI.regunits(Reg))
|
||||
@@ -1308,8 +1310,11 @@ void PEI::insertZeroCallUsedRegs(MachineFunction &MF) {
|
||||
if (!MO.isReg())
|
||||
continue;
|
||||
|
||||
- for (const MCPhysReg &Reg :
|
||||
- TRI.sub_and_superregs_inclusive(MO.getReg()))
|
||||
+ MCRegister Reg = MO.getReg();
|
||||
+ if (!Reg)
|
||||
+ continue;
|
||||
+
|
||||
+ for (const MCPhysReg Reg : TRI.sub_and_superregs_inclusive(Reg))
|
||||
RegsToZero.reset(Reg);
|
||||
}
|
||||
}
|
||||
diff --git a/llvm/test/CodeGen/X86/zero-call-used-regs.ll b/llvm/test/CodeGen/X86/zero-call-used-regs.ll
|
||||
index 63d51c916bb9..97ad5ce9c8cb 100644
|
||||
--- a/llvm/test/CodeGen/X86/zero-call-used-regs.ll
|
||||
+++ b/llvm/test/CodeGen/X86/zero-call-used-regs.ll
|
||||
@@ -241,6 +241,20 @@ entry:
|
||||
ret i32 %x
|
||||
}
|
||||
|
||||
+define dso_local void @tailcall(ptr %p) local_unnamed_addr #0 "zero-call-used-regs"="used-gpr" {
|
||||
+; I386-LABEL: tailcall:
|
||||
+; I386: # %bb.0:
|
||||
+; I386-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
+; I386-NEXT: jmpl *(%eax) # TAILCALL
|
||||
+;
|
||||
+; X86-64-LABEL: tailcall:
|
||||
+; X86-64: # %bb.0:
|
||||
+; X86-64-NEXT: jmpq *(%rdi) # TAILCALL
|
||||
+ %c = load ptr, ptr %p
|
||||
+ tail call void %c()
|
||||
+ ret void
|
||||
+}
|
||||
+
|
||||
; Don't emit zeroing registers in "main" function.
|
||||
define dso_local i32 @main() local_unnamed_addr #1 {
|
||||
; I386-LABEL: main:
|
||||
--
|
||||
2.43.0
|
||||
|
||||
246
0029-SimplifyLibCalls-Merge-sqrt-into-the-power-of-exp-79.patch
Normal file
246
0029-SimplifyLibCalls-Merge-sqrt-into-the-power-of-exp-79.patch
Normal file
@ -0,0 +1,246 @@
|
||||
From 60ff801d1ea96ab964039cc1ed42e1dca0a63d54 Mon Sep 17 00:00:00 2001
|
||||
From: Anton Sidorenko <anton.sidorenko@syntacore.com>
|
||||
Date: Tue, 6 Feb 2024 12:02:06 +0300
|
||||
Subject: [PATCH] [SimplifyLibCalls] Merge sqrt into the power of exp (#79146)
|
||||
|
||||
Under fast-math flags it's possible to convert `sqrt(exp(X))` into
|
||||
`exp(X * 0.5)`. I suppose that this transformation is always profitable.
|
||||
This is similar to the optimization existing in GCC.
|
||||
---
|
||||
.../llvm/Transforms/Utils/SimplifyLibCalls.h | 1 +
|
||||
.../lib/Transforms/Utils/SimplifyLibCalls.cpp | 67 ++++++++++
|
||||
llvm/test/Transforms/InstCombine/sqrt.ll | 120 ++++++++++++++++++
|
||||
3 files changed, 188 insertions(+)
|
||||
|
||||
diff --git a/llvm/include/llvm/Transforms/Utils/SimplifyLibCalls.h b/llvm/include/llvm/Transforms/Utils/SimplifyLibCalls.h
|
||||
index eb10545ee149..1aad0b298845 100644
|
||||
--- a/llvm/include/llvm/Transforms/Utils/SimplifyLibCalls.h
|
||||
+++ b/llvm/include/llvm/Transforms/Utils/SimplifyLibCalls.h
|
||||
@@ -201,6 +201,7 @@ private:
|
||||
Value *optimizeFMinFMax(CallInst *CI, IRBuilderBase &B);
|
||||
Value *optimizeLog(CallInst *CI, IRBuilderBase &B);
|
||||
Value *optimizeSqrt(CallInst *CI, IRBuilderBase &B);
|
||||
+ Value *mergeSqrtToExp(CallInst *CI, IRBuilderBase &B);
|
||||
Value *optimizeSinCosPi(CallInst *CI, bool IsSin, IRBuilderBase &B);
|
||||
Value *optimizeTan(CallInst *CI, IRBuilderBase &B);
|
||||
// Wrapper for all floating point library call optimizations
|
||||
diff --git a/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp b/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp
|
||||
index 3ad97613fe7a..dd5bbdaaf6d3 100644
|
||||
--- a/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp
|
||||
+++ b/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp
|
||||
@@ -2539,6 +2539,70 @@ Value *LibCallSimplifier::optimizeLog(CallInst *Log, IRBuilderBase &B) {
|
||||
return Ret;
|
||||
}
|
||||
|
||||
+// sqrt(exp(X)) -> exp(X * 0.5)
|
||||
+Value *LibCallSimplifier::mergeSqrtToExp(CallInst *CI, IRBuilderBase &B) {
|
||||
+ if (!CI->hasAllowReassoc())
|
||||
+ return nullptr;
|
||||
+
|
||||
+ Function *SqrtFn = CI->getCalledFunction();
|
||||
+ CallInst *Arg = dyn_cast<CallInst>(CI->getArgOperand(0));
|
||||
+ if (!Arg || !Arg->hasAllowReassoc() || !Arg->hasOneUse())
|
||||
+ return nullptr;
|
||||
+ Intrinsic::ID ArgID = Arg->getIntrinsicID();
|
||||
+ LibFunc ArgLb = NotLibFunc;
|
||||
+ TLI->getLibFunc(*Arg, ArgLb);
|
||||
+
|
||||
+ LibFunc SqrtLb, ExpLb, Exp2Lb, Exp10Lb;
|
||||
+
|
||||
+ if (TLI->getLibFunc(SqrtFn->getName(), SqrtLb))
|
||||
+ switch (SqrtLb) {
|
||||
+ case LibFunc_sqrtf:
|
||||
+ ExpLb = LibFunc_expf;
|
||||
+ Exp2Lb = LibFunc_exp2f;
|
||||
+ Exp10Lb = LibFunc_exp10f;
|
||||
+ break;
|
||||
+ case LibFunc_sqrt:
|
||||
+ ExpLb = LibFunc_exp;
|
||||
+ Exp2Lb = LibFunc_exp2;
|
||||
+ Exp10Lb = LibFunc_exp10;
|
||||
+ break;
|
||||
+ case LibFunc_sqrtl:
|
||||
+ ExpLb = LibFunc_expl;
|
||||
+ Exp2Lb = LibFunc_exp2l;
|
||||
+ Exp10Lb = LibFunc_exp10l;
|
||||
+ break;
|
||||
+ default:
|
||||
+ return nullptr;
|
||||
+ }
|
||||
+ else if (SqrtFn->getIntrinsicID() == Intrinsic::sqrt) {
|
||||
+ if (CI->getType()->getScalarType()->isFloatTy()) {
|
||||
+ ExpLb = LibFunc_expf;
|
||||
+ Exp2Lb = LibFunc_exp2f;
|
||||
+ Exp10Lb = LibFunc_exp10f;
|
||||
+ } else if (CI->getType()->getScalarType()->isDoubleTy()) {
|
||||
+ ExpLb = LibFunc_exp;
|
||||
+ Exp2Lb = LibFunc_exp2;
|
||||
+ Exp10Lb = LibFunc_exp10;
|
||||
+ } else
|
||||
+ return nullptr;
|
||||
+ } else
|
||||
+ return nullptr;
|
||||
+
|
||||
+ if (ArgLb != ExpLb && ArgLb != Exp2Lb && ArgLb != Exp10Lb &&
|
||||
+ ArgID != Intrinsic::exp && ArgID != Intrinsic::exp2)
|
||||
+ return nullptr;
|
||||
+
|
||||
+ IRBuilderBase::InsertPointGuard Guard(B);
|
||||
+ B.SetInsertPoint(Arg);
|
||||
+ auto *ExpOperand = Arg->getOperand(0);
|
||||
+ auto *FMul =
|
||||
+ B.CreateFMulFMF(ExpOperand, ConstantFP::get(ExpOperand->getType(), 0.5),
|
||||
+ CI, "merged.sqrt");
|
||||
+
|
||||
+ Arg->setOperand(0, FMul);
|
||||
+ return Arg;
|
||||
+}
|
||||
+
|
||||
Value *LibCallSimplifier::optimizeSqrt(CallInst *CI, IRBuilderBase &B) {
|
||||
Module *M = CI->getModule();
|
||||
Function *Callee = CI->getCalledFunction();
|
||||
@@ -2551,6 +2615,9 @@ Value *LibCallSimplifier::optimizeSqrt(CallInst *CI, IRBuilderBase &B) {
|
||||
Callee->getIntrinsicID() == Intrinsic::sqrt))
|
||||
Ret = optimizeUnaryDoubleFP(CI, B, TLI, true);
|
||||
|
||||
+ if (Value *Opt = mergeSqrtToExp(CI, B))
|
||||
+ return Opt;
|
||||
+
|
||||
if (!CI->isFast())
|
||||
return Ret;
|
||||
|
||||
diff --git a/llvm/test/Transforms/InstCombine/sqrt.ll b/llvm/test/Transforms/InstCombine/sqrt.ll
|
||||
index 004df3e30c72..f72fe5a6a581 100644
|
||||
--- a/llvm/test/Transforms/InstCombine/sqrt.ll
|
||||
+++ b/llvm/test/Transforms/InstCombine/sqrt.ll
|
||||
@@ -88,7 +88,127 @@ define float @sqrt_call_fabs_f32(float %x) {
|
||||
ret float %sqrt
|
||||
}
|
||||
|
||||
+define double @sqrt_exp(double %x) {
|
||||
+; CHECK-LABEL: @sqrt_exp(
|
||||
+; CHECK-NEXT: [[MERGED_SQRT:%.*]] = fmul reassoc double [[X:%.*]], 5.000000e-01
|
||||
+; CHECK-NEXT: [[E:%.*]] = call reassoc double @llvm.exp.f64(double [[MERGED_SQRT]])
|
||||
+; CHECK-NEXT: ret double [[E]]
|
||||
+;
|
||||
+ %e = call reassoc double @llvm.exp.f64(double %x)
|
||||
+ %res = call reassoc double @llvm.sqrt.f64(double %e)
|
||||
+ ret double %res
|
||||
+}
|
||||
+
|
||||
+define double @sqrt_exp_2(double %x) {
|
||||
+; CHECK-LABEL: @sqrt_exp_2(
|
||||
+; CHECK-NEXT: [[MERGED_SQRT:%.*]] = fmul reassoc double [[X:%.*]], 5.000000e-01
|
||||
+; CHECK-NEXT: [[E:%.*]] = call reassoc double @exp(double [[MERGED_SQRT]])
|
||||
+; CHECK-NEXT: ret double [[E]]
|
||||
+;
|
||||
+ %e = call reassoc double @exp(double %x)
|
||||
+ %res = call reassoc double @sqrt(double %e)
|
||||
+ ret double %res
|
||||
+}
|
||||
+
|
||||
+define double @sqrt_exp2(double %x) {
|
||||
+; CHECK-LABEL: @sqrt_exp2(
|
||||
+; CHECK-NEXT: [[MERGED_SQRT:%.*]] = fmul reassoc double [[X:%.*]], 5.000000e-01
|
||||
+; CHECK-NEXT: [[E:%.*]] = call reassoc double @exp2(double [[MERGED_SQRT]])
|
||||
+; CHECK-NEXT: ret double [[E]]
|
||||
+;
|
||||
+ %e = call reassoc double @exp2(double %x)
|
||||
+ %res = call reassoc double @sqrt(double %e)
|
||||
+ ret double %res
|
||||
+}
|
||||
+
|
||||
+define double @sqrt_exp10(double %x) {
|
||||
+; CHECK-LABEL: @sqrt_exp10(
|
||||
+; CHECK-NEXT: [[MERGED_SQRT:%.*]] = fmul reassoc double [[X:%.*]], 5.000000e-01
|
||||
+; CHECK-NEXT: [[E:%.*]] = call reassoc double @exp10(double [[MERGED_SQRT]])
|
||||
+; CHECK-NEXT: ret double [[E]]
|
||||
+;
|
||||
+ %e = call reassoc double @exp10(double %x)
|
||||
+ %res = call reassoc double @sqrt(double %e)
|
||||
+ ret double %res
|
||||
+}
|
||||
+
|
||||
+; Negative test
|
||||
+define double @sqrt_exp_nofast_1(double %x) {
|
||||
+; CHECK-LABEL: @sqrt_exp_nofast_1(
|
||||
+; CHECK-NEXT: [[E:%.*]] = call double @llvm.exp.f64(double [[X:%.*]])
|
||||
+; CHECK-NEXT: [[RES:%.*]] = call reassoc double @llvm.sqrt.f64(double [[E]])
|
||||
+; CHECK-NEXT: ret double [[RES]]
|
||||
+;
|
||||
+ %e = call double @llvm.exp.f64(double %x)
|
||||
+ %res = call reassoc double @llvm.sqrt.f64(double %e)
|
||||
+ ret double %res
|
||||
+}
|
||||
+
|
||||
+; Negative test
|
||||
+define double @sqrt_exp_nofast_2(double %x) {
|
||||
+; CHECK-LABEL: @sqrt_exp_nofast_2(
|
||||
+; CHECK-NEXT: [[E:%.*]] = call reassoc double @llvm.exp.f64(double [[X:%.*]])
|
||||
+; CHECK-NEXT: [[RES:%.*]] = call double @llvm.sqrt.f64(double [[E]])
|
||||
+; CHECK-NEXT: ret double [[RES]]
|
||||
+;
|
||||
+ %e = call reassoc double @llvm.exp.f64(double %x)
|
||||
+ %res = call double @llvm.sqrt.f64(double %e)
|
||||
+ ret double %res
|
||||
+}
|
||||
+
|
||||
+define double @sqrt_exp_merge_constant(double %x, double %y) {
|
||||
+; CHECK-LABEL: @sqrt_exp_merge_constant(
|
||||
+; CHECK-NEXT: [[MERGED_SQRT:%.*]] = fmul reassoc nsz double [[X:%.*]], 5.000000e+00
|
||||
+; CHECK-NEXT: [[E:%.*]] = call reassoc double @llvm.exp.f64(double [[MERGED_SQRT]])
|
||||
+; CHECK-NEXT: ret double [[E]]
|
||||
+;
|
||||
+ %mul = fmul reassoc nsz double %x, 10.0
|
||||
+ %e = call reassoc double @llvm.exp.f64(double %mul)
|
||||
+ %res = call reassoc nsz double @llvm.sqrt.f64(double %e)
|
||||
+ ret double %res
|
||||
+}
|
||||
+
|
||||
+define double @sqrt_exp_intr_and_libcall(double %x) {
|
||||
+; CHECK-LABEL: @sqrt_exp_intr_and_libcall(
|
||||
+; CHECK-NEXT: [[MERGED_SQRT:%.*]] = fmul reassoc double [[X:%.*]], 5.000000e-01
|
||||
+; CHECK-NEXT: [[E:%.*]] = call reassoc double @exp(double [[MERGED_SQRT]])
|
||||
+; CHECK-NEXT: ret double [[E]]
|
||||
+;
|
||||
+ %e = call reassoc double @exp(double %x)
|
||||
+ %res = call reassoc double @llvm.sqrt.f64(double %e)
|
||||
+ ret double %res
|
||||
+}
|
||||
+
|
||||
+define double @sqrt_exp_intr_and_libcall_2(double %x) {
|
||||
+; CHECK-LABEL: @sqrt_exp_intr_and_libcall_2(
|
||||
+; CHECK-NEXT: [[MERGED_SQRT:%.*]] = fmul reassoc double [[X:%.*]], 5.000000e-01
|
||||
+; CHECK-NEXT: [[E:%.*]] = call reassoc double @llvm.exp.f64(double [[MERGED_SQRT]])
|
||||
+; CHECK-NEXT: ret double [[E]]
|
||||
+;
|
||||
+ %e = call reassoc double @llvm.exp.f64(double %x)
|
||||
+ %res = call reassoc double @sqrt(double %e)
|
||||
+ ret double %res
|
||||
+}
|
||||
+
|
||||
+define <2 x float> @sqrt_exp_vec(<2 x float> %x) {
|
||||
+; CHECK-LABEL: @sqrt_exp_vec(
|
||||
+; CHECK-NEXT: [[MERGED_SQRT:%.*]] = fmul reassoc <2 x float> [[X:%.*]], <float 5.000000e-01, float 5.000000e-01>
|
||||
+; CHECK-NEXT: [[E:%.*]] = call reassoc <2 x float> @llvm.exp.v2f32(<2 x float> [[MERGED_SQRT]])
|
||||
+; CHECK-NEXT: ret <2 x float> [[E]]
|
||||
+;
|
||||
+ %e = call reassoc <2 x float> @llvm.exp.v2f32(<2 x float> %x)
|
||||
+ %res = call reassoc <2 x float> @llvm.sqrt.v2f32(<2 x float> %e)
|
||||
+ ret <2 x float> %res
|
||||
+}
|
||||
+
|
||||
declare i32 @foo(double)
|
||||
declare double @sqrt(double) readnone
|
||||
declare float @sqrtf(float)
|
||||
declare float @llvm.fabs.f32(float)
|
||||
+declare double @llvm.exp.f64(double)
|
||||
+declare double @llvm.sqrt.f64(double)
|
||||
+declare double @exp(double)
|
||||
+declare double @exp2(double)
|
||||
+declare double @exp10(double)
|
||||
+declare <2 x float> @llvm.exp.v2f32(<2 x float>)
|
||||
+declare <2 x float> @llvm.sqrt.v2f32(<2 x float>)
|
||||
--
|
||||
2.38.1.windows.1
|
||||
|
||||
187
0030-LICM-Solve-runtime-error-caused-by-the-signal-functi.patch
Normal file
187
0030-LICM-Solve-runtime-error-caused-by-the-signal-functi.patch
Normal file
@ -0,0 +1,187 @@
|
||||
From fdbf1bd9f1bdec32384eda47f419d895d11a1c50 Mon Sep 17 00:00:00 2001
|
||||
From: XingYuShuai <1150775134@qq.com>
|
||||
Date: Wed, 15 May 2024 14:42:27 +0800
|
||||
Subject: [PATCH] [LICM] Solve runtime error caused by the signal function.
|
||||
|
||||
Use the enable-signal option to control whether to solve the
|
||||
runtime error caused by the signal function when LTO is turned on.
|
||||
---
|
||||
llvm/cmake/modules/HandleLLVMOptions.cmake | 8 ++++
|
||||
llvm/lib/Transforms/Scalar/LICM.cpp | 47 +++++++++++++++++++
|
||||
.../Transforms/LICM/signal-before-loop-2.ll | 25 ++++++++++
|
||||
.../Transforms/LICM/signal-before-loop.ll | 25 ++++++++++
|
||||
llvm/test/lit.site.cfg.py.in | 1 +
|
||||
5 files changed, 106 insertions(+)
|
||||
create mode 100644 llvm/test/Transforms/LICM/signal-before-loop-2.ll
|
||||
create mode 100644 llvm/test/Transforms/LICM/signal-before-loop.ll
|
||||
|
||||
diff --git a/llvm/cmake/modules/HandleLLVMOptions.cmake b/llvm/cmake/modules/HandleLLVMOptions.cmake
|
||||
index b8e9dbe29d88..8be5d4ba52c2 100644
|
||||
--- a/llvm/cmake/modules/HandleLLVMOptions.cmake
|
||||
+++ b/llvm/cmake/modules/HandleLLVMOptions.cmake
|
||||
@@ -120,6 +120,14 @@ else()
|
||||
set(LLVM_ENABLE_AUTOTUNER 0)
|
||||
endif()
|
||||
|
||||
+option(LLVM_BUILD_FOR_COMMON "" ON)
|
||||
+if(LLVM_BUILD_FOR_COMMON)
|
||||
+ set(LLVM_BUILD_FOR_COMMON 1)
|
||||
+ add_definitions( -DBUILD_FOR_COMMON )
|
||||
+else()
|
||||
+ set(LLVM_BUILD_FOR_COMMON 0)
|
||||
+endif()
|
||||
+
|
||||
if(LLVM_ENABLE_EXPENSIVE_CHECKS)
|
||||
add_compile_definitions(EXPENSIVE_CHECKS)
|
||||
|
||||
diff --git a/llvm/lib/Transforms/Scalar/LICM.cpp b/llvm/lib/Transforms/Scalar/LICM.cpp
|
||||
index f8fab03f151d..2feec759f240 100644
|
||||
--- a/llvm/lib/Transforms/Scalar/LICM.cpp
|
||||
+++ b/llvm/lib/Transforms/Scalar/LICM.cpp
|
||||
@@ -44,6 +44,9 @@
|
||||
#include "llvm/Analysis/AliasSetTracker.h"
|
||||
#include "llvm/Analysis/AssumptionCache.h"
|
||||
#include "llvm/Analysis/CaptureTracking.h"
|
||||
+#ifdef BUILD_FOR_COMMON
|
||||
+#include "llvm/Analysis/CFG.h"
|
||||
+#endif // BUILD_FOR_COMMON
|
||||
#include "llvm/Analysis/GuardUtils.h"
|
||||
#include "llvm/Analysis/LazyBlockFrequencyInfo.h"
|
||||
#include "llvm/Analysis/Loads.h"
|
||||
@@ -122,6 +125,13 @@ static cl::opt<bool>
|
||||
SingleThread("licm-force-thread-model-single", cl::Hidden, cl::init(false),
|
||||
cl::desc("Force thread model single in LICM pass"));
|
||||
|
||||
+#ifdef BUILD_FOR_COMMON
|
||||
+static cl::opt<bool> DisableMovStoreInsOutsideOfLoopInSigFun(
|
||||
+ "disable-move-store-ins-outside-of-loop",
|
||||
+ cl::Hidden, cl::init(true), cl::desc("Disable move store instruction"
|
||||
+ "outside of loop in signal function."));
|
||||
+#endif // BUILD_FOR_COMMON
|
||||
+
|
||||
static cl::opt<uint32_t> MaxNumUsesTraversed(
|
||||
"licm-max-num-uses-traversed", cl::Hidden, cl::init(8),
|
||||
cl::desc("Max num uses visited for identifying load "
|
||||
@@ -2075,8 +2085,45 @@ bool llvm::promoteLoopAccessesToScalars(
|
||||
for (Use &U : ASIV->uses()) {
|
||||
// Ignore instructions that are outside the loop.
|
||||
Instruction *UI = dyn_cast<Instruction>(U.getUser());
|
||||
+ #if defined(BUILD_FOR_COMMON)
|
||||
+ if (DisableMovStoreInsOutsideOfLoopInSigFun) {
|
||||
+ if (!UI)
|
||||
+ continue;
|
||||
+
|
||||
+ // In the following scenario, there will be a loop index store
|
||||
+ // instruction that is moved outside the loop and when the termination
|
||||
+ // loop is triggered by the signal function, the store instruction is not
|
||||
+ // executed. However, the function registered by the signal will read the
|
||||
+ // data stored by the store instruction, so the data read is incorrect.
|
||||
+ // Solution: Prevent the store instruction from going outside the loop.
|
||||
+ // NOTE: The __sysv_signal function takes the same arguments and performs
|
||||
+ // the same task as signal. They both belong to glibc.
|
||||
+ if(StoreSafety == StoreSafe && !CurLoop->contains(UI)) {
|
||||
+ if(LoadInst *NotCurLoopLoad = dyn_cast<LoadInst>(UI)) {
|
||||
+ Function *NotCurLoopFun = UI->getParent()->getParent();
|
||||
+ for (Use &UseFun : NotCurLoopFun->uses()) {
|
||||
+ CallInst *Call = dyn_cast<CallInst>(UseFun.getUser());
|
||||
+ if (Call && Call->getCalledFunction() &&
|
||||
+ (Call->getCalledFunction()->getName() == "__sysv_signal" ||
|
||||
+ Call->getCalledFunction()->getName() == "signal") &&
|
||||
+ isPotentiallyReachable(Call->getParent(),
|
||||
+ CurLoop->getLoopPreheader(),NULL,DT,
|
||||
+ LI))
|
||||
+ return false;
|
||||
+ }
|
||||
+ }
|
||||
+ }
|
||||
+
|
||||
+ if (!CurLoop->contains(UI))
|
||||
+ continue;
|
||||
+ } else {
|
||||
+ if (!UI || !CurLoop->contains(UI))
|
||||
+ continue;
|
||||
+ }
|
||||
+#else
|
||||
if (!UI || !CurLoop->contains(UI))
|
||||
continue;
|
||||
+#endif // BUILD_FOR_COMMON
|
||||
|
||||
// If there is an non-load/store instruction in the loop, we can't promote
|
||||
// it.
|
||||
diff --git a/llvm/test/Transforms/LICM/signal-before-loop-2.ll b/llvm/test/Transforms/LICM/signal-before-loop-2.ll
|
||||
new file mode 100644
|
||||
index 000000000000..da878c6c691b
|
||||
--- /dev/null
|
||||
+++ b/llvm/test/Transforms/LICM/signal-before-loop-2.ll
|
||||
@@ -0,0 +1,25 @@
|
||||
+; REQUIRES: enable_build_for_common
|
||||
+; RUN:opt -disable-move-store-ins-outside-of-loop=true -S < %s | FileCheck %s
|
||||
+
|
||||
+@Run_Index = external global i64
|
||||
+
|
||||
+declare ptr @signal(ptr)
|
||||
+
|
||||
+define void @report() {
|
||||
+entry:
|
||||
+ %0 = load i64, ptr @Run_Index, align 8
|
||||
+ unreachable
|
||||
+}
|
||||
+
|
||||
+define i32 @main() {
|
||||
+if.end:
|
||||
+ %call.i4 = call ptr @signal(ptr @report)
|
||||
+ br label %for.cond
|
||||
+
|
||||
+; CHECK-LABEL: for.cond
|
||||
+; CHECK: store
|
||||
+for.cond:
|
||||
+ %0 = load i64, ptr @Run_Index, align 8
|
||||
+ store i64 %0, ptr @Run_Index, align 8
|
||||
+ br label %for.cond
|
||||
+}
|
||||
diff --git a/llvm/test/Transforms/LICM/signal-before-loop.ll b/llvm/test/Transforms/LICM/signal-before-loop.ll
|
||||
new file mode 100644
|
||||
index 000000000000..cfae4e87db56
|
||||
--- /dev/null
|
||||
+++ b/llvm/test/Transforms/LICM/signal-before-loop.ll
|
||||
@@ -0,0 +1,25 @@
|
||||
+; REQUIRES: enable_build_for_common
|
||||
+; RUN:opt -disable-move-store-ins-outside-of-loop=true -S < %s | FileCheck %s
|
||||
+
|
||||
+@Run_Index = external global i64
|
||||
+
|
||||
+declare ptr @__sysv_signal(ptr)
|
||||
+
|
||||
+define void @report() {
|
||||
+entry:
|
||||
+ %0 = load i64, ptr @Run_Index, align 8
|
||||
+ unreachable
|
||||
+}
|
||||
+
|
||||
+define i32 @main() {
|
||||
+if.end:
|
||||
+ %call.i4 = call ptr @__sysv_signal(ptr @report)
|
||||
+ br label %for.cond
|
||||
+
|
||||
+; CHECK-LABEL: for.cond
|
||||
+; CHECK: store
|
||||
+for.cond:
|
||||
+ %0 = load i64, ptr @Run_Index, align 8
|
||||
+ store i64 %0, ptr @Run_Index, align 8
|
||||
+ br label %for.cond
|
||||
+}
|
||||
diff --git a/llvm/test/lit.site.cfg.py.in b/llvm/test/lit.site.cfg.py.in
|
||||
index 0e9396e3b014..20c1ecca1d43 100644
|
||||
--- a/llvm/test/lit.site.cfg.py.in
|
||||
+++ b/llvm/test/lit.site.cfg.py.in
|
||||
@@ -63,6 +63,7 @@ config.dxil_tests = @LLVM_INCLUDE_DXIL_TESTS@
|
||||
config.have_llvm_driver = @LLVM_TOOL_LLVM_DRIVER_BUILD@
|
||||
config.use_classic_flang = @LLVM_ENABLE_CLASSIC_FLANG@
|
||||
config.enable_enable_autotuner = @LLVM_ENABLE_AUTOTUNER@
|
||||
+config.enable_build_for_common = @LLVM_BUILD_FOR_COMMON@
|
||||
|
||||
import lit.llvm
|
||||
lit.llvm.initialize(lit_config, config)
|
||||
--
|
||||
2.38.1.windows.1
|
||||
|
||||
6173
0031-ACPO-ACPO-Infrastructure.patch
Normal file
6173
0031-ACPO-ACPO-Infrastructure.patch
Normal file
File diff suppressed because it is too large
Load Diff
1748
0032-ACPO-Introduce-MLInliner-using-ACPO-infrastructure.patch
Normal file
1748
0032-ACPO-Introduce-MLInliner-using-ACPO-infrastructure.patch
Normal file
File diff suppressed because it is too large
Load Diff
34
0033-Find-Python3-in-default-env-PATH-for-ACPO.patch
Normal file
34
0033-Find-Python3-in-default-env-PATH-for-ACPO.patch
Normal file
@ -0,0 +1,34 @@
|
||||
From d4cfa4fd4496735ea45afcd2b0cfb3607cadd1c9 Mon Sep 17 00:00:00 2001
|
||||
From: yinrun <lvyinrun@huawei.com>
|
||||
Date: Thu, 17 Oct 2024 18:47:40 +0800
|
||||
Subject: [PATCH] Find Python3 in default env PATH for ACPO
|
||||
|
||||
Enable the use of the user's Python version, avoiding a wrong Python version that lacks the AI infra.
|
||||
---
|
||||
llvm/lib/Analysis/ACPOMLInterface.cpp | 10 +++++++++-
|
||||
1 file changed, 9 insertions(+), 1 deletion(-)
|
||||
|
||||
diff --git a/llvm/lib/Analysis/ACPOMLInterface.cpp b/llvm/lib/Analysis/ACPOMLInterface.cpp
|
||||
index f48eb46638e3..7d84bd5112d6 100644
|
||||
--- a/llvm/lib/Analysis/ACPOMLInterface.cpp
|
||||
+++ b/llvm/lib/Analysis/ACPOMLInterface.cpp
|
||||
@@ -146,7 +146,15 @@ ACPOMLPythonInterface::ACPOMLPythonInterface() : NextID{0} {
|
||||
}
|
||||
|
||||
int32_t PID = (int32_t) llvm::sys::Process::getProcessId();
|
||||
- std::string ExecPython = "/usr/bin/python3";
|
||||
+ std::string ExecPython;
|
||||
+ llvm::ErrorOr<std::string> Res = llvm::sys::findProgramByName("python3");
|
||||
+ if (std::error_code EC = Res.getError()) {
|
||||
+ LLVM_DEBUG(dbgs() << "python3 could not be found, error_code " << EC.value() << "\n");
|
||||
+ return;
|
||||
+ } else {
|
||||
+ ExecPython = Res.get();
|
||||
+ LLVM_DEBUG(dbgs() << "python3 version found in " << ExecPython << "\n");
|
||||
+ }
|
||||
std::string
|
||||
PythonScript = *Env + "/" + std::string(ACPO_ML_PYTHON_INTERFACE_PY);
|
||||
std::string PIDStr = std::to_string(PID);
|
||||
--
|
||||
2.38.1.windows.1
|
||||
|
||||
2201
0034-AArch64-Support-HiSilicon-s-HIP09-sched-model.patch
Normal file
2201
0034-AArch64-Support-HiSilicon-s-HIP09-sched-model.patch
Normal file
File diff suppressed because it is too large
Load Diff
90
llvm.spec
90
llvm.spec
@ -1,6 +1,13 @@
|
||||
%bcond_without sys_llvm
|
||||
%bcond_without check
|
||||
%bcond_with classic_flang
|
||||
%bcond_with toolchain_clang
|
||||
%bcond_without bisheng_autotuner
|
||||
%bcond_without ACPO
|
||||
|
||||
%if %{with toolchain_clang}
|
||||
%global toolchain clang
|
||||
%endif
|
||||
|
||||
%global maj_ver 17
|
||||
%global min_ver 0
|
||||
@ -38,7 +45,7 @@
|
||||
|
||||
Name: %{pkg_name}
|
||||
Version: %{maj_ver}.%{min_ver}.%{patch_ver}
|
||||
Release: 14
|
||||
Release: 28
|
||||
Summary: The Low Level Virtual Machine
|
||||
|
||||
License: NCSA
|
||||
@ -69,7 +76,20 @@ Patch17: 0017-Add-the-support-for-classic-flang.patch
|
||||
Patch18: 0018-Fix-declaration-definition-mismatch-for-classic-flang.patch
|
||||
Patch19: 0019-Backport-LoongArch-Improve-the-support-for-atomic-and-clear_cache.patch
|
||||
Patch20: 0020-Update-llvm-lit-config-to-support-build_for_openeule.patch
|
||||
Patch21: 0021-Prevent-environment-variables-from-exceeding-NAME_MA.patch
|
||||
Patch21: 0021-Add-BiSheng-Autotuner-support-for-LLVM-compiler.patch
|
||||
Patch22: 0022-Prevent-environment-variables-from-exceeding-NAME_MA.patch
|
||||
Patch23: 0023-AArch64-Support-HiSilicon-s-HIP09-Processor.patch
|
||||
Patch24: 0024-Backport-LoongArch-fix-and-add-some-new-support.patch
|
||||
Patch25: 0025-Backport-Simple-check-to-ignore-Inline-asm-fwait-insertion.patch
|
||||
Patch26: 0026-Add-arch-restriction-for-BiSheng-Autotuner.patch
|
||||
Patch27: 0027-AArch64-Delete-hip09-macro.patch
|
||||
Patch28: 0028-backport-Clang-Fix-crash-with-fzero-call-used-regs.patch
|
||||
Patch29: 0029-SimplifyLibCalls-Merge-sqrt-into-the-power-of-exp-79.patch
|
||||
Patch30: 0030-LICM-Solve-runtime-error-caused-by-the-signal-functi.patch
|
||||
Patch31: 0031-ACPO-ACPO-Infrastructure.patch
|
||||
Patch32: 0032-ACPO-Introduce-MLInliner-using-ACPO-infrastructure.patch
|
||||
Patch33: 0033-Find-Python3-in-default-env-PATH-for-ACPO.patch
|
||||
Patch34: 0034-AArch64-Support-HiSilicon-s-HIP09-sched-model.patch
|
||||
|
||||
BuildRequires: binutils-devel
|
||||
BuildRequires: cmake
|
||||
@ -86,6 +106,9 @@ BuildRequires: python3-recommonmark
|
||||
BuildRequires: python3-sphinx
|
||||
BuildRequires: python3-setuptools
|
||||
BuildRequires: zlib-devel
|
||||
%if %{with toolchain_clang}
|
||||
BuildRequires: clang
|
||||
%endif
|
||||
|
||||
Requires: %{name}-libs%{?_isa} = %{version}-%{release}
|
||||
|
||||
@ -185,6 +208,12 @@ pathfix.py -i %{__python3} -pn \
|
||||
mkdir -p _build
|
||||
cd _build
|
||||
|
||||
%if %{with ACPO}
|
||||
echo "enable ACPO"
|
||||
export CFLAGS="-Wp,-DENABLE_ACPO ${CFLAGS}"
|
||||
export CXXFLAGS="-Wp,-DENABLE_ACPO ${CXXFLAGS}"
|
||||
%endif
|
||||
|
||||
%cmake .. -G Ninja \
|
||||
-DBUILD_SHARED_LIBS:BOOL=OFF \
|
||||
-DLLVM_PARALLEL_LINK_JOBS=%{max_link_jobs} \
|
||||
@ -233,6 +262,13 @@ cd _build
|
||||
%endif
|
||||
%if %{with classic_flang}
|
||||
-DLLVM_ENABLE_CLASSIC_FLANG=ON \
|
||||
%endif
|
||||
%if "%{toolchain}" == "clang"
|
||||
-DCMAKE_C_COMPILER=clang \
|
||||
-DCMAKE_CXX_COMPILER=clang++ \
|
||||
%endif
|
||||
%if %{with bisheng_autotuner}
|
||||
-DLLVM_ENABLE_AUTOTUNER=ON \
|
||||
%endif
|
||||
-DLLVM_INCLUDE_BENCHMARKS=OFF
|
||||
%ninja_build LLVM
|
||||
@ -356,18 +392,60 @@ LD_LIBRARY_PATH=%{buildroot}/%{install_libdir} %{__ninja} check-all -C ./_build
|
||||
%{install_includedir}/llvm-gmock
|
||||
|
||||
%changelog
|
||||
* Wed Sep 11 2024 hongjinghao <hongjinghao@huawei.com> - 17.0.6-14
|
||||
* Fri Nov 22 2024 xiajingze <xiajingze1@huawei.com> - 17.0.6-28
|
||||
- [AArch64] Support HiSilicon's HIP09 sched model
|
||||
|
||||
* Wed Nov 20 2024 eastb233 <xiezhiheng@huawei.com> - 17.0.6-27
|
||||
- Find Python3 in default env PATH for ACPO
|
||||
|
||||
* Wed Nov 20 2024 eastb233 <xiezhiheng@huawei.com> - 17.0.6-26
|
||||
- ACPO Infrastructure for ML integration into LLVM compiler
|
||||
|
||||
* Wed Nov 20 2024 eastb233 <xiezhiheng@huawei.com> - 17.0.6-25
|
||||
- [LICM] Solve runtime error caused by the signal function.
|
||||
|
||||
* Wed Nov 20 2024 eastb233 <xiezhiheng@huawei.com> - 17.0.6-24
|
||||
- [SimplifyLibCalls] Merge sqrt into the power of exp (#79146)
|
||||
|
||||
* Tue Nov 19 2024 xiajingze <xiajingze1@huawei.com> - 17.0.6-23
|
||||
- [backport][Clang] Fix crash with -fzero-call-used-regs
|
||||
|
||||
* Mon Nov 18 2024 xiajingze <xiajingze1@huawei.com> - 17.0.6-22
|
||||
- [AArch64] Delete hip09 macro
|
||||
|
||||
* Mon Nov 18 2024 liyunfei <liyunfei33@huawei.net> - 17.0.6-21
|
||||
- Add arch restriction for BiSheng Autotuner
|
||||
|
||||
* Mon Nov 18 2024 liyunfei <liyunfei33@huawei.net> - 17.0.6-20
|
||||
- [Backport] Simple check to ignore Inline asm fwait insertion
|
||||
|
||||
* Mon Sep 23 2024 zhanglimin <zhanglimin@loongson.cn> - 17.0.6-19
|
||||
- [LoongArch] Backport some new support
|
||||
|
||||
* Thu Sep 12 2024 xiajingze <xiajingze1@huawei.com> - 17.0.6-18
|
||||
- [AArch64] Support HiSilicon's HIP09 Processor
|
||||
|
||||
* Wed Sep 11 2024 hongjinghao <hongjinghao@huawei.com> - 17.0.6-17
|
||||
- doc add Provides llvm-help
|
||||
|
||||
* Tue Sep 10 2024 hongjinghao <hongjinghao@huawei.com> - 17.0.6-13
|
||||
* Tue Sep 10 2024 hongjinghao <hongjinghao@huawei.com> - 17.0.6-16
|
||||
- doc add Obsoletes llvm-help
|
||||
|
||||
* Thu Sep 5 2024 hongjinghao <hongjinghao@huawei.com> - 17.0.6-12
|
||||
* Tue Sep 3 2024 hongjinghao <hongjinghao@huawei.com> - 17.0.6-15
|
||||
- mv man to doc subpackage
|
||||
|
||||
* Mon Jul 22 2024 liyunfei <liyunfei33@huawei.com> - 17.0.6-11
|
||||
* Mon Jul 22 2024 liyunfei <liyunfei33@huawei.com> - 17.0.6-14
|
||||
- Prevent environment variables from exceeding NAME_MAX.
|
||||
|
||||
* Mon Jul 22 2024 liyunfei <liyunfei33@huawei.com> - 17.0.6-13
|
||||
- Disable toolchain_clang build for BiSheng Autotuner support temporary.
|
||||
|
||||
* Tue Jul 16 2024 liyunfei <liyunfei33@huawei.com> - 17.0.6-12
|
||||
- Add BiSheng Autotuner support.
|
||||
|
||||
* Fri Jul 5 2024 liyunfei <liyunfei33@huawei.com> - 17.0.6-11
|
||||
- Add toolchain_clang build support
|
||||
|
||||
* Mon Apr 29 2024 wangqiang <wangqiang1@kylinos.cn> - 17.0.6-10
|
||||
- Update llvm-lit config to support macro `build_for_openeuler`
|
||||
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user