!181 [sync] PR-180: [AArch64] Support HiSilicon's HIP09 sched model

From: @openeuler-sync-bot Reviewed-by: @eastb233 Signed-off-by: @eastb233
[AArch64] Support HiSilicon's HIP09 sched model
2024-11-25 03:17:59 +00:00 · 2024-11-25 09:40:17 +08:00 · 2024-11-20 10:50:19 +00:00 · 2024-11-20 18:33:41 +08:00 · 2024-11-20 18:33:41 +08:00 · 2024-11-20 07:26:43 +00:00
15 changed files with 27208 additions and 6 deletions
--- a/0021-Add-BiSheng-Autotuner-support-for-LLVM-compiler.patch
+++ b/0021-Add-BiSheng-Autotuner-support-for-LLVM-compiler.patch
--- a/0022-Prevent-environment-variables-from-exceeding-NAME_MA.patch
+++ b/0022-Prevent-environment-variables-from-exceeding-NAME_MA.patch
--- a/0023-AArch64-Support-HiSilicon-s-HIP09-Processor.patch
+++ b/0023-AArch64-Support-HiSilicon-s-HIP09-Processor.patch
@ -0,0 +1,517 @@
+From cac43828d26b178807d194b4bd7c5df69603df29 Mon Sep 17 00:00:00 2001
+From: xiajingze <xiajingze1@huawei.com>
+Date: Wed, 31 Jul 2024 18:37:29 +0800
+Subject: [PATCH] [AArch64] Support HiSilicon's HIP09 Processor
+
+Signed-off-by: xiajingze <xiajingze1@huawei.com>
+---
+ llvm/cmake/modules/HandleLLVMOptions.cmake    |  8 ++
+ .../llvm/TargetParser/AArch64TargetParser.h   |  7 ++
+ llvm/lib/Target/AArch64/AArch64.td            | 36 +++++++
+ .../lib/Target/AArch64/AArch64MacroFusion.cpp | 55 +++++++++++
+ llvm/lib/Target/AArch64/AArch64Subtarget.cpp  |  9 ++
+ llvm/lib/Target/AArch64/AArch64Subtarget.h    |  9 +-
+ llvm/lib/Target/CMakeLists.txt                |  4 +
+ llvm/lib/TargetParser/Host.cpp                |  3 +
+ llvm/test/CodeGen/AArch64/cpus-hip09.ll       | 11 +++
+ .../CodeGen/AArch64/macro-fusion-mvnclz.mir   | 20 ++++
+ .../AArch64/misched-fusion-lit-hip09.ll       | 73 ++++++++++++++
+ llvm/test/CodeGen/AArch64/remat-hip09.ll      | 18 ++++
+ llvm/test/lit.site.cfg.py.in                  |  4 +
+ llvm/unittests/TargetParser/Host.cpp          |  5 +
+ .../TargetParser/TargetParserTest.cpp         | 16 +++
+ 15 files changed, 277 insertions(+), 1 deletion(-)
+ create mode 100644 llvm/test/CodeGen/AArch64/cpus-hip09.ll
+ create mode 100644 llvm/test/CodeGen/AArch64/macro-fusion-mvnclz.mir
+ create mode 100644 llvm/test/CodeGen/AArch64/misched-fusion-lit-hip09.ll
+ create mode 100644 llvm/test/CodeGen/AArch64/remat-hip09.ll
+
+diff --git a/llvm/cmake/modules/HandleLLVMOptions.cmake b/llvm/cmake/modules/HandleLLVMOptions.cmake
+index 8be5d4ba5..74e68e25d 100644
+--- a/llvm/cmake/modules/HandleLLVMOptions.cmake
+++ b/llvm/cmake/modules/HandleLLVMOptions.cmake
+@@ -112,6 +112,14 @@ else()
+   set(LLVM_ENABLE_AUTOTUNER 0)
+ endif()
+ 
+option(LLVM_ENABLE_AARCH64_HIP09 "Enable HIP09 Processor" ON)
+if(LLVM_ENABLE_AARCH64_HIP09)
+  set(LLVM_ENABLE_AARCH64_HIP09 1)
+  add_definitions( -DENABLE_AARCH64_HIP09 )
+else()
+  set(LLVM_ENABLE_AARCH64_HIP09 0)
+endif()
+
+ if(LLVM_ENABLE_EXPENSIVE_CHECKS)
+   add_compile_definitions(EXPENSIVE_CHECKS)
+ 
+diff --git a/llvm/include/llvm/TargetParser/AArch64TargetParser.h b/llvm/include/llvm/TargetParser/AArch64TargetParser.h
+index dc4cdfa8e..07cd2fcbb 100644
+--- a/llvm/include/llvm/TargetParser/AArch64TargetParser.h
+++ b/llvm/include/llvm/TargetParser/AArch64TargetParser.h
+@@ -542,6 +542,13 @@ inline constexpr CpuInfo CpuInfos[] = {
+      (AArch64::AEK_FP16 | AArch64::AEK_RAND | AArch64::AEK_SM4 |
+       AArch64::AEK_SHA3 | AArch64::AEK_SHA2 | AArch64::AEK_AES |
+       AArch64::AEK_MTE | AArch64::AEK_SB | AArch64::AEK_SSBS)},
+#if defined(ENABLE_AARCH64_HIP09)
+    {"hip09", ARMV8_5A,
+     (AArch64::AEK_AES | AArch64::AEK_SM4 | AArch64::AEK_SHA2 |
+      AArch64::AEK_SHA3 | AArch64::AEK_FP16 | AArch64::AEK_PROFILE |
+      AArch64::AEK_FP16FML | AArch64::AEK_SVE | AArch64::AEK_I8MM |
+      AArch64::AEK_F32MM | AArch64::AEK_F64MM | AArch64::AEK_BF16)},
+#endif
+ };
+ 
+ // An alias for a CPU.
+diff --git a/llvm/lib/Target/AArch64/AArch64.td b/llvm/lib/Target/AArch64/AArch64.td
+index 8f50af4b7..c8bfd770f 100644
+--- a/llvm/lib/Target/AArch64/AArch64.td
+++ b/llvm/lib/Target/AArch64/AArch64.td
+@@ -296,6 +296,12 @@ def FeatureFuseAddSub2RegAndConstOne : SubtargetFeature<
+    "fuse-addsub-2reg-const1", "HasFuseAddSub2RegAndConstOne", "true",
+    "CPU fuses (a + b + 1) and (a - b - 1)">;
+ 
+#ifdef ENABLE_AARCH64_HIP09
+def FeatureFuseMvnClz : SubtargetFeature<
+    "fuse-mvn-clz", "HasFuseMvnClz", "true",
+    "CPU fuses mvn+clz operations">;
+#endif
+
+ def FeatureDisableLatencySchedHeuristic : SubtargetFeature<
+     "disable-latency-sched-heuristic", "DisableLatencySchedHeuristic", "true",
+     "Disable latency scheduling heuristic">;
+@@ -1205,6 +1211,21 @@ def TuneTSV110 : SubtargetFeature<"tsv110", "ARMProcFamily", "TSV110",
+                                   FeatureFuseAES,
+                                   FeaturePostRAScheduler]>;
+ 
+#ifdef ENABLE_AARCH64_HIP09
+def TuneHIP09 : SubtargetFeature<"hip09", "ARMProcFamily", "HIP09",
+                                   "HiSilicon HIP-09 processors", [
+                                   FeatureCustomCheapAsMoveHandling,
+                                   FeatureExperimentalZeroingPseudos,
+                                   FeatureFuseAES,
+                                   FeatureLSLFast,
+                                   FeatureAscendStoreAddress,
+                                   FeatureCmpBccFusion,
+                                   FeatureArithmeticBccFusion,
+                                   FeatureFuseLiterals,
+                                   FeatureFuseMvnClz,
+                                   FeaturePostRAScheduler]>;
+#endif
+
+ def TuneAmpere1 : SubtargetFeature<"ampere1", "ARMProcFamily", "Ampere1",
+                                    "Ampere Computing Ampere-1 processors", [
+                                    FeaturePostRAScheduler,
+@@ -1359,6 +1380,14 @@ def ProcessorFeatures {
+   list<SubtargetFeature> TSV110 = [HasV8_2aOps, FeatureCrypto, FeatureFPARMv8,
+                                    FeatureNEON, FeaturePerfMon, FeatureSPE,
+                                    FeatureFullFP16, FeatureFP16FML, FeatureDotProd];
+#ifdef ENABLE_AARCH64_HIP09
+  list<SubtargetFeature> HIP09 = [HasV8_5aOps, FeatureBF16, FeatureCrypto, FeatureFPARMv8,
+                                  FeatureMatMulInt8, FeatureMatMulFP32, FeatureMatMulFP64,
+                                  FeatureNEON, FeaturePerfMon, FeatureRandGen, FeatureSPE,
+                                  FeatureFullFP16, FeatureFP16FML, FeatureDotProd,
+                                  FeatureJS, FeatureComplxNum, FeatureSHA3, FeatureSM4,
+                                  FeatureSVE];
+#endif
+   list<SubtargetFeature> Ampere1 = [HasV8_6aOps, FeatureNEON, FeaturePerfMon,
+                                     FeatureSSBS, FeatureRandGen, FeatureSB,
+                                     FeatureSHA2, FeatureSHA3, FeatureAES];
+@@ -1464,8 +1493,15 @@ def : ProcessorModel<"thunderx2t99", ThunderX2T99Model,
+ // Marvell ThunderX3T110 Processors.
+ def : ProcessorModel<"thunderx3t110", ThunderX3T110Model,
+                      ProcessorFeatures.ThunderX3T110, [TuneThunderX3T110]>;
+
+// HiSilicon Processors.
+ def : ProcessorModel<"tsv110", TSV110Model, ProcessorFeatures.TSV110,
+                      [TuneTSV110]>;
+#ifdef ENABLE_AARCH64_HIP09
+// FIXME: HiSilicon HIP09 is currently modeled as a Cortex-A57.
+def : ProcessorModel<"hip09", CortexA57Model, ProcessorFeatures.HIP09,
+                     [TuneHIP09]>;
+#endif
+ 
+ // Support cyclone as an alias for apple-a7 so we can still LTO old bitcode.
+ def : ProcessorModel<"cyclone", CycloneModel, ProcessorFeatures.AppleA7,
+diff --git a/llvm/lib/Target/AArch64/AArch64MacroFusion.cpp b/llvm/lib/Target/AArch64/AArch64MacroFusion.cpp
+index 05d60872b..4963ec350 100644
+--- a/llvm/lib/Target/AArch64/AArch64MacroFusion.cpp
+++ b/llvm/lib/Target/AArch64/AArch64MacroFusion.cpp
+@@ -51,6 +51,12 @@ static bool isArithmeticBccPair(const MachineInstr *FirstMI,
+   case AArch64::SUBSXrr:
+   case AArch64::BICSWrr:
+   case AArch64::BICSXrr:
+#if defined(ENABLE_AARCH64_HIP09)
+  case AArch64::ADCSWr:
+  case AArch64::ADCSXr:
+  case AArch64::SBCSWr:
+  case AArch64::SBCSXr:
+#endif
+     return true;
+   case AArch64::ADDSWrs:
+   case AArch64::ADDSXrs:
+@@ -183,6 +189,20 @@ static bool isLiteralsPair(const MachineInstr *FirstMI,
+       SecondMI.getOperand(3).getImm() == 16))
+     return true;
+ 
+#if defined(ENABLE_AARCH64_HIP09)
+  // 32 bit immediate.
+  if ((FirstMI == nullptr || FirstMI->getOpcode() == AArch64::MOVNWi) &&
+      (SecondMI.getOpcode() == AArch64::MOVKWi &&
+       SecondMI.getOperand(3).getImm() == 16))
+    return true;
+
+  // Lower half of 64 bit immediate.
+  if ((FirstMI == nullptr || FirstMI->getOpcode() == AArch64::MOVNXi) &&
+      (SecondMI.getOpcode() == AArch64::MOVKWi &&
+       SecondMI.getOperand(3).getImm() == 16))
+    return true;
+#endif
+
+   // Upper half of 64 bit immediate.
+   if ((FirstMI == nullptr ||
+        (FirstMI->getOpcode() == AArch64::MOVKXi &&
+@@ -437,6 +457,37 @@ static bool isAddSub2RegAndConstOnePair(const MachineInstr *FirstMI,
+   return false;
+ }
+ 
+#if defined(ENABLE_AARCH64_HIP09)
+static bool isMvnClzPair(const MachineInstr *FirstMI,
+                         const MachineInstr &SecondMI) {
+  // HIP09 supports fusion of MVN + CLZ.
+  // Scheduling the CLZ right after the MVN lets the CPU fuse the pair.
+  // Only the unshifted register form of MVN is eligible for fusion.
+  //
+  // MVN is an alias of ORN with the zero register as first source:
+  // 1. MVN <Wd>, <Wm>{, <shift> #<amount>} is equivalent to
+  //    ORN <Wd>, WZR, <Wm>{, <shift> #<amount>}
+  // 2. MVN <Xd>, <Xm>{, <shift> #<amount>} is equivalent to
+  //    ORN <Xd>, XZR, <Xm>{, <shift> #<amount>}
+  // A null FirstMI is a wildcard: only SecondMI is checked for CLZ.
+  if ((FirstMI == nullptr ||
+       ((FirstMI->getOpcode() == AArch64::ORNWrs) &&
+        (FirstMI->getOperand(1).getReg() == AArch64::WZR) &&
+        (!AArch64InstrInfo::hasShiftedReg(*FirstMI)))) &&
+      (SecondMI.getOpcode() == AArch64::CLZWr))
+    return true;
+
+  if ((FirstMI == nullptr ||
+       ((FirstMI->getOpcode() == AArch64::ORNXrs) &&
+        (FirstMI->getOperand(1).getReg() == AArch64::XZR) &&
+        (!AArch64InstrInfo::hasShiftedReg(*FirstMI)))) &&
+      (SecondMI.getOpcode() == AArch64::CLZXr))
+    return true;
+
+  return false;
+}
+#endif
+
+ /// \brief Check if the instr pair, FirstMI and SecondMI, should be fused
+ /// together. Given SecondMI, when FirstMI is unspecified, then check if
+ /// SecondMI may be part of a fused pair at all.
+@@ -472,6 +523,10 @@ static bool shouldScheduleAdjacent(const TargetInstrInfo &TII,
+   if (ST.hasFuseAddSub2RegAndConstOne() &&
+       isAddSub2RegAndConstOnePair(FirstMI, SecondMI))
+     return true;
+#if defined(ENABLE_AARCH64_HIP09)
+  if (ST.hasFuseMvnClz() && isMvnClzPair(FirstMI, SecondMI))
+    return true;
+#endif
+ 
+   return false;
+ }
+diff --git a/llvm/lib/Target/AArch64/AArch64Subtarget.cpp b/llvm/lib/Target/AArch64/AArch64Subtarget.cpp
+index 450e27b8a..ddf22364c 100644
+--- a/llvm/lib/Target/AArch64/AArch64Subtarget.cpp
+++ b/llvm/lib/Target/AArch64/AArch64Subtarget.cpp
+@@ -266,6 +266,15 @@ void AArch64Subtarget::initializeProperties() {
+     PrefFunctionAlignment = Align(16);
+     PrefLoopAlignment = Align(4);
+     break;
+#if defined(ENABLE_AARCH64_HIP09)
+  case HIP09:
+    CacheLineSize = 64;
+    PrefFunctionAlignment = Align(16);
+    PrefLoopAlignment = Align(4);
+    VScaleForTuning = 2;
+    DefaultSVETFOpts = TailFoldingOpts::Simple;
+    break;
+#endif
+   case ThunderX3T110:
+     CacheLineSize = 64;
+     PrefFunctionAlignment = Align(16);
+diff --git a/llvm/lib/Target/AArch64/AArch64Subtarget.h b/llvm/lib/Target/AArch64/AArch64Subtarget.h
+index 5e20d1646..5f481f4f9 100644
+--- a/llvm/lib/Target/AArch64/AArch64Subtarget.h
+++ b/llvm/lib/Target/AArch64/AArch64Subtarget.h
+@@ -87,7 +87,10 @@ public:
+     ThunderXT83,
+     ThunderXT88,
+     ThunderX3T110,
+-    TSV110
+    TSV110,
+#if defined(ENABLE_AARCH64_HIP09)
+    HIP09
+#endif
+   };
+ 
+ protected:
+@@ -239,7 +242,11 @@ public:
+   bool hasFusion() const {
+     return hasArithmeticBccFusion() || hasArithmeticCbzFusion() ||
+            hasFuseAES() || hasFuseArithmeticLogic() || hasFuseCCSelect() ||
+#if defined(ENABLE_AARCH64_HIP09)
+           hasFuseAdrpAdd() || hasFuseLiterals() || hasFuseMvnClz();
+#else
+            hasFuseAdrpAdd() || hasFuseLiterals();
+#endif
+   }
+ 
+   unsigned getMaxInterleaveFactor() const { return MaxInterleaveFactor; }
+diff --git a/llvm/lib/Target/CMakeLists.txt b/llvm/lib/Target/CMakeLists.txt
+index 2739233f9..501ce1f2f 100644
+--- a/llvm/lib/Target/CMakeLists.txt
+++ b/llvm/lib/Target/CMakeLists.txt
+@@ -2,6 +2,10 @@ list(APPEND LLVM_COMMON_DEPENDS intrinsics_gen)
+ 
+ list(APPEND LLVM_TABLEGEN_FLAGS -I ${LLVM_MAIN_SRC_DIR}/lib/Target)
+ 
+if(LLVM_ENABLE_AARCH64_HIP09)
+  list(APPEND LLVM_TABLEGEN_FLAGS "-DENABLE_AARCH64_HIP09")
+endif()
+
+ add_llvm_component_library(LLVMTarget
+   Target.cpp
+   TargetIntrinsicInfo.cpp
+diff --git a/llvm/lib/TargetParser/Host.cpp b/llvm/lib/TargetParser/Host.cpp
+index d11dc605e..8b23be02e 100644
+--- a/llvm/lib/TargetParser/Host.cpp
+++ b/llvm/lib/TargetParser/Host.cpp
+@@ -257,6 +257,9 @@ StringRef sys::detail::getHostCPUNameForARM(StringRef ProcCpuinfoContent) {
+     // contents are specified in the various processor manuals.
+     return StringSwitch<const char *>(Part)
+       .Case("0xd01", "tsv110")
+#if defined(ENABLE_AARCH64_HIP09)
+      .Case("0xd02", "hip09")
+#endif
+       .Default("generic");
+ 
+   if (Implementer == "0x51") // Qualcomm Technologies, Inc.
+diff --git a/llvm/test/CodeGen/AArch64/cpus-hip09.ll b/llvm/test/CodeGen/AArch64/cpus-hip09.ll
+new file mode 100644
+index 000000000..dcf32e4dc
+--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/cpus-hip09.ll
+@@ -0,0 +1,11 @@
+; REQUIRES: enable_enable_aarch64_hip09
+; This tests that llc accepts all valid AArch64 CPUs
+
+; RUN: llc < %s -mtriple=arm64-unknown-unknown -mcpu=hip09 2>&1 | FileCheck %s
+
+; CHECK-NOT: {{.*}}  is not a recognized processor for this target
+; INVALID: {{.*}}  is not a recognized processor for this target
+
+define i32 @f(i64 %z) {
+	ret i32 0
+}
+diff --git a/llvm/test/CodeGen/AArch64/macro-fusion-mvnclz.mir b/llvm/test/CodeGen/AArch64/macro-fusion-mvnclz.mir
+new file mode 100644
+index 000000000..64bf15937
+--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/macro-fusion-mvnclz.mir
+@@ -0,0 +1,20 @@
+# REQUIRES: enable_enable_aarch64_hip09
+# RUN: llc -o - %s -mtriple=aarch64-- -mattr=+fuse-mvn-clz -run-pass postmisched | FileCheck %s --check-prefixes=CHECK,FUSION
+# RUN: llc -o - %s -mtriple=aarch64-- -mattr=-fuse-mvn-clz -run-pass postmisched | FileCheck %s --check-prefixes=CHECK,NOFUSION
+---
+# CHECK-LABEL: name: fuse-mvn-clz
+# CHECK: $w2 = ORNWrs $wzr, $w1, 0
+# FUSION: $w0 = CLZWr killed renamable $w2
+# CHECK: $w3 = ADDWri killed renamable $w1, 1, 0
+# NOFUSION: $w0 = CLZWr killed renamable $w2
+name: fuse-mvn-clz
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $w0, $w1, $w2, $w3
+
+    $w2 = ORNWrs $wzr, $w1, 0
+    $w3 = ADDWri killed renamable $w1, 1, 0 
+    $w0 = CLZWr killed renamable $w2
+    RET undef $lr, implicit $w0
+...
+diff --git a/llvm/test/CodeGen/AArch64/misched-fusion-lit-hip09.ll b/llvm/test/CodeGen/AArch64/misched-fusion-lit-hip09.ll
+new file mode 100644
+index 000000000..d67fa5b43
+--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/misched-fusion-lit-hip09.ll
+@@ -0,0 +1,73 @@
+; REQUIRES: enable_enable_aarch64_hip09
+; RUN: llc %s -o - -mtriple=aarch64-unknown -mcpu=hip09           | FileCheck %s --check-prefix=CHECK --check-prefix=CHECKFUSE-HIP09
+
+@g = common local_unnamed_addr global ptr null, align 8
+
+define dso_local ptr @litp(i32 %a, i32 %b) {
+entry:
+  %add = add nsw i32 %b, %a
+  %idx.ext = sext i32 %add to i64
+  %add.ptr = getelementptr i8, ptr @litp, i64 %idx.ext
+  store ptr %add.ptr, ptr @g, align 8
+  ret ptr %add.ptr
+
+; CHECK-LABEL: litp:
+; CHECK: adrp [[R:x[0-9]+]], litp
+; CHECKFUSE-NEXT: add {{x[0-9]+}}, [[R]], :lo12:litp
+}
+
+define dso_local ptr @litp_tune_generic(i32 %a, i32 %b) "tune-cpu"="generic" {
+entry:
+  %add = add nsw i32 %b, %a
+  %idx.ext = sext i32 %add to i64
+  %add.ptr = getelementptr i8, ptr @litp_tune_generic, i64 %idx.ext
+  store ptr %add.ptr, ptr @g, align 8
+  ret ptr %add.ptr
+
+; CHECK-LABEL: litp_tune_generic:
+; CHECK:         adrp [[R:x[0-9]+]], litp_tune_generic
+; CHECK-NEXT:    add {{x[0-9]+}}, [[R]], :lo12:litp_tune_generic
+}
+
+define dso_local i32 @liti(i32 %a, i32 %b) {
+entry:
+  %add = add i32 %a, -262095121
+  %add1 = add i32 %add, %b
+  ret i32 %add1
+
+; CHECK-LABEL: liti:
+; CHECK: mov [[R:w[0-9]+]], {{#[0-9]+}}
+; CHECKDONT-NEXT: add {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+; CHECKFUSE-NEXT: movk [[R]], {{#[0-9]+}}, lsl #16
+; CHECKFUSE-HIP09: movk [[R]], {{#[0-9]+}}, lsl #16
+}
+
+; Function Attrs: norecurse nounwind readnone
+define dso_local i64 @litl(i64 %a, i64 %b) {
+entry:
+  %add = add i64 %a, 2208998440489107183
+  %add1 = add i64 %add, %b
+  ret i64 %add1
+
+; CHECK-LABEL: litl:
+; CHECK: mov [[R:x[0-9]+]], {{#[0-9]+}}
+; CHECKDONT-NEXT: add {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}
+; CHECK-NEXT: movk [[R]], {{#[0-9]+}}, lsl #16
+; CHECK: movk [[R]], {{#[0-9]+}}, lsl #32
+; CHECK-NEXT: movk [[R]], {{#[0-9]+}}, lsl #48
+}
+
+; Function Attrs: norecurse nounwind readnone
+define dso_local double @litf() {
+entry:
+  ret double 0x400921FB54442D18
+
+; CHECK-LABEL: litf:
+; CHECK-DONT:      adrp [[ADDR:x[0-9]+]], [[CSTLABEL:.LCP.*]]
+; CHECK-DONT-NEXT: ldr  {{d[0-9]+}}, {{[[]}}[[ADDR]], :lo12:[[CSTLABEL]]{{[]]}}
+; CHECKFUSE-HIP09:    mov  [[R:x[0-9]+]], #11544
+; CHECKFUSE-HIP09:    movk [[R]], #21572, lsl #16
+; CHECKFUSE-HIP09:    movk [[R]], #8699, lsl #32
+; CHECKFUSE-HIP09:    movk [[R]], #16393, lsl #48
+; CHECKFUSE-HIP09:    fmov {{d[0-9]+}}, [[R]]
+}
+diff --git a/llvm/test/CodeGen/AArch64/remat-hip09.ll b/llvm/test/CodeGen/AArch64/remat-hip09.ll
+new file mode 100644
+index 000000000..aec0d18ae
+--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/remat-hip09.ll
+@@ -0,0 +1,18 @@
+; REQUIRES: enable_enable_aarch64_hip09
+; RUN: llc -mtriple=aarch64-linux-gnuabi -mcpu=hip09 -o - %s | FileCheck %s
+
+%X = type { i64, i64, i64 }
+declare void @f(ptr)
+define void @t() {
+entry:
+  %tmp = alloca %X
+  call void @f(ptr %tmp)
+; CHECK: add x0, sp, #8
+; CHECK-NOT: mov
+; CHECK-NEXT: bl f
+  call void @f(ptr %tmp)
+; CHECK: add x0, sp, #8
+; CHECK-NOT: mov
+; CHECK-NEXT: bl f
+  ret void
+}
+diff --git a/llvm/test/lit.site.cfg.py.in b/llvm/test/lit.site.cfg.py.in
+index 20c1ecca1..6145a514f 100644
+--- a/llvm/test/lit.site.cfg.py.in
+++ b/llvm/test/lit.site.cfg.py.in
+@@ -64,9 +64,13 @@ config.have_llvm_driver = @LLVM_TOOL_LLVM_DRIVER_BUILD@
+ config.use_classic_flang = @LLVM_ENABLE_CLASSIC_FLANG@
+ config.enable_enable_autotuner = @LLVM_ENABLE_AUTOTUNER@
+config.enable_enable_aarch64_hip09 = @LLVM_ENABLE_AARCH64_HIP09@
+ 
+ import lit.llvm
+ lit.llvm.initialize(lit_config, config)
+ 
+if config.enable_enable_aarch64_hip09:
+    config.available_features.add("enable_enable_aarch64_hip09")
+
+ # Let the main config do the real work.
+ lit_config.load_config(
+     config, os.path.join(config.llvm_src_root, "test/lit.cfg.py"))
+diff --git a/llvm/unittests/TargetParser/Host.cpp b/llvm/unittests/TargetParser/Host.cpp
+index 452d0326c..4b4c81514 100644
+--- a/llvm/unittests/TargetParser/Host.cpp
+++ b/llvm/unittests/TargetParser/Host.cpp
+@@ -250,6 +250,11 @@ CPU part	: 0x0a1
+   EXPECT_EQ(sys::detail::getHostCPUNameForARM("CPU implementer : 0x48\n"
+                                               "CPU part        : 0xd01"),
+             "tsv110");
+#if defined(ENABLE_AARCH64_HIP09)
+  EXPECT_EQ(sys::detail::getHostCPUNameForARM("CPU implementer : 0x48\n"
+                                              "CPU part        : 0xd02"),
+            "hip09");
+#endif
+ 
+   // Verify A64FX.
+   const std::string A64FXProcCpuInfo = R"(
+diff --git a/llvm/unittests/TargetParser/TargetParserTest.cpp b/llvm/unittests/TargetParser/TargetParserTest.cpp
+index 741d5a2d4..94e0047e5 100644
+--- a/llvm/unittests/TargetParser/TargetParserTest.cpp
+++ b/llvm/unittests/TargetParser/TargetParserTest.cpp
+@@ -1421,6 +1421,18 @@ INSTANTIATE_TEST_SUITE_P(
+                              AArch64::AEK_PROFILE | AArch64::AEK_FP16 |
+                              AArch64::AEK_FP16FML | AArch64::AEK_DOTPROD,
+                          "8.2-A"),
+#if defined(ENABLE_AARCH64_HIP09)
+        ARMCPUTestParams(
+            "hip09", "armv8.5-a", "crypto-neon-fp-armv8",
+            AArch64::AEK_CRC | AArch64::AEK_FP | AArch64::AEK_SIMD |
+                AArch64::AEK_RAS | AArch64::AEK_LSE | AArch64::AEK_RDM |
+                AArch64::AEK_RCPC | AArch64::AEK_DOTPROD | AArch64::AEK_AES |
+                AArch64::AEK_SM4 | AArch64::AEK_SHA2 | AArch64::AEK_SHA3 |
+                AArch64::AEK_FP16 | AArch64::AEK_PROFILE |
+                AArch64::AEK_FP16FML | AArch64::AEK_SVE | AArch64::AEK_I8MM |
+                AArch64::AEK_F32MM | AArch64::AEK_F64MM | AArch64::AEK_BF16,
+            "8.5-A"),
+#endif
+         ARMCPUTestParams("a64fx", "armv8.2-a", "crypto-neon-fp-armv8",
+                          AArch64::AEK_CRC | AArch64::AEK_AES |
+                              AArch64::AEK_SHA2 | AArch64::AEK_FP |
+@@ -1437,7 +1449,11 @@ INSTANTIATE_TEST_SUITE_P(
+                          "8.2-A")));
+ 
+ // Note: number of CPUs includes aliases.
+#if defined(ENABLE_AARCH64_HIP09)
+static constexpr unsigned NumAArch64CPUArchs = 63;
+#else
+ static constexpr unsigned NumAArch64CPUArchs = 62;
+#endif
+ 
+ TEST(TargetParserTest, testAArch64CPUArchList) {
+   SmallVector<StringRef, NumAArch64CPUArchs> List;
+-- 
+2.19.1
+
--- a/0024-Backport-LoongArch-fix-and-add-some-new-support.patch
+++ b/0024-Backport-LoongArch-fix-and-add-some-new-support.patch
--- a/0025-Backport-Simple-check-to-ignore-Inline-asm-fwait-insertion.patch
+++ b/0025-Backport-Simple-check-to-ignore-Inline-asm-fwait-insertion.patch
@ -0,0 +1,30 @@
+From cf9d549f2c40d548587f8d2d3cda0d32f13c9256 Mon Sep 17 00:00:00 2001
+From: Temperatureblock <102174059+Temperature-block@users.noreply.github.com>
+Date: Mon, 12 Aug 2024 20:06:58 +0530
+Subject: [PATCH] Simple check to ignore Inline asm fwait insertion (#101686)
+
+Just a simple check to ignore Inline asm fwait insertion
+
+Fixes #101613
+---
+ llvm/lib/Target/X86/X86InstrInfo.cpp | 5 +++++
+ 1 file changed, 5 insertions(+)
+
+diff --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp
+index 10a0ccdcb023..e615fa09608c 100644
+--- a/llvm/lib/Target/X86/X86InstrInfo.cpp
+++ b/llvm/lib/Target/X86/X86InstrInfo.cpp
+@@ -2947,6 +2947,11 @@ static bool isX87Reg(unsigned Reg) {
+ 
+ /// check if the instruction is X87 instruction
+ bool X86::isX87Instruction(MachineInstr &MI) {
+  // Inline asm may define X87 registers, so we special-case it here;
+  // otherwise inline asm would be incorrectly flagged as an x87
+  // instruction.
+  if (MI.isInlineAsm())
+    return false;
+   for (const MachineOperand &MO : MI.operands()) {
+     if (!MO.isReg())
+       continue;
+-- 
+Gitee
--- a/0026-Add-arch-restriction-for-BiSheng-Autotuner.patch
+++ b/0026-Add-arch-restriction-for-BiSheng-Autotuner.patch
@ -0,0 +1,24 @@
+From 2513e90fd317bbe5854a06213e43cdf7029c3ee2 Mon Sep 17 00:00:00 2001
+From: liyunfei <liyunfei33@huawei.com>
+Date: Tue, 5 Nov 2024 18:18:19 +0800
+Subject: [PATCH] Add arch restriction for BiSheng Autotuner
+
+BiSheng Autotuner only supports x86_64 and aarch64 for now.
+
+Signed-off-by: liyunfei <liyunfei33@huawei.com>
+---
+ llvm/test/AutoTuning/lit.local.cfg | 2 ++
+ 1 file changed, 2 insertions(+)
+
+diff --git a/llvm/test/AutoTuning/lit.local.cfg b/llvm/test/AutoTuning/lit.local.cfg
+index 13b4927257ab..c48c2c9eab6f 100644
+--- a/llvm/test/AutoTuning/lit.local.cfg
+++ b/llvm/test/AutoTuning/lit.local.cfg
+@@ -1,2 +1,4 @@
+ if not config.enable_enable_autotuner:
+     config.unsupported = True
+if config.host_arch not in ["x86", "X86", 'x86_64', 'aarch64']:
+    config.unsupported = True
+\ No newline at end of file
+-- 
+Gitee
--- a/0027-AArch64-Delete-hip09-macro.patch
+++ b/0027-AArch64-Delete-hip09-macro.patch
@ -0,0 +1,514 @@
+From 42b0d16ab1ced5720e017fa9f6059c32489ab1bd Mon Sep 17 00:00:00 2001
+From: xiajingze <xiajingze1@huawei.com>
+Date: Wed, 9 Oct 2024 17:13:49 +0800
+Subject: [PATCH] [AArch64] Delete hip09 macro
+
+Signed-off-by: xiajingze <xiajingze1@huawei.com>
+---
+ llvm/cmake/modules/HandleLLVMOptions.cmake    |  8 --
+ .../llvm/TargetParser/AArch64TargetParser.h   |  2 -
+ llvm/lib/Target/AArch64/AArch64.td            |  8 --
+ .../lib/Target/AArch64/AArch64MacroFusion.cpp |  8 --
+ llvm/lib/Target/AArch64/AArch64Subtarget.cpp  |  2 -
+ llvm/lib/Target/AArch64/AArch64Subtarget.h    |  6 --
+ llvm/lib/Target/CMakeLists.txt                |  4 -
+ llvm/lib/TargetParser/Host.cpp                |  2 -
+ llvm/test/CodeGen/AArch64/cpus-hip09.ll       | 11 ---
+ llvm/test/CodeGen/AArch64/cpus.ll             |  1 +
+ .../CodeGen/AArch64/macro-fusion-mvnclz.mir   |  1 -
+ .../AArch64/misched-fusion-lit-hip09.ll       | 73 --------------
+ .../CodeGen/AArch64/misched-fusion-lit.ll     |  7 ++
+ llvm/test/CodeGen/AArch64/remat-hip09.ll      | 18 ----
+ llvm/test/CodeGen/AArch64/remat.ll            |  1 +
+ llvm/test/lit.site.cfg.py.in                  |  4 -
+ llvm/unittests/TargetParser/Host.cpp          |  2 -
+ .../TargetParser/TargetParserTest.cpp         |  6 --
+ 18 files changed, 9 insertions(+), 155 deletions(-)
+ delete mode 100644 llvm/test/CodeGen/AArch64/cpus-hip09.ll
+ delete mode 100644 llvm/test/CodeGen/AArch64/misched-fusion-lit-hip09.ll
+ delete mode 100644 llvm/test/CodeGen/AArch64/remat-hip09.ll
+
+diff --git a/llvm/cmake/modules/HandleLLVMOptions.cmake b/llvm/cmake/modules/HandleLLVMOptions.cmake
+index 74e68e25d85c..8be5d4ba52c2 100644
+--- a/llvm/cmake/modules/HandleLLVMOptions.cmake
+++ b/llvm/cmake/modules/HandleLLVMOptions.cmake
+@@ -112,14 +112,6 @@ else()
+   set(LLVM_ENABLE_AUTOTUNER 0)
+ endif()
+ 
+-option(LLVM_ENABLE_AARCH64_HIP09 "Enable HIP09 Processor" ON)
+-if(LLVM_ENABLE_AARCH64_HIP09)
+-  set(LLVM_ENABLE_AARCH64_HIP09 1)
+-  add_definitions( -DENABLE_AARCH64_HIP09 )
+-else()
+-  set(LLVM_ENABLE_AARCH64_HIP09 0)
+-endif()
+-
+ if(LLVM_ENABLE_EXPENSIVE_CHECKS)
+   add_compile_definitions(EXPENSIVE_CHECKS)
+ 
+diff --git a/llvm/include/llvm/TargetParser/AArch64TargetParser.h b/llvm/include/llvm/TargetParser/AArch64TargetParser.h
+index 07cd2fcbb68d..8b25cce0abdc 100644
+--- a/llvm/include/llvm/TargetParser/AArch64TargetParser.h
+++ b/llvm/include/llvm/TargetParser/AArch64TargetParser.h
+@@ -542,13 +542,11 @@ inline constexpr CpuInfo CpuInfos[] = {
+      (AArch64::AEK_FP16 | AArch64::AEK_RAND | AArch64::AEK_SM4 |
+       AArch64::AEK_SHA3 | AArch64::AEK_SHA2 | AArch64::AEK_AES |
+       AArch64::AEK_MTE | AArch64::AEK_SB | AArch64::AEK_SSBS)},
+-#if defined(ENABLE_AARCH64_HIP09)
+     {"hip09", ARMV8_5A,
+      (AArch64::AEK_AES | AArch64::AEK_SM4 | AArch64::AEK_SHA2 |
+       AArch64::AEK_SHA3 | AArch64::AEK_FP16 | AArch64::AEK_PROFILE |
+       AArch64::AEK_FP16FML | AArch64::AEK_SVE | AArch64::AEK_I8MM |
+       AArch64::AEK_F32MM | AArch64::AEK_F64MM | AArch64::AEK_BF16)},
+-#endif
+ };
+ 
+ // An alias for a CPU.
+diff --git a/llvm/lib/Target/AArch64/AArch64.td b/llvm/lib/Target/AArch64/AArch64.td
+index c8bfd770f55f..fdb931a0fe6c 100644
+--- a/llvm/lib/Target/AArch64/AArch64.td
+++ b/llvm/lib/Target/AArch64/AArch64.td
+@@ -296,11 +296,9 @@ def FeatureFuseAddSub2RegAndConstOne : SubtargetFeature<
+    "fuse-addsub-2reg-const1", "HasFuseAddSub2RegAndConstOne", "true",
+    "CPU fuses (a + b + 1) and (a - b - 1)">;
+ 
+-#ifdef ENABLE_AARCH64_HIP09
+ def FeatureFuseMvnClz : SubtargetFeature<
+     "fuse-mvn-clz", "HasFuseMvnClz", "true",
+     "CPU fuses mvn+clz operations">;
+-#endif
+ 
+ def FeatureDisableLatencySchedHeuristic : SubtargetFeature<
+     "disable-latency-sched-heuristic", "DisableLatencySchedHeuristic", "true",
+@@ -1211,7 +1209,6 @@ def TuneTSV110 : SubtargetFeature<"tsv110", "ARMProcFamily", "TSV110",
+                                   FeatureFuseAES,
+                                   FeaturePostRAScheduler]>;
+ 
+-#ifdef ENABLE_AARCH64_HIP09
+ def TuneHIP09 : SubtargetFeature<"hip09", "ARMProcFamily", "HIP09",
+                                    "HiSilicon HIP-09 processors", [
+                                    FeatureCustomCheapAsMoveHandling,
+@@ -1224,7 +1221,6 @@ def TuneHIP09 : SubtargetFeature<"hip09", "ARMProcFamily", "HIP09",
+                                    FeatureFuseLiterals,
+                                    FeatureFuseMvnClz,
+                                    FeaturePostRAScheduler]>;
+-#endif
+ 
+ def TuneAmpere1 : SubtargetFeature<"ampere1", "ARMProcFamily", "Ampere1",
+                                    "Ampere Computing Ampere-1 processors", [
+@@ -1380,14 +1376,12 @@ def ProcessorFeatures {
+   list<SubtargetFeature> TSV110 = [HasV8_2aOps, FeatureCrypto, FeatureFPARMv8,
+                                    FeatureNEON, FeaturePerfMon, FeatureSPE,
+                                    FeatureFullFP16, FeatureFP16FML, FeatureDotProd];
+-#ifdef ENABLE_AARCH64_HIP09
+   list<SubtargetFeature> HIP09 = [HasV8_5aOps, FeatureBF16, FeatureCrypto, FeatureFPARMv8,
+                                   FeatureMatMulInt8, FeatureMatMulFP32, FeatureMatMulFP64,
+                                   FeatureNEON, FeaturePerfMon, FeatureRandGen, FeatureSPE,
+                                   FeatureFullFP16, FeatureFP16FML, FeatureDotProd,
+                                   FeatureJS, FeatureComplxNum, FeatureSHA3, FeatureSM4,
+                                   FeatureSVE];
+-#endif
+   list<SubtargetFeature> Ampere1 = [HasV8_6aOps, FeatureNEON, FeaturePerfMon,
+                                     FeatureSSBS, FeatureRandGen, FeatureSB,
+                                     FeatureSHA2, FeatureSHA3, FeatureAES];
+@@ -1497,11 +1491,9 @@ def : ProcessorModel<"thunderx3t110", ThunderX3T110Model,
+ // HiSilicon Processors.
+ def : ProcessorModel<"tsv110", TSV110Model, ProcessorFeatures.TSV110,
+                      [TuneTSV110]>;
+-#ifdef ENABLE_AARCH64_HIP09
+ // FIXME: HiSilicon HIP09 is currently modeled as a Cortex-A57.
+ def : ProcessorModel<"hip09", CortexA57Model, ProcessorFeatures.HIP09,
+                      [TuneHIP09]>;
+-#endif
+ 
+ // Support cyclone as an alias for apple-a7 so we can still LTO old bitcode.
+ def : ProcessorModel<"cyclone", CycloneModel, ProcessorFeatures.AppleA7,
+diff --git a/llvm/lib/Target/AArch64/AArch64MacroFusion.cpp b/llvm/lib/Target/AArch64/AArch64MacroFusion.cpp
+index 4963ec350db2..44daa06468c5 100644
+--- a/llvm/lib/Target/AArch64/AArch64MacroFusion.cpp
+++ b/llvm/lib/Target/AArch64/AArch64MacroFusion.cpp
+@@ -51,12 +51,10 @@ static bool isArithmeticBccPair(const MachineInstr *FirstMI,
+   case AArch64::SUBSXrr:
+   case AArch64::BICSWrr:
+   case AArch64::BICSXrr:
+-#if defined(ENABLE_AARCH64_HIP09)
+   case AArch64::ADCSWr:
+   case AArch64::ADCSXr:
+   case AArch64::SBCSWr:
+   case AArch64::SBCSXr:
+-#endif
+     return true;
+   case AArch64::ADDSWrs:
+   case AArch64::ADDSXrs:
+@@ -189,7 +187,6 @@ static bool isLiteralsPair(const MachineInstr *FirstMI,
+       SecondMI.getOperand(3).getImm() == 16))
+     return true;
+ 
+-#if defined(ENABLE_AARCH64_HIP09)
+   // 32 bit immediate.
+   if ((FirstMI == nullptr || FirstMI->getOpcode() == AArch64::MOVNWi) &&
+       (SecondMI.getOpcode() == AArch64::MOVKWi &&
+@@ -201,7 +198,6 @@ static bool isLiteralsPair(const MachineInstr *FirstMI,
+       (SecondMI.getOpcode() == AArch64::MOVKWi &&
+        SecondMI.getOperand(3).getImm() == 16))
+     return true;
+-#endif
+ 
+   // Upper half of 64 bit immediate.
+   if ((FirstMI == nullptr ||
+@@ -457,7 +453,6 @@ static bool isAddSub2RegAndConstOnePair(const MachineInstr *FirstMI,
+   return false;
+ }
+ 
+-#if defined(ENABLE_AARCH64_HIP09)
+ static bool isMvnClzPair(const MachineInstr *FirstMI,
+                          const MachineInstr &SecondMI) {
+   // HIP09 supports fusion of MVN + CLZ.
+@@ -486,7 +481,6 @@ static bool isMvnClzPair(const MachineInstr *FirstMI,
+ 
+   return false;
+ }
+-#endif
+ 
+ /// \brief Check if the instr pair, FirstMI and SecondMI, should be fused
+ /// together. Given SecondMI, when FirstMI is unspecified, then check if
+@@ -523,10 +517,8 @@ static bool shouldScheduleAdjacent(const TargetInstrInfo &TII,
+   if (ST.hasFuseAddSub2RegAndConstOne() &&
+       isAddSub2RegAndConstOnePair(FirstMI, SecondMI))
+     return true;
+-#if defined(ENABLE_AARCH64_HIP09)
+   if (ST.hasFuseMvnClz() && isMvnClzPair(FirstMI, SecondMI))
+     return true;
+-#endif
+ 
+   return false;
+ }
+diff --git a/llvm/lib/Target/AArch64/AArch64Subtarget.cpp b/llvm/lib/Target/AArch64/AArch64Subtarget.cpp
+index ddf22364c78e..1aff7e30a0cf 100644
+--- a/llvm/lib/Target/AArch64/AArch64Subtarget.cpp
+++ b/llvm/lib/Target/AArch64/AArch64Subtarget.cpp
+@@ -266,7 +266,6 @@ void AArch64Subtarget::initializeProperties() {
+     PrefFunctionAlignment = Align(16);
+     PrefLoopAlignment = Align(4);
+     break;
+-#if defined(ENABLE_AARCH64_HIP09)
+   case HIP09:
+     CacheLineSize = 64;
+     PrefFunctionAlignment = Align(16);
+@@ -274,7 +273,6 @@ void AArch64Subtarget::initializeProperties() {
+     VScaleForTuning = 2;
+     DefaultSVETFOpts = TailFoldingOpts::Simple;
+     break;
+-#endif
+   case ThunderX3T110:
+     CacheLineSize = 64;
+     PrefFunctionAlignment = Align(16);
+diff --git a/llvm/lib/Target/AArch64/AArch64Subtarget.h b/llvm/lib/Target/AArch64/AArch64Subtarget.h
+index 5f481f4f976a..8a1cebe96894 100644
+--- a/llvm/lib/Target/AArch64/AArch64Subtarget.h
+++ b/llvm/lib/Target/AArch64/AArch64Subtarget.h
+@@ -88,9 +88,7 @@ public:
+     ThunderXT88,
+     ThunderX3T110,
+     TSV110,
+-#if defined(ENABLE_AARCH64_HIP09)
+     HIP09
+-#endif
+   };
+ 
+ protected:
+@@ -242,11 +240,7 @@ public:
+   bool hasFusion() const {
+     return hasArithmeticBccFusion() || hasArithmeticCbzFusion() ||
+            hasFuseAES() || hasFuseArithmeticLogic() || hasFuseCCSelect() ||
+-#if defined(ENABLE_AARCH64_HIP09)
+            hasFuseAdrpAdd() || hasFuseLiterals() || hasFuseMvnClz();
+-#else
+-           hasFuseAdrpAdd() || hasFuseLiterals();
+-#endif
+   }
+ 
+   unsigned getMaxInterleaveFactor() const { return MaxInterleaveFactor; }
+diff --git a/llvm/lib/Target/CMakeLists.txt b/llvm/lib/Target/CMakeLists.txt
+index 501ce1f2fe53..2739233f9ccb 100644
+--- a/llvm/lib/Target/CMakeLists.txt
+++ b/llvm/lib/Target/CMakeLists.txt
+@@ -2,10 +2,6 @@ list(APPEND LLVM_COMMON_DEPENDS intrinsics_gen)
+ 
+ list(APPEND LLVM_TABLEGEN_FLAGS -I ${LLVM_MAIN_SRC_DIR}/lib/Target)
+ 
+-if(LLVM_ENABLE_AARCH64_HIP09)
+-  list(APPEND LLVM_TABLEGEN_FLAGS "-DENABLE_AARCH64_HIP09")
+-endif()
+-
+ add_llvm_component_library(LLVMTarget
+   Target.cpp
+   TargetIntrinsicInfo.cpp
+diff --git a/llvm/lib/TargetParser/Host.cpp b/llvm/lib/TargetParser/Host.cpp
+index 8b23be02edc0..8b1191a5b442 100644
+--- a/llvm/lib/TargetParser/Host.cpp
+++ b/llvm/lib/TargetParser/Host.cpp
+@@ -257,9 +257,7 @@ StringRef sys::detail::getHostCPUNameForARM(StringRef ProcCpuinfoContent) {
+     // contents are specified in the various processor manuals.
+     return StringSwitch<const char *>(Part)
+       .Case("0xd01", "tsv110")
+-#if defined(ENABLE_AARCH64_HIP09)
+       .Case("0xd02", "hip09")
+-#endif
+       .Default("generic");
+ 
+   if (Implementer == "0x51") // Qualcomm Technologies, Inc.
+diff --git a/llvm/test/CodeGen/AArch64/cpus-hip09.ll b/llvm/test/CodeGen/AArch64/cpus-hip09.ll
+deleted file mode 100644
+index dcf32e4dca89..000000000000
+--- a/llvm/test/CodeGen/AArch64/cpus-hip09.ll
+++ /dev/null
+@@ -1,11 +0,0 @@
+-; REQUIRES: enable_enable_aarch64_hip09
+-; This tests that llc accepts all valid AArch64 CPUs
+-
+-; RUN: llc < %s -mtriple=arm64-unknown-unknown -mcpu=hip09 2>&1 | FileCheck %s
+-
+-; CHECK-NOT: {{.*}}  is not a recognized processor for this target
+-; INVALID: {{.*}}  is not a recognized processor for this target
+-
+-define i32 @f(i64 %z) {
+-	ret i32 0
+-}
+diff --git a/llvm/test/CodeGen/AArch64/cpus.ll b/llvm/test/CodeGen/AArch64/cpus.ll
+index b24866064efa..56772f6c6049 100644
+--- a/llvm/test/CodeGen/AArch64/cpus.ll
+++ b/llvm/test/CodeGen/AArch64/cpus.ll
+@@ -33,6 +33,7 @@
+ ; RUN: llc < %s -mtriple=arm64-unknown-unknown -mcpu=thunderx2t99 2>&1 | FileCheck %s
+ ; RUN: llc < %s -mtriple=arm64-unknown-unknown -mcpu=thunderx3t110 2>&1 | FileCheck %s
+ ; RUN: llc < %s -mtriple=arm64-unknown-unknown -mcpu=tsv110 2>&1 | FileCheck %s
+; RUN: llc < %s -mtriple=arm64-unknown-unknown -mcpu=hip09 2>&1 | FileCheck %s
+ ; RUN: llc < %s -mtriple=arm64-unknown-unknown -mcpu=apple-latest 2>&1 | FileCheck %s
+ ; RUN: llc < %s -mtriple=arm64-unknown-unknown -mcpu=a64fx 2>&1 | FileCheck %s
+ ; RUN: llc < %s -mtriple=arm64-unknown-unknown -mcpu=ampere1 2>&1 | FileCheck %s
+diff --git a/llvm/test/CodeGen/AArch64/macro-fusion-mvnclz.mir b/llvm/test/CodeGen/AArch64/macro-fusion-mvnclz.mir
+index 64bf159370f9..26ba76ef0af5 100644
+--- a/llvm/test/CodeGen/AArch64/macro-fusion-mvnclz.mir
+++ b/llvm/test/CodeGen/AArch64/macro-fusion-mvnclz.mir
+@@ -1,4 +1,3 @@
+-# REQUIRES: enable_enable_aarch64_hip09
+ # RUN: llc -o - %s -mtriple=aarch64-- -mattr=+fuse-mvn-clz -run-pass postmisched | FileCheck %s --check-prefixes=CHECK,FUSION
+ # RUN: llc -o - %s -mtriple=aarch64-- -mattr=-fuse-mvn-clz -run-pass postmisched | FileCheck %s --check-prefixes=CHECK,NOFUSION
+ ---
+diff --git a/llvm/test/CodeGen/AArch64/misched-fusion-lit-hip09.ll b/llvm/test/CodeGen/AArch64/misched-fusion-lit-hip09.ll
+deleted file mode 100644
+index d67fa5b4374c..000000000000
+--- a/llvm/test/CodeGen/AArch64/misched-fusion-lit-hip09.ll
+++ /dev/null
+@@ -1,73 +0,0 @@
+-; REQUIRES: enable_enable_aarch64_hip09
+-; RUN: llc %s -o - -mtriple=aarch64-unknown -mcpu=hip09           | FileCheck %s --check-prefix=CHECK --check-prefix=CHECKFUSE-HIP09
+-
+-@g = common local_unnamed_addr global ptr null, align 8
+-
+-define dso_local ptr @litp(i32 %a, i32 %b) {
+-entry:
+-  %add = add nsw i32 %b, %a
+-  %idx.ext = sext i32 %add to i64
+-  %add.ptr = getelementptr i8, ptr @litp, i64 %idx.ext
+-  store ptr %add.ptr, ptr @g, align 8
+-  ret ptr %add.ptr
+-
+-; CHECK-LABEL: litp:
+-; CHECK: adrp [[R:x[0-9]+]], litp
+-; CHECKFUSE-NEXT: add {{x[0-9]+}}, [[R]], :lo12:litp
+-}
+-
+-define dso_local ptr @litp_tune_generic(i32 %a, i32 %b) "tune-cpu"="generic" {
+-entry:
+-  %add = add nsw i32 %b, %a
+-  %idx.ext = sext i32 %add to i64
+-  %add.ptr = getelementptr i8, ptr @litp_tune_generic, i64 %idx.ext
+-  store ptr %add.ptr, ptr @g, align 8
+-  ret ptr %add.ptr
+-
+-; CHECK-LABEL: litp_tune_generic:
+-; CHECK:         adrp [[R:x[0-9]+]], litp_tune_generic
+-; CHECK-NEXT:    add {{x[0-9]+}}, [[R]], :lo12:litp_tune_generic
+-}
+-
+-define dso_local i32 @liti(i32 %a, i32 %b) {
+-entry:
+-  %add = add i32 %a, -262095121
+-  %add1 = add i32 %add, %b
+-  ret i32 %add1
+-
+-; CHECK-LABEL: liti:
+-; CHECK: mov [[R:w[0-9]+]], {{#[0-9]+}}
+-; CHECKDONT-NEXT: add {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+-; CHECKFUSE-NEXT: movk [[R]], {{#[0-9]+}}, lsl #16
+-; CHECKFUSE-HIP09: movk [[R]], {{#[0-9]+}}, lsl #16
+-}
+-
+-; Function Attrs: norecurse nounwind readnone
+-define dso_local i64 @litl(i64 %a, i64 %b) {
+-entry:
+-  %add = add i64 %a, 2208998440489107183
+-  %add1 = add i64 %add, %b
+-  ret i64 %add1
+-
+-; CHECK-LABEL: litl:
+-; CHECK: mov [[R:x[0-9]+]], {{#[0-9]+}}
+-; CHECKDONT-NEXT: add {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}
+-; CHECK-NEXT: movk [[R]], {{#[0-9]+}}, lsl #16
+-; CHECK: movk [[R]], {{#[0-9]+}}, lsl #32
+-; CHECK-NEXT: movk [[R]], {{#[0-9]+}}, lsl #48
+-}
+-
+-; Function Attrs: norecurse nounwind readnone
+-define dso_local double @litf() {
+-entry:
+-  ret double 0x400921FB54442D18
+-
+-; CHECK-LABEL: litf:
+-; CHECK-DONT:      adrp [[ADDR:x[0-9]+]], [[CSTLABEL:.LCP.*]]
+-; CHECK-DONT-NEXT: ldr  {{d[0-9]+}}, {{[[]}}[[ADDR]], :lo12:[[CSTLABEL]]{{[]]}}
+-; CHECKFUSE-HIP09:    mov  [[R:x[0-9]+]], #11544
+-; CHECKFUSE-HIP09:    movk [[R]], #21572, lsl #16
+-; CHECKFUSE-HIP09:    movk [[R]], #8699, lsl #32
+-; CHECKFUSE-HIP09:    movk [[R]], #16393, lsl #48
+-; CHECKFUSE-HIP09:    fmov {{d[0-9]+}}, [[R]]
+-}
+diff --git a/llvm/test/CodeGen/AArch64/misched-fusion-lit.ll b/llvm/test/CodeGen/AArch64/misched-fusion-lit.ll
+index ad244d30df11..67cc7aa503b6 100644
+--- a/llvm/test/CodeGen/AArch64/misched-fusion-lit.ll
+++ b/llvm/test/CodeGen/AArch64/misched-fusion-lit.ll
+@@ -7,6 +7,7 @@
+ ; RUN: llc %s -o - -mtriple=aarch64-unknown -mcpu=exynos-m4       | FileCheck %s --check-prefix=CHECK --check-prefix=CHECKFUSE
+ ; RUN: llc %s -o - -mtriple=aarch64-unknown -mcpu=exynos-m5       | FileCheck %s --check-prefix=CHECK --check-prefix=CHECKFUSE
+ ; RUN: llc %s -o - -mtriple=aarch64-unknown -mcpu=neoverse-n1     | FileCheck %s --check-prefix=CHECKFUSE-NEOVERSE
+; RUN: llc %s -o - -mtriple=aarch64-unknown -mcpu=hip09           | FileCheck %s --check-prefix=CHECK --check-prefix=CHECKFUSE-HIP09
+ 
+ @g = common local_unnamed_addr global ptr null, align 8
+ 
+@@ -59,6 +60,7 @@ entry:
+ ; CHECK: mov [[R:w[0-9]+]], {{#[0-9]+}}
+ ; CHECKDONT-NEXT: add {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+ ; CHECKFUSE-NEXT: movk [[R]], {{#[0-9]+}}, lsl #16
+; CHECKFUSE-HIP09: movk [[R]], {{#[0-9]+}}, lsl #16
+ }
+ 
+ ; Function Attrs: norecurse nounwind readnone
+@@ -89,4 +91,9 @@ entry:
+ ; CHECK-FUSE:      movk [[R]], #8699, lsl #32
+ ; CHECK-FUSE:      movk [[R]], #16393, lsl #48
+ ; CHECK-FUSE:      fmov {{d[0-9]+}}, [[R]]
+; CHECKFUSE-HIP09:    mov  [[R:x[0-9]+]], #11544
+; CHECKFUSE-HIP09:    movk [[R]], #21572, lsl #16
+; CHECKFUSE-HIP09:    movk [[R]], #8699, lsl #32
+; CHECKFUSE-HIP09:    movk [[R]], #16393, lsl #48
+; CHECKFUSE-HIP09:    fmov {{d[0-9]+}}, [[R]]
+ }
+diff --git a/llvm/test/CodeGen/AArch64/remat-hip09.ll b/llvm/test/CodeGen/AArch64/remat-hip09.ll
+deleted file mode 100644
+index aec0d18ae73f..000000000000
+--- a/llvm/test/CodeGen/AArch64/remat-hip09.ll
+++ /dev/null
+@@ -1,18 +0,0 @@
+-; REQUIRES: enable_enable_aarch64_hip09
+-; RUN: llc -mtriple=aarch64-linux-gnuabi -mcpu=hip09 -o - %s | FileCheck %s
+-
+-%X = type { i64, i64, i64 }
+-declare void @f(ptr)
+-define void @t() {
+-entry:
+-  %tmp = alloca %X
+-  call void @f(ptr %tmp)
+-; CHECK: add x0, sp, #8
+-; CHECK-NOT: mov
+-; CHECK-NEXT: bl f
+-  call void @f(ptr %tmp)
+-; CHECK: add x0, sp, #8
+-; CHECK-NOT: mov
+-; CHECK-NEXT: bl f
+-  ret void
+-}
+diff --git a/llvm/test/CodeGen/AArch64/remat.ll b/llvm/test/CodeGen/AArch64/remat.ll
+index 483c4d71ee21..fa039246c7f5 100644
+--- a/llvm/test/CodeGen/AArch64/remat.ll
+++ b/llvm/test/CodeGen/AArch64/remat.ll
+@@ -22,6 +22,7 @@
+ ; RUN: llc -mtriple=aarch64-linux-gnuabi -mcpu=kryo -o - %s | FileCheck %s
+ ; RUN: llc -mtriple=aarch64-linux-gnuabi -mcpu=thunderx2t99 -o - %s | FileCheck %s
+ ; RUN: llc -mtriple=aarch64-linux-gnuabi -mcpu=tsv110 -o - %s | FileCheck %s
+; RUN: llc -mtriple=aarch64-linux-gnuabi -mcpu=hip09 -o - %s | FileCheck %s
+ ; RUN: llc -mtriple=aarch64-linux-gnuabi -mattr=+custom-cheap-as-move -o - %s | FileCheck %s
+ ; RUN: llc -mtriple=aarch64-linux-gnuabi -mcpu=thunderx3t110 -o - %s | FileCheck %s
+ ; RUN: llc -mtriple=aarch64-linux-gnuabi -mcpu=ampere1 -o - %s | FileCheck %s
+diff --git a/llvm/test/lit.site.cfg.py.in b/llvm/test/lit.site.cfg.py.in
+index 6145a514f008..20c1ecca1d43 100644
+--- a/llvm/test/lit.site.cfg.py.in
+++ b/llvm/test/lit.site.cfg.py.in
+@@ -63,14 +63,10 @@ config.dxil_tests = @LLVM_INCLUDE_DXIL_TESTS@
+ config.have_llvm_driver = @LLVM_TOOL_LLVM_DRIVER_BUILD@
+ config.use_classic_flang = @LLVM_ENABLE_CLASSIC_FLANG@
+ config.enable_enable_autotuner = @LLVM_ENABLE_AUTOTUNER@
+-config.enable_enable_aarch64_hip09 = @LLVM_ENABLE_AARCH64_HIP09@
+ 
+ import lit.llvm
+ lit.llvm.initialize(lit_config, config)
+ 
+-if config.enable_enable_aarch64_hip09:
+-    config.available_features.add("enable_enable_aarch64_hip09")
+-
+ # Let the main config do the real work.
+ lit_config.load_config(
+     config, os.path.join(config.llvm_src_root, "test/lit.cfg.py"))
+diff --git a/llvm/unittests/TargetParser/Host.cpp b/llvm/unittests/TargetParser/Host.cpp
+index 4b4c81514896..cfc41486b173 100644
+--- a/llvm/unittests/TargetParser/Host.cpp
+++ b/llvm/unittests/TargetParser/Host.cpp
+@@ -250,11 +250,9 @@ CPU part	: 0x0a1
+   EXPECT_EQ(sys::detail::getHostCPUNameForARM("CPU implementer : 0x48\n"
+                                               "CPU part        : 0xd01"),
+             "tsv110");
+-#if defined(ENABLE_AARCH64_HIP09)
+   EXPECT_EQ(sys::detail::getHostCPUNameForARM("CPU implementer : 0x48\n"
+                                               "CPU part        : 0xd02"),
+             "hip09");
+-#endif
+ 
+   // Verify A64FX.
+   const std::string A64FXProcCpuInfo = R"(
+diff --git a/llvm/unittests/TargetParser/TargetParserTest.cpp b/llvm/unittests/TargetParser/TargetParserTest.cpp
+index 94e0047e567b..daa38474004e 100644
+--- a/llvm/unittests/TargetParser/TargetParserTest.cpp
+++ b/llvm/unittests/TargetParser/TargetParserTest.cpp
+@@ -1421,7 +1421,6 @@ INSTANTIATE_TEST_SUITE_P(
+                              AArch64::AEK_PROFILE | AArch64::AEK_FP16 |
+                              AArch64::AEK_FP16FML | AArch64::AEK_DOTPROD,
+                          "8.2-A"),
+-#if defined(ENABLE_AARCH64_HIP09)
+         ARMCPUTestParams(
+             "hip09", "armv8.5-a", "crypto-neon-fp-armv8",
+             AArch64::AEK_CRC | AArch64::AEK_FP | AArch64::AEK_SIMD |
+@@ -1432,7 +1431,6 @@ INSTANTIATE_TEST_SUITE_P(
+                 AArch64::AEK_FP16FML | AArch64::AEK_SVE | AArch64::AEK_I8MM |
+                 AArch64::AEK_F32MM | AArch64::AEK_F64MM | AArch64::AEK_BF16,
+             "8.5-A"),
+-#endif
+         ARMCPUTestParams("a64fx", "armv8.2-a", "crypto-neon-fp-armv8",
+                          AArch64::AEK_CRC | AArch64::AEK_AES |
+                              AArch64::AEK_SHA2 | AArch64::AEK_FP |
+@@ -1449,11 +1447,7 @@ INSTANTIATE_TEST_SUITE_P(
+                          "8.2-A")));
+ 
+ // Note: number of CPUs includes aliases.
+-#if defined(ENABLE_AARCH64_HIP09)
+ static constexpr unsigned NumAArch64CPUArchs = 63;
+-#else
+-static constexpr unsigned NumAArch64CPUArchs = 62;
+-#endif
+ 
+ TEST(TargetParserTest, testAArch64CPUArchList) {
+   SmallVector<StringRef, NumAArch64CPUArchs> List;
+-- 
+2.43.0
+
--- a/0028-backport-Clang-Fix-crash-with-fzero-call-used-regs.patch
+++ b/0028-backport-Clang-Fix-crash-with-fzero-call-used-regs.patch
@ -0,0 +1,72 @@
+From 28e3fc80336935bc8bed372e78616ef5be9f4908 Mon Sep 17 00:00:00 2001
+From: Arthur Eubanks <aeubanks@google.com>
+Date: Thu, 27 Jul 2023 13:27:58 -0700
+Subject: [PATCH] Don't zero out noreg operands
+
+A tail call may have $noreg operands.
+
+Fixes a crash.
+
+Reviewed By: xgupta
+
+Differential Revision: https://reviews.llvm.org/D156485
+---
+ llvm/lib/CodeGen/PrologEpilogInserter.cpp    |  9 +++++++--
+ llvm/test/CodeGen/X86/zero-call-used-regs.ll | 14 ++++++++++++++
+ 2 files changed, 21 insertions(+), 2 deletions(-)
+
+diff --git a/llvm/lib/CodeGen/PrologEpilogInserter.cpp b/llvm/lib/CodeGen/PrologEpilogInserter.cpp
+index e323aaaeefaf..49047719fdaa 100644
+--- a/llvm/lib/CodeGen/PrologEpilogInserter.cpp
+++ b/llvm/lib/CodeGen/PrologEpilogInserter.cpp
+@@ -1285,6 +1285,8 @@ void PEI::insertZeroCallUsedRegs(MachineFunction &MF) {
+           continue;
+ 
+         MCRegister Reg = MO.getReg();
+        if (!Reg)
+          continue;
+ 
+         // This picks up sibling registers (e.q. %al -> %ah).
+         for (MCRegUnit Unit : TRI.regunits(Reg))
+@@ -1308,8 +1310,11 @@ void PEI::insertZeroCallUsedRegs(MachineFunction &MF) {
+         if (!MO.isReg())
+           continue;
+ 
+-        for (const MCPhysReg &Reg :
+-             TRI.sub_and_superregs_inclusive(MO.getReg()))
+        MCRegister Reg = MO.getReg();
+        if (!Reg)
+          continue;
+
+        for (const MCPhysReg Reg : TRI.sub_and_superregs_inclusive(Reg))
+           RegsToZero.reset(Reg);
+       }
+     }
+diff --git a/llvm/test/CodeGen/X86/zero-call-used-regs.ll b/llvm/test/CodeGen/X86/zero-call-used-regs.ll
+index 63d51c916bb9..97ad5ce9c8cb 100644
+--- a/llvm/test/CodeGen/X86/zero-call-used-regs.ll
+++ b/llvm/test/CodeGen/X86/zero-call-used-regs.ll
+@@ -241,6 +241,20 @@ entry:
+   ret i32 %x
+ }
+ 
+define dso_local void @tailcall(ptr %p) local_unnamed_addr #0 "zero-call-used-regs"="used-gpr" {
+; I386-LABEL: tailcall:
+; I386:       # %bb.0:
+; I386-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; I386-NEXT:    jmpl *(%eax) # TAILCALL
+;
+; X86-64-LABEL: tailcall:
+; X86-64:       # %bb.0:
+; X86-64-NEXT:    jmpq *(%rdi) # TAILCALL
+  %c = load ptr, ptr %p
+  tail call void %c()
+  ret void
+}
+
+ ; Don't emit zeroing registers in "main" function.
+ define dso_local i32 @main() local_unnamed_addr #1 {
+ ; I386-LABEL: main:
+-- 
+2.43.0
+
--- a/0029-SimplifyLibCalls-Merge-sqrt-into-the-power-of-exp-79.patch
+++ b/0029-SimplifyLibCalls-Merge-sqrt-into-the-power-of-exp-79.patch
@ -0,0 +1,246 @@
+From 60ff801d1ea96ab964039cc1ed42e1dca0a63d54 Mon Sep 17 00:00:00 2001
+From: Anton Sidorenko <anton.sidorenko@syntacore.com>
+Date: Tue, 6 Feb 2024 12:02:06 +0300
+Subject: [PATCH] [SimplifyLibCalls] Merge sqrt into the power of exp (#79146)
+
+Under fast-math flags it's possible to convert `sqrt(exp(X))` into
+`exp(X * 0.5)`. I suppose that this transformation is always profitable.
+This is similar to the optimization existing in GCC.
+---
+ .../llvm/Transforms/Utils/SimplifyLibCalls.h  |   1 +
+ .../lib/Transforms/Utils/SimplifyLibCalls.cpp |  67 ++++++++++
+ llvm/test/Transforms/InstCombine/sqrt.ll      | 120 ++++++++++++++++++
+ 3 files changed, 188 insertions(+)
+
+diff --git a/llvm/include/llvm/Transforms/Utils/SimplifyLibCalls.h b/llvm/include/llvm/Transforms/Utils/SimplifyLibCalls.h
+index eb10545ee149..1aad0b298845 100644
+--- a/llvm/include/llvm/Transforms/Utils/SimplifyLibCalls.h
+++ b/llvm/include/llvm/Transforms/Utils/SimplifyLibCalls.h
+@@ -201,6 +201,7 @@ private:
+   Value *optimizeFMinFMax(CallInst *CI, IRBuilderBase &B);
+   Value *optimizeLog(CallInst *CI, IRBuilderBase &B);
+   Value *optimizeSqrt(CallInst *CI, IRBuilderBase &B);
+  Value *mergeSqrtToExp(CallInst *CI, IRBuilderBase &B);
+   Value *optimizeSinCosPi(CallInst *CI, bool IsSin, IRBuilderBase &B);
+   Value *optimizeTan(CallInst *CI, IRBuilderBase &B);
+   // Wrapper for all floating point library call optimizations
+diff --git a/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp b/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp
+index 3ad97613fe7a..dd5bbdaaf6d3 100644
+--- a/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp
+++ b/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp
+@@ -2539,6 +2539,70 @@ Value *LibCallSimplifier::optimizeLog(CallInst *Log, IRBuilderBase &B) {
+   return Ret;
+ }
+ 
+// sqrt(exp(X)) -> exp(X * 0.5); exp2/exp10 arguments are merged the same way.
+Value *LibCallSimplifier::mergeSqrtToExp(CallInst *CI, IRBuilderBase &B) {
+  if (!CI->hasAllowReassoc())
+    return nullptr;
+
+  Function *SqrtFn = CI->getCalledFunction();
+  CallInst *Arg = dyn_cast<CallInst>(CI->getArgOperand(0));
+  if (!Arg || !Arg->hasAllowReassoc() || !Arg->hasOneUse())
+    return nullptr;
+  Intrinsic::ID ArgID = Arg->getIntrinsicID();
+  LibFunc ArgLb = NotLibFunc;
+  TLI->getLibFunc(*Arg, ArgLb);
+
+  LibFunc SqrtLb, ExpLb, Exp2Lb, Exp10Lb;
+  // Select the exp-family LibFuncs that match the precision of this sqrt.
+  if (TLI->getLibFunc(SqrtFn->getName(), SqrtLb))
+    switch (SqrtLb) {
+    case LibFunc_sqrtf:
+      ExpLb = LibFunc_expf;
+      Exp2Lb = LibFunc_exp2f;
+      Exp10Lb = LibFunc_exp10f;
+      break;
+    case LibFunc_sqrt:
+      ExpLb = LibFunc_exp;
+      Exp2Lb = LibFunc_exp2;
+      Exp10Lb = LibFunc_exp10;
+      break;
+    case LibFunc_sqrtl:
+      ExpLb = LibFunc_expl;
+      Exp2Lb = LibFunc_exp2l;
+      Exp10Lb = LibFunc_exp10l;
+      break;
+    default:
+      return nullptr;
+    }
+  else if (SqrtFn->getIntrinsicID() == Intrinsic::sqrt) {
+    if (CI->getType()->getScalarType()->isFloatTy()) {
+      ExpLb = LibFunc_expf;
+      Exp2Lb = LibFunc_exp2f;
+      Exp10Lb = LibFunc_exp10f;
+    } else if (CI->getType()->getScalarType()->isDoubleTy()) {
+      ExpLb = LibFunc_exp;
+      Exp2Lb = LibFunc_exp2;
+      Exp10Lb = LibFunc_exp10;
+    } else
+      return nullptr;
+  } else
+    return nullptr;
+  // Give up unless Arg is a matching exp-family libcall or llvm.exp/exp2.
+  if (ArgLb != ExpLb && ArgLb != Exp2Lb && ArgLb != Exp10Lb &&
+      ArgID != Intrinsic::exp && ArgID != Intrinsic::exp2)
+    return nullptr;
+  // Halve the exp operand in place, inserting the fmul just before Arg.
+  IRBuilderBase::InsertPointGuard Guard(B);
+  B.SetInsertPoint(Arg);
+  auto *ExpOperand = Arg->getOperand(0);
+  auto *FMul =
+      B.CreateFMulFMF(ExpOperand, ConstantFP::get(ExpOperand->getType(), 0.5),
+                      CI, "merged.sqrt");
+
+  Arg->setOperand(0, FMul);
+  return Arg;
+}
+
+ Value *LibCallSimplifier::optimizeSqrt(CallInst *CI, IRBuilderBase &B) {
+   Module *M = CI->getModule();
+   Function *Callee = CI->getCalledFunction();
+@@ -2551,6 +2615,9 @@ Value *LibCallSimplifier::optimizeSqrt(CallInst *CI, IRBuilderBase &B) {
+        Callee->getIntrinsicID() == Intrinsic::sqrt))
+     Ret = optimizeUnaryDoubleFP(CI, B, TLI, true);
+ 
+  if (Value *Opt = mergeSqrtToExp(CI, B))
+    return Opt;
+
+   if (!CI->isFast())
+     return Ret;
+ 
+diff --git a/llvm/test/Transforms/InstCombine/sqrt.ll b/llvm/test/Transforms/InstCombine/sqrt.ll
+index 004df3e30c72..f72fe5a6a581 100644
+--- a/llvm/test/Transforms/InstCombine/sqrt.ll
+++ b/llvm/test/Transforms/InstCombine/sqrt.ll
+@@ -88,7 +88,127 @@ define float @sqrt_call_fabs_f32(float %x) {
+   ret float %sqrt
+ }
+ 
+define double @sqrt_exp(double %x) {
+; CHECK-LABEL: @sqrt_exp(
+; CHECK-NEXT:    [[MERGED_SQRT:%.*]] = fmul reassoc double [[X:%.*]], 5.000000e-01
+; CHECK-NEXT:    [[E:%.*]] = call reassoc double @llvm.exp.f64(double [[MERGED_SQRT]])
+; CHECK-NEXT:    ret double [[E]]
+;
+  %e = call reassoc double @llvm.exp.f64(double %x)
+  %res = call reassoc double @llvm.sqrt.f64(double %e)
+  ret double %res
+}
+
+define double @sqrt_exp_2(double %x) {
+; CHECK-LABEL: @sqrt_exp_2(
+; CHECK-NEXT:    [[MERGED_SQRT:%.*]] = fmul reassoc double [[X:%.*]], 5.000000e-01
+; CHECK-NEXT:    [[E:%.*]] = call reassoc double @exp(double [[MERGED_SQRT]])
+; CHECK-NEXT:    ret double [[E]]
+;
+  %e = call reassoc double @exp(double %x)
+  %res = call reassoc double @sqrt(double %e)
+  ret double %res
+}
+
+define double @sqrt_exp2(double %x) {
+; CHECK-LABEL: @sqrt_exp2(
+; CHECK-NEXT:    [[MERGED_SQRT:%.*]] = fmul reassoc double [[X:%.*]], 5.000000e-01
+; CHECK-NEXT:    [[E:%.*]] = call reassoc double @exp2(double [[MERGED_SQRT]])
+; CHECK-NEXT:    ret double [[E]]
+;
+  %e = call reassoc double @exp2(double %x)
+  %res = call reassoc double @sqrt(double %e)
+  ret double %res
+}
+
+define double @sqrt_exp10(double %x) {
+; CHECK-LABEL: @sqrt_exp10(
+; CHECK-NEXT:    [[MERGED_SQRT:%.*]] = fmul reassoc double [[X:%.*]], 5.000000e-01
+; CHECK-NEXT:    [[E:%.*]] = call reassoc double @exp10(double [[MERGED_SQRT]])
+; CHECK-NEXT:    ret double [[E]]
+;
+  %e = call reassoc double @exp10(double %x)
+  %res = call reassoc double @sqrt(double %e)
+  ret double %res
+}
+
+; Negative test
+define double @sqrt_exp_nofast_1(double %x) {
+; CHECK-LABEL: @sqrt_exp_nofast_1(
+; CHECK-NEXT:    [[E:%.*]] = call double @llvm.exp.f64(double [[X:%.*]])
+; CHECK-NEXT:    [[RES:%.*]] = call reassoc double @llvm.sqrt.f64(double [[E]])
+; CHECK-NEXT:    ret double [[RES]]
+;
+  %e = call double @llvm.exp.f64(double %x)
+  %res = call reassoc double @llvm.sqrt.f64(double %e)
+  ret double %res
+}
+
+; Negative test
+define double @sqrt_exp_nofast_2(double %x) {
+; CHECK-LABEL: @sqrt_exp_nofast_2(
+; CHECK-NEXT:    [[E:%.*]] = call reassoc double @llvm.exp.f64(double [[X:%.*]])
+; CHECK-NEXT:    [[RES:%.*]] = call double @llvm.sqrt.f64(double [[E]])
+; CHECK-NEXT:    ret double [[RES]]
+;
+  %e = call reassoc double @llvm.exp.f64(double %x)
+  %res = call double @llvm.sqrt.f64(double %e)
+  ret double %res
+}
+
+define double @sqrt_exp_merge_constant(double %x, double %y) {
+; CHECK-LABEL: @sqrt_exp_merge_constant(
+; CHECK-NEXT:    [[MERGED_SQRT:%.*]] = fmul reassoc nsz double [[X:%.*]], 5.000000e+00
+; CHECK-NEXT:    [[E:%.*]] = call reassoc double @llvm.exp.f64(double [[MERGED_SQRT]])
+; CHECK-NEXT:    ret double [[E]]
+;
+  %mul = fmul reassoc nsz double %x, 10.0
+  %e = call reassoc double @llvm.exp.f64(double %mul)
+  %res = call reassoc nsz double @llvm.sqrt.f64(double %e)
+  ret double %res
+}
+
+define double @sqrt_exp_intr_and_libcall(double %x) {
+; CHECK-LABEL: @sqrt_exp_intr_and_libcall(
+; CHECK-NEXT:    [[MERGED_SQRT:%.*]] = fmul reassoc double [[X:%.*]], 5.000000e-01
+; CHECK-NEXT:    [[E:%.*]] = call reassoc double @exp(double [[MERGED_SQRT]])
+; CHECK-NEXT:    ret double [[E]]
+;
+  %e = call reassoc double @exp(double %x)
+  %res = call reassoc double @llvm.sqrt.f64(double %e)
+  ret double %res
+}
+
+define double @sqrt_exp_intr_and_libcall_2(double %x) {
+; CHECK-LABEL: @sqrt_exp_intr_and_libcall_2(
+; CHECK-NEXT:    [[MERGED_SQRT:%.*]] = fmul reassoc double [[X:%.*]], 5.000000e-01
+; CHECK-NEXT:    [[E:%.*]] = call reassoc double @llvm.exp.f64(double [[MERGED_SQRT]])
+; CHECK-NEXT:    ret double [[E]]
+;
+  %e = call reassoc double @llvm.exp.f64(double %x)
+  %res = call reassoc double @sqrt(double %e)
+  ret double %res
+}
+
+define <2 x float> @sqrt_exp_vec(<2 x float> %x) {
+; CHECK-LABEL: @sqrt_exp_vec(
+; CHECK-NEXT:    [[MERGED_SQRT:%.*]] = fmul reassoc <2 x float> [[X:%.*]], <float 5.000000e-01, float 5.000000e-01>
+; CHECK-NEXT:    [[E:%.*]] = call reassoc <2 x float> @llvm.exp.v2f32(<2 x float> [[MERGED_SQRT]])
+; CHECK-NEXT:    ret <2 x float> [[E]]
+;
+  %e = call reassoc <2 x float> @llvm.exp.v2f32(<2 x float> %x)
+  %res = call reassoc <2 x float> @llvm.sqrt.v2f32(<2 x float> %e)
+  ret <2 x float> %res
+}
+
+ declare i32 @foo(double)
+ declare double @sqrt(double) readnone
+ declare float @sqrtf(float)
+ declare float @llvm.fabs.f32(float)
+declare double @llvm.exp.f64(double)
+declare double @llvm.sqrt.f64(double)
+declare double @exp(double)
+declare double @exp2(double)
+declare double @exp10(double)
+declare <2 x float> @llvm.exp.v2f32(<2 x float>)
+declare <2 x float> @llvm.sqrt.v2f32(<2 x float>)
+-- 
+2.38.1.windows.1
+
--- a/0030-LICM-Solve-runtime-error-caused-by-the-signal-functi.patch
+++ b/0030-LICM-Solve-runtime-error-caused-by-the-signal-functi.patch
@ -0,0 +1,187 @@
+From fdbf1bd9f1bdec32384eda47f419d895d11a1c50 Mon Sep 17 00:00:00 2001
+From: XingYuShuai <1150775134@qq.com>
+Date: Wed, 15 May 2024 14:42:27 +0800
+Subject: [PATCH] [LICM] Solve runtime error caused by the signal function.
+
+Use the option disable-move-store-ins-outside-of-loop to control whether LICM
+avoids the runtime error caused by the signal function when LTO is turned on.
+---
+ llvm/cmake/modules/HandleLLVMOptions.cmake    |  8 ++++
+ llvm/lib/Transforms/Scalar/LICM.cpp           | 47 +++++++++++++++++++
+ .../Transforms/LICM/signal-before-loop-2.ll   | 25 ++++++++++
+ .../Transforms/LICM/signal-before-loop.ll     | 25 ++++++++++
+ llvm/test/lit.site.cfg.py.in                  |  1 +
+ 5 files changed, 106 insertions(+)
+ create mode 100644 llvm/test/Transforms/LICM/signal-before-loop-2.ll
+ create mode 100644 llvm/test/Transforms/LICM/signal-before-loop.ll
+
+diff --git a/llvm/cmake/modules/HandleLLVMOptions.cmake b/llvm/cmake/modules/HandleLLVMOptions.cmake
+index b8e9dbe29d88..8be5d4ba52c2 100644
+--- a/llvm/cmake/modules/HandleLLVMOptions.cmake
+++ b/llvm/cmake/modules/HandleLLVMOptions.cmake
+@@ -120,6 +120,14 @@ else()
+   set(LLVM_ENABLE_AUTOTUNER 0)
+ endif()
+ 
+option(LLVM_BUILD_FOR_COMMON "" ON)
+if(LLVM_BUILD_FOR_COMMON)
+  set(LLVM_BUILD_FOR_COMMON 1)
+  add_definitions( -DBUILD_FOR_COMMON )
+else()
+  set(LLVM_BUILD_FOR_COMMON 0)
+endif()
+
+ if(LLVM_ENABLE_EXPENSIVE_CHECKS)
+   add_compile_definitions(EXPENSIVE_CHECKS)
+ 
+diff --git a/llvm/lib/Transforms/Scalar/LICM.cpp b/llvm/lib/Transforms/Scalar/LICM.cpp
+index f8fab03f151d..2feec759f240 100644
+--- a/llvm/lib/Transforms/Scalar/LICM.cpp
+++ b/llvm/lib/Transforms/Scalar/LICM.cpp
+@@ -44,6 +44,9 @@
+ #include "llvm/Analysis/AliasSetTracker.h"
+ #include "llvm/Analysis/AssumptionCache.h"
+ #include "llvm/Analysis/CaptureTracking.h"
+#ifdef BUILD_FOR_COMMON
+#include "llvm/Analysis/CFG.h"
+#endif // BUILD_FOR_COMMON
+ #include "llvm/Analysis/GuardUtils.h"
+ #include "llvm/Analysis/LazyBlockFrequencyInfo.h"
+ #include "llvm/Analysis/Loads.h"
+@@ -122,6 +125,13 @@ static cl::opt<bool>
+     SingleThread("licm-force-thread-model-single", cl::Hidden, cl::init(false),
+                  cl::desc("Force thread model single in LICM pass"));
+ 
+#ifdef BUILD_FOR_COMMON
+static cl::opt<bool> DisableMovStoreInsOutsideOfLoopInSigFun(
+    "disable-move-store-ins-outside-of-loop", cl::Hidden, cl::init(true),
+    cl::desc("Disable move store instruction "
+             "outside of loop in signal function."));
+#endif // BUILD_FOR_COMMON
+
+ static cl::opt<uint32_t> MaxNumUsesTraversed(
+     "licm-max-num-uses-traversed", cl::Hidden, cl::init(8),
+     cl::desc("Max num uses visited for identifying load "
+@@ -2075,8 +2085,45 @@ bool llvm::promoteLoopAccessesToScalars(
+     for (Use &U : ASIV->uses()) {
+       // Ignore instructions that are outside the loop.
+       Instruction *UI = dyn_cast<Instruction>(U.getUser());
+ #if defined(BUILD_FOR_COMMON)
+      if (DisableMovStoreInsOutsideOfLoopInSigFun) {
+        if (!UI)
+          continue;
+
+        // In the following scenario, a store of the loop index is moved
+        // outside the loop. When the loop is terminated by a signal, that
+        // store is never executed. However, the handler registered via signal
+        // reads the data the store would have written, so it reads stale data.
+        // Solution: prevent the store instruction from being moved outside
+        // the loop.
+        // NOTE: __sysv_signal takes the same arguments and performs the same
+        // task as signal. Both belong to glibc.
+        if(StoreSafety == StoreSafe && !CurLoop->contains(UI)) {
+          if(LoadInst *NotCurLoopLoad = dyn_cast<LoadInst>(UI)) {
+            Function *NotCurLoopFun = UI->getParent()->getParent();
+            for (Use &UseFun : NotCurLoopFun->uses()) {
+            CallInst *Call = dyn_cast<CallInst>(UseFun.getUser());
+            if (Call && Call->getCalledFunction() &&
+                (Call->getCalledFunction()->getName() == "__sysv_signal" ||
+                 Call->getCalledFunction()->getName() == "signal") &&
+                isPotentiallyReachable(Call->getParent(),
+                                       CurLoop->getLoopPreheader(),NULL,DT,
+                                       LI))
+              return false;
+            }
+          }
+        }
+
+        if (!CurLoop->contains(UI))
+          continue;
+      } else {
+        if (!UI || !CurLoop->contains(UI))
+          continue;
+      }
+#else
+       if (!UI || !CurLoop->contains(UI))
+         continue;
+#endif // BUILD_FOR_COMMON
+ 
+       // If there is an non-load/store instruction in the loop, we can't promote
+       // it.
+diff --git a/llvm/test/Transforms/LICM/signal-before-loop-2.ll b/llvm/test/Transforms/LICM/signal-before-loop-2.ll
+new file mode 100644
+index 000000000000..da878c6c691b
+--- /dev/null
+++ b/llvm/test/Transforms/LICM/signal-before-loop-2.ll
+@@ -0,0 +1,25 @@
+; REQUIRES: enable_build_for_common
+; RUN:opt -disable-move-store-ins-outside-of-loop=true -S < %s | FileCheck %s 
+
+@Run_Index = external global i64
+
+declare ptr @signal(ptr)
+
+define void @report() {
+entry:
+  %0 = load i64, ptr @Run_Index, align 8
+  unreachable
+}
+
+define i32 @main() {
+if.end:
+  %call.i4 = call ptr @signal(ptr @report)
+  br label %for.cond
+
+; CHECK-LABEL: for.cond
+; CHECK: store
+for.cond:
+  %0 = load i64, ptr @Run_Index, align 8
+  store i64 %0, ptr @Run_Index, align 8
+  br label %for.cond
+}
+diff --git a/llvm/test/Transforms/LICM/signal-before-loop.ll b/llvm/test/Transforms/LICM/signal-before-loop.ll
+new file mode 100644
+index 000000000000..cfae4e87db56
+--- /dev/null
+++ b/llvm/test/Transforms/LICM/signal-before-loop.ll
+@@ -0,0 +1,25 @@
+; REQUIRES: enable_build_for_common
+; RUN:opt -disable-move-store-ins-outside-of-loop=true -S < %s | FileCheck %s 
+
+@Run_Index = external global i64
+
+declare ptr @__sysv_signal(ptr)
+
+define void @report() {
+entry:
+  %0 = load i64, ptr @Run_Index, align 8
+  unreachable
+}
+
+define i32 @main() {
+if.end:
+  %call.i4 = call ptr @__sysv_signal(ptr @report)
+  br label %for.cond
+
+; CHECK-LABEL: for.cond
+; CHECK: store
+for.cond:
+  %0 = load i64, ptr @Run_Index, align 8
+  store i64 %0, ptr @Run_Index, align 8
+  br label %for.cond
+}
+diff --git a/llvm/test/lit.site.cfg.py.in b/llvm/test/lit.site.cfg.py.in
+index 0e9396e3b014..20c1ecca1d43 100644
+--- a/llvm/test/lit.site.cfg.py.in
+++ b/llvm/test/lit.site.cfg.py.in
+@@ -63,6 +63,7 @@ config.dxil_tests = @LLVM_INCLUDE_DXIL_TESTS@
+ config.have_llvm_driver = @LLVM_TOOL_LLVM_DRIVER_BUILD@
+ config.use_classic_flang = @LLVM_ENABLE_CLASSIC_FLANG@
+ config.enable_enable_autotuner = @LLVM_ENABLE_AUTOTUNER@
+config.enable_build_for_common = @LLVM_BUILD_FOR_COMMON@
+ 
+ import lit.llvm
+ lit.llvm.initialize(lit_config, config)
+-- 
+2.38.1.windows.1
+
--- a/0031-ACPO-ACPO-Infrastructure.patch
+++ b/0031-ACPO-ACPO-Infrastructure.patch
--- a/0032-ACPO-Introduce-MLInliner-using-ACPO-infrastructure.patch
+++ b/0032-ACPO-Introduce-MLInliner-using-ACPO-infrastructure.patch
--- a/0033-Find-Python3-in-default-env-PATH-for-ACPO.patch
+++ b/0033-Find-Python3-in-default-env-PATH-for-ACPO.patch
@ -0,0 +1,34 @@
+From d4cfa4fd4496735ea45afcd2b0cfb3607cadd1c9 Mon Sep 17 00:00:00 2001
+From: yinrun <lvyinrun@huawei.com>
+Date: Thu, 17 Oct 2024 18:47:40 +0800
+Subject: [PATCH] Find Python3 in default env PATH for ACPO
+
+Look up python3 in the user's PATH instead of hard-coding /usr/bin/python3, so that a Python installation without the AI infrastructure is not picked up by mistake.
+---
+ llvm/lib/Analysis/ACPOMLInterface.cpp | 10 +++++++++-
+ 1 file changed, 9 insertions(+), 1 deletion(-)
+
+diff --git a/llvm/lib/Analysis/ACPOMLInterface.cpp b/llvm/lib/Analysis/ACPOMLInterface.cpp
+index f48eb46638e3..7d84bd5112d6 100644
+--- a/llvm/lib/Analysis/ACPOMLInterface.cpp
+++ b/llvm/lib/Analysis/ACPOMLInterface.cpp
+@@ -146,7 +146,15 @@ ACPOMLPythonInterface::ACPOMLPythonInterface() : NextID{0} {
+   }
+ 
+   int32_t PID = (int32_t) llvm::sys::Process::getProcessId();
+-  std::string ExecPython = "/usr/bin/python3";
+  std::string ExecPython;
+  llvm::ErrorOr<std::string> Res = llvm::sys::findProgramByName("python3");
+  if (std::error_code EC = Res.getError()) {
+      LLVM_DEBUG(dbgs() << "python3 could not be found, error_code " << EC.value() << "\n");
+      return;
+  } else {
+    ExecPython = Res.get();
+    LLVM_DEBUG(dbgs() << "python3 version found in " << ExecPython << "\n");
+  }
+   std::string
+       PythonScript = *Env + "/" + std::string(ACPO_ML_PYTHON_INTERFACE_PY);
+   std::string PIDStr = std::to_string(PID);
+-- 
+2.38.1.windows.1
+
--- a/0034-AArch64-Support-HiSilicon-s-HIP09-sched-model.patch
+++ b/0034-AArch64-Support-HiSilicon-s-HIP09-sched-model.patch
--- a/llvm.spec
+++ b/llvm.spec
@ -1,6 +1,13 @@
 %bcond_without sys_llvm
 %bcond_without check
 %bcond_with classic_flang
+%bcond_with toolchain_clang
+%bcond_without bisheng_autotuner
+%bcond_without ACPO
+
+%if %{with toolchain_clang}
+%global toolchain clang
+%endif

 %global maj_ver 17
 %global min_ver 0
@ -38,7 +45,7 @@

 Name:		%{pkg_name}
 Version:	%{maj_ver}.%{min_ver}.%{patch_ver}
-Release:	14
+Release:	28
 Summary:	The Low Level Virtual Machine

 License:	NCSA
@ -69,7 +76,20 @@ Patch17:	0017-Add-the-support-for-classic-flang.patch
 Patch18:	0018-Fix-declaration-definition-mismatch-for-classic-flang.patch
 Patch19: 	0019-Backport-LoongArch-Improve-the-support-for-atomic-and-clear_cache.patch
 Patch20: 	0020-Update-llvm-lit-config-to-support-build_for_openeule.patch
-Patch21:	0021-Prevent-environment-variables-from-exceeding-NAME_MA.patch
+Patch21:	0021-Add-BiSheng-Autotuner-support-for-LLVM-compiler.patch
+Patch22:	0022-Prevent-environment-variables-from-exceeding-NAME_MA.patch
+Patch23:	0023-AArch64-Support-HiSilicon-s-HIP09-Processor.patch
+Patch24:	0024-Backport-LoongArch-fix-and-add-some-new-support.patch
+Patch25:    0025-Backport-Simple-check-to-ignore-Inline-asm-fwait-insertion.patch
+Patch26:    0026-Add-arch-restriction-for-BiSheng-Autotuner.patch
+Patch27:	0027-AArch64-Delete-hip09-macro.patch
+Patch28:	0028-backport-Clang-Fix-crash-with-fzero-call-used-regs.patch
+Patch29:	0029-SimplifyLibCalls-Merge-sqrt-into-the-power-of-exp-79.patch
+Patch30:	0030-LICM-Solve-runtime-error-caused-by-the-signal-functi.patch
+Patch31:	0031-ACPO-ACPO-Infrastructure.patch
+Patch32:	0032-ACPO-Introduce-MLInliner-using-ACPO-infrastructure.patch
+Patch33:	0033-Find-Python3-in-default-env-PATH-for-ACPO.patch
+Patch34:	0034-AArch64-Support-HiSilicon-s-HIP09-sched-model.patch

 BuildRequires:	binutils-devel
 BuildRequires:	cmake
@ -86,6 +106,9 @@ BuildRequires:	python3-recommonmark
 BuildRequires:	python3-sphinx
 BuildRequires:	python3-setuptools
 BuildRequires:	zlib-devel
+%if %{with toolchain_clang}
+BuildRequires:	clang
+%endif

 Requires:	%{name}-libs%{?_isa} = %{version}-%{release}

@ -185,6 +208,12 @@ pathfix.py -i %{__python3} -pn \
 mkdir -p _build
 cd _build

+%if %{with ACPO}
+  echo "enable ACPO"
+  export CFLAGS="-Wp,-DENABLE_ACPO ${CFLAGS}"
+  export CXXFLAGS="-Wp,-DENABLE_ACPO ${CXXFLAGS}"
+%endif
+
 %cmake	.. -G Ninja \
 	-DBUILD_SHARED_LIBS:BOOL=OFF \
 	-DLLVM_PARALLEL_LINK_JOBS=%{max_link_jobs} \
@ -233,6 +262,13 @@ cd _build
 %endif
 %if %{with classic_flang}
        -DLLVM_ENABLE_CLASSIC_FLANG=ON \
+%endif
+%if "%{toolchain}" == "clang"
+	-DCMAKE_C_COMPILER=clang \
+	-DCMAKE_CXX_COMPILER=clang++ \
+%endif
+%if %{with bisheng_autotuner}
+	-DLLVM_ENABLE_AUTOTUNER=ON \
 %endif
 	-DLLVM_INCLUDE_BENCHMARKS=OFF
 %ninja_build LLVM
@ -356,18 +392,60 @@ LD_LIBRARY_PATH=%{buildroot}/%{install_libdir}  %{__ninja} check-all -C ./_build
 %{install_includedir}/llvm-gmock

 %changelog
-* Wed Sep 11 2024 hongjinghao <hongjinghao@huawei.com> - 17.0.6-14
+* Fri Nov 22 2024 xiajingze <xiajingze1@huawei.com> - 17.0.6-28
+- [AArch64] Support HiSilicon's HIP09 sched model
+
+* Wed Nov 20 2024 eastb233 <xiezhiheng@huawei.com> - 17.0.6-27
+- Find Python3 in default env PATH for ACPO
+
+* Wed Nov 20 2024 eastb233 <xiezhiheng@huawei.com> - 17.0.6-26
+- ACPO Infrastructure for ML integration into LLVM compiler
+
+* Wed Nov 20 2024 eastb233 <xiezhiheng@huawei.com> - 17.0.6-25
+- [LICM] Solve runtime error caused by the signal function.
+
+* Wed Nov 20 2024 eastb233 <xiezhiheng@huawei.com> - 17.0.6-24
+- [SimplifyLibCalls] Merge sqrt into the power of exp (#79146)
+
+* Tue Nov 19 2024 xiajingze <xiajingze1@huawei.com> - 17.0.6-23
+- [backport][Clang] Fix crash with -fzero-call-used-regs
+
+* Mon Nov 18 2024 xiajingze <xiajingze1@huawei.com> - 17.0.6-22
+- [AArch64] Delete hip09 macro
+
+* Mon Nov 18 2024 liyunfei <liyunfei33@huawei.com> - 17.0.6-21
+- Add arch restriction for BiSheng Autotuner
+
+* Mon Nov 18 2024 liyunfei <liyunfei33@huawei.com> - 17.0.6-20
+- [Backport] Simple check to ignore Inline asm fwait insertion
+
+* Mon Sep 23 2024 zhanglimin <zhanglimin@loongson.cn> - 17.0.6-19
+- [LoongArch] Backport some new support
+
+* Thu Sep 12 2024 xiajingze <xiajingze1@huawei.com> - 17.0.6-18
+- [AArch64] Support HiSilicon's HIP09 Processor
+
+* Wed Sep 11 2024 hongjinghao <hongjinghao@huawei.com> - 17.0.6-17
 - doc add Provides llvm-help

-* Tue Sep 10 2024 hongjinghao <hongjinghao@huawei.com> - 17.0.6-13
+* Tue Sep 10 2024 hongjinghao <hongjinghao@huawei.com> - 17.0.6-16
 - doc add Obsoletes llvm-help

-* Thu Sep 5 2024 hongjinghao <hongjinghao@huawei.com> - 17.0.6-12
+* Tue Sep 3 2024 hongjinghao <hongjinghao@huawei.com> - 17.0.6-15
 - mv man to doc subpackage

-* Mon Jul 22 2024 liyunfei <liyunfei33@huawei.com> - 17.0.6-11
+* Mon Jul 22 2024 liyunfei <liyunfei33@huawei.com> - 17.0.6-14
 - Prevent environment variables from exceeding NAME_MAX.

+* Mon Jul 22 2024 liyunfei <liyunfei33@huawei.com> - 17.0.6-13
+- Disable toolchain_clang build for BiSheng Autotuner support temporarily.
+
+* Tue Jul 16 2024 liyunfei <liyunfei33@huawei.com> - 17.0.6-12
+- Add BiSheng Autotuner support.
+
+* Fri Jul 5 2024 liyunfei <liyunfei33@huawei.com> - 17.0.6-11
+- Add toolchain_clang build support
+
 * Mon Apr 29 2024 wangqiang <wangqiang1@kylinos.cn> - 17.0.6-10
 - Update llvm-lit config to support macro `build_for_openeuler`
Author	SHA1	Message	Date
openeuler-ci-bot	4244eb1cd9	!181 [sync] PR-180: [AArch64] Support HiSilicon's HIP09 sched model From: @openeuler-sync-bot Reviewed-by: @eastb233 Signed-off-by: @eastb233	2024-11-25 03:17:59 +00:00
xiajingze	f08325c08c	[AArch64] Support HiSilicon's HIP09 sched model Signed-off-by: xiajingze <xiajingze1@huawei.com> (cherry picked from commit 16c2fa56344da079171f6b9f0151b98deed0af91)	2024-11-25 09:40:17 +08:00
openeuler-ci-bot	ad1341bec0	!177 [sync] PR-176: ACPO Infrastructure for ML integration into LLVM compiler From: @openeuler-sync-bot Reviewed-by: @eastb233 Signed-off-by: @eastb233	2024-11-20 10:50:19 +00:00
eastb233	b3f456737d	Find Python3 in default env PATH for ACPO Sync https://gitee.com/openeuler/llvm-project/pulls/102 (cherry picked from commit 41e289aa66b2e8a068fc4bc21d63661878921d99)	2024-11-20 18:33:41 +08:00
eastb233	8883917445	ACPO Infrastructure for ML integration into LLVM compiler Sync https://gitee.com/openeuler/llvm-project/pulls/89 and build.sh in https://gitee.com/openeuler/llvm-project/pulls/92 (cherry picked from commit 8f5e9315bb0f90e7efa610731832780d27fe6037)	2024-11-20 18:33:41 +08:00
openeuler-ci-bot	791c03733c	!175 [sync] PR-173: [SimplifyLibCalls] Merge sqrt into the power of exp (#79146 ) && [LICM] Solve runtime error caused by the signal function. From: @openeuler-sync-bot Reviewed-by: @eastb233 Signed-off-by: @eastb233	2024-11-20 07:26:43 +00:00
eastb233	d8e386d7fe	[LICM] Solve runtime error caused by the signal function. Sync https://gitee.com/openeuler/llvm-project/pulls/77 (cherry picked from commit fe8d18290462a7e599e706ccf78ffb9ab194dc1a)	2024-11-20 12:03:56 +08:00
eastb233	b38cde7035	[SimplifyLibCalls] Merge sqrt into the power of exp (#79146 ) Sync https://gitee.com/openeuler/llvm-project/pulls/76 (cherry picked from commit e595b466af2018f7d739cb6b6ec24697a2bb8464)	2024-11-20 12:03:56 +08:00
openeuler-ci-bot	b974980003	!172 [sync] PR-171: [AArch64] Delete hip09 macro && [backport][Clang] Fix crash with -fzero-call-used-regs From: @openeuler-sync-bot Reviewed-by: @eastb233 Signed-off-by: @eastb233	2024-11-20 01:19:23 +00:00
xiajingze	a4deed733e	[backport][Clang] Fix crash with -fzero-call-used-regs Signed-off-by: xiajingze <xiajingze1@huawei.com> (cherry picked from commit d549dcb722349678013666446a2e99d17c69e59c)	2024-11-20 00:07:21 +08:00
xiajingze	05329f777b	[AArch64] Delete hip09 macro Signed-off-by: xiajingze <xiajingze1@huawei.com> (cherry picked from commit acb1c24cf2a5c512dc7d61dd58eab94f4a33c7cd)	2024-11-20 00:07:21 +08:00
openeuler-ci-bot	4af6572f3a	!170 [sync] PR-164: Add arch restriction for BiSheng Autotuner From: @openeuler-sync-bot Reviewed-by: @eastb233 Signed-off-by: @eastb233	2024-11-19 10:47:34 +00:00
liyunfei	d54dd2eaf0	Add arch restriction for BiSheng Autotuner Signed-off-by: liyunfei <liyunfei33@huawei.com> (cherry picked from commit 8837457e73d3fc458b8c7f4d687c7fc0d6bb57fb)	2024-11-19 18:46:41 +08:00
openeuler-ci-bot	6a68b69cf9	!168 [sync] PR-166: [Backport] Simple check to ignore Inline asm fwait insertion From: @openeuler-sync-bot Reviewed-by: @eastb233 Signed-off-by: @eastb233	2024-11-19 08:49:49 +00:00
liyunfei	45ff6f68c3	[Backport] Simple check to ignore Inline asm fwait insertion Signed-off-by: liyunfei <liyunfei33@huawei.com> (cherry picked from commit 9083dc26a0b2c3eb6f80adcd469a6181287bbf6e)	2024-11-19 15:18:11 +08:00
openeuler-ci-bot	8943f6328a	!157 [sync] PR-154: [Backport][LoongArch] Fix and add some new support From: @openeuler-sync-bot Reviewed-by: @cf-zhao Signed-off-by: @cf-zhao	2024-10-12 00:34:05 +00:00
Ami-zhang	623acebaa6	[Backport][LoongArch] Fix and add some new support (cherry picked from commit f29e3618e267533beb40d05dc5974488ce5f7847)	2024-10-11 17:49:17 +08:00
openeuler-ci-bot	f7be2638ca	!156 [sync] PR-155: Fix the issue that the date in the changelog is not sorted in descending order. From: @openeuler-sync-bot Reviewed-by: @cf-zhao Signed-off-by: @cf-zhao	2024-10-11 06:38:30 +00:00
cf-zhao	80bf5380a0	Fix the issue that the date in the changelog is not sorted in descending order. (cherry picked from commit c7e8c735ce72bb09fe47dbbd7f53722ff6cd9183)	2024-10-11 09:59:26 +08:00
openeuler-ci-bot	a0725de2c4	!153 [sync] PR-148: [AArch64] Support HiSilicon's HIP09 Processor From: @openeuler-sync-bot Reviewed-by: @liyunfei33 Signed-off-by: @liyunfei33	2024-09-13 07:36:59 +00:00
xiajingze	a8cfa61489	[AArch64] Support HiSilicon's HIP09 Processor (cherry picked from commit 95487e968ff91c07708ed07075820405d5a8b960)	2024-09-12 21:49:19 +08:00
openeuler-ci-bot	4d72b8e986	!152 [sync] PR-151: doc add Provides llvm-help From: @openeuler-sync-bot Reviewed-by: @liyunfei33 Signed-off-by: @liyunfei33	2024-09-12 11:23:11 +00:00
hongjinghao	406b179b49	doc add Provides llvm-help (cherry picked from commit 329d4ab7997e1c4e427b311db986be7215503834)	2024-09-12 09:31:03 +08:00
openeuler-ci-bot	552f089b68	!149 [sync] PR-146: doc add Obsoletes llvm-help From: @openeuler-sync-bot Reviewed-by: @cf-zhao Signed-off-by: @cf-zhao	2024-09-11 01:34:33 +00:00
hongjinghao	3e4b4fcc87	doc add Obsoletes llvm-help (cherry picked from commit 865578178c9bbf96e725dd37b34b9cef587df281)	2024-09-10 16:42:19 +08:00
openeuler-ci-bot	b76688231e	!145 [sync] PR-141: mv man to doc subpackage From: @openeuler-sync-bot Reviewed-by: @liyunfei33 Signed-off-by: @liyunfei33	2024-09-10 01:23:01 +00:00
hongjinghao	99903bd672	mv man to doc subpackage (cherry picked from commit cd66419e417f492591824298d239e51782750ee5)	2024-09-06 21:26:07 +08:00
openeuler-ci-bot	590b980ffc	!144 [sync] PR-140: Prevent environment variables from exceeding NAME_MAX. From: @openeuler-sync-bot Reviewed-by: @liyunfei33 Signed-off-by: @liyunfei33	2024-09-06 13:25:56 +00:00
liyunfei	f3670876e5	Prevent environment variables from exceeding NAME_MAX (cherry picked from commit 4996d19ff15e056b817dc50067b6ea52669c5f24)	2024-09-06 16:17:36 +08:00
openeuler-ci-bot	e5ad654e46	!139 [sync] PR-135: Disable toolchain_clang build for BiSheng Autotuner support temporary From: @openeuler-sync-bot Reviewed-by: @liyunfei33 Signed-off-by: @liyunfei33	2024-09-03 11:11:22 +00:00
liyunfei	069f629df7	Disable toolchain_clang build Disable toolchain_clang build for BiSheng Autotuner support temporary. Signed-off-by: liyunfei <liyunfei33@huawei.com> (cherry picked from commit 4dce262ee770ebe675eaf42d50b13b54348af5e9)	2024-09-03 17:39:19 +08:00
openeuler-ci-bot	520e062268	!138 [sync] PR-125: Add BiSheng Autotuner support From: @openeuler-sync-bot Reviewed-by: @liyunfei33 Signed-off-by: @liyunfei33	2024-09-03 09:38:35 +00:00
liyunfei	aca9c09071	Add BiSheng Autotuner support Signed-off-by: liyunfei <liyunfei33@huawei.com> (cherry picked from commit 776d4f615bc87ec2775ee7e94c44e51ca56da0b2)	2024-09-03 16:31:51 +08:00
openeuler-ci-bot	1087136411	!137 [sync] PR-134: Add toolchain_clang build support From: @openeuler-sync-bot Reviewed-by: @liyunfei33 Signed-off-by: @liyunfei33	2024-09-03 08:31:11 +00:00
liyunfei	88f3e45ed8	Add toolchain_clang build support Signed-off-by: liyunfei <liyunfei33@huawei.com> (cherry picked from commit 02ab7ced7eea43a784dce393877e5c1d7210460d)	2024-09-03 15:01:48 +08:00
openeuler-ci-bot	a5110dd911	!132 [sync] PR-129: Revert "Support stack clash protection" From: @openeuler-sync-bot Reviewed-by: @liyunfei33 Signed-off-by: @liyunfei33	2024-09-03 06:32:36 +00:00
cf-zhao	51f4a7d312	Revert "Support stack clash protection" This reverts commit 4f4298791f15f26e0649f57c6edfd999af51ec41. (cherry picked from commit f9af047c9f0602b71489d2f042fecdbe22ae100f)	2024-05-20 09:04:51 +08:00
openeuler-ci-bot	6d7af1becf	!127 [sync] PR-124: Support stack clash protection From: @openeuler-sync-bot Reviewed-by: @cf-zhao Signed-off-by: @cf-zhao	2024-05-17 02:37:08 +00:00
rickyleung	700751006e	Support stack clash protection (cherry picked from commit 4f4298791f15f26e0649f57c6edfd999af51ec41)	2024-05-13 15:00:04 +08:00
openeuler-ci-bot	ede988ff44	!123 [sync] PR-121: Update llvm-lit config to support macro `build_for_openeuler` From: @openeuler-sync-bot Reviewed-by: @cf-zhao Signed-off-by: @cf-zhao	2024-05-03 00:00:07 +00:00
wangqiang	9327ad0fab	Update llvm-lit config to support macro `build_for_openeuler` (cherry picked from commit 2b03ba072ed723b232d1b29a1be921b2536de495)	2024-05-01 19:19:45 +08:00
openeuler-ci-bot	1a7a96c62f	!119 [sync] PR-113: [Backport][LoongArch] Improve the support for atomic and clear_cache From: @openeuler-sync-bot Reviewed-by: @cf-zhao Signed-off-by: @cf-zhao	2024-04-28 02:57:11 +00:00
Ami-zhang	73499e9115	[Backport][LoongArch] Improve the support for atomic and clear_cache (cherry picked from commit 374a99221881e29f75891c82a38acc4ba65a17e1)	2024-04-26 09:32:57 +08:00
openeuler-ci-bot	a8517f9424	!115 [sync] PR-82: [ClassicFlang] Add the support for classic flang. From: @openeuler-sync-bot Reviewed-by: @cf-zhao Signed-off-by: @cf-zhao	2024-04-26 01:14:54 +00:00
luofeng14	29111d6e55	Add the support for classic-flang (cherry picked from commit c18d0cd9f75f5d7c05818f1dcaef6a3a6ea33232)	2024-04-19 16:00:48 +08:00
openeuler-ci-bot	5b03eda1dd	!112 [sync] PR-110: fix some typo From: @openeuler-sync-bot Reviewed-by: @cf-zhao Signed-off-by: @cf-zhao	2024-04-16 02:18:00 +00:00
liyunfei	cb05b56921	fix some typo (cherry picked from commit ae7f796c92df9a13db28974bc99921eb922c2ef5)	2024-04-16 10:17:33 +08:00
openeuler-ci-bot	df9493e61b	!109 [sync] PR-105: Backport patch to fix CVE-2024-31852 From: @openeuler-sync-bot Reviewed-by: @cf-zhao Signed-off-by: @cf-zhao	2024-04-15 09:44:48 +00:00
liyunfei	ce0a2fbef7	Backport patch to fix CVE-2024-31852 reference: `b1a5ee1feb` and `749384c08e` Signed-off-by: liyunfei <liyunfei33@huawei.com> (cherry picked from commit 332fb03bb7615eb0f23f10d953b8b168a8c319a1)	2024-04-15 14:42:28 +08:00
openeuler-ci-bot	80348b01a4	!107 [sync] PR-97: [Backport][X86][Inline] Skip inline asm in inlining target feature check From: @openeuler-sync-bot Reviewed-by: @cf-zhao Signed-off-by: @cf-zhao	2024-04-15 01:18:02 +00:00
wangqiang	f6bb1b6dd7	[Backport][X86][Inline] Skip inline asm in inlining target feature check reference: `8c6015db59` Signed-off-by: wangqiang <wangqiang1@kylinos.cn> (cherry picked from commit 4606ba60e6d02da53420f910ac047108c3b88c48)	2024-04-13 17:30:06 +08:00
openeuler-ci-bot	410a2c1e56	!101 [sync] PR-87: Backport patch to fix CVE-2023-46049 From: @openeuler-sync-bot Reviewed-by: @cf-zhao Signed-off-by: @cf-zhao	2024-04-13 05:50:31 +00:00
liyunfei	ab7ce29778	Backport patch to fix CVE-2023-46049 reference:`c2515a8f2b` Signed-off-by: liyunfei <liyunfei33@huawei.com> (cherry picked from commit 86e71f1261e4179e36923e08d2b518016a77b835)	2024-04-12 14:46:34 +08:00
openeuler-ci-bot	b78fcfa91d	!99 [sync] PR-86: [Backport][LoongArch] Improve the support for compiler-rt and bugfix From: @openeuler-sync-bot Reviewed-by: @cf-zhao Signed-off-by: @cf-zhao	2024-04-12 06:23:52 +00:00
Ami-zhang	5eae06c472	[Backport][LoongArch] Improve the support for compiler-rt and bugfix (cherry picked from commit b86be6988c4e9c6b488369fe6d74a274827ae1a9)	2024-04-11 20:08:54 +08:00
openeuler-ci-bot	e8b73631a6	!92 [sync] PR-85: [Backport][LoongArch] Add the support for vector in llvm17 From: @openeuler-sync-bot Reviewed-by: @cf-zhao Signed-off-by: @cf-zhao	2024-04-11 12:06:25 +00:00
Ami-zhang	190a438b45	[Backport][LoongArch] Add the support for vector in llvm17 (cherry picked from commit 52d71959de0a5dca6d4dde05a8223685df449d1c)	2024-04-11 09:29:10 +08:00
openeuler-ci-bot	dea14417fd	!90 [sync] PR-84: [Backport][LoongArch] Support `relax` feature on LoongArch in llvm17 From: @openeuler-sync-bot Reviewed-by: @cf-zhao Signed-off-by: @cf-zhao	2024-04-11 01:17:28 +00:00
Ami-zhang	d72a7d64b1	[Backport][LoongArch] Support `relax` feature (cherry picked from commit 8cf6fb10e49140722443997596f2ae55bec9d525)	2024-04-10 16:17:52 +08:00