[SDAG] Add missing ppc_fp128 ExpandFloatRes for sincos[pi] (llvm#128514)

ROCm · Feb 25, 2025 · ea4e19d · ea4e19d
1 parent 5114b9b
commit ea4e19d
Show file tree

Hide file tree

Showing 3 changed files with 160 additions and 0 deletions.
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
@@ -1570,6 +1570,8 @@ void DAGTypeLegalizer::ExpandFloatResult(SDNode *N, unsigned ResNo) {
   case ISD::STRICT_FREM:
   case ISD::FREM:       ExpandFloatRes_FREM(N, Lo, Hi); break;
   case ISD::FMODF:   ExpandFloatRes_FMODF(N); break;
+  case ISD::FSINCOS: ExpandFloatRes_FSINCOS(N); break;
+  case ISD::FSINCOSPI: ExpandFloatRes_FSINCOSPI(N); break;
     // clang-format on
   }
 
@@ -1625,6 +1627,15 @@ void DAGTypeLegalizer::ExpandFloatRes_FMODF(SDNode *N) {
                                        /*CallRetResNo=*/0);
 }
 
+void DAGTypeLegalizer::ExpandFloatRes_FSINCOS(SDNode *N) {
+  ExpandFloatRes_UnaryWithTwoFPResults(N, RTLIB::getSINCOS(N->getValueType(0)));
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_FSINCOSPI(SDNode *N) {
+  ExpandFloatRes_UnaryWithTwoFPResults(N,
+                                       RTLIB::getSINCOSPI(N->getValueType(0)));
+}
+
 void DAGTypeLegalizer::ExpandFloatRes_UnaryWithTwoFPResults(
     SDNode *N, RTLIB::Libcall LC, std::optional<unsigned> CallRetResNo) {
   assert(!N->isStrictFPOpcode() && "strictfp not implemented");

diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
@@ -718,6 +718,8 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer {
   void ExpandFloatRes_LOAD      (SDNode *N, SDValue &Lo, SDValue &Hi);
   void ExpandFloatRes_XINT_TO_FP(SDNode *N, SDValue &Lo, SDValue &Hi);
   void ExpandFloatRes_FMODF(SDNode *N);
+  void ExpandFloatRes_FSINCOS(SDNode* N);
+  void ExpandFloatRes_FSINCOSPI(SDNode* N);
   // clang-format on
 
   // Float Operand Expansion.

diff --git a/llvm/test/CodeGen/PowerPC/llvm.sincos.ll b/llvm/test/CodeGen/PowerPC/llvm.sincos.ll
@@ -0,0 +1,147 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
+; RUN: llc -mcpu=pwr9 -mtriple=powerpc64le-gnu-linux \
+; RUN:   -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names < %s | FileCheck %s
+
+define { ppc_fp128, ppc_fp128 } @test_sincos_ppcf128(ppc_fp128 %a) {
+; CHECK-LABEL: test_sincos_ppcf128:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    mflr r0
+; CHECK-NEXT:    stdu r1, -64(r1)
+; CHECK-NEXT:    std r0, 80(r1)
+; CHECK-NEXT:    .cfi_def_cfa_offset 64
+; CHECK-NEXT:    .cfi_offset lr, 16
+; CHECK-NEXT:    addi r5, r1, 48
+; CHECK-NEXT:    addi r6, r1, 32
+; CHECK-NEXT:    bl sincosl
+; CHECK-NEXT:    nop
+; CHECK-NEXT:    lfd f1, 48(r1)
+; CHECK-NEXT:    lfd f2, 56(r1)
+; CHECK-NEXT:    lfd f3, 32(r1)
+; CHECK-NEXT:    lfd f4, 40(r1)
+; CHECK-NEXT:    addi r1, r1, 64
+; CHECK-NEXT:    ld r0, 16(r1)
+; CHECK-NEXT:    mtlr r0
+; CHECK-NEXT:    blr
+  %result = call { ppc_fp128, ppc_fp128 } @llvm.sincos.ppcf128(ppc_fp128 %a)
+  ret { ppc_fp128, ppc_fp128 } %result
+}
+
+define { ppc_fp128, ppc_fp128 } @test_sincospi_ppcf128(ppc_fp128 %a) {
+; CHECK-LABEL: test_sincospi_ppcf128:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    mflr r0
+; CHECK-NEXT:    stdu r1, -64(r1)
+; CHECK-NEXT:    std r0, 80(r1)
+; CHECK-NEXT:    .cfi_def_cfa_offset 64
+; CHECK-NEXT:    .cfi_offset lr, 16
+; CHECK-NEXT:    addi r5, r1, 48
+; CHECK-NEXT:    addi r6, r1, 32
+; CHECK-NEXT:    bl sincospil
+; CHECK-NEXT:    nop
+; CHECK-NEXT:    lfd f1, 48(r1)
+; CHECK-NEXT:    lfd f2, 56(r1)
+; CHECK-NEXT:    lfd f3, 32(r1)
+; CHECK-NEXT:    lfd f4, 40(r1)
+; CHECK-NEXT:    addi r1, r1, 64
+; CHECK-NEXT:    ld r0, 16(r1)
+; CHECK-NEXT:    mtlr r0
+; CHECK-NEXT:    blr
+  %result = call { ppc_fp128, ppc_fp128 } @llvm.sincospi.ppcf128(ppc_fp128 %a)
+  ret { ppc_fp128, ppc_fp128 } %result
+}
+
+; FIXME: This could be made a tail call with the default expansion of llvm.sincos.
+define void @test_sincos_ppcf128_void_tail_call(ppc_fp128 %a, ptr noalias %out_sin, ptr noalias %out_cos) {
+; CHECK-LABEL: test_sincos_ppcf128_void_tail_call:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    mflr r0
+; CHECK-NEXT:    stdu r1, -32(r1)
+; CHECK-NEXT:    std r0, 48(r1)
+; CHECK-NEXT:    .cfi_def_cfa_offset 32
+; CHECK-NEXT:    .cfi_offset lr, 16
+; CHECK-NEXT:    bl sincosl
+; CHECK-NEXT:    nop
+; CHECK-NEXT:    addi r1, r1, 32
+; CHECK-NEXT:    ld r0, 16(r1)
+; CHECK-NEXT:    mtlr r0
+; CHECK-NEXT:    blr
+  %result = tail call { ppc_fp128, ppc_fp128 } @llvm.sincos.ppcf128(ppc_fp128 %a)
+  %result.0 = extractvalue { ppc_fp128, ppc_fp128 } %result, 0
+  %result.1 = extractvalue { ppc_fp128, ppc_fp128 } %result, 1
+  store ppc_fp128 %result.0, ptr %out_sin, align 16
+  store ppc_fp128 %result.1, ptr %out_cos, align 16
+  ret void
+}
+
+; FIXME: This could be made a tail call with the default expansion of llvm.sincospi.
+define void @test_sincospi_ppcf128_void_tail_call(ppc_fp128 %a, ptr noalias %out_sin, ptr noalias %out_cos) {
+; CHECK-LABEL: test_sincospi_ppcf128_void_tail_call:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    mflr r0
+; CHECK-NEXT:    stdu r1, -32(r1)
+; CHECK-NEXT:    std r0, 48(r1)
+; CHECK-NEXT:    .cfi_def_cfa_offset 32
+; CHECK-NEXT:    .cfi_offset lr, 16
+; CHECK-NEXT:    bl sincospil
+; CHECK-NEXT:    nop
+; CHECK-NEXT:    addi r1, r1, 32
+; CHECK-NEXT:    ld r0, 16(r1)
+; CHECK-NEXT:    mtlr r0
+; CHECK-NEXT:    blr
+  %result = tail call { ppc_fp128, ppc_fp128 } @llvm.sincospi.ppcf128(ppc_fp128 %a)
+  %result.0 = extractvalue { ppc_fp128, ppc_fp128 } %result, 0
+  %result.1 = extractvalue { ppc_fp128, ppc_fp128 } %result, 1
+  store ppc_fp128 %result.0, ptr %out_sin, align 16
+  store ppc_fp128 %result.1, ptr %out_cos, align 16
+  ret void
+}
+
+; NOTE: This would need a struct-return library call for llvm.sincos to become a tail call.
+define { ppc_fp128, ppc_fp128 } @test_sincos_ppcf128_tail_call(ppc_fp128 %a) {
+; CHECK-LABEL: test_sincos_ppcf128_tail_call:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    mflr r0
+; CHECK-NEXT:    stdu r1, -64(r1)
+; CHECK-NEXT:    std r0, 80(r1)
+; CHECK-NEXT:    .cfi_def_cfa_offset 64
+; CHECK-NEXT:    .cfi_offset lr, 16
+; CHECK-NEXT:    addi r5, r1, 48
+; CHECK-NEXT:    addi r6, r1, 32
+; CHECK-NEXT:    bl sincosl
+; CHECK-NEXT:    nop
+; CHECK-NEXT:    lfd f1, 48(r1)
+; CHECK-NEXT:    lfd f2, 56(r1)
+; CHECK-NEXT:    lfd f3, 32(r1)
+; CHECK-NEXT:    lfd f4, 40(r1)
+; CHECK-NEXT:    addi r1, r1, 64
+; CHECK-NEXT:    ld r0, 16(r1)
+; CHECK-NEXT:    mtlr r0
+; CHECK-NEXT:    blr
+  %result = tail call { ppc_fp128, ppc_fp128 } @llvm.sincos.ppcf128(ppc_fp128 %a)
+  ret { ppc_fp128, ppc_fp128 } %result
+}
+
+; NOTE: This would need a struct-return library call for llvm.sincospi to become a tail call.
+define { ppc_fp128, ppc_fp128 } @test_sincospi_ppcf128_tail_call(ppc_fp128 %a) {
+; CHECK-LABEL: test_sincospi_ppcf128_tail_call:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    mflr r0
+; CHECK-NEXT:    stdu r1, -64(r1)
+; CHECK-NEXT:    std r0, 80(r1)
+; CHECK-NEXT:    .cfi_def_cfa_offset 64
+; CHECK-NEXT:    .cfi_offset lr, 16
+; CHECK-NEXT:    addi r5, r1, 48
+; CHECK-NEXT:    addi r6, r1, 32
+; CHECK-NEXT:    bl sincospil
+; CHECK-NEXT:    nop
+; CHECK-NEXT:    lfd f1, 48(r1)
+; CHECK-NEXT:    lfd f2, 56(r1)
+; CHECK-NEXT:    lfd f3, 32(r1)
+; CHECK-NEXT:    lfd f4, 40(r1)
+; CHECK-NEXT:    addi r1, r1, 64
+; CHECK-NEXT:    ld r0, 16(r1)
+; CHECK-NEXT:    mtlr r0
+; CHECK-NEXT:    blr
+  %result = tail call { ppc_fp128, ppc_fp128 } @llvm.sincospi.ppcf128(ppc_fp128 %a)
+  ret { ppc_fp128, ppc_fp128 } %result
+}